X-Git-Url: https://git.dogcows.com/gitweb?p=chaz%2Ftar;a=blobdiff_plain;f=src%2Fsparse.c;h=6a976763419e78a5d5041fe3a01046e787dccdc8;hp=a39d0ed424b5d1c176fe1793ab316fd601b8e150;hb=45ccda119355a1087450039a250359c1d0de0d08;hpb=1b9c48d934b83a36f1192c5bfc2940870e19dd7c diff --git a/src/sparse.c b/src/sparse.c index a39d0ed..6a97676 100644 --- a/src/sparse.c +++ b/src/sparse.c @@ -1,10 +1,10 @@ /* Functions for dealing with sparse files - Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. + Copyright 2003-2007, 2010, 2013-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any later + Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but @@ -13,8 +13,7 @@ Public License for more details. You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <system.h> #include <inttostr.h> @@ -216,43 +215,45 @@ sparse_scan_file (struct tar_sparse_file *file) struct tar_stat_info *st = file->stat_info; int fd = file->fd; char buffer[BLOCKSIZE]; - size_t count; + size_t count = 0; off_t offset = 0; struct sp_array sp = {0, 0}; - if (!lseek_or_error (file, 0)) - return false; - st->archive_file_size = 0; - - if (!tar_sparse_scan (file, scan_begin, NULL)) - return false; - while ((count = safe_read (fd, buffer, sizeof buffer)) != 0 - && count != SAFE_READ_ERROR) + if (ST_NBLOCKS (st->stat) == 0) + offset = st->stat.st_size; + else { - /* Analyze the block. 
*/ - if (zero_block_p (buffer, count)) + if (!tar_sparse_scan (file, scan_begin, NULL)) + return false; + + while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0 + && count != SAFE_READ_ERROR) { - if (sp.numbytes) + /* Analyze the block. */ + if (zero_block_p (buffer, count)) + { + if (sp.numbytes) + { + sparse_add_map (st, &sp); + sp.numbytes = 0; + if (!tar_sparse_scan (file, scan_block, NULL)) + return false; + } + } + else { - sparse_add_map (st, &sp); - sp.numbytes = 0; - if (!tar_sparse_scan (file, scan_block, NULL)) + if (sp.numbytes == 0) + sp.offset = offset; + sp.numbytes += count; + st->archive_file_size += count; + if (!tar_sparse_scan (file, scan_block, buffer)) return false; } - } - else - { - if (sp.numbytes == 0) - sp.offset = offset; - sp.numbytes += count; - st->archive_file_size += count; - if (!tar_sparse_scan (file, scan_block, buffer)) - return false; - } - offset += count; + offset += count; + } } if (sp.numbytes == 0) @@ -333,7 +334,7 @@ sparse_dump_region (struct tar_sparse_file *file, size_t i) static bool sparse_extract_region (struct tar_sparse_file *file, size_t i) { - size_t write_size; + off_t write_size; if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset)) return false; @@ -357,9 +358,10 @@ sparse_extract_region (struct tar_sparse_file *file, size_t i) return false; } set_next_block_after (blk); - count = full_write (file->fd, blk->buffer, wrbytes); + count = blocking_write (file->fd, blk->buffer, wrbytes); write_size -= count; file->dumped_size += count; + mv_size_left (file->stat_info->archive_file_size - file->dumped_size); file->offset += count; if (count != wrbytes) { @@ -396,6 +398,9 @@ sparse_dump_file (int fd, struct tar_stat_info *st) { size_t i; + mv_begin_write (file.stat_info->file_name, + file.stat_info->stat.st_size, + file.stat_info->archive_file_size - file.dumped_size); for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++) rc = tar_sparse_dump_region (&file, i); } @@ -405,15 +410,6 @@ 
sparse_dump_file (int fd, struct tar_stat_info *st) return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short; } -/* Returns true if the file represented by stat is a sparse one */ -bool -sparse_file_p (struct tar_stat_info *st) -{ - return (ST_NBLOCKS (st->stat) - < (st->stat.st_size / ST_NBLOCKSIZE - + (st->stat.st_size % ST_NBLOCKSIZE != 0))); -} - bool sparse_member_p (struct tar_stat_info *st) { @@ -471,7 +467,7 @@ sparse_skip_file (struct tar_stat_info *st) file.fd = -1; rc = tar_sparse_decode_header (&file); - skip_file (file.stat_info->archive_file_size); + skip_file (file.stat_info->archive_file_size - file.dumped_size); return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short; } @@ -513,11 +509,13 @@ check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end) static bool check_data_region (struct tar_sparse_file *file, size_t i) { - size_t size_left; + off_t size_left; if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset)) return false; size_left = file->stat_info->sparse_map[i].numbytes; + mv_size_left (file->stat_info->archive_file_size - file->dumped_size); + while (size_left > 0) { size_t bytes_read; @@ -543,6 +541,7 @@ check_data_region (struct tar_sparse_file *file, size_t i) } file->dumped_size += bytes_read; size_left -= bytes_read; + mv_size_left (file->stat_info->archive_file_size - file->dumped_size); if (memcmp (blk->buffer, diff_buffer, rdsize)) { report_difference (file->stat_info, _("Contents differ")); @@ -566,8 +565,9 @@ sparse_diff_file (int fd, struct tar_stat_info *st) file.stat_info = st; file.fd = fd; file.seekable = true; /* File *must* be seekable for compare to work */ - + rc = tar_sparse_decode_header (&file); + mv_begin_read (st); for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++) { rc = check_sparse_region (&file, @@ -579,6 +579,7 @@ sparse_diff_file (int fd, struct tar_stat_info *st) if (!rc) skip_file (file.stat_info->archive_file_size - 
file.dumped_size); + mv_end (); tar_sparse_done (&file); return rc; @@ -588,18 +589,18 @@ sparse_diff_file (int fd, struct tar_stat_info *st) /* Old GNU Format. The sparse file information is stored in the oldgnu_header in the following manner: - The header is marked with type 'S'. Its `size' field contains + The header is marked with type 'S'. Its 'size' field contains the cumulative size of all non-empty blocks of the file. The - actual file size is stored in `realsize' member of oldgnu_header. + actual file size is stored in 'realsize' member of oldgnu_header. - The map of the file is stored in a list of `struct sparse'. + The map of the file is stored in a list of 'struct sparse'. Each struct contains offset to the block of data and its size (both as octal numbers). The first file header contains at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map - contains more structs, then the field `isextended' of the main - header is set to 1 (binary) and the `struct sparse_header' + contains more structs, then the field 'isextended' of the main + header is set to 1 (binary) and the 'struct sparse_header' header follows, containing at most 21 following structs - (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended' + (SPARSES_IN_SPARSE_HEADER). If more structs follow, 'isextended' field of the extended header is set and next next extension header follows, etc... */ @@ -625,8 +626,9 @@ oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s) if (s->numbytes[0] == '\0') return add_finish; sp.offset = OFF_FROM_HEADER (s->offset); - sp.numbytes = SIZE_FROM_HEADER (s->numbytes); - if (sp.offset < 0 + sp.numbytes = OFF_FROM_HEADER (s->numbytes); + if (sp.offset < 0 || sp.numbytes < 0 + || INT_ADD_OVERFLOW (sp.offset, sp.numbytes) || file->stat_info->stat.st_size < sp.offset + sp.numbytes || file->stat_info->archive_file_size < 0) return add_fail; @@ -640,10 +642,10 @@ oldgnu_fixup_header (struct tar_sparse_file *file) { /* NOTE! 
st_size was initialized from the header which actually contains archived size. The following fixes it */ + off_t realsize = OFF_FROM_HEADER (current_header->oldgnu_header.realsize); file->stat_info->archive_file_size = file->stat_info->stat.st_size; - file->stat_info->stat.st_size = - OFF_FROM_HEADER (current_header->oldgnu_header.realsize); - return true; + file->stat_info->stat.st_size = max (0, realsize); + return 0 <= realsize; } /* Convert old GNU format sparse data to internal representation */ @@ -695,8 +697,8 @@ oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex, { OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset, sp->offset); - SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes, - sp->numbytes); + OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes, + sp->numbytes); } } @@ -731,11 +733,9 @@ oldgnu_dump_header (struct tar_sparse_file *file) oldgnu_store_sparse_info (file, &i, blk->sparse_header.sp, SPARSES_IN_SPARSE_HEADER); - set_next_block_after (blk); if (i < file->stat_info->sparse_map_avail) blk->sparse_header.isextended = 1; - else - break; + set_next_block_after (blk); } return true; } @@ -766,10 +766,10 @@ star_fixup_header (struct tar_sparse_file *file) { /* NOTE! st_size was initialized from the header which actually contains archived size. 
The following fixes it */ + off_t realsize = OFF_FROM_HEADER (current_header->star_in_header.realsize); file->stat_info->archive_file_size = file->stat_info->stat.st_size; - file->stat_info->stat.st_size = - OFF_FROM_HEADER (current_header->star_in_header.realsize); - return true; + file->stat_info->stat.st_size = max (0, realsize); + return 0 <= realsize; } /* Convert STAR format sparse data to internal representation */ @@ -809,6 +809,7 @@ star_get_sparse_info (struct tar_sparse_file *file) set_next_block_after (h); for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++) rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]); + file->dumped_size += BLOCKSIZE; } if (rc == add_fail) @@ -834,16 +835,15 @@ static struct tar_sparse_optab const star_optab = { }; -/* GNU PAX sparse file format. The sparse file map is stored in - x header: +/* GNU PAX sparse file format. There are several versions: + + * 0.0 + + The initial version of sparse format used by tar 1.14-1.15.1. + The sparse file map is stored in x header: GNU.sparse.size Real size of the stored file GNU.sparse.numblocks Number of blocks in the sparse map - GNU.sparse.map Map of non-null data chunks. A string consisting - of comma-separated values "offset,size[,offset,size]..." - - Tar versions 1.14-1.15.1 instead of the latter used: - repeat numblocks time GNU.sparse.offset Offset of the next data block GNU.sparse.numbytes Size of the next data block @@ -851,44 +851,101 @@ static struct tar_sparse_optab const star_optab = { This has been reported as conflicting with the POSIX specs. The reason is that offsets and sizes of non-zero data blocks were stored in multiple - instances of GNU.sparse.offset/GNU.sparse.numbytes variables. However, + instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas POSIX requires the latest occurrence of the variable to override all previous occurrences. - - To avoid this incompatibility new keyword GNU.sparse.map was introduced - in tar 1.15.2. 
Some people might still need the 1.14 way of handling - sparse files for the compatibility reasons: it can be achieved by - specifying `--pax-option delete=GNU.sparse.map' in the command line. - See FIXME-1.14-1.15.1-1.20, below. + To avoid this incompatibility two following versions were introduced. + + * 0.1 + + Used by tar 1.15.2 -- 1.15.91 (alpha releases). + + The sparse file map is stored in + x header: + + GNU.sparse.size Real size of the stored file + GNU.sparse.numblocks Number of blocks in the sparse map + GNU.sparse.map Map of non-null data chunks. A string consisting + of comma-separated values "offset,size[,offset,size]..." + + The resulting GNU.sparse.map string can be *very* long. While POSIX does not + impose any limit on the length of a x header variable, this can confuse some + tars. + + * 1.0 + + Starting from this version, the exact sparse format version is specified + explicitly in the header using the following variables: + + GNU.sparse.major Major version + GNU.sparse.minor Minor version + + X header keeps the following variables: + + GNU.sparse.name Real file name of the sparse file + GNU.sparse.realsize Real size of the stored file (corresponds to the old + GNU.sparse.size variable) + + The name field of the ustar header is constructed using the pattern + "%d/GNUSparseFile.%p/%f". + + The sparse map itself is stored in the file data block, preceding the actual + file data. It consists of a series of decimal numbers of arbitrary length, + delimited by newlines. The map is padded with nulls to the nearest block + boundary. + + The first number gives the number of entries in the map. Following are map + entries, each one consisting of two numbers giving the offset and size of + the data block it describes. + + The format is designed in such a way that non-posix aware tars and tars not + supporting GNU.sparse.* keywords will extract each sparse file in its + condensed form with the file map attached and will place it into a separate + directory. 
Then, using a simple program it would be possible to expand the + file to its original form even without GNU tar. + + By default, v.1.0 archives are created. To use other formats, + --sparse-version option is provided. Additionally, v.0.0 can be obtained + by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1 + --pax-option delete=GNU.sparse.map */ static bool pax_sparse_member_p (struct tar_sparse_file *file) { - return file->stat_info->sparse_map_avail > 0; + return file->stat_info->sparse_map_avail > 0 + || file->stat_info->sparse_major > 0; +} + +/* Start a header that uses the effective (shrunken) file size. */ +static union block * +pax_start_header (struct tar_stat_info *st) +{ + off_t realsize = st->stat.st_size; + union block *blk; + st->stat.st_size = st->archive_file_size; + blk = start_header (st); + st->stat.st_size = realsize; + return blk; } static bool -pax_dump_header (struct tar_sparse_file *file) +pax_dump_header_0 (struct tar_sparse_file *file) { off_t block_ordinal = current_block_ordinal (); union block *blk; size_t i; char nbuf[UINTMAX_STRSIZE_BOUND]; struct sp_array *map = file->stat_info->sparse_map; - + char *save_file_name = NULL; + /* Store the real file size */ xheader_store ("GNU.sparse.size", file->stat_info, NULL); xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL); - /* FIXME-1.14-1.15.1-1.20: See the comment above. - Starting with 1.17 this should display a warning about POSIX-incompatible - keywords being generated. In 1.20, the true branch of the if block below - will be removed and GNU.sparse.map will be marked in xhdr_tab as - protected. 
*/ - - if (xheader_keyword_deleted_p ("GNU.sparse.map")) + if (xheader_keyword_deleted_p ("GNU.sparse.map") + || tar_sparse_minor == 0) { for (i = 0; i < file->stat_info->sparse_map_avail; i++) { @@ -898,21 +955,225 @@ pax_dump_header (struct tar_sparse_file *file) } else { - xheader_string_begin (); + xheader_store ("GNU.sparse.name", file->stat_info, NULL); + save_file_name = file->stat_info->file_name; + file->stat_info->file_name = xheader_format_name (file->stat_info, + "%d/GNUSparseFile.%p/%f", 0); + + xheader_string_begin (&file->stat_info->xhdr); for (i = 0; i < file->stat_info->sparse_map_avail; i++) { if (i) - xheader_string_add (","); - xheader_string_add (umaxtostr (map[i].offset, nbuf)); - xheader_string_add (","); - xheader_string_add (umaxtostr (map[i].numbytes, nbuf)); + xheader_string_add (&file->stat_info->xhdr, ","); + xheader_string_add (&file->stat_info->xhdr, + umaxtostr (map[i].offset, nbuf)); + xheader_string_add (&file->stat_info->xhdr, ","); + xheader_string_add (&file->stat_info->xhdr, + umaxtostr (map[i].numbytes, nbuf)); + } + if (!xheader_string_end (&file->stat_info->xhdr, + "GNU.sparse.map")) + { + free (file->stat_info->file_name); + file->stat_info->file_name = save_file_name; + return false; } - xheader_string_end ("GNU.sparse.map"); } - blk = start_header (file->stat_info); - /* Store the effective (shrunken) file size */ - OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size); + blk = pax_start_header (file->stat_info); finish_header (file->stat_info, blk, block_ordinal); + if (save_file_name) + { + free (file->stat_info->file_name); + file->stat_info->file_name = save_file_name; + } + return true; +} + +static bool +pax_dump_header_1 (struct tar_sparse_file *file) +{ + off_t block_ordinal = current_block_ordinal (); + union block *blk; + char *p, *q; + size_t i; + char nbuf[UINTMAX_STRSIZE_BOUND]; + off_t size = 0; + struct sp_array *map = file->stat_info->sparse_map; + char *save_file_name = 
file->stat_info->file_name; + +#define COPY_STRING(b,dst,src) do \ + { \ + char *endp = b->buffer + BLOCKSIZE; \ + char const *srcp = src; \ + while (*srcp) \ + { \ + if (dst == endp) \ + { \ + set_next_block_after (b); \ + b = find_next_block (); \ + dst = b->buffer; \ + endp = b->buffer + BLOCKSIZE; \ + } \ + *dst++ = *srcp++; \ + } \ + } while (0) + + /* Compute stored file size */ + p = umaxtostr (file->stat_info->sparse_map_avail, nbuf); + size += strlen (p) + 1; + for (i = 0; i < file->stat_info->sparse_map_avail; i++) + { + p = umaxtostr (map[i].offset, nbuf); + size += strlen (p) + 1; + p = umaxtostr (map[i].numbytes, nbuf); + size += strlen (p) + 1; + } + size = (size + BLOCKSIZE - 1) / BLOCKSIZE; + file->stat_info->archive_file_size += size * BLOCKSIZE; + file->dumped_size += size * BLOCKSIZE; + + /* Store sparse file identification */ + xheader_store ("GNU.sparse.major", file->stat_info, NULL); + xheader_store ("GNU.sparse.minor", file->stat_info, NULL); + xheader_store ("GNU.sparse.name", file->stat_info, NULL); + xheader_store ("GNU.sparse.realsize", file->stat_info, NULL); + + file->stat_info->file_name = + xheader_format_name (file->stat_info, "%d/GNUSparseFile.%p/%f", 0); + /* Make sure the created header name is shorter than NAME_FIELD_SIZE: */ + if (strlen (file->stat_info->file_name) > NAME_FIELD_SIZE) + file->stat_info->file_name[NAME_FIELD_SIZE] = 0; + + blk = pax_start_header (file->stat_info); + finish_header (file->stat_info, blk, block_ordinal); + free (file->stat_info->file_name); + file->stat_info->file_name = save_file_name; + + blk = find_next_block (); + q = blk->buffer; + p = umaxtostr (file->stat_info->sparse_map_avail, nbuf); + COPY_STRING (blk, q, p); + COPY_STRING (blk, q, "\n"); + for (i = 0; i < file->stat_info->sparse_map_avail; i++) + { + p = umaxtostr (map[i].offset, nbuf); + COPY_STRING (blk, q, p); + COPY_STRING (blk, q, "\n"); + p = umaxtostr (map[i].numbytes, nbuf); + COPY_STRING (blk, q, p); + COPY_STRING (blk, q, "\n"); 
+ } + memset (q, 0, BLOCKSIZE - (q - blk->buffer)); + set_next_block_after (blk); + return true; +} + +static bool +pax_dump_header (struct tar_sparse_file *file) +{ + file->stat_info->sparse_major = tar_sparse_major; + file->stat_info->sparse_minor = tar_sparse_minor; + + return (file->stat_info->sparse_major == 0) ? + pax_dump_header_0 (file) : pax_dump_header_1 (file); +} + +static bool +decode_num (uintmax_t *num, char const *arg, uintmax_t maxval) +{ + uintmax_t u; + char *arg_lim; + + if (!ISDIGIT (*arg)) + return false; + + errno = 0; + u = strtoumax (arg, &arg_lim, 10); + + if (! (u <= maxval && errno != ERANGE) || *arg_lim) + return false; + + *num = u; + return true; +} + +static bool +pax_decode_header (struct tar_sparse_file *file) +{ + if (file->stat_info->sparse_major > 0) + { + uintmax_t u; + char nbuf[UINTMAX_STRSIZE_BOUND]; + union block *blk; + char *p; + size_t i; + +#define COPY_BUF(b,buf,src) do \ + { \ + char *endp = b->buffer + BLOCKSIZE; \ + char *dst = buf; \ + do \ + { \ + if (dst == buf + UINTMAX_STRSIZE_BOUND -1) \ + { \ + ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \ + file->stat_info->orig_file_name)); \ + return false; \ + } \ + if (src == endp) \ + { \ + set_next_block_after (b); \ + file->dumped_size += BLOCKSIZE; \ + b = find_next_block (); \ + src = b->buffer; \ + endp = b->buffer + BLOCKSIZE; \ + } \ + *dst = *src++; \ + } \ + while (*dst++ != '\n'); \ + dst[-1] = 0; \ + } while (0) + + set_next_block_after (current_header); + file->dumped_size += BLOCKSIZE; + blk = find_next_block (); + p = blk->buffer; + COPY_BUF (blk,nbuf,p); + if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t))) + { + ERROR ((0, 0, _("%s: malformed sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + file->stat_info->sparse_map_size = u; + file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size, + sizeof (*file->stat_info->sparse_map)); + file->stat_info->sparse_map_avail = 0; + for (i = 0; i 
< file->stat_info->sparse_map_size; i++) + { + struct sp_array sp; + + COPY_BUF (blk,nbuf,p); + if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t))) + { + ERROR ((0, 0, _("%s: malformed sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + sp.offset = u; + COPY_BUF (blk,nbuf,p); + if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t))) + { + ERROR ((0, 0, _("%s: malformed sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + sp.numbytes = u; + sparse_add_map (file->stat_info, &sp); + } + set_next_block_after (blk); + } + return true; } @@ -921,8 +1182,8 @@ static struct tar_sparse_optab const pax_optab = { NULL, /* No done function */ pax_sparse_member_p, pax_dump_header, - NULL, /* No decode_header function */ - NULL, /* No fixup_header function */ + NULL, + pax_decode_header, NULL, /* No scan_block function */ sparse_dump_region, sparse_extract_region,