/* Functions for dealing with sparse files
- Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright 2003-2007, 2010, 2013-2014 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 2, or (at your option) any later
+ Free Software Foundation; either version 3, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but
Public License for more details.
You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+ with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <system.h>
#include <inttostr.h>
struct tar_stat_info *st = file->stat_info;
int fd = file->fd;
char buffer[BLOCKSIZE];
- size_t count;
+ size_t count = 0;
off_t offset = 0;
struct sp_array sp = {0, 0};
- if (!lseek_or_error (file, 0))
- return false;
-
st->archive_file_size = 0;
-
- if (!tar_sparse_scan (file, scan_begin, NULL))
- return false;
- while ((count = safe_read (fd, buffer, sizeof buffer)) != 0
- && count != SAFE_READ_ERROR)
+ if (ST_NBLOCKS (st->stat) == 0)
+ offset = st->stat.st_size;
+ else
{
- /* Analyze the block. */
- if (zero_block_p (buffer, count))
+ if (!tar_sparse_scan (file, scan_begin, NULL))
+ return false;
+
+ while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0
+ && count != SAFE_READ_ERROR)
{
- if (sp.numbytes)
+ /* Analyze the block. */
+ if (zero_block_p (buffer, count))
+ {
+ if (sp.numbytes)
+ {
+ sparse_add_map (st, &sp);
+ sp.numbytes = 0;
+ if (!tar_sparse_scan (file, scan_block, NULL))
+ return false;
+ }
+ }
+ else
{
- sparse_add_map (st, &sp);
- sp.numbytes = 0;
- if (!tar_sparse_scan (file, scan_block, NULL))
+ if (sp.numbytes == 0)
+ sp.offset = offset;
+ sp.numbytes += count;
+ st->archive_file_size += count;
+ if (!tar_sparse_scan (file, scan_block, buffer))
return false;
}
- }
- else
- {
- if (sp.numbytes == 0)
- sp.offset = offset;
- sp.numbytes += count;
- st->archive_file_size += count;
- if (!tar_sparse_scan (file, scan_block, buffer))
- return false;
- }
- offset += count;
+ offset += count;
+ }
}
if (sp.numbytes == 0)
static bool
sparse_extract_region (struct tar_sparse_file *file, size_t i)
{
- size_t write_size;
+ off_t write_size;
if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
return false;
return false;
}
set_next_block_after (blk);
- count = full_write (file->fd, blk->buffer, wrbytes);
+ count = blocking_write (file->fd, blk->buffer, wrbytes);
write_size -= count;
file->dumped_size += count;
mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
{
size_t i;
+ mv_begin_write (file.stat_info->file_name,
+ file.stat_info->stat.st_size,
+ file.stat_info->archive_file_size - file.dumped_size);
for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
rc = tar_sparse_dump_region (&file, i);
}
return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
}
-/* Returns true if the file represented by stat is a sparse one */
-bool
-sparse_file_p (struct tar_stat_info *st)
-{
- return (ST_NBLOCKS (st->stat)
- < (st->stat.st_size / ST_NBLOCKSIZE
- + (st->stat.st_size % ST_NBLOCKSIZE != 0)));
-}
-
bool
sparse_member_p (struct tar_stat_info *st)
{
file.fd = -1;
rc = tar_sparse_decode_header (&file);
- skip_file (file.stat_info->archive_file_size);
+ skip_file (file.stat_info->archive_file_size - file.dumped_size);
return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
}
static bool
check_data_region (struct tar_sparse_file *file, size_t i)
{
- size_t size_left;
+ off_t size_left;
if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
return false;
size_left = file->stat_info->sparse_map[i].numbytes;
mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
-
+
while (size_left > 0)
{
size_t bytes_read;
file.stat_info = st;
file.fd = fd;
file.seekable = true; /* File *must* be seekable for compare to work */
-
+
rc = tar_sparse_decode_header (&file);
- mv_begin (st);
+ mv_begin_read (st);
for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
{
rc = check_sparse_region (&file,
if (!rc)
skip_file (file.stat_info->archive_file_size - file.dumped_size);
mv_end ();
-
+
tar_sparse_done (&file);
return rc;
}
/* Old GNU Format. The sparse file information is stored in the
oldgnu_header in the following manner:
- The header is marked with type 'S'. Its `size' field contains
+ The header is marked with type 'S'. Its 'size' field contains
the cumulative size of all non-empty blocks of the file. The
- actual file size is stored in `realsize' member of oldgnu_header.
+ actual file size is stored in 'realsize' member of oldgnu_header.
- The map of the file is stored in a list of `struct sparse'.
+ The map of the file is stored in a list of 'struct sparse'.
Each struct contains offset to the block of data and its
size (both as octal numbers). The first file header contains
at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
- contains more structs, then the field `isextended' of the main
- header is set to 1 (binary) and the `struct sparse_header'
+ contains more structs, then the field 'isextended' of the main
+ header is set to 1 (binary) and the 'struct sparse_header'
header follows, containing at most 21 following structs
- (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
+ (SPARSES_IN_SPARSE_HEADER). If more structs follow, 'isextended'
field of the extended header is set and next next extension header
follows, etc... */
if (s->numbytes[0] == '\0')
return add_finish;
sp.offset = OFF_FROM_HEADER (s->offset);
- sp.numbytes = SIZE_FROM_HEADER (s->numbytes);
- if (sp.offset < 0
+ sp.numbytes = OFF_FROM_HEADER (s->numbytes);
+ if (sp.offset < 0 || sp.numbytes < 0
+ || INT_ADD_OVERFLOW (sp.offset, sp.numbytes)
|| file->stat_info->stat.st_size < sp.offset + sp.numbytes
|| file->stat_info->archive_file_size < 0)
return add_fail;
{
/* NOTE! st_size was initialized from the header
which actually contains archived size. The following fixes it */
+ off_t realsize = OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
file->stat_info->archive_file_size = file->stat_info->stat.st_size;
- file->stat_info->stat.st_size =
- OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
- return true;
+ file->stat_info->stat.st_size = max (0, realsize);
+ return 0 <= realsize;
}
/* Convert old GNU format sparse data to internal representation */
{
OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
sp->offset);
- SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
- sp->numbytes);
+ OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
+ sp->numbytes);
}
}
oldgnu_store_sparse_info (file, &i,
blk->sparse_header.sp,
SPARSES_IN_SPARSE_HEADER);
- set_next_block_after (blk);
if (i < file->stat_info->sparse_map_avail)
blk->sparse_header.isextended = 1;
- else
- break;
+ set_next_block_after (blk);
}
return true;
}
{
/* NOTE! st_size was initialized from the header
which actually contains archived size. The following fixes it */
+ off_t realsize = OFF_FROM_HEADER (current_header->star_in_header.realsize);
file->stat_info->archive_file_size = file->stat_info->stat.st_size;
- file->stat_info->stat.st_size =
- OFF_FROM_HEADER (current_header->star_in_header.realsize);
- return true;
+ file->stat_info->stat.st_size = max (0, realsize);
+ return 0 <= realsize;
}
/* Convert STAR format sparse data to internal representation */
set_next_block_after (h);
for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
+ file->dumped_size += BLOCKSIZE;
}
if (rc == add_fail)
};
\f
-/* GNU PAX sparse file format. The sparse file map is stored in
- x header:
+/* GNU PAX sparse file format. There are several versions:
+
+ * 0.0
+
+ The initial version of sparse format used by tar 1.14-1.15.1.
+ The sparse file map is stored in x header:
GNU.sparse.size Real size of the stored file
GNU.sparse.numblocks Number of blocks in the sparse map
- GNU.sparse.map Map of non-null data chunks. A string consisting
- of comma-separated values "offset,size[,offset,size]..."
-
- Tar versions 1.14-1.15.1 instead of the latter used:
-
repeat numblocks time
GNU.sparse.offset Offset of the next data block
GNU.sparse.numbytes Size of the next data block
This has been reported as conflicting with the POSIX specs. The reason is
that offsets and sizes of non-zero data blocks were stored in multiple
- instances of GNU.sparse.offset/GNU.sparse.numbytes variables. However,
+ instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas
POSIX requires the latest occurrence of the variable to override all
previous occurrences.
-
- To avoid this incompatibility new keyword GNU.sparse.map was introduced
- in tar 1.15.2. Some people might still need the 1.14 way of handling
- sparse files for the compatibility reasons: it can be achieved by
- specifying `--pax-option delete=GNU.sparse.map' in the command line.
- See FIXME-1.14-1.15.1-1.20, below.
+ To avoid this incompatibility two following versions were introduced.
+
+ * 0.1
+
+ Used by tar 1.15.2 -- 1.15.91 (alpha releases).
+
+ The sparse file map is stored in
+ x header:
+
+ GNU.sparse.size Real size of the stored file
+ GNU.sparse.numblocks Number of blocks in the sparse map
+ GNU.sparse.map Map of non-null data chunks. A string consisting
+ of comma-separated values "offset,size[,offset,size]..."
+
+ The resulting GNU.sparse.map string can be *very* long. While POSIX does not
+ impose any limit on the length of a x header variable, this can confuse some
+ tars.
+
+ * 1.0
+
+ Starting from this version, the exact sparse format version is specified
+ explicitely in the header using the following variables:
+
+ GNU.sparse.major Major version
+ GNU.sparse.minor Minor version
+
+ X header keeps the following variables:
+
+ GNU.sparse.name Real file name of the sparse file
+ GNU.sparse.realsize Real size of the stored file (corresponds to the old
+ GNU.sparse.size variable)
+
+ The name field of the ustar header is constructed using the pattern
+ "%d/GNUSparseFile.%p/%f".
+
+ The sparse map itself is stored in the file data block, preceding the actual
+ file data. It consists of a series of octal numbers of arbitrary length,
+ delimited by newlines. The map is padded with nulls to the nearest block
+ boundary.
+
+ The first number gives the number of entries in the map. Following are map
+ entries, each one consisting of two numbers giving the offset and size of
+ the data block it describes.
+
+ The format is designed in such a way that non-posix aware tars and tars not
+ supporting GNU.sparse.* keywords will extract each sparse file in its
+ condensed form with the file map attached and will place it into a separate
+ directory. Then, using a simple program it would be possible to expand the
+ file to its original form even without GNU tar.
+
+ Bu default, v.1.0 archives are created. To use other formats,
+ --sparse-version option is provided. Additionally, v.0.0 can be obtained
+ by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1
+ --pax-option delete=GNU.sparse.map
*/
static bool
pax_sparse_member_p (struct tar_sparse_file *file)
{
- return file->stat_info->sparse_map_avail > 0;
+ return file->stat_info->sparse_map_avail > 0
+ || file->stat_info->sparse_major > 0;
+}
+
+/* Start a header that uses the effective (shrunken) file size. */
+static union block *
+pax_start_header (struct tar_stat_info *st)
+{
+ off_t realsize = st->stat.st_size;
+ union block *blk;
+ st->stat.st_size = st->archive_file_size;
+ blk = start_header (st);
+ st->stat.st_size = realsize;
+ return blk;
}
static bool
-pax_dump_header (struct tar_sparse_file *file)
+pax_dump_header_0 (struct tar_sparse_file *file)
{
off_t block_ordinal = current_block_ordinal ();
union block *blk;
size_t i;
char nbuf[UINTMAX_STRSIZE_BOUND];
struct sp_array *map = file->stat_info->sparse_map;
-
+ char *save_file_name = NULL;
+
/* Store the real file size */
xheader_store ("GNU.sparse.size", file->stat_info, NULL);
xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL);
- /* FIXME-1.14-1.15.1-1.20: See the comment above.
- Starting with 1.17 this should display a warning about POSIX-incompatible
- keywords being generated. In 1.20, the true branch of the if block below
- will be removed and GNU.sparse.map will be marked in xhdr_tab as
- protected. */
-
- if (xheader_keyword_deleted_p ("GNU.sparse.map"))
+ if (xheader_keyword_deleted_p ("GNU.sparse.map")
+ || tar_sparse_minor == 0)
{
for (i = 0; i < file->stat_info->sparse_map_avail; i++)
{
}
else
{
- xheader_string_begin ();
+ xheader_store ("GNU.sparse.name", file->stat_info, NULL);
+ save_file_name = file->stat_info->file_name;
+ file->stat_info->file_name = xheader_format_name (file->stat_info,
+ "%d/GNUSparseFile.%p/%f", 0);
+
+ xheader_string_begin (&file->stat_info->xhdr);
for (i = 0; i < file->stat_info->sparse_map_avail; i++)
{
if (i)
- xheader_string_add (",");
- xheader_string_add (umaxtostr (map[i].offset, nbuf));
- xheader_string_add (",");
- xheader_string_add (umaxtostr (map[i].numbytes, nbuf));
+ xheader_string_add (&file->stat_info->xhdr, ",");
+ xheader_string_add (&file->stat_info->xhdr,
+ umaxtostr (map[i].offset, nbuf));
+ xheader_string_add (&file->stat_info->xhdr, ",");
+ xheader_string_add (&file->stat_info->xhdr,
+ umaxtostr (map[i].numbytes, nbuf));
+ }
+ if (!xheader_string_end (&file->stat_info->xhdr,
+ "GNU.sparse.map"))
+ {
+ free (file->stat_info->file_name);
+ file->stat_info->file_name = save_file_name;
+ return false;
}
- xheader_string_end ("GNU.sparse.map");
}
- blk = start_header (file->stat_info);
- /* Store the effective (shrunken) file size */
- OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
+ blk = pax_start_header (file->stat_info);
finish_header (file->stat_info, blk, block_ordinal);
+ if (save_file_name)
+ {
+ free (file->stat_info->file_name);
+ file->stat_info->file_name = save_file_name;
+ }
+ return true;
+}
+
+static bool
+pax_dump_header_1 (struct tar_sparse_file *file)
+{
+ off_t block_ordinal = current_block_ordinal ();
+ union block *blk;
+ char *p, *q;
+ size_t i;
+ char nbuf[UINTMAX_STRSIZE_BOUND];
+ off_t size = 0;
+ struct sp_array *map = file->stat_info->sparse_map;
+ char *save_file_name = file->stat_info->file_name;
+
+#define COPY_STRING(b,dst,src) do \
+ { \
+ char *endp = b->buffer + BLOCKSIZE; \
+ char const *srcp = src; \
+ while (*srcp) \
+ { \
+ if (dst == endp) \
+ { \
+ set_next_block_after (b); \
+ b = find_next_block (); \
+ dst = b->buffer; \
+ endp = b->buffer + BLOCKSIZE; \
+ } \
+ *dst++ = *srcp++; \
+ } \
+ } while (0)
+
+ /* Compute stored file size */
+ p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
+ size += strlen (p) + 1;
+ for (i = 0; i < file->stat_info->sparse_map_avail; i++)
+ {
+ p = umaxtostr (map[i].offset, nbuf);
+ size += strlen (p) + 1;
+ p = umaxtostr (map[i].numbytes, nbuf);
+ size += strlen (p) + 1;
+ }
+ size = (size + BLOCKSIZE - 1) / BLOCKSIZE;
+ file->stat_info->archive_file_size += size * BLOCKSIZE;
+ file->dumped_size += size * BLOCKSIZE;
+
+ /* Store sparse file identification */
+ xheader_store ("GNU.sparse.major", file->stat_info, NULL);
+ xheader_store ("GNU.sparse.minor", file->stat_info, NULL);
+ xheader_store ("GNU.sparse.name", file->stat_info, NULL);
+ xheader_store ("GNU.sparse.realsize", file->stat_info, NULL);
+
+ file->stat_info->file_name =
+ xheader_format_name (file->stat_info, "%d/GNUSparseFile.%p/%f", 0);
+ /* Make sure the created header name is shorter than NAME_FIELD_SIZE: */
+ if (strlen (file->stat_info->file_name) > NAME_FIELD_SIZE)
+ file->stat_info->file_name[NAME_FIELD_SIZE] = 0;
+
+ blk = pax_start_header (file->stat_info);
+ finish_header (file->stat_info, blk, block_ordinal);
+ free (file->stat_info->file_name);
+ file->stat_info->file_name = save_file_name;
+
+ blk = find_next_block ();
+ q = blk->buffer;
+ p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
+ COPY_STRING (blk, q, p);
+ COPY_STRING (blk, q, "\n");
+ for (i = 0; i < file->stat_info->sparse_map_avail; i++)
+ {
+ p = umaxtostr (map[i].offset, nbuf);
+ COPY_STRING (blk, q, p);
+ COPY_STRING (blk, q, "\n");
+ p = umaxtostr (map[i].numbytes, nbuf);
+ COPY_STRING (blk, q, p);
+ COPY_STRING (blk, q, "\n");
+ }
+ memset (q, 0, BLOCKSIZE - (q - blk->buffer));
+ set_next_block_after (blk);
+ return true;
+}
+
+static bool
+pax_dump_header (struct tar_sparse_file *file)
+{
+ file->stat_info->sparse_major = tar_sparse_major;
+ file->stat_info->sparse_minor = tar_sparse_minor;
+
+ return (file->stat_info->sparse_major == 0) ?
+ pax_dump_header_0 (file) : pax_dump_header_1 (file);
+}
+
+static bool
+decode_num (uintmax_t *num, char const *arg, uintmax_t maxval)
+{
+ uintmax_t u;
+ char *arg_lim;
+
+ if (!ISDIGIT (*arg))
+ return false;
+
+ errno = 0;
+ u = strtoumax (arg, &arg_lim, 10);
+
+ if (! (u <= maxval && errno != ERANGE) || *arg_lim)
+ return false;
+
+ *num = u;
+ return true;
+}
+
+static bool
+pax_decode_header (struct tar_sparse_file *file)
+{
+ if (file->stat_info->sparse_major > 0)
+ {
+ uintmax_t u;
+ char nbuf[UINTMAX_STRSIZE_BOUND];
+ union block *blk;
+ char *p;
+ size_t i;
+
+#define COPY_BUF(b,buf,src) do \
+ { \
+ char *endp = b->buffer + BLOCKSIZE; \
+ char *dst = buf; \
+ do \
+ { \
+ if (dst == buf + UINTMAX_STRSIZE_BOUND -1) \
+ { \
+ ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \
+ file->stat_info->orig_file_name)); \
+ return false; \
+ } \
+ if (src == endp) \
+ { \
+ set_next_block_after (b); \
+ file->dumped_size += BLOCKSIZE; \
+ b = find_next_block (); \
+ src = b->buffer; \
+ endp = b->buffer + BLOCKSIZE; \
+ } \
+ *dst = *src++; \
+ } \
+ while (*dst++ != '\n'); \
+ dst[-1] = 0; \
+ } while (0)
+
+ set_next_block_after (current_header);
+ file->dumped_size += BLOCKSIZE;
+ blk = find_next_block ();
+ p = blk->buffer;
+ COPY_BUF (blk,nbuf,p);
+ if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t)))
+ {
+ ERROR ((0, 0, _("%s: malformed sparse archive member"),
+ file->stat_info->orig_file_name));
+ return false;
+ }
+ file->stat_info->sparse_map_size = u;
+ file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size,
+ sizeof (*file->stat_info->sparse_map));
+ file->stat_info->sparse_map_avail = 0;
+ for (i = 0; i < file->stat_info->sparse_map_size; i++)
+ {
+ struct sp_array sp;
+
+ COPY_BUF (blk,nbuf,p);
+ if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
+ {
+ ERROR ((0, 0, _("%s: malformed sparse archive member"),
+ file->stat_info->orig_file_name));
+ return false;
+ }
+ sp.offset = u;
+ COPY_BUF (blk,nbuf,p);
+ if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
+ {
+ ERROR ((0, 0, _("%s: malformed sparse archive member"),
+ file->stat_info->orig_file_name));
+ return false;
+ }
+ sp.numbytes = u;
+ sparse_add_map (file->stat_info, &sp);
+ }
+ set_next_block_after (blk);
+ }
+
return true;
}
NULL, /* No done function */
pax_sparse_member_p,
pax_dump_header,
- NULL, /* No decode_header function */
- NULL, /* No fixup_header function */
+ NULL,
+ pax_decode_header,
NULL, /* No scan_block function */
sparse_dump_region,
sparse_extract_region,