]> Dogcows Code - chaz/tar/blob - src/sparse.c
Initial implementation of GNU/pax sparse file format.
[chaz/tar] / src / sparse.c
1 /* Functions for dealing with sparse files
2
3 Copyright (C) 2003 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later
8 version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18
19 #include "system.h"
20 #include <quotearg.h>
21 #include "common.h"
22
/* Forward declaration; the operation table below takes pointers to it.  */
struct tar_sparse_file;

/* Phase indicator handed to the optional scan_block hook by
   sparse_scan_file.  */
enum sparse_scan_state
  {
    scan_begin = 0,	/* passed once, before the first block is read */
    scan_block,		/* passed from inside the read loop */
    scan_end		/* passed once, after the read loop */
  };
31
/* Per-format operation table.  Any member may be NULL; the
   tar_sparse_* wrappers decide what a missing hook means.  */
struct tar_sparse_optab
{
  /* Optional set-up and tear-down hooks.  */
  bool (*init) (struct tar_sparse_file *file);
  bool (*done) (struct tar_sparse_file *file);

  /* Write, respectively parse, the format's sparse header.  */
  bool (*dump_header) (struct tar_sparse_file *file);
  bool (*decode_header) (struct tar_sparse_file *file);

  /* Called by the scanner with the current phase and block.  */
  bool (*scan_block) (struct tar_sparse_file *file,
		      enum sparse_scan_state state,
		      void *block);

  /* Archive, respectively restore, the map entry number INDEX.  */
  bool (*dump_region) (struct tar_sparse_file *file, size_t index);
  bool (*extract_region) (struct tar_sparse_file *file, size_t index);
};
43
/* State shared by all sparse-file operations on one archive member.  */
struct tar_sparse_file
{
  /* File descriptor.  */
  int fd;

  /* Number of bytes actually written to the archive.  */
  size_t dumped_size;

  /* Information about the file.  */
  struct tar_stat_info *stat_info;

  /* Operation table of the selected format.  */
  struct tar_sparse_optab *optab;

  /* Any additional data optab calls might require.  */
  void *closure;
};
54
55 static bool
56 tar_sparse_init (struct tar_sparse_file *file)
57 {
58 file->dumped_size = 0;
59 if (file->optab->init)
60 return file->optab->init (file);
61 return true;
62 }
63
64 static bool
65 tar_sparse_done (struct tar_sparse_file *file)
66 {
67 if (file->optab->done)
68 return file->optab->done (file);
69 return true;
70 }
71
72 static bool
73 tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
74 void *block)
75 {
76 if (file->optab->scan_block)
77 return file->optab->scan_block (file, state, block);
78 return true;
79 }
80
81 static bool
82 tar_sparse_dump_region (struct tar_sparse_file *file, size_t index)
83 {
84 if (file->optab->dump_region)
85 return file->optab->dump_region (file, index);
86 return false;
87 }
88
89 static bool
90 tar_sparse_extract_region (struct tar_sparse_file *file, size_t index)
91 {
92 if (file->optab->extract_region)
93 return file->optab->extract_region (file, index);
94 return false;
95 }
96
97 static bool
98 tar_sparse_dump_header (struct tar_sparse_file *file)
99 {
100 if (file->optab->dump_header)
101 return file->optab->dump_header (file);
102 return false;
103 }
104
105 static bool
106 tar_sparse_decode_header (struct tar_sparse_file *file)
107 {
108 if (file->optab->decode_header)
109 return file->optab->decode_header (file);
110 return false;
111 }
112
113 \f
114 static bool
115 lseek_or_error (struct tar_sparse_file *file, off_t offset, int whence)
116 {
117 if (lseek (file->fd, offset, whence) < 0)
118 {
119 seek_diag_details (file->stat_info->orig_file_name, offset);
120 return false;
121 }
122 return true;
123 }
124
/* Return true if the first SIZE bytes of BUFFER are all zero (which
   includes SIZE == 0), false as soon as a nonzero byte is found.  */
static bool
zero_block_p (char *buffer, size_t size)
{
  size_t pos;

  for (pos = 0; pos < size; pos++)
    {
      if (buffer[pos] != '\0')
	return false;
    }
  return true;
}
136
/* Zero the BLOCKSIZE bytes at P.  The expansion is a single expression
   with no trailing semicolon, so `clear_block (p);' is safe anywhere a
   statement is allowed, including the body of an if/else.  */
#define clear_block(p) memset ((p), 0, BLOCKSIZE)

/* Number of entries allocated for a sparse map when it is created.  */
#define SPARSES_INIT_COUNT SPARSES_IN_SPARSE_HEADER
140
141 static void
142 sparse_add_map (struct tar_sparse_file *file, struct sp_array *sp)
143 {
144 if (file->stat_info->sparse_map == NULL)
145 {
146 file->stat_info->sparse_map =
147 xmalloc (SPARSES_INIT_COUNT * sizeof file->stat_info->sparse_map[0]);
148 file->stat_info->sparse_map_size = SPARSES_INIT_COUNT;
149 }
150 else if (file->stat_info->sparse_map_avail == file->stat_info->sparse_map_size)
151 {
152 file->stat_info->sparse_map_size *= 2;
153 file->stat_info->sparse_map =
154 xrealloc (file->stat_info->sparse_map,
155 file->stat_info->sparse_map_size
156 * sizeof file->stat_info->sparse_map[0]);
157 }
158 file->stat_info->sparse_map[file->stat_info->sparse_map_avail++] = *sp;
159 }
160
161 /* Scan the sparse file and create its map */
162 static bool
163 sparse_scan_file (struct tar_sparse_file *file)
164 {
165 static char buffer[BLOCKSIZE];
166 size_t count;
167 size_t offset = 0;
168 struct sp_array sp = {0, 0};
169
170 if (!lseek_or_error (file, 0, SEEK_SET))
171 return false;
172 clear_block (buffer);
173
174 file->stat_info->sparse_map_size = 0;
175 file->stat_info->archive_file_size = 0;
176
177 if (!tar_sparse_scan (file, scan_begin, NULL))
178 return false;
179
180 while ((count = safe_read (file->fd, buffer, sizeof buffer)) > 0)
181 {
182 /* Analize the block */
183 if (zero_block_p (buffer, count))
184 {
185 if (sp.numbytes)
186 {
187 sparse_add_map (file, &sp);
188 sp.numbytes = 0;
189 if (!tar_sparse_scan (file, scan_block, NULL))
190 return false;
191 }
192 }
193 else
194 {
195 if (sp.numbytes == 0)
196 sp.offset = offset;
197 sp.numbytes += count;
198 file->stat_info->archive_file_size += count;
199 if (!tar_sparse_scan (file, scan_block, buffer))
200 return false;
201 }
202
203 offset += count;
204 clear_block (buffer);
205 }
206
207 if (sp.numbytes == 0)
208 {
209 sp.offset = offset - 1;
210 sp.numbytes = 1;
211 }
212 sparse_add_map (file, &sp);
213 file->stat_info->archive_file_size += count;
214 return tar_sparse_scan (file, scan_end, NULL);
215 }
216
217 static struct tar_sparse_optab oldgnu_optab;
218 static struct tar_sparse_optab star_optab;
219 static struct tar_sparse_optab pax_optab;
220
221 static bool
222 sparse_select_optab (struct tar_sparse_file *file)
223 {
224 switch (current_format == DEFAULT_FORMAT ? archive_format : current_format)
225 {
226 case V7_FORMAT:
227 case USTAR_FORMAT:
228 return false;
229
230 case OLDGNU_FORMAT:
231 case GNU_FORMAT: /*FIXME: This one should disappear? */
232 file->optab = &oldgnu_optab;
233 break;
234
235 case POSIX_FORMAT:
236 file->optab = &pax_optab;
237 break;
238
239 case STAR_FORMAT:
240 file->optab = &star_optab;
241 break;
242
243 default:
244 return false;
245 }
246 return true;
247 }
248
249 static bool
250 sparse_dump_region (struct tar_sparse_file *file, size_t index)
251 {
252 union block *blk;
253 off_t bytes_left = file->stat_info->sparse_map[index].numbytes;
254
255 if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
256 SEEK_SET))
257 return false;
258
259 do
260 {
261 size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
262 off_t bytes_read;
263
264 blk = find_next_block ();
265 memset (blk->buffer, 0, BLOCKSIZE);
266 bytes_read = safe_read (file->fd, blk->buffer, bufsize);
267 if (bytes_read < 0)
268 {
269 read_diag_details (file->stat_info->orig_file_name,
270 file->stat_info->sparse_map[index].offset
271 + file->stat_info->sparse_map[index].numbytes
272 - bytes_left,
273 bufsize);
274 return false;
275 }
276
277 bytes_left -= bytes_read;
278 file->dumped_size += bytes_read;
279 set_next_block_after (blk);
280 }
281 while (bytes_left > 0);
282 return true;
283 }
284
285 static bool
286 sparse_extract_region (struct tar_sparse_file *file, size_t index)
287 {
288 size_t write_size;
289
290 if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
291 SEEK_SET))
292 return false;
293 write_size = file->stat_info->sparse_map[index].numbytes;
294 while (write_size > 0)
295 {
296 size_t count;
297 size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
298 union block *blk = find_next_block ();
299 if (!blk)
300 {
301 ERROR ((0, 0, _("Unexpected EOF in archive")));
302 return false;
303 }
304 set_next_block_after (blk);
305 count = full_write (file->fd, blk->buffer, wrbytes);
306 write_size -= count;
307 file->dumped_size += count;
308 if (count != wrbytes)
309 {
310 write_error_details (file->stat_info->orig_file_name,
311 count, wrbytes);
312 return false;
313 }
314 }
315 return true;
316 }
317
318 \f
319
320 /* Interface functions */
321 enum dump_status
322 sparse_dump_file (int fd, struct tar_stat_info *stat)
323 {
324 bool rc;
325 struct tar_sparse_file file;
326
327 file.stat_info = stat;
328 file.fd = fd;
329
330 if (!sparse_select_optab (&file)
331 || !tar_sparse_init (&file))
332 return dump_status_not_implemented;
333
334 rc = sparse_scan_file (&file);
335 if (rc && file.optab->dump_region)
336 {
337 tar_sparse_dump_header (&file);
338
339 if (fd >= 0)
340 {
341 size_t i;
342
343 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
344 rc = tar_sparse_dump_region (&file, i);
345 }
346 }
347
348 pad_archive(file.stat_info->archive_file_size - file.dumped_size);
349 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
350 }
351
352 /* Returns true if the file represented by stat is a sparse one */
353 bool
354 sparse_file_p (struct tar_stat_info *stat)
355 {
356 return (ST_NBLOCKS (stat->stat)
357 < (stat->stat.st_size / ST_NBLOCKSIZE
358 + (stat->stat.st_size % ST_NBLOCKSIZE != 0)));
359 }
360
361 enum dump_status
362 sparse_extract_file (int fd, struct tar_stat_info *stat, off_t *size)
363 {
364 bool rc = true;
365 struct tar_sparse_file file;
366 size_t i;
367
368 file.stat_info = stat;
369 file.fd = fd;
370
371 if (!sparse_select_optab (&file)
372 || !tar_sparse_init (&file))
373 return dump_status_not_implemented;
374
375 rc = tar_sparse_decode_header (&file);
376 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
377 rc = tar_sparse_extract_region (&file, i);
378 *size = file.stat_info->archive_file_size - file.dumped_size;
379 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
380 }
381
382 \f
383 static char diff_buffer[BLOCKSIZE];
384
385 static bool
386 check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
387 {
388 if (!lseek_or_error (file, beg, SEEK_SET))
389 return false;
390
391 while (beg < end)
392 {
393 size_t bytes_read;
394 size_t rdsize = end - beg;
395
396 if (rdsize > BLOCKSIZE)
397 rdsize = BLOCKSIZE;
398 clear_block (diff_buffer);
399 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
400 if (bytes_read < 0)
401 {
402 read_diag_details (file->stat_info->orig_file_name,
403 beg,
404 rdsize);
405 return false;
406 }
407 if (!zero_block_p (diff_buffer, bytes_read))
408 {
409 report_difference (file->stat_info,
410 _("File fragment at %lu is not a hole"), beg);
411 return false;
412 }
413
414 beg += bytes_read;
415 }
416 return true;
417 }
418
419 static bool
420 check_data_region (struct tar_sparse_file *file, size_t index)
421 {
422 size_t size_left;
423
424 if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
425 SEEK_SET))
426 return false;
427 size_left = file->stat_info->sparse_map[index].numbytes;
428 while (size_left > 0)
429 {
430 size_t bytes_read;
431 size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
432
433 union block *blk = find_next_block ();
434 if (!blk)
435 {
436 ERROR ((0, 0, _("Unexpected EOF in archive")));
437 return false;
438 }
439 set_next_block_after (blk);
440 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
441 if (bytes_read < 0)
442 {
443 read_diag_details (file->stat_info->orig_file_name,
444 file->stat_info->sparse_map[index].offset
445 + file->stat_info->sparse_map[index].numbytes
446 - size_left,
447 rdsize);
448 return false;
449 }
450 file->dumped_size += bytes_read;
451 size_left -= bytes_read;
452 if (memcmp (blk->buffer, diff_buffer, rdsize))
453 {
454 report_difference (file->stat_info, _("Contents differ"));
455 return false;
456 }
457 }
458 return true;
459 }
460
461 bool
462 sparse_diff_file (int fd, struct tar_stat_info *stat)
463 {
464 bool rc = true;
465 struct tar_sparse_file file;
466 size_t i;
467 off_t offset = 0;
468
469 file.stat_info = stat;
470 file.fd = fd;
471
472 if (!sparse_select_optab (&file)
473 || !tar_sparse_init (&file))
474 return dump_status_not_implemented;
475
476 rc = tar_sparse_decode_header (&file);
477 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
478 {
479 rc = check_sparse_region (&file,
480 offset, file.stat_info->sparse_map[i].offset)
481 && check_data_region (&file, i);
482 offset = file.stat_info->sparse_map[i].offset
483 + file.stat_info->sparse_map[i].numbytes;
484 }
485
486 if (rc)
487 skip_file (file.stat_info->archive_file_size - file.dumped_size);
488
489 tar_sparse_done (&file);
490 return rc;
491 }
492
493 \f
494 /* Old GNU Format. The sparse file information is stored in the
495 oldgnu_header in the following manner:
496
497 The header is marked with type 'S'. Its `size' field contains
498 the cumulative size of all non-empty blocks of the file. The
499 actual file size is stored in `realsize' member of oldgnu_header.
500
501 The map of the file is stored in a list of `struct sparse'.
502 Each struct contains offset to the block of data and its
503 size (both as octal numbers). The first file header contains
504 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
505 contains more structs, then the field `isextended' of the main
506 header is set to 1 (binary) and the `struct sparse_header'
507 header follows, containing at most 21 following structs
508 (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
509 field of the extended header is set and the next extension header
510 follows, etc... */
511
/* Result of adding one header entry to the sparse map.  */
enum oldgnu_add_status
  {
    add_ok = 0,		/* entry stored; keep going */
    add_finish,		/* empty entry met: end of the list */
    add_fail		/* entry rejected as invalid */
  };
518
519 /* Add a sparse item to the sparse file and its obstack */
520 static enum oldgnu_add_status
521 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
522 {
523 struct sp_array sp;
524
525 if (s->numbytes[0] == '\0')
526 return add_finish;
527 sp.offset = OFF_FROM_HEADER (s->offset);
528 sp.numbytes = SIZE_FROM_HEADER (s->numbytes);
529 if (sp.offset < 0
530 || file->stat_info->stat.st_size < sp.offset + sp.numbytes
531 || file->stat_info->archive_file_size < 0)
532 return add_fail;
533
534 sparse_add_map (file, &sp);
535 return add_ok;
536 }
537
538 /* Convert old GNU format sparse data to internal representation
539 FIXME: Clubbers current_header! */
540 static bool
541 oldgnu_get_sparse_info (struct tar_sparse_file *file)
542 {
543 size_t i;
544 union block *h = current_header;
545 int ext_p;
546 static enum oldgnu_add_status rc;
547
548 /* FIXME: note this! st_size was initialized from the header
549 which actually contains archived size. The following fixes it */
550 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
551 file->stat_info->stat.st_size =
552 OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
553
554 file->stat_info->sparse_map_size = 0;
555 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
556 {
557 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
558 if (rc != add_ok)
559 break;
560 }
561
562 for (ext_p = h->oldgnu_header.isextended;
563 rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
564 {
565 h = find_next_block ();
566 if (!h)
567 {
568 ERROR ((0, 0, _("Unexpected EOF in archive")));
569 return false;
570 }
571 set_next_block_after (h);
572 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
573 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
574 }
575
576 if (rc == add_fail)
577 {
578 ERROR ((0, 0, _("%s: invalid sparse archive member"),
579 file->stat_info->orig_file_name));
580 return false;
581 }
582 return true;
583 }
584
585 static void
586 oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
587 struct sparse *sp, size_t sparse_size)
588 {
589 for (; *pindex < file->stat_info->sparse_map_avail
590 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
591 {
592 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
593 sp->offset);
594 SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
595 sp->numbytes);
596 }
597 }
598
599 static bool
600 oldgnu_dump_header (struct tar_sparse_file *file)
601 {
602 off_t block_ordinal = current_block_ordinal ();
603 union block *blk;
604 size_t i;
605
606 blk = start_header (file->stat_info);
607 blk->header.typeflag = GNUTYPE_SPARSE;
608 if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
609 blk->oldgnu_header.isextended = 1;
610
611 /* Store the real file size */
612 OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
613 /* Store the effective (shrunken) file size */
614 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
615
616 i = 0;
617 oldgnu_store_sparse_info (file, &i,
618 blk->oldgnu_header.sp,
619 SPARSES_IN_OLDGNU_HEADER);
620 blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
621 finish_header (file->stat_info, blk, block_ordinal);
622
623 while (i < file->stat_info->sparse_map_avail)
624 {
625 blk = find_next_block ();
626 memset (blk->buffer, 0, BLOCKSIZE);
627 oldgnu_store_sparse_info (file, &i,
628 blk->sparse_header.sp,
629 SPARSES_IN_SPARSE_HEADER);
630 set_next_block_after (blk);
631 if (i < file->stat_info->sparse_map_avail)
632 blk->sparse_header.isextended = 1;
633 else
634 break;
635 }
636 return true;
637 }
638
639 static struct tar_sparse_optab oldgnu_optab = {
640 NULL, /* No init function */
641 NULL, /* No done function */
642 oldgnu_dump_header,
643 oldgnu_get_sparse_info,
644 NULL, /* No scan_block function */
645 sparse_dump_region,
646 sparse_extract_region,
647 };
648
649 \f
650 /* Star */
651
652 /* Convert STAR format sparse data to internal representation
653 FIXME: Clubbers current_header! */
654 static bool
655 star_get_sparse_info (struct tar_sparse_file *file)
656 {
657 size_t i;
658 union block *h = current_header;
659 int ext_p;
660 static enum oldgnu_add_status rc;
661
662 /* FIXME: note this! st_size was initialized from the header
663 which actually contains archived size. The following fixes it */
664 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
665 file->stat_info->stat.st_size =
666 OFF_FROM_HEADER (current_header->star_in_header.realsize);
667
668 file->stat_info->sparse_map_size = 0;
669
670 if (h->star_in_header.prefix[0] == '\0'
671 && h->star_in_header.sp[0].offset[10] != '\0')
672 {
673 /* Old star format */
674 for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
675 {
676 rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
677 if (rc != add_ok)
678 break;
679 }
680 ext_p = h->star_in_header.isextended;
681 }
682 else
683 ext_p = 1;
684
685 for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended)
686 {
687 h = find_next_block ();
688 if (!h)
689 {
690 ERROR ((0, 0, _("Unexpected EOF in archive")));
691 return false;
692 }
693 set_next_block_after (h);
694 for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
695 rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
696 }
697
698 if (rc == add_fail)
699 {
700 ERROR ((0, 0, _("%s: invalid sparse archive member"),
701 file->stat_info->orig_file_name));
702 return false;
703 }
704 return true;
705 }
706
707
708 static struct tar_sparse_optab star_optab = {
709 NULL, /* No init function */
710 NULL, /* No done function */
711 NULL,
712 star_get_sparse_info,
713 NULL, /* No scan_block function */
714 NULL, /* No dump region function */
715 sparse_extract_region,
716 };
717
718 \f
719 /* GNU PAX sparse file format. The sparse file map is stored in
720 x header:
721
722 GNU.sparse.size Real size of the stored file
723 GNU.sparse.numblocks Number of blocks in the sparse map
724 repeat numblocks times
725 GNU.sparse.offset Offset of the next data block
726 GNU.sparse.numbytes Size of the next data block
727 end repeat
728 */
729
730 static bool
731 pax_dump_header (struct tar_sparse_file *file)
732 {
733 off_t block_ordinal = current_block_ordinal ();
734 union block *blk;
735 size_t i;
736
737 /* Store the real file size */
738 xheader_store ("GNU.sparse.size", file->stat_info, NULL);
739 xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL);
740 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
741 {
742 xheader_store ("GNU.sparse.offset", file->stat_info, &i);
743 xheader_store ("GNU.sparse.numbytes", file->stat_info, &i);
744 }
745
746 blk = start_header (file->stat_info);
747 /* Store the effective (shrunken) file size */
748 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
749 finish_header (file->stat_info, blk, block_ordinal);
750 return true;
751 }
752
753 static bool
754 pax_decode_header (struct tar_sparse_file *file)
755 {
756 /* Restore actual size */
757 size_t s = file->stat_info->archive_file_size;
758 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
759 file->stat_info->stat.st_size = s;
760 return true;
761 }
762
763 static struct tar_sparse_optab pax_optab = {
764 NULL, /* No init function */
765 NULL, /* No done function */
766 pax_dump_header,
767 pax_decode_header,
768 NULL, /* No scan_block function */
769 sparse_dump_region,
770 sparse_extract_region,
771 };
772
This page took 0.064447 seconds and 5 git commands to generate.