]> Dogcows Code - chaz/tar/blob - src/sparse.c
Support for star sparse format
[chaz/tar] / src / sparse.c
1 /* Functions for dealing with sparse files
2
3 Copyright (C) 2003 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later
8 version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18
19 #include "system.h"
20 #include <quotearg.h>
21 #include "common.h"
22
/* Declared early because the operation table below takes pointers to
   this structure before its members are spelled out.  */
struct tar_sparse_file;

/* Phase of the file scan that is reported to the format-specific
   scan_block hook.  */
enum sparse_scan_state
  {
    scan_begin,			/* passed once, before any block is read */
    scan_block,			/* passed while blocks are being examined */
    scan_end			/* passed once, after the last block */
  };
31
/* Table of format-specific operations.  Any slot may be NULL; the
   tar_sparse_* wrappers decide what a missing hook means.  */
struct tar_sparse_optab
{
  /* Called once before any other operation.  */
  bool (*init) (struct tar_sparse_file *);

  /* Called once after all other operations.  */
  bool (*done) (struct tar_sparse_file *);

  /* Write the archive header(s) describing the sparse map.  */
  bool (*dump_header) (struct tar_sparse_file *);

  /* Read the sparse map from the archive header(s).  */
  bool (*decode_header) (struct tar_sparse_file *);

  /* Observe the progress of a file scan; the last argument is the block
     just read, or NULL.  */
  bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state,
		      void *);

  /* Copy map entry INDEX from the file into the archive.  */
  bool (*dump_region) (struct tar_sparse_file *, size_t index);

  /* Copy map entry INDEX from the archive into the file.  */
  bool (*extract_region) (struct tar_sparse_file *, size_t index);
};
43
/* State shared by all operations on one sparse archive member.  */
struct tar_sparse_file
{
  /* Descriptor of the file on disk.  */
  int fd;

  /* Bytes of member data transferred so far; reset by tar_sparse_init
     and advanced by the dump, extract and compare loops.  */
  size_t dumped_size;

  /* Stat data, sparse map and archived size of the member.  */
  struct tar_stat_info *stat_info;

  /* Operations for the archive format in use.  */
  struct tar_sparse_optab *optab;

  /* Any additional data the optab hooks might require.  */
  void *closure;
};
54
55 static bool
56 tar_sparse_init (struct tar_sparse_file *file)
57 {
58 file->dumped_size = 0;
59 if (file->optab->init)
60 return file->optab->init (file);
61 return true;
62 }
63
64 static bool
65 tar_sparse_done (struct tar_sparse_file *file)
66 {
67 if (file->optab->done)
68 return file->optab->done (file);
69 return true;
70 }
71
72 static bool
73 tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
74 void *block)
75 {
76 if (file->optab->scan_block)
77 return file->optab->scan_block (file, state, block);
78 return true;
79 }
80
81 static bool
82 tar_sparse_dump_region (struct tar_sparse_file *file, size_t index)
83 {
84 if (file->optab->dump_region)
85 return file->optab->dump_region (file, index);
86 return false;
87 }
88
89 static bool
90 tar_sparse_extract_region (struct tar_sparse_file *file, size_t index)
91 {
92 if (file->optab->extract_region)
93 return file->optab->extract_region (file, index);
94 return false;
95 }
96
97 static bool
98 tar_sparse_dump_header (struct tar_sparse_file *file)
99 {
100 if (file->optab->dump_header)
101 return file->optab->dump_header (file);
102 return false;
103 }
104
105 static bool
106 tar_sparse_decode_header (struct tar_sparse_file *file)
107 {
108 if (file->optab->decode_header)
109 return file->optab->decode_header (file);
110 return false;
111 }
112
113 \f
114 static bool
115 lseek_or_error (struct tar_sparse_file *file, off_t offset, int whence)
116 {
117 if (lseek (file->fd, offset, whence) < 0)
118 {
119 seek_diag_details (file->stat_info->orig_file_name, offset);
120 return false;
121 }
122 return true;
123 }
124
/* Return true if the first SIZE bytes of BUFFER are all zero (which is
   trivially the case when SIZE is 0), and false as soon as a nonzero
   byte, i.e. useful data, is found.  */
static bool
zero_block_p (char *buffer, size_t size)
{
  size_t i;

  for (i = 0; i < size; i++)
    if (buffer[i] != '\0')
      return false;
  return true;
}
136
/* Zero one BLOCKSIZE-byte block at P.  The expansion is a single
   expression with no trailing semicolon, so `clear_block (p);' is safe
   in any statement context, including an unbraced if/else arm.  */
#define clear_block(p) memset ((p), 0, BLOCKSIZE)

/* Number of entries allocated for a sparse map when it is first
   created.  */
#define SPARSES_INIT_COUNT SPARSES_IN_SPARSE_HEADER
140
141 static void
142 sparse_add_map (struct tar_sparse_file *file, struct sp_array *sp)
143 {
144 if (file->stat_info->sparse_map == NULL)
145 {
146 file->stat_info->sparse_map =
147 xmalloc (SPARSES_INIT_COUNT * sizeof file->stat_info->sparse_map[0]);
148 file->stat_info->sparse_map_size = SPARSES_INIT_COUNT;
149 }
150 else if (file->stat_info->sparse_map_avail == file->stat_info->sparse_map_size)
151 {
152 file->stat_info->sparse_map_size *= 2;
153 file->stat_info->sparse_map =
154 xrealloc (file->stat_info->sparse_map,
155 file->stat_info->sparse_map_size
156 * sizeof file->stat_info->sparse_map[0]);
157 }
158 file->stat_info->sparse_map[file->stat_info->sparse_map_avail++] = *sp;
159 }
160
161 /* Scan the sparse file and create its map */
162 static bool
163 sparse_scan_file (struct tar_sparse_file *file)
164 {
165 static char buffer[BLOCKSIZE];
166 size_t count;
167 size_t offset = 0;
168 struct sp_array sp = {0, 0};
169
170 if (!lseek_or_error (file, 0, SEEK_SET))
171 return false;
172 clear_block (buffer);
173
174 file->stat_info->sparse_map_size = 0;
175 file->stat_info->archive_file_size = 0;
176
177 if (!tar_sparse_scan (file, scan_begin, NULL))
178 return false;
179
180 while ((count = safe_read (file->fd, buffer, sizeof buffer)) > 0)
181 {
182 /* Analize the block */
183 if (zero_block_p (buffer, count))
184 {
185 if (sp.numbytes)
186 {
187 sparse_add_map (file, &sp);
188 sp.numbytes = 0;
189 if (!tar_sparse_scan (file, scan_block, NULL))
190 return false;
191 }
192 }
193 else
194 {
195 if (sp.numbytes == 0)
196 sp.offset = offset;
197 sp.numbytes += count;
198 file->stat_info->archive_file_size += count;
199 if (!tar_sparse_scan (file, scan_block, buffer))
200 return false;
201 }
202
203 offset += count;
204 clear_block (buffer);
205 }
206
207 if (sp.numbytes == 0)
208 {
209 sp.offset = offset - 1;
210 sp.numbytes = 1;
211 }
212 sparse_add_map (file, &sp);
213 file->stat_info->archive_file_size += count;
214 return tar_sparse_scan (file, scan_end, NULL);
215 }
216
217 static struct tar_sparse_optab oldgnu_optab;
218 static struct tar_sparse_optab star_optab;
219
220 static bool
221 sparse_select_optab (struct tar_sparse_file *file)
222 {
223 switch (current_format == DEFAULT_FORMAT ? archive_format : current_format)
224 {
225 case V7_FORMAT:
226 case USTAR_FORMAT:
227 return false;
228
229 case OLDGNU_FORMAT:
230 case GNU_FORMAT: /*FIXME: This one should disappear? */
231 file->optab = &oldgnu_optab;
232 break;
233
234 case POSIX_FORMAT:
235 /* FIXME: Add method */
236 return false;
237
238 case STAR_FORMAT:
239 file->optab = &star_optab;
240 break;
241
242 default:
243 return false;
244 }
245 return true;
246 }
247
248 static bool
249 sparse_dump_region (struct tar_sparse_file *file, size_t index)
250 {
251 union block *blk;
252 off_t bytes_left = file->stat_info->sparse_map[index].numbytes;
253
254 if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
255 SEEK_SET))
256 return false;
257
258 do
259 {
260 size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
261 off_t bytes_read;
262
263 blk = find_next_block ();
264 memset (blk->buffer, 0, BLOCKSIZE);
265 bytes_read = safe_read (file->fd, blk->buffer, bufsize);
266 if (bytes_read < 0)
267 {
268 read_diag_details (file->stat_info->orig_file_name,
269 file->stat_info->sparse_map[index].offset
270 + file->stat_info->sparse_map[index].numbytes
271 - bytes_left,
272 bufsize);
273 return false;
274 }
275
276 bytes_left -= bytes_read;
277 file->dumped_size += bytes_read;
278 set_next_block_after (blk);
279 }
280 while (bytes_left > 0);
281 return true;
282 }
283
284 static bool
285 sparse_extract_region (struct tar_sparse_file *file, size_t index)
286 {
287 size_t write_size;
288
289 if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
290 SEEK_SET))
291 return false;
292 write_size = file->stat_info->sparse_map[index].numbytes;
293 while (write_size > 0)
294 {
295 size_t count;
296 size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
297 union block *blk = find_next_block ();
298 if (!blk)
299 {
300 ERROR ((0, 0, _("Unexpected EOF in archive")));
301 return false;
302 }
303 set_next_block_after (blk);
304 count = full_write (file->fd, blk->buffer, wrbytes);
305 write_size -= count;
306 file->dumped_size += count;
307 if (count != wrbytes)
308 {
309 write_error_details (file->stat_info->orig_file_name,
310 count, wrbytes);
311 return false;
312 }
313 }
314 return true;
315 }
316
317 \f
318
319 /* Interface functions */
320 enum dump_status
321 sparse_dump_file (int fd, struct tar_stat_info *stat)
322 {
323 bool rc;
324 struct tar_sparse_file file;
325
326 file.stat_info = stat;
327 file.fd = fd;
328
329 if (!sparse_select_optab (&file)
330 || !tar_sparse_init (&file))
331 return dump_status_not_implemented;
332
333 rc = sparse_scan_file (&file);
334 if (rc && file.optab->dump_region)
335 {
336 tar_sparse_dump_header (&file);
337
338 if (fd >= 0)
339 {
340 size_t i;
341
342 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
343 rc = tar_sparse_dump_region (&file, i);
344 }
345 }
346
347 pad_archive(file.stat_info->archive_file_size - file.dumped_size);
348 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
349 }
350
351 /* Returns true if the file represented by stat is a sparse one */
352 bool
353 sparse_file_p (struct tar_stat_info *stat)
354 {
355 return (ST_NBLOCKS (stat->stat)
356 < (stat->stat.st_size / ST_NBLOCKSIZE
357 + (stat->stat.st_size % ST_NBLOCKSIZE != 0)));
358 }
359
360 enum dump_status
361 sparse_extract_file (int fd, struct tar_stat_info *stat, off_t *size)
362 {
363 bool rc = true;
364 struct tar_sparse_file file;
365 size_t i;
366
367 file.stat_info = stat;
368 file.fd = fd;
369
370 if (!sparse_select_optab (&file)
371 || !tar_sparse_init (&file))
372 return dump_status_not_implemented;
373
374 rc = tar_sparse_decode_header (&file);
375 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
376 rc = tar_sparse_extract_region (&file, i);
377 *size = file.stat_info->archive_file_size - file.dumped_size;
378 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
379 }
380
381 \f
382 static char diff_buffer[BLOCKSIZE];
383
384 static bool
385 check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
386 {
387 if (!lseek_or_error (file, beg, SEEK_SET))
388 return false;
389
390 while (beg < end)
391 {
392 size_t bytes_read;
393 size_t rdsize = end - beg;
394
395 if (rdsize > BLOCKSIZE)
396 rdsize = BLOCKSIZE;
397 clear_block (diff_buffer);
398 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
399 if (bytes_read < 0)
400 {
401 read_diag_details (file->stat_info->orig_file_name,
402 beg,
403 rdsize);
404 return false;
405 }
406 if (!zero_block_p (diff_buffer, bytes_read))
407 {
408 report_difference (file->stat_info,
409 _("File fragment at %lu is not a hole"), beg);
410 return false;
411 }
412
413 beg += bytes_read;
414 }
415 return true;
416 }
417
418 static bool
419 check_data_region (struct tar_sparse_file *file, size_t index)
420 {
421 size_t size_left;
422
423 if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
424 SEEK_SET))
425 return false;
426 size_left = file->stat_info->sparse_map[index].numbytes;
427 while (size_left > 0)
428 {
429 size_t bytes_read;
430 size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
431
432 union block *blk = find_next_block ();
433 if (!blk)
434 {
435 ERROR ((0, 0, _("Unexpected EOF in archive")));
436 return false;
437 }
438 set_next_block_after (blk);
439 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
440 if (bytes_read < 0)
441 {
442 read_diag_details (file->stat_info->orig_file_name,
443 file->stat_info->sparse_map[index].offset
444 + file->stat_info->sparse_map[index].numbytes
445 - size_left,
446 rdsize);
447 return false;
448 }
449 file->dumped_size += bytes_read;
450 size_left -= bytes_read;
451 if (memcmp (blk->buffer, diff_buffer, rdsize))
452 {
453 report_difference (file->stat_info, _("Contents differ"));
454 return false;
455 }
456 }
457 return true;
458 }
459
460 bool
461 sparse_diff_file (int fd, struct tar_stat_info *stat)
462 {
463 bool rc = true;
464 struct tar_sparse_file file;
465 size_t i;
466 off_t offset = 0;
467
468 file.stat_info = stat;
469 file.fd = fd;
470
471 if (!sparse_select_optab (&file)
472 || !tar_sparse_init (&file))
473 return dump_status_not_implemented;
474
475 rc = tar_sparse_decode_header (&file);
476 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
477 {
478 rc = check_sparse_region (&file,
479 offset, file.stat_info->sparse_map[i].offset)
480 && check_data_region (&file, i);
481 offset = file.stat_info->sparse_map[i].offset
482 + file.stat_info->sparse_map[i].numbytes;
483 }
484
485 if (rc)
486 skip_file (file.stat_info->archive_file_size - file.dumped_size);
487
488 tar_sparse_done (&file);
489 return rc;
490 }
491
492 \f
493 /* Old GNU Format. The sparse file information is stored in the
494 oldgnu_header in the following manner:
495
496 The header is marked with type 'S'. Its `size' field contains
497 the cumulative size of all non-empty blocks of the file. The
498 actual file size is stored in `realsize' member of oldgnu_header.
499
500 The map of the file is stored in a list of `struct sparse'.
501 Each struct contains offset to the block of data and its
502 size (both as octal numbers). The first file header contains
503 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
504 contains more structs, then the field `isextended' of the main
505 header is set to 1 (binary) and the `struct sparse_header'
506 header follows, containing at most 21 following structs
507 (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
508 field of the extended header is set and the next extension header
509 follows, etc... */
510
/* Outcome of converting one on-disk sparse descriptor.  */
enum oldgnu_add_status
  {
    add_ok,			/* descriptor was valid and was added */
    add_finish,			/* empty descriptor: end of the list */
    add_fail			/* descriptor failed validation */
  };
517
518 /* Add a sparse item to the sparse file and its obstack */
519 static enum oldgnu_add_status
520 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
521 {
522 struct sp_array sp;
523
524 if (s->numbytes[0] == '\0')
525 return add_finish;
526 sp.offset = OFF_FROM_HEADER (s->offset);
527 sp.numbytes = SIZE_FROM_HEADER (s->numbytes);
528 if (sp.offset < 0
529 || file->stat_info->stat.st_size < sp.offset + sp.numbytes
530 || file->stat_info->archive_file_size < 0)
531 return add_fail;
532
533 sparse_add_map (file, &sp);
534 return add_ok;
535 }
536
537 /* Convert old GNU format sparse data to internal representation
538 FIXME: Clubbers current_header! */
539 static bool
540 oldgnu_get_sparse_info (struct tar_sparse_file *file)
541 {
542 size_t i;
543 union block *h = current_header;
544 int ext_p;
545 static enum oldgnu_add_status rc;
546
547 /* FIXME: note this! st_size was initialized from the header
548 which actually contains archived size. The following fixes it */
549 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
550 file->stat_info->stat.st_size =
551 OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
552
553 file->stat_info->sparse_map_size = 0;
554 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
555 {
556 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
557 if (rc != add_ok)
558 break;
559 }
560
561 for (ext_p = h->oldgnu_header.isextended;
562 rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
563 {
564 h = find_next_block ();
565 if (!h)
566 {
567 ERROR ((0, 0, _("Unexpected EOF in archive")));
568 return false;
569 }
570 set_next_block_after (h);
571 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
572 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
573 }
574
575 if (rc == add_fail)
576 {
577 ERROR ((0, 0, _("%s: invalid sparse archive member"),
578 file->stat_info->orig_file_name));
579 return false;
580 }
581 return true;
582 }
583
584 static void
585 oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
586 struct sparse *sp, size_t sparse_size)
587 {
588 for (; *pindex < file->stat_info->sparse_map_avail
589 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
590 {
591 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
592 sp->offset);
593 SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
594 sp->numbytes);
595 }
596 }
597
598 static bool
599 oldgnu_dump_header (struct tar_sparse_file *file)
600 {
601 off_t block_ordinal = current_block_ordinal ();
602 union block *blk;
603 size_t i;
604
605 blk = start_header (file->stat_info);
606 blk->header.typeflag = GNUTYPE_SPARSE;
607 if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
608 blk->oldgnu_header.isextended = 1;
609
610 /* Store the real file size */
611 OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
612 /* Store the effective (shrunken) file size */
613 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
614
615 i = 0;
616 oldgnu_store_sparse_info (file, &i,
617 blk->oldgnu_header.sp,
618 SPARSES_IN_OLDGNU_HEADER);
619 blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
620 finish_header (file->stat_info, blk, block_ordinal);
621
622 while (i < file->stat_info->sparse_map_avail)
623 {
624 blk = find_next_block ();
625 memset (blk->buffer, 0, BLOCKSIZE);
626 oldgnu_store_sparse_info (file, &i,
627 blk->sparse_header.sp,
628 SPARSES_IN_SPARSE_HEADER);
629 set_next_block_after (blk);
630 if (i < file->stat_info->sparse_map_avail)
631 blk->sparse_header.isextended = 1;
632 else
633 break;
634 }
635 return true;
636 }
637
638 static struct tar_sparse_optab oldgnu_optab = {
639 NULL, /* No init function */
640 NULL, /* No done function */
641 oldgnu_dump_header,
642 oldgnu_get_sparse_info,
643 NULL, /* No scan_block function */
644 sparse_dump_region,
645 sparse_extract_region,
646 };
647
648 \f
649 /* Star */
650
651 /* Convert STAR format sparse data to internal representation
652 FIXME: Clubbers current_header! */
653 static bool
654 star_get_sparse_info (struct tar_sparse_file *file)
655 {
656 size_t i;
657 union block *h = current_header;
658 int ext_p;
659 static enum oldgnu_add_status rc;
660
661 /* FIXME: note this! st_size was initialized from the header
662 which actually contains archived size. The following fixes it */
663 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
664 file->stat_info->stat.st_size =
665 OFF_FROM_HEADER (current_header->star_in_header.realsize);
666
667 file->stat_info->sparse_map_size = 0;
668
669 if (h->star_in_header.prefix[0] == '\0'
670 && h->star_in_header.sp[0].offset[10] != '\0')
671 {
672 /* Old star format */
673 for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
674 {
675 rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
676 if (rc != add_ok)
677 break;
678 }
679 ext_p = h->star_in_header.isextended;
680 }
681 else
682 ext_p = 1;
683
684 for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended)
685 {
686 h = find_next_block ();
687 if (!h)
688 {
689 ERROR ((0, 0, _("Unexpected EOF in archive")));
690 return false;
691 }
692 set_next_block_after (h);
693 for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
694 rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
695 }
696
697 if (rc == add_fail)
698 {
699 ERROR ((0, 0, _("%s: invalid sparse archive member"),
700 file->stat_info->orig_file_name));
701 return false;
702 }
703 return true;
704 }
705
706
707 static struct tar_sparse_optab star_optab = {
708 NULL, /* No init function */
709 NULL, /* No done function */
710 NULL,
711 star_get_sparse_info,
712 NULL, /* No scan_block function */
713 NULL, /* No dump region function */
714 sparse_extract_region,
715 };
This page took 0.063779 seconds and 5 git commands to generate.