]> Dogcows Code - chaz/tar/blob - src/sparse.c
(sparse_diff_file): New function
[chaz/tar] / src / sparse.c
1 /* Functions for dealing with sparse files
2
3 Copyright (C) 2003 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later
8 version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18
19 #include "system.h"
20 #include <quotearg.h>
21 #include "common.h"
22
/* Forward declaration: the method table below takes pointers to this
   structure before its definition is seen.  */
struct tar_sparse_file;

/* Phase indicator handed to the `scan_block' method while a file is
   being scanned.  */
enum sparse_scan_state
  {
    scan_begin,			/* sent once, before any data is read */
    scan_block,			/* sent while blocks are being read */
    scan_end			/* sent once, after the last block */
  };
31
/* Table of format-specific methods.  A NULL `init', `done' or
   `scan_block' entry is treated as a successful no-op by the dispatch
   wrappers; a NULL entry for any other method makes the corresponding
   wrapper fail.  */
struct tar_sparse_optab
{
  /* Called once before any other method.  */
  bool (*init) (struct tar_sparse_file *);

  /* Called once when processing of the file is finished.  */
  bool (*done) (struct tar_sparse_file *);

  /* Write the member header(s) describing the sparse map.  */
  bool (*dump_header) (struct tar_sparse_file *);

  /* Read the sparse map from the member header(s).  */
  bool (*decode_header) (struct tar_sparse_file *);

  /* Scanning hook; receives the scan phase and a block pointer
     (NULL when there is no data block to hand over).  */
  bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state,
		      void *);

  /* Copy map entry INDEX from the file to the archive.  */
  bool (*dump_region) (struct tar_sparse_file *, size_t index);

  /* Copy map entry INDEX from the archive to the file.  */
  bool (*extract_region) (struct tar_sparse_file *, size_t index);
};
43
/* State shared by all sparse-file operations on a single file.  */
struct tar_sparse_file
{
  int fd;			/* File descriptor */
  size_t dumped_size;		/* Number of bytes actually transferred
				   to or from the archive so far */
  struct tar_stat_info *stat_info; /* Information about the file */
  struct tar_sparse_optab *optab;  /* Format-specific methods */
  void *closure;		/* Any additional data optab calls might
				   require */
};
54
55 static bool
56 tar_sparse_init (struct tar_sparse_file *file)
57 {
58 file->dumped_size = 0;
59 if (file->optab->init)
60 return file->optab->init (file);
61 return true;
62 }
63
64 static bool
65 tar_sparse_done (struct tar_sparse_file *file)
66 {
67 if (file->optab->done)
68 return file->optab->done (file);
69 return true;
70 }
71
72 static bool
73 tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
74 void *block)
75 {
76 if (file->optab->scan_block)
77 return file->optab->scan_block (file, state, block);
78 return true;
79 }
80
81 static bool
82 tar_sparse_dump_region (struct tar_sparse_file *file, size_t index)
83 {
84 if (file->optab->dump_region)
85 return file->optab->dump_region (file, index);
86 return false;
87 }
88
89 static bool
90 tar_sparse_extract_region (struct tar_sparse_file *file, size_t index)
91 {
92 if (file->optab->extract_region)
93 return file->optab->extract_region (file, index);
94 return false;
95 }
96
97 static bool
98 tar_sparse_dump_header (struct tar_sparse_file *file)
99 {
100 if (file->optab->dump_header)
101 return file->optab->dump_header (file);
102 return false;
103 }
104
105 static bool
106 tar_sparse_decode_header (struct tar_sparse_file *file)
107 {
108 if (file->optab->decode_header)
109 return file->optab->decode_header (file);
110 return false;
111 }
112
113 \f
114 static bool
115 lseek_or_error (struct tar_sparse_file *file, off_t offset, int whence)
116 {
117 if (lseek (file->fd, offset, whence) < 0)
118 {
119 seek_diag_details (file->stat_info->orig_file_name, offset);
120 return false;
121 }
122 return true;
123 }
124
/* Return true if the first SIZE bytes of BUFFER are all zero (which
   is trivially the case when SIZE is 0), false as soon as a nonzero
   byte, i.e. useful data, is found.  */
static bool
zero_block_p (char *buffer, size_t size)
{
  size_t i;

  for (i = 0; i < size; i++)
    if (buffer[i] != '\0')
      return false;
  return true;
}
136
/* Fill the BLOCKSIZE-byte buffer P with zeros.  The expansion
   deliberately has no trailing semicolon, so that `clear_block (p);'
   is a single statement and may be used in an unbraced if/else.  */
#define clear_block(p) memset (p, 0, BLOCKSIZE)

/* Number of entries initially allocated for a sparse map.  */
#define SPARSES_INIT_COUNT SPARSES_IN_SPARSE_HEADER
140
141 static void
142 sparse_add_map (struct tar_sparse_file *file, struct sp_array *sp)
143 {
144 if (file->stat_info->sparse_map == NULL)
145 {
146 file->stat_info->sparse_map =
147 xmalloc (SPARSES_INIT_COUNT * sizeof file->stat_info->sparse_map[0]);
148 file->stat_info->sparse_map_size = SPARSES_INIT_COUNT;
149 }
150 else if (file->stat_info->sparse_map_avail == file->stat_info->sparse_map_size)
151 {
152 file->stat_info->sparse_map_size *= 2;
153 file->stat_info->sparse_map =
154 xrealloc (file->stat_info->sparse_map,
155 file->stat_info->sparse_map_size
156 * sizeof file->stat_info->sparse_map[0]);
157 }
158 file->stat_info->sparse_map[file->stat_info->sparse_map_avail++] = *sp;
159 }
160
161 /* Scan the sparse file and create its map */
162 static bool
163 sparse_scan_file (struct tar_sparse_file *file)
164 {
165 static char buffer[BLOCKSIZE];
166 size_t count;
167 size_t offset = 0;
168 struct sp_array sp = {0, 0};
169
170 if (!lseek_or_error (file, 0, SEEK_SET))
171 return false;
172 clear_block (buffer);
173
174 file->stat_info->sparse_map_size = 0;
175 file->stat_info->archive_file_size = 0;
176
177 if (!tar_sparse_scan (file, scan_begin, NULL))
178 return false;
179
180 while ((count = safe_read (file->fd, buffer, sizeof buffer)) > 0)
181 {
182 /* Analize the block */
183 if (zero_block_p (buffer, count))
184 {
185 if (sp.numbytes)
186 {
187 sparse_add_map (file, &sp);
188 sp.numbytes = 0;
189 if (!tar_sparse_scan (file, scan_block, NULL))
190 return false;
191 }
192 }
193 else
194 {
195 if (sp.numbytes == 0)
196 sp.offset = offset;
197 sp.numbytes += count;
198 file->stat_info->archive_file_size += count;
199 if (!tar_sparse_scan (file, scan_block, buffer))
200 return false;
201 }
202
203 offset += count;
204 clear_block (buffer);
205 }
206
207 if (sp.numbytes == 0)
208 {
209 sp.offset = offset - 1;
210 sp.numbytes = 1;
211 }
212 sparse_add_map (file, &sp);
213 file->stat_info->archive_file_size += count;
214 return tar_sparse_scan (file, scan_end, NULL);
215 }
216
217 static struct tar_sparse_optab oldgnu_optab;
218
219 static bool
220 sparse_select_optab (struct tar_sparse_file *file)
221 {
222 switch (archive_format)
223 {
224 case V7_FORMAT:
225 case USTAR_FORMAT:
226 return false;
227
228 case OLDGNU_FORMAT:
229 case GNU_FORMAT: /*FIXME: This one should disappear? */
230 file->optab = &oldgnu_optab;
231 break;
232
233 case POSIX_FORMAT:
234 case STAR_FORMAT:
235 /* FIXME: Add methods */
236 return false;
237
238 default:
239 break;
240 }
241 return true;
242 }
243
244 static bool
245 sparse_dump_region (struct tar_sparse_file *file, size_t index)
246 {
247 union block *blk;
248 off_t bytes_left = file->stat_info->sparse_map[index].numbytes;
249
250 if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
251 SEEK_SET))
252 return false;
253
254 do
255 {
256 size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
257 off_t bytes_read;
258
259 blk = find_next_block ();
260 memset (blk->buffer, 0, BLOCKSIZE);
261 bytes_read = safe_read (file->fd, blk->buffer, bufsize);
262 if (bytes_read < 0)
263 {
264 read_diag_details (file->stat_info->orig_file_name,
265 file->stat_info->sparse_map[index].offset
266 + file->stat_info->sparse_map[index].numbytes
267 - bytes_left,
268 bufsize);
269 return false;
270 }
271
272 bytes_left -= bytes_read;
273 file->dumped_size += bytes_read;
274 set_next_block_after (blk);
275 }
276 while (bytes_left > 0);
277 return true;
278 }
279
280 static bool
281 sparse_extract_region (struct tar_sparse_file *file, size_t index)
282 {
283 size_t write_size;
284
285 if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
286 SEEK_SET))
287 return false;
288 write_size = file->stat_info->sparse_map[index].numbytes;
289 while (write_size > 0)
290 {
291 size_t count;
292 size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
293 union block *blk = find_next_block ();
294 if (!blk)
295 {
296 ERROR ((0, 0, _("Unexpected EOF in archive")));
297 return false;
298 }
299 set_next_block_after (blk);
300 count = full_write (file->fd, blk->buffer, wrbytes);
301 write_size -= count;
302 file->dumped_size += count;
303 if (count != wrbytes)
304 {
305 write_error_details (file->stat_info->orig_file_name,
306 count, wrbytes);
307 return false;
308 }
309 }
310 return true;
311 }
312
313 \f
314
315 /* Interface functions */
316 enum dump_status
317 sparse_dump_file (int fd, struct tar_stat_info *stat)
318 {
319 bool rc;
320 struct tar_sparse_file file;
321
322 file.stat_info = stat;
323 file.fd = fd;
324
325 if (!sparse_select_optab (&file)
326 || !tar_sparse_init (&file))
327 return dump_status_not_implemented;
328
329 rc = sparse_scan_file (&file);
330 if (rc && file.optab->dump_region)
331 {
332 tar_sparse_dump_header (&file);
333
334 if (fd >= 0)
335 {
336 size_t i;
337
338 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
339 rc = tar_sparse_dump_region (&file, i);
340 }
341 }
342
343 pad_archive(file.stat_info->archive_file_size - file.dumped_size);
344 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
345 }
346
347 /* Returns true if the file represented by stat is a sparse one */
348 bool
349 sparse_file_p (struct tar_stat_info *stat)
350 {
351 return (ST_NBLOCKS (stat->stat)
352 < (stat->stat.st_size / ST_NBLOCKSIZE
353 + (stat->stat.st_size % ST_NBLOCKSIZE != 0)));
354 }
355
356 enum dump_status
357 sparse_extract_file (int fd, struct tar_stat_info *stat, off_t *size)
358 {
359 bool rc = true;
360 struct tar_sparse_file file;
361 size_t i;
362
363 file.stat_info = stat;
364 file.fd = fd;
365
366 if (!sparse_select_optab (&file)
367 || !tar_sparse_init (&file))
368 return dump_status_not_implemented;
369
370 rc = tar_sparse_decode_header (&file);
371 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
372 rc = tar_sparse_extract_region (&file, i);
373 *size = file.stat_info->archive_file_size - file.dumped_size;
374 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
375 }
376
377 \f
378 static char diff_buffer[BLOCKSIZE];
379
380 static bool
381 check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
382 {
383 if (!lseek_or_error (file, beg, SEEK_SET))
384 return false;
385
386 while (beg < end)
387 {
388 size_t bytes_read;
389 size_t rdsize = end - beg;
390
391 if (rdsize > BLOCKSIZE)
392 rdsize = BLOCKSIZE;
393 clear_block (diff_buffer);
394 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
395 if (bytes_read < 0)
396 {
397 read_diag_details (file->stat_info->orig_file_name,
398 beg,
399 rdsize);
400 return false;
401 }
402 if (!zero_block_p (diff_buffer, bytes_read))
403 {
404 report_difference (file->stat_info,
405 _("File fragment at %lu is not a hole"), beg);
406 return false;
407 }
408
409 beg += bytes_read;
410 }
411 return true;
412 }
413
414 static bool
415 check_data_region (struct tar_sparse_file *file, size_t index)
416 {
417 size_t size_left;
418
419 if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
420 SEEK_SET))
421 return false;
422 size_left = file->stat_info->sparse_map[index].numbytes;
423 while (size_left > 0)
424 {
425 size_t bytes_read;
426 size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
427
428 union block *blk = find_next_block ();
429 if (!blk)
430 {
431 ERROR ((0, 0, _("Unexpected EOF in archive")));
432 return false;
433 }
434 set_next_block_after (blk);
435 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
436 if (bytes_read < 0)
437 {
438 read_diag_details (file->stat_info->orig_file_name,
439 file->stat_info->sparse_map[index].offset
440 + file->stat_info->sparse_map[index].numbytes
441 - size_left,
442 rdsize);
443 return false;
444 }
445 file->dumped_size += bytes_read;
446 size_left -= bytes_read;
447 if (memcmp (blk->buffer, diff_buffer, rdsize))
448 {
449 report_difference (file->stat_info, _("Contents differ"));
450 return false;
451 }
452 }
453 return true;
454 }
455
456 bool
457 sparse_diff_file (int fd, struct tar_stat_info *stat)
458 {
459 bool rc = true;
460 struct tar_sparse_file file;
461 size_t i;
462 off_t offset = 0;
463
464 file.stat_info = stat;
465 file.fd = fd;
466
467 if (!sparse_select_optab (&file)
468 || !tar_sparse_init (&file))
469 return dump_status_not_implemented;
470
471 rc = tar_sparse_decode_header (&file);
472 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
473 {
474 rc = check_sparse_region (&file,
475 offset, file.stat_info->sparse_map[i].offset)
476 && check_data_region (&file, i);
477 offset = file.stat_info->sparse_map[i].offset
478 + file.stat_info->sparse_map[i].numbytes;
479 }
480
481 if (rc)
482 skip_file (file.stat_info->archive_file_size - file.dumped_size);
483
484 tar_sparse_done (&file);
485 return rc;
486 }
487
488 \f
/* Old GNU Format.  The sparse file information is stored in the
   oldgnu_header in the following manner:

   The header is marked with type 'S'.  Its `size' field contains
   the cumulative size of all non-empty blocks of the file.  The
   actual file size is stored in `realsize' member of oldgnu_header.

   The map of the file is stored in a list of `struct sparse'.
   Each struct contains offset to the block of data and its
   size (both as octal numbers).  The first file header contains
   at most 4 such structs (SPARSES_IN_OLDGNU_HEADER).  If the map
   contains more structs, then the field `isextended' of the main
   header is set to 1 (binary) and the `struct sparse_header'
   header follows, containing at most 21 following structs
   (SPARSES_IN_SPARSE_HEADER).  If more structs follow, the
   `isextended' field of the extended header is set and the next
   extension header follows, etc...  */

/* Outcome of decoding a single `struct sparse' entry.  */
enum oldgnu_add_status
  {
    add_ok,			/* entry was valid and has been stored */
    add_finish,			/* empty entry: the map ends here */
    add_fail			/* entry is inconsistent */
  };
513
514 /* Add a sparse item to the sparse file and its obstack */
515 static enum oldgnu_add_status
516 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
517 {
518 struct sp_array sp;
519
520 if (s->numbytes[0] == '\0')
521 return add_finish;
522 sp.offset = OFF_FROM_HEADER (s->offset);
523 sp.numbytes = SIZE_FROM_HEADER (s->numbytes);
524 if (sp.offset < 0
525 || file->stat_info->stat.st_size < sp.offset + sp.numbytes
526 || file->stat_info->archive_file_size < 0)
527 return add_fail;
528
529 sparse_add_map (file, &sp);
530 return add_ok;
531 }
532
533 /* Convert old GNU format sparse data to internal representation
534 FIXME: Clubbers current_header! */
535 static bool
536 oldgnu_get_sparse_info (struct tar_sparse_file *file)
537 {
538 size_t i;
539 union block *h = current_header;
540 int ext_p;
541 static enum oldgnu_add_status rc;
542
543 /* FIXME: note this! st_size was initialized from the header
544 which actually contains archived size. The following fixes it */
545 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
546 file->stat_info->stat.st_size =
547 OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
548
549 file->stat_info->sparse_map_size = 0;
550 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
551 {
552 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
553 if (rc != add_ok)
554 break;
555 }
556
557 for (ext_p = h->oldgnu_header.isextended;
558 rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
559 {
560 h = find_next_block ();
561 if (!h)
562 {
563 ERROR ((0, 0, _("Unexpected EOF in archive")));
564 return false;
565 }
566 set_next_block_after (h);
567 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
568 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
569 }
570
571 if (rc == add_fail)
572 {
573 ERROR ((0, 0, _("%s: invalid sparse archive member"),
574 file->stat_info->orig_file_name));
575 return false;
576 }
577 return true;
578 }
579
580 static void
581 oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
582 struct sparse *sp, size_t sparse_size)
583 {
584 for (; *pindex < file->stat_info->sparse_map_avail
585 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
586 {
587 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
588 sp->offset);
589 SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
590 sp->numbytes);
591 }
592 }
593
594 static bool
595 oldgnu_dump_header (struct tar_sparse_file *file)
596 {
597 off_t block_ordinal = current_block_ordinal ();
598 union block *blk;
599 size_t i;
600
601 blk = start_header (file->stat_info);
602 blk->header.typeflag = GNUTYPE_SPARSE;
603 if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
604 blk->oldgnu_header.isextended = 1;
605
606 /* Store the real file size */
607 OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
608 /* Store the effective (shrunken) file size */
609 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
610
611 i = 0;
612 oldgnu_store_sparse_info (file, &i,
613 blk->oldgnu_header.sp,
614 SPARSES_IN_OLDGNU_HEADER);
615 blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
616 finish_header (file->stat_info, blk, block_ordinal);
617
618 while (i < file->stat_info->sparse_map_avail)
619 {
620 blk = find_next_block ();
621 memset (blk->buffer, 0, BLOCKSIZE);
622 oldgnu_store_sparse_info (file, &i,
623 blk->sparse_header.sp,
624 SPARSES_IN_SPARSE_HEADER);
625 set_next_block_after (blk);
626 if (i < file->stat_info->sparse_map_avail)
627 blk->sparse_header.isextended = 1;
628 else
629 break;
630 }
631 return true;
632 }
633
634 static struct tar_sparse_optab oldgnu_optab = {
635 NULL, /* No init function */
636 NULL, /* No done function */
637 oldgnu_dump_header,
638 oldgnu_get_sparse_info,
639 NULL, /* No scan_block function */
640 sparse_dump_region,
641 sparse_extract_region,
642 };
This page took 0.067981 seconds and 5 git commands to generate.