]> Dogcows Code - chaz/tar/blob - scripts/tar-snapshot-edit
56c24a1a3008e7bee25927da179220f0c90b5749
[chaz/tar] / scripts / tar-snapshot-edit
1 #! /usr/bin/perl -w
2 # Display and edit the 'dev' field in tar's snapshots
3 # Copyright 2007, 2011, 2013 Free Software Foundation, Inc.
4
5 # This file is part of GNU tar.
6
7 # GNU tar is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
11
12 # GNU tar is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20
21 # tar-snapshot-edit
22 #
23 # This script is capable of replacing values in the 'dev' field of an
24 # incremental backup 'snapshot' file. This is useful when the device
25 # used to store files in a tar archive changes, without the files
26 # themselves changing. This may happen when, for example, a device
27 # driver changes major or minor numbers.
28 #
29 # It can also run a check on all the field values found in the
30 # snapshot file, printing out a detailed message when it finds values
31 # that would cause an "Unexpected field value in snapshot file",
32 # "Numerical result out of range", or "Invalid argument" error
33 # if tar were run using that snapshot file as input. (See the
34 # comments included in the definition of the check_field_values
35 # routine for more detailed information regarding these checks.)
36 #
37 #
38 #
39 # Author: Dustin J. Mitchell <dustin@zmanda.com>
40 #
41 # Modified Aug 25, 2011 by Nathan Stratton Treadway <nathanst AT ontko.com>:
42 # * update Perl syntax to work correctly with more recent versions of
43 # Perl. (The original code worked with in the v5.8 timeframe but
44 # not with Perl v5.10.1 and later.)
45 # * added a "-c" option to check the snapshot file for invalid field values.
46 # * handle NFS indicator character ("+") in version 0 and 1 files
47 # * preserve the original header/version line when editing version 1
48 # or 2 files.
49 # * tweak output formatting
50 #
51 # Modified March 13, 2013 by Nathan Stratton Treadway <nathanst AT ontko.com>:
52 # * configure field ranges used for -c option based on the system
53 # architecture (in response to the December 2012 update to GNU tar
54 # enabling support for systems with signed dev_t values).
55 # * when printing the list of device ids found in the snapshot file
56 # (when run in the default mode), print the raw device id values
57 # instead of the hex-string version in those cases where they
58 # can't be converted successfully.
59
60 use Getopt::Std;
61 use Config;
62
63 my %snapshot_field_ranges; # used in check_field_values function
64
65 ## reading
66
67 sub read_incr_db ($) {
68 my $filename = shift;
69 open(my $file, "<$filename") || die "Could not open '$filename' for reading";
70
71 my $header_str = <$file>;
72 my $file_version;
73 if ($header_str =~ /^GNU tar-[^-]*-([0-9]+)\n$/) {
74 $file_version = $1+0;
75 } else {
76 $file_version = 0;
77 }
78
79 print "\nFile: $filename\n";
80 print " Detected snapshot file version: $file_version\n\n";
81
82 if ($file_version == 0) {
83 return read_incr_db_0($file, $header_str);
84 } elsif ($file_version == 1) {
85 return read_incr_db_1($file, $header_str);
86 } elsif ($file_version == 2) {
87 return read_incr_db_2($file, $header_str);
88 } else {
89 die "Unrecognized snapshot version in header '$header_str'";
90 }
91 }
92
93 sub read_incr_db_0 ($$) {
94 my $file = shift;
95 my $header_str = shift;
96
97 my $hdr_timestamp_sec = $header_str;
98 chop $hdr_timestamp_sec;
99 my $hdr_timestamp_nsec = ''; # not present in file format 0
100
101 my $nfs;
102 my @dirs;
103
104 while (<$file>) {
105 /^(\+?)([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_");
106
107 if ( $1 eq "+" ) {
108 $nfs="1";
109 } else {
110 $nfs="0";
111 }
112 push @dirs, { nfs=>$nfs,
113 dev=>$2,
114 ino=>$3,
115 name=>$4 };
116 }
117
118 close($file);
119
120 # file version, timestamp, timestamp, dir list, file header line
121 return [ 0, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, ""];
122 }
123
124 sub read_incr_db_1 ($$) {
125 my $file = shift;
126 my $header_str = shift;
127
128
129 my $timestamp = <$file>; # "sec nsec"
130 my ($hdr_timestamp_sec, $hdr_timestamp_nsec) = ($timestamp =~ /([0-9]*) ([0-9]*)/);
131
132 my $nfs;
133 my @dirs;
134
135 while (<$file>) {
136 /^(\+?)([0-9]*) ([0-9]*) ([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_");
137
138 if ( $1 eq "+" ) {
139 $nfs="1";
140 } else {
141 $nfs="0";
142 }
143
144 push @dirs, { nfs=>$nfs,
145 timestamp_sec=>$2,
146 timestamp_nsec=>$3,
147 dev=>$4,
148 ino=>$5,
149 name=>$6 };
150 }
151
152 close($file);
153
154 # file version, timestamp, timestamp, dir list, file header line
155 return [ 1, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str ];
156 }
157
158 sub read_incr_db_2 ($$) {
159 my $file = shift;
160 my $header_str = shift;
161
162 $/="\0"; # $INPUT_RECORD_SEPARATOR
163 my $hdr_timestamp_sec = <$file>;
164 chop $hdr_timestamp_sec;
165 my $hdr_timestamp_nsec = <$file>;
166 chop $hdr_timestamp_nsec;
167 my @dirs;
168
169 while (1) {
170 last if eof($file);
171
172 my $nfs = <$file>;
173 my $timestamp_sec = <$file>;
174 my $timestamp_nsec = <$file>;
175 my $dev = <$file>;
176 my $ino = <$file>;
177 my $name = <$file>;
178
179 # get rid of trailing NULs
180 chop $nfs;
181 chop $timestamp_sec;
182 chop $timestamp_nsec;
183 chop $dev;
184 chop $ino;
185 chop $name;
186
187 my @dirents;
188 while (my $dirent = <$file>) {
189 chop $dirent;
190 push @dirents, $dirent;
191 last if ($dirent eq "");
192 }
193 die "missing terminator" unless (<$file> eq "\0");
194
195 push @dirs, { nfs=>$nfs,
196 timestamp_sec=>$timestamp_sec,
197 timestamp_nsec=>$timestamp_nsec,
198 dev=>$dev,
199 ino=>$ino,
200 name=>$name,
201 dirents=>\@dirents };
202 }
203
204 close($file);
205 $/ = "\n"; # reset to normal
206
207 # file version, timestamp, timestamp, dir list, file header line
208 return [ 2, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str];
209 }
210
211 ## display
212
213 sub show_device_counts ($) {
214 my $info = shift;
215 my %devices;
216 foreach my $dir (@{$info->[3]}) {
217 my $dev = $dir->{'dev'};
218 $devices{$dev}++;
219 }
220
221 my $devstr;
222 foreach $dev (sort {$a <=> $b} keys %devices) {
223 $devstr = sprintf ("0x%04x", $dev);
224 if ( $dev > 0xffffffff or $dev < 0 or hex($devstr) != $dev ) {
225 # sprintf "%x" will not return a useful value for device ids
226 # that are negative or which overflow the integer size on this
227 # instance of Perl, so we convert the hex string back to a
228 # number, and if it doesn't (numerically) equal the original
229 # device id value, we know the hex conversion hasn't worked.
230 #
231 # Unfortunately, since we're running in "-w" mode, Perl will
232 # also print a warning message if the hex() routine is called
233 # on anything larger than "0xffffffff", even in 64-bit Perl
234 # where such values are actually supported... so we have to
235 # avoid calling hex() at all if the device id is too large or
236 # negative. (If it's negative, the conversion to an unsigned
237 # integer for the "%x" specifier will mean the result will
238 # always trigger hex()'s warning on a 64-bit machine.)
239 #
240 # These situations don't seem to occur very often, so for now
241 # when they do occur, we simply print the original text value
242 # that was read from the snapshot file; it will look a bit
243 # funny next to the values that do print in hex, but that's
244 # preferable to printing values that aren't actually correct.
245 $devstr = $dev;
246 }
247 printf " Device %s occurs $devices{$dev} times.\n", $devstr;
248 }
249 }
250
251 ## check field values
252
253 # initializes the global %snapshot_field_ranges hash, based on the "-a"
254 # command-line option if given, otherwise based on the "archname" of
255 # the current system.
256 #
257 # Each value in the hash is a two-element array containing the minimum
258 # and maximum allowed values, respectively, for that field in the snapshot
259 # file. GNU tar's allowed values for each architecture are determined
260 # in the incremen.c source file, where the TYPE_MIN and TYPE_MAX
261 # pre-processor expressions are used to determine the range that can be
262 # expressed by the C data type used for each field; the values in the
263 # array defined below should match those calculations.
264
265 sub choose_architecture ($) {
266 my $opt_a = shift;
267
268 my $arch = $opt_a ? $opt_a : $Config{'archname'};
269
270 # These ranges apply to Linux 2.4/2.6 on iX86 systems, but are used
271 # by default on unrecognized/unsupported systems, too.
272 %iX86_linux_field_ranges = (
273 timestamp_sec => [ -2147483648, 2147483647 ], # min/max of time_t
274 timestamp_nsec => [ 0, 999999999 ], # 0 to BILLION-1
275 nfs => [ 0, 1 ],
276 dev => [ 0, 18446744073709551615 ], # min/max of dev_t
277 ino => [ 0, 4294967295 ], # min/max of ino_t
278 );
279
280
281 if ( $arch =~ m/^i[\dxX]86-linux/i ) {
282 %snapshot_field_ranges = %iX86_linux_field_ranges;
283 print "Checking snapshot field values using \"iX86-linux\" ranges.\n\n";
284 } elsif ( $arch =~ m/^x86_64-linux/i ) {
285 %snapshot_field_ranges = (
286 timestamp_sec => [ -9223372036854775808, 9223372036854775807 ],
287 timestamp_nsec => [ 0, 999999999 ],
288 nfs => [ 0, 1 ],
289 dev => [ 0, 18446744073709551615 ],
290 ino => [ 0, 18446744073709551615 ],
291 );
292 print "Checking snapshot field values using \"x86_64-linux\" ranges.\n\n";
293 } elsif ( $arch =~ m/^IA64.ARCHREV_0/i ) {
294 # HP/UX running on Itanium/ia64 architecture
295 %snapshot_field_ranges = (
296 timestamp_sec => [ -2147483648, 2147483647 ],
297 timestamp_nsec => [ 0, 999999999 ],
298 nfs => [ 0, 1 ],
299 dev => [ -2147483648, 2147483647 ],
300 ino => [ 0, 4294967295 ],
301 );
302 print "Checking snapshot field values using \"IA64.ARCHREV_0\" (HP/UX) ranges.\n\n";
303 } else {
304 %snapshot_field_ranges = %iX86_linux_field_ranges;
305 print "Unrecognized architecture \"$arch\"; defaulting to \"iX86-linux\".\n";
306 print "(Use -a option to override.)\n" unless $opt_a;
307 print "\n";
308 }
309
310 if ( ref(1) ne "" ) {
311 print "(\"bignum\" mode is in effect; skipping 64-bit-integer check.)\n\n"
312 } else {
313 # find the largest max value in the current set of ranges
314 my $maxmax = 0;
315 for $v (values %snapshot_field_ranges ) {
316 $maxmax = $v->[1] if ($v->[1] > $maxmax);
317 }
318
319 # "~0" translates into a platform-native integer with all bits turned
320 # on -- that is, the largest value that can be represented as
321 # an integer. We print a warning if our $maxmax value is greater
322 # than that largest integer, since in that case Perl will switch
323 # to using floats for those large max values. The wording of
324 # the message assumes that the only way this situation can exist
325 # is that the platform uses 32-bit integers but some of the
326 # snapshot-file fields have 64-bit values.
327 if ( ~0 < $maxmax ) {
328 print <<EOF
329 Note: this version of Perl uses 32-bit integers, which means that it
330 will switch to using floating-point numbers when checking the ranges
331 for 64-bit snapshot-file fields. This normally will work fine, but
332 might fail to detect cases where the value in the input field value is
333 only slightly out of range. (For example, a "9223372036854775808"
334 might not be recognized as being larger than 9223372036854775807.)
335 If you suspect you are experiencing this problem, you can try running
336 the program using the "-Mbignum" option, as in
337 \$ perl $0 -Mbignum -c [FILES]
338 (but doing so will make the program run *much* slower).
339
340 EOF
341 }
342 }
343
344
345 }
346
347 # returns a warning message if $field_value isn't a valid string
348 # representation of an integer, or if the resulting integer is out of range
349 # defined by the two-element array retrieved using up the $field_name key in
350 # the global %snapshot_field_ranges hash.
351 sub validate_integer_field ($$) {
352 my $field_value = shift;
353 my $field_name = shift;
354
355 my ($min, $max) = @{$snapshot_field_ranges{$field_name}};
356
357 my $msg = "";
358
359 if ( not $field_value =~ /^-?\d+$/ ) {
360 $msg = " $field_name value contains invalid characters: \"$field_value\"\n";
361 } else {
362 if ( $field_value < $min ) {
363 $msg = " $field_name value too low: \"$field_value\" < $min \n";
364 } elsif ( $field_value > $max ) {
365 $msg = " $field_name value too high: \"$field_value\" > $max \n";
366 }
367 }
368 return $msg;
369 }
370
371
372 # This routine loops through each directory entry in the $info data
373 # structure and prints a warning message if tar would abort with an
374 # "Unexpected field value in snapshot file", "Numerical result out of
375 # range", or "Invalid argument" error upon reading this snapshot file.
376 #
377 # (Note that the "Unexpected field value in snapshot file" error message
378 # was introduced along with the change to snapshot file format "2",
379 # starting with tar v1.16 [or, more precisely, v1.15.91], while the
380 # other two were introduced in v1.27.)
381 #
382 # The checks here are intended to match those found in the incremen.c
383 # source file. See the choose_architecture() function (above) for more
384 # information on how to configure the range of values considered valid
385 # by this script.
386 #
387 # (Note: the checks here are taken from the code that processes
388 # version 2 snapshot files, but to keep things simple we apply those
389 # same checks to files having earlier versions -- but only for
390 # the fields that actually exist in those input files.)
391
392 sub check_field_values ($) {
393 my $info = shift;
394
395 my $msg;
396 my $error_found = 0;
397
398 print " Checking field values in snapshot file...\n";
399
400 $snapver = $info->[0];
401
402 $msg = "";
403 $msg .= validate_integer_field($info->[1], 'timestamp_sec');
404 if ($snapver >= 1) {
405 $msg .= validate_integer_field($info->[2], 'timestamp_nsec');
406 }
407 if ( $msg ne "" ) {
408 $error_found = 1;
409 print "\n shapshot file header:\n";
410 print $msg;
411 }
412
413
414 foreach my $dir (@{$info->[3]}) {
415
416 $msg = "";
417
418 $msg .= validate_integer_field($dir->{'nfs'}, 'nfs');
419 if ($snapver >= 1) {
420 $msg .= validate_integer_field($dir->{'timestamp_sec'}, 'timestamp_sec');
421 $msg .= validate_integer_field($dir->{'timestamp_nsec'}, 'timestamp_nsec');
422 }
423 $msg .= validate_integer_field($dir->{'dev'}, 'dev');
424 $msg .= validate_integer_field($dir->{'ino'}, 'ino');
425
426 if ( $msg ne "" ) {
427 $error_found = 1;
428 print "\n directory: $dir->{'name'}\n";
429 print $msg;
430 }
431 }
432
433 print "\n Snapshot field value check complete" ,
434 $error_found ? "" : ", no errors found" ,
435 ".\n";
436 }
437
438 ## editing
439
440 sub replace_device_number ($@) {
441 my $info = shift(@_);
442 my @repl = @_;
443
444 my $count = 0;
445
446 foreach my $dir (@{$info->[3]}) {
447 foreach $x (@repl) {
448 if ($dir->{'dev'} eq $$x[0]) {
449 $dir->{'dev'} = $$x[1];
450 $count++;
451 last;
452 }
453 }
454 }
455 print " Updated $count records.\n"
456 }
457
458 ## writing
459
460 sub write_incr_db ($$) {
461 my $info = shift;
462 my $filename = shift;
463 my $file_version = $$info[0];
464
465 open($file, ">$filename") || die "Could not open '$filename' for writing";
466
467 if ($file_version == 0) {
468 write_incr_db_0($info, $file);
469 } elsif ($file_version == 1) {
470 write_incr_db_1($info, $file);
471 } elsif ($file_version == 2) {
472 write_incr_db_2($info, $file);
473 } else {
474 die "Unknown file version $file_version.";
475 }
476
477 close($file);
478 }
479
480 sub write_incr_db_0 ($$) {
481 my $info = shift;
482 my $file = shift;
483
484 my $timestamp_sec = $info->[1];
485 print $file "$timestamp_sec\n";
486
487 foreach my $dir (@{$info->[3]}) {
488 if ($dir->{'nfs'}) {
489 print $file '+'
490 }
491 print $file "$dir->{'dev'} ";
492 print $file "$dir->{'ino'} ";
493 print $file "$dir->{'name'}\n";
494 }
495 }
496
497
498 sub write_incr_db_1 ($$) {
499 my $info = shift;
500 my $file = shift;
501
502 print $file $info->[4];
503
504 my $timestamp_sec = $info->[1];
505 my $timestamp_nsec = $info->[2];
506 print $file "$timestamp_sec $timestamp_nsec\n";
507
508 foreach my $dir (@{$info->[3]}) {
509 if ($dir->{'nfs'}) {
510 print $file '+'
511 }
512 print $file "$dir->{'timestamp_sec'} ";
513 print $file "$dir->{'timestamp_nsec'} ";
514 print $file "$dir->{'dev'} ";
515 print $file "$dir->{'ino'} ";
516 print $file "$dir->{'name'}\n";
517 }
518 }
519
520
521 sub write_incr_db_2 ($$) {
522 my $info = shift;
523 my $file = shift;
524
525 print $file $info->[4];
526
527 my $timestamp_sec = $info->[1];
528 my $timestamp_nsec = $info->[2];
529 print $file $timestamp_sec . "\0";
530 print $file $timestamp_nsec . "\0";
531
532 foreach my $dir (@{$info->[3]}) {
533 print $file $dir->{'nfs'} . "\0";
534 print $file $dir->{'timestamp_sec'} . "\0";
535 print $file $dir->{'timestamp_nsec'} . "\0";
536 print $file $dir->{'dev'} . "\0";
537 print $file $dir->{'ino'} . "\0";
538 print $file $dir->{'name'} . "\0";
539 foreach my $dirent (@{$dir->{'dirents'}}) {
540 print $file $dirent . "\0";
541 }
542 print $file "\0";
543 }
544 }
545
546 ## main
547
548 sub main {
549 our ($opt_b, $opt_r, $opt_h, $opt_c, $opt_a);
550 getopts('br:hca:');
551 HELP_MESSAGE() if ($opt_h || $#ARGV == -1 || ($opt_b && !$opt_r) ||
552 ($opt_a && !$opt_c) || ($opt_r && $opt_c) );
553
554 my @repl;
555 if ($opt_r) {
556 foreach my $spec (split(/,/, $opt_r)) {
557 ($spec =~ /^([^-]+)-([^-]+)/) || die "Invalid replacement specification '$opt_r'";
558 push @repl, [interpret_dev($1), interpret_dev($2)];
559 }
560 }
561
562 choose_architecture($opt_a) if ($opt_c);
563
564 foreach my $snapfile (@ARGV) {
565 my $info = read_incr_db($snapfile);
566 if ($opt_r) {
567 if ($opt_b) {
568 rename($snapfile, $snapfile . "~") || die "Could not rename '$snapfile' to backup";
569 }
570
571 replace_device_number($info, @repl);
572 write_incr_db($info, $snapfile);
573 } elsif ($opt_c) {
574 check_field_values($info);
575 } else {
576 show_device_counts($info);
577 }
578 }
579 }
580
581 sub HELP_MESSAGE {
582 print <<EOF;
583
584 Usage:
585 tar-snapshot-edit SNAPFILE [SNAPFILE [...]]
586 tar-snapshot-edit -r 'DEV1-DEV2[,DEV3-DEV4...]' [-b] SNAPFILE [SNAPFILE [...]]
587 tar-snapshot-edit -c [-aARCH] SNAPFILE [SNAPFILE [...]]
588
589 With no options specified: print a summary of the 'device' values
590 found in each SNAPFILE.
591
592 With -r: replace occurrences of DEV1 with DEV2 in each SNAPFILE.
593 DEV1 and DEV2 may be specified in hex (e.g., 0xfe01), decimal (e.g.,
594 65025), or MAJ:MIN (e.g., 254:1). To replace multiple occurrences,
595 separate them with commas. If -b is also specified, backup files
596 (ending with '~') will be created.
597
598 With -c: Check the field values in each SNAPFILE and print warning
599 messages if any invalid values are found. (An invalid value is one
600 that would cause \"tar\" to abort with an error message such as
601 Unexpected field value in snapshot file
602 Numerical result out of range
603 or
604 Invalid argument
605 as it processed the snapshot file.)
606
607 Normally the program automatically chooses the valid ranges for
608 the fields based on the current system's architecture, but the
609 -a option can be used to override the selection, e.g. in order
610 to validate a snapshot file generated on a some other system.
611 (Currently only three architectures are supported, "iX86-linux",
612 "x86_64-linux", and "IA64.ARCHREV_0" [HP/UX running on Itanium/ia64],
613 and if the current system isn't recognized, then the iX86-linux
614 values are used by default.)
615
616 EOF
617 exit 1;
618 }
619
620 sub interpret_dev ($) {
621 my $dev = shift;
622
623 if ($dev =~ /^([0-9]+):([0-9]+)$/) {
624 return $1 * 256 + $2;
625 } elsif ($dev =~ /^0x[0-9a-fA-F]+$/) {
626 return oct $dev;
627 } elsif ($dev =~ /^[0-9]+$/) {
628 return $dev+0;
629 } else {
630 die "Invalid device specification '$dev'";
631 }
632 }
633
634 main
This page took 0.080127 seconds and 3 git commands to generate.