X-Git-Url: https://git.dogcows.com/gitweb?a=blobdiff_plain;f=scripts%2Ftar-snapshot-edit;h=6d9457da1e1b3858e20803aedeb18263cd0c6cfd;hb=HEAD;hp=81b4b2b8eed792e10aa3ce7b9e708b1575753fc4;hpb=01cf5c53151cb0c4166dbc061551b79c173d13f5;p=chaz%2Ftar diff --git a/scripts/tar-snapshot-edit b/scripts/tar-snapshot-edit index 81b4b2b..6d9457d 100755 --- a/scripts/tar-snapshot-edit +++ b/scripts/tar-snapshot-edit @@ -1,31 +1,66 @@ #! /usr/bin/perl -w # Display and edit the 'dev' field in tar's snapshots -# Copyright (C) 2007 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify +# Copyright 2007, 2011, 2013-2014 Free Software Foundation, Inc. + +# This file is part of GNU tar. + +# GNU tar is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. + +# GNU tar is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# + # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA. -# -# Author: Dustin J. Mitchell +# along with this program. If not, see . + + +# tar-snapshot-edit # # This script is capable of replacing values in the 'dev' field of an # incremental backup 'snapshot' file. This is useful when the device # used to store files in a tar archive changes, without the files # themselves changing. This may happen when, for example, a device # driver changes major or minor numbers. +# +# It can also run a check on all the field values found in the +# snapshot file, printing out a detailed message when it finds values +# that would cause an "Unexpected field value in snapshot file", +# "Numerical result out of range", or "Invalid argument" error +# if tar were run using that snapshot file as input. (See the +# comments included in the definition of the check_field_values +# routine for more detailed information regarding these checks.) +# +# +# +# Author: Dustin J. Mitchell +# +# Modified Aug 25, 2011 by Nathan Stratton Treadway : +# * update Perl syntax to work correctly with more recent versions of +# Perl. (The original code worked with in the v5.8 timeframe but +# not with Perl v5.10.1 and later.) +# * added a "-c" option to check the snapshot file for invalid field values. +# * handle NFS indicator character ("+") in version 0 and 1 files +# * preserve the original header/version line when editing version 1 +# or 2 files. +# * tweak output formatting +# +# Modified March 13, 2013 by Nathan Stratton Treadway : +# * configure field ranges used for -c option based on the system +# architecture (in response to the December 2012 update to GNU tar +# enabling support for systems with signed dev_t values). +# * when printing the list of device ids found in the snapshot file +# (when run in the default mode), print the raw device id values +# instead of the hex-string version in those cases where they +# can't be converted successfully. use Getopt::Std; +use Config; + +my %snapshot_field_ranges; # used in check_field_values function ## reading @@ -41,14 +76,15 @@ sub read_incr_db ($) { $file_version = 0; } - print "file version $file_version\n"; + print "\nFile: $filename\n"; + print " Detected snapshot file version: $file_version\n\n"; if ($file_version == 0) { return read_incr_db_0($file, $header_str); } elsif ($file_version == 1) { - return read_incr_db_1($file); + return read_incr_db_1($file, $header_str); } elsif ($file_version == 2) { - return read_incr_db_2($file); + return read_incr_db_2($file, $header_str); } else { die "Unrecognized snapshot version in header '$header_str'"; } @@ -62,48 +98,66 @@ sub read_incr_db_0 ($$) { chop $hdr_timestamp_sec; my $hdr_timestamp_nsec = ''; # not present in file format 0 + my $nfs; my @dirs; while (<$file>) { - /^([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_"); + /^(\+?)([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_"); - push @dirs, { dev=>$1, - ino=>$2, - name=>$3 }; + if ( $1 eq "+" ) { + $nfs="1"; + } else { + $nfs="0"; + } + push @dirs, { nfs=>$nfs, + dev=>$2, + ino=>$3, + name=>$4 }; } close($file); - # file version, timestamp, timestamp, dir list - return [ 0, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs ]; + # file version, timestamp, timestamp, dir list, file header line + return [ 0, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, ""]; } -sub read_incr_db_1 ($) { +sub read_incr_db_1 ($$) { my $file = shift; + my $header_str = shift; + my $timestamp = <$file>; # "sec nsec" my ($hdr_timestamp_sec, $hdr_timestamp_nsec) = ($timestamp =~ /([0-9]*) ([0-9]*)/); + my $nfs; my @dirs; while (<$file>) { - /^([0-9]*) ([0-9]*) ([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_"); + /^(\+?)([0-9]*) ([0-9]*) ([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_"); + + if ( $1 eq "+" ) { + $nfs="1"; + } else { + $nfs="0"; + } - push @dirs, { timestamp_sec=>$1, - timestamp_nsec=>$2, - dev=>$3, - ino=>$4, - name=>$5 }; + push @dirs, { nfs=>$nfs, + timestamp_sec=>$2, + timestamp_nsec=>$3, + dev=>$4, + ino=>$5, + name=>$6 }; } close($file); - # file version, timestamp, timestamp, dir list - return [ 1, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs ]; + # file version, timestamp, timestamp, dir list, file header line + return [ 1, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str ]; } -sub read_incr_db_2 ($) { +sub read_incr_db_2 ($$) { my $file = shift; + my $header_str = shift; $/="\0"; # $INPUT_RECORD_SEPARATOR my $hdr_timestamp_sec = <$file>; @@ -150,24 +204,237 @@ sub read_incr_db_2 ($) { close($file); $/ = "\n"; # reset to normal - # file version, timestamp, timestamp, dir list - return [ 2, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs ]; + # file version, timestamp, timestamp, dir list, file header line + return [ 2, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str]; } ## display -sub show_device_counts ($$) { +sub show_device_counts ($) { my $info = shift; - my $filename = shift; my %devices; - foreach my $dir (@{${@$info}[3]}) { - my $dev = ${%$dir}{'dev'}; + foreach my $dir (@{$info->[3]}) { + my $dev = $dir->{'dev'}; $devices{$dev}++; } - foreach $dev (sort keys %devices) { - printf "$filename: Device 0x%04x occurs $devices{$dev} times.\n", $dev; + my $devstr; + foreach $dev (sort {$a <=> $b} keys %devices) { + $devstr = sprintf ("0x%04x", $dev); + if ( $dev > 0xffffffff or $dev < 0 or hex($devstr) != $dev ) { + # sprintf "%x" will not return a useful value for device ids + # that are negative or which overflow the integer size on this + # instance of Perl, so we convert the hex string back to a + # number, and if it doesn't (numerically) equal the original + # device id value, we know the hex conversion hasn't worked. + # + # Unfortunately, since we're running in "-w" mode, Perl will + # also print a warning message if the hex() routine is called + # on anything larger than "0xffffffff", even in 64-bit Perl + # where such values are actually supported... so we have to + # avoid calling hex() at all if the device id is too large or + # negative. (If it's negative, the conversion to an unsigned + # integer for the "%x" specifier will mean the result will + # always trigger hex()'s warning on a 64-bit machine.) + # + # These situations don't seem to occur very often, so for now + # when they do occur, we simply print the original text value + # that was read from the snapshot file; it will look a bit + # funny next to the values that do print in hex, but that's + # preferable to printing values that aren't actually correct. + $devstr = $dev; + } + printf " Device %s occurs $devices{$dev} times.\n", $devstr; + } +} + +## check field values + +# initializes the global %snapshot_field_ranges hash, based on the "-a" +# command-line option if given, otherwise based on the "archname" of +# the current system. +# +# Each value in the hash is a two-element array containing the minimum +# and maximum allowed values, respectively, for that field in the snapshot +# file. GNU tar's allowed values for each architecture are determined +# in the incremen.c source file, where the TYPE_MIN and TYPE_MAX +# pre-processor expressions are used to determine the range that can be +# expressed by the C data type used for each field; the values in the +# array defined below should match those calculations. (For tar v1.27 +# and later, the valid ranges for a particular tar binary can easily +# be determined using the "tar --show-snapshot-field-ranges" command.) + +sub choose_architecture ($) { + my $opt_a = shift; + + my $arch = $opt_a ? $opt_a : $Config{'archname'}; + + # These ranges apply to Linux 2.4/2.6 on iX86 systems, but are used + # by default on unrecognized/unsupported systems, too. + %iX86_linux_field_ranges = ( + timestamp_sec => [ -2147483648, 2147483647 ], # min/max of time_t + timestamp_nsec => [ 0, 999999999 ], # 0 to BILLION-1 + nfs => [ 0, 1 ], + dev => [ 0, 18446744073709551615 ], # min/max of dev_t + ino => [ 0, 4294967295 ], # min/max of ino_t + ); + + + if ( $arch =~ m/^i[\dxX]86-linux/i ) { + %snapshot_field_ranges = %iX86_linux_field_ranges; + print "Checking snapshot field values using \"iX86-linux\" ranges.\n\n"; + } elsif ( $arch =~ m/^x86_64-linux/i ) { + %snapshot_field_ranges = ( + timestamp_sec => [ -9223372036854775808, 9223372036854775807 ], + timestamp_nsec => [ 0, 999999999 ], + nfs => [ 0, 1 ], + dev => [ 0, 18446744073709551615 ], + ino => [ 0, 18446744073709551615 ], + ); + print "Checking snapshot field values using \"x86_64-linux\" ranges.\n\n"; + } elsif ( $arch =~ m/^IA64.ARCHREV_0/i ) { + # HP/UX running on Itanium/ia64 architecture + %snapshot_field_ranges = ( + timestamp_sec => [ -2147483648, 2147483647 ], + timestamp_nsec => [ 0, 999999999 ], + nfs => [ 0, 1 ], + dev => [ -2147483648, 2147483647 ], + ino => [ 0, 4294967295 ], + ); + print "Checking snapshot field values using \"IA64.ARCHREV_0\" (HP/UX) ranges.\n\n"; + } else { + %snapshot_field_ranges = %iX86_linux_field_ranges; + print "Unrecognized architecture \"$arch\"; defaulting to \"iX86-linux\".\n"; + print "(Use -a option to override.)\n" unless $opt_a; + print "\n"; + } + + if ( ref(1) ne "" ) { + print "(\"bignum\" mode is in effect; skipping 64-bit-integer check.)\n\n" + } else { + # find the largest max value in the current set of ranges + my $maxmax = 0; + for $v (values %snapshot_field_ranges ) { + $maxmax = $v->[1] if ($v->[1] > $maxmax); + } + + # "~0" translates into a platform-native integer with all bits turned + # on -- that is, the largest value that can be represented as + # an integer. We print a warning if our $maxmax value is greater + # than that largest integer, since in that case Perl will switch + # to using floats for those large max values. The wording of + # the message assumes that the only way this situation can exist + # is that the platform uses 32-bit integers but some of the + # snapshot-file fields have 64-bit values. + if ( ~0 < $maxmax ) { + print < $max ) { + $msg = " $field_name value too high: \"$field_value\" > $max \n"; + } + } + return $msg; +} + + +# This routine loops through each directory entry in the $info data +# structure and prints a warning message if tar would abort with an +# "Unexpected field value in snapshot file", "Numerical result out of +# range", or "Invalid argument" error upon reading this snapshot file. +# +# (Note that the "Unexpected field value in snapshot file" error message +# was introduced along with the change to snapshot file format "2", +# starting with tar v1.16 [or, more precisely, v1.15.91], while the +# other two were introduced in v1.27.) +# +# The checks here are intended to match those found in the incremen.c +# source file. See the choose_architecture() function (above) for more +# information on how to configure the range of values considered valid +# by this script. +# +# (Note: the checks here are taken from the code that processes +# version 2 snapshot files, but to keep things simple we apply those +# same checks to files having earlier versions -- but only for +# the fields that actually exist in those input files.) + +sub check_field_values ($) { + my $info = shift; + + my $msg; + my $error_found = 0; + + print " Checking field values in snapshot file...\n"; + + $snapver = $info->[0]; + + $msg = ""; + $msg .= validate_integer_field($info->[1], 'timestamp_sec'); + if ($snapver >= 1) { + $msg .= validate_integer_field($info->[2], 'timestamp_nsec'); } + if ( $msg ne "" ) { + $error_found = 1; + print "\n shapshot file header:\n"; + print $msg; + } + + + foreach my $dir (@{$info->[3]}) { + + $msg = ""; + + $msg .= validate_integer_field($dir->{'nfs'}, 'nfs'); + if ($snapver >= 1) { + $msg .= validate_integer_field($dir->{'timestamp_sec'}, 'timestamp_sec'); + $msg .= validate_integer_field($dir->{'timestamp_nsec'}, 'timestamp_nsec'); + } + $msg .= validate_integer_field($dir->{'dev'}, 'dev'); + $msg .= validate_integer_field($dir->{'ino'}, 'ino'); + + if ( $msg ne "" ) { + $error_found = 1; + print "\n directory: $dir->{'name'}\n"; + print $msg; + } + } + + print "\n Snapshot field value check complete" , + $error_found ? "" : ", no errors found" , + ".\n"; } ## editing @@ -176,14 +443,18 @@ sub replace_device_number ($@) { my $info = shift(@_); my @repl = @_; - foreach my $dir (@{${@$info}[3]}) { - foreach $x (@repl) { - if (${%$dir}{'dev'} eq $$x[0]) { - ${%$dir}{'dev'} = $$x[1]; - last; - } + my $count = 0; + + foreach my $dir (@{$info->[3]}) { + foreach $x (@repl) { + if ($dir->{'dev'} eq $$x[0]) { + $dir->{'dev'} = $$x[1]; + $count++; + last; + } } } + print " Updated $count records.\n" } ## writing @@ -211,14 +482,17 @@ sub write_incr_db ($$) { sub write_incr_db_0 ($$) { my $info = shift; my $file = shift; - + my $timestamp_sec = $info->[1]; print $file "$timestamp_sec\n"; - foreach my $dir (@{${@$info}[3]}) { - print $file "${%$dir}{'dev'} "; - print $file "${%$dir}{'ino'} "; - print $file "${%$dir}{'name'}\n"; + foreach my $dir (@{$info->[3]}) { + if ($dir->{'nfs'}) { + print $file '+' + } + print $file "$dir->{'dev'} "; + print $file "$dir->{'ino'} "; + print $file "$dir->{'name'}\n"; } } @@ -226,19 +500,22 @@ sub write_incr_db_0 ($$) { sub write_incr_db_1 ($$) { my $info = shift; my $file = shift; - - print $file "GNU tar-1.15-1\n"; + + print $file $info->[4]; my $timestamp_sec = $info->[1]; my $timestamp_nsec = $info->[2]; print $file "$timestamp_sec $timestamp_nsec\n"; - foreach my $dir (@{${@$info}[3]}) { - print $file "${%$dir}{'timestamp_sec'} "; - print $file "${%$dir}{'timestamp_nsec'} "; - print $file "${%$dir}{'dev'} "; - print $file "${%$dir}{'ino'} "; - print $file "${%$dir}{'name'}\n"; + foreach my $dir (@{$info->[3]}) { + if ($dir->{'nfs'}) { + print $file '+' + } + print $file "$dir->{'timestamp_sec'} "; + print $file "$dir->{'timestamp_nsec'} "; + print $file "$dir->{'dev'} "; + print $file "$dir->{'ino'} "; + print $file "$dir->{'name'}\n"; } } @@ -246,22 +523,22 @@ sub write_incr_db_1 ($$) { sub write_incr_db_2 ($$) { my $info = shift; my $file = shift; - - print $file "GNU tar-1.16-2\n"; + + print $file $info->[4]; my $timestamp_sec = $info->[1]; my $timestamp_nsec = $info->[2]; print $file $timestamp_sec . "\0"; print $file $timestamp_nsec . "\0"; - foreach my $dir (@{${@$info}[3]}) { - print $file ${%$dir}{'nfs'} . "\0"; - print $file ${%$dir}{'timestamp_sec'} . "\0"; - print $file ${%$dir}{'timestamp_nsec'} . "\0"; - print $file ${%$dir}{'dev'} . "\0"; - print $file ${%$dir}{'ino'} . "\0"; - print $file ${%$dir}{'name'} . "\0"; - foreach my $dirent (@{${%$dir}{'dirents'}}) { + foreach my $dir (@{$info->[3]}) { + print $file $dir->{'nfs'} . "\0"; + print $file $dir->{'timestamp_sec'} . "\0"; + print $file $dir->{'timestamp_nsec'} . "\0"; + print $file $dir->{'dev'} . "\0"; + print $file $dir->{'ino'} . "\0"; + print $file $dir->{'name'} . "\0"; + foreach my $dirent (@{$dir->{'dirents'}}) { print $file $dirent . "\0"; } print $file "\0"; @@ -271,9 +548,10 @@ sub write_incr_db_2 ($$) { ## main sub main { - our ($opt_b, $opt_r, $opt_h); - getopts('br:h'); - HELP_MESSAGE() if ($opt_h || $#ARGV == -1 || ($opt_b && !$opt_r)); + our ($opt_b, $opt_r, $opt_h, $opt_c, $opt_a); + getopts('br:hca:'); + HELP_MESSAGE() if ($opt_h || $#ARGV == -1 || ($opt_b && !$opt_r) || + ($opt_a && !$opt_c) || ($opt_r && $opt_c) ); my @repl; if ($opt_r) { @@ -283,31 +561,61 @@ sub main { } } + choose_architecture($opt_a) if ($opt_c); + foreach my $snapfile (@ARGV) { my $info = read_incr_db($snapfile); - if ($opt_r ) { + if ($opt_r) { if ($opt_b) { rename($snapfile, $snapfile . "~") || die "Could not rename '$snapfile' to backup"; } replace_device_number($info, @repl); write_incr_db($info, $snapfile); + } elsif ($opt_c) { + check_field_values($info); } else { - show_device_counts($info, $snapfile); + show_device_counts($info); } } } sub HELP_MESSAGE { - print "Usage: tar-snapshot-edit.pl [-r 'DEV1-DEV2[,DEV3-DEV4...]' [-b]] SNAPFILE [SNAPFILE [..]]\n"; - print "\n"; - print " Without -r, summarize the 'device' values in each SNAPFILE.\n"; - print "\n"; - print " With -r, replace occurrences of DEV1 with DEV2 in each SNAPFILE.\n"; - print " DEV1 and DEV2 may be specified in hex (e.g., 0xfe01), decimal (e.g.,\n"; - print " 65025), or MAJ:MIN (e.g., 254:1). To replace multiple occurrences,\n"; - print " separate them with commas. If -b is also specified, backup\n"; - print " files (ending with '~') will be created.\n"; + print <