From 51b272f5723b782fb079b1cd55daf934331596b6 Mon Sep 17 00:00:00 2001 From: Nathan Stratton Treadway Date: Mon, 19 Sep 2011 15:29:00 +0300 Subject: [PATCH] Upgrade tar-snapshot-edit script. * scripts/tar-snapshot-edit: Update Perl syntax to work correctly with more recent versions of Perl. (The original code worked with in the v5.8 timeframe but not with Perl v5.10.1 and later.) Add a "-c" option to check the snapshot file for invalid field values. Handle NFS indicator character ("+") in version 0 and 1 files. Preserve the original header/version line when editing version 1 or 2 files. Tweak output formatting. * doc/tar-snapshot-edit.texi: Update documentation. --- doc/tar-snapshot-edit.texi | 66 ++++++-- scripts/tar-snapshot-edit | 324 +++++++++++++++++++++++++++++-------- 2 files changed, 304 insertions(+), 86 deletions(-) diff --git a/doc/tar-snapshot-edit.texi b/doc/tar-snapshot-edit.texi index ed9640c..697a409 100644 --- a/doc/tar-snapshot-edit.texi +++ b/doc/tar-snapshot-edit.texi @@ -6,24 +6,28 @@ @cindex Device numbers, changing @cindex snapshot files, editing @cindex snapshot files, fixing device numbers - Sometimes device numbers can change after upgrading your kernel -version or reconfiguring the hardware. Reportedly this is the case with -some newer @i{Linux} kernels, when using @acronym{LVM}. In majority of + Various situations can cause device numbers to change: upgrading your +kernel version, reconfiguring your hardware, loading kernel modules in a +different order, using virtual volumes that are assembled dynamically +(such as with @acronym{LVM} or @acronym{RAID}), hot-plugging drives +(e.g. external USB or Firewire drives), etc. In the majority of cases this change is unnoticed by the users. However, it influences @command{tar} incremental backups: the device number is stored in tar snapshot files (@pxref{Snapshot Files}) and is used to determine whether the file has changed since the last backup. If the device numbers -change for some reason, the next backup you run will be a full backup. +change for some reason, by default the next backup you run will be a +full backup. + @pindex tar-snapshot-edit To minimize the impact in these cases, GNU @command{tar} comes with the @command{tar-snapshot-edit} utility for inspecting and updating -device numbers in snapshot files. The utility, written by -Dustin J.@: Mitchell, is available from +device numbers in snapshot files. (The utility, written by +Dustin J.@: Mitchell, is also available from the @uref{http://www.gnu.org/@/software/@/tar/@/utils/@/tar-snapshot-edit.html, -@GNUTAR{} home page}. +@GNUTAR{} home page}.) - To obtain the device numbers used in the snapshot file, run + To obtain a summary of the device numbers found in the snapshot file, run @smallexample $ @kbd{tar-snapshot-edit @var{snapfile}} @@ -31,10 +35,19 @@ $ @kbd{tar-snapshot-edit @var{snapfile}} @noindent where @var{snapfile} is the name of the snapshot file (you can supply as many -files as you wish in a single command line). +files as you wish in a single command line). You can then compare the +numbers across snapshot files, or against those currently in use on the +live filesystem (using @command{ls -l} or @command{stat}). + + Assuming the device numbers have indeed changed, it's often possible +to simply tell @GNUTAR{} to ignore the device number when processing the +incremental snapshot files for these backups, using the +@option{--no-check-device} option (@pxref{device numbers}). -To update all occurrences of the given device number in the file, use -@option{-r} option. It takes a single argument of the form + Alternatively, you can use the @command{tar-edit-snapshot} script's +@option{-r} option to update all occurrences of the given device +number in the snapshot file(s). It takes a single argument +of the form @samp{@var{olddev}-@var{newdev}}, where @var{olddev} is the device number used in the snapshot file, and @var{newdev} is the corresponding new device number. Both numbers may be specified in hex (e.g., @samp{0xfe01}), @@ -49,9 +62,30 @@ backup file is obtained by appending @samp{~} to the original file name. An example session: @smallexample -$ @kbd{tar-snapshot-edit /var/backup/snap.a} -file version 2 -/tmp/snap: Device 0x0306 occurs 634 times. -$ @kbd{tar-snapshot-edit -b -r 0x0306-0x4500 /var/backup/snap.a} -file version 2 +$ @kbd{tar-snapshot-edit root_snap.0 boot_snap.0} +File: root_snap.0 + Detected snapshot file version: 2 + + Device 0x0000 occurs 1 times. + Device 0x0003 occurs 1 times. + Device 0x0005 occurs 1 times. + Device 0x0013 occurs 1 times. + Device 0x6801 occurs 1 times. + Device 0x6803 occurs 6626 times. + Device 0xfb00 occurs 1 times. + +File: boot_snap.0 + Detected snapshot file version: 2 + + Device 0x6801 occurs 3 times. +$ @kbd{tar-snapshot-edit -b -r 0x6801-0x6901,0x6803-0x6903 root_snap.0 boot_snap.0} +File: root_snap.0 + Detected snapshot file version: 2 + + Updated 6627 records. + +File: boot_snap.0 + Detected snapshot file version: 2 + + Updated 3 records. @end smallexample diff --git a/scripts/tar-snapshot-edit b/scripts/tar-snapshot-edit index a54902f..92741d3 100755 --- a/scripts/tar-snapshot-edit +++ b/scripts/tar-snapshot-edit @@ -1,10 +1,10 @@ #! /usr/bin/perl -w # Display and edit the 'dev' field in tar's snapshots -# Copyright (C) 2007 Free Software Foundation, Inc. +# Copyright (C) 2007,2011 Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) +# the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, @@ -17,13 +17,37 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # -# Author: Dustin J. Mitchell +# +# tar-snapshot-edit # # This script is capable of replacing values in the 'dev' field of an # incremental backup 'snapshot' file. This is useful when the device # used to store files in a tar archive changes, without the files # themselves changing. This may happen when, for example, a device # driver changes major or minor numbers. +# +# It can also run a check on all the field values found in the +# snapshot file, printing out a detailed message when it finds values +# that would cause an "Unexpected field value in snapshot file" error +# if tar were run using that snapshot file as input. (See the +# comments included in the definition of the check_field_values +# routine for more detailed information regarding these checks.) +# +# +# +# Author: Dustin J. Mitchell +# +# Modified Aug 25, 2011 by Nathan Stratton Treadway : +# * update Perl syntax to work correctly with more recent versions of +# Perl. (The original code worked with in the v5.8 timeframe but +# not with Perl v5.10.1 and later.) +# * added a "-c" option to check the snapshot file for invalid field values. +# * handle NFS indicator character ("+") in version 0 and 1 files +# * preserve the original header/version line when editing version 1 +# or 2 files. +# * tweak output formatting +# +# use Getopt::Std; @@ -41,14 +65,15 @@ sub read_incr_db ($) { $file_version = 0; } - print "file version $file_version\n"; + print "\nFile: $filename\n"; + print " Detected snapshot file version: $file_version\n\n"; if ($file_version == 0) { return read_incr_db_0($file, $header_str); } elsif ($file_version == 1) { - return read_incr_db_1($file); + return read_incr_db_1($file, $header_str); } elsif ($file_version == 2) { - return read_incr_db_2($file); + return read_incr_db_2($file, $header_str); } else { die "Unrecognized snapshot version in header '$header_str'"; } @@ -62,48 +87,66 @@ sub read_incr_db_0 ($$) { chop $hdr_timestamp_sec; my $hdr_timestamp_nsec = ''; # not present in file format 0 + my $nfs; my @dirs; while (<$file>) { - /^([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_"); + /^(\+?)([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_"); - push @dirs, { dev=>$1, - ino=>$2, - name=>$3 }; + if ( $1 eq "+" ) { + $nfs="1"; + } else { + $nfs="0"; + } + push @dirs, { nfs=>$nfs, + dev=>$2, + ino=>$3, + name=>$4 }; } close($file); - # file version, timestamp, timestamp, dir list - return [ 0, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs ]; + # file version, timestamp, timestamp, dir list, file header line + return [ 0, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, ""]; } -sub read_incr_db_1 ($) { +sub read_incr_db_1 ($$) { my $file = shift; + my $header_str = shift; + my $timestamp = <$file>; # "sec nsec" my ($hdr_timestamp_sec, $hdr_timestamp_nsec) = ($timestamp =~ /([0-9]*) ([0-9]*)/); + my $nfs; my @dirs; while (<$file>) { - /^([0-9]*) ([0-9]*) ([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_"); - - push @dirs, { timestamp_sec=>$1, - timestamp_nsec=>$2, - dev=>$3, - ino=>$4, - name=>$5 }; + /^(\+?)([0-9]*) ([0-9]*) ([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_"); + + if ( $1 eq "+" ) { + $nfs="1"; + } else { + $nfs="0"; + } + + push @dirs, { nfs=>$nfs, + timestamp_sec=>$2, + timestamp_nsec=>$3, + dev=>$4, + ino=>$5, + name=>$6 }; } close($file); - # file version, timestamp, timestamp, dir list - return [ 1, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs ]; + # file version, timestamp, timestamp, dir list, file header line + return [ 1, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str ]; } -sub read_incr_db_2 ($) { +sub read_incr_db_2 ($$) { my $file = shift; + my $header_str = shift; $/="\0"; # $INPUT_RECORD_SEPARATOR my $hdr_timestamp_sec = <$file>; @@ -150,40 +193,158 @@ sub read_incr_db_2 ($) { close($file); $/ = "\n"; # reset to normal - # file version, timestamp, timestamp, dir list - return [ 2, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs ]; + # file version, timestamp, timestamp, dir list, file header line + return [ 2, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str]; } ## display -sub show_device_counts ($$) { +sub show_device_counts ($) { my $info = shift; - my $filename = shift; my %devices; - foreach my $dir (@{${@$info}[3]}) { - my $dev = ${%$dir}{'dev'}; + foreach my $dir (@{$info->[3]}) { + my $dev = $dir->{'dev'}; $devices{$dev}++; } - foreach $dev (sort keys %devices) { - printf "$filename: Device 0x%04x occurs $devices{$dev} times.\n", $dev; + foreach $dev (sort {$a <=> $b} keys %devices) { + printf " Device 0x%04x occurs $devices{$dev} times.\n", $dev; } } +## check field values + +# returns a warning message if $field isn't a valid string representation +# of an integer, or if the resulting integer is out of the specified range +sub validate_integer_field ($$$$) { + my $field = shift; + my $field_name = shift; + my $min = shift; + my $max = shift; + + my $msg = ""; + + if ( not $field =~ /^-?\d+$/ ) { + $msg = " $field_name value contains invalid characters: \"$field\"\n"; + } else { + if ( $field < $min ) { + $msg = " $field_name value too low: \"$field\" < $min \n"; + } elsif ( $field > $max ) { + $msg = " $field_name value too high: \"$field\" > $max \n"; + } + } + return $msg; +} + + +# This routine loops through each directory entry in the $info data +# structure and prints a warning message if tar would abort with an +# "Unexpected field value in snapshot file" error upon reading this +# snapshot file. +# +# (Note that this specific error message was introduced along with the +# change to snapshot file format "2", starting with tar v1.16 [or, +# more precisely, v1.15.91].) +# +# The checks here are intended to match those found in the incremen.c +# source file (as of tar v1.16.1). +# +# In that code, the checks are done against pre-processor expressions, +# as defined in the C header files at compile time. In the routine +# below, a Perl variable is created for each expression used as part of +# one of these checks, assigned the value of the related pre-processor +# expression as found on a Linux 2.6.8/i386 system. +# +# It seems likely that these settings will catch most invalid +# field values found in actual snapshot files on all systems. However, +# if "tar" is erroring out on a snapshot file that this check routine +# does not complain about, that probably indicates that the values +# below need to be adjusted to match those used by "tar" in that +# particular environment. +# +# (Note: the checks here are taken from the code that processes +# version 2 snapshot files, but to keep things simple we apply those +# same checks to files having earlier versions -- but only for +# the fields that actually exist in those input files.) + +sub check_field_values ($) { + my $info = shift; + + # set up a variable with the value of each pre-processor + # expression used for field-value checks in incremen.c + # (these values here are from a Linux 2.6.8/i386 system) + my $BILLION = 1000000000; # BILLION + my $MIN_TIME_T = -2147483648; # TYPE_MINIMUM(time_t) + my $MAX_TIME_T = 2147483647; # TYPE_MAXIUMUM(time_t) + my $MAX_DEV_T = 4294967295; # TYPE_MAXIUMUM(dev_t) + my $MAX_INO_T = 4294967295; # TYPE_MAXIUMUM(ino_t) + + + my $msg; + my $error_found = 0; + + print " Checking field values in snapshot file...\n"; + + $snapver = $info->[0]; + + $msg = ""; + $msg .= validate_integer_field($info->[1], + 'timestamp_sec', $MIN_TIME_T, $MAX_TIME_T); + if ($snapver >= 1) { + $msg .= validate_integer_field($info->[2], + 'timestamp_nsec', 0, $BILLION-1); + } + if ( $msg ne "" ) { + $error_found = 1; + print "\n shapshot file header:\n"; + print $msg; + } + + + foreach my $dir (@{$info->[3]}) { + + $msg = ""; + + $msg .= validate_integer_field($dir->{'nfs'}, 'nfs', 0, 1); + if ($snapver >= 1) { + $msg .= validate_integer_field($dir->{'timestamp_sec'}, + 'timestamp_sec', $MIN_TIME_T, $MAX_TIME_T); + $msg .= validate_integer_field($dir->{'timestamp_nsec'}, + 'timestamp_nsec', 0, $BILLION-1); + } + $msg .= validate_integer_field($dir->{'dev'}, 'dev', 0, $MAX_DEV_T); + $msg .= validate_integer_field($dir->{'ino'}, 'ino', 0, $MAX_INO_T); + + if ( $msg ne "" ) { + $error_found = 1; + print "\n directory: $dir->{'name'}\n"; + print $msg; + } + } + + print "\n Snapshot field value check complete" , + $error_found ? "" : ", no errors found" , + ".\n"; +} + ## editing sub replace_device_number ($@) { my $info = shift(@_); my @repl = @_; - foreach my $dir (@{${@$info}[3]}) { + my $count = 0; + + foreach my $dir (@{$info->[3]}) { foreach $x (@repl) { - if (${%$dir}{'dev'} eq $$x[0]) { - ${%$dir}{'dev'} = $$x[1]; + if ($dir->{'dev'} eq $$x[0]) { + $dir->{'dev'} = $$x[1]; + $count++; last; } } } + print " Updated $count records.\n" } ## writing @@ -211,14 +372,17 @@ sub write_incr_db ($$) { sub write_incr_db_0 ($$) { my $info = shift; my $file = shift; - + my $timestamp_sec = $info->[1]; print $file "$timestamp_sec\n"; - foreach my $dir (@{${@$info}[3]}) { - print $file "${%$dir}{'dev'} "; - print $file "${%$dir}{'ino'} "; - print $file "${%$dir}{'name'}\n"; + foreach my $dir (@{$info->[3]}) { + if ($dir->{'nfs'}) { + print $file '+' + } + print $file "$dir->{'dev'} "; + print $file "$dir->{'ino'} "; + print $file "$dir->{'name'}\n"; } } @@ -226,19 +390,22 @@ sub write_incr_db_0 ($$) { sub write_incr_db_1 ($$) { my $info = shift; my $file = shift; - - print $file "GNU tar-1.15-1\n"; + + print $file $info->[4]; my $timestamp_sec = $info->[1]; my $timestamp_nsec = $info->[2]; print $file "$timestamp_sec $timestamp_nsec\n"; - foreach my $dir (@{${@$info}[3]}) { - print $file "${%$dir}{'timestamp_sec'} "; - print $file "${%$dir}{'timestamp_nsec'} "; - print $file "${%$dir}{'dev'} "; - print $file "${%$dir}{'ino'} "; - print $file "${%$dir}{'name'}\n"; + foreach my $dir (@{$info->[3]}) { + if ($dir->{'nfs'}) { + print $file '+' + } + print $file "$dir->{'timestamp_sec'} "; + print $file "$dir->{'timestamp_nsec'} "; + print $file "$dir->{'dev'} "; + print $file "$dir->{'ino'} "; + print $file "$dir->{'name'}\n"; } } @@ -246,22 +413,22 @@ sub write_incr_db_1 ($$) { sub write_incr_db_2 ($$) { my $info = shift; my $file = shift; - - print $file "GNU tar-1.16-2\n"; + + print $file $info->[4]; my $timestamp_sec = $info->[1]; my $timestamp_nsec = $info->[2]; print $file $timestamp_sec . "\0"; print $file $timestamp_nsec . "\0"; - foreach my $dir (@{${@$info}[3]}) { - print $file ${%$dir}{'nfs'} . "\0"; - print $file ${%$dir}{'timestamp_sec'} . "\0"; - print $file ${%$dir}{'timestamp_nsec'} . "\0"; - print $file ${%$dir}{'dev'} . "\0"; - print $file ${%$dir}{'ino'} . "\0"; - print $file ${%$dir}{'name'} . "\0"; - foreach my $dirent (@{${%$dir}{'dirents'}}) { + foreach my $dir (@{$info->[3]}) { + print $file $dir->{'nfs'} . "\0"; + print $file $dir->{'timestamp_sec'} . "\0"; + print $file $dir->{'timestamp_nsec'} . "\0"; + print $file $dir->{'dev'} . "\0"; + print $file $dir->{'ino'} . "\0"; + print $file $dir->{'name'} . "\0"; + foreach my $dirent (@{$dir->{'dirents'}}) { print $file $dirent . "\0"; } print $file "\0"; @@ -271,9 +438,10 @@ sub write_incr_db_2 ($$) { ## main sub main { - our ($opt_b, $opt_r, $opt_h); - getopts('br:h'); - HELP_MESSAGE() if ($opt_h || $#ARGV == -1 || ($opt_b && !$opt_r)); + our ($opt_b, $opt_r, $opt_h, $opt_c); + getopts('br:hc'); + HELP_MESSAGE() if ($opt_h || $#ARGV == -1 || ($opt_b && !$opt_r) || + ($opt_r && $opt_c) ); my @repl; if ($opt_r) { @@ -292,22 +460,38 @@ sub main { replace_device_number($info, @repl); write_incr_db($info, $snapfile); + } elsif ($opt_c) { + check_field_values($info); } else { - show_device_counts($info, $snapfile); + show_device_counts($info); } } } sub HELP_MESSAGE { - print "Usage: tar-snapshot-edit.pl [-r 'DEV1-DEV2[,DEV3-DEV4...]' [-b]] SNAPFILE [SNAPFILE [..]]\n"; - print "\n"; - print " Without -r, summarize the 'device' values in each SNAPFILE.\n"; - print "\n"; - print " With -r, replace occurrences of DEV1 with DEV2 in each SNAPFILE.\n"; - print " DEV1 and DEV2 may be specified in hex (e.g., 0xfe01), decimal (e.g.,\n"; - print " 65025), or MAJ:MIN (e.g., 254:1). To replace multiple occurrences,\n"; - print " separate them with commas. If -b is also specified, backup\n"; - print " files (ending with '~') will be created.\n"; + print <