From cac45fffc58cc10056c528582ee4b78b8ee175e0 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Wed, 14 Oct 2009 23:26:52 +0300 Subject: [PATCH] Rewrite update algorithm. * src/common.h (namebuf_t): New typedef. (namebuf_create, namebuf_free) (namebuf_name): New prototypes. (remname): New prototype. * src/misc.c (struct namebuf): New structure. (namebuf_create, namebuf_free) (namebuf_name): New functions. * src/create.c (dump_dir0): Remove is_avoided_name checks. This is taken care of in update_archive. * src/incremen.c (scan_directory): Use namebuf to produce full file names. * src/names.c (nametail): Remove extra level of indirection. All uses updated. (avoided_name_table, add_avoided_name) (is_avoided_name): Remove. * src/update.c (update_archive): Change algorithm. Instead of adding unmodified files to the avoided_name table, create namelist so that it contains only modified files. * tests/Makefile.am: Add update01.at, update02.at. * tests/testsuite.at: Likewise. * tests/update.at (AT_KEYWORDS): Add update00. 
--- src/common.h | 6 +++ src/create.c | 114 +++++++++++++++++++++------------------------ src/incremen.c | 51 +++++++------------- src/misc.c | 41 ++++++++++++++++ src/names.c | 50 +++++++------------- src/update.c | 34 +++++++++++--- tests/Makefile.am | 2 + tests/testsuite.at | 2 + tests/update.at | 3 +- tests/update01.at | 58 +++++++++++++++++++++++ tests/update02.at | 55 ++++++++++++++++++++++ 11 files changed, 283 insertions(+), 133 deletions(-) create mode 100644 tests/update01.at create mode 100644 tests/update02.at diff --git a/src/common.h b/src/common.h index 32b9ab3..360fb86 100644 --- a/src/common.h +++ b/src/common.h @@ -596,6 +596,11 @@ char *normalize_filename (const char *name); void replace_prefix (char **pname, const char *samp, size_t slen, const char *repl, size_t rlen); +typedef struct namebuf *namebuf_t; +namebuf_t namebuf_create (const char *dir); +void namebuf_free (namebuf_t buf); +char *namebuf_name (namebuf_t buf, const char *name); + void code_ns_fraction (int ns, char *p); char const *code_timespec (struct timespec ts, char *sbuf); enum { BILLION = 1000000000, LOG10_BILLION = 9 }; @@ -666,6 +671,7 @@ const char *name_next (int change_dirs); void name_gather (void); struct name *addname (char const *string, int change_dir, bool cmdline, struct name *parent); +void remname (struct name *name); bool name_match (const char *name); void names_notfound (void); void collect_and_sort_names (void); diff --git a/src/create.c b/src/create.c index a964bc2..3add5a0 100644 --- a/src/create.c +++ b/src/create.c @@ -1096,73 +1096,70 @@ dump_dir0 (char *directory, { dev_t our_device = st->stat.st_dev; const char *tag_file_name; - - if (!is_avoided_name (st->orig_file_name)) - { - union block *blk = NULL; - off_t block_ordinal = current_block_ordinal (); - st->stat.st_size = 0; /* force 0 size on dir */ + union block *blk = NULL; + off_t block_ordinal = current_block_ordinal (); - blk = start_header (st); - if (!blk) - return; + st->stat.st_size = 0; /* 
force 0 size on dir */ + + blk = start_header (st); + if (!blk) + return; - if (incremental_option && archive_format != POSIX_FORMAT) - blk->header.typeflag = GNUTYPE_DUMPDIR; - else /* if (standard_option) */ - blk->header.typeflag = DIRTYPE; + if (incremental_option && archive_format != POSIX_FORMAT) + blk->header.typeflag = GNUTYPE_DUMPDIR; + else /* if (standard_option) */ + blk->header.typeflag = DIRTYPE; - /* If we're gnudumping, we aren't done yet so don't close it. */ + /* If we're gnudumping, we aren't done yet so don't close it. */ - if (!incremental_option) - finish_header (st, blk, block_ordinal); - else if (gnu_list_name->directory) + if (!incremental_option) + finish_header (st, blk, block_ordinal); + else if (gnu_list_name->directory) + { + if (archive_format == POSIX_FORMAT) { - if (archive_format == POSIX_FORMAT) - { - xheader_store ("GNU.dumpdir", st, - safe_directory_contents (gnu_list_name->directory)); - finish_header (st, blk, block_ordinal); - } - else + xheader_store ("GNU.dumpdir", st, + safe_directory_contents (gnu_list_name->directory)); + finish_header (st, blk, block_ordinal); + } + else + { + off_t size_left; + off_t totsize; + size_t bufsize; + ssize_t count; + const char *buffer, *p_buffer; + + block_ordinal = current_block_ordinal (); + buffer = safe_directory_contents (gnu_list_name->directory); + totsize = dumpdir_size (buffer); + OFF_TO_CHARS (totsize, blk->header.size); + finish_header (st, blk, block_ordinal); + p_buffer = buffer; + size_left = totsize; + + mv_begin (st); + mv_total_size (totsize); + while (size_left > 0) { - off_t size_left; - off_t totsize; - size_t bufsize; - ssize_t count; - const char *buffer, *p_buffer; - - block_ordinal = current_block_ordinal (); - buffer = safe_directory_contents (gnu_list_name->directory); - totsize = dumpdir_size (buffer); - OFF_TO_CHARS (totsize, blk->header.size); - finish_header (st, blk, block_ordinal); - p_buffer = buffer; - size_left = totsize; - - mv_begin (st); - 
mv_total_size (totsize); - while (size_left > 0) + mv_size_left (size_left); + blk = find_next_block (); + bufsize = available_space_after (blk); + if (size_left < bufsize) { - mv_size_left (size_left); - blk = find_next_block (); - bufsize = available_space_after (blk); - if (size_left < bufsize) - { - bufsize = size_left; - count = bufsize % BLOCKSIZE; - if (count) - memset (blk->buffer + size_left, 0, BLOCKSIZE - count); - } - memcpy (blk->buffer, p_buffer, bufsize); - size_left -= bufsize; - p_buffer += bufsize; - set_next_block_after (blk + (bufsize - 1) / BLOCKSIZE); + bufsize = size_left; + count = bufsize % BLOCKSIZE; + if (count) + memset (blk->buffer + size_left, 0, BLOCKSIZE - count); } - mv_end (); + memcpy (blk->buffer, p_buffer, bufsize); + size_left -= bufsize; + p_buffer += bufsize; + set_next_block_after (blk + (bufsize - 1) / BLOCKSIZE); } - return; + mv_end (); } + return; } if (!recursion_option) @@ -1557,9 +1554,6 @@ dump_file0 (struct tar_stat_info *st, const char *p, return; } - if (is_avoided_name (p)) - return; - is_dir = S_ISDIR (st->stat.st_mode) != 0; if (!is_dir && dump_hard_link (st)) diff --git a/src/incremen.c b/src/incremen.c index fcfdaa9..59be617 100644 --- a/src/incremen.c +++ b/src/incremen.c @@ -692,9 +692,8 @@ struct directory * scan_directory (char *dir, dev_t device, bool cmdline) { char *dirp = savedir (dir); /* for scanning directory */ - char *name_buffer; /* directory, `/', and directory member */ - size_t name_buffer_size; /* allocated size of name_buffer, minus 2 */ - size_t name_length; /* used length in name_buffer */ + namebuf_t nbuf; + char *tmp; struct stat stat_data; struct directory *directory; char ch; @@ -702,35 +701,28 @@ scan_directory (char *dir, dev_t device, bool cmdline) if (! 
dirp) savedir_error (dir); - name_buffer_size = strlen (dir) + NAME_FIELD_SIZE; - name_buffer = xmalloc (name_buffer_size + 2); - strcpy (name_buffer, dir); - zap_slashes (name_buffer); + tmp = xstrdup (dir); + zap_slashes (tmp); - if (deref_stat (dereference_option, name_buffer, &stat_data)) + if (deref_stat (dereference_option, tmp, &stat_data)) { - dir_removed_diag (name_buffer, cmdline, stat_diag); - free (name_buffer); + dir_removed_diag (tmp, cmdline, stat_diag); + free (tmp); free (dirp); return NULL; } - directory = procdir (name_buffer, &stat_data, device, + directory = procdir (tmp, &stat_data, device, (cmdline ? PD_FORCE_INIT : 0), &ch); - name_length = strlen (name_buffer); - if (! ISSLASH (name_buffer[name_length - 1])) - { - name_buffer[name_length] = DIRECTORY_SEPARATOR; - /* name_buffer has been allocated an extra slot */ - name_buffer[++name_length] = 0; - } + free (tmp); + + nbuf = namebuf_create (dir); if (dirp && directory->children != NO_CHILDREN) { char *entry; /* directory entry being scanned */ - size_t entrylen; /* length of directory entry */ dumpdir_iter_t itr; makedumpdir (directory, dirp); @@ -739,25 +731,17 @@ scan_directory (char *dir, dev_t device, bool cmdline) entry; entry = dumpdir_next (itr)) { - entrylen = strlen (entry); - if (name_buffer_size <= entrylen - 1 + name_length) - { - do - name_buffer_size += NAME_FIELD_SIZE; - while (name_buffer_size <= entrylen - 1 + name_length); - name_buffer = xrealloc (name_buffer, name_buffer_size + 2); - } - strcpy (name_buffer + name_length, entry + 1); + char *full_name = namebuf_name (nbuf, entry + 1); if (*entry == 'I') /* Ignored entry */ *entry = 'N'; - else if (excluded_name (name_buffer)) + else if (excluded_name (full_name)) *entry = 'N'; else { - if (deref_stat (dereference_option, name_buffer, &stat_data)) + if (deref_stat (dereference_option, full_name, &stat_data)) { - file_removed_diag (name_buffer, false, stat_diag); + file_removed_diag (full_name, false, stat_diag); *entry = 
'N'; continue; } @@ -770,7 +754,7 @@ scan_directory (char *dir, dev_t device, bool cmdline) else if (directory->children == ALL_CHILDREN) pd_flag |= PD_FORCE_CHILDREN | ALL_CHILDREN; *entry = 'D'; - procdir (name_buffer, &stat_data, device, pd_flag, entry); + procdir (full_name, &stat_data, device, pd_flag, entry); } else if (one_file_system_option && device != stat_data.st_dev) @@ -792,7 +776,8 @@ scan_directory (char *dir, dev_t device, bool cmdline) free (itr); } - free (name_buffer); + namebuf_free (nbuf); + if (dirp) free (dirp); diff --git a/src/misc.c b/src/misc.c index a087263..cdb2608 100644 --- a/src/misc.c +++ b/src/misc.c @@ -827,3 +827,44 @@ page_aligned_alloc (void **ptr, size_t size) return ptr_align (*ptr, alignment); } + + +struct namebuf +{ + char *buffer; /* directory, `/', and directory member */ + size_t buffer_size; /* allocated size of name_buffer */ + size_t dir_length; /* length of directory part in buffer */ +}; + +namebuf_t +namebuf_create (const char *dir) +{ + namebuf_t buf = xmalloc (sizeof (*buf)); + buf->buffer_size = strlen (dir) + 2; + buf->buffer = xmalloc (buf->buffer_size); + strcpy (buf->buffer, dir); + buf->dir_length = strlen (buf->buffer); + if (!ISSLASH (buf->buffer[buf->dir_length - 1])) + buf->buffer[buf->dir_length++] = DIRECTORY_SEPARATOR; + return buf; +} + +void +namebuf_free (namebuf_t buf) +{ + free (buf->buffer); + free (buf); +} + +char * +namebuf_name (namebuf_t buf, const char *name) +{ + size_t len = strlen (name); + while (buf->dir_length + len + 1 >= buf->buffer_size) + buf->buffer = x2realloc (buf->buffer, &buf->buffer_size); + strcpy (buf->buffer + buf->dir_length, name); + return buf->buffer; +} + + + diff --git a/src/names.c b/src/names.c index b12efe1..0e50aa4 100644 --- a/src/names.c +++ b/src/names.c @@ -205,7 +205,7 @@ free_name (struct name *p) /* Names from the command call. 
*/ static struct name *namelist; /* first name in list, if any */ -static struct name **nametail = &namelist; /* end of name list */ +static struct name *nametail; /* end of name list */ /* File name arguments are processed in two stages: first a name_array (see below) is filled, then the names from it @@ -422,8 +422,7 @@ name_gather (void) buffer->parent = NULL; buffer->cmdline = true; - namelist = buffer; - nametail = &namelist->next; + namelist = nametail = buffer; } else if (change_dir) addname (0, change_dir, false, NULL); @@ -457,7 +456,7 @@ addname (char const *string, int change_dir, bool cmdline, struct name *parent) { struct name *name = make_name (string); - name->prev = *nametail; + name->prev = nametail; name->next = NULL; name->found_count = 0; name->matching_flags = matching_flags; @@ -465,9 +464,12 @@ addname (char const *string, int change_dir, bool cmdline, struct name *parent) name->directory = NULL; name->parent = parent; name->cmdline = cmdline; - - *nametail = name; - nametail = &name->next; + + if (nametail) + nametail->next = name; + else + namelist = name; + nametail = name; return name; } @@ -501,7 +503,7 @@ remname (struct name *name) if ((p = name->next) != NULL) p->prev = name->prev; else - nametail = &name->prev; + nametail = name->prev; } /* Return true if and only if name FILE_NAME (from an archive) matches any @@ -521,8 +523,8 @@ name_match (const char *file_name) if (cursor->name[0] == 0) { chdir_do (cursor->change_dir); - namelist = 0; - nametail = &namelist; + namelist = NULL; + nametail = NULL; return true; } @@ -535,8 +537,8 @@ name_match (const char *file_name) if (starting_file_option) { free (namelist); - namelist = 0; - nametail = &namelist; + namelist = NULL; + nametail = NULL; } chdir_do (cursor->change_dir); @@ -627,8 +629,8 @@ names_notfound (void) } /* Don't bother freeing the name list; we're about to exit. 
*/ - namelist = 0; - nametail = &namelist; + namelist = NULL; + nametail = NULL; if (same_order_option) { @@ -975,7 +977,7 @@ collect_and_sort_names (void) prev_name = name; num_names++; } - nametail = &prev_name; + nametail = prev_name; hash_free (nametab); namelist = merge_sort (namelist, num_names, compare_names_found); @@ -1074,24 +1076,6 @@ excluded_name (char const *name) { return excluded_file_name (excluded, name + FILE_SYSTEM_PREFIX_LEN (name)); } - -/* Names to avoid dumping. */ -static Hash_table *avoided_name_table; - -/* Remember to not archive NAME. */ -void -add_avoided_name (char const *name) -{ - hash_string_insert (&avoided_name_table, name); -} - -/* Should NAME be avoided when archiving? */ -bool -is_avoided_name (char const *name) -{ - return hash_string_lookup (avoided_name_table, name); -} - static Hash_table *individual_file_table; diff --git a/src/update.c b/src/update.c index ade4283..0f5dadf 100644 --- a/src/update.c +++ b/src/update.c @@ -137,13 +137,35 @@ update_archive (void) chdir_do (name->change_dir); if (deref_stat (dereference_option, - current_stat_info.file_name, &s) == 0 - && (tar_timespec_cmp (get_stat_mtime (&s), - current_stat_info.mtime) - <= 0)) - add_avoided_name (current_stat_info.file_name); + current_stat_info.file_name, &s) == 0) + { + if (S_ISDIR (s.st_mode)) + { + char *p, *dirp; + dirp = savedir (name->name); + if (!dirp) + savedir_error (name->name); + else + { + namebuf_t nbuf = namebuf_create (name->name); + + for (p = dirp; *p; p += strlen (p) + 1) + addname (namebuf_name (nbuf, p), + 0, false, NULL); + + namebuf_free (nbuf); + free (dirp); + + remname (name); + } + } + else if (tar_timespec_cmp (get_stat_mtime (&s), + current_stat_info.mtime) + <= 0) + remname (name); + } } - + skip_member (); break; } diff --git a/tests/Makefile.am b/tests/Makefile.am index 787b9d0..006a694 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -130,6 +130,8 @@ TESTSUITE_AT = \ spmvp10.at\ truncate.at\ update.at\ + 
update01.at\ + update02.at\ volsize.at\ volume.at\ verbose.at\ diff --git a/tests/testsuite.at b/tests/testsuite.at index 17bec7e..b67d016 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -207,6 +207,8 @@ m4_include([spmvp01.at]) m4_include([spmvp10.at]) m4_include([update.at]) +m4_include([update01.at]) +m4_include([update02.at]) m4_include([volume.at]) m4_include([volsize.at]) diff --git a/tests/update.at b/tests/update.at index cec70f0..6e6e5aa 100644 --- a/tests/update.at +++ b/tests/update.at @@ -23,9 +23,10 @@ # References: <42AB0D28.6030706@mein-horde.de> # by Martin Lohmeier # on Sat, 11 Jun 2005 18:11:20 +0200 +# http://lists.gnu.org/archive/html/bug-tar/2005-06/msg00024.html AT_SETUP([update unchanged directories]) -AT_KEYWORDS([update]) +AT_KEYWORDS([update update00]) AT_TAR_CHECK([ AT_SORT_PREREQ diff --git a/tests/update01.at b/tests/update01.at new file mode 100644 index 0000000..161b06f --- /dev/null +++ b/tests/update01.at @@ -0,0 +1,58 @@ +# Process this file with autom4te to create testsuite. -*- Autotest -*- + +# Test suite for GNU tar. +# Copyright (C) 2009 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. 
+ +# Description: If dir is a directory and arc is a tar archive which +# contains that directory, and dir contains some modifications added +# after adding it to the archive, then `tar -u dir' would add dir/ to +# the archive. +# Last-Affected-Version: 1.22.90 +# References: <4AD4E703.80500@teclabs.eu> +# http://lists.gnu.org/archive/html/bug-tar/2009-10/msg00017.html + +AT_SETUP([update directories]) +AT_KEYWORDS([update update01]) + +AT_TAR_CHECK([ +AT_SORT_PREREQ +mkdir a +genfile --file a/b + +tar cf arc a + +echo "separator" + +sleep 2 +genfile --file a/c + +tar ufv arc a +echo "separator" +tar tf arc | sort || exit 1 +], +[0], +[separator +a/c +separator +a/ +a/b +a/c +]) + +AT_CLEANUP + diff --git a/tests/update02.at b/tests/update02.at new file mode 100644 index 0000000..40f90b9 --- /dev/null +++ b/tests/update02.at @@ -0,0 +1,55 @@ +# Process this file with autom4te to create testsuite. -*- Autotest -*- + +# Test suite for GNU tar. +# Copyright (C) 2009 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. 
+ +# Description: See update01.at +# Last-Affected-Version: 1.22.90 +# References: <4AD4E703.80500@teclabs.eu> +# http://lists.gnu.org/archive/html/bug-tar/2009-10/msg00017.html + +AT_SETUP([update changed files]) +AT_KEYWORDS([update update02]) + +AT_TAR_CHECK([ +AT_SORT_PREREQ +mkdir a +genfile --file a/b + +tar cf arc a + +echo "separator" + +sleep 2 +touch a/b + +tar ufv arc a +echo "separator" +tar tf arc | sort || exit 1 +], +[0], +[separator +a/b +separator +a/ +a/b +a/b +]) + +AT_CLEANUP + -- 2.44.0