From 26538c9bfc5fd726d625bef5fa3f08212d50173a Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Sun, 4 Aug 2013 14:26:35 +0300 Subject: [PATCH] Reduce memory consuption when handling the -T option. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The commit cdb27293 made the -T option more flexible, but incurred a very considerable memory overhead by storing all file names in the argument array. In case of very big file lists this caused tar to run out of memory. This was reported by Christian Wetzel on March 14, 2013 (http://lists.gnu.org/archive/html/bug-tar/2013-03/msg00018.html). On the other hand, Michal Žeidl discovered that tar misfunctioned when given empty file lists or lists with the trailing newline misssing in the last entry. This was reported by Pavel Raiskup on July 23 (http://lists.gnu.org/archive/html/bug-tar/2013-07/msg00009.html and msg00010.html). This change fixes both issues. * src/common.h (name_add_file,request_stdin): New prototype. (more_options): New prototype. * src/names.c (NELT_FILE): New entry type. (name_elt) : New union member. (name_add_file): New function. (read_name_from_file): New function, a rewrite of the same function from tar.c (read_next_name,copy_name): New static functions. (name_next_elt): Handle NELT_FILE entries. * src/tar.c (request_stdin): Make extern. (read_name_from_file,add_file_id) (update_argv): Removed. (parse_opt): Change handling of the -T option. (more_options): New function. * tests/T-null.at: Rewrite test. * tests/T-zfile.at: New file. * tests/T-nonl.at: New file. * tests/Makefile.am: Add new testcases. * tests/testsuite.at: Likewise. * THANKS: Update. --- THANKS | 2 + src/common.h | 4 + src/names.c | 249 +++++++++++++++++++++++++++++++++++++++------ src/tar.c | 191 +++------------------------------- tests/Makefile.am | 2 + tests/T-empty.at | 11 +- tests/T-nonl.at | 62 +++++++++++ tests/T-null.at | 24 ++--- tests/T-zfile.at | 47 +++++++++ tests/testsuite.at | 2 + 10 files changed, 367 insertions(+), 227 deletions(-) create mode 100644 tests/T-nonl.at create mode 100644 tests/T-zfile.at diff --git a/THANKS b/THANKS index 6459157..c30df33 100644 --- a/THANKS +++ b/THANKS @@ -91,6 +91,7 @@ Christian Kirsch ck@held.mind.de Christian Laubscher christian.laubscher@tiscalinet.ch Christian T. Dum ctd@mpe-garching.mpg.de Christian von Roques roques@pond.sub.org +Christian Wetzel wetzel@phoenix-pacs.de Christoph Litauer litauer@mailhost.uni-koblenz.de Christophe Colle colle@krtkg1.rug.ac.be Christophe Kalt Christophe.Kalt@kbcfp.com @@ -350,6 +351,7 @@ Michael P Urban urban@cobra.jpl.nasa.gov Michael Schmidt michael@muc.de Michael Schwingen m.schwingen@stochastik.rwth-aachen.de Michael Smolsky fnsiguc@astro.weizmann.ac.il +Michal Žejdl zejdl@suas.cz Mike Muuss mike@brl.mil Mike Nolan nolan@lpl.arizona.edu Mike Rogers mike@demon.net diff --git a/src/common.h b/src/common.h index 9ac22b8..21df8c1 100644 --- a/src/common.h +++ b/src/common.h @@ -704,6 +704,7 @@ int uname_to_uid (char const *uname, uid_t *puid); void name_init (void); void name_add_name (const char *name, int matching_flags); void name_add_dir (const char *name); +void name_add_file (const char *name, int term); void name_term (void); const char *name_next (int change_dirs); void name_gather (void); @@ -748,6 +749,9 @@ const char *archive_format_string (enum archive_format fmt); const char *subcommand_string (enum subcommand c); void set_exit_status (int val); +void request_stdin (const char *option); +void more_options (int argc, char **argv); + /* Module update.c. */ extern char *output_start; diff --git a/src/names.c b/src/names.c index 32bd8e6..3704a91 100644 --- a/src/names.c +++ b/src/names.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "common.h" @@ -211,7 +212,8 @@ static struct name *nametail; /* end of name list */ #define NELT_NAME 0 /* File name */ #define NELT_CHDIR 1 /* Change directory request */ #define NELT_FMASK 2 /* Change fnmatch options request */ - +#define NELT_FILE 3 /* Read file names from that file */ + struct name_elt /* A name_array element. */ { char type; /* Element type, see NELT_* constants above */ @@ -219,6 +221,12 @@ struct name_elt /* A name_array element. */ { const char *name; /* File or directory name */ int matching_flags;/* fnmatch options if type == NELT_FMASK */ + struct + { + const char *name;/* File name */ + int term; /* File name terminator in the list */ + FILE *fp; + } file; } v; }; @@ -274,6 +282,16 @@ name_add_dir (const char *name) ep->v.name = name; } +void +name_add_file (const char *name, int term) +{ + struct name_elt *ep; + check_name_alloc (); + ep = &name_array[entries++]; + ep->type = NELT_FILE; + ep->v.file.name = name; + ep->v.file.term = term; +} /* Names from external name file. */ @@ -295,7 +313,185 @@ name_term (void) free (name_buffer); free (name_array); } + +/* Prevent recursive inclusion of the same file */ +struct file_id_list +{ + struct file_id_list *next; + ino_t ino; + dev_t dev; +}; + +static struct file_id_list *file_id_list; + +static void +add_file_id (const char *filename) +{ + struct file_id_list *p; + struct stat st; + + if (stat (filename, &st)) + stat_fatal (filename); + for (p = file_id_list; p; p = p->next) + if (p->ino == st.st_ino && p->dev == st.st_dev) + { + FATAL_ERROR ((0, 0, _("%s: file list already read"), + quotearg_colon (filename))); + } + p = xmalloc (sizeof *p); + p->next = file_id_list; + p->ino = st.st_ino; + p->dev = st.st_dev; + file_id_list = p; +} + +enum read_file_list_state /* Result of reading file name from the list file */ + { + file_list_success, /* OK, name read successfully */ + file_list_end, /* End of list file */ + file_list_zero, /* Zero separator encountered where it should not */ + file_list_skip /* Empty (zero-length) entry encountered, skip it */ + }; + +/* Read from FP a sequence of characters up to TERM and put them + into STK. + */ +static enum read_file_list_state +read_name_from_file (struct name_elt *ent) +{ + int c; + size_t counter = 0; + FILE *fp = ent->v.file.fp; + int term = ent->v.file.term; + size_t count; + + for (c = getc (fp); c != EOF && c != term; c = getc (fp)) + { + if (count == name_buffer_length) + name_buffer = x2realloc (name_buffer, &name_buffer_length); + name_buffer[counter++] = c; + if (c == 0) + { + /* We have read a zero separator. The file possibly is + zero-separated */ + return file_list_zero; + } + } + + if (counter == 0 && c != EOF) + return file_list_skip; + + if (count == name_buffer_length) + name_buffer = x2realloc (name_buffer, &name_buffer_length); + name_buffer[counter] = 0; + + return (counter == 0 && c == EOF) ? file_list_end : file_list_success; +} +static int +handle_option (const char *str) +{ + struct wordsplit ws; + int i; + + while (*str && isspace (*str)) + ; + if (*str != '-') + return 1; + + ws.ws_offs = 1; + if (wordsplit (str, &ws, WRDSF_DEFFLAGS|WRDSF_DOOFFS)) + FATAL_ERROR ((0, 0, _("cannot split string '%s': %s"), + str, wordsplit_strerror (&ws))); + ws.ws_wordv[0] = "tar"; + more_options (ws.ws_wordc+ws.ws_offs, ws.ws_wordv); + for (i = 0; i < ws.ws_wordc+ws.ws_offs; i++) + ws.ws_wordv[i] = NULL; + + wordsplit_free (&ws); + return 0; +} + +static int +read_next_name (struct name_elt *ent, struct name_elt *ret) +{ + enum read_file_list_state read_state; + + if (!ent->v.file.fp) + { + if (!strcmp (ent->v.file.name, "-")) + { + request_stdin ("-T"); + ent->v.file.fp = stdin; + } + else + { + add_file_id (ent->v.file.name); + if ((ent->v.file.fp = fopen (ent->v.file.name, "r")) == NULL) + open_fatal (ent->v.file.name); + } + } + + while (1) + { + switch (read_name_from_file (ent)) + { + case file_list_skip: + continue; + + case file_list_zero: + WARNOPT (WARN_FILENAME_WITH_NULS, + (0, 0, N_("%s: file name read contains nul character"), + quotearg_colon (ent->v.file.name))); + ent->v.file.term = 0; + /* fall through */ + case file_list_success: + if (handle_option (name_buffer) == 0) + continue; + ret->type = NELT_NAME; + ret->v.name = name_buffer; + return 0; + + case file_list_end: + if (strcmp (ent->v.file.name, "-")) + fclose (ent->v.file.fp); + ent->v.file.fp = NULL; + return 1; + } + } +} + +static void +copy_name (struct name_elt *ep) +{ + const char *source; + size_t source_len; + char *cursor; + + source = ep->v.name; + source_len = strlen (source); + if (name_buffer_length < source_len) + { + do + { + name_buffer_length *= 2; + if (! name_buffer_length) + xalloc_die (); + } + while (name_buffer_length < source_len); + + free (name_buffer); + name_buffer = xmalloc(name_buffer_length + 2); + } + strcpy (name_buffer, source); + + /* Zap trailing slashes. */ + cursor = name_buffer + strlen (name_buffer) - 1; + while (cursor > name_buffer && ISSLASH (*cursor)) + *cursor-- = '\0'; +} + + static int matching_flags; /* exclude_fnmatch options */ /* Get the next NELT_NAME element from name_array. Result is in @@ -310,51 +506,40 @@ static struct name_elt * name_next_elt (int change_dirs) { static struct name_elt entry; - const char *source; - char *cursor; while (scanned != entries) { struct name_elt *ep; - size_t source_len; - ep = &name_array[scanned++]; + ep = &name_array[scanned]; if (ep->type == NELT_FMASK) { matching_flags = ep->v.matching_flags; + ++scanned; continue; } - source = ep->v.name; - source_len = strlen (source); - if (name_buffer_length < source_len) + switch (ep->type) { - do + case NELT_FILE: + if (read_next_name (ep, &entry) == 0) + return &entry; + ++scanned; + continue; + + case NELT_CHDIR: + if (change_dirs) { - name_buffer_length *= 2; - if (! name_buffer_length) - xalloc_die (); + ++scanned; + copy_name (ep); + if (chdir (name_buffer) < 0) + chdir_fatal (name_buffer); + break; } - while (name_buffer_length < source_len); - - free (name_buffer); - name_buffer = xmalloc (name_buffer_length + 2); - } - strcpy (name_buffer, source); - - /* Zap trailing slashes. */ - - cursor = name_buffer + strlen (name_buffer) - 1; - while (cursor > name_buffer && ISSLASH (*cursor)) - *cursor-- = '\0'; - - if (change_dirs && ep->type == NELT_CHDIR) - { - if (chdir (name_buffer) < 0) - chdir_fatal (name_buffer); - } - else - { + /* fall trhough */ + case NELT_NAME: + ++scanned; + copy_name (ep); if (unquote_option) unquote_string (name_buffer); entry.type = ep->type; diff --git a/src/tar.c b/src/tar.c index 9111433..c3c2459 100644 --- a/src/tar.c +++ b/src/tar.c @@ -79,7 +79,7 @@ static size_t allocated_archive_names; static const char *stdin_used_by; /* Doesn't return if stdin already requested. */ -static void +void request_stdin (const char *option) { if (stdin_used_by) @@ -1106,84 +1106,9 @@ report_textual_dates (struct tar_args *args) } } - - -/* Either NL or NUL, as decided by the --null option. */ -static char filename_terminator; - -enum read_file_list_state /* Result of reading file name from the list file */ - { - file_list_success, /* OK, name read successfully */ - file_list_end, /* End of list file */ - file_list_zero, /* Zero separator encountered where it should not */ - file_list_skip /* Empty (zero-length) entry encountered, skip it */ - }; - -/* Read from FP a sequence of characters up to TERM and put them - into STK. - */ -static enum read_file_list_state -read_name_from_file (FILE *fp, struct obstack *stk, int term) -{ - int c; - size_t counter = 0; - - for (c = getc (fp); c != EOF && c != term; c = getc (fp)) - { - if (c == 0) - { - /* We have read a zero separator. The file possibly is - zero-separated */ - return file_list_zero; - } - obstack_1grow (stk, c); - counter++; - } - - if (counter == 0 && c != EOF) - return file_list_skip; - - obstack_1grow (stk, 0); - - return (counter == 0 && c == EOF) ? file_list_end : file_list_success; -} - static bool files_from_option; /* When set, tar will not refuse to create empty archives */ -static struct obstack argv_stk; /* Storage for additional command line options - read using -T option */ - -/* Prevent recursive inclusion of the same file */ -struct file_id_list -{ - struct file_id_list *next; - ino_t ino; - dev_t dev; -}; - -static struct file_id_list *file_id_list; - -static void -add_file_id (const char *filename) -{ - struct file_id_list *p; - struct stat st; - - if (stat (filename, &st)) - stat_fatal (filename); - for (p = file_id_list; p; p = p->next) - if (p->ino == st.st_ino && p->dev == st.st_dev) - { - FATAL_ERROR ((0, 0, _("%s: file list already read"), - quotearg_colon (filename))); - } - p = xmalloc (sizeof *p); - p->next = file_id_list; - p->ino = st.st_ino; - p->dev = st.st_dev; - file_id_list = p; -} /* Default density numbers for [0-9][lmh] device specifications */ @@ -1201,101 +1126,6 @@ add_file_id (const char *filename) # endif #endif -static void -update_argv (const char *filename, struct argp_state *state) -{ - FILE *fp; - size_t count = 0, i; - char *start, *p; - char **new_argv; - size_t new_argc; - bool is_stdin = false; - enum read_file_list_state read_state; - int term = filename_terminator; - - if (!strcmp (filename, "-")) - { - is_stdin = true; - request_stdin ("-T"); - fp = stdin; - } - else - { - add_file_id (filename); - if ((fp = fopen (filename, "r")) == NULL) - open_fatal (filename); - } - - while ((read_state = read_name_from_file (fp, &argv_stk, term)) - != file_list_end) - { - switch (read_state) - { - case file_list_success: - count++; - break; - - case file_list_end: /* won't happen, just to pacify gcc */ - break; - - case file_list_zero: - { - size_t size; - - WARNOPT (WARN_FILENAME_WITH_NULS, - (0, 0, N_("%s: file name read contains nul character"), - quotearg_colon (filename))); - - /* Prepare new stack contents */ - size = obstack_object_size (&argv_stk); - p = obstack_finish (&argv_stk); - for (; size > 0; size--, p++) - if (*p) - obstack_1grow (&argv_stk, *p); - else - obstack_1grow (&argv_stk, '\n'); - obstack_1grow (&argv_stk, 0); - count = 1; - /* Read rest of files using new filename terminator */ - term = 0; - break; - } - - case file_list_skip: - break; - } - } - - if (!is_stdin) - fclose (fp); - - if (count == 0) - return; - - start = obstack_finish (&argv_stk); - - if (term == 0) - for (p = start; *p; p += strlen (p) + 1) - if (p[0] == '-') - count++; - - new_argc = state->argc + count; - new_argv = xmalloc (sizeof (state->argv[0]) * (new_argc + 1)); - memcpy (new_argv, state->argv, sizeof (state->argv[0]) * (state->argc + 1)); - state->argv = new_argv; - memmove (&state->argv[state->next + count], &state->argv[state->next], - (state->argc - state->next + 1) * sizeof (state->argv[0])); - - state->argc = new_argc; - - for (i = state->next, p = start; *p; p += strlen (p) + 1, i++) - { - if (term == 0 && p[0] == '-') - state->argv[i++] = (char *) "--add-file"; - state->argv[i] = p; - } -} - static char * tar_help_filter (int key, const char *text, void *input) @@ -1462,6 +1292,9 @@ parse_owner_group (char *arg, uintmax_t field_max, char const **name_option) #define TAR_SIZE_SUFFIXES "bBcGgkKMmPTtw" +/* Either NL or NUL, as decided by the --null option. */ +static char filename_terminator; + static error_t parse_opt (int key, char *arg, struct argp_state *state) { @@ -1745,7 +1578,7 @@ parse_opt (int key, char *arg, struct argp_state *state) break; case 'T': - update_argv (arg, state); + name_add_file (arg, filename_terminator); /* Indicate we've been given -T option. This is for backward compatibility only, so that `tar cfT archive /dev/null will succeed */ @@ -2371,11 +2204,12 @@ static int subcommand_class[] = { /* Return t if the subcommand_option is in class(es) f */ #define IS_SUBCOMMAND_CLASS(f) (subcommand_class[subcommand_option] & (f)) +static struct tar_args args; + static void decode_options (int argc, char **argv) { int idx; - struct tar_args args; argp_version_setup ("tar", tar_authors); @@ -2476,7 +2310,6 @@ decode_options (int argc, char **argv) if (argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &idx, &args)) exit (TAREXIT_FAILURE); - /* Special handling for 'o' option: GNU tar used to say "output old format". @@ -2742,6 +2575,14 @@ decode_options (int argc, char **argv) report_textual_dates (&args); } +void +more_options (int argc, char **argv) +{ + int idx; + if (argp_parse (&argp, argc, argv, ARGP_IN_ORDER, + &idx, &args)) + exit (TAREXIT_FAILURE); +} /* Tar proper. */ @@ -2771,8 +2612,6 @@ main (int argc, char **argv) xmalloc (sizeof (const char *) * allocated_archive_names); archive_names = 0; - obstack_init (&argv_stk); - /* System V fork+wait does not work if SIGCHLD is ignored. */ signal (SIGCHLD, SIG_DFL); diff --git a/tests/Makefile.am b/tests/Makefile.am index 8b7acb1..e13b079 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -44,6 +44,8 @@ $(srcdir)/package.m4: $(top_srcdir)/configure.ac TESTSUITE_AT = \ T-empty.at\ T-null.at\ + T-zfile.at\ + T-nonl.at\ testsuite.at\ append.at\ append01.at\ diff --git a/tests/T-empty.at b/tests/T-empty.at index d3b28df..5261445 100644 --- a/tests/T-empty.at +++ b/tests/T-empty.at @@ -23,8 +23,8 @@ # Reported by: Karl Berry # References: <200610301353.k9UDr1O30680@f7.net> -AT_SETUP([files-from: empty entries]) -AT_KEYWORDS([files-from empty]) +AT_SETUP([empty entries]) +AT_KEYWORDS([files-from empty-line]) AT_DATA([file-list], [jeden @@ -34,17 +34,16 @@ trzy ]) AT_TAR_CHECK([ -AT_SORT_PREREQ genfile --file jeden genfile --file dwa genfile --file trzy -tar cfvT archive ../file-list | sort +tar cfvT archive ../file-list ], [0], -[dwa -jeden +[jeden +dwa trzy ], [],[],[],[ustar]) # Testing one format is enough diff --git a/tests/T-nonl.at b/tests/T-nonl.at new file mode 100644 index 0000000..a694155 --- /dev/null +++ b/tests/T-nonl.at @@ -0,0 +1,62 @@ +# Process this file with autom4te to create testsuite. -*- Autotest -*- +# +# Test suite for GNU tar. +# Copyright 2013 Free Software Foundation, Inc. +# +# This file is part of GNU tar. +# +# GNU tar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GNU tar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# Tar malfunctioned when given a file list with the last line not ending +# in a newline. +# +# Reported by: Michal Žejdl +# References: + +AT_SETUP([entries with missing newlines]) +AT_KEYWORDS([files-from nonewline nonl T-nonl]) + +AT_TAR_CHECK([ +genfile --length=0 --file empty +AS_ECHO_N(c) > 1.nonl +echo d > 2.nonl +AS_ECHO_N(e) >> 2.nonl +touch a b c d e +AT_DATA([filelist],[a +b +]) + +tar cf archive -T empty -T 1.nonl -T 2.nonl -T filelist +tar tf archive +echo == +tar cf archive -T 2.nonl -T empty -T filelist -T 1.nonl +tar tf archive +], +[0], +[c +d +e +a +b +== +d +e +a +b +c +], +[],[],[],[ustar]) + +AT_CLEANUP + diff --git a/tests/T-null.at b/tests/T-null.at index 8f42e5b..5db5cf4 100644 --- a/tests/T-null.at +++ b/tests/T-null.at @@ -18,30 +18,28 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -AT_SETUP([files-from: 0-separated file without -0]) +AT_SETUP([0-separated file without -0]) AT_KEYWORDS([files-from null T-null]) -AT_DATA([expout],[jeden\ndwa -trzy -]) - AT_TAR_CHECK([ AT_SORT_PREREQ -echo dwa > temp +echo jeden > temp +echo dwa >> temp echo trzy >> temp -cat temp | tr '\n' '\0' > temp1 -echo jeden > file-list -cat temp1 >> file-list +cat temp | tr '\n' '\0' > file-list -genfile -f "jeden -dwa" || AT_SKIP_TEST +genfile -f jeden +genfile -f dwa genfile -f trzy -tar cfTv archive file-list | sort +tar cfTv archive file-list ], [0], -[expout], +[jeden +dwa +trzy +], [tar: file-list: file name read contains nul character ],[],[],[ustar]) # Testing one format is enough diff --git a/tests/T-zfile.at b/tests/T-zfile.at new file mode 100644 index 0000000..f4adfe8 --- /dev/null +++ b/tests/T-zfile.at @@ -0,0 +1,47 @@ +# Process this file with autom4te to create testsuite. -*- Autotest -*- +# +# Test suite for GNU tar. +# Copyright 2013 Free Software Foundation, Inc. +# +# This file is part of GNU tar. +# +# GNU tar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GNU tar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +AT_SETUP([empty file]) +AT_KEYWORDS([files-from empty-file]) + +AT_TAR_CHECK([ +genfile --length=0 --file empty +genfile --file a +genfile --file b +AT_DATA([valid],[a +b +]) + +tar cf archive -T empty -T valid +tar tf archive +echo "==" +tar cf archive -T valid -T empty +tar tf archive +], +[0], +[a +b +== +a +b +], +[],[],[],[ustar]) # Testing one format is enough + +AT_CLEANUP diff --git a/tests/testsuite.at b/tests/testsuite.at index d3fabe5..f1b6cc3 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -199,6 +199,8 @@ m4_include([opcomp06.at]) AT_BANNER([The -T option]) m4_include([T-empty.at]) m4_include([T-null.at]) +m4_include([T-zfile.at]) +m4_include([T-nonl.at]) AT_BANNER([Various options]) m4_include([indexfile.at]) -- 2.43.0