X-Git-Url: https://git.dogcows.com/gitweb?p=chaz%2Ftar;a=blobdiff_plain;f=src%2Fmisc.c;h=e92c8aa830e36fe2fca6cd25aeaf9f107d7b6bac;hp=cdb2608aa45511cee77721445d4bca2a6e4fa880;hb=45ccda119355a1087450039a250359c1d0de0d08;hpb=cac45fffc58cc10056c528582ee4b78b8ee175e0 diff --git a/src/misc.c b/src/misc.c index cdb2608..e92c8aa 100644 --- a/src/misc.c +++ b/src/misc.c @@ -1,7 +1,7 @@ /* Miscellaneous functions, not really specific to GNU tar. - Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1999, 2000, 2001, - 2003, 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. + Copyright 1988, 1992, 1994-1997, 1999-2001, 2003-2007, 2009-2010, + 2012-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -14,26 +14,23 @@ Public License for more details. You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + with this program. If not, see . */ +#define COMMON_INLINE _GL_EXTERN_INLINE #include #include #include "common.h" #include -#include #include #include #include -#include -#if HAVE_STROPTS_H -# include -#endif -#if HAVE_SYS_FILIO_H -# include +#ifndef DOUBLE_SLASH_IS_DISTINCT_ROOT +# define DOUBLE_SLASH_IS_DISTINCT_ROOT 0 #endif +static const char *tar_getcdpath (int); + /* Handling strings. */ @@ -42,11 +39,13 @@ void assign_string (char **string, const char *value) { - if (*string) - free (*string); + free (*string); *string = value ? xstrdup (value) : 0; } +#if 0 +/* This function is currently unused; perhaps it should be removed? */ + /* Allocate a copy of the string quoted as in C, and returns that. If the string does not have to be quoted, it returns a null pointer. 
The allocated copy should normally be freed with free() after the @@ -59,7 +58,7 @@ assign_string (char **string, const char *value) when reading directory files. This means that we can't use quotearg, as quotearg is locale-dependent and is meant for human consumption. */ -char * +static char * quote_copy_string (const char *string) { const char *source = string; @@ -100,6 +99,7 @@ quote_copy_string (const char *string) } return 0; } +#endif /* Takes a quoted C string (like those produced by quote_copy_string) and turns it back into the un-quoted original. This is done in @@ -107,7 +107,7 @@ quote_copy_string (const char *string) completes the unquoting anyway. This is used for reading the saved directory file in incremental - dumps. It is used for decoding old `N' records (demangling names). + dumps. It is used for decoding old 'N' records (demangling names). But also, it is used for decoding file arguments, would they come from the shell or a -T file, and for decoding the --exclude argument. */ @@ -230,10 +230,84 @@ zap_slashes (char *name) return name; } +/* Normalize FILE_NAME by removing redundant slashes and "." + components, including redundant trailing slashes. + Leave ".." alone, as it may be significant in the presence + of symlinks and on platforms where "/.." != "/". + + Destructive version: modifies its argument. */ +void +normalize_filename_x (char *file_name) +{ + char *name = file_name + FILE_SYSTEM_PREFIX_LEN (file_name); + char *p; + char const *q; + char c; + + /* Don't squeeze leading "//" to "/", on hosts where they're distinct. */ + name += (DOUBLE_SLASH_IS_DISTINCT_ROOT + && ISSLASH (*name) && ISSLASH (name[1]) && ! ISSLASH (name[2])); + + /* Omit redundant leading "." components. */ + for (q = p = name; (*p = *q) == '.' && ISSLASH (q[1]); p += !*q) + for (q += 2; ISSLASH (*q); q++) + continue; + + /* Copy components from Q to P, omitting redundant slashes and + internal "." components. 
*/ + while ((*p++ = c = *q++) != '\0') + if (ISSLASH (c)) + while (ISSLASH (q[*q == '.'])) + q += (*q == '.') + 1; + + /* Omit redundant trailing "." component and slash. */ + if (2 < p - name) + { + p -= p[-2] == '.' && ISSLASH (p[-3]); + p -= 2 < p - name && ISSLASH (p[-2]); + p[-1] = '\0'; + } +} + +/* Normalize NAME by removing redundant slashes and "." components, + including redundant trailing slashes. + + Return a normalized newly-allocated copy. */ + char * -normalize_filename (const char *name) +normalize_filename (int cdidx, const char *name) { - return zap_slashes (canonicalize_filename_mode (name, CAN_MISSING)); + char *copy = NULL; + + if (IS_RELATIVE_FILE_NAME (name)) + { + /* Set COPY to the absolute path for this name. + + FIXME: There should be no need to get the absolute file name. + tar_getcdpath does not return a true "canonical" path, so + the following approach may lead to situations where the same + file or directory is processed twice under different absolute + paths without that duplication being detected. Perhaps we + should use dev+ino pairs instead of names? */ + const char *cdpath = tar_getcdpath (cdidx); + size_t copylen; + bool need_separator; + + if (!cdpath) + call_arg_fatal ("getcwd", "."); + copylen = strlen (cdpath); + need_separator = ! (DOUBLE_SLASH_IS_DISTINCT_ROOT + && copylen == 2 && ISSLASH (cdpath[1])); + copy = xmalloc (copylen + need_separator + strlen (name) + 1); + strcpy (copy, cdpath); + copy[copylen] = DIRECTORY_SEPARATOR; + strcpy (copy + copylen + need_separator, name); + } + + if (!copy) + copy = xstrdup (name); + normalize_filename_x (copy); + return copy; } @@ -258,6 +332,76 @@ replace_prefix (char **pname, const char *samp, size_t slen, /* Handling numbers. */ +/* Convert VALUE, which is converted from a system integer type whose + minimum value is MINVAL and maximum MAXVAL, to a decimal + integer string. Use the storage in BUF and return a pointer to the + converted string.
If VALUE is converted from a negative integer in + the range MINVAL .. -1, represent it with a string representation + of the negative integer, using leading '-'. */ +#if ! (INTMAX_MAX <= UINTMAX_MAX / 2) +# error "sysinttostr: uintmax_t cannot represent all intmax_t values" +#endif +char * +sysinttostr (uintmax_t value, intmax_t minval, uintmax_t maxval, + char buf[SYSINT_BUFSIZE]) +{ + if (value <= maxval) + return umaxtostr (value, buf); + else + { + intmax_t i = value - minval; + return imaxtostr (i + minval, buf); + } +} + +/* Convert a prefix of the string ARG to a system integer type whose + minimum value is MINVAL and maximum MAXVAL. If MINVAL is negative, + negative integers MINVAL .. -1 are assumed to be represented using + leading '-' in the usual way. If the represented value exceeds + INTMAX_MAX, return a negative integer V such that (uintmax_t) V + yields the represented value. If ARGLIM is nonnull, store into + *ARGLIM a pointer to the first character after the prefix. + + This is the inverse of sysinttostr. + + On a normal return, set errno = 0. + On conversion error, return 0 and set errno = EINVAL. + On overflow, return an extreme value and set errno = ERANGE. */ +#if ! (INTMAX_MAX <= UINTMAX_MAX) +# error "strtosysint: nonnegative intmax_t does not fit in uintmax_t" +#endif +intmax_t +strtosysint (char const *arg, char **arglim, intmax_t minval, uintmax_t maxval) +{ + errno = 0; + if (maxval <= INTMAX_MAX) + { + if (ISDIGIT (arg[*arg == '-'])) + { + intmax_t i = strtoimax (arg, arglim, 10); + intmax_t imaxval = maxval; + if (minval <= i && i <= imaxval) + return i; + errno = ERANGE; + return i < minval ? 
minval : maxval; + } + } + else + { + if (ISDIGIT (*arg)) + { + uintmax_t i = strtoumax (arg, arglim, 10); + if (i <= maxval) + return represent_uintmax (i); + errno = ERANGE; + return maxval; + } + } + + errno = EINVAL; + return 0; +} + /* Output fraction and trailing digits appropriate for a nanoseconds count equal to NS, but don't output unnecessary '.' or trailing zeros. */ @@ -298,6 +442,10 @@ code_timespec (struct timespec t, char sbuf[TIMESPEC_STRSIZE_BOUND]) char *np; bool negative = s < 0; + /* ignore invalid values of ns */ + if (BILLION <= ns || ns < 0) + ns = 0; + if (negative && ns != 0) { s++; @@ -310,6 +458,84 @@ code_timespec (struct timespec t, char sbuf[TIMESPEC_STRSIZE_BOUND]) code_ns_fraction (ns, sbuf + UINTMAX_STRSIZE_BOUND); return np; } + +struct timespec +decode_timespec (char const *arg, char **arg_lim, bool parse_fraction) +{ + time_t s = TYPE_MINIMUM (time_t); + int ns = -1; + char const *p = arg; + bool negative = *arg == '-'; + struct timespec r; + + if (! ISDIGIT (arg[negative])) + errno = EINVAL; + else + { + errno = 0; + + if (negative) + { + intmax_t i = strtoimax (arg, arg_lim, 10); + if (TYPE_SIGNED (time_t) ? TYPE_MINIMUM (time_t) <= i : 0 <= i) + s = i; + else + errno = ERANGE; + } + else + { + uintmax_t i = strtoumax (arg, arg_lim, 10); + if (i <= TYPE_MAXIMUM (time_t)) + s = i; + else + errno = ERANGE; + } + + p = *arg_lim; + ns = 0; + + if (parse_fraction && *p == '.') + { + int digits = 0; + bool trailing_nonzero = false; + + while (ISDIGIT (*++p)) + if (digits < LOG10_BILLION) + digits++, ns = 10 * ns + (*p - '0'); + else + trailing_nonzero |= *p != '0'; + + while (digits < LOG10_BILLION) + digits++, ns *= 10; + + if (negative) + { + /* Convert "-1.10000000000001" to s == -2, ns == 899999999. + I.e., truncate time stamps towards minus infinity while + converting them to internal form.
*/ + ns += trailing_nonzero; + if (ns != 0) + { + if (s == TYPE_MINIMUM (time_t)) + ns = -1; + else + { + s--; + ns = BILLION - ns; + } + } + } + } + + if (errno == ERANGE) + ns = -1; + } + + *arg_lim = (char *) p; + r.tv_sec = s; + r.tv_nsec = ns; + return r; +} /* File handling. */ @@ -318,7 +544,7 @@ static char *before_backup_name; static char *after_backup_name; /* Return 1 if FILE_NAME is obviously "." or "/". */ -static bool +bool must_be_dot_or_slash (char const *file_name) { file_name += FILE_SYSTEM_PREFIX_LEN (file_name); @@ -359,7 +585,7 @@ safer_rmdir (const char *file_name) return -1; } - return rmdir (file_name); + return unlinkat (chdir_fd, file_name, AT_REMOVEDIR); } /* Remove FILE_NAME, returning 1 on success. If FILE_NAME is a directory, @@ -379,7 +605,7 @@ remove_any_file (const char *file_name, enum remove_option option) if (try_unlink_first) { - if (unlink (file_name) == 0) + if (unlinkat (chdir_fd, file_name, 0) == 0) return 1; /* POSIX 1003.1-2001 requires EPERM when attempting to unlink a @@ -395,7 +621,7 @@ remove_any_file (const char *file_name, enum remove_option option) switch (errno) { case ENOTDIR: - return !try_unlink_first && unlink (file_name) == 0; + return !try_unlink_first && unlinkat (chdir_fd, file_name, 0) == 0; case 0: case EEXIST: @@ -412,7 +638,7 @@ remove_any_file (const char *file_name, enum remove_option option) case RECURSIVE_REMOVE_OPTION: { - char *directory = savedir (file_name); + char *directory = tar_savedir (file_name, 0); char const *entry; size_t entrylen; @@ -472,7 +698,7 @@ maybe_backup_file (const char *file_name, bool this_is_the_archive) if (this_is_the_archive && _remdev (file_name)) return true; - if (stat (file_name, &file_stat)) + if (deref_stat (file_name, &file_stat) != 0) { if (errno == ENOENT) return true; @@ -492,7 +718,8 @@ maybe_backup_file (const char *file_name, bool this_is_the_archive) if (! 
after_backup_name) xalloc_die (); - if (rename (before_backup_name, after_backup_name) == 0) + if (renameat (chdir_fd, before_backup_name, chdir_fd, after_backup_name) + == 0) { if (verbose_option) fprintf (stdlis, _("Renaming %s to %s\n"), @@ -519,7 +746,8 @@ undo_last_backup (void) { if (after_backup_name) { - if (rename (after_backup_name, before_backup_name) != 0) + if (renameat (chdir_fd, after_backup_name, chdir_fd, before_backup_name) + != 0) { int e = errno; ERROR ((0, e, _("%s: Cannot rename to %s"), @@ -534,39 +762,91 @@ undo_last_backup (void) } } -/* Depending on DEREF, apply either stat or lstat to (NAME, BUF). */ +/* Apply either stat or lstat to (NAME, BUF), depending on the + presence of the --dereference option. NAME is relative to the + most-recent argument to chdir_do. */ int -deref_stat (bool deref, char const *name, struct stat *buf) +deref_stat (char const *name, struct stat *buf) { - return deref ? stat (name, buf) : lstat (name, buf); + return fstatat (chdir_fd, name, buf, fstatat_flags); } -/* Set FD's (i.e., FILE's) access time to TIMESPEC[0]. If that's not - possible to do by itself, set its access and data modification - times to TIMESPEC[0] and TIMESPEC[1], respectively. */ -int -set_file_atime (int fd, char const *file, struct timespec const timespec[2]) +/* Read from FD into the buffer BUF with COUNT bytes. Attempt to fill + BUF. Wait until input is available; this matters because files are + opened O_NONBLOCK for security reasons, and on some file systems + this can cause read to fail with errno == EAGAIN. Return the + actual number of bytes read, zero for EOF, or + SAFE_READ_ERROR upon error. 
*/ +size_t +blocking_read (int fd, void *buf, size_t count) { -#ifdef _FIOSATIME - if (0 <= fd) + size_t bytes = safe_read (fd, buf, count); + +#if defined F_SETFL && O_NONBLOCK + if (bytes == SAFE_READ_ERROR && errno == EAGAIN) { - struct timeval timeval; - timeval.tv_sec = timespec[0].tv_sec; - timeval.tv_usec = timespec[0].tv_nsec / 1000; - if (ioctl (fd, _FIOSATIME, &timeval) == 0) - return 0; + int flags = fcntl (fd, F_GETFL); + if (0 <= flags && flags & O_NONBLOCK + && fcntl (fd, F_SETFL, flags & ~O_NONBLOCK) != -1) + bytes = safe_read (fd, buf, count); } #endif - return gl_futimens (fd, file, timespec); + return bytes; +} + +/* Write to FD from the buffer BUF with COUNT bytes. Do a full write. + Wait until an output buffer is available; this matters because + files are opened O_NONBLOCK for security reasons, and on some file + systems this can cause write to fail with errno == EAGAIN. Return + the actual number of bytes written, setting errno if that is less + than COUNT. */ +size_t +blocking_write (int fd, void const *buf, size_t count) +{ + size_t bytes = full_write (fd, buf, count); + +#if defined F_SETFL && O_NONBLOCK + if (bytes < count && errno == EAGAIN) + { + int flags = fcntl (fd, F_GETFL); + if (0 <= flags && flags & O_NONBLOCK + && fcntl (fd, F_SETFL, flags & ~O_NONBLOCK) != -1) + { + char const *buffer = buf; + bytes += full_write (fd, buffer + bytes, count - bytes); + } + } +#endif + + return bytes; +} + +/* Set FD's (i.e., assuming the working directory is PARENTFD, FILE's) + access time to ATIME. */ +int +set_file_atime (int fd, int parentfd, char const *file, struct timespec atime) +{ + struct timespec ts[2]; + ts[0] = atime; + ts[1].tv_nsec = UTIME_OMIT; + return fdutimensat (fd, parentfd, file, ts, fstatat_flags); } /* A description of a working directory. */ struct wd { + /* The directory's name. 
*/ char const *name; - int saved; - struct saved_cwd saved_cwd; + /* "Absolute" path representing this directory; in contrast to + the real absolute pathname, it can contain /../ components (see + normalize_filename_x for the reason). It is NULL if the + absolute path could not be determined. */ + char *abspath; + /* If nonzero, the file descriptor of the directory, or AT_FDCWD if + the working directory. If zero, the directory needs to be opened + to be used. */ + int fd; }; /* A vector of chdir targets. wd[0] is the initial working directory. */ @@ -578,8 +858,21 @@ static size_t wd_count; /* The allocated size of the vector. */ static size_t wd_alloc; +/* The maximum number of chdir targets with open directories. + Don't make it too large, as many operating systems have a small + limit on the number of open file descriptors. Also, the current + implementation does not scale well. */ +enum { CHDIR_CACHE_SIZE = 16 }; + +/* Indexes into WD of chdir targets with open file descriptors, sorted + most-recently used first. Zero indexes are unused. */ +static int wdcache[CHDIR_CACHE_SIZE]; + +/* Number of nonzero entries in WDCACHE. */ +static size_t wdcache_count; + int -chdir_count () +chdir_count (void) { if (wd_count == 0) return wd_count; @@ -591,20 +884,19 @@ chdir_count () int chdir_arg (char const *dir) { + char *absdir; + if (wd_count == wd_alloc) { if (wd_alloc == 0) - { - wd_alloc = 2; - wd = xmalloc (sizeof *wd * wd_alloc); - } - else - wd = x2nrealloc (wd, &wd_alloc, sizeof *wd); + wd_alloc = 2; + wd = x2nrealloc (wd, &wd_alloc, sizeof *wd); if (! wd_count) { wd[wd_count].name = "."; - wd[wd_count].saved = 0; + wd[wd_count].abspath = xgetcwd (); + wd[wd_count].fd = AT_FDCWD; wd_count++; } } @@ -620,68 +912,121 @@ chdir_arg (char const *dir) return wd_count - 1; } + + /* If the given name is absolute, use it to represent this directory; + otherwise, construct a name based on the previous -C option.
*/ + if (IS_ABSOLUTE_FILE_NAME (dir)) + absdir = xstrdup (dir); + else if (wd[wd_count - 1].abspath) + { + namebuf_t nbuf = namebuf_create (wd[wd_count - 1].abspath); + namebuf_add_dir (nbuf, dir); + absdir = namebuf_finish (nbuf); + } + else + absdir = 0; + wd[wd_count].name = dir; - wd[wd_count].saved = 0; + wd[wd_count].abspath = absdir; + wd[wd_count].fd = 0; return wd_count++; } -/* Change to directory I. If I is 0, change to the initial working - directory; otherwise, I must be a value returned by chdir_arg. */ +/* Index of current directory. */ +int chdir_current; + +/* Value suitable for use as the first argument to openat, and in + similar locations for fstatat, etc. This is an open file + descriptor, or AT_FDCWD if the working directory is current. It is + valid until the next invocation of chdir_do. */ +int chdir_fd = AT_FDCWD; + +/* Change to directory I, in a virtual way. This does not actually + invoke chdir; it merely sets chdir_fd to an int suitable as the + first argument for openat, etc. If I is 0, change to the initial + working directory; otherwise, I must be a value returned by + chdir_arg. */ void chdir_do (int i) { - static int previous; - - if (previous != i) + if (chdir_current != i) { - struct wd *prev = &wd[previous]; struct wd *curr = &wd[i]; + int fd = curr->fd; - if (! prev->saved) + if (! fd) { - int err = 0; - prev->saved = 1; - if (save_cwd (&prev->saved_cwd) != 0) - err = errno; - else if (0 <= prev->saved_cwd.desc) + if (! IS_ABSOLUTE_FILE_NAME (curr->name)) + chdir_do (i - 1); + fd = openat (chdir_fd, curr->name, + open_searchdir_flags & ~ O_NOFOLLOW); + if (fd < 0) + open_fatal (curr->name); + + curr->fd = fd; + + /* Add I to the cache, tossing out the lowest-ranking entry if the + cache is full. */ + if (wdcache_count < CHDIR_CACHE_SIZE) + wdcache[wdcache_count++] = i; + else { - /* Make sure we still have at least one descriptor available. 
*/ - int fd1 = prev->saved_cwd.desc; - int fd2 = dup (fd1); - if (0 <= fd2) - close (fd2); - else if (errno == EMFILE) - { - /* Force restore_cwd to use chdir_long. */ - close (fd1); - prev->saved_cwd.desc = -1; - prev->saved_cwd.name = xgetcwd (); - } - else - err = errno; + struct wd *stale = &wd[wdcache[CHDIR_CACHE_SIZE - 1]]; + if (close (stale->fd) != 0) + close_diag (stale->name); + stale->fd = 0; + wdcache[CHDIR_CACHE_SIZE - 1] = i; } - - if (err) - FATAL_ERROR ((0, err, _("Cannot save working directory"))); } - if (curr->saved) - { - if (restore_cwd (&curr->saved_cwd)) - FATAL_ERROR ((0, 0, _("Cannot change working directory"))); - } - else + if (0 < fd) { - if (i && ! ISSLASH (curr->name[0])) - chdir_do (i - 1); - if (chdir (curr->name) != 0) - chdir_fatal (curr->name); + /* Move the i value to the front of the cache. This is + O(CHDIR_CACHE_SIZE), but the cache is small. */ + size_t ci; + int prev = wdcache[0]; + for (ci = 1; prev != i; ci++) + { + int cur = wdcache[ci]; + wdcache[ci] = prev; + if (cur == i) + break; + prev = cur; + } + wdcache[0] = i; } - previous = i; + chdir_current = i; + chdir_fd = fd; } } +const char * +tar_dirname (void) +{ + return wd[chdir_current].name; +} + +/* Return the absolute path that represents the working + directory referenced by IDX. + + If wd is empty, then there were no -C options given, and + chdir_arg() has never been called, so we simply return the + process's actual cwd. (Note that in this case IDX is ignored, + since it should always be 0.)
*/ +static const char * +tar_getcdpath (int idx) +{ + if (!wd) + { + static char *cwd; + if (!cwd) + cwd = xgetcwd (); + return cwd; + } + return wd[idx].abspath; +} + void close_diag (char const *name) { @@ -755,21 +1100,6 @@ file_removed_diag (const char *name, bool top_level, (0, 0, _("%s: File removed before we read it"), quotearg_colon (name))); set_exit_status (TAREXIT_DIFFERS); - } - else - diagfn (name); -} - -void -dir_removed_diag (const char *name, bool top_level, - void (*diagfn) (char const *name)) -{ - if (!top_level && errno == ENOENT) - { - WARNOPT (WARN_FILE_REMOVED, - (0, 0, _("%s: Directory removed before we read it"), - quotearg_colon (name))); - set_exit_status (TAREXIT_DIFFERS); } else diagfn (name); @@ -831,7 +1161,7 @@ page_aligned_alloc (void **ptr, size_t size) struct namebuf { - char *buffer; /* directory, `/', and directory member */ + char *buffer; /* directory, '/', and directory member */ size_t buffer_size; /* allocated size of name_buffer */ size_t dir_length; /* length of directory part in buffer */ }; @@ -866,5 +1196,54 @@ namebuf_name (namebuf_t buf, const char *name) return buf->buffer; } +void +namebuf_add_dir (namebuf_t buf, const char *name) +{ + static char dirsep[] = { DIRECTORY_SEPARATOR, 0 }; + if (!ISSLASH (buf->buffer[buf->dir_length - 1])) + { + namebuf_name (buf, dirsep); + buf->dir_length++; + } + namebuf_name (buf, name); + buf->dir_length += strlen (name); +} + +char * +namebuf_finish (namebuf_t buf) +{ + char *res = buf->buffer; + + if (ISSLASH (buf->buffer[buf->dir_length - 1])) + buf->buffer[buf->dir_length] = 0; + free (buf); + return res; +} + +/* Return the filenames in directory NAME, relative to the chdir_fd. + If the directory does not exist, report error if MUST_EXIST is + true. - + Return NULL on errors. 
+*/ +char * +tar_savedir (const char *name, int must_exist) +{ + char *ret = NULL; + DIR *dir = NULL; + int fd = openat (chdir_fd, name, open_read_flags | O_DIRECTORY); + if (fd < 0) + { + if (!must_exist && errno == ENOENT) + return NULL; + open_error (name); + } + else if (! ((dir = fdopendir (fd)) + && (ret = streamsavedir (dir)))) + savedir_error (name); + + if (dir ? closedir (dir) != 0 : 0 <= fd && close (fd) != 0) + savedir_error (name); + + return ret; +}