X-Git-Url: https://git.dogcows.com/gitweb?p=chaz%2Ftar;a=blobdiff_plain;f=src%2Ftransform.c;h=cd9e27ccf4f6fe624d9f401a58ffd4304bc9794b;hp=c5eafc347e8260352fd3ed1274a5b9cc687a0425;hb=45ccda119355a1087450039a250359c1d0de0d08;hpb=c8aa01c80c9154b1153a4662a66ac8a5f43d7e30 diff --git a/src/transform.c b/src/transform.c index c5eafc3..cd9e27c 100644 --- a/src/transform.c +++ b/src/transform.c @@ -1,9 +1,9 @@ -/* This file is part of GNU tar. - Copyright (C) 2006 Free Software Foundation, Inc. +/* This file is part of GNU tar. + Copyright 2006-2008, 2013-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any later + Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but @@ -12,8 +12,7 @@ Public License for more details. You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include <system.h> #include <regex.h> @@ -21,18 +20,24 @@ enum transform_type { - transform_none, transform_first, transform_global - } -transform_type = transform_none; -static regex_t regex; -static struct obstack stk; + }; enum replace_segm_type { segm_literal, /* Literal segment */ segm_backref, /* Back-reference segment */ + segm_case_ctl /* Case control segment (GNU extension) */ + }; + +enum case_ctl_type + { + ctl_stop, /* Stop case conversion */ + ctl_upcase_next,/* Turn the next character to uppercase */ + ctl_locase_next,/* Turn the next character to lowercase */ + ctl_upcase, /* Turn the replacement to uppercase until ctl_stop */ + ctl_locase /* Turn the replacement to lowercase until ctl_stop */ }; struct replace_segm @@ -45,35 +50,62 @@ struct replace_segm { char *ptr; size_t size; - } literal; - size_t ref; + } literal; /* type == segm_literal */ + size_t ref; /* type == segm_backref */ + enum case_ctl_type ctl; /* type == segm_case_ctl */ } v; }; -static struct replace_segm *repl_head, *repl_tail; -static segm_count; +struct transform +{ + struct transform *next; + enum transform_type transform_type; + int flags; + unsigned match_number; + regex_t regex; + /* Compiled replacement expression */ + struct replace_segm *repl_head, *repl_tail; + size_t segm_count; /* Number of elements in the above list */ +}; + + + +static int transform_flags = XFORM_ALL; +static struct transform *transform_head, *transform_tail; + +static struct transform * +new_transform (void) +{ + struct transform *p = xzalloc (sizeof *p); + if (transform_tail) + transform_tail->next = p; + else + transform_head = p; + transform_tail = p; + return p; +} static struct replace_segm * -add_segment (void) +add_segment (struct transform *tf) { struct replace_segm *segm = xmalloc (sizeof *segm); segm->next = NULL; - if (repl_tail) - repl_tail->next = segm; + if (tf->repl_tail) + tf->repl_tail->next = segm; else - repl_head = segm; - repl_tail = segm; - segm_count++; + tf->repl_head = segm; + tf->repl_tail 
= segm; + tf->segm_count++; return segm; } static void -add_literal_segment (char *str, char *end) +add_literal_segment (struct transform *tf, char *str, char *end) { size_t len = end - str; if (len) { - struct replace_segm *segm = add_segment (); + struct replace_segm *segm = add_segment (tf); segm->type = segm_literal; segm->v.literal.ptr = xmalloc (len + 1); memcpy (segm->v.literal.ptr, str, len); @@ -83,44 +115,98 @@ add_literal_segment (char *str, char *end) } static void -add_char_segment (int chr) +add_char_segment (struct transform *tf, int chr) { - struct replace_segm *segm = add_segment (); + struct replace_segm *segm = add_segment (tf); segm->type = segm_literal; segm->v.literal.ptr = xmalloc (2); segm->v.literal.ptr[0] = chr; segm->v.literal.ptr[1] = 0; - segm->v.literal.size = 2; + segm->v.literal.size = 1; } static void -add_backref_segment (size_t ref) +add_backref_segment (struct transform *tf, size_t ref) { - struct replace_segm *segm = add_segment (); + struct replace_segm *segm = add_segment (tf); segm->type = segm_backref; segm->v.ref = ref; } -void -set_transform_expr (const char *expr) +static int +parse_xform_flags (int *pflags, int c) +{ + switch (c) + { + case 'r': + *pflags |= XFORM_REGFILE; + break; + + case 'R': + *pflags &= ~XFORM_REGFILE; + break; + + case 'h': + *pflags |= XFORM_LINK; + break; + + case 'H': + *pflags &= ~XFORM_LINK; + break; + + case 's': + *pflags |= XFORM_SYMLINK; + break; + + case 'S': + *pflags &= ~XFORM_SYMLINK; + break; + + default: + return 1; + } + return 0; +} + +static void +add_case_ctl_segment (struct transform *tf, enum case_ctl_type ctl) +{ + struct replace_segm *segm = add_segment (tf); + segm->type = segm_case_ctl; + segm->v.ctl = ctl; +} + +static const char * +parse_transform_expr (const char *expr) { int delim; int i, j, rc; char *str, *beg, *cur; const char *p; int cflags = 0; + struct transform *tf = new_transform (); - if (transform_type == transform_none) - obstack_init (&stk); - else + if 
(expr[0] != 's') { - /* Redefinition of the transform expression */ - regfree (&regex); + if (strncmp (expr, "flags=", 6) == 0) + { + transform_flags = 0; + for (expr += 6; *expr; expr++) + { + if (*expr == ';') + { + expr++; + break; + } + if (parse_xform_flags (&transform_flags, *expr)) + USAGE_ERROR ((0, 0, _("Unknown transform flag: %c"), + *expr)); + } + return expr; + } + USAGE_ERROR ((0, 0, _("Invalid transform expression"))); } - if (expr[0] != 's') - USAGE_ERROR ((0, 0, _("Invalid transform expression"))); - delim = expr[1]; /* Scan regular expression */ @@ -140,12 +226,13 @@ set_transform_expr (const char *expr) USAGE_ERROR ((0, 0, _("Invalid transform expression"))); /* Check flags */ - transform_type = transform_first; - for (p = expr + j + 1; *p; p++) + tf->transform_type = transform_first; + tf->flags = transform_flags; + for (p = expr + j + 1; *p && *p != ';'; p++) switch (*p) { case 'g': - transform_type = transform_global; + tf->transform_type = transform_global; break; case 'i': @@ -155,28 +242,39 @@ set_transform_expr (const char *expr) case 'x': cflags |= REG_EXTENDED; break; - + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + tf->match_number = strtoul (p, (char**) &p, 0); + p--; + break; + default: - USAGE_ERROR ((0, 0, _("Unknown flag in transform expression"))); + if (parse_xform_flags (&tf->flags, *p)) + USAGE_ERROR ((0, 0, _("Unknown flag in transform expression: %c"), + *p)); } + if (*p == ';') + p++; + /* Extract and compile regex */ str = xmalloc (i - 1); memcpy (str, expr + 2, i - 2); str[i - 2] = 0; - rc = regcomp (&regex, str, cflags); - + rc = regcomp (&tf->regex, str, cflags); + if (rc) { char errbuf[512]; - regerror (rc, &regex, errbuf, sizeof (errbuf)); + regerror (rc, &tf->regex, errbuf, sizeof (errbuf)); USAGE_ERROR ((0, 0, _("Invalid transform expression: %s"), errbuf)); } if (str[0] == '^' || str[strlen (str) - 1] == '$') - transform_type = transform_first; - + 
tf->transform_type = transform_first; + free (str); /* Extract and compile replacement expr */ @@ -190,60 +288,92 @@ set_transform_expr (const char *expr) if (*cur == '\\') { size_t n; - - add_literal_segment (beg, cur); + + add_literal_segment (tf, beg, cur); switch (*++cur) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': n = strtoul (cur, &cur, 10); - if (n > regex.re_nsub) + if (n > tf->regex.re_nsub) USAGE_ERROR ((0, 0, _("Invalid transform replacement: back reference out of range"))); - add_backref_segment (n); + add_backref_segment (tf, n); break; case '\\': - add_char_segment ('\\'); + add_char_segment (tf, '\\'); cur++; break; case 'a': - add_char_segment ('\a'); + add_char_segment (tf, '\a'); cur++; break; - + case 'b': - add_char_segment ('\b'); + add_char_segment (tf, '\b'); cur++; break; - + case 'f': - add_char_segment ('\f'); + add_char_segment (tf, '\f'); cur++; break; - + case 'n': - add_char_segment ('\n'); + add_char_segment (tf, '\n'); cur++; break; - + case 'r': - add_char_segment ('\r'); + add_char_segment (tf, '\r'); cur++; break; - + case 't': - add_char_segment ('\t'); + add_char_segment (tf, '\t'); cur++; break; - + case 'v': - add_char_segment ('\v'); + add_char_segment (tf, '\v'); cur++; break; case '&': - add_char_segment ('&'); + add_char_segment (tf, '&'); + cur++; + break; + + case 'L': + /* Turn the replacement to lowercase until a '\U' or '\E' + is found, */ + add_case_ctl_segment (tf, ctl_locase); + cur++; + break; + + case 'l': + /* Turn the next character to lowercase, */ + add_case_ctl_segment (tf, ctl_locase_next); + cur++; + break; + + case 'U': + /* Turn the replacement to uppercase until a '\L' or '\E' + is found, */ + add_case_ctl_segment (tf, ctl_upcase); + cur++; + break; + + case 'u': + /* Turn the next character to uppercase, */ + add_case_ctl_segment (tf, ctl_upcase_next); + cur++; + break; + + case 'E': + /* Stop case conversion started by '\L' or '\U'. 
*/ + add_case_ctl_segment (tf, ctl_stop); cur++; break; @@ -253,7 +383,7 @@ set_transform_expr (const char *expr) char buf[2]; buf[0] = '\\'; buf[1] = *cur; - add_literal_segment (buf, buf + 2); + add_literal_segment (tf, buf, buf + 2); } cur++; break; @@ -262,35 +392,96 @@ set_transform_expr (const char *expr) } else if (*cur == '&') { - add_literal_segment (beg, cur); - add_backref_segment (0); + add_literal_segment (tf, beg, cur); + add_backref_segment (tf, 0); beg = ++cur; } else cur++; } - add_literal_segment (beg, cur); - + add_literal_segment (tf, beg, cur); + + return p; } -bool -_transform_name_to_obstack (char *input) +void +set_transform_expr (const char *expr) { - regmatch_t *rmp; + while (*expr) + expr = parse_transform_expr (expr); +} + +/* Run case conversion specified by CASE_CTL on array PTR of SIZE + characters. Returns pointer to statically allocated storage. */ +static char * +run_case_conv (enum case_ctl_type case_ctl, char *ptr, size_t size) +{ + static char *case_ctl_buffer; + static size_t case_ctl_bufsize; char *p; + + if (case_ctl_bufsize < size) + { + case_ctl_bufsize = size; + case_ctl_buffer = xrealloc (case_ctl_buffer, case_ctl_bufsize); + } + memcpy (case_ctl_buffer, ptr, size); + switch (case_ctl) + { + case ctl_upcase_next: + case_ctl_buffer[0] = toupper ((unsigned char) case_ctl_buffer[0]); + break; + + case ctl_locase_next: + case_ctl_buffer[0] = tolower ((unsigned char) case_ctl_buffer[0]); + break; + + case ctl_upcase: + for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++) + *p = toupper ((unsigned char) *p); + break; + + case ctl_locase: + for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++) + *p = tolower ((unsigned char) *p); + break; + + case ctl_stop: + break; + } + return case_ctl_buffer; +} + + +static struct obstack stk; +static bool stk_init; + +static void +_single_transform_name_to_obstack (struct transform *tf, char *input) +{ + regmatch_t *rmp; int rc; - - if (transform_type == transform_none) - return 
false; + size_t nmatches = 0; + enum case_ctl_type case_ctl = ctl_stop, /* Current case conversion op */ + save_ctl = ctl_stop; /* Saved case_ctl for \u and \l */ - rmp = xmalloc ((regex.re_nsub + 1) * sizeof (*rmp)); + /* Reset case conversion after a single-char operation */ +#define CASE_CTL_RESET() if (case_ctl == ctl_upcase_next \ + || case_ctl == ctl_locase_next) \ + { \ + case_ctl = save_ctl; \ + save_ctl = ctl_stop; \ + } + + rmp = xmalloc ((tf->regex.re_nsub + 1) * sizeof (*rmp)); while (*input) { size_t disp; - - rc = regexec (&regex, input, regex.re_nsub + 1, rmp, 0); - + char *ptr; + + rc = regexec (&tf->regex, input, tf->regex.re_nsub + 1, rmp, 0); + if (rc == 0) { struct replace_segm *segm; @@ -299,23 +490,70 @@ _transform_name_to_obstack (char *input) if (rmp[0].rm_so) obstack_grow (&stk, input, rmp[0].rm_so); - - for (segm = repl_head; segm; segm = segm->next) + + nmatches++; + if (tf->match_number && nmatches < tf->match_number) + { + obstack_grow (&stk, input, disp); + input += disp; + continue; + } + + for (segm = tf->repl_head; segm; segm = segm->next) { switch (segm->type) { case segm_literal: /* Literal segment */ - obstack_grow (&stk, segm->v.literal.ptr, - segm->v.literal.size); + if (case_ctl == ctl_stop) + ptr = segm->v.literal.ptr; + else + { + ptr = run_case_conv (case_ctl, + segm->v.literal.ptr, + segm->v.literal.size); + CASE_CTL_RESET(); + } + obstack_grow (&stk, ptr, segm->v.literal.size); break; - + case segm_backref: /* Back-reference segment */ if (rmp[segm->v.ref].rm_so != -1 && rmp[segm->v.ref].rm_eo != -1) - obstack_grow (&stk, - input + rmp[segm->v.ref].rm_so, - rmp[segm->v.ref].rm_eo - rmp[segm->v.ref].rm_so); + { + size_t size = rmp[segm->v.ref].rm_eo + - rmp[segm->v.ref].rm_so; + ptr = input + rmp[segm->v.ref].rm_so; + if (case_ctl != ctl_stop) + { + ptr = run_case_conv (case_ctl, ptr, size); + CASE_CTL_RESET(); + } + + obstack_grow (&stk, ptr, size); + } break; + + case segm_case_ctl: + switch (segm->v.ctl) + { + case 
ctl_upcase_next: + case ctl_locase_next: + switch (save_ctl) + { + case ctl_stop: + case ctl_upcase: + case ctl_locase: + save_ctl = case_ctl; + default: + break; + } + /*FALL THROUGH*/ + + case ctl_upcase: + case ctl_locase: + case ctl_stop: + case_ctl = segm->v.ctl; + } } } } @@ -327,7 +565,7 @@ _transform_name_to_obstack (char *input) input += disp; - if (transform_type == transform_first) + if (tf->transform_type == transform_first) { obstack_grow (&stk, input, strlen (input)); break; @@ -336,43 +574,62 @@ _transform_name_to_obstack (char *input) obstack_1grow (&stk, 0); free (rmp); - return true; } - + +static bool +_transform_name_to_obstack (int flags, char *input, char **output) +{ + struct transform *tf; + bool alloced = false; + + if (!stk_init) + { + obstack_init (&stk); + stk_init = true; + } + + for (tf = transform_head; tf; tf = tf->next) + { + if (tf->flags & flags) + { + _single_transform_name_to_obstack (tf, input); + input = obstack_finish (&stk); + alloced = true; + } + } + *output = input; + return alloced; +} + bool -transform_name_fp (char **pinput, char *(*fun)(char *)) +transform_name_fp (char **pinput, int flags, + char *(*fun)(char *, void *), void *dat) { - char *str, *p; - bool ret = _transform_name_to_obstack (*pinput); + char *str; + bool ret = _transform_name_to_obstack (flags, *pinput, &str); if (ret) { - str = obstack_finish (&stk); - assign_string (pinput, fun ? fun (str) : str); + assign_string (pinput, fun ? 
fun (str, dat) : str); obstack_free (&stk, str); } + else if (fun) + { + *pinput = NULL; + assign_string (pinput, fun (str, dat)); + free (str); + ret = true; + } return ret; } bool -transform_name (char **pinput) +transform_name (char **pinput, int type) { - return transform_name_fp (pinput, NULL); + return transform_name_fp (pinput, type, NULL, NULL); } -#if 0 -void -read_and_transform_loop () +bool +transform_program_p (void) { - char buf[512]; - while (fgets (buf, sizeof buf, stdin)) - { - char *p = buf + strlen (buf); - if (p[-1] == '\n') - p[-1] = 0; - if (transform_name (buf, &p)) - printf ("=> %s\n", p); - else - printf ("=\n"); - } + return transform_head != NULL; } -#endif