Dogcows Code - chaz/tar/blob - src/transform.c
(add_char_segment): Fix length assignment
[chaz/tar] / src / transform.c
1 /* This file is part of GNU tar.
2 Copyright (C) 2006 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
12 Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18 #include <system.h>
19 #include <regex.h>
20 #include "common.h"
21
22 enum transform_type
23 {
24 transform_none,
25 transform_first,
26 transform_global
27 }
28 transform_type = transform_none;
29 static unsigned match_number = 0;
30 static regex_t regex;
31 static struct obstack stk;
32
/* Kinds of segment a compiled replacement expression is built from.  */
enum replace_segm_type
  {
    segm_literal = 0,   /* Text copied as is */
    segm_backref = 1,   /* Reference to a matched (sub)expression */
    segm_case_ctl = 2   /* Case control operator (GNU extension) */
  };
39
/* Case conversion operators usable in a replacement expression.  */
enum case_ctl_type
  {
    ctl_stop = 0,         /* End case conversion */
    ctl_upcase_next = 1,  /* Uppercase only the next character */
    ctl_locase_next = 2,  /* Lowercase only the next character */
    ctl_upcase = 3,       /* Uppercase everything up to ctl_stop */
    ctl_locase = 4        /* Lowercase everything up to ctl_stop */
  };
48
49 struct replace_segm
50 {
51 struct replace_segm *next;
52 enum replace_segm_type type;
53 union
54 {
55 struct
56 {
57 char *ptr;
58 size_t size;
59 } literal; /* type == segm_literal */
60 size_t ref; /* type == segm_backref */
61 enum case_ctl_type ctl; /* type == segm_case_ctl */
62 } v;
63 };
64
/* Compiled replacement expression: a singly linked list of segments.
   REPL_HEAD is its first element and REPL_TAIL its last; both are NULL
   while the list is empty.  */
static struct replace_segm *repl_head, *repl_tail;

/* Number of elements in the above list.  The type is spelled out
   because implicit `int' is not valid C since C99.  */
static int segm_count;
68
69 static struct replace_segm *
70 add_segment (void)
71 {
72 struct replace_segm *segm = xmalloc (sizeof *segm);
73 segm->next = NULL;
74 if (repl_tail)
75 repl_tail->next = segm;
76 else
77 repl_head = segm;
78 repl_tail = segm;
79 segm_count++;
80 return segm;
81 }
82
83 static void
84 add_literal_segment (char *str, char *end)
85 {
86 size_t len = end - str;
87 if (len)
88 {
89 struct replace_segm *segm = add_segment ();
90 segm->type = segm_literal;
91 segm->v.literal.ptr = xmalloc (len + 1);
92 memcpy (segm->v.literal.ptr, str, len);
93 segm->v.literal.ptr[len] = 0;
94 segm->v.literal.size = len;
95 }
96 }
97
/* Append a literal segment consisting of the single character CHR.  */
static void
add_char_segment (int chr)
{
  char one[1];

  one[0] = chr;
  /* A one-byte range always yields a segment of size 1.  */
  add_literal_segment (one, one + 1);
}
108
109 static void
110 add_backref_segment (size_t ref)
111 {
112 struct replace_segm *segm = add_segment ();
113 segm->type = segm_backref;
114 segm->v.ref = ref;
115 }
116
117 static void
118 add_case_ctl_segment (enum case_ctl_type ctl)
119 {
120 struct replace_segm *segm = add_segment ();
121 segm->type = segm_case_ctl;
122 segm->v.ctl = ctl;
123 }
124
125 void
126 set_transform_expr (const char *expr)
127 {
128 int delim;
129 int i, j, rc;
130 char *str, *beg, *cur;
131 const char *p;
132 int cflags = 0;
133
134 if (transform_type == transform_none)
135 obstack_init (&stk);
136 else
137 {
138 /* Redefinition of the transform expression */
139 regfree (&regex);
140 }
141
142 if (expr[0] != 's')
143 USAGE_ERROR ((0, 0, _("Invalid transform expression")));
144
145 delim = expr[1];
146
147 /* Scan regular expression */
148 for (i = 2; expr[i] && expr[i] != delim; i++)
149 if (expr[i] == '\\' && expr[i+1])
150 i++;
151
152 if (expr[i] != delim)
153 USAGE_ERROR ((0, 0, _("Invalid transform expression")));
154
155 /* Scan replacement expression */
156 for (j = i + 1; expr[j] && expr[j] != delim; j++)
157 if (expr[j] == '\\' && expr[j+1])
158 j++;
159
160 if (expr[j] != delim)
161 USAGE_ERROR ((0, 0, _("Invalid transform expression")));
162
163 /* Check flags */
164 transform_type = transform_first;
165 for (p = expr + j + 1; *p; p++)
166 switch (*p)
167 {
168 case 'g':
169 transform_type = transform_global;
170 break;
171
172 case 'i':
173 cflags |= REG_ICASE;
174 break;
175
176 case 'x':
177 cflags |= REG_EXTENDED;
178 break;
179
180 case '0': case '1': case '2': case '3': case '4':
181 case '5': case '6': case '7': case '8': case '9':
182 match_number = strtoul (p, (char**) &p, 0);
183 p--;
184 break;
185
186 default:
187 USAGE_ERROR ((0, 0, _("Unknown flag in transform expression")));
188 }
189
190 /* Extract and compile regex */
191 str = xmalloc (i - 1);
192 memcpy (str, expr + 2, i - 2);
193 str[i - 2] = 0;
194
195 rc = regcomp (&regex, str, cflags);
196
197 if (rc)
198 {
199 char errbuf[512];
200 regerror (rc, &regex, errbuf, sizeof (errbuf));
201 USAGE_ERROR ((0, 0, _("Invalid transform expression: %s"), errbuf));
202 }
203
204 if (str[0] == '^' || str[strlen (str) - 1] == '$')
205 transform_type = transform_first;
206
207 free (str);
208
209 /* Extract and compile replacement expr */
210 i++;
211 str = xmalloc (j - i + 1);
212 memcpy (str, expr + i, j - i);
213 str[j - i] = 0;
214
215 for (cur = beg = str; *cur;)
216 {
217 if (*cur == '\\')
218 {
219 size_t n;
220
221 add_literal_segment (beg, cur);
222 switch (*++cur)
223 {
224 case '0': case '1': case '2': case '3': case '4':
225 case '5': case '6': case '7': case '8': case '9':
226 n = strtoul (cur, &cur, 10);
227 if (n > regex.re_nsub)
228 USAGE_ERROR ((0, 0, _("Invalid transform replacement: back reference out of range")));
229 add_backref_segment (n);
230 break;
231
232 case '\\':
233 add_char_segment ('\\');
234 cur++;
235 break;
236
237 case 'a':
238 add_char_segment ('\a');
239 cur++;
240 break;
241
242 case 'b':
243 add_char_segment ('\b');
244 cur++;
245 break;
246
247 case 'f':
248 add_char_segment ('\f');
249 cur++;
250 break;
251
252 case 'n':
253 add_char_segment ('\n');
254 cur++;
255 break;
256
257 case 'r':
258 add_char_segment ('\r');
259 cur++;
260 break;
261
262 case 't':
263 add_char_segment ('\t');
264 cur++;
265 break;
266
267 case 'v':
268 add_char_segment ('\v');
269 cur++;
270 break;
271
272 case '&':
273 add_char_segment ('&');
274 cur++;
275 break;
276
277 case 'L':
278 /* Turn the replacement to lowercase until a `\U' or `\E'
279 is found, */
280 add_case_ctl_segment (ctl_locase);
281 cur++;
282 break;
283
284 case 'l':
285 /* Turn the next character to lowercase, */
286 add_case_ctl_segment (ctl_locase_next);
287 cur++;
288 break;
289
290 case 'U':
291 /* Turn the replacement to uppercase until a `\L' or `\E'
292 is found, */
293 add_case_ctl_segment (ctl_upcase);
294 cur++;
295 break;
296
297 case 'u':
298 /* Turn the next character to uppercase, */
299 add_case_ctl_segment (ctl_upcase_next);
300 cur++;
301 break;
302
303 case 'E':
304 /* Stop case conversion started by `\L' or `\U'. */
305 add_case_ctl_segment (ctl_stop);
306 cur++;
307 break;
308
309 default:
310 /* Try to be nice */
311 {
312 char buf[2];
313 buf[0] = '\\';
314 buf[1] = *cur;
315 add_literal_segment (buf, buf + 2);
316 }
317 cur++;
318 break;
319 }
320 beg = cur;
321 }
322 else if (*cur == '&')
323 {
324 add_literal_segment (beg, cur);
325 add_backref_segment (0);
326 beg = ++cur;
327 }
328 else
329 cur++;
330 }
331 add_literal_segment (beg, cur);
332
333 }
334
335 /* Run case conversion specified by CASE_CTL on array PTR of SIZE
336 characters. Returns pointer to statically allocated storage. */
337 static char *
338 run_case_conv (enum case_ctl_type case_ctl, char *ptr, size_t size)
339 {
340 static char *case_ctl_buffer;
341 static size_t case_ctl_bufsize;
342 char *p;
343
344 if (case_ctl_bufsize < size)
345 {
346 case_ctl_bufsize = size;
347 case_ctl_buffer = xrealloc (case_ctl_buffer, case_ctl_bufsize);
348 }
349 memcpy (case_ctl_buffer, ptr, size);
350 switch (case_ctl)
351 {
352 case ctl_upcase_next:
353 case_ctl_buffer[0] = toupper (case_ctl_buffer[0]);
354 break;
355
356 case ctl_locase_next:
357 case_ctl_buffer[0] = tolower (case_ctl_buffer[0]);
358 break;
359
360 case ctl_upcase:
361 for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
362 *p = toupper (*p);
363 break;
364
365 case ctl_locase:
366 for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
367 *p = tolower (*p);
368 break;
369
370 case ctl_stop:
371 break;
372 }
373 return case_ctl_buffer;
374 }
375
376 bool
377 _transform_name_to_obstack (char *input)
378 {
379 regmatch_t *rmp;
380 char *p;
381 int rc;
382 size_t nmatches = 0;
383 enum case_ctl_type case_ctl = ctl_stop, /* Current case conversion op */
384 save_ctl = ctl_stop; /* Saved case_ctl for \u and \l */
385
386 /* Reset case conversion after a single-char operation */
387 #define CASE_CTL_RESET() if (case_ctl == ctl_upcase_next \
388 || case_ctl == ctl_locase_next) \
389 { \
390 case_ctl = save_ctl; \
391 save_ctl = ctl_stop; \
392 }
393
394 if (transform_type == transform_none)
395 return false;
396
397 rmp = xmalloc ((regex.re_nsub + 1) * sizeof (*rmp));
398
399 while (*input)
400 {
401 size_t disp;
402 char *ptr;
403
404 rc = regexec (&regex, input, regex.re_nsub + 1, rmp, 0);
405
406 if (rc == 0)
407 {
408 struct replace_segm *segm;
409
410 disp = rmp[0].rm_eo;
411
412 if (rmp[0].rm_so)
413 obstack_grow (&stk, input, rmp[0].rm_so);
414
415 nmatches++;
416 if (match_number && nmatches < match_number)
417 {
418 obstack_grow (&stk, input, disp);
419 input += disp;
420 continue;
421 }
422
423 for (segm = repl_head; segm; segm = segm->next)
424 {
425 switch (segm->type)
426 {
427 case segm_literal: /* Literal segment */
428 if (case_ctl == ctl_stop)
429 ptr = segm->v.literal.ptr;
430 else
431 {
432 ptr = run_case_conv (case_ctl,
433 segm->v.literal.ptr,
434 segm->v.literal.size);
435 CASE_CTL_RESET();
436 }
437 obstack_grow (&stk, ptr, segm->v.literal.size);
438 break;
439
440 case segm_backref: /* Back-reference segment */
441 if (rmp[segm->v.ref].rm_so != -1
442 && rmp[segm->v.ref].rm_eo != -1)
443 {
444 size_t size = rmp[segm->v.ref].rm_eo
445 - rmp[segm->v.ref].rm_so;
446 ptr = input + rmp[segm->v.ref].rm_so;
447 if (case_ctl != ctl_stop)
448 {
449 ptr = run_case_conv (case_ctl, ptr, size);
450 CASE_CTL_RESET();
451 }
452
453 obstack_grow (&stk, ptr, size);
454 }
455 break;
456
457 case segm_case_ctl:
458 switch (segm->v.ctl)
459 {
460 case ctl_upcase_next:
461 case ctl_locase_next:
462 switch (save_ctl)
463 {
464 case ctl_stop:
465 case ctl_upcase:
466 case ctl_locase:
467 save_ctl = case_ctl;
468 default:
469 break;
470 }
471 /*FALL THROUGH*/
472
473 case ctl_upcase:
474 case ctl_locase:
475 case ctl_stop:
476 case_ctl = segm->v.ctl;
477 }
478 }
479 }
480 }
481 else
482 {
483 disp = strlen (input);
484 obstack_grow (&stk, input, disp);
485 }
486
487 input += disp;
488
489 if (transform_type == transform_first)
490 {
491 obstack_grow (&stk, input, strlen (input));
492 break;
493 }
494 }
495
496 obstack_1grow (&stk, 0);
497 free (rmp);
498 return true;
499 }
500
501 bool
502 transform_name_fp (char **pinput, char *(*fun)(char *))
503 {
504 char *str, *p;
505 bool ret = _transform_name_to_obstack (*pinput);
506 if (ret)
507 {
508 str = obstack_finish (&stk);
509 assign_string (pinput, fun ? fun (str) : str);
510 obstack_free (&stk, str);
511 }
512 else if (fun)
513 {
514 str = *pinput;
515 *pinput = NULL;
516 assign_string (pinput, fun (str));
517 free (str);
518 ret = true;
519 }
520 return ret;
521 }
522
/* Transform the name *PINPUT in place, without any post-processing
   function.  Returns what transform_name_fp returns.  */
bool
transform_name (char **pinput)
{
  char *(*no_postprocess) (char *) = NULL;

  return transform_name_fp (pinput, no_postprocess);
}
528
#if 0
/* Debugging aid, compiled out: read names from stdin, one per line,
   and print the transformed form of each after "=> ", or "=" alone if
   transform_name left the name untouched.  */
void
read_and_transform_loop (void)
{
  char buf[512];
  while (fgets (buf, sizeof buf, stdin))
    {
      size_t len = strlen (buf);
      char *name;

      /* Strip the trailing newline, if any; LEN may be 0.  */
      if (len > 0 && buf[len - 1] == '\n')
        buf[len - 1] = 0;

      /* transform_name takes a pointer to a reassignable string.
         NOTE(review): a heap copy is used on the assumption that
         assign_string frees the previous value -- confirm.  */
      name = xstrdup (buf);
      if (transform_name (&name))
        printf ("=> %s\n", name);
      else
        printf ("=\n");
      free (name);
    }
}
#endif
This page took 0.060562 seconds and 5 git commands to generate.