]> Dogcows Code - chaz/tar/blob - lib/wordsplit.c
Pass command line arguments to external commands.
[chaz/tar] / lib / wordsplit.c
1 /* wordsplit - a word splitter
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 Written by Sergey Poznyakoff
18 */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <errno.h>
25 #include <ctype.h>
26 #include <unistd.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <stdio.h>
30 #include <stdarg.h>
31
32 #if ENABLE_NLS
33 # include <gettext.h>
34 #else
35 # define gettext(msgid) msgid
36 #endif
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
39
40 #include <wordsplit.h>
41
/* Locale-independent character classification helpers.

   Several of these are built on strchr, which also matches the
   terminating NUL of the set string.  ISPUNCT and ISXDIGIT therefore
   reject c == 0 explicitly: NUL is neither punctuation nor a hex digit.
   ISDELIM is deliberately left as it was (it is true for c == 0).

   Every macro may evaluate its argument more than once, so do not pass
   expressions with side effects.  */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)
#define ISPUNCT(c) \
  ((c) != 0 && strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
/* Matches the hexadecimal *letters* only; decimal digits are covered by
   ISDIGIT.  */
#define ISXDIGIT(c) ((c) != 0 && strchr("abcdefABCDEF", (c))!=NULL)
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)

/* Initial size of the word vector and the minimum step by which it
   grows, both counted in elements.  */
#define ALLOC_INIT 128
#define ALLOC_INCR 128
56
57 static void
58 _wsplt_alloc_die (struct wordsplit *wsp)
59 {
60 wsp->ws_error (_("memory exhausted"));
61 abort ();
62 }
63
/* Default error reporter: print the printf-style message FMT to stderr
   and finish the line with a newline.  */
static void
_wsplt_error (const char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  vfprintf (stderr, fmt, args);
  va_end (args);
  putc ('\n', stderr);
}
74
75 static void wordsplit_free_nodes (struct wordsplit *);
76
77 static int
78 _wsplt_nomem (struct wordsplit *wsp)
79 {
80 errno = ENOMEM;
81 wsp->ws_errno = WRDSE_NOSPACE;
82 if (wsp->ws_flags & WRDSF_ENOMEMABRT)
83 wsp->ws_alloc_die (wsp);
84 if (wsp->ws_flags & WRDSF_SHOWERR)
85 wordsplit_perror (wsp);
86 if (!(wsp->ws_flags & WRDSF_REUSE))
87 wordsplit_free (wsp);
88 wordsplit_free_nodes (wsp);
89 return wsp->ws_errno;
90 }
91
92 static void
93 wordsplit_init0 (struct wordsplit *wsp)
94 {
95 if (wsp->ws_flags & WRDSF_REUSE)
96 {
97 if (!(wsp->ws_flags & WRDSF_APPEND))
98 wordsplit_free_words (wsp);
99 }
100 else
101 {
102 wsp->ws_wordv = NULL;
103 wsp->ws_wordc = 0;
104 wsp->ws_wordn = 0;
105 }
106
107 wsp->ws_errno = 0;
108 wsp->ws_head = wsp->ws_tail = NULL;
109 }
110
111 static int
112 wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
113 int flags)
114 {
115 wsp->ws_flags = flags;
116
117 if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
118 wsp->ws_alloc_die = _wsplt_alloc_die;
119 if (!(wsp->ws_flags & WRDSF_ERROR))
120 wsp->ws_error = _wsplt_error;
121
122 if (!(wsp->ws_flags & WRDSF_NOVAR)
123 && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
124 {
125 errno = EINVAL;
126 wsp->ws_errno = WRDSE_USAGE;
127 if (wsp->ws_flags & WRDSF_SHOWERR)
128 wordsplit_perror (wsp);
129 return wsp->ws_errno;
130 }
131
132 if (!(wsp->ws_flags & WRDSF_NOCMD))
133 {
134 errno = EINVAL;
135 wsp->ws_errno = WRDSE_NOSUPP;
136 if (wsp->ws_flags & WRDSF_SHOWERR)
137 wordsplit_perror (wsp);
138 return wsp->ws_errno;
139 }
140
141 if (wsp->ws_flags & WRDSF_SHOWDBG)
142 {
143 if (!(wsp->ws_flags & WRDSF_DEBUG))
144 {
145 if (wsp->ws_flags & WRDSF_ERROR)
146 wsp->ws_debug = wsp->ws_error;
147 else if (wsp->ws_flags & WRDSF_SHOWERR)
148 wsp->ws_debug = _wsplt_error;
149 else
150 wsp->ws_flags &= ~WRDSF_SHOWDBG;
151 }
152 }
153
154 wsp->ws_input = input;
155 wsp->ws_len = len;
156
157 if (!(wsp->ws_flags & WRDSF_DOOFFS))
158 wsp->ws_offs = 0;
159
160 if (!(wsp->ws_flags & WRDSF_DELIM))
161 wsp->ws_delim = " \t\n";
162
163 if (!(wsp->ws_flags & WRDSF_COMMENT))
164 wsp->ws_comment = NULL;
165
166 if (!(wsp->ws_flags & WRDSF_CLOSURE))
167 wsp->ws_closure = NULL;
168
169 wsp->ws_endp = 0;
170
171 wordsplit_init0 (wsp);
172
173 return 0;
174 }
175
176 static int
177 alloc_space (struct wordsplit *wsp, size_t count)
178 {
179 size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
180 char **ptr;
181 size_t newalloc;
182
183 if (wsp->ws_wordv == NULL)
184 {
185 newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
186 ptr = calloc (newalloc, sizeof (ptr[0]));
187 }
188 else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
189 {
190 newalloc = offs + wsp->ws_wordc +
191 (count > ALLOC_INCR ? count : ALLOC_INCR);
192 ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
193 }
194 else
195 return 0;
196
197 if (ptr)
198 {
199 wsp->ws_wordn = newalloc;
200 wsp->ws_wordv = ptr;
201 }
202 else
203 return _wsplt_nomem (wsp);
204 return 0;
205 }
206 \f
207
/* Bits stored in the `flags' member of a node.  */
enum
{
  _WSNF_NULL = 0x01,            /* null node (a no-op) */
  _WSNF_WORD = 0x02,            /* node text lives in v.word */
  _WSNF_QUOTE = 0x04,           /* text is quoted */
  _WSNF_NOEXPAND = 0x08,        /* text is not subject to expansion */
  _WSNF_JOIN = 0x10,            /* node must be joined with the next node */
  _WSNF_SEXP = 0x20,            /* node is a sed expression */

  /* Request bit for wordsplit_add_segm: add the segment even if it is
     empty.  */
  _WSNF_EMPTYOK = 0x0100
};
219
/* One element of the doubly-linked list built while splitting.  The
   text of a node is either a [beg, end) range of ws_input (v.segm) or,
   when _WSNF_WORD is set in flags, a malloc'ed string (v.word).  */
struct wordsplit_node
{
  struct wordsplit_node *prev;  /* neighbour towards the list head */
  struct wordsplit_node *next;  /* neighbour towards the list tail */
  int flags;                    /* combination of _WSNF_* bits */
  union
  {
    struct
    {
      size_t beg;               /* offset of the first byte in ws_input */
      size_t end;               /* offset one past the last byte */
    } segm;
    char *word;                 /* owned string, valid with _WSNF_WORD */
  } v;
};
235
236 static const char *
237 wsnode_flagstr (int flags)
238 {
239 static char retbuf[6];
240 char *p = retbuf;
241
242 if (flags & _WSNF_WORD)
243 *p++ = 'w';
244 else if (flags & _WSNF_NULL)
245 *p++ = 'n';
246 else
247 *p++ = '-';
248 if (flags & _WSNF_QUOTE)
249 *p++ = 'q';
250 else
251 *p++ = '-';
252 if (flags & _WSNF_NOEXPAND)
253 *p++ = 'E';
254 else
255 *p++ = '-';
256 if (flags & _WSNF_JOIN)
257 *p++ = 'j';
258 else
259 *p++ = '-';
260 if (flags & _WSNF_SEXP)
261 *p++ = 's';
262 else
263 *p++ = '-';
264 *p = 0;
265 return retbuf;
266 }
267
268 static const char *
269 wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
270 {
271 if (p->flags & _WSNF_NULL)
272 return "";
273 else if (p->flags & _WSNF_WORD)
274 return p->v.word;
275 else
276 return wsp->ws_input + p->v.segm.beg;
277 }
278
279 static size_t
280 wsnode_len (struct wordsplit_node *p)
281 {
282 if (p->flags & _WSNF_NULL)
283 return 0;
284 else if (p->flags & _WSNF_WORD)
285 return strlen (p->v.word);
286 else
287 return p->v.segm.end - p->v.segm.beg;
288 }
289
290 static int
291 wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode)
292 {
293 struct wordsplit_node *node = calloc (1, sizeof (*node));
294 if (!node)
295 return _wsplt_nomem (wsp);
296 *pnode = node;
297 return 0;
298 }
299
300 static void
301 wsnode_free (struct wordsplit_node *p)
302 {
303 if (p->flags & _WSNF_WORD)
304 free (p->v.word);
305 free (p);
306 }
307
308 static void
309 wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node)
310 {
311 node->next = NULL;
312 node->prev = wsp->ws_tail;
313 if (wsp->ws_tail)
314 wsp->ws_tail->next = node;
315 else
316 wsp->ws_head = node;
317 wsp->ws_tail = node;
318 }
319
320 static void
321 wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
322 {
323 struct wordsplit_node *p;
324
325 p = node->prev;
326 if (p)
327 {
328 p->next = node->next;
329 if (!node->next)
330 p->flags &= ~_WSNF_JOIN;
331 }
332 else
333 wsp->ws_head = node->next;
334
335 p = node->next;
336 if (p)
337 p->prev = node->prev;
338 else
339 wsp->ws_tail = node->prev;
340
341 node->next = node->prev = NULL;
342 }
343
344 static void
345 wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
346 struct wordsplit_node *anchor, int before)
347 {
348 if (!wsp->ws_head)
349 {
350 node->next = node->prev = NULL;
351 wsp->ws_head = wsp->ws_tail = node;
352 }
353 else if (before)
354 {
355 if (anchor->prev)
356 wsnode_insert (wsp, node, anchor->prev, 0);
357 else
358 {
359 node->prev = NULL;
360 node->next = anchor;
361 anchor->prev = node;
362 wsp->ws_head = node;
363 }
364 }
365 else
366 {
367 struct wordsplit_node *p;
368
369 p = anchor->next;
370 if (p)
371 p->prev = node;
372 else
373 wsp->ws_tail = node;
374 node->next = p;
375 node->prev = anchor;
376 anchor->next = node;
377 }
378 }
379
380 static int
381 wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
382 {
383 struct wordsplit_node *node;
384 int rc;
385
386 if (end == beg && !(flg & _WSNF_EMPTYOK))
387 return 0;
388 rc = wsnode_new (wsp, &node);
389 if (rc)
390 return rc;
391 node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
392 node->v.segm.beg = beg;
393 node->v.segm.end = end;
394 wsnode_append (wsp, node);
395 return 0;
396 }
397
398 static void
399 wordsplit_free_nodes (struct wordsplit *wsp)
400 {
401 struct wordsplit_node *p;
402
403 for (p = wsp->ws_head; p;)
404 {
405 struct wordsplit_node *next = p->next;
406 wsnode_free (p);
407 p = next;
408 }
409 wsp->ws_head = wsp->ws_tail = NULL;
410 }
411
412 static void
413 wordsplit_dump_nodes (struct wordsplit *wsp)
414 {
415 struct wordsplit_node *p;
416 int n = 0;
417
418 for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
419 {
420 if (p->flags & _WSNF_WORD)
421 wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
422 n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
423 else
424 wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
425 n, p, p->flags, wsnode_flagstr (p->flags),
426 (int) (p->v.segm.end - p->v.segm.beg),
427 wsp->ws_input + p->v.segm.beg);
428 }
429 }
430
431 static int
432 coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
433 {
434 struct wordsplit_node *p, *end;
435 size_t len = 0;
436 char *buf, *cur;
437 int stop;
438
439 for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
440 {
441 len += wsnode_len (p);
442 }
443 len += wsnode_len (p);
444 end = p;
445
446 buf = malloc (len + 1);
447 if (!buf)
448 return _wsplt_nomem (wsp);
449 cur = buf;
450
451 p = node;
452 for (stop = 0; !stop;)
453 {
454 struct wordsplit_node *next = p->next;
455 const char *str = wsnode_ptr (wsp, p);
456 size_t slen = wsnode_len (p);
457
458 memcpy (cur, str, slen);
459 cur += slen;
460 if (p != node)
461 {
462 wsnode_remove (wsp, p);
463 stop = p == end;
464 wsnode_free (p);
465 }
466 p = next;
467 }
468
469 *cur = 0;
470
471 node->flags &= ~_WSNF_JOIN;
472
473 if (node->flags & _WSNF_WORD)
474 free (node->v.word);
475 else
476 node->flags |= _WSNF_WORD;
477 node->v.word = buf;
478 return 0;
479 }
480
481 static int
482 wsnode_quoteremoval (struct wordsplit *wsp)
483 {
484 struct wordsplit_node *p;
485 void (*uqfn) (char *, const char *, size_t) =
486 (wsp->ws_flags & WRDSF_CESCAPES) ?
487 wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
488
489 for (p = wsp->ws_head; p; p = p->next)
490 {
491 const char *str = wsnode_ptr (wsp, p);
492 size_t slen = wsnode_len (p);
493 int unquote;
494
495 if (wsp->ws_flags & WRDSF_QUOTE)
496 {
497 unquote = !(p->flags & _WSNF_NOEXPAND);
498 }
499 else
500 unquote = 0;
501
502 if (unquote)
503 {
504 if (!(p->flags & _WSNF_WORD))
505 {
506 char *newstr = malloc (slen + 1);
507 if (!newstr)
508 return _wsplt_nomem (wsp);
509 memcpy (newstr, str, slen);
510 newstr[slen] = 0;
511 p->v.word = newstr;
512 p->flags |= _WSNF_WORD;
513 }
514
515 if (wsp->ws_flags & WRDSF_ESCAPE)
516 wordsplit_general_unquote_copy (p->v.word, str, slen,
517 wsp->ws_escape);
518 else
519 uqfn (p->v.word, str, slen);
520 }
521 }
522 return 0;
523 }
524
525 static int
526 wsnode_coalesce (struct wordsplit *wsp)
527 {
528 struct wordsplit_node *p;
529
530 for (p = wsp->ws_head; p; p = p->next)
531 {
532 if (p->flags & _WSNF_JOIN)
533 if (coalesce_segment (wsp, p))
534 return 1;
535 }
536 return 0;
537 }
538
539 static int
540 wordsplit_finish (struct wordsplit *wsp)
541 {
542 struct wordsplit_node *p;
543 size_t n;
544
545 n = 0;
546
547 for (p = wsp->ws_head; p; p = p->next)
548 n++;
549
550 if (alloc_space (wsp, n + 1))
551 return 1;
552
553 for (p = wsp->ws_head; p; p = p->next)
554 {
555 const char *str = wsnode_ptr (wsp, p);
556 size_t slen = wsnode_len (p);
557 char *newstr = malloc (slen + 1);
558
559 /* Assign newstr first, even if it is NULL. This way
560 wordsplit_free will work even if we return
561 nomem later. */
562 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
563 if (!newstr)
564 return _wsplt_nomem (wsp);
565 memcpy (newstr, str, slen);
566 newstr[slen] = 0;
567
568 wsp->ws_wordc++;
569
570 }
571 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
572 return 0;
573 }
574 \f
575
576 /* Variable expansion */
577 static int
578 node_split_prefix (struct wordsplit *wsp,
579 struct wordsplit_node **ptail,
580 struct wordsplit_node *node,
581 size_t beg, size_t len, int flg)
582 {
583 struct wordsplit_node *newnode;
584
585 if (len == 0)
586 return 0;
587 if (wsnode_new (wsp, &newnode))
588 return 1;
589 wsnode_insert (wsp, newnode, *ptail, 0);
590 if (node->flags & _WSNF_WORD)
591 {
592 const char *str = wsnode_ptr (wsp, node);
593 char *newstr = malloc (len + 1);
594 if (!newstr)
595 return _wsplt_nomem (wsp);
596 memcpy (newstr, str + beg, len);
597 newstr[len] = 0;
598 newnode->flags = _WSNF_WORD;
599 newnode->v.word = newstr;
600 }
601 else
602 {
603 newnode->v.segm.beg = node->v.segm.beg + beg;
604 newnode->v.segm.end = newnode->v.segm.beg + len;
605 }
606 newnode->flags |= flg;
607 *ptail = newnode;
608 return 0;
609 }
610
/* Scan STR from index I (exclusive upper bound LEN) for the `}' that
   closes an already opened brace.  Nested braces are counted; braces
   inside single or double quotes are ignored, and inside double quotes
   a backslash skips the following character.  On success the index of
   the closing brace is stored in *POFF and 0 is returned; otherwise 1
   is returned.  */
static int
find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
{
  char quote = 0;               /* 0 outside quotes, else the opening quote */
  size_t depth = 1;

  while (i < len)
    {
      char c = str[i];

      if (quote == '\'')
        {
          if (c == '\'')
            quote = 0;
        }
      else if (quote == '"')
        {
          if (c == '\\')
            i++;                /* skip the escaped character */
          else if (c == '"')
            quote = 0;
        }
      else if (c == '{')
        depth++;
      else if (c == '}')
        {
          if (--depth == 0)
            {
              *poff = i;
              return 0;
            }
        }
      else if (c == '"' || c == '\'')
        quote = c;

      i++;
    }
  return 1;
}
662
663 static const char *
664 wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
665 {
666 size_t i;
667
668 if (!(wsp->ws_flags & WRDSF_ENV))
669 return NULL;
670
671 if (wsp->ws_flags & WRDSF_ENV_KV)
672 {
673 /* A key-value pair environment */
674 for (i = 0; wsp->ws_env[i]; i++)
675 {
676 size_t elen = strlen (wsp->ws_env[i]);
677 if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
678 return wsp->ws_env[i + 1];
679 /* Skip the value. Break the loop if it is NULL. */
680 i++;
681 if (wsp->ws_env[i] == NULL)
682 break;
683 }
684 }
685 else
686 {
687 /* Usual (A=B) environment. */
688 for (i = 0; wsp->ws_env[i]; i++)
689 {
690 size_t j;
691 const char *var = wsp->ws_env[i];
692
693 for (j = 0; j < len; j++)
694 if (name[j] != var[j])
695 break;
696 if (j == len && var[j] == '=')
697 return var + j + 1;
698 }
699 }
700 return NULL;
701 }
702
703 static int
704 expvar (struct wordsplit *wsp, const char *str, size_t len,
705 struct wordsplit_node **ptail, const char **pend, int flg)
706 {
707 size_t i = 0;
708 const char *defstr = NULL;
709 const char *value;
710 const char *vptr;
711 struct wordsplit_node *newnode;
712 const char *start = str - 1;
713
714 if (ISALPHA (str[0]) || str[0] == '_')
715 {
716 for (i = 1; i < len; i++)
717 if (!(ISALNUM (str[i]) || str[i] == '_'))
718 break;
719 *pend = str + i - 1;
720 }
721 else if (str[0] == '{')
722 {
723 str++;
724 len--;
725 for (i = 1; i < len; i++)
726 if (str[i] == '}' || str[i] == ':')
727 break;
728 if (str[i] == ':')
729 {
730 size_t j;
731
732 defstr = str + i + 1;
733 if (find_closing_cbrace (str, i + 1, len, &j))
734 {
735 wsp->ws_errno = WRDSE_CBRACE;
736 return 1;
737 }
738 *pend = str + j;
739 }
740 else if (str[i] == '}')
741 {
742 defstr = NULL;
743 *pend = str + i;
744 }
745 else
746 {
747 wsp->ws_errno = WRDSE_CBRACE;
748 return 1;
749 }
750 }
751 else
752 {
753 if (wsnode_new (wsp, &newnode))
754 return 1;
755 wsnode_insert (wsp, newnode, *ptail, 0);
756 *ptail = newnode;
757 newnode->flags = _WSNF_WORD | flg;
758 newnode->v.word = malloc (3);
759 if (!newnode->v.word)
760 return _wsplt_nomem (wsp);
761 newnode->v.word[0] = '$';
762 newnode->v.word[1] = str[0];
763 newnode->v.word[2] = 0;
764 *pend = str;
765 return 0;
766 }
767
768 /* Actually expand the variable */
769 /* str - start of the variable name
770 i - its length
771 defstr - default replacement str */
772
773 vptr = wordsplit_find_env (wsp, str, i);
774 if (vptr)
775 {
776 value = strdup (vptr);
777 if (!value)
778 return _wsplt_nomem (wsp);
779 }
780 else if (wsp->ws_flags & WRDSF_GETVAR)
781 value = wsp->ws_getvar (str, i, wsp->ws_closure);
782 else if (wsp->ws_flags & WRDSF_UNDEF)
783 {
784 wsp->ws_errno = WRDSE_UNDEF;
785 if (wsp->ws_flags & WRDSF_SHOWERR)
786 wordsplit_perror (wsp);
787 return 1;
788 }
789 else
790 {
791 if (wsp->ws_flags & WRDSF_WARNUNDEF)
792 wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str);
793 if (wsp->ws_flags & WRDSF_KEEPUNDEF)
794 value = NULL;
795 else
796 value = "";
797 }
798 /* FIXME: handle defstr */
799 if (value)
800 {
801 if (flg & _WSNF_QUOTE)
802 {
803 if (wsnode_new (wsp, &newnode))
804 return 1;
805 wsnode_insert (wsp, newnode, *ptail, 0);
806 *ptail = newnode;
807 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
808 newnode->v.word = strdup (value);
809 if (!newnode->v.word)
810 return _wsplt_nomem (wsp);
811 }
812 else if (*value == 0)
813 {
814 /* Empty string is a special case */
815 if (wsnode_new (wsp, &newnode))
816 return 1;
817 wsnode_insert (wsp, newnode, *ptail, 0);
818 *ptail = newnode;
819 newnode->flags = _WSNF_NULL;
820 }
821 else
822 {
823 struct wordsplit ws;
824 int i;
825
826 ws.ws_delim = wsp->ws_delim;
827 if (wordsplit (value, &ws,
828 WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS))
829 {
830 wordsplit_free (&ws);
831 return 1;
832 }
833 for (i = 0; i < ws.ws_wordc; i++)
834 {
835 if (wsnode_new (wsp, &newnode))
836 return 1;
837 wsnode_insert (wsp, newnode, *ptail, 0);
838 *ptail = newnode;
839 newnode->flags = _WSNF_WORD |
840 _WSNF_NOEXPAND |
841 (i + 1 < ws.ws_wordc ? (flg & ~_WSNF_JOIN) : flg);
842 newnode->v.word = strdup (ws.ws_wordv[i]);
843 if (!newnode->v.word)
844 return _wsplt_nomem (wsp);
845 }
846 wordsplit_free (&ws);
847 }
848 }
849 else if (wsp->ws_flags & WRDSF_KEEPUNDEF)
850 {
851 size_t size = *pend - start + 1;
852
853 if (wsnode_new (wsp, &newnode))
854 return 1;
855 wsnode_insert (wsp, newnode, *ptail, 0);
856 *ptail = newnode;
857 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
858 newnode->v.word = malloc (size + 1);
859 if (!newnode->v.word)
860 return _wsplt_nomem (wsp);
861 memcpy (newnode->v.word, start, size);
862 newnode->v.word[size] = 0;
863 }
864 else
865 {
866 if (wsnode_new (wsp, &newnode))
867 return 1;
868 wsnode_insert (wsp, newnode, *ptail, 0);
869 *ptail = newnode;
870 newnode->flags = _WSNF_NULL;
871 }
872 return 0;
873 }
874
875 static int
876 node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
877 {
878 const char *str = wsnode_ptr (wsp, node);
879 size_t slen = wsnode_len (node);
880 const char *end = str + slen;
881 const char *p;
882 size_t off = 0;
883 struct wordsplit_node *tail = node;
884
885 for (p = str; p < end; p++)
886 {
887 if (*p == '\\')
888 {
889 p++;
890 continue;
891 }
892 if (*p == '$')
893 {
894 size_t n = p - str;
895
896 if (tail != node)
897 tail->flags |= _WSNF_JOIN;
898 if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
899 return 1;
900 p++;
901 if (expvar (wsp, p, slen - n, &tail, &p,
902 node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
903 return 1;
904 off += p - str + 1;
905 str = p + 1;
906 }
907 }
908 if (p > str)
909 {
910 if (tail != node)
911 tail->flags |= _WSNF_JOIN;
912 if (node_split_prefix (wsp, &tail, node, off, p - str,
913 node->flags & _WSNF_JOIN))
914 return 1;
915 }
916 if (tail != node)
917 {
918 wsnode_remove (wsp, node);
919 wsnode_free (node);
920 }
921 return 0;
922 }
923
924 /* Remove NULL lists */
925 static void
926 wsnode_nullelim (struct wordsplit *wsp)
927 {
928 struct wordsplit_node *p;
929
930 for (p = wsp->ws_head; p;)
931 {
932 struct wordsplit_node *next = p->next;
933 if (p->flags & _WSNF_NULL)
934 {
935 wsnode_remove (wsp, p);
936 wsnode_free (p);
937 }
938 p = next;
939 }
940 }
941
942 static int
943 wordsplit_varexp (struct wordsplit *wsp)
944 {
945 struct wordsplit_node *p;
946
947 for (p = wsp->ws_head; p;)
948 {
949 struct wordsplit_node *next = p->next;
950 if (!(p->flags & _WSNF_NOEXPAND))
951 if (node_expand_vars (wsp, p))
952 return 1;
953 p = next;
954 }
955
956 wsnode_nullelim (wsp);
957 return 0;
958 }
959 \f
960 /* Strip off any leading and trailing whitespace. This function is called
961 right after the initial scanning, therefore it assumes that every
962 node in the list is a text reference node. */
963 static void
964 wordsplit_trimws (struct wordsplit *wsp)
965 {
966 struct wordsplit_node *p;
967
968 for (p = wsp->ws_head; p; p = p->next)
969 {
970 size_t n;
971
972 if (p->flags & _WSNF_QUOTE)
973 continue;
974
975 /* Skip leading whitespace: */
976 for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
977 n++)
978 ;
979 p->v.segm.beg = n;
980 /* Trim trailing whitespace */
981 for (n = p->v.segm.end;
982 n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
983 p->v.segm.end = n;
984 if (p->v.segm.beg == p->v.segm.end)
985 p->flags |= _WSNF_NULL;
986 }
987
988 wsnode_nullelim (wsp);
989 }
990 \f
/* Skip a sequence of sed-style substitutions (s/re/repl/flags) joined
   by `;', starting at index I of COMMAND (LEN bytes).  Any punctuation
   character may serve as the delimiter and a backslash protects the
   character after it.  Returns the index at which scanning stopped.  */
static int
skip_sed_expr (const char *command, size_t i, size_t len)
{
  for (;;)
    {
      int sep;
      int seen;                 /* number of delimiters seen so far */

      if (command[i] == ';')
        i++;
      if (command[i] != 's' || !(i + 3 < len) || !ISPUNCT (command[i + 1]))
        break;

      sep = command[++i];
      seen = 1;
      i++;
      while (i < len)
        {
          if (seen == 3)
            {
              /* Past the third delimiter only alphanumeric flag
                 characters belong to the expression.  */
              if (command[i] == sep || !ISALNUM (command[i]))
                break;
            }
          else if (command[i] == '\\')
            i++;
          else if (command[i] == sep)
            seen++;
          i++;
        }

      /* Go on only if a complete expression is followed by `;'.  */
      if (!(seen == 3 && i < len && command[i] == ';'))
        break;
    }
  return i;
}
1023
1024 static size_t
1025 skip_delim (struct wordsplit *wsp)
1026 {
1027 size_t start = wsp->ws_endp;
1028 if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
1029 {
1030 if ((wsp->ws_flags & WRDSF_RETURN_DELIMS) &&
1031 ISDELIM (wsp, wsp->ws_input[start]))
1032 {
1033 int delim = wsp->ws_input[start];
1034 do
1035 start++;
1036 while (start < wsp->ws_len && delim == wsp->ws_input[start]);
1037 }
1038 else
1039 {
1040 do
1041 start++;
1042 while (start < wsp->ws_len && ISDELIM (wsp, wsp->ws_input[start]));
1043 }
1044 start--;
1045 }
1046
1047 if (!(wsp->ws_flags & WRDSF_RETURN_DELIMS))
1048 start++;
1049
1050 return start;
1051 }
1052
/* Result codes of the word scanner.  */
enum
{
  _WRDS_EOF = 0,
  _WRDS_OK = 1,
  _WRDS_ERR = 2
};
1056
1057 static int
1058 scan_qstring (struct wordsplit *wsp, size_t start, size_t * end)
1059 {
1060 size_t j;
1061 const char *command = wsp->ws_input;
1062 size_t len = wsp->ws_len;
1063 char q = command[start];
1064
1065 for (j = start + 1; j < len && command[j] != q; j++)
1066 if (q == '"' && command[j] == '\\')
1067 j++;
1068 if (j < len && command[j] == q)
1069 {
1070 int flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
1071 if (q == '\'')
1072 flags |= _WSNF_NOEXPAND;
1073 if (wordsplit_add_segm (wsp, start + 1, j, flags))
1074 return _WRDS_ERR;
1075 *end = j;
1076 }
1077 else
1078 {
1079 wsp->ws_endp = start;
1080 wsp->ws_errno = WRDSE_QUOTE;
1081 if (wsp->ws_flags & WRDSF_SHOWERR)
1082 wordsplit_perror (wsp);
1083 return _WRDS_ERR;
1084 }
1085 return 0;
1086 }
1087
1088 static int
1089 scan_word (struct wordsplit *wsp, size_t start)
1090 {
1091 size_t len = wsp->ws_len;
1092 const char *command = wsp->ws_input;
1093 const char *comment = wsp->ws_comment;
1094 int join = 0;
1095 int flags = 0;
1096
1097 size_t i = start;
1098
1099 if (i >= len)
1100 {
1101 wsp->ws_errno = WRDSE_EOF;
1102 return _WRDS_EOF;
1103 }
1104
1105 start = i;
1106
1107 if (wsp->ws_flags & WRDSF_SED_EXPR
1108 && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
1109 {
1110 flags = _WSNF_SEXP;
1111 i = skip_sed_expr (command, i, len);
1112 }
1113 else if (!ISDELIM (wsp, command[i]))
1114 {
1115 while (i < len)
1116 {
1117 if (comment && strchr (comment, command[i]) != NULL)
1118 {
1119 size_t j;
1120 for (j = i + 1; j < len && command[j] != '\n'; j++)
1121 ;
1122 if (wordsplit_add_segm (wsp, start, i, 0))
1123 return _WRDS_ERR;
1124 wsp->ws_endp = j;
1125 return _WRDS_OK;
1126 }
1127
1128 if (wsp->ws_flags & WRDSF_QUOTE)
1129 {
1130 if (command[i] == '\\')
1131 {
1132 if (++i == len)
1133 break;
1134 i++;
1135 continue;
1136 }
1137
1138 if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') ||
1139 ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))
1140 {
1141 if (join && wsp->ws_tail)
1142 wsp->ws_tail->flags |= _WSNF_JOIN;
1143 if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN))
1144 return _WRDS_ERR;
1145 if (scan_qstring (wsp, i, &i))
1146 return _WRDS_ERR;
1147 start = i + 1;
1148 join = 1;
1149 }
1150 }
1151
1152 if (ISDELIM (wsp, command[i]))
1153 break;
1154 else
1155 i++;
1156 }
1157 }
1158 else if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
1159 {
1160 i++;
1161 }
1162 else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
1163 flags |= _WSNF_EMPTYOK;
1164
1165 if (join && i > start && wsp->ws_tail)
1166 wsp->ws_tail->flags |= _WSNF_JOIN;
1167 if (wordsplit_add_segm (wsp, start, i, flags))
1168 return _WRDS_ERR;
1169 wsp->ws_endp = i;
1170 if (wsp->ws_flags & WRDSF_INCREMENTAL)
1171 return _WRDS_EOF;
1172 return _WRDS_OK;
1173 }
1174
/* Pairs of (escape letter, character it stands for).  Even indices
   hold the letter that follows a backslash, odd indices the decoded
   character.  */
static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";

/* Decode the escape letter C.  Returns the character it stands for,
   or C itself if it is not a known escape letter.  */
int
wordsplit_c_unquote_char (int c)
{
  size_t k;

  for (k = 0; quote_transtab[k]; k += 2)
    if (quote_transtab[k] == c)
      return quote_transtab[k + 1];
  return c;
}

/* Find the escape letter for character C.  Returns the letter, or -1
   if C has no single-letter escape.  */
int
wordsplit_c_quote_char (int c)
{
  size_t k;

  /* Odd indices only; the final array element is the terminating NUL.  */
  for (k = 1; k < sizeof (quote_transtab) - 1; k += 2)
    if (quote_transtab[k] == c)
      return quote_transtab[k - 1];
  return -1;
}
1203
/* Numeric value of the digit character C in bases up to 16, or 255 if
   C is not a digit.  Kept for compatibility; xtonum below no longer
   depends on it.  */
#define to_num(c) \
  (ISDIGIT(c) ? (c) - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))

/* Convert at most CNT leading characters of SRC, read as digits in
   BASE (up to 16), to a number.  Conversion stops at the first
   character that is not a valid digit in BASE; in particular it stops
   at a NUL byte.  The value is stored in *PVAL (0 if nothing was
   converted) and the number of characters consumed is returned.  */
static int
xtonum (int *pval, const char *src, int base, int cnt)
{
  int i, val;

  for (i = 0, val = 0; i < cnt; i++, src++)
    {
      int c = *(const unsigned char *) src;
      int digit;

      if ('0' <= c && c <= '9')
        digit = c - '0';
      else if ('a' <= c && c <= 'f')
        digit = c - 'a' + 10;
      else if ('A' <= c && c <= 'F')
        digit = c - 'A' + 10;
      else
        break;

      if (digit >= base)
        break;
      val = val * base + digit;
    }
  *pval = val;
  return i;
}
1222
/* Compute the number of bytes that wordsplit_c_quote_copy writes for
   STR with the same QUOTE_HEX setting.  *QUOTE is set to 1 if STR
   contains a space or a double quote, and to 0 otherwise.  */
size_t
wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
{
  size_t total = 0;
  const char *p;

  *quote = 0;
  for (p = str; *p; p++)
    {
      char ch = *p;

      switch (ch)
        {
        case ' ':
          *quote = 1;
          total += 1;
          break;

        case '"':
          *quote = 1;
          total += 2;           /* backslash + quote */
          break;

        default:
          if (ch != '\t' && ch != '\\' && ISPRINT (ch))
            total += 1;
          else if (quote_hex)
            total += 3;         /* %XX */
          else if (wordsplit_c_quote_char (ch) != -1)
            total += 2;         /* backslash + letter */
          else
            total += 4;         /* backslash + three octal digits */
          break;
        }
    }
  return total;
}
1252
/* Copy the N bytes at SRC to DST, dropping each backslash that is
   followed (within the N bytes) by a character found in ESCAPABLE.
   Other backslashes, including one that is the very last byte, are
   copied unchanged.  DST is NUL-terminated and needs room for N + 1
   bytes.  */
void
wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
                                const char *escapable)
{
  size_t i = 0;

  while (i < n)
    {
      /* Check i + 1 < n before looking at src[i + 1], so that the byte
         after the end of the text is never read.  */
      if (src[i] == '\\' && i + 1 < n && strchr (escapable, src[i + 1]))
        i++;
      *dst++ = src[i++];
    }
  *dst = 0;
}
1267
/* Shell-style unquoting: copy the N bytes at SRC to DST, replacing
   every backslash-character pair by the character alone.  A backslash
   that is the last of the N bytes has nothing to escape and is
   dropped.  DST is NUL-terminated and needs room for N + 1 bytes.  */
void
wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
{
  size_t i = 0;

  while (i < n)
    {
      if (src[i] == '\\')
        {
          i++;
          /* Never read or copy the byte after the end of the text.  */
          if (i == n)
            break;
        }
      *dst++ = src[i++];
    }
  *dst = 0;
}
1281
/* C-style unquoting: copy the N bytes at SRC to DST while decoding
   backslash escapes:

     \xH, \xHH     one or two hexadecimal digits
     \O .. \OOO    one to three octal digits
     \c            single-letter escapes via wordsplit_c_unquote_char;
                   any other character stands for itself

   An `x' or digit that is not followed by a valid number is copied
   together with its backslash.  A backslash that is the last of the N
   bytes is dropped.  No byte at or beyond SRC + N is examined.  DST is
   NUL-terminated and needs room for N + 1 bytes.  */
void
wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
{
  size_t i = 0;
  int c;

  while (i < n)
    {
      if (src[i] != '\\')
        {
          *dst++ = src[i++];
          continue;
        }

      ++i;
      if (i == n)
        break;                  /* trailing backslash */

      if (src[i] == 'x' || src[i] == 'X')
        {
          /* Digits may only come from the bytes after the `x'.  */
          size_t avail = n - i - 1;
          int off = 0;

          if (avail > 0)
            off = xtonum (&c, src + i + 1, 16, avail < 2 ? (int) avail : 2);
          if (off == 0)
            {
              *dst++ = '\\';
              *dst++ = src[i++];
            }
          else
            {
              *dst++ = c;
              i += off + 1;
            }
        }
      else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i]))
        {
          size_t avail = n - i;
          int off = xtonum (&c, src + i, 8, avail < 3 ? (int) avail : 3);

          if (off == 0)
            {
              /* `8' or `9': not an octal number.  */
              *dst++ = '\\';
              *dst++ = src[i++];
            }
          else
            {
              *dst++ = c;
              i += off;
            }
        }
      else
        *dst++ = wordsplit_c_unquote_char (src[i++]);
    }
  *dst = 0;
}
1346
/* Write a quoted form of the NUL-terminated string SRC to DST.

   A double quote is preceded by a backslash.  Printable characters
   other than tab and backslash are copied as is.  Everything else is
   written as %XX if QUOTE_HEX is non-zero, otherwise as a backslash
   followed by an escape letter or, failing that, three octal digits.

   DST is not NUL-terminated by this function; the number of bytes
   written matches wordsplit_c_quoted_length.  */
void
wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
  const char *in;
  char esc[4];

  for (in = src; *in; in++)
    {
      char ch = *in;
      int letter;

      if (ch == '"')
        {
          *dst++ = '\\';
          *dst++ = ch;
          continue;
        }

      if (ch != '\t' && ch != '\\' && ISPRINT (ch))
        {
          *dst++ = ch;
          continue;
        }

      if (quote_hex)
        {
          snprintf (esc, sizeof esc, "%%%02X", *(const unsigned char *) in);
          memcpy (dst, esc, 3);
          dst += 3;
          continue;
        }

      letter = wordsplit_c_quote_char (ch);
      *dst++ = '\\';
      if (letter != -1)
        *dst++ = letter;
      else
        {
          snprintf (esc, sizeof esc, "%03o", *(const unsigned char *) in);
          memcpy (dst, esc, 3);
          dst += 3;
        }
    }
}
1385
1386 static int
1387 wordsplit_process_list (struct wordsplit *wsp, size_t start)
1388 {
1389 if (wsp->ws_flags & WRDSF_NOSPLIT)
1390 {
1391 /* Treat entire input as a quoted argument */
1392 if (wordsplit_add_segm (wsp, start, wsp->ws_len, _WSNF_QUOTE))
1393 return wsp->ws_errno;
1394 }
1395 else
1396 {
1397 int rc;
1398
1399 while ((rc = scan_word (wsp, start)) == _WRDS_OK)
1400 start = skip_delim (wsp);
1401 /* Make sure tail element is not joinable */
1402 if (wsp->ws_tail)
1403 wsp->ws_tail->flags &= ~_WSNF_JOIN;
1404 if (rc == _WRDS_ERR)
1405 return wsp->ws_errno;
1406 }
1407
1408 if (wsp->ws_flags & WRDSF_SHOWDBG)
1409 {
1410 wsp->ws_debug ("Initial list:");
1411 wordsplit_dump_nodes (wsp);
1412 }
1413
1414 if (wsp->ws_flags & WRDSF_WS)
1415 {
1416 /* Trim leading and trailing whitespace */
1417 wordsplit_trimws (wsp);
1418 if (wsp->ws_flags & WRDSF_SHOWDBG)
1419 {
1420 wsp->ws_debug ("After WS trimming:");
1421 wordsplit_dump_nodes (wsp);
1422 }
1423 }
1424
1425 /* Expand variables (FIXME: & commands) */
1426 if (!(wsp->ws_flags & WRDSF_NOVAR))
1427 {
1428 if (wordsplit_varexp (wsp))
1429 {
1430 wordsplit_free_nodes (wsp);
1431 return wsp->ws_errno;
1432 }
1433 if (wsp->ws_flags & WRDSF_SHOWDBG)
1434 {
1435 wsp->ws_debug ("Expanded list:");
1436 wordsplit_dump_nodes (wsp);
1437 }
1438 }
1439
1440 do
1441 {
1442 if (wsnode_quoteremoval (wsp))
1443 break;
1444 if (wsp->ws_flags & WRDSF_SHOWDBG)
1445 {
1446 wsp->ws_debug ("After quote removal:");
1447 wordsplit_dump_nodes (wsp);
1448 }
1449
1450 if (wsnode_coalesce (wsp))
1451 break;
1452
1453 if (wsp->ws_flags & WRDSF_SHOWDBG)
1454 {
1455 wsp->ws_debug ("Coalesced list:");
1456 wordsplit_dump_nodes (wsp);
1457 }
1458 }
1459 while (0);
1460 return wsp->ws_errno;
1461 }
1462
1463 int
1464 wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
1465 int flags)
1466 {
1467 int rc;
1468 size_t start;
1469 const char *cmdptr;
1470 size_t cmdlen;
1471
1472 if (!command)
1473 {
1474 if (!(flags & WRDSF_INCREMENTAL))
1475 return EINVAL;
1476
1477 start = skip_delim (wsp);
1478 if (wsp->ws_endp == wsp->ws_len)
1479 {
1480 wsp->ws_errno = WRDSE_NOINPUT;
1481 if (wsp->ws_flags & WRDSF_SHOWERR)
1482 wordsplit_perror (wsp);
1483 return wsp->ws_errno;
1484 }
1485
1486 cmdptr = wsp->ws_input + wsp->ws_endp;
1487 cmdlen = wsp->ws_len - wsp->ws_endp;
1488 wsp->ws_flags |= WRDSF_REUSE;
1489 wordsplit_init0 (wsp);
1490 }
1491 else
1492 {
1493 cmdptr = command;
1494 cmdlen = length;
1495 start = 0;
1496 rc = wordsplit_init (wsp, cmdptr, cmdlen, flags);
1497 if (rc)
1498 return rc;
1499 }
1500
1501 if (wsp->ws_flags & WRDSF_SHOWDBG)
1502 wsp->ws_debug ("Input:%.*s;", (int) cmdlen, cmdptr);
1503
1504 rc = wordsplit_process_list (wsp, start);
1505 if (rc == 0 && (flags & WRDSF_INCREMENTAL))
1506 {
1507 while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len)
1508 {
1509 start = skip_delim (wsp);
1510 if (wsp->ws_flags & WRDSF_SHOWDBG)
1511 {
1512 cmdptr = wsp->ws_input + wsp->ws_endp;
1513 cmdlen = wsp->ws_len - wsp->ws_endp;
1514 wsp->ws_debug ("Restart:%.*s;", (int) cmdlen, cmdptr);
1515 }
1516 rc = wordsplit_process_list (wsp, start);
1517 if (rc)
1518 break;
1519 }
1520 }
1521 if (rc)
1522 {
1523 wordsplit_free_nodes (wsp);
1524 return rc;
1525 }
1526 wordsplit_finish (wsp);
1527 wordsplit_free_nodes (wsp);
1528 return wsp->ws_errno;
1529 }
1530
/* Split the NUL-terminated string COMMAND; see wordsplit_len.  A NULL
   COMMAND is passed through with a length of 0.  */
int
wordsplit (const char *command, struct wordsplit *ws, int flags)
{
  size_t length = 0;

  if (command)
    length = strlen (command);
  return wordsplit_len (command, length, ws, flags);
}
1537
1538 void
1539 wordsplit_free_words (struct wordsplit *ws)
1540 {
1541 size_t i;
1542
1543 for (i = 0; i < ws->ws_wordc; i++)
1544 {
1545 char *p = ws->ws_wordv[ws->ws_offs + i];
1546 if (p)
1547 {
1548 free (p);
1549 ws->ws_wordv[ws->ws_offs + i] = NULL;
1550 }
1551 }
1552 ws->ws_wordc = 0;
1553 }
1554
1555 void
1556 wordsplit_free (struct wordsplit *ws)
1557 {
1558 wordsplit_free_words (ws);
1559 free (ws->ws_wordv);
1560 ws->ws_wordv = NULL;
1561 }
1562
1563 void
1564 wordsplit_perror (struct wordsplit *wsp)
1565 {
1566 switch (wsp->ws_errno)
1567 {
1568 case WRDSE_EOF:
1569 wsp->ws_error (_("no error"));
1570 break;
1571
1572 case WRDSE_QUOTE:
1573 wsp->ws_error (_("missing closing %c (start near #%lu)"),
1574 wsp->ws_input[wsp->ws_endp],
1575 (unsigned long) wsp->ws_endp);
1576 break;
1577
1578 case WRDSE_NOSPACE:
1579 wsp->ws_error (_("memory exhausted"));
1580 break;
1581
1582 case WRDSE_NOSUPP:
1583 wsp->ws_error (_("command substitution is not yet supported"));
1584
1585 case WRDSE_USAGE:
1586 wsp->ws_error (_("invalid wordsplit usage"));
1587 break;
1588
1589 case WRDSE_CBRACE:
1590 wsp->ws_error (_("unbalanced curly brace"));
1591 break;
1592
1593 case WRDSE_UNDEF:
1594 wsp->ws_error (_("undefined variable"));
1595 break;
1596
1597 case WRDSE_NOINPUT:
1598 wsp->ws_error (_("input exhausted"));
1599 break;
1600
1601 default:
1602 wsp->ws_error (_("unknown error"));
1603 }
1604 }
1605
1606 const char *_wordsplit_errstr[] = {
1607 N_("no error"),
1608 N_("missing closing quote"),
1609 N_("memory exhausted"),
1610 N_("command substitution is not yet supported"),
1611 N_("invalid wordsplit usage"),
1612 N_("unbalanced curly brace"),
1613 N_("undefined variable"),
1614 N_("input exhausted")
1615 };
1616 int _wordsplit_nerrs =
1617 sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);
1618
1619 const char *
1620 wordsplit_strerror (struct wordsplit *ws)
1621 {
1622 if (ws->ws_errno < _wordsplit_nerrs)
1623 return _wordsplit_errstr[ws->ws_errno];
1624 return N_("unknown error");
1625 }
This page took 0.096893 seconds and 5 git commands to generate.