Dogcows Code - chaz/tar/blob - lib/wordsplit.c
Update copyright years.
[chaz/tar] / lib / wordsplit.c
1 /* wordsplit - a word splitter
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 Written by Sergey Poznyakoff
18 */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <errno.h>
25 #include <ctype.h>
26 #include <unistd.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <stdio.h>
30 #include <stdarg.h>
31
32 #if ENABLE_NLS
33 # include <gettext.h>
34 #else
35 # define gettext(msgid) msgid
36 #endif
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
39
40 #include <wordsplit.h>
41
/* Locale-independent character classification helpers.  The range
   checks cast to unsigned, so negative char values simply fall outside
   every class. */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
/* True if C is one of the delimiter characters configured in WS. */
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)
/* strchr also matches the terminating NUL of the set it searches, so
   NUL must be rejected explicitly: it is not a punctuation character. */
#define ISPUNCT(c) \
  ((c) != 0 && strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
/* Matches the letters a-f/A-F only; decimal digits are covered by
   ISDIGIT.  NUL is rejected for the same reason as in ISPUNCT. */
#define ISXDIGIT(c) ((c) != 0 && strchr("abcdefABCDEF", (c))!=NULL)
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)

/* Initial number of slots in the word vector and the minimal amount
   by which it grows. */
#define ALLOC_INIT 128
#define ALLOC_INCR 128
56
57 static void
58 _wsplt_alloc_die (struct wordsplit *wsp)
59 {
60 wsp->ws_error (_("memory exhausted"));
61 abort ();
62 }
63
/* Default error reporter: print the printf-style message FMT on the
   standard error stream, followed by a newline. */
static void __attribute__ ((__format__ (__printf__, 1, 2)))
_wsplt_error (const char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  vfprintf (stderr, fmt, args);
  va_end (args);

  putc ('\n', stderr);
}
74
75 static void wordsplit_free_nodes (struct wordsplit *);
76
77 static int
78 _wsplt_nomem (struct wordsplit *wsp)
79 {
80 errno = ENOMEM;
81 wsp->ws_errno = WRDSE_NOSPACE;
82 if (wsp->ws_flags & WRDSF_ENOMEMABRT)
83 wsp->ws_alloc_die (wsp);
84 if (wsp->ws_flags & WRDSF_SHOWERR)
85 wordsplit_perror (wsp);
86 if (!(wsp->ws_flags & WRDSF_REUSE))
87 wordsplit_free (wsp);
88 wordsplit_free_nodes (wsp);
89 return wsp->ws_errno;
90 }
91
92 static void
93 wordsplit_init0 (struct wordsplit *wsp)
94 {
95 if (wsp->ws_flags & WRDSF_REUSE)
96 {
97 if (!(wsp->ws_flags & WRDSF_APPEND))
98 wordsplit_free_words (wsp);
99 }
100 else
101 {
102 wsp->ws_wordv = NULL;
103 wsp->ws_wordc = 0;
104 wsp->ws_wordn = 0;
105 }
106
107 wsp->ws_errno = 0;
108 wsp->ws_head = wsp->ws_tail = NULL;
109 }
110
111 static int
112 wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
113 int flags)
114 {
115 wsp->ws_flags = flags;
116
117 if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
118 wsp->ws_alloc_die = _wsplt_alloc_die;
119 if (!(wsp->ws_flags & WRDSF_ERROR))
120 wsp->ws_error = _wsplt_error;
121
122 if (!(wsp->ws_flags & WRDSF_NOVAR)
123 && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
124 {
125 errno = EINVAL;
126 wsp->ws_errno = WRDSE_USAGE;
127 if (wsp->ws_flags & WRDSF_SHOWERR)
128 wordsplit_perror (wsp);
129 return wsp->ws_errno;
130 }
131
132 if (!(wsp->ws_flags & WRDSF_NOCMD))
133 {
134 errno = EINVAL;
135 wsp->ws_errno = WRDSE_NOSUPP;
136 if (wsp->ws_flags & WRDSF_SHOWERR)
137 wordsplit_perror (wsp);
138 return wsp->ws_errno;
139 }
140
141 if (wsp->ws_flags & WRDSF_SHOWDBG)
142 {
143 if (!(wsp->ws_flags & WRDSF_DEBUG))
144 {
145 if (wsp->ws_flags & WRDSF_ERROR)
146 wsp->ws_debug = wsp->ws_error;
147 else if (wsp->ws_flags & WRDSF_SHOWERR)
148 wsp->ws_debug = _wsplt_error;
149 else
150 wsp->ws_flags &= ~WRDSF_SHOWDBG;
151 }
152 }
153
154 wsp->ws_input = input;
155 wsp->ws_len = len;
156
157 if (!(wsp->ws_flags & WRDSF_DOOFFS))
158 wsp->ws_offs = 0;
159
160 if (!(wsp->ws_flags & WRDSF_DELIM))
161 wsp->ws_delim = " \t\n";
162
163 if (!(wsp->ws_flags & WRDSF_COMMENT))
164 wsp->ws_comment = NULL;
165
166 if (!(wsp->ws_flags & WRDSF_CLOSURE))
167 wsp->ws_closure = NULL;
168
169 wsp->ws_endp = 0;
170
171 wordsplit_init0 (wsp);
172
173 return 0;
174 }
175
176 static int
177 alloc_space (struct wordsplit *wsp, size_t count)
178 {
179 size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
180 char **ptr;
181 size_t newalloc;
182
183 if (wsp->ws_wordv == NULL)
184 {
185 newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
186 ptr = calloc (newalloc, sizeof (ptr[0]));
187 }
188 else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
189 {
190 newalloc = offs + wsp->ws_wordc +
191 (count > ALLOC_INCR ? count : ALLOC_INCR);
192 ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
193 }
194 else
195 return 0;
196
197 if (ptr)
198 {
199 wsp->ws_wordn = newalloc;
200 wsp->ws_wordv = ptr;
201 }
202 else
203 return _wsplt_nomem (wsp);
204 return 0;
205 }
206 \f
207
/* Flag bits kept in the `flags' member of struct wordsplit_node. */
#define _WSNF_NULL     0x01	/* null node (a noop) */
#define _WSNF_WORD     0x02	/* node owns a string in v.word */
#define _WSNF_QUOTE    0x04	/* text is quoted */
#define _WSNF_NOEXPAND 0x08	/* text is not subject to expansion */
#define _WSNF_JOIN     0x10	/* node must be joined with the next node */
#define _WSNF_SEXP     0x20	/* is a sed expression */

/* Pseudo-flag understood by wordsplit_add_segm only: add the segment
   even if it is empty.  It is never stored in a node. */
#define _WSNF_EMPTYOK  0x0100
219
/* An element of the doubly-linked list of word fragments.  Which
   member of `v' is valid is determined by the _WSNF_WORD bit of
   `flags'. */
struct wordsplit_node
{
  struct wordsplit_node *prev;	/* Previous element */
  struct wordsplit_node *next;	/* Next element */
  int flags;			/* Node flags (_WSNF_ bits) */
  union
  {
    struct
    {
      size_t beg;		/* Offset in ws_input where the text starts */
      size_t end;		/* Offset in ws_input just past the text */
    } segm;
    char *word;			/* Allocated string (_WSNF_WORD is set) */
  } v;
};
235
236 static const char *
237 wsnode_flagstr (int flags)
238 {
239 static char retbuf[6];
240 char *p = retbuf;
241
242 if (flags & _WSNF_WORD)
243 *p++ = 'w';
244 else if (flags & _WSNF_NULL)
245 *p++ = 'n';
246 else
247 *p++ = '-';
248 if (flags & _WSNF_QUOTE)
249 *p++ = 'q';
250 else
251 *p++ = '-';
252 if (flags & _WSNF_NOEXPAND)
253 *p++ = 'E';
254 else
255 *p++ = '-';
256 if (flags & _WSNF_JOIN)
257 *p++ = 'j';
258 else
259 *p++ = '-';
260 if (flags & _WSNF_SEXP)
261 *p++ = 's';
262 else
263 *p++ = '-';
264 *p = 0;
265 return retbuf;
266 }
267
268 static const char *
269 wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
270 {
271 if (p->flags & _WSNF_NULL)
272 return "";
273 else if (p->flags & _WSNF_WORD)
274 return p->v.word;
275 else
276 return wsp->ws_input + p->v.segm.beg;
277 }
278
279 static size_t
280 wsnode_len (struct wordsplit_node *p)
281 {
282 if (p->flags & _WSNF_NULL)
283 return 0;
284 else if (p->flags & _WSNF_WORD)
285 return strlen (p->v.word);
286 else
287 return p->v.segm.end - p->v.segm.beg;
288 }
289
290 static int
291 wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode)
292 {
293 struct wordsplit_node *node = calloc (1, sizeof (*node));
294 if (!node)
295 return _wsplt_nomem (wsp);
296 *pnode = node;
297 return 0;
298 }
299
300 static void
301 wsnode_free (struct wordsplit_node *p)
302 {
303 if (p->flags & _WSNF_WORD)
304 free (p->v.word);
305 free (p);
306 }
307
308 static void
309 wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node)
310 {
311 node->next = NULL;
312 node->prev = wsp->ws_tail;
313 if (wsp->ws_tail)
314 wsp->ws_tail->next = node;
315 else
316 wsp->ws_head = node;
317 wsp->ws_tail = node;
318 }
319
320 static void
321 wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
322 {
323 struct wordsplit_node *p;
324
325 p = node->prev;
326 if (p)
327 {
328 p->next = node->next;
329 if (!node->next)
330 p->flags &= ~_WSNF_JOIN;
331 }
332 else
333 wsp->ws_head = node->next;
334
335 p = node->next;
336 if (p)
337 p->prev = node->prev;
338 else
339 wsp->ws_tail = node->prev;
340
341 node->next = node->prev = NULL;
342 }
343
344 static void
345 wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
346 struct wordsplit_node *anchor, int before)
347 {
348 if (!wsp->ws_head)
349 {
350 node->next = node->prev = NULL;
351 wsp->ws_head = wsp->ws_tail = node;
352 }
353 else if (before)
354 {
355 if (anchor->prev)
356 wsnode_insert (wsp, node, anchor->prev, 0);
357 else
358 {
359 node->prev = NULL;
360 node->next = anchor;
361 anchor->prev = node;
362 wsp->ws_head = node;
363 }
364 }
365 else
366 {
367 struct wordsplit_node *p;
368
369 p = anchor->next;
370 if (p)
371 p->prev = node;
372 else
373 wsp->ws_tail = node;
374 node->next = p;
375 node->prev = anchor;
376 anchor->next = node;
377 }
378 }
379
380 static int
381 wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
382 {
383 struct wordsplit_node *node;
384 int rc;
385
386 if (end == beg && !(flg & _WSNF_EMPTYOK))
387 return 0;
388 rc = wsnode_new (wsp, &node);
389 if (rc)
390 return rc;
391 node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
392 node->v.segm.beg = beg;
393 node->v.segm.end = end;
394 wsnode_append (wsp, node);
395 return 0;
396 }
397
398 static void
399 wordsplit_free_nodes (struct wordsplit *wsp)
400 {
401 struct wordsplit_node *p;
402
403 for (p = wsp->ws_head; p;)
404 {
405 struct wordsplit_node *next = p->next;
406 wsnode_free (p);
407 p = next;
408 }
409 wsp->ws_head = wsp->ws_tail = NULL;
410 }
411
412 static void
413 wordsplit_dump_nodes (struct wordsplit *wsp)
414 {
415 struct wordsplit_node *p;
416 int n = 0;
417
418 for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
419 {
420 if (p->flags & _WSNF_WORD)
421 wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
422 n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
423 else
424 wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
425 n, p, p->flags, wsnode_flagstr (p->flags),
426 (int) (p->v.segm.end - p->v.segm.beg),
427 wsp->ws_input + p->v.segm.beg);
428 }
429 }
430
431 static int
432 coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
433 {
434 struct wordsplit_node *p, *end;
435 size_t len = 0;
436 char *buf, *cur;
437 int stop;
438
439 for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
440 {
441 len += wsnode_len (p);
442 }
443 len += wsnode_len (p);
444 end = p;
445
446 buf = malloc (len + 1);
447 if (!buf)
448 return _wsplt_nomem (wsp);
449 cur = buf;
450
451 p = node;
452 for (stop = 0; !stop;)
453 {
454 struct wordsplit_node *next = p->next;
455 const char *str = wsnode_ptr (wsp, p);
456 size_t slen = wsnode_len (p);
457
458 memcpy (cur, str, slen);
459 cur += slen;
460 if (p != node)
461 {
462 wsnode_remove (wsp, p);
463 stop = p == end;
464 wsnode_free (p);
465 }
466 p = next;
467 }
468
469 *cur = 0;
470
471 node->flags &= ~_WSNF_JOIN;
472
473 if (node->flags & _WSNF_WORD)
474 free (node->v.word);
475 else
476 node->flags |= _WSNF_WORD;
477 node->v.word = buf;
478 return 0;
479 }
480
481 static int
482 wsnode_quoteremoval (struct wordsplit *wsp)
483 {
484 struct wordsplit_node *p;
485 void (*uqfn) (char *, const char *, size_t) =
486 (wsp->ws_flags & WRDSF_CESCAPES) ?
487 wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
488
489 for (p = wsp->ws_head; p; p = p->next)
490 {
491 const char *str = wsnode_ptr (wsp, p);
492 size_t slen = wsnode_len (p);
493 int unquote;
494
495 if (wsp->ws_flags & WRDSF_QUOTE)
496 {
497 unquote = !(p->flags & _WSNF_NOEXPAND);
498 }
499 else
500 unquote = 0;
501
502 if (unquote)
503 {
504 if (!(p->flags & _WSNF_WORD))
505 {
506 char *newstr = malloc (slen + 1);
507 if (!newstr)
508 return _wsplt_nomem (wsp);
509 memcpy (newstr, str, slen);
510 newstr[slen] = 0;
511 p->v.word = newstr;
512 p->flags |= _WSNF_WORD;
513 }
514
515 if (wsp->ws_flags & WRDSF_ESCAPE)
516 wordsplit_general_unquote_copy (p->v.word, str, slen,
517 wsp->ws_escape);
518 else
519 uqfn (p->v.word, str, slen);
520 }
521 }
522 return 0;
523 }
524
525 static int
526 wsnode_coalesce (struct wordsplit *wsp)
527 {
528 struct wordsplit_node *p;
529
530 for (p = wsp->ws_head; p; p = p->next)
531 {
532 if (p->flags & _WSNF_JOIN)
533 if (coalesce_segment (wsp, p))
534 return 1;
535 }
536 return 0;
537 }
538
539 static int
540 wordsplit_finish (struct wordsplit *wsp)
541 {
542 struct wordsplit_node *p;
543 size_t n;
544
545 n = 0;
546
547 for (p = wsp->ws_head; p; p = p->next)
548 n++;
549
550 if (alloc_space (wsp, n + 1))
551 return 1;
552
553 for (p = wsp->ws_head; p; p = p->next)
554 {
555 const char *str = wsnode_ptr (wsp, p);
556 size_t slen = wsnode_len (p);
557 char *newstr = malloc (slen + 1);
558
559 /* Assign newstr first, even if it is NULL. This way
560 wordsplit_free will work even if we return
561 nomem later. */
562 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
563 if (!newstr)
564 return _wsplt_nomem (wsp);
565 memcpy (newstr, str, slen);
566 newstr[slen] = 0;
567
568 wsp->ws_wordc++;
569
570 }
571 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
572 return 0;
573 }
574 \f
575
576 /* Variable expansion */
577 static int
578 node_split_prefix (struct wordsplit *wsp,
579 struct wordsplit_node **ptail,
580 struct wordsplit_node *node,
581 size_t beg, size_t len, int flg)
582 {
583 struct wordsplit_node *newnode;
584
585 if (len == 0)
586 return 0;
587 if (wsnode_new (wsp, &newnode))
588 return 1;
589 wsnode_insert (wsp, newnode, *ptail, 0);
590 if (node->flags & _WSNF_WORD)
591 {
592 const char *str = wsnode_ptr (wsp, node);
593 char *newstr = malloc (len + 1);
594 if (!newstr)
595 return _wsplt_nomem (wsp);
596 memcpy (newstr, str + beg, len);
597 newstr[len] = 0;
598 newnode->flags = _WSNF_WORD;
599 newnode->v.word = newstr;
600 }
601 else
602 {
603 newnode->v.segm.beg = node->v.segm.beg + beg;
604 newnode->v.segm.end = newnode->v.segm.beg + len;
605 }
606 newnode->flags |= flg;
607 *ptail = newnode;
608 return 0;
609 }
610
/* Scan STR, starting at index I and not going beyond LEN, for the
   curly brace closing a group that is already open when the scan
   begins.  Nested braces are balanced.  Braces within single or
   double quotes are ignored; within double quotes a backslash
   protects the character that follows it.

   On success stores the index of the closing brace in *POFF and
   returns 0.  Returns 1 if no matching brace was found. */
static int
find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
{
  enum
  { in_text, in_single, in_double } mode = in_text;
  size_t depth = 1;

  while (i < len)
    {
      char c = str[i];

      if (mode == in_single)
	{
	  if (c == '\'')
	    mode = in_text;
	}
      else if (mode == in_double)
	{
	  if (c == '\\')
	    i++;		/* Skip the protected character */
	  else if (c == '"')
	    mode = in_text;
	}
      else if (c == '{')
	depth++;
      else if (c == '}')
	{
	  if (--depth == 0)
	    {
	      *poff = i;
	      return 0;
	    }
	}
      else if (c == '"')
	mode = in_double;
      else if (c == '\'')
	mode = in_single;

      i++;
    }
  return 1;
}
662
663 static const char *
664 wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
665 {
666 size_t i;
667
668 if (!(wsp->ws_flags & WRDSF_ENV))
669 return NULL;
670
671 if (wsp->ws_flags & WRDSF_ENV_KV)
672 {
673 /* A key-value pair environment */
674 for (i = 0; wsp->ws_env[i]; i++)
675 {
676 size_t elen = strlen (wsp->ws_env[i]);
677 if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
678 return wsp->ws_env[i + 1];
679 /* Skip the value. Break the loop if it is NULL. */
680 i++;
681 if (wsp->ws_env[i] == NULL)
682 break;
683 }
684 }
685 else
686 {
687 /* Usual (A=B) environment. */
688 for (i = 0; wsp->ws_env[i]; i++)
689 {
690 size_t j;
691 const char *var = wsp->ws_env[i];
692
693 for (j = 0; j < len; j++)
694 if (name[j] != var[j])
695 break;
696 if (j == len && var[j] == '=')
697 return var + j + 1;
698 }
699 }
700 return NULL;
701 }
702
703 static int
704 expvar (struct wordsplit *wsp, const char *str, size_t len,
705 struct wordsplit_node **ptail, const char **pend, int flg)
706 {
707 size_t i = 0;
708 const char *defstr = NULL;
709 const char *value;
710 const char *vptr;
711 struct wordsplit_node *newnode;
712 const char *start = str - 1;
713
714 if (ISALPHA (str[0]) || str[0] == '_')
715 {
716 for (i = 1; i < len; i++)
717 if (!(ISALNUM (str[i]) || str[i] == '_'))
718 break;
719 *pend = str + i - 1;
720 }
721 else if (str[0] == '{')
722 {
723 str++;
724 len--;
725 for (i = 1; i < len; i++)
726 if (str[i] == '}' || str[i] == ':')
727 break;
728 if (str[i] == ':')
729 {
730 size_t j;
731
732 defstr = str + i + 1;
733 if (find_closing_cbrace (str, i + 1, len, &j))
734 {
735 wsp->ws_errno = WRDSE_CBRACE;
736 return 1;
737 }
738 *pend = str + j;
739 }
740 else if (str[i] == '}')
741 {
742 defstr = NULL;
743 *pend = str + i;
744 }
745 else
746 {
747 wsp->ws_errno = WRDSE_CBRACE;
748 return 1;
749 }
750 }
751 else
752 {
753 if (wsnode_new (wsp, &newnode))
754 return 1;
755 wsnode_insert (wsp, newnode, *ptail, 0);
756 *ptail = newnode;
757 newnode->flags = _WSNF_WORD | flg;
758 newnode->v.word = malloc (3);
759 if (!newnode->v.word)
760 return _wsplt_nomem (wsp);
761 newnode->v.word[0] = '$';
762 newnode->v.word[1] = str[0];
763 newnode->v.word[2] = 0;
764 *pend = str;
765 return 0;
766 }
767
768 /* Actually expand the variable */
769 /* str - start of the variable name
770 i - its length
771 defstr - default replacement str */
772
773 vptr = wordsplit_find_env (wsp, str, i);
774 if (vptr)
775 {
776 value = strdup (vptr);
777 if (!value)
778 return _wsplt_nomem (wsp);
779 }
780 else if (wsp->ws_flags & WRDSF_GETVAR)
781 value = wsp->ws_getvar (str, i, wsp->ws_closure);
782 else if (wsp->ws_flags & WRDSF_UNDEF)
783 {
784 wsp->ws_errno = WRDSE_UNDEF;
785 if (wsp->ws_flags & WRDSF_SHOWERR)
786 wordsplit_perror (wsp);
787 return 1;
788 }
789 else
790 {
791 if (wsp->ws_flags & WRDSF_WARNUNDEF)
792 wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str);
793 if (wsp->ws_flags & WRDSF_KEEPUNDEF)
794 value = NULL;
795 else
796 value = "";
797 }
798
799 /* FIXME: handle defstr */
800 (void) defstr;
801
802 if (value)
803 {
804 if (flg & _WSNF_QUOTE)
805 {
806 if (wsnode_new (wsp, &newnode))
807 return 1;
808 wsnode_insert (wsp, newnode, *ptail, 0);
809 *ptail = newnode;
810 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
811 newnode->v.word = strdup (value);
812 if (!newnode->v.word)
813 return _wsplt_nomem (wsp);
814 }
815 else if (*value == 0)
816 {
817 /* Empty string is a special case */
818 if (wsnode_new (wsp, &newnode))
819 return 1;
820 wsnode_insert (wsp, newnode, *ptail, 0);
821 *ptail = newnode;
822 newnode->flags = _WSNF_NULL;
823 }
824 else
825 {
826 struct wordsplit ws;
827 int i;
828
829 ws.ws_delim = wsp->ws_delim;
830 if (wordsplit (value, &ws,
831 WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS))
832 {
833 wordsplit_free (&ws);
834 return 1;
835 }
836 for (i = 0; i < ws.ws_wordc; i++)
837 {
838 if (wsnode_new (wsp, &newnode))
839 return 1;
840 wsnode_insert (wsp, newnode, *ptail, 0);
841 *ptail = newnode;
842 newnode->flags = _WSNF_WORD |
843 _WSNF_NOEXPAND |
844 (i + 1 < ws.ws_wordc ? (flg & ~_WSNF_JOIN) : flg);
845 newnode->v.word = strdup (ws.ws_wordv[i]);
846 if (!newnode->v.word)
847 return _wsplt_nomem (wsp);
848 }
849 wordsplit_free (&ws);
850 }
851 }
852 else if (wsp->ws_flags & WRDSF_KEEPUNDEF)
853 {
854 size_t size = *pend - start + 1;
855
856 if (wsnode_new (wsp, &newnode))
857 return 1;
858 wsnode_insert (wsp, newnode, *ptail, 0);
859 *ptail = newnode;
860 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
861 newnode->v.word = malloc (size + 1);
862 if (!newnode->v.word)
863 return _wsplt_nomem (wsp);
864 memcpy (newnode->v.word, start, size);
865 newnode->v.word[size] = 0;
866 }
867 else
868 {
869 if (wsnode_new (wsp, &newnode))
870 return 1;
871 wsnode_insert (wsp, newnode, *ptail, 0);
872 *ptail = newnode;
873 newnode->flags = _WSNF_NULL;
874 }
875 return 0;
876 }
877
878 static int
879 node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
880 {
881 const char *str = wsnode_ptr (wsp, node);
882 size_t slen = wsnode_len (node);
883 const char *end = str + slen;
884 const char *p;
885 size_t off = 0;
886 struct wordsplit_node *tail = node;
887
888 for (p = str; p < end; p++)
889 {
890 if (*p == '\\')
891 {
892 p++;
893 continue;
894 }
895 if (*p == '$')
896 {
897 size_t n = p - str;
898
899 if (tail != node)
900 tail->flags |= _WSNF_JOIN;
901 if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
902 return 1;
903 p++;
904 if (expvar (wsp, p, slen - n, &tail, &p,
905 node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
906 return 1;
907 off += p - str + 1;
908 str = p + 1;
909 }
910 }
911 if (p > str)
912 {
913 if (tail != node)
914 tail->flags |= _WSNF_JOIN;
915 if (node_split_prefix (wsp, &tail, node, off, p - str,
916 node->flags & _WSNF_JOIN))
917 return 1;
918 }
919 if (tail != node)
920 {
921 wsnode_remove (wsp, node);
922 wsnode_free (node);
923 }
924 return 0;
925 }
926
927 /* Remove NULL lists */
928 static void
929 wsnode_nullelim (struct wordsplit *wsp)
930 {
931 struct wordsplit_node *p;
932
933 for (p = wsp->ws_head; p;)
934 {
935 struct wordsplit_node *next = p->next;
936 if (p->flags & _WSNF_NULL)
937 {
938 wsnode_remove (wsp, p);
939 wsnode_free (p);
940 }
941 p = next;
942 }
943 }
944
945 static int
946 wordsplit_varexp (struct wordsplit *wsp)
947 {
948 struct wordsplit_node *p;
949
950 for (p = wsp->ws_head; p;)
951 {
952 struct wordsplit_node *next = p->next;
953 if (!(p->flags & _WSNF_NOEXPAND))
954 if (node_expand_vars (wsp, p))
955 return 1;
956 p = next;
957 }
958
959 wsnode_nullelim (wsp);
960 return 0;
961 }
962 \f
963 /* Strip off any leading and trailing whitespace. This function is called
964 right after the initial scanning, therefore it assumes that every
965 node in the list is a text reference node. */
966 static void
967 wordsplit_trimws (struct wordsplit *wsp)
968 {
969 struct wordsplit_node *p;
970
971 for (p = wsp->ws_head; p; p = p->next)
972 {
973 size_t n;
974
975 if (p->flags & _WSNF_QUOTE)
976 continue;
977
978 /* Skip leading whitespace: */
979 for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
980 n++)
981 ;
982 p->v.segm.beg = n;
983 /* Trim trailing whitespace */
984 for (n = p->v.segm.end;
985 n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
986 p->v.segm.end = n;
987 if (p->v.segm.beg == p->v.segm.end)
988 p->flags |= _WSNF_NULL;
989 }
990
991 wsnode_nullelim (wsp);
992 }
993 \f
/* Skip a sequence of sed-style substitute expressions (s/re/repl/flags),
   separated by semicolons, in the first LEN bytes of COMMAND, starting
   at index I.  Any punctuation character may serve as the delimiter,
   and a backslash protects the character that follows it.

   Returns the index of the first character that is not part of the
   sequence.  The result never exceeds LEN. */
static int
skip_sed_expr (const char *command, size_t i, size_t len)
{
  int state = 0;		/* Number of delimiters seen so far */

  do
    {
      int delim;

      if (command[i] == ';')
	i++;
      /* Check the bounds before looking at the characters */
      if (!(i + 3 < len && command[i] == 's' && ISPUNCT (command[i + 1])))
	break;

      delim = command[++i];
      state = 1;
      for (i++; i < len; i++)
	{
	  if (state == 3)
	    {
	      /* Past the third delimiter: only flags may follow */
	      if (command[i] == delim || !ISALNUM (command[i]))
		break;
	    }
	  else if (command[i] == '\\')
	    i++;
	  else if (command[i] == delim)
	    state++;
	}
    }
  while (state == 3 && i < len && command[i] == ';');

  /* A backslash in the last position moves the index past the end */
  if (i > len)
    i = len;
  return i;
}
1026
1027 static size_t
1028 skip_delim (struct wordsplit *wsp)
1029 {
1030 size_t start = wsp->ws_endp;
1031 if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
1032 {
1033 if ((wsp->ws_flags & WRDSF_RETURN_DELIMS) &&
1034 ISDELIM (wsp, wsp->ws_input[start]))
1035 {
1036 int delim = wsp->ws_input[start];
1037 do
1038 start++;
1039 while (start < wsp->ws_len && delim == wsp->ws_input[start]);
1040 }
1041 else
1042 {
1043 do
1044 start++;
1045 while (start < wsp->ws_len && ISDELIM (wsp, wsp->ws_input[start]));
1046 }
1047 start--;
1048 }
1049
1050 if (!(wsp->ws_flags & WRDSF_RETURN_DELIMS))
1051 start++;
1052
1053 return start;
1054 }
1055
1056 #define _WRDS_EOF 0
1057 #define _WRDS_OK 1
1058 #define _WRDS_ERR 2
1059
1060 static int
1061 scan_qstring (struct wordsplit *wsp, size_t start, size_t * end)
1062 {
1063 size_t j;
1064 const char *command = wsp->ws_input;
1065 size_t len = wsp->ws_len;
1066 char q = command[start];
1067
1068 for (j = start + 1; j < len && command[j] != q; j++)
1069 if (q == '"' && command[j] == '\\')
1070 j++;
1071 if (j < len && command[j] == q)
1072 {
1073 int flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
1074 if (q == '\'')
1075 flags |= _WSNF_NOEXPAND;
1076 if (wordsplit_add_segm (wsp, start + 1, j, flags))
1077 return _WRDS_ERR;
1078 *end = j;
1079 }
1080 else
1081 {
1082 wsp->ws_endp = start;
1083 wsp->ws_errno = WRDSE_QUOTE;
1084 if (wsp->ws_flags & WRDSF_SHOWERR)
1085 wordsplit_perror (wsp);
1086 return _WRDS_ERR;
1087 }
1088 return 0;
1089 }
1090
1091 static int
1092 scan_word (struct wordsplit *wsp, size_t start)
1093 {
1094 size_t len = wsp->ws_len;
1095 const char *command = wsp->ws_input;
1096 const char *comment = wsp->ws_comment;
1097 int join = 0;
1098 int flags = 0;
1099
1100 size_t i = start;
1101
1102 if (i >= len)
1103 {
1104 wsp->ws_errno = WRDSE_EOF;
1105 return _WRDS_EOF;
1106 }
1107
1108 start = i;
1109
1110 if (wsp->ws_flags & WRDSF_SED_EXPR
1111 && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
1112 {
1113 flags = _WSNF_SEXP;
1114 i = skip_sed_expr (command, i, len);
1115 }
1116 else if (!ISDELIM (wsp, command[i]))
1117 {
1118 while (i < len)
1119 {
1120 if (comment && strchr (comment, command[i]) != NULL)
1121 {
1122 size_t j;
1123 for (j = i + 1; j < len && command[j] != '\n'; j++)
1124 ;
1125 if (wordsplit_add_segm (wsp, start, i, 0))
1126 return _WRDS_ERR;
1127 wsp->ws_endp = j;
1128 return _WRDS_OK;
1129 }
1130
1131 if (wsp->ws_flags & WRDSF_QUOTE)
1132 {
1133 if (command[i] == '\\')
1134 {
1135 if (++i == len)
1136 break;
1137 i++;
1138 continue;
1139 }
1140
1141 if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') ||
1142 ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))
1143 {
1144 if (join && wsp->ws_tail)
1145 wsp->ws_tail->flags |= _WSNF_JOIN;
1146 if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN))
1147 return _WRDS_ERR;
1148 if (scan_qstring (wsp, i, &i))
1149 return _WRDS_ERR;
1150 start = i + 1;
1151 join = 1;
1152 }
1153 }
1154
1155 if (ISDELIM (wsp, command[i]))
1156 break;
1157 else
1158 i++;
1159 }
1160 }
1161 else if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
1162 {
1163 i++;
1164 }
1165 else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
1166 flags |= _WSNF_EMPTYOK;
1167
1168 if (join && i > start && wsp->ws_tail)
1169 wsp->ws_tail->flags |= _WSNF_JOIN;
1170 if (wordsplit_add_segm (wsp, start, i, flags))
1171 return _WRDS_ERR;
1172 wsp->ws_endp = i;
1173 if (wsp->ws_flags & WRDSF_INCREMENTAL)
1174 return _WRDS_EOF;
1175 return _WRDS_OK;
1176 }
1177
/* Translation table for C escapes.  It consists of pairs of
   characters: the letter that follows the backslash in the escape
   sequence (even index) and the character it stands for (odd
   index). */
static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";

/* Return the character denoted by the escape sequence backslash-C.
   If there is no such sequence, return C itself. */
int
wordsplit_c_unquote_char (int c)
{
  size_t k;

  for (k = 0; quote_transtab[k]; k += 2)
    {
      if (quote_transtab[k] == c)
	return quote_transtab[k + 1];
    }
  return c;
}

/* Return the letter which, prefixed with a backslash, forms the C
   escape sequence for character C.  Return -1 if there is none. */
int
wordsplit_c_quote_char (int c)
{
  size_t k;

  /* Walk the odd (translated) positions by index.  Stepping a
     pointer backwards would end up before the start of the array. */
  for (k = 1; k < sizeof (quote_transtab) - 1; k += 2)
    {
      if (quote_transtab[k] == c)
	return quote_transtab[k - 1];
    }
  return -1;
}
1206
/* Numeric value of the digit C in bases up to 16, or 255 if C is not
   a digit. */
#define to_num(c) \
  (ISDIGIT(c) ? (c) - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))

/* Convert at most CNT leading characters of SRC, taken as digits in
   the given BASE, to a number.  The conversion stops at the first
   character that is not a valid digit, including the NUL character.
   The result is stored in *PVAL.

   Returns the number of characters converted. */
static int
xtonum (int *pval, const char *src, int base, int cnt)
{
  int i, val;

  for (i = 0, val = 0; i < cnt; i++, src++)
    {
      int n = *(const unsigned char *) src;

      /* NUL and non-ASCII bytes are never digits */
      if (n == 0 || n > 127)
	break;
      n = to_num (n);
      if (n < 0 || n >= base)
	break;
      val = val * base + n;
    }
  *pval = val;
  return i;
}
1225
/* Compute the number of bytes the quoted form of STR occupies, as
   produced by wordsplit_c_quote_copy with the same QUOTE_HEX (the
   terminating NUL is not counted).  *QUOTE is set to 1 if STR
   contains a space or a double quote, and to 0 otherwise. */
size_t
wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
{
  size_t total = 0;
  const char *s;

  *quote = 0;
  for (s = str; *s; s++)
    {
      if (*s == ' ' || *s == '"')
	*quote = 1;

      if (*s == ' ')
	total += 1;
      else if (*s == '"')
	total += 2;		/* backslash + quote */
      else if (*s != '\t' && *s != '\\' && ISPRINT (*s))
	total += 1;
      else if (quote_hex)
	total += 3;		/* %XX */
      else if (wordsplit_c_quote_char (*s) != -1)
	total += 2;		/* backslash + letter */
      else
	total += 4;		/* backslash + three octal digits */
    }
  return total;
}
1255
/* Copy N bytes of SRC to DST, dropping each backslash that precedes
   a character listed in ESCAPABLE.  Other backslashes are copied
   verbatim, and so is a backslash that is the last of the N bytes:
   nothing beyond SRC[N-1] is ever examined.  The result is
   NUL-terminated; DST must have room for N + 1 bytes.  DST may be the
   same buffer as SRC. */
void
wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
				   const char *escapable)
{
  size_t i = 0;

  while (i < n)
    {
      if (src[i] == '\\' && i + 1 < n && strchr (escapable, src[i + 1]))
	i++;
      *dst++ = src[i++];
    }
  *dst = 0;
}
1270
/* Copy N bytes of SRC to DST, dropping each backslash and copying
   the character after it literally.  A backslash that is the last of
   the N bytes has nothing to protect and is copied verbatim; nothing
   beyond SRC[N-1] is ever examined.  The result is NUL-terminated;
   DST must have room for N + 1 bytes.  DST may be the same buffer as
   SRC. */
void
wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
{
  size_t i = 0;

  while (i < n)
    {
      if (src[i] == '\\' && i + 1 < n)
	i++;
      *dst++ = src[i++];
    }
  *dst = 0;
}
1284
/* Copy N bytes of SRC to DST, translating C escape sequences:

     \xH, \xHH       character with the given hexadecimal code
     \O, \OO, \OOO   character with the given octal code
     \c              as translated by wordsplit_c_unquote_char

   A malformed numeric escape is copied verbatim, and so is a
   backslash that is the last of the N bytes.  Nothing beyond
   SRC[N-1] is ever examined.  The result is NUL-terminated; DST must
   have room for N + 1 bytes.  DST may be the same buffer as SRC. */
void
wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
{
  size_t i = 0;
  int c;

  while (i < n)
    {
      if (src[i] != '\\' || i + 1 == n)
	{
	  /* Ordinary character, or a backslash with nothing after it */
	  *dst++ = src[i++];
	  continue;
	}

      ++i;
      if (src[i] == 'x' || src[i] == 'X')
	{
	  /* At most two hex digits, but no more than there is input */
	  size_t avail = n - i - 1;
	  int off = 0;

	  if (avail > 0)
	    off = xtonum (&c, src + i + 1, 16, avail < 2 ? (int) avail : 2);
	  if (off == 0)
	    {
	      *dst++ = '\\';
	      *dst++ = src[i++];
	    }
	  else
	    {
	      *dst++ = c;
	      i += off + 1;
	    }
	}
      else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i]))
	{
	  /* At most three octal digits, but no more than there is input */
	  size_t avail = n - i;
	  int off = xtonum (&c, src + i, 8, avail < 3 ? (int) avail : 3);

	  if (off == 0)
	    {
	      *dst++ = '\\';
	      *dst++ = src[i++];
	    }
	  else
	    {
	      *dst++ = c;
	      i += off;
	    }
	}
      else
	*dst++ = wordsplit_c_unquote_char (src[i++]);
    }
  *dst = 0;
}
1349
/* Copy SRC to DST in quoted form.  A double quote is prefixed with a
   backslash.  Printable characters other than backslash are copied
   as is.  Anything else is written as %XX (two hexadecimal digits)
   if QUOTE_HEX is set, and as a C escape sequence otherwise: a
   backslash followed by a letter if there is one for the character,
   or by three octal digits.

   The output is not NUL-terminated.  Use wordsplit_c_quoted_length
   to find out how much room DST must provide. */
void
wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
  char tmp[4];
  const char *s;

  for (s = src; *s; s++)
    {
      unsigned char uc = *(const unsigned char *) s;
      int esc;

      if (*s == '"')
	{
	  *dst++ = '\\';
	  *dst++ = *s;
	  continue;
	}

      if (*s != '\t' && *s != '\\' && ISPRINT (*s))
	{
	  *dst++ = *s;
	  continue;
	}

      if (quote_hex)
	{
	  snprintf (tmp, sizeof tmp, "%%%02X", uc);
	  memcpy (dst, tmp, 3);
	  dst += 3;
	  continue;
	}

      esc = wordsplit_c_quote_char (*s);
      *dst++ = '\\';
      if (esc != -1)
	*dst++ = esc;
      else
	{
	  snprintf (tmp, sizeof tmp, "%03o", uc);
	  memcpy (dst, tmp, 3);
	  dst += 3;
	}
    }
}
1388
1389 static int
1390 wordsplit_process_list (struct wordsplit *wsp, size_t start)
1391 {
1392 if (wsp->ws_flags & WRDSF_NOSPLIT)
1393 {
1394 /* Treat entire input as a quoted argument */
1395 if (wordsplit_add_segm (wsp, start, wsp->ws_len, _WSNF_QUOTE))
1396 return wsp->ws_errno;
1397 }
1398 else
1399 {
1400 int rc;
1401
1402 while ((rc = scan_word (wsp, start)) == _WRDS_OK)
1403 start = skip_delim (wsp);
1404 /* Make sure tail element is not joinable */
1405 if (wsp->ws_tail)
1406 wsp->ws_tail->flags &= ~_WSNF_JOIN;
1407 if (rc == _WRDS_ERR)
1408 return wsp->ws_errno;
1409 }
1410
1411 if (wsp->ws_flags & WRDSF_SHOWDBG)
1412 {
1413 wsp->ws_debug ("Initial list:");
1414 wordsplit_dump_nodes (wsp);
1415 }
1416
1417 if (wsp->ws_flags & WRDSF_WS)
1418 {
1419 /* Trim leading and trailing whitespace */
1420 wordsplit_trimws (wsp);
1421 if (wsp->ws_flags & WRDSF_SHOWDBG)
1422 {
1423 wsp->ws_debug ("After WS trimming:");
1424 wordsplit_dump_nodes (wsp);
1425 }
1426 }
1427
1428 /* Expand variables (FIXME: & commands) */
1429 if (!(wsp->ws_flags & WRDSF_NOVAR))
1430 {
1431 if (wordsplit_varexp (wsp))
1432 {
1433 wordsplit_free_nodes (wsp);
1434 return wsp->ws_errno;
1435 }
1436 if (wsp->ws_flags & WRDSF_SHOWDBG)
1437 {
1438 wsp->ws_debug ("Expanded list:");
1439 wordsplit_dump_nodes (wsp);
1440 }
1441 }
1442
1443 do
1444 {
1445 if (wsnode_quoteremoval (wsp))
1446 break;
1447 if (wsp->ws_flags & WRDSF_SHOWDBG)
1448 {
1449 wsp->ws_debug ("After quote removal:");
1450 wordsplit_dump_nodes (wsp);
1451 }
1452
1453 if (wsnode_coalesce (wsp))
1454 break;
1455
1456 if (wsp->ws_flags & WRDSF_SHOWDBG)
1457 {
1458 wsp->ws_debug ("Coalesced list:");
1459 wordsplit_dump_nodes (wsp);
1460 }
1461 }
1462 while (0);
1463 return wsp->ws_errno;
1464 }
1465
1466 int
1467 wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
1468 int flags)
1469 {
1470 int rc;
1471 size_t start;
1472 const char *cmdptr;
1473 size_t cmdlen;
1474
1475 if (!command)
1476 {
1477 if (!(flags & WRDSF_INCREMENTAL))
1478 return EINVAL;
1479
1480 start = skip_delim (wsp);
1481 if (wsp->ws_endp == wsp->ws_len)
1482 {
1483 wsp->ws_errno = WRDSE_NOINPUT;
1484 if (wsp->ws_flags & WRDSF_SHOWERR)
1485 wordsplit_perror (wsp);
1486 return wsp->ws_errno;
1487 }
1488
1489 cmdptr = wsp->ws_input + wsp->ws_endp;
1490 cmdlen = wsp->ws_len - wsp->ws_endp;
1491 wsp->ws_flags |= WRDSF_REUSE;
1492 wordsplit_init0 (wsp);
1493 }
1494 else
1495 {
1496 cmdptr = command;
1497 cmdlen = length;
1498 start = 0;
1499 rc = wordsplit_init (wsp, cmdptr, cmdlen, flags);
1500 if (rc)
1501 return rc;
1502 }
1503
1504 if (wsp->ws_flags & WRDSF_SHOWDBG)
1505 wsp->ws_debug ("Input:%.*s;", (int) cmdlen, cmdptr);
1506
1507 rc = wordsplit_process_list (wsp, start);
1508 if (rc == 0 && (flags & WRDSF_INCREMENTAL))
1509 {
1510 while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len)
1511 {
1512 start = skip_delim (wsp);
1513 if (wsp->ws_flags & WRDSF_SHOWDBG)
1514 {
1515 cmdptr = wsp->ws_input + wsp->ws_endp;
1516 cmdlen = wsp->ws_len - wsp->ws_endp;
1517 wsp->ws_debug ("Restart:%.*s;", (int) cmdlen, cmdptr);
1518 }
1519 rc = wordsplit_process_list (wsp, start);
1520 if (rc)
1521 break;
1522 }
1523 }
1524 if (rc)
1525 {
1526 wordsplit_free_nodes (wsp);
1527 return rc;
1528 }
1529 wordsplit_finish (wsp);
1530 wordsplit_free_nodes (wsp);
1531 return wsp->ws_errno;
1532 }
1533
/* Convenience front end to wordsplit_len for NUL-terminated input.
   A NULL COMMAND is passed on with a length of 0.  */
int
wordsplit (const char *command, struct wordsplit *ws, int flags)
{
  size_t length = 0;

  if (command)
    length = strlen (command);
  return wordsplit_len (command, length, ws, flags);
}
1540
1541 void
1542 wordsplit_free_words (struct wordsplit *ws)
1543 {
1544 size_t i;
1545
1546 for (i = 0; i < ws->ws_wordc; i++)
1547 {
1548 char *p = ws->ws_wordv[ws->ws_offs + i];
1549 if (p)
1550 {
1551 free (p);
1552 ws->ws_wordv[ws->ws_offs + i] = NULL;
1553 }
1554 }
1555 ws->ws_wordc = 0;
1556 }
1557
1558 void
1559 wordsplit_free (struct wordsplit *ws)
1560 {
1561 wordsplit_free_words (ws);
1562 free (ws->ws_wordv);
1563 ws->ws_wordv = NULL;
1564 }
1565
1566 void
1567 wordsplit_perror (struct wordsplit *wsp)
1568 {
1569 switch (wsp->ws_errno)
1570 {
1571 case WRDSE_EOF:
1572 wsp->ws_error (_("no error"));
1573 break;
1574
1575 case WRDSE_QUOTE:
1576 wsp->ws_error (_("missing closing %c (start near #%lu)"),
1577 wsp->ws_input[wsp->ws_endp],
1578 (unsigned long) wsp->ws_endp);
1579 break;
1580
1581 case WRDSE_NOSPACE:
1582 wsp->ws_error (_("memory exhausted"));
1583 break;
1584
1585 case WRDSE_NOSUPP:
1586 wsp->ws_error (_("command substitution is not yet supported"));
1587
1588 case WRDSE_USAGE:
1589 wsp->ws_error (_("invalid wordsplit usage"));
1590 break;
1591
1592 case WRDSE_CBRACE:
1593 wsp->ws_error (_("unbalanced curly brace"));
1594 break;
1595
1596 case WRDSE_UNDEF:
1597 wsp->ws_error (_("undefined variable"));
1598 break;
1599
1600 case WRDSE_NOINPUT:
1601 wsp->ws_error (_("input exhausted"));
1602 break;
1603
1604 default:
1605 wsp->ws_error (_("unknown error"));
1606 }
1607 }
1608
1609 const char *_wordsplit_errstr[] = {
1610 N_("no error"),
1611 N_("missing closing quote"),
1612 N_("memory exhausted"),
1613 N_("command substitution is not yet supported"),
1614 N_("invalid wordsplit usage"),
1615 N_("unbalanced curly brace"),
1616 N_("undefined variable"),
1617 N_("input exhausted")
1618 };
1619 int _wordsplit_nerrs =
1620 sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);
1621
1622 const char *
1623 wordsplit_strerror (struct wordsplit *ws)
1624 {
1625 if (ws->ws_errno < _wordsplit_nerrs)
1626 return _wordsplit_errstr[ws->ws_errno];
1627 return N_("unknown error");
1628 }
This page took 0.107844 seconds and 4 git commands to generate.