/* wordsplit - a word splitter
   Copyright (C) 2009-2013 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program.  If not, see <http://www.gnu.org/licenses/>.

   Written by Sergey Poznyakoff
*/
/* Message-marking helpers.  gettext is defined here as an identity macro,
   _() wraps its argument in a gettext call, and N_() leaves it untouched. */
35 # define gettext(msgid) msgid
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
40 #include <wordsplit.h>
/* Locale-independent character classification helpers.

   The range-based tests cast their argument to unsigned, so negative
   `char' values never match.  Arguments may be evaluated more than once:
   do not pass expressions with side effects.

   ISDELIM is a plain strchr lookup in ws->ws_delim, which means it also
   matches the NUL character (strchr finds the terminator); it is left
   unchanged here because the scanning code may depend on that.

   ISPUNCT and ISXDIGIT explicitly reject NUL: a terminator is neither
   punctuation nor a hex letter.  ISXDIGIT covers only the letters a-f/A-F;
   decimal digits are handled by ISDIGIT.

   ISPRINT covers ' ' through '~'; DEL (127) is a control character. */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)
#define ISPUNCT(c) \
  ((c) != 0 && strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c)) != NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
#define ISXDIGIT(c) ((c) != 0 && strchr("abcdefABCDEF", (c)) != NULL)
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) < 127)
/* Sizing constants used by alloc_space for the ws_wordv array:
   ALLOC_INIT is the default element count of the first allocation,
   ALLOC_INCR the minimum number of elements added when it grows. */
54 #define ALLOC_INIT 128
55 #define ALLOC_INCR 128
/* Default ws_alloc_die handler (wordsplit_init installs it when
   WRDSF_ALLOC_DIE is not set).  Reports "memory exhausted" through
   wsp->ws_error.  NOTE(review): the rest of the body is omitted from
   this excerpt. */
58 _wsplt_alloc_die (struct wordsplit
*wsp
)
60 wsp
->ws_error (_("memory exhausted"));
/* Default ws_error handler (wordsplit_init installs it when WRDSF_ERROR
   is not set).  Takes a printf-style FMT plus arguments and writes the
   formatted message to stderr with vfprintf. */
65 _wsplt_error (const char *fmt
, ...)
70 vfprintf (stderr
, fmt
, ap
);
/* Forward declaration: _wsplt_nomem calls this before its definition. */
75 static void wordsplit_free_nodes (struct wordsplit
*);
/* Record an out-of-memory condition on WSP.

   Sets ws_errno to WRDSE_NOSPACE, calls ws_alloc_die when
   WRDSF_ENOMEMABRT is set and wordsplit_perror when WRDSF_SHOWERR is set,
   tests WRDSF_REUSE and calls wordsplit_free_nodes.  Callers return this
   function's result as their own error code.

   NOTE(review): the statement guarded by the WRDSF_REUSE test and the
   return statement are omitted from this excerpt. */
78 _wsplt_nomem (struct wordsplit
*wsp
)
81 wsp
->ws_errno
= WRDSE_NOSPACE
;
82 if (wsp
->ws_flags
& WRDSF_ENOMEMABRT
)
83 wsp
->ws_alloc_die (wsp
);
84 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
85 wordsplit_perror (wsp
);
86 if (!(wsp
->ws_flags
& WRDSF_REUSE
))
88 wordsplit_free_nodes (wsp
);
/* Reset the per-run state of WSP.

   When WRDSF_REUSE is set and WRDSF_APPEND is not, the words of the
   previous run are released with wordsplit_free_words.  The visible
   remainder sets ws_wordv to NULL and empties the node list
   (ws_head = ws_tail = NULL).

   NOTE(review): the branch structure around the ws_wordv reset and a few
   further assignments are omitted from this excerpt. */
93 wordsplit_init0 (struct wordsplit
*wsp
)
95 if (wsp
->ws_flags
& WRDSF_REUSE
)
97 if (!(wsp
->ws_flags
& WRDSF_APPEND
))
98 wordsplit_free_words (wsp
);
102 wsp
->ws_wordv
= NULL
;
108 wsp
->ws_head
= wsp
->ws_tail
= NULL
;
/* Prepare WSP for a new split of INPUT under FLAGS.

   Visible steps, in order:
   - store FLAGS in ws_flags;
   - install the default ws_alloc_die / ws_error callbacks unless the
     caller supplied them (WRDSF_ALLOC_DIE / WRDSF_ERROR);
   - fail with WRDSE_USAGE when variable expansion is enabled (WRDSF_NOVAR
     clear) but neither WRDSF_ENV nor WRDSF_GETVAR is given;
   - fail with WRDSE_NOSUPP when WRDSF_NOCMD is not set;
   - with WRDSF_SHOWDBG but no WRDSF_DEBUG callback, reuse ws_error or
     _wsplt_error as the debug callback; WRDSF_SHOWDBG is cleared on a path
     whose condition is omitted here (presumably when no callback is
     available);
   - record INPUT as ws_input and default ws_delim (" \t\n"), ws_comment
     and ws_closure when the corresponding flags are not set;
   - finish with wordsplit_init0.

   The error paths shown return ws_errno, after wordsplit_perror when
   WRDSF_SHOWERR is set.

   NOTE(review): the last parameter, the handling of LEN and WRDSF_DOOFFS,
   and the final return are on lines omitted from this excerpt. */
112 wordsplit_init (struct wordsplit
*wsp
, const char *input
, size_t len
,
115 wsp
->ws_flags
= flags
;
117 if (!(wsp
->ws_flags
& WRDSF_ALLOC_DIE
))
118 wsp
->ws_alloc_die
= _wsplt_alloc_die
;
119 if (!(wsp
->ws_flags
& WRDSF_ERROR
))
120 wsp
->ws_error
= _wsplt_error
;
122 if (!(wsp
->ws_flags
& WRDSF_NOVAR
)
123 && !(wsp
->ws_flags
& (WRDSF_ENV
| WRDSF_GETVAR
)))
126 wsp
->ws_errno
= WRDSE_USAGE
;
127 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
128 wordsplit_perror (wsp
);
129 return wsp
->ws_errno
;
132 if (!(wsp
->ws_flags
& WRDSF_NOCMD
))
135 wsp
->ws_errno
= WRDSE_NOSUPP
;
136 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
137 wordsplit_perror (wsp
);
138 return wsp
->ws_errno
;
141 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
143 if (!(wsp
->ws_flags
& WRDSF_DEBUG
))
145 if (wsp
->ws_flags
& WRDSF_ERROR
)
146 wsp
->ws_debug
= wsp
->ws_error
;
147 else if (wsp
->ws_flags
& WRDSF_SHOWERR
)
148 wsp
->ws_debug
= _wsplt_error
;
150 wsp
->ws_flags
&= ~WRDSF_SHOWDBG
;
154 wsp
->ws_input
= input
;
157 if (!(wsp
->ws_flags
& WRDSF_DOOFFS
))
160 if (!(wsp
->ws_flags
& WRDSF_DELIM
))
161 wsp
->ws_delim
= " \t\n";
163 if (!(wsp
->ws_flags
& WRDSF_COMMENT
))
164 wsp
->ws_comment
= NULL
;
166 if (!(wsp
->ws_flags
& WRDSF_CLOSURE
))
167 wsp
->ws_closure
= NULL
;
171 wordsplit_init0 (wsp
);
177 alloc_space (struct wordsplit
*wsp
, size_t count
)
179 size_t offs
= (wsp
->ws_flags
& WRDSF_DOOFFS
) ? wsp
->ws_offs
: 0;
183 if (wsp
->ws_wordv
== NULL
)
185 newalloc
= offs
+ count
> ALLOC_INIT
? count
: ALLOC_INIT
;
186 ptr
= calloc (newalloc
, sizeof (ptr
[0]));
188 else if (wsp
->ws_wordn
< offs
+ wsp
->ws_wordc
+ count
)
190 newalloc
= offs
+ wsp
->ws_wordc
+
191 (count
> ALLOC_INCR
? count
: ALLOC_INCR
);
192 ptr
= realloc (wsp
->ws_wordv
, newalloc
* sizeof (ptr
[0]));
199 wsp
->ws_wordn
= newalloc
;
203 return _wsplt_nomem (wsp
);
208 /* Node state flags */
/* Bit masks kept in the `flags' member of struct wordsplit_node.
   _WSNF_EMPTYOK is only a request passed to wordsplit_add_segm, which
   strips it (together with _WSNF_WORD) before storing the flags. */
209 #define _WSNF_NULL 0x01 /* null node (a noop) */
210 #define _WSNF_WORD 0x02 /* node contains word in v.word */
211 #define _WSNF_QUOTE 0x04 /* text is quoted */
212 #define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
213 #define _WSNF_JOIN 0x10 /* node must be joined with the next node */
214 #define _WSNF_SEXP 0x20 /* is a sed expression */
216 #define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
217 wordsplit_add_segm must add the
218 segment even if it is empty */
/* Element of the doubly linked list (ws_head .. ws_tail) that holds the
   pieces of the input while they are processed.  As used by the accessors
   in this file, member `v' carries either an input range
   [v.segm.beg, v.segm.end) or, when _WSNF_WORD is set, a string v.word.
   NOTE(review): the lines declaring `v' itself are omitted from this
   excerpt. */
220 struct wordsplit_node
222 struct wordsplit_node
*prev
; /* Previous element */
223 struct wordsplit_node
*next
; /* Next element */
224 int flags
; /* Node flags */
229 size_t beg
; /* Start of word in ws_input */
230 size_t end
; /* End of word in ws_input */
/* Build a short textual form of node FLAGS in the static buffer retbuf,
   for the debug dump (wordsplit_dump_nodes prints the result with %s).
   Tests _WSNF_WORD, else _WSNF_NULL, then _WSNF_QUOTE, _WSNF_NOEXPAND,
   _WSNF_JOIN and _WSNF_SEXP in that order.  Because the buffer is static,
   each call overwrites the previous result.
   NOTE(review): the characters written for each flag are on lines omitted
   from this excerpt. */
237 wsnode_flagstr (int flags
)
239 static char retbuf
[6];
242 if (flags
& _WSNF_WORD
)
244 else if (flags
& _WSNF_NULL
)
248 if (flags
& _WSNF_QUOTE
)
252 if (flags
& _WSNF_NOEXPAND
)
256 if (flags
& _WSNF_JOIN
)
260 if (flags
& _WSNF_SEXP
)
/* Return a pointer to the text of node P.  For a plain segment node this
   is wsp->ws_input + p->v.segm.beg; the text is not NUL-terminated at the
   segment end, so use wsnode_len for its length.
   NOTE(review): the values returned for _WSNF_NULL and _WSNF_WORD nodes
   are on lines omitted from this excerpt. */
269 wsnode_ptr (struct wordsplit
*wsp
, struct wordsplit_node
*p
)
271 if (p
->flags
& _WSNF_NULL
)
273 else if (p
->flags
& _WSNF_WORD
)
276 return wsp
->ws_input
+ p
->v
.segm
.beg
;
/* Return the length of the text of node P: strlen (p->v.word) for
   _WSNF_WORD nodes, segm.end - segm.beg for segment nodes.
   NOTE(review): the value returned for _WSNF_NULL nodes is on a line
   omitted from this excerpt. */
280 wsnode_len (struct wordsplit_node
*p
)
282 if (p
->flags
& _WSNF_NULL
)
284 else if (p
->flags
& _WSNF_WORD
)
285 return strlen (p
->v
.word
);
287 return p
->v
.segm
.end
- p
->v
.segm
.beg
;
/* Allocate a zero-filled node with calloc.  The error path returns
   _wsplt_nomem (wsp), presumably when calloc fails; callers treat a
   nonzero result as failure.
   NOTE(review): the failure test and the store through PNODE are on lines
   omitted from this excerpt. */
291 wsnode_new (struct wordsplit
*wsp
, struct wordsplit_node
**pnode
)
293 struct wordsplit_node
*node
= calloc (1, sizeof (*node
));
295 return _wsplt_nomem (wsp
);
/* Dispose of node P.  Tests _WSNF_WORD first.
   NOTE(review): the statement it guards (presumably releasing v.word) and
   the release of P itself are on lines omitted from this excerpt;
   confirm. */
301 wsnode_free (struct wordsplit_node
*p
)
303 if (p
->flags
& _WSNF_WORD
)
/* Link NODE at the end of the list: its prev pointer is set to the
   current ws_tail and the tail's next pointer is set to NODE.
   NOTE(review): the empty-list case and the ws_tail update are on lines
   omitted from this excerpt. */
309 wsnode_append (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
312 node
->prev
= wsp
->ws_tail
;
314 wsp
->ws_tail
->next
= node
;
/* Unlink NODE from the list without disposing of it.  The neighbours (or
   ws_head / ws_tail when NODE is at an end) are re-pointed around it and
   NODE's own next/prev are reset to NULL.  _WSNF_JOIN is also cleared on a
   neighbouring node `p'.
   NOTE(review): the conditions selecting each assignment, including the
   one for the _WSNF_JOIN clearing, are on lines omitted from this
   excerpt. */
321 wsnode_remove (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
323 struct wordsplit_node
*p
;
328 p
->next
= node
->next
;
330 p
->flags
&= ~_WSNF_JOIN
;
333 wsp
->ws_head
= node
->next
;
337 p
->prev
= node
->prev
;
339 wsp
->ws_tail
= node
->prev
;
341 node
->next
= node
->prev
= NULL
;
/* Insert NODE into the list next to ANCHOR; BEFORE selects the side.
   Visible pieces: making NODE the only element (next = prev = NULL,
   ws_head = ws_tail = NODE), and a recursive call that inserts NODE
   relative to anchor->prev with BEFORE = 0.
   NOTE(review): the conditions and the remaining link updates are on
   lines omitted from this excerpt. */
345 wsnode_insert (struct wordsplit
*wsp
, struct wordsplit_node
*node
,
346 struct wordsplit_node
*anchor
, int before
)
350 node
->next
= node
->prev
= NULL
;
351 wsp
->ws_head
= wsp
->ws_tail
= node
;
356 wsnode_insert (wsp
, node
, anchor
->prev
, 0);
367 struct wordsplit_node
*p
;
/* Append to the list a segment node covering input offsets [BEG, END).
   FLG supplies the node flags; _WSNF_WORD and _WSNF_EMPTYOK are masked
   out before they are stored.  An empty range (END == BEG) is special
   unless _WSNF_EMPTYOK is passed.  The node comes from wsnode_new and is
   linked with wsnode_append.
   NOTE(review): the action for an empty range and the return statements
   are on lines omitted from this excerpt. */
381 wordsplit_add_segm (struct wordsplit
*wsp
, size_t beg
, size_t end
, int flg
)
383 struct wordsplit_node
*node
;
386 if (end
== beg
&& !(flg
& _WSNF_EMPTYOK
))
388 rc
= wsnode_new (wsp
, &node
);
391 node
->flags
= flg
& ~(_WSNF_WORD
| _WSNF_EMPTYOK
);
392 node
->v
.segm
.beg
= beg
;
393 node
->v
.segm
.end
= end
;
394 wsnode_append (wsp
, node
);
/* Walk the list from ws_head, fetching each node's next pointer before
   the node is handled, then reset ws_head and ws_tail to NULL.
   NOTE(review): the per-node disposal is on lines omitted from this
   excerpt. */
399 wordsplit_free_nodes (struct wordsplit
*wsp
)
401 struct wordsplit_node
*p
;
403 for (p
= wsp
->ws_head
; p
;)
405 struct wordsplit_node
*next
= p
->next
;
409 wsp
->ws_head
= wsp
->ws_tail
= NULL
;
/* Debug helper: print every node through wsp->ws_debug.  Each line shows
   the node index, its address, the flags in hex and as text
   (wsnode_flagstr), and the node text: v.word for _WSNF_WORD nodes,
   otherwise the referenced input range printed with %.*s. */
413 wordsplit_dump_nodes (struct wordsplit
*wsp
)
415 struct wordsplit_node
*p
;
418 for (p
= wsp
->ws_head
, n
= 0; p
; p
= p
->next
, n
++)
420 if (p
->flags
& _WSNF_WORD
)
421 wsp
->ws_debug ("%4d: %p: %#04x (%s):%s;",
422 n
, p
, p
->flags
, wsnode_flagstr (p
->flags
), p
->v
.word
);
424 wsp
->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
425 n
, p
, p
->flags
, wsnode_flagstr (p
->flags
),
426 (int) (p
->v
.segm
.end
- p
->v
.segm
.beg
),
427 wsp
->ws_input
+ p
->v
.segm
.beg
);
/* Merge NODE and the nodes joined to it into a single word.

   Pass 1 walks forward while _WSNF_JOIN is set, summing wsnode_len of
   each node, and adds the length of the node that ends the run.  A buffer
   of len + 1 bytes is allocated; the error path returns
   _wsplt_nomem (wsp).  Pass 2 copies each node's text (wsnode_ptr /
   wsnode_len) into the buffer and unlinks nodes with wsnode_remove.
   Finally _WSNF_JOIN is cleared on NODE and _WSNF_WORD is set.

   NOTE(review): which nodes are unlinked, the handling of a previous
   v.word on NODE, and the return value are on lines omitted from this
   excerpt. */
432 coalesce_segment (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
434 struct wordsplit_node
*p
, *end
;
439 for (p
= node
; p
&& (p
->flags
& _WSNF_JOIN
); p
= p
->next
)
441 len
+= wsnode_len (p
);
443 len
+= wsnode_len (p
);
446 buf
= malloc (len
+ 1);
448 return _wsplt_nomem (wsp
);
452 for (stop
= 0; !stop
;)
454 struct wordsplit_node
*next
= p
->next
;
455 const char *str
= wsnode_ptr (wsp
, p
);
456 size_t slen
= wsnode_len (p
);
458 memcpy (cur
, str
, slen
);
462 wsnode_remove (wsp
, p
);
471 node
->flags
&= ~_WSNF_JOIN
;
473 if (node
->flags
& _WSNF_WORD
)
476 node
->flags
|= _WSNF_WORD
;
/* Remove quoting/escapes from the text of every node.

   uqfn is the unquoting routine: wordsplit_c_unquote_copy when
   WRDSF_CESCAPES is set, wordsplit_sh_unquote_copy otherwise.  With
   WRDSF_QUOTE set, a node is unquoted unless it has _WSNF_NOEXPAND.
   A node that is not yet a _WSNF_WORD first gets a malloc'ed copy of its
   text (slen + 1 bytes) and is flagged _WSNF_WORD.  The text is then
   rewritten into p->v.word by wordsplit_general_unquote_copy when
   WRDSF_ESCAPE is set, or by uqfn.

   The allocation error path returns _wsplt_nomem (wsp).

   NOTE(review): the handling when WRDSF_QUOTE is clear, the exact
   branching between the two unquote calls, and the final return are on
   lines omitted from this excerpt. */
482 wsnode_quoteremoval (struct wordsplit
*wsp
)
484 struct wordsplit_node
*p
;
485 void (*uqfn
) (char *, const char *, size_t) =
486 (wsp
->ws_flags
& WRDSF_CESCAPES
) ?
487 wordsplit_c_unquote_copy
: wordsplit_sh_unquote_copy
;
489 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
491 const char *str
= wsnode_ptr (wsp
, p
);
492 size_t slen
= wsnode_len (p
);
495 if (wsp
->ws_flags
& WRDSF_QUOTE
)
497 unquote
= !(p
->flags
& _WSNF_NOEXPAND
);
504 if (!(p
->flags
& _WSNF_WORD
))
506 char *newstr
= malloc (slen
+ 1);
508 return _wsplt_nomem (wsp
);
509 memcpy (newstr
, str
, slen
);
512 p
->flags
|= _WSNF_WORD
;
515 if (wsp
->ws_flags
& WRDSF_ESCAPE
)
516 wordsplit_general_unquote_copy (p
->v
.word
, str
, slen
,
519 uqfn (p
->v
.word
, str
, slen
);
/* Walk the list and call coalesce_segment on every node that has
   _WSNF_JOIN set, so each joined run collapses into one word.
   NOTE(review): the action on a nonzero coalesce_segment result and the
   final return are on lines omitted from this excerpt. */
526 wsnode_coalesce (struct wordsplit
*wsp
)
528 struct wordsplit_node
*p
;
530 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
532 if (p
->flags
& _WSNF_JOIN
)
533 if (coalesce_segment (wsp
, p
))
/* Convert the node list into entries of wsp->ws_wordv.

   After a first pass over the list, room for n + 1 entries is requested
   from alloc_space.  In the second pass each node's text (wsnode_ptr /
   wsnode_len) is copied into a freshly malloc'ed string of slen + 1 bytes,
   stored at ws_wordv[ws_offs + ws_wordc].  The pointer is stored before
   it is checked, as the comment inside the loop explains.  A NULL is
   stored at ws_wordv[ws_offs + ws_wordc] at the end.

   The allocation error path returns _wsplt_nomem (wsp).

   NOTE(review): the counting of n, the ws_wordc increment, the string
   termination and the final return are on lines omitted from this
   excerpt.  The in-loop comment is also missing its closing line. */
540 wordsplit_finish (struct wordsplit
*wsp
)
542 struct wordsplit_node
*p
;
547 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
550 if (alloc_space (wsp
, n
+ 1))
553 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
555 const char *str
= wsnode_ptr (wsp
, p
);
556 size_t slen
= wsnode_len (p
);
557 char *newstr
= malloc (slen
+ 1);
559 /* Assign newstr first, even if it is NULL. This way
560 wordsplit_free will work even if we return
562 wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
] = newstr
;
564 return _wsplt_nomem (wsp
);
565 memcpy (newstr
, str
, slen
);
571 wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
] = NULL
;
576 /* Variable expansion */
/* Create a node for LEN bytes of NODE's text starting at offset BEG and
   insert it after *PTAIL.

   If NODE is a _WSNF_WORD node, the bytes are copied into a malloc'ed
   string of len + 1 bytes and the new node is flagged _WSNF_WORD.
   Otherwise the new node refers to the input range
   [node->v.segm.beg + BEG, node->v.segm.beg + BEG + LEN).
   FLG is OR-ed into the new node's flags.

   The allocation error path returns _wsplt_nomem (wsp).

   NOTE(review): the early exits, the update of *PTAIL and the final
   return are on lines omitted from this excerpt. */
578 node_split_prefix (struct wordsplit
*wsp
,
579 struct wordsplit_node
**ptail
,
580 struct wordsplit_node
*node
,
581 size_t beg
, size_t len
, int flg
)
583 struct wordsplit_node
*newnode
;
587 if (wsnode_new (wsp
, &newnode
))
589 wsnode_insert (wsp
, newnode
, *ptail
, 0);
590 if (node
->flags
& _WSNF_WORD
)
592 const char *str
= wsnode_ptr (wsp
, node
);
593 char *newstr
= malloc (len
+ 1);
595 return _wsplt_nomem (wsp
);
596 memcpy (newstr
, str
+ beg
, len
);
598 newnode
->flags
= _WSNF_WORD
;
599 newnode
->v
.word
= newstr
;
603 newnode
->v
.segm
.beg
= node
->v
.segm
.beg
+ beg
;
604 newnode
->v
.segm
.end
= newnode
->v
.segm
.beg
+ len
;
606 newnode
->flags
|= flg
;
/* Scan STR from index I (bounded by LEN) with a three-state machine:
   st_init, st_squote and st_dquote.  expvar treats a nonzero result as an
   unbalanced-brace error (WRDSE_CBRACE).
   NOTE(review): nearly all of the body is omitted from this excerpt.
   Presumably it stores the offset of the matching '}' in *POFF and
   returns 0 on success; confirm against the full source. */
612 find_closing_cbrace (const char *str
, size_t i
, size_t len
, size_t * poff
)
615 { st_init
, st_squote
, st_dquote
} state
= st_init
;
655 else if (str
[i
] == '"')
/* Look up the variable NAME (LEN bytes; need not be NUL-terminated) in
   wsp->ws_env.  The lookup is only attempted when WRDSF_ENV is set.

   With WRDSF_ENV_KV the array holds alternating key and value strings:
   a key matches when its length equals LEN and its bytes equal NAME, and
   the following element is returned.  The scan stops at a NULL key or a
   NULL value.

   Otherwise entries are "NAME=VALUE" strings: an entry matches when its
   first LEN bytes equal NAME and are followed by '='.

   NOTE(review): the returns for the WRDSF_ENV-clear case, for a match in
   the "NAME=VALUE" form and for "not found" are on lines omitted from
   this excerpt. */
664 wordsplit_find_env (struct wordsplit
*wsp
, const char *name
, size_t len
)
668 if (!(wsp
->ws_flags
& WRDSF_ENV
))
671 if (wsp
->ws_flags
& WRDSF_ENV_KV
)
673 /* A key-value pair environment */
674 for (i
= 0; wsp
->ws_env
[i
]; i
++)
676 size_t elen
= strlen (wsp
->ws_env
[i
]);
677 if (elen
== len
&& memcmp (wsp
->ws_env
[i
], name
, elen
) == 0)
678 return wsp
->ws_env
[i
+ 1];
679 /* Skip the value. Break the loop if it is NULL. */
681 if (wsp
->ws_env
[i
] == NULL
)
687 /* Usual (A=B) environment. */
688 for (i
= 0; wsp
->ws_env
[i
]; i
++)
691 const char *var
= wsp
->ws_env
[i
];
693 for (j
= 0; j
< len
; j
++)
694 if (name
[j
] != var
[j
])
696 if (j
== len
&& var
[j
] == '=')
/* Expand one variable reference and insert the result after *PTAIL.

   STR points at the first character after the introducer (start is
   str - 1, presumably the '$'); LEN bounds the scan; FLG is OR-ed into
   the flags of the nodes created.

   Reference syntax handled by the visible code:
   - a name starting with a letter or '_' and continuing with letters,
     digits or '_';
   - a braced form starting with '{', whose name ends at '}' or ':'; the
     text after ':' is recorded as defstr and its closing brace is located
     with find_closing_cbrace.  Brace errors set WRDSE_CBRACE.
   - anything else becomes a literal two-character word, '$' followed by
     str[0].

   Value lookup: wordsplit_find_env first (the result is strdup'ed), then
   the ws_getvar callback when WRDSF_GETVAR is set.  If still undefined,
   WRDSF_UNDEF makes it an error (WRDSE_UNDEF), WRDSF_WARNUNDEF emits a
   warning through ws_error, and WRDSF_KEEPUNDEF is consulted.

   Result nodes:
   - FLG has _WSNF_QUOTE: one _WSNF_WORD | _WSNF_NOEXPAND node holding a
     copy of the value;
   - empty value: one _WSNF_NULL node;
   - otherwise the value is split again with wordsplit (delimiters taken
     from WSP; WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS) and each
     resulting word becomes a _WSNF_WORD node; _WSNF_JOIN from FLG is kept
     only on the last one;
   - no value but WRDSF_KEEPUNDEF: the reference text from start through
     *PEND is copied verbatim into a _WSNF_WORD | _WSNF_NOEXPAND node;
   - a further _WSNF_NULL node is created on the last visible path.

   Allocation failures return _wsplt_nomem (wsp).

   NOTE(review): many lines (braces, *ptail / *pend updates, several
   returns and conditions) are omitted from this excerpt; the branch
   structure above is read from the visible lines only. */
704 expvar (struct wordsplit
*wsp
, const char *str
, size_t len
,
705 struct wordsplit_node
**ptail
, const char **pend
, int flg
)
708 const char *defstr
= NULL
;
711 struct wordsplit_node
*newnode
;
712 const char *start
= str
- 1;
714 if (ISALPHA (str
[0]) || str
[0] == '_')
716 for (i
= 1; i
< len
; i
++)
717 if (!(ISALNUM (str
[i
]) || str
[i
] == '_'))
721 else if (str
[0] == '{')
725 for (i
= 1; i
< len
; i
++)
726 if (str
[i
] == '}' || str
[i
] == ':')
732 defstr
= str
+ i
+ 1;
733 if (find_closing_cbrace (str
, i
+ 1, len
, &j
))
735 wsp
->ws_errno
= WRDSE_CBRACE
;
740 else if (str
[i
] == '}')
747 wsp
->ws_errno
= WRDSE_CBRACE
;
753 if (wsnode_new (wsp
, &newnode
))
755 wsnode_insert (wsp
, newnode
, *ptail
, 0);
757 newnode
->flags
= _WSNF_WORD
| flg
;
758 newnode
->v
.word
= malloc (3);
759 if (!newnode
->v
.word
)
760 return _wsplt_nomem (wsp
);
761 newnode
->v
.word
[0] = '$';
762 newnode
->v
.word
[1] = str
[0];
763 newnode
->v
.word
[2] = 0;
768 /* Actually expand the variable */
769 /* str - start of the variable name
771 defstr - default replacement str */
773 vptr
= wordsplit_find_env (wsp
, str
, i
);
776 value
= strdup (vptr
);
778 return _wsplt_nomem (wsp
);
780 else if (wsp
->ws_flags
& WRDSF_GETVAR
)
781 value
= wsp
->ws_getvar (str
, i
, wsp
->ws_closure
);
782 else if (wsp
->ws_flags
& WRDSF_UNDEF
)
784 wsp
->ws_errno
= WRDSE_UNDEF
;
785 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
786 wordsplit_perror (wsp
);
791 if (wsp
->ws_flags
& WRDSF_WARNUNDEF
)
792 wsp
->ws_error (_("warning: undefined variable `%.*s'"), (int) i
, str
);
793 if (wsp
->ws_flags
& WRDSF_KEEPUNDEF
)
798 /* FIXME: handle defstr */
801 if (flg
& _WSNF_QUOTE
)
803 if (wsnode_new (wsp
, &newnode
))
805 wsnode_insert (wsp
, newnode
, *ptail
, 0);
807 newnode
->flags
= _WSNF_WORD
| _WSNF_NOEXPAND
| flg
;
808 newnode
->v
.word
= strdup (value
);
809 if (!newnode
->v
.word
)
810 return _wsplt_nomem (wsp
);
812 else if (*value
== 0)
814 /* Empty string is a special case */
815 if (wsnode_new (wsp
, &newnode
))
817 wsnode_insert (wsp
, newnode
, *ptail
, 0);
819 newnode
->flags
= _WSNF_NULL
;
826 ws
.ws_delim
= wsp
->ws_delim
;
827 if (wordsplit (value
, &ws
,
828 WRDSF_NOVAR
| WRDSF_NOCMD
| WRDSF_DELIM
| WRDSF_WS
))
830 wordsplit_free (&ws
);
833 for (i
= 0; i
< ws
.ws_wordc
; i
++)
835 if (wsnode_new (wsp
, &newnode
))
837 wsnode_insert (wsp
, newnode
, *ptail
, 0);
839 newnode
->flags
= _WSNF_WORD
|
841 (i
+ 1 < ws
.ws_wordc
? (flg
& ~_WSNF_JOIN
) : flg
);
842 newnode
->v
.word
= strdup (ws
.ws_wordv
[i
]);
843 if (!newnode
->v
.word
)
844 return _wsplt_nomem (wsp
);
846 wordsplit_free (&ws
);
849 else if (wsp
->ws_flags
& WRDSF_KEEPUNDEF
)
851 size_t size
= *pend
- start
+ 1;
853 if (wsnode_new (wsp
, &newnode
))
855 wsnode_insert (wsp
, newnode
, *ptail
, 0);
857 newnode
->flags
= _WSNF_WORD
| _WSNF_NOEXPAND
| flg
;
858 newnode
->v
.word
= malloc (size
+ 1);
859 if (!newnode
->v
.word
)
860 return _wsplt_nomem (wsp
);
861 memcpy (newnode
->v
.word
, start
, size
);
862 newnode
->v
.word
[size
] = 0;
866 if (wsnode_new (wsp
, &newnode
))
868 wsnode_insert (wsp
, newnode
, *ptail
, 0);
870 newnode
->flags
= _WSNF_NULL
;
/* Expand the variable references found in the text of NODE.

   The text is scanned from str to str + slen.  Around each reference the
   preceding text is split off with node_split_prefix (flagged _WSNF_JOIN
   so the pieces are joined again later), and the reference is expanded
   with expvar.  expvar receives NODE's _WSNF_JOIN / _WSNF_QUOTE flags
   and advances p.  The text after the last reference is added by a final
   node_split_prefix call carrying NODE's own _WSNF_JOIN flag.  New nodes
   are chained after `tail', which starts at NODE; NODE is unlinked with
   wsnode_remove near the end.

   NOTE(review): the detection of a reference, the conditions around the
   calls (including the one guarding wsnode_remove) and the returns are on
   lines omitted from this excerpt. */
876 node_expand_vars (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
878 const char *str
= wsnode_ptr (wsp
, node
);
879 size_t slen
= wsnode_len (node
);
880 const char *end
= str
+ slen
;
883 struct wordsplit_node
*tail
= node
;
885 for (p
= str
; p
< end
; p
++)
897 tail
->flags
|= _WSNF_JOIN
;
898 if (node_split_prefix (wsp
, &tail
, node
, off
, n
, _WSNF_JOIN
))
901 if (expvar (wsp
, p
, slen
- n
, &tail
, &p
,
902 node
->flags
& (_WSNF_JOIN
| _WSNF_QUOTE
)))
911 tail
->flags
|= _WSNF_JOIN
;
912 if (node_split_prefix (wsp
, &tail
, node
, off
, p
- str
,
913 node
->flags
& _WSNF_JOIN
))
918 wsnode_remove (wsp
, node
);
924 /* Remove NULL lists */
/* Walk the list and unlink, with wsnode_remove, every node flagged
   _WSNF_NULL.  The next pointer is fetched before a node is examined
   because the node may be unlinked.
   NOTE(review): the loop advance and any disposal of the removed node are
   on lines omitted from this excerpt. */
926 wsnode_nullelim (struct wordsplit
*wsp
)
928 struct wordsplit_node
*p
;
930 for (p
= wsp
->ws_head
; p
;)
932 struct wordsplit_node
*next
= p
->next
;
933 if (p
->flags
& _WSNF_NULL
)
935 wsnode_remove (wsp
, p
);
/* Variable-expansion pass: run node_expand_vars on every node that lacks
   _WSNF_NOEXPAND, then drop the resulting null nodes with
   wsnode_nullelim.  The next pointer is saved up front because expansion
   edits the list around the current node.
   NOTE(review): the action on a nonzero node_expand_vars result and the
   final return are on lines omitted from this excerpt. */
943 wordsplit_varexp (struct wordsplit
*wsp
)
945 struct wordsplit_node
*p
;
947 for (p
= wsp
->ws_head
; p
;)
949 struct wordsplit_node
*next
= p
->next
;
950 if (!(p
->flags
& _WSNF_NOEXPAND
))
951 if (node_expand_vars (wsp
, p
))
956 wsnode_nullelim (wsp
);
960 /* Strip off any leading and trailing whitespace. This function is called
961 right after the initial scanning, therefore it assumes that every
962 node in the list is a text reference node. */
/* Whitespace here means the ISWS set (space, TAB, newline).  Nodes flagged
   _WSNF_QUOTE are tested separately.  A segment left empty
   (segm.beg == segm.end) is flagged _WSNF_NULL, and all such nodes are
   removed by the closing wsnode_nullelim call.
   NOTE(review): the treatment of quoted nodes and the stores of the
   trimmed offsets are on lines omitted from this excerpt. */
964 wordsplit_trimws (struct wordsplit
*wsp
)
966 struct wordsplit_node
*p
;
968 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
972 if (p
->flags
& _WSNF_QUOTE
)
975 /* Skip leading whitespace: */
976 for (n
= p
->v
.segm
.beg
; n
< p
->v
.segm
.end
&& ISWS (wsp
->ws_input
[n
]);
980 /* Trim trailing whitespace */
981 for (n
= p
->v
.segm
.end
;
982 n
> p
->v
.segm
.beg
&& ISWS (wsp
->ws_input
[n
- 1]); n
--);
984 if (p
->v
.segm
.beg
== p
->v
.segm
.end
)
985 p
->flags
|= _WSNF_NULL
;
988 wsnode_nullelim (wsp
);
/* Advance over sed-style substitute expressions in COMMAND starting at
   index I (bounded by LEN); scan_word assigns the result to its scan
   index.

   An expression must start with 's' followed by a punctuation character
   (ISPUNCT) with at least three more characters available; that
   punctuation character becomes the delimiter.  The body is then scanned
   for the delimiter and for backslashes, driven by a `state' variable.
   The enclosing loop repeats while state is 3 and the current character
   is ';', and a ';' is also tested at the top of each round.

   NOTE(review): the state transitions and the return statement are on
   lines omitted from this excerpt. */
992 skip_sed_expr (const char *command
, size_t i
, size_t len
)
1000 if (command
[i
] == ';')
1002 if (!(command
[i
] == 's' && i
+ 3 < len
&& ISPUNCT (command
[i
+ 1])))
1005 delim
= command
[++i
];
1007 for (i
++; i
< len
; i
++)
1011 if (command
[i
] == delim
|| !ISALNUM (command
[i
]))
1014 else if (command
[i
] == '\\')
1016 else if (command
[i
] == delim
)
1020 while (state
== 3 && i
< len
&& command
[i
] == ';');
/* Work out where scanning resumes after the word that ended at
   wsp->ws_endp; callers assign the result to their `start' index.

   With WRDSF_SQUEEZE_DELIMS, a run of delimiters is skipped.  If
   WRDSF_RETURN_DELIMS is also set and the character at `start' is a
   delimiter, only repetitions of that same character are skipped;
   otherwise any delimiter characters are skipped.  Both loops are bounded
   by ws_len.  Without WRDSF_SQUEEZE_DELIMS, WRDSF_RETURN_DELIMS is
   consulted.

   NOTE(review): the loop bodies, the action of the final test and the
   return are on lines omitted from this excerpt. */
1025 skip_delim (struct wordsplit
*wsp
)
1027 size_t start
= wsp
->ws_endp
;
1028 if (wsp
->ws_flags
& WRDSF_SQUEEZE_DELIMS
)
1030 if ((wsp
->ws_flags
& WRDSF_RETURN_DELIMS
) &&
1031 ISDELIM (wsp
, wsp
->ws_input
[start
]))
1033 int delim
= wsp
->ws_input
[start
];
1036 while (start
< wsp
->ws_len
&& delim
== wsp
->ws_input
[start
]);
1042 while (start
< wsp
->ws_len
&& ISDELIM (wsp
, wsp
->ws_input
[start
]));
1047 if (!(wsp
->ws_flags
& WRDSF_RETURN_DELIMS
))
/* Scan the quoted string whose opening quote is at input offset START.

   The quote character q is ws_input[start].  The scan runs from
   start + 1 until q or the end of input; inside double quotes a backslash
   is given special treatment.  When the closing quote is found, the text
   between the quotes is added with wordsplit_add_segm using
   _WSNF_QUOTE | _WSNF_EMPTYOK, so an empty quoted string still yields a
   node; _WSNF_NOEXPAND is added on one path.  Otherwise ws_endp is set to
   START, ws_errno to WRDSE_QUOTE, and the error is reported when
   WRDSF_SHOWERR is set.

   NOTE(review): the backslash action, the condition for _WSNF_NOEXPAND,
   the store through END and the return values are on lines omitted from
   this excerpt. */
1058 scan_qstring (struct wordsplit
*wsp
, size_t start
, size_t * end
)
1061 const char *command
= wsp
->ws_input
;
1062 size_t len
= wsp
->ws_len
;
1063 char q
= command
[start
];
1065 for (j
= start
+ 1; j
< len
&& command
[j
] != q
; j
++)
1066 if (q
== '"' && command
[j
] == '\\')
1068 if (j
< len
&& command
[j
] == q
)
1070 int flags
= _WSNF_QUOTE
| _WSNF_EMPTYOK
;
1072 flags
|= _WSNF_NOEXPAND
;
1073 if (wordsplit_add_segm (wsp
, start
+ 1, j
, flags
))
1079 wsp
->ws_endp
= start
;
1080 wsp
->ws_errno
= WRDSE_QUOTE
;
1081 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
1082 wordsplit_perror (wsp
);
/* Scan one word of the input beginning at offset START and add its pieces
   to the node list.  wordsplit_process_list compares the result with
   _WRDS_OK and _WRDS_ERR.

   Cases visible in this excerpt:
   - ws_errno is set to WRDSE_EOF on one early path (presumably when
     START is at the end of input);
   - with WRDSF_SED_EXPR, text that looks like a sed expression ('s',
     punctuation, at least three more characters) is skipped with
     skip_sed_expr;
   - a character from ws_comment starts a comment: the scan runs to the
     next newline and the text before the comment is added as a segment;
   - with WRDSF_QUOTE, a backslash is given special treatment;
   - a single quote (WRDSF_SQUOTE) or double quote (WRDSF_DQUOTE) ends the
     current piece, which is added with _WSNF_JOIN, and the quoted part is
     scanned by scan_qstring; when `join' is set the previous tail node is
     also flagged _WSNF_JOIN so adjacent pieces form one word;
   - a delimiter ends the word; WRDSF_RETURN_DELIMS and
     WRDSF_SQUEEZE_DELIMS influence the flags (without squeezing,
     _WSNF_EMPTYOK is added so empty fields are kept);
   - WRDSF_INCREMENTAL is tested before returning.

   NOTE(review): loop headers, several branch bodies, the ws_endp updates
   and all return statements are on lines omitted from this excerpt. */
1089 scan_word (struct wordsplit
*wsp
, size_t start
)
1091 size_t len
= wsp
->ws_len
;
1092 const char *command
= wsp
->ws_input
;
1093 const char *comment
= wsp
->ws_comment
;
1101 wsp
->ws_errno
= WRDSE_EOF
;
1107 if (wsp
->ws_flags
& WRDSF_SED_EXPR
1108 && command
[i
] == 's' && i
+ 3 < len
&& ISPUNCT (command
[i
+ 1]))
1111 i
= skip_sed_expr (command
, i
, len
);
1113 else if (!ISDELIM (wsp
, command
[i
]))
1117 if (comment
&& strchr (comment
, command
[i
]) != NULL
)
1120 for (j
= i
+ 1; j
< len
&& command
[j
] != '\n'; j
++)
1122 if (wordsplit_add_segm (wsp
, start
, i
, 0))
1128 if (wsp
->ws_flags
& WRDSF_QUOTE
)
1130 if (command
[i
] == '\\')
1138 if (((wsp
->ws_flags
& WRDSF_SQUOTE
) && command
[i
] == '\'') ||
1139 ((wsp
->ws_flags
& WRDSF_DQUOTE
) && command
[i
] == '"'))
1141 if (join
&& wsp
->ws_tail
)
1142 wsp
->ws_tail
->flags
|= _WSNF_JOIN
;
1143 if (wordsplit_add_segm (wsp
, start
, i
, _WSNF_JOIN
))
1145 if (scan_qstring (wsp
, i
, &i
))
1152 if (ISDELIM (wsp
, command
[i
]))
1158 else if (wsp
->ws_flags
& WRDSF_RETURN_DELIMS
)
1162 else if (!(wsp
->ws_flags
& WRDSF_SQUEEZE_DELIMS
))
1163 flags
|= _WSNF_EMPTYOK
;
1165 if (join
&& i
> start
&& wsp
->ws_tail
)
1166 wsp
->ws_tail
->flags
|= _WSNF_JOIN
;
1167 if (wordsplit_add_segm (wsp
, start
, i
, flags
))
1170 if (wsp
->ws_flags
& WRDSF_INCREMENTAL
)
/* Escape translation table, stored as consecutive pairs: the character
   written after a backslash, then the character it stands for
   (\\ ", \a \b \f \n \r \t \v).  wordsplit_c_unquote_char walks the pairs
   from the start, wordsplit_c_quote_char from the end. */
1175 static char quote_transtab
[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
/* Translate the escape letter C using quote_transtab, stepping through
   the table two characters at a time from its start.
   NOTE(review): the comparison and the return statements are on lines
   omitted from this excerpt. */
1178 wordsplit_c_unquote_char (int c
)
1182 for (p
= quote_transtab
; *p
; p
+= 2)
/* Reverse lookup in quote_transtab: starts at the last character of the
   table and steps backwards two characters at a time.  Callers compare
   the result with -1, presumably meaning "no single-letter escape".
   NOTE(review): the comparison and the return statements are on lines
   omitted from this excerpt. */
1191 wordsplit_c_quote_char (int c
)
1195 for (p
= quote_transtab
+ sizeof (quote_transtab
) - 2;
1196 p
> quote_transtab
; p
-= 2)
1205 (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 )) /* digit value of c: 0-9, 10-15 for hex letters, else 255; the #define line of this macro is omitted from this excerpt */
/* Convert up to CNT digits of BASE at SRC into an integer accumulated in
   val.  Conversion stops at the first byte above 127 or whose to_num
   value is not below BASE.
   NOTE(review): the store through PVAL and the return value are on lines
   omitted from this excerpt; callers keep the result in a variable named
   `off'. */
1208 xtonum (int *pval
, const char *src
, int base
, int cnt
)
1212 for (i
= 0, val
= 0; i
< cnt
; i
++, src
++)
1214 int n
= *(unsigned char *) src
;
1215 if (n
> 127 || (n
= to_num (n
)) >= base
)
1217 val
= val
* base
+ n
;
/* Measure STR for C-style quoting.  Characters are classified in this
   order: space or double quote (strchr (" \"", ...)), double quote,
   printable other than TAB and backslash, then characters for which
   wordsplit_c_quote_char does not return -1.
   NOTE(review): the length arithmetic, the use of QUOTE_HEX and the store
   through QUOTE are on lines omitted from this excerpt. */
1224 wordsplit_c_quoted_length (const char *str
, int quote_hex
, int *quote
)
1231 if (strchr (" \"", *str
))
1236 else if (*str
== '"')
1238 else if (*str
!= '\t' && *str
!= '\\' && ISPRINT (*str
))
1244 if (wordsplit_c_quote_char (*str
) != -1)
/* Copy N bytes of SRC to DST, dropping each backslash that precedes a
   character listed in ESCAPABLE, and NUL-terminate the result.

   DST        - destination buffer; must have room for N + 1 bytes.  It may
                be the same buffer as SRC, because the write position never
                overtakes the read position.
   SRC        - source text; only the first N bytes are examined.
   N          - number of source bytes.
   ESCAPABLE  - NUL-terminated set of characters that may be escaped.

   A backslash is an escape only when another character follows it within
   the first N bytes.  A backslash in the last position is copied
   literally, so SRC[N] is never read or copied. */
void
wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
				const char *escapable)
{
  size_t i;

  for (i = 0; i < n;)
    {
      if (src[i] == '\\' && i + 1 < n && strchr (escapable, src[i + 1]))
	i++;			/* Drop the backslash, keep what follows.  */
      *dst++ = src[i++];
    }
  *dst = 0;
}
/* Unquoting routine with the (DST, SRC, N) signature that
   wsnode_quoteremoval selects when WRDSF_CESCAPES is not set.
   NOTE(review): the body is omitted from this excerpt. */
1269 wordsplit_sh_unquote_copy (char *dst
, const char *src
, size_t n
)
/* C-style unquoting of N bytes from SRC into DST; wsnode_quoteremoval
   selects it when WRDSF_CESCAPES is set.

   Escape forms visible in this excerpt:
   - 'x' / 'X' followed by digits converted with xtonum;
   - an ASCII decimal digit starting a number of up to three digits
     converted with xtonum in base 8;
   - any other character, translated with wordsplit_c_unquote_char.

   NOTE(review): the main loop, the base and count of the hex conversion
   and the handling of failed conversions are on lines omitted from this
   excerpt. */
1283 wordsplit_c_unquote_copy (char *dst
, const char *src
, size_t n
)
1293 if (src
[i
] == 'x' || src
[i
] == 'X')
1302 int off
= xtonum (&c
, src
+ i
+ 1,
1316 else if ((unsigned char) src
[i
] < 128 && ISDIGIT (src
[i
]))
1325 int off
= xtonum (&c
, src
+ i
, 8, 3);
1339 *dst
++ = wordsplit_c_unquote_char (src
[i
++]);
/* Copy SRC to DST applying C-style quoting.

   Visible pieces: a test for printable characters other than TAB and
   backslash; a path that formats the byte as '%' plus two uppercase hex
   digits and copies those 3 bytes; a lookup with wordsplit_c_quote_char;
   and a path that formats the byte as three octal digits and copies those
   3 bytes.  DST must be large enough for the quoted text.

   NOTE(review): the loop, the role of QUOTE_HEX in choosing between the
   hex and the other forms, and the emission of backslashes are on lines
   omitted from this excerpt. */
1348 wordsplit_c_quote_copy (char *dst
, const char *src
, int quote_hex
)
1357 else if (*src
!= '\t' && *src
!= '\\' && ISPRINT (*src
))
1365 snprintf (tmp
, sizeof tmp
, "%%%02X", *(unsigned char *) src
);
1366 memcpy (dst
, tmp
, 3);
1371 int c
= wordsplit_c_quote_char (*src
);
1377 snprintf (tmp
, sizeof tmp
, "%03o", *(unsigned char *) src
);
1378 memcpy (dst
, tmp
, 3);
/* Run the processing pipeline on the input starting at offset START and
   return ws_errno.

   Stages, in order:
   1. Scanning.  With WRDSF_NOSPLIT the whole remaining input becomes one
      _WSNF_QUOTE segment; otherwise scan_word is called repeatedly while
      it returns _WRDS_OK, with skip_delim supplying the next start.
      _WSNF_JOIN is cleared on the tail node, and a _WRDS_ERR result
      returns ws_errno.
   2. With WRDSF_WS, whitespace trimming (wordsplit_trimws).
   3. Unless WRDSF_NOVAR, variable expansion (wordsplit_varexp); on
      failure the node list is freed and ws_errno returned.
   4. Quote removal (wsnode_quoteremoval).
   5. Joining of adjacent pieces (wsnode_coalesce).

   When WRDSF_SHOWDBG is set the node list is dumped through ws_debug
   after each stage.

   NOTE(review): some guards (for example around the tail-node update) and
   the error actions of stages 4 and 5 are on lines omitted from this
   excerpt. */
1387 wordsplit_process_list (struct wordsplit
*wsp
, size_t start
)
1389 if (wsp
->ws_flags
& WRDSF_NOSPLIT
)
1391 /* Treat entire input as a quoted argument */
1392 if (wordsplit_add_segm (wsp
, start
, wsp
->ws_len
, _WSNF_QUOTE
))
1393 return wsp
->ws_errno
;
1399 while ((rc
= scan_word (wsp
, start
)) == _WRDS_OK
)
1400 start
= skip_delim (wsp
);
1401 /* Make sure tail element is not joinable */
1403 wsp
->ws_tail
->flags
&= ~_WSNF_JOIN
;
1404 if (rc
== _WRDS_ERR
)
1405 return wsp
->ws_errno
;
1408 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1410 wsp
->ws_debug ("Initial list:");
1411 wordsplit_dump_nodes (wsp
);
1414 if (wsp
->ws_flags
& WRDSF_WS
)
1416 /* Trim leading and trailing whitespace */
1417 wordsplit_trimws (wsp
);
1418 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1420 wsp
->ws_debug ("After WS trimming:");
1421 wordsplit_dump_nodes (wsp
);
1425 /* Expand variables (FIXME: & commands) */
1426 if (!(wsp
->ws_flags
& WRDSF_NOVAR
))
1428 if (wordsplit_varexp (wsp
))
1430 wordsplit_free_nodes (wsp
);
1431 return wsp
->ws_errno
;
1433 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1435 wsp
->ws_debug ("Expanded list:");
1436 wordsplit_dump_nodes (wsp
);
1442 if (wsnode_quoteremoval (wsp
))
1444 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1446 wsp
->ws_debug ("After quote removal:");
1447 wordsplit_dump_nodes (wsp
);
1450 if (wsnode_coalesce (wsp
))
1453 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1455 wsp
->ws_debug ("Coalesced list:");
1456 wordsplit_dump_nodes (wsp
);
1460 return wsp
->ws_errno
;
/* Split LENGTH bytes of COMMAND into words stored in WSP.

   Two set-up paths are visible:
   - continuing an earlier incremental split: WRDSF_INCREMENTAL is
     required, scanning resumes after ws_endp (skip_delim), WRDSE_NOINPUT
     is reported when ws_endp has reached ws_len, and the state is reset
     with WRDSF_REUSE plus wordsplit_init0;
   - a fresh split: wordsplit_init (wsp, cmdptr, cmdlen, flags).

   The input is then processed with wordsplit_process_list.  In
   incremental mode, processing restarts while it succeeded but produced
   no node and input remains.  The result is converted with
   wordsplit_finish, the node list is freed, and ws_errno is returned.

   NOTE(review): the condition choosing between the two set-up paths, the
   last parameter, and the error handling after wordsplit_init and
   wordsplit_process_list are on lines omitted from this excerpt. */
1464 wordsplit_len (const char *command
, size_t length
, struct wordsplit
*wsp
,
1474 if (!(flags
& WRDSF_INCREMENTAL
))
1477 start
= skip_delim (wsp
);
1478 if (wsp
->ws_endp
== wsp
->ws_len
)
1480 wsp
->ws_errno
= WRDSE_NOINPUT
;
1481 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
1482 wordsplit_perror (wsp
);
1483 return wsp
->ws_errno
;
1486 cmdptr
= wsp
->ws_input
+ wsp
->ws_endp
;
1487 cmdlen
= wsp
->ws_len
- wsp
->ws_endp
;
1488 wsp
->ws_flags
|= WRDSF_REUSE
;
1489 wordsplit_init0 (wsp
);
1496 rc
= wordsplit_init (wsp
, cmdptr
, cmdlen
, flags
);
1501 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1502 wsp
->ws_debug ("Input:%.*s;", (int) cmdlen
, cmdptr
);
1504 rc
= wordsplit_process_list (wsp
, start
);
1505 if (rc
== 0 && (flags
& WRDSF_INCREMENTAL
))
1507 while (!wsp
->ws_head
&& wsp
->ws_endp
< wsp
->ws_len
)
1509 start
= skip_delim (wsp
);
1510 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1512 cmdptr
= wsp
->ws_input
+ wsp
->ws_endp
;
1513 cmdlen
= wsp
->ws_len
- wsp
->ws_endp
;
1514 wsp
->ws_debug ("Restart:%.*s;", (int) cmdlen
, cmdptr
);
1516 rc
= wordsplit_process_list (wsp
, start
);
1523 wordsplit_free_nodes (wsp
);
1526 wordsplit_finish (wsp
);
1527 wordsplit_free_nodes (wsp
);
1528 return wsp
->ws_errno
;
/* Convenience wrapper around wordsplit_len for NUL-terminated input: the
   length passed is strlen (command), or 0 when COMMAND is NULL. */
1532 wordsplit (const char *command
, struct wordsplit
*ws
, int flags
)
1534 return wordsplit_len (command
, command
? strlen (command
) : 0, ws
,
/* Visit the ws_wordc word slots starting at ws_wordv[ws_offs] and reset
   each one to NULL.
   NOTE(review): the release of each string and any reset of ws_wordc are
   on lines omitted from this excerpt. */
1539 wordsplit_free_words (struct wordsplit
*ws
)
1543 for (i
= 0; i
< ws
->ws_wordc
; i
++)
1545 char *p
= ws
->ws_wordv
[ws
->ws_offs
+ i
];
1549 ws
->ws_wordv
[ws
->ws_offs
+ i
] = NULL
;
/* Release the result of a split: the words via wordsplit_free_words, then
   the ws_wordv array itself, which is set to NULL afterwards so a repeated
   call does not free it twice. */
1556 wordsplit_free (struct wordsplit
*ws
)
1558 wordsplit_free_words (ws
);
1559 free (ws
->ws_wordv
);
1560 ws
->ws_wordv
= NULL
;
/* Report the error recorded in wsp->ws_errno through the ws_error
   callback, using one message per error code.  The "missing closing"
   message also prints the character at ws_input[ws_endp] and the offset
   ws_endp.
   NOTE(review): the case labels are on lines omitted from this excerpt. */
1564 wordsplit_perror (struct wordsplit
*wsp
)
1566 switch (wsp
->ws_errno
)
1569 wsp
->ws_error (_("no error"));
1573 wsp
->ws_error (_("missing closing %c (start near #%lu)"),
1574 wsp
->ws_input
[wsp
->ws_endp
],
1575 (unsigned long) wsp
->ws_endp
);
1579 wsp
->ws_error (_("memory exhausted"));
1583 wsp
->ws_error (_("command substitution is not yet supported"));
1586 wsp
->ws_error (_("invalid wordsplit usage"));
1590 wsp
->ws_error (_("unbalanced curly brace"));
1594 wsp
->ws_error (_("undefined variable"));
1598 wsp
->ws_error (_("input exhausted"));
1602 wsp
->ws_error (_("unknown error"));
/* Table of error messages indexed by ws_errno (see wordsplit_strerror).
   The strings are marked with N_() only, so they are stored untranslated.
   NOTE(review): the first entry and the closing brace are on lines
   omitted from this excerpt. */
1606 const char *_wordsplit_errstr
[] = {
1608 N_("missing closing quote"),
1609 N_("memory exhausted"),
1610 N_("command substitution is not yet supported"),
1611 N_("invalid wordsplit usage"),
1612 N_("unbalanced curly brace"),
1613 N_("undefined variable"),
1614 N_("input exhausted")
/* Number of entries in _wordsplit_errstr. */
1616 int _wordsplit_nerrs
=
1617 sizeof (_wordsplit_errstr
) / sizeof (_wordsplit_errstr
[0]);
1620 wordsplit_strerror (struct wordsplit
*ws
)
1622 if (ws
->ws_errno
< _wordsplit_nerrs
)
1623 return _wordsplit_errstr
[ws
->ws_errno
];
1624 return N_("unknown error");