diff options
author | Laurent Bercot <ska-skaware@skarnet.org> | 2022-05-26 12:46:37 +0000 |
---|---|---|
committer | Laurent Bercot <ska@appnovation.com> | 2022-05-26 12:46:37 +0000 |
commit | 7b631e14a70f6c2e8dcabd2713422bc585af2703 (patch) | |
tree | 3b5d8592acc340a5d79bb6dbeaed4b719662d689 | |
parent | db5bf6b28e46da0305d13383201e3dfbf2e80178 (diff) | |
download | execline-7b631e14a70f6c2e8dcabd2713422bc585af2703.tar.gz execline-7b631e14a70f6c2e8dcabd2713422bc585af2703.tar.xz execline-7b631e14a70f6c2e8dcabd2713422bc585af2703.zip |
Rewrite el_parse.c, document the transition table
Signed-off-by: Laurent Bercot <ska@appnovation.com>
-rw-r--r-- | doc/execlineb.html | 8 | ||||
-rw-r--r-- | src/libexecline/PARSING.txt | 99 | ||||
-rw-r--r-- | src/libexecline/el_parse.c | 95 |
3 files changed, 157 insertions, 45 deletions
diff --git a/doc/execlineb.html b/doc/execlineb.html index fe4f0d3..f9d3e4d 100644 --- a/doc/execlineb.html +++ b/doc/execlineb.html @@ -133,10 +133,11 @@ newlines disappear completely. </li> <li> <tt>\0x<em>ab</em></tt> sequences are recognized in quoted strings and evaluate to ASCII hexadecimal number <em>ab</em>. </li> <li> <tt>\0<em>abc</em></tt> sequences are recognized in quoted strings -and evaluate to ASCII octal number <em>abc</em>. </li> +and evaluate to ASCII octal number <em>abc</em>. <em>abc</em> must not +be greater than <em>377</em>, or evaluate to 0. </li> <li> <tt>\<em>abc</em></tt> sequences are recognized in quoted strings and evaluate to ASCII decimal number <em>abc</em>. <em>a</em> must not -be zero. </li> +be zero. <em>abc</em> must not be greater than 255, or evaluate to 0. </li> <li> A comment starts with a <tt>#</tt> and ends with the line. Comments are not recognized inside quoted strings. </li> <li> Anything else is an unquoted string, that can evaluate to @@ -144,6 +145,9 @@ zero or more words. </li> <li> Any character can be escaped in unquoted strings by prepending it with a backslash. It works the same way in quoted strings, except for the special sequences described above. </li> + <li> As a special case, an unquoted backslash at the end of a line, or at +the end of the input, is ignored. This is to make it easier to copy +execline fragments from a shell script. </li> </ul> <p> diff --git a/src/libexecline/PARSING.txt b/src/libexecline/PARSING.txt new file mode 100644 index 0000000..b84c0ff --- /dev/null +++ b/src/libexecline/PARSING.txt @@ -0,0 +1,99 @@ +el_parse.c: + +class | 0 1 2 3 4 5 6 7 8 9 a b c d e f +st\ev | \0 space # " newline \ normal abf 1-7 8-9 0 nrtv x A-Fcde { } + +START | n n p n p n p n p n p n p n p n p n p n p +00 | END START COMMENT Q START Q1 W W W W W W W W OPENB CLOSEB + +COMMENT | +01 | END COMMENT COMMENT COMMENT START COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT COMMENT + +OPENB | { p { p p p p p p p p p p +02 | X START W Q START Q1 W W W W W W W W W W + +CLOSEB | } 0 } 0 p } 0 p p p p p p p p p p +03 | END START W Q START Q1 W W W W W W W W W W + +W | 0 0 p 0 p p p p p p p p p p +04 | END START W Q START Q2 W W W W W W W W W W + +Q1 | n p n p n p n p n p n p n p n p n p n p n p n p n p n p +05 | END W W W START W W W W W W W W W W W + +Q2 | p p p p p p p p p p p p p p p +06 | X W W W W W W W W W W W W W W W + +Q3 | p p p p p c m p b m p b b c p p p p +07 | X Q Q Q Q Q Q Q DEC1 DEC1 OCT Q Q Q Q Q + +Q | p p p p p p p p p p p p p +08 | X Q Q W Q Q3 Q Q Q Q Q Q Q Q Q Q + +OCT | m p m p b +09 | X X X X X X X X OCT1 X OCT1 X HEX X X X + +OCT1 | s 0 s p s p s s p s s p s p p s p p s p s p s p s p s p +0a | END Q Q W Q Q3 Q Q OCT2 Q OCT2 Q Q Q Q Q + +OCT2 | s 0 s p s p s s p s s p s p p s p p s p s p s p s p s p +0b | END Q Q W Q Q3 Q Q ENDNUM Q ENDNUM Q Q Q Q Q + +DEC1 | s 0 s p s p s s p s s p s p p p p s p s p s p s p s p +0c | END Q Q W Q Q3 Q Q DEC2 DEC2 DEC2 Q Q Q Q Q + +DEC2 | s 0 s p s p s s p s s p s p p p p s p s p s p s p s p +0d | END Q Q W Q Q3 Q Q ENDNUM ENDNUM ENDNUM Q Q Q Q Q + +HEX | m p m p m p m p m p +0e | X X X X X X X HEX1 HEX1 HEX1 HEX1 X X HEX1 X X + +HEX1 | s 0 s p s p s s p s s p p p p p s p s p p s p s p +0f | END Q Q W Q Q3 Q ENDNUM ENDNUM ENDNUM ENDNUM Q Q ENDNUM Q Q + +ENDNUM | s 0 s p s p s s p s s p s p s p s p s p s p s p s p s p s p +10 | END Q Q W Q Q3 Q Q Q Q Q Q Q Q Q Q + +END +11 + +X +12 + + +States + +START: in whitespace; initial state +COMMENT: in a comment line +OPENB: after a raw { +CLOSEB: after a raw } +W: in an unquoted word +Q1: after a backslash in whitespace +Q2: after a backslash in an unquoted word +Q3: after a backslash in a quoted string +Q: in a quoted string +OCT: after \0 in a quoted string +OCT1: after \0a in a quoted string +OCT2: after \0ab in a quoted string +DEC1: after \a in a quoted string +DEC2: after \ab in a quoted string +HEX: after \0x in a quoted string +HEX1: after \0xa in a quoted string +ENDNUM: after \0abc, \abc or \0xab in a quoted string +END: success +X: syntax error + + +Actions + +8000 s scan integer from mark to cur +4000 m set mark +2000 n add blevel spaces +1000 { inc blevel + unpush blevel +0800 } dec blevel + unpush 2 +0400 p add cur to word +0200 c add control char (depending on cur) to word +0100 0 end word +0080 b switch base according to cur +0040 unused +0020 unused diff --git a/src/libexecline/el_parse.c b/src/libexecline/el_parse.c index b45b776..6ef7ce9 100644 --- a/src/libexecline/el_parse.c +++ b/src/libexecline/el_parse.c @@ -1,88 +1,97 @@ /* ISC license. */ -#include <sys/types.h> +#include <stddef.h> #include <stdint.h> -#include <skalibs/types.h> +#include <limits.h> +#include <errno.h> + +#include <skalibs/uint64.h> #include <skalibs/bytestr.h> #include <skalibs/stralloc.h> -#include <skalibs/djbunix.h> + #include <execline/execline.h> int el_parse (stralloc *sa, el_chargen_func_ref next, void *source) { static unsigned char const class[256] = "`aaaaaaaaadaaaaaaaaaaaaaaaaaaaaaafcbffffffffffffjhhhhhhhiifffffffmmmmmmfffffffffffffffffffffeffffggmmmgfffffffkfffkfkfkflffnfoffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" ; - static uint16_t const table[16][16] = + static uint16_t const table[17][16] = { - { 0x0011, 0x4011, 0x0010, 0x0010, 0x0010, 0x0011, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x4091 }, - { 0x0000, 0x4000, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x0100, 0x4080 }, - { 0x0005, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 }, - { 0x0203, 0x0003, 0x8001, 0x0001, 0x8003, 0x0005, 0x0010, 0x0401, 0x0401, 0x0401, 0x0401, 0x0010, 0x0401, 0x0401, 0x0003, 0x0003 }, - { 0x0000, 0x4000, 0x8001, 0x8003, 0x0003, 0x0000, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x0100, 0x4080 }, - { 0x0202, 0x0002, 0x8001, 0x0004, 0x8003, 0x0005, 0x0010, 0x0404, 0x0404, 0x0404, 0x0404, 0x0010, 0x0404, 0x0404, 0x0002, 0x0002 }, - { 0x8201, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 }, - { 0x8201, 0x8001, 0x8001, 0x8003, 0x2003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 }, - { 0x8201, 0x8001, 0x8001, 0x8003, 0x9809, 0x0005, 0x8807, 0x8008, 0x800d, 0x800a, 0x800d, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 }, - { 0x8201, 0x8001, 0x8001, 0x8003, 0x9809, 0x0005, 0x0010, 0x8403, 0x8403, 0x800a, 0x800d, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 }, - { 0x8201, 0x8001, 0x8001, 0x8003, 0x1006, 0x0005, 0x8807, 0x8008, 0x800d, 0x800a, 0x800d, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 }, - { 0x8201, 0x8001, 0x8001, 0x8003, 0x2003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 }, - { 0x8201, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x100b, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 }, - { 0x8201, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 }, - { 0x820e, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 }, - { 0x820f, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 } + { 0x0011, 0x0000, 0x0001, 0x2008, 0x0000, 0x0005, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2402, 0x2403 }, + { 0x0011, 0x0001, 0x0001, 0x0001, 0x0000, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001 }, + { 0x0012, 0x1000, 0x0404, 0x0008, 0x1000, 0x0005, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 }, + { 0x0911, 0x0900, 0x0404, 0x0008, 0x0900, 0x0005, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 }, + { 0x0111, 0x0100, 0x0404, 0x0008, 0x0100, 0x0006, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 }, + { 0x0011, 0x2404, 0x2404, 0x2404, 0x0000, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404 }, + { 0x0012, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 }, + { 0x0012, 0x0408, 0x0408, 0x0408, 0x0008, 0x0408, 0x0408, 0x0208, 0x448c, 0x448c, 0x0089, 0x0208, 0x0408, 0x0408, 0x0408, 0x0408 }, + { 0x0012, 0x0408, 0x0408, 0x0004, 0x0408, 0x0007, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408 }, + { 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x440a, 0x0012, 0x440a, 0x0012, 0x008e, 0x0012, 0x0012, 0x0012 }, + { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x040b, 0x8408, 0x040b, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 }, + { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x0410, 0x8408, 0x0410, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 }, + { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x040d, 0x040d, 0x040d, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 }, + { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x0410, 0x0410, 0x0410, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 }, + { 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x440f, 0x440f, 0x440f, 0x440f, 0x0012, 0x0012, 0x440f, 0x0012, 0x0012 }, + { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x0410, 0x0410, 0x0410, 0x0410, 0x8408, 0x8408, 0x0410, 0x8408, 0x8408 }, + { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 } } ; size_t mark = 0 ; - int n = 0 ; - unsigned int blevel = 0 ; - unsigned char state = 0, base = 10 ; + unsigned int n = 0, blevel = 0 ; + uint8_t state = 0, base = 10 ; - while (state < 0x10) + while (state < 0x11) { uint16_t c ; unsigned char cur ; if (!(*next)(&cur, source)) return -1 ; - c = table[class[cur]-'`'][state] ; + c = table[state][class[cur]-'`'] ; state = c & 0x1F ; - if (c & 0x0400) + if (c & 0x8000U) { - unsigned int z ; + uint64_t u ; if (!stralloc_0(sa)) return -1 ; sa->len = mark ; - uint_scan_base(sa->s + sa->len, &z, base) ; - sa->s[sa->len++] = (unsigned char)z ; + uint64_scan_base(sa->s + sa->len, &u, base) ; + if (!u || u > 0xff) return -2 ; + sa->s[sa->len++] = (unsigned char)u ; } - if (c & 0x0800) mark = sa->len ; - if (c & 0x0200) + if (c & 0x4000U) mark = sa->len ; + if (c & 0x2000U) { - char tilde = EXECLINE_BLOCK_QUOTE_CHAR ; unsigned int i = blevel ; - if (!stralloc_readyplus(sa, i<<1)) return -1 ; - while (i--) stralloc_catb(sa, &tilde, 1) ; + if (!stralloc_readyplus(sa, i<<2)) return -1 ; + while (i--) sa->s[sa->len++] = ' ' ; } - if (c & 0x0100) sa->len -= ++blevel ; - if (c & 0x0080) + if (c & 0x1000U) sa->len -= ++blevel ; + if (c & 0x0800U) { if (!blevel--) return -4 ; - sa->s[--sa->len-1] = EXECLINE_BLOCK_END_CHAR ; - if (!EXECLINE_BLOCK_END_CHAR) sa->len-- ; + sa->len -= 2 ; } - if (c & 0x8000) if (!stralloc_catb(sa, (char *)&cur, 1)) return -1 ; - if (c & 0x2000) + if (c & 0x0400) if (!stralloc_catb(sa, (char *)&cur, 1)) return -1 ; + if (c & 0x0200) { char x = 7 + byte_chr("abtnvfr", 7, cur) ; if (!stralloc_catb(sa, &x, 1)) return -1 ; } - if (c & 0x4000) if (n++, !stralloc_0(sa)) return -1 ; - if (c & 0x1000) + if (c & 0x0100) + { + if (n++ >= INT_MAX) return (errno = E2BIG, -1) ; + if (!stralloc_0(sa)) return -1 ; + } + if (c & 0x0080) + { switch (cur) { case 'x' : base = 16 ; break ; case '0' : base = 8 ; break ; default : base = 10 ; } + } } - if (state == 0x10) return -2 ; + + if (state > 0x11) return -2 ; if (blevel) return -3 ; return n ; } |