summary refs log tree commit diff
diff options
context:
space:
mode:
author: Laurent Bercot <ska-skaware@skarnet.org> 2022-05-26 12:46:37 +0000
committer: Laurent Bercot <ska@appnovation.com> 2022-05-26 12:46:37 +0000
commit: 7b631e14a70f6c2e8dcabd2713422bc585af2703 (patch)
tree: 3b5d8592acc340a5d79bb6dbeaed4b719662d689
parent: db5bf6b28e46da0305d13383201e3dfbf2e80178 (diff)
download: execline-7b631e14a70f6c2e8dcabd2713422bc585af2703.tar.gz
execline-7b631e14a70f6c2e8dcabd2713422bc585af2703.tar.xz
execline-7b631e14a70f6c2e8dcabd2713422bc585af2703.zip
Rewrite el_parse.c, document the transition table
Signed-off-by: Laurent Bercot <ska@appnovation.com>
-rw-r--r-- doc/execlineb.html 8
-rw-r--r-- src/libexecline/PARSING.txt 99
-rw-r--r-- src/libexecline/el_parse.c 95
3 files changed, 157 insertions, 45 deletions
diff --git a/doc/execlineb.html b/doc/execlineb.html
index fe4f0d3..f9d3e4d 100644
--- a/doc/execlineb.html
+++ b/doc/execlineb.html
@@ -133,10 +133,11 @@ newlines disappear completely. </li>
  <li> <tt>\0x<em>ab</em></tt> sequences are recognized in quoted strings
 and evaluate to ASCII hexadecimal number <em>ab</em>. </li>
  <li> <tt>\0<em>abc</em></tt> sequences are recognized in quoted strings
-and evaluate to ASCII octal number <em>abc</em>. </li>
+and evaluate to ASCII octal number <em>abc</em>. <em>abc</em> must not
+be greater than <em>377</em> and must not evaluate to 0. </li>
  <li> <tt>\<em>abc</em></tt> sequences are recognized in quoted strings
 and evaluate to ASCII decimal number <em>abc</em>. <em>a</em> must not
-be zero. </li>
+be zero. <em>abc</em> must not be greater than 255 and must not evaluate to 0. </li>
  <li> A comment starts with a <tt>#</tt> and ends with the line. Comments
 are not recognized inside quoted strings. </li>
  <li> Anything else is an unquoted string, that can evaluate to
@@ -144,6 +145,9 @@ zero or more words. </li>
  <li> Any character can be escaped in unquoted strings by prepending
 it with a backslash. It works the same way in quoted strings, except
 for the special sequences described above. </li>
+ <li> As a special case, an unquoted backslash at the end of a line, or at
+the end of the input, is ignored. This is to make it easier to copy
+execline fragments from a shell script. </li>
 </ul>
 
 <p>
diff --git a/src/libexecline/PARSING.txt b/src/libexecline/PARSING.txt
new file mode 100644
index 0000000..b84c0ff
--- /dev/null
+++ b/src/libexecline/PARSING.txt
@@ -0,0 +1,99 @@
+el_parse.c:
+
+class	|	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
+st\ev	|	\0	space	#	"	newline	\	normal	abf	1-7	8-9	0	nrtv	x	A-Fcde	{	}
+
+START	|				n			n p	n p	n p	n p	n p	n p	n p	n p	n p	n p
+00	|	END	START	COMMENT	Q	START	Q1	W	W	W	W	W	W	W	W	OPENB	CLOSEB
+
+COMMENT |
+01	|	END	COMMENT	COMMENT	COMMENT	START	COMMENT	COMMENT	COMMENT	COMMENT	COMMENT	COMMENT	COMMENT	COMMENT	COMMENT	COMMENT	COMMENT
+
+OPENB	|		{	p		{		p	p	p	p	p	p	p	p	p	p
+02	|	X	START	W	Q	START	Q1	W	W	W	W	W	W	W	W	W	W
+
+CLOSEB	|	} 0	} 0	p		} 0		p	p	p	p	p	p	p	p	p	p
+03	|	END	START	W	Q	START	Q1	W	W	W	W	W	W	W	W	W	W
+
+W	|	0	0	p		0		p	p	p	p	p	p	p	p	p	p
+04	|	END	START	W	Q	START	Q2	W	W	W	W	W	W	W	W	W	W
+
+Q1	|		n p	n p	n p		n p	n p	n p	n p	n p	n p	n p	n p	n p	n p	n p
+05	|	END	W	W	W	START	W	W	W	W	W	W	W	W	W	W	W
+
+Q2	|		p	p	p	p	p	p	p	p	p	p	p	p	p	p	p
+06	|	X	W	W	W	W	W	W	W	W	W	W	W	W	W	W	W
+
+Q3	|		p	p	p		p	p	c	m p b	m p b	b	c	p	p	p	p
+07	|	X	Q	Q	Q	Q	Q	Q	Q	DEC1	DEC1	OCT	Q	Q	Q	Q	Q
+
+Q	|		p	p		p		p	p	p	p	p	p	p	p	p	p
+08	|	X	Q	Q	W	Q	Q3	Q	Q	Q	Q	Q	Q	Q	Q	Q	Q	
+
+OCT	|									m p		m p		b
+09	|	X	X	X	X	X	X	X	X	OCT1	X	OCT1	X	HEX	X	X	X
+
+OCT1	|	s 0	s p	s p	s	s p	s	s p	s p	p	s p	p	s p	s p	s p	s p	s p
+0a	|	END	Q	Q	W	Q	Q3	Q	Q	OCT2	Q	OCT2	Q	Q	Q	Q	Q
+
+OCT2	|	s 0	s p	s p	s	s p	s	s p	s p	p	s p	p	s p	s p	s p	s p	s p
+0b	|	END	Q	Q	W	Q	Q3	Q	Q	ENDNUM	Q	ENDNUM	Q	Q	Q	Q	Q
+
+DEC1	|	s 0	s p	s p	s	s p	s	s p	s p	p	p	p	s p	s p	s p	s p	s p
+0c	|	END	Q	Q	W	Q	Q3	Q	Q	DEC2	DEC2	DEC2	Q	Q	Q	Q	Q
+
+DEC2	|	s 0	s p	s p	s	s p	s	s p	s p	p	p	p	s p	s p	s p	s p	s p
+0d	|	END	Q	Q	W	Q	Q3	Q	Q	ENDNUM	ENDNUM	ENDNUM	Q	Q	Q	Q	Q
+
+HEX	|								m p	m p	m p	m p			m p
+0e	|	X	X	X	X	X	X	X	HEX1	HEX1	HEX1	HEX1	X	X	HEX1	X	X
+
+HEX1	|	s 0	s p	s p	s	s p	s	s p	p	p	p	p	s p	s p	p	s p	s p
+0f	|	END	Q	Q	W	Q	Q3	Q	ENDNUM	ENDNUM	ENDNUM	ENDNUM	Q	Q	ENDNUM	Q	Q
+
+ENDNUM	|	s 0	s p	s p	s	s p	s	s p	s p	s p	s p	s p	s p	s p	s p	s p	s p
+10	|	END	Q	Q	W	Q	Q3	Q	Q	Q	Q	Q	Q	Q	Q	Q	Q
+
+END
+11
+
+X
+12
+
+
+States
+
+START: in whitespace; initial state
+COMMENT: in a comment line
+OPENB: after a raw {
+CLOSEB: after a raw }
+W: in an unquoted word
+Q1: after a backslash in whitespace
+Q2: after a backslash in an unquoted word
+Q3: after a backslash in a quoted string
+Q: in a quoted string
+OCT: after \0 in a quoted string
+OCT1: after \0a in a quoted string
+OCT2: after \0ab in a quoted string
+DEC1: after \a in a quoted string
+DEC2: after \ab in a quoted string
+HEX: after \0x in a quoted string
+HEX1: after \0xa in a quoted string
+ENDNUM: after \0abc, \abc or \0xab in a quoted string
+END: success
+X: syntax error
+
+
+Actions
+
+8000	s	scan integer from mark to cur
+4000	m	set mark
+2000	n	add blevel spaces
+1000	{	inc blevel + unpush blevel
+0800	}	dec blevel + unpush 2
+0400	p	add cur to word
+0200	c	add control char (depending on cur) to word
+0100	0	end word
+0080	b	switch base according to cur
+0040	unused
+0020	unused
diff --git a/src/libexecline/el_parse.c b/src/libexecline/el_parse.c
index b45b776..6ef7ce9 100644
--- a/src/libexecline/el_parse.c
+++ b/src/libexecline/el_parse.c
@@ -1,88 +1,97 @@
 /* ISC license. */
 
-#include <sys/types.h>
+#include <stddef.h>
 #include <stdint.h>
-#include <skalibs/types.h>
+#include <limits.h>
+#include <errno.h>
+
+#include <skalibs/uint64.h>
 #include <skalibs/bytestr.h>
 #include <skalibs/stralloc.h>
-#include <skalibs/djbunix.h>
+
 #include <execline/execline.h>
 
 int el_parse (stralloc *sa, el_chargen_func_ref next, void *source)
 {
   static unsigned char const class[256] = "`aaaaaaaaadaaaaaaaaaaaaaaaaaaaaaafcbffffffffffffjhhhhhhhiifffffffmmmmmmfffffffffffffffffffffeffffggmmmgfffffffkfffkfkfkflffnfoffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" ;
-  static uint16_t const table[16][16] =
+  static uint16_t const table[17][16] =
   {
-    { 0x0011, 0x4011, 0x0010, 0x0010, 0x0010, 0x0011, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x0010, 0x4091 },
-    { 0x0000, 0x4000, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x0100, 0x4080 },
-    { 0x0005, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
-    { 0x0203, 0x0003, 0x8001, 0x0001, 0x8003, 0x0005, 0x0010, 0x0401, 0x0401, 0x0401, 0x0401, 0x0010, 0x0401, 0x0401, 0x0003, 0x0003 },
-    { 0x0000, 0x4000, 0x8001, 0x8003, 0x0003, 0x0000, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x0100, 0x4080 },
-    { 0x0202, 0x0002, 0x8001, 0x0004, 0x8003, 0x0005, 0x0010, 0x0404, 0x0404, 0x0404, 0x0404, 0x0010, 0x0404, 0x0404, 0x0002, 0x0002 },
-    { 0x8201, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
-    { 0x8201, 0x8001, 0x8001, 0x8003, 0x2003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
-    { 0x8201, 0x8001, 0x8001, 0x8003, 0x9809, 0x0005, 0x8807, 0x8008, 0x800d, 0x800a, 0x800d, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
-    { 0x8201, 0x8001, 0x8001, 0x8003, 0x9809, 0x0005, 0x0010, 0x8403, 0x8403, 0x800a, 0x800d, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
-    { 0x8201, 0x8001, 0x8001, 0x8003, 0x1006, 0x0005, 0x8807, 0x8008, 0x800d, 0x800a, 0x800d, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
-    { 0x8201, 0x8001, 0x8001, 0x8003, 0x2003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
-    { 0x8201, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x100b, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
-    { 0x8201, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x880c, 0x800d, 0x8403, 0x8001, 0x8001 },
-    { 0x820e, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 },
-    { 0x820f, 0x8001, 0x8001, 0x8003, 0x8003, 0x0005, 0x0010, 0x8403, 0x8403, 0x8403, 0x8403, 0x0010, 0x8403, 0x8403, 0x8001, 0x8001 }
+    { 0x0011, 0x0000, 0x0001, 0x2008, 0x0000, 0x0005, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2402, 0x2403 },
+    { 0x0011, 0x0001, 0x0001, 0x0001, 0x0000, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001 },
+    { 0x0012, 0x1000, 0x0404, 0x0008, 0x1000, 0x0005, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 },
+    { 0x0911, 0x0900, 0x0404, 0x0008, 0x0900, 0x0005, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 },
+    { 0x0111, 0x0100, 0x0404, 0x0008, 0x0100, 0x0006, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 },
+    { 0x0011, 0x2404, 0x2404, 0x2404, 0x0000, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404, 0x2404 },
+    { 0x0012, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404 },
+    { 0x0012, 0x0408, 0x0408, 0x0408, 0x0008, 0x0408, 0x0408, 0x0208, 0x448c, 0x448c, 0x0089, 0x0208, 0x0408, 0x0408, 0x0408, 0x0408 },
+    { 0x0012, 0x0408, 0x0408, 0x0004, 0x0408, 0x0007, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408 },
+    { 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x440a, 0x0012, 0x440a, 0x0012, 0x008e, 0x0012, 0x0012, 0x0012 },
+    { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x040b, 0x8408, 0x040b, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 },
+    { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x0410, 0x8408, 0x0410, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 },
+    { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x040d, 0x040d, 0x040d, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 },
+    { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x0410, 0x0410, 0x0410, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 },
+    { 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x0012, 0x440f, 0x440f, 0x440f, 0x440f, 0x0012, 0x0012, 0x440f, 0x0012, 0x0012 },
+    { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x0410, 0x0410, 0x0410, 0x0410, 0x8408, 0x8408, 0x0410, 0x8408, 0x8408 },
+    { 0x8111, 0x8408, 0x8408, 0x8004, 0x8408, 0x8007, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408, 0x8408 }
   } ;
 
   size_t mark = 0 ;
-  int n = 0 ;
-  unsigned int blevel = 0 ;
-  unsigned char state = 0, base = 10 ;
+  unsigned int n = 0, blevel = 0 ;
+  uint8_t state = 0, base = 10 ;
 
-  while (state < 0x10)
+  while (state < 0x11)
   {
     uint16_t c ;
     unsigned char cur ;
     if (!(*next)(&cur, source)) return -1 ;
-    c = table[class[cur]-'`'][state] ;
+    c = table[state][class[cur]-'`'] ;
     state = c & 0x1F ;
 
-    if (c & 0x0400)
+    if (c & 0x8000U)
     {
-      unsigned int z ;
+      uint64_t u ;
       if (!stralloc_0(sa)) return -1 ;
       sa->len = mark ;
-      uint_scan_base(sa->s + sa->len, &z, base) ;
-      sa->s[sa->len++] = (unsigned char)z ;
+      uint64_scan_base(sa->s + sa->len, &u, base) ;
+      if (!u || u > 0xff) return -2 ;
+      sa->s[sa->len++] = (unsigned char)u ;
     }
-    if (c & 0x0800) mark = sa->len ;
-    if (c & 0x0200)
+    if (c & 0x4000U) mark = sa->len ;
+    if (c & 0x2000U)
     {
-      char tilde = EXECLINE_BLOCK_QUOTE_CHAR ;
       unsigned int i = blevel ;
-      if (!stralloc_readyplus(sa, i<<1)) return -1 ;
-      while (i--) stralloc_catb(sa, &tilde, 1) ;
+      if (!stralloc_readyplus(sa, i<<2)) return -1 ;
+      while (i--) sa->s[sa->len++] = ' ' ;
     }
-    if (c & 0x0100) sa->len -= ++blevel ;
-    if (c & 0x0080)
+    if (c & 0x1000U) sa->len -= ++blevel ;
+    if (c & 0x0800U)
     {
       if (!blevel--) return -4 ;
-      sa->s[--sa->len-1] = EXECLINE_BLOCK_END_CHAR ;
-      if (!EXECLINE_BLOCK_END_CHAR) sa->len-- ;
+      sa->len -= 2 ;
     }
-    if (c & 0x8000) if (!stralloc_catb(sa, (char *)&cur, 1)) return -1 ;
-    if (c & 0x2000)
+    if (c & 0x0400) if (!stralloc_catb(sa, (char *)&cur, 1)) return -1 ;
+    if (c & 0x0200)
     {
       char x = 7 + byte_chr("abtnvfr", 7, cur) ;
       if (!stralloc_catb(sa, &x, 1)) return -1 ;
     }
-    if (c & 0x4000) if (n++, !stralloc_0(sa)) return -1 ;
-    if (c & 0x1000)
+    if (c & 0x0100)
+    {
+      if (n++ >= INT_MAX) return (errno = E2BIG, -1) ;
+      if (!stralloc_0(sa)) return -1 ;
+    }
+    if (c & 0x0080)
+    {
       switch (cur)
       {
         case 'x' : base = 16 ; break ;
         case '0' : base = 8 ; break ;
         default : base = 10 ;
       }
+    }
   }
-  if (state == 0x10) return -2 ;
+
+  if (state > 0x11) return -2 ;
   if (blevel) return -3 ;
   return n ;
 }