about summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Stephenson <pws@zsh.org> 2015-02-20 16:25:47 +0000
committer Peter Stephenson <pws@zsh.org> 2015-02-20 16:25:54 +0000
commit b237ba0a8eaa5001283ac8448872021723b90aff (patch)
tree 9c7e94bae7f00853ea6ed705229492dd8e9f3f68
parent df7a657b10df436e88a4c293cdfa5c7b156fba72 (diff)
download zsh-b237ba0a8eaa5001283ac8448872021723b90aff.tar.gz
zsh-b237ba0a8eaa5001283ac8448872021723b90aff.tar.xz
zsh-b237ba0a8eaa5001283ac8448872021723b90aff.zip
34587: ensure multibyte characters don't overflow.
They could start incorporating tokens, with bad karma.

Add test.
-rw-r--r-- ChangeLog 5
-rw-r--r-- Src/utils.c 8
-rw-r--r-- Test/D07multibyte.ztst 20
3 files changed, 28 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index c4ff0223e..fbf11386b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-02-20  Peter Stephenson  <p.stephenson@samsung.com>
+
+	* 34587: Src/utils.c, Test/D07multibyte.ztst: ensure multibyte
+	characters don't overflow into tokens and add test.
+
 2015-02-19  Barton E. Schaefer  <schaefer@zsh.org>
 
 	* 34568: Src/Module.c: use META_HEAPDUP when passing dlerror()
diff --git a/Src/utils.c b/Src/utils.c
index 702829c0c..1bcceb091 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -4797,6 +4797,14 @@ mb_metacharlenconv_r(const char *s, wint_t *wcp, mbstate_t *mbsp)
 	    inchar = *++ptr ^ 32;
 	    DPUTS(!*ptr,
 		  "BUG: unexpected end of string in mb_metacharlen()\n");
+	} else if (imeta(*ptr)) {
+	    /*
+	     * As this is metafied input, this is a token --- this
+	     * can't be a part of the string.  It might be
+	     * something on the end of an unbracketed parameter
+	     * reference, for example.
+	     */
+	    break;
 	} else
 	    inchar = *ptr;
 	ptr++;
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 2cb995346..33e76bee7 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -448,20 +448,30 @@
 0:read passes through invalid multibyte characters
 >0xC5
 
-  word=abcま          
+  word=abcま
   word[-1]=
   print $word
-  word=abcま 
+  word=abcま
   word[-2]=
   print $word
-  word=abcま 
+  word=abcま
   word[4]=d
   print $word
-  word=abcま 
+  word=abcま
   word[3]=not_c
-  print $word  
+  print $word
 0:assignment with negative indices
 >abc
 >abま
 >abcd
 >abnot_cま
+
+  # The following doesn't necessarily need UTF-8, but this gives
+  # us the full effect --- if we parse this wrongly the \xe9
+  # in combination with the tokenized input afterwards looks like a
+  # valid UTF-8 character.  But it isn't.
+  print $'$\xe9#``' >test_bad_param
+  (setopt nonomatch
+  . ./test_bad_param)
+127:Invalid parameter name with following tokenized input
+?./test_bad_param:1: command not found: $\M-i#