about summary refs log tree commit diff
diff options
context:
space:
mode:
author Oliver Kiddle <opk@zsh.org> 2022-12-17 00:09:37 +0100
committer Oliver Kiddle <opk@zsh.org> 2022-12-17 00:37:19 +0100
commit 35a2f155c3b92e67957325e1f49e409546378e3e (patch)
tree 7c4593cf459ae5067627043c7fba15866c48c8c6
parent 2701ab161df1f259b8292a650a4ea5cebd668d81 (diff)
download zsh-35a2f155c3b92e67957325e1f49e409546378e3e.tar.gz
zsh-35a2f155c3b92e67957325e1f49e409546378e3e.tar.xz
zsh-35a2f155c3b92e67957325e1f49e409546378e3e.zip
51214: handle read -d and a delimiter that can't be decoded into a character

Terminate input at the raw byte value of the delimiter.
Also document and test the use of an empty string as a way to specify
NUL as the delimiter.
-rw-r--r-- ChangeLog 4
-rw-r--r-- Doc/Zsh/builtins.yo 3
-rw-r--r-- Src/builtin.c 7
-rw-r--r-- Test/B04read.ztst 4
-rw-r--r-- Test/D07multibyte.ztst 14
5 files changed, 29 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 5b0af2135..130bec319 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2022-12-16  Oliver Kiddle  <opk@zsh.org>
 
+	* 51214: Doc/Zsh/builtins.yo, Src/builtin.c, Test/B04read.ztst,
+	Test/D07multibyte.ztst: with read -d and a delimiter that can't be
+	decoded into a character terminate input at the raw byte value
+
 	* Jun T.: 51207: Src/builtin.c, Test/B04read.ztst:
 	fix for read -d when the delimiter is a byte >= 0x80
 
diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index b6217f66d..56428a714 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -1589,7 +1589,8 @@ Input is read from the coprocess.
 )
 item(tt(-d) var(delim))(
 Input is terminated by the first character of var(delim) instead of
-by newline.
+by newline.  For compatibility with other shells, if var(delim) is an
+empty string, input is terminated at the first NUL.
 )
 item(tt(-t) [ var(num) ])(
 Test if input is available before attempting to read.  If var(num)
diff --git a/Src/builtin.c b/Src/builtin.c
index 951970138..70a950666 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -6282,6 +6282,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
     long izle_timeout = 0;
 #ifdef MULTIBYTE_SUPPORT
     wchar_t delim = L'\n', wc;
+    int rawbyte = 0;
     mbstate_t mbs;
     char *laststart;
     size_t ret;
@@ -6412,9 +6413,11 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 	    wi = WEOF;
 	if (wi != WEOF)
 	    delim = (wchar_t)wi;
-	else
+	else {
 	    delim = (wchar_t) (unsigned char) ((delimstr[0] == Meta) ?
 			      delimstr[1] ^ 32 : delimstr[0]);
+	    rawbyte = 1;
+	}
 #else
         delim = (unsigned char) ((delimstr[0] == Meta) ?
 			delimstr[1] ^ 32 : delimstr[0]);
@@ -6842,7 +6845,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 		break;
 	    }
 	    *bptr = (char)c;
-	    if (isset(MULTIBYTE)) {
+	    if (isset(MULTIBYTE) && !rawbyte) {
 		ret = mbrtowc(&wc, bptr, 1, &mbs);
 		if (!ret)	/* NULL */
 		    ret = 1;
diff --git a/Test/B04read.ztst b/Test/B04read.ztst
index 96adf51c7..14bdaeef5 100644
--- a/Test/B04read.ztst
+++ b/Test/B04read.ztst
@@ -82,6 +82,10 @@
 >Testing the
 >null hypothesis
 
+ read -ed '' <<<$'one\0two'
+0:empty delimiter terminates at nulls
+>one
+
  print -n $'first line\x80second line\x80' |
  while read -d $'\x80' line; do print $line; done
 0:read with a delimiter >= 0x80
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 6909346cb..413c4fe73 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -212,6 +212,20 @@
 >first
 >second
 
+  read -ed £
+0:read with multibyte delimiter where bytes of delimiter also occur in input
+<one¤twoãthree£four
+>one¤twoãthree
+
+  read -ed $'\xa0' <<<$'first\xa0second'
+0:read delimited by a byte that isn't a valid multibyte character
+>first
+
+  read -ed $'\xc2'
+0:read delimited by a single byte terminates if the byte is part of a multibyte character
+<one£two
+>one
+
   (IFS=«
   read -d » -A array
   print -l $array)