about summary refs log tree commit diff
diff options
context:
space:
mode:
authorJun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>2023-06-26 16:52:40 +0900
committerJun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>2023-06-26 16:52:40 +0900
commit1b9bc3441ca0e6d155243084d6e7b98925dc02cb (patch)
tree5d2d1da390e86c628ec0248eb8bc9c9df5d49d7f
parent4345eed1fe5dd6c881b948331cfa8f4a48beda42 (diff)
downloadzsh-1b9bc3441ca0e6d155243084d6e7b98925dc02cb.tar.gz
zsh-1b9bc3441ca0e6d155243084d6e7b98925dc02cb.tar.xz
zsh-1b9bc3441ca0e6d155243084d6e7b98925dc02cb.zip
51884: reset IFS if it contains invalid characters
This happens only if MULTIBYTE option is on.
-rw-r--r--ChangeLog6
-rw-r--r--Doc/Zsh/params.yo7
-rw-r--r--Src/params.c3
-rw-r--r--Src/utils.c42
-rw-r--r--Test/D04parameter.ztst21
5 files changed, 61 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index 0011cc947..51a091aff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2023-06-26  Jun-ichi Takimoto  <takimoto-j@kba.biglobe.ne.jp>
+
+	* 51884: Doc/Zsh/params.yo, Src/params.c, Src/utils.c,
+	Test/D04parameter.ztst: if MULTIBYTE option is on and IFS contains
+	invalid bytes in current locale then reset it to default
+
 2023-06-22  Bart Schaefer  <schaefer@zsh.org>
 
 	* 51887: Src/math.c, Src/params.c, Test/K02parameter.ztst:
diff --git a/Doc/Zsh/params.yo b/Doc/Zsh/params.yo
index 57d10b8bd..e0410d673 100644
--- a/Doc/Zsh/params.yo
+++ b/Doc/Zsh/params.yo
@@ -1325,15 +1325,18 @@ Internal field separators (by default space, tab, newline and NUL), that
 are used to separate words which result from
 command or parameter expansion and words read by
 the tt(read) builtin.  Any characters from the set space, tab and
-newline that appear in the IFS are called em(IFS white space).
+newline that appear in the tt(IFS) are called em(IFS white space).
 One or more IFS white space characters or one non-IFS white space
 character together with any adjacent IFS white space character delimit
 a field.  If an IFS white space character appears twice consecutively
-in the IFS, this character is treated as if it were not an IFS white
+in the tt(IFS), this character is treated as if it were not an IFS white
 space character.
 
 If the parameter is unset, the default is used.  Note this has
 a different effect from setting the parameter to an empty string.
+
+If the tt(MULTIBYTE) option is on and tt(IFS) contains characters that
+are invalid in the current locale, it is reset to the default.
 )
 vindex(KEYBOARD_HACK)
 item(tt(KEYBOARD_HACK))(
diff --git a/Src/params.c b/Src/params.c
index 2b0837e03..f5750a4b4 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -4748,6 +4748,7 @@ setlang(char *x)
 	if ((x = getsparam_u(ln->name)) && *x)
 	    setlocale(ln->category, x);
     unqueue_signals();
+    inittyptab();
 }
 
 /**/
@@ -4771,6 +4772,7 @@ lc_allsetfn(Param pm, char *x)
     else {
 	setlocale(LC_ALL, unmeta(x));
 	clear_mbstate();
+	inittyptab();
     }
 }
 
@@ -4809,6 +4811,7 @@ lcsetfn(Param pm, char *x)
     }
     unqueue_signals();
     clear_mbstate();	/* LC_CTYPE may have changed */
+    inittyptab();
 }
 #endif /* USE_LOCALE */
 
diff --git a/Src/utils.c b/Src/utils.c
index f13e3a79d..94a33453f 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -74,9 +74,6 @@ set_widearray(char *mb_array, Widechar_array wca)
     }
     wca->len = 0;
 
-    if (!isset(MULTIBYTE))
-	return;
-
     if (mb_array) {
 	VARARR(wchar_t, tmpwcs, strlen(mb_array));
 	wchar_t *wcptr = tmpwcs;
@@ -87,8 +84,7 @@ set_widearray(char *mb_array, Widechar_array wca)
 	    int mblen;
 
 	    if ((unsigned char) *mb_array <= 0x7f) {
-		mb_array++;
-		*wcptr++ = (wchar_t)*mb_array;
+		*wcptr++ = (wchar_t)*mb_array++;
 		continue;
 	    }
 
@@ -4121,8 +4117,9 @@ inittyptab(void)
      * having IIDENT here is a good idea at all, but this code
      * should disappear into history...
      */
-    for (t0 = 0240; t0 != 0400; t0++)
-	typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
+    if isset(MULTIBYTE)
+	for (t0 = 0240; t0 != 0400; t0++)
+	    typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
 #endif
     /* typtab['.'] |= IIDENT; */ /* Allow '.' in variable names - broken */
     typtab['_'] = IIDENT | IUSER;
@@ -4137,11 +4134,24 @@ inittyptab(void)
 	typtab[t0] |= ITOK | IMETA;
     for (t0 = (int) (unsigned char) Snull; t0 <= (int) (unsigned char) Nularg; t0++)
 	typtab[t0] |= ITOK | IMETA | INULL;
-    for (s = ifs ? ifs : EMULATION(EMULATE_KSH|EMULATE_SH) ?
-	DEFAULT_IFS_SH : DEFAULT_IFS; *s; s++) {
+     /* ifs */
+#define CURRENT_DEFAULT_IFS (EMULATION(EMULATE_KSH|EMULATE_SH) ? \
+			    DEFAULT_IFS_SH : DEFAULT_IFS)
+#ifdef MULTIBYTE_SUPPORT
+    if (isset(MULTIBYTE)) {
+	set_widearray(ifs ? ifs : CURRENT_DEFAULT_IFS, &ifs_wide);
+	if (ifs && !ifs_wide.chars) {
+	    zwarn("IFS has an invalid character; resetting IFS to default");
+	    zsfree(ifs);
+	    ifs = ztrdup(CURRENT_DEFAULT_IFS);
+	    set_widearray(ifs, &ifs_wide);
+	}
+    }
+#endif
+     for (s = ifs ? ifs : CURRENT_DEFAULT_IFS; *s; s++) {
 	int c = (unsigned char) (*s == Meta ? *++s ^ 32 : *s);
 #ifdef MULTIBYTE_SUPPORT
-	if (!isascii(c)) {
+	if (isset(MULTIBYTE) && !isascii(c)) {
 	    /* see comment for wordchars below */
 	    continue;
 	}
@@ -4154,10 +4164,15 @@ inittyptab(void)
 	}
 	typtab[c] |= ISEP;
     }
+    /* wordchars */
+#ifdef MULTIBYTE_SUPPORT
+    if (isset(MULTIBYTE))
+	set_widearray(wordchars, &wordchars_wide);
+#endif
     for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) {
 	int c = (unsigned char) (*s == Meta ? *++s ^ 32 : *s);
 #ifdef MULTIBYTE_SUPPORT
-	if (!isascii(c)) {
+	if (isset(MULTIBYTE) && !isascii(c)) {
 	    /*
 	     * If we have support for multibyte characters, we don't
 	     * handle non-ASCII characters here; instead, we turn
@@ -4170,11 +4185,6 @@ inittyptab(void)
 #endif
 	typtab[c] |= IWORD;
     }
-#ifdef MULTIBYTE_SUPPORT
-    set_widearray(wordchars, &wordchars_wide);
-    set_widearray(ifs ? ifs : EMULATION(EMULATE_KSH|EMULATE_SH) ?
-	DEFAULT_IFS_SH : DEFAULT_IFS, &ifs_wide);
-#endif
     for (s = SPECCHARS; *s; s++)
 	typtab[(unsigned char) *s] |= ISPECIAL;
     if (typtab_flags & ZTF_SP_COMMA)
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 2fd2f975f..0d44558a7 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -2280,6 +2280,27 @@ F:We do not care what $OLDPWD is, as long as it does not cause an error
 F:As of this writing, var=$@ and var="$@" with null IFS have unspecified
 F:behavior, see http://austingroupbugs.net/view.php?id=888
 
+  (
+  IFS=$'\x80'
+  if [[ $IFS = $' \t\n\0' ]]; then
+    echo OK     # if $'\x80' is illegal (e.g. Linux)
+  else          # otherwise (e.g. macOS), it should work as a separator
+    s=$'foo\x80\bar'
+    [[ ${${=s}[1]} = foo ]] && echo OK
+  fi
+  )
+0D:reset IFS to default if it contains illegal character
+>OK
+
+  (
+  unsetopt multibyte
+  IFS=$'\xc3\xa9'
+  s=$'foo\xc3bar\xa9boo'
+  echo ${${=s}[2]}
+  )
+0:eight bit chars in IFS should work if multibyte option is off
+>bar
+
   () {
     setopt localoptions extendedglob
     [[ $- = [[:alnum:]]## ]] || print Failed 1