about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- Doc/Zsh/options.yo 30
-rwxr-xr-x Misc/globtests 1
-rw-r--r-- Src/options.c 8
-rw-r--r-- Src/pattern.c 4
-rw-r--r-- Test/D02glob.ztst 9
-rw-r--r-- Test/D07multibyte.ztst 38
7 files changed, 79 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 62f8cec19..cfc6373e3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2006-07-25  Peter Stephenson  <pws@csr.com>
 
+	* 22557: Doc/Zsh/options.yo, Misc/globtests, Src/options.c,
+	Src/pattern.c, Test/D02glob.ztst, Test/D07multibyte.ztst:
+	Turn on multibyte option by default for MULTIBYTE_SUPPORT and fix
+	tests and patterns.
+
 	* unposted: Src/pattern.c, Src/utils.c: minor typos in
 	22556 found when MULTIBYTE_SUPPORT is not defined.
 
diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo
index 589ed79cb..02d8fa046 100644
--- a/Doc/Zsh/options.yo
+++ b/Doc/Zsh/options.yo
@@ -411,19 +411,31 @@ item(tt(MARK_DIRS) (tt(-8), ksh: tt(-X)))(
 Append a trailing `tt(/)' to all directory
 names resulting from filename generation (globbing).
 )
-pindex(MULTIBYTE)
+pindex(MULTIBYTE <D>)
 cindex(characters, multibyte, in expansion and globbing)
 cindex(multibyte characters, in expansion and globbing)
 item(tt(MULTIBYTE))(
-Respect multibyte characters when found during pattern matching.
-When this option is set, characters strings are examined using the
+Respect multibyte characters when found in strings.
+When this option is set, strings are examined using the
 system library to determine how many bytes form a character, depending
-on the current locale.  If the option is unset
-(or the shell was not compiled with the configuration option
-tt(MULTIBYTE_SUPPORT)) a single byte is always treated as a single
-character.  The option will eventually be extended to cover expansion.
-Note, however, that it does not affect the shellʼs editor, which always
-uses the locale to determine multibyte characters.
+on the current locale.  This affects the way characters are counted in
+pattern matching, parameter values and various delimiters.
+
+The option is on by default if the shell was compiled with
+tt(MULTIBYTE_SUPPORT); otherwise it is off by default and has no effect if
+turned on.
+
+If the option is off a single byte is always treated as a single
+character.  This setting is designed purely for examining strings
+known to contain raw bytes or other values that may not be characters
+in the current locale.  It is not necessary to unset the option merely
+because the character set for the current locale does not contain multibyte
+characters.
+
+The option does not affect the shell's editor,  which always uses the
+locale to determine multibyte characters.  This is because
+the character set displayed by the terminal emulator is independent of
+shell settings.
 )
 pindex(NOMATCH)
 cindex(globbing, no matches)
diff --git a/Misc/globtests b/Misc/globtests
index 232fe3daa..a5f7c4a00 100755
--- a/Misc/globtests
+++ b/Misc/globtests
@@ -182,6 +182,5 @@ f atest/path    *((#s)|/)test((#e)|/)*
 f path/testy    *((#s)|/)test((#e)|/)*
 f path/testy/ohyes *((#s)|/)test((#e)|/)*
 f path/atest/ohyes *((#s)|/)test((#e)|/)*
-t bjrn		*[]*
 EOT
 print "$failed tests failed."
diff --git a/Src/options.c b/Src/options.c
index 307bd5430..05e878687 100644
--- a/Src/options.c
+++ b/Src/options.c
@@ -166,7 +166,13 @@ static struct optname optns[] = {
 {{NULL, "markdirs",	      0},			 MARKDIRS},
 {{NULL, "menucomplete",	      0},			 MENUCOMPLETE},
 {{NULL, "monitor",	      OPT_SPECIAL},		 MONITOR},
-{{NULL, "multibyte",	      0/*TBD*/},		 MULTIBYTE},
+{{NULL, "multibyte",
+#ifdef MULTIBYTE_SUPPORT
+			      OPT_ALL
+#else
+			      0
+#endif
+			      },			 MULTIBYTE},
 {{NULL, "multios",	      OPT_EMULATE|OPT_ZSH},	 MULTIOS},
 {{NULL, "nomatch",	      OPT_EMULATE|OPT_NONBOURNE},NOMATCH},
 {{NULL, "notify",	      OPT_ZSH},			 NOTIFY},
diff --git a/Src/pattern.c b/Src/pattern.c
index 24077768d..9ae00ca94 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -343,7 +343,7 @@ metacharinc(char **x)
     /* Error.  Treat as single byte. */
     /* Reset the shift state for next time. */
     memset(&shiftstate, 0, sizeof(shiftstate));
-    return (wchar_t) *(*x)++;
+    return (wchar_t) STOUC(*(*x)++);
 }
 
 #else
@@ -595,7 +595,7 @@ patcompile(char *exp, int inflags, char **endexp)
 			while (oplen--) {
 			    if (imeta(*opnd)) {
 				*dst++ = Meta;
-				*dst++ = *opnd ^ 32;
+				*dst++ = *opnd++ ^ 32;
 			    } else {
 				*dst++ = *opnd++;
 			    }
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index 409a73e30..7c76414f0 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -6,7 +6,9 @@
   mkdir glob.tmp/dir3/subdir
   : >glob.tmp/{,{dir1,dir2}/}{a,b,c}
 
-  globtest () { $ZTST_testdir/../Src/zsh -f $ZTST_srcdir/../Misc/$1 }
+  globtest () {
+    $ZTST_testdir/../Src/zsh -f $ZTST_srcdir/../Misc/$1
+  }
 
   regress_absolute_path_and_core_dump() {
     local absolute_dir=$(cd glob.tmp && pwd -P)
@@ -175,7 +177,6 @@
 >1:  [[ path/testy = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ path/testy/ohyes = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ path/atest/ohyes = *((#s)|/)test((#e)|/)* ]]
->0:  [[ bjrn = *[]* ]]
 >0 tests failed.
 
   globtest globtests.ksh
@@ -263,6 +264,10 @@
 >0:  [[ Modules = (#i)*m* ]]
 >0 tests failed.
 
+ (unsetopt multibyte
+ [[ bjrn = *[]* ]])
+0:single byte match with top bit set
+
   ( regress_absolute_path_and_core_dump )
 0:exclusions regression test
 >
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 683e8350e..263a7a44e 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -176,7 +176,7 @@
 ?(eval):1: command not found: hähä=3
 
   foo="Ølaf«Ødd«øpénëd«ån«àpple"
-  print -l ${(s.«.)foo}         
+  print -l ${(s.«.)foo}
   ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
   print -l ${=ioh}
   print ${(w)#ioh}
@@ -228,3 +228,39 @@
 0:read multibyte characters
 <«»ignored
 >«»
+
+  # See if the system grokks first-century Greek...
+  ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
+  for (( i = 1; i <= ${#ioh}; i++ )); do
+    # FC3 doesn't recognise ῇ (U+1FC7: Greek small letter eta with
+    # perispomeni and ypogegrammeni, of course) as a lower case character.
+    if [[ $ioh[i] != [[:lower:]] && $i != 7 ]]; then
+      for tp in upper space punct invalid; do
+        if [[ $tp = invalid || $ioh[i] = [[:${tp}:]] ]]; then
+          print "$i: $tp"
+	  break
+	fi
+      done
+    fi
+  done
+0:isw* functions on non-ASCII wide characters
+>1: upper
+>3: space
+>8: space
+>11: space
+>13: space
+>19: punct
+>20: space
+>24: space
+>26: space
+>32: space
+>35: space
+>40: space
+>44: space
+>49: punct
+>50: space
+>54: space
+>59: space
+>62: space
+>64: space
+>70: punct