22544: Improve use of ztype tests for multibyte characters. Add

POSIX_IDENTIFIERS option to control allowability of multibyte alphanumeric characters in parameter and module names.
author: Peter Stephenson <pws@users.sourceforge.net> 2006-07-10 13:08:22 +0000
committer: Peter Stephenson <pws@users.sourceforge.net> 2006-07-10 13:08:22 +0000
commit: 4a67f2479892fda348546404216270aaaff523ea (patch)
tree: 3157f967e4324cdf147aa656c021e55e96f29731
parent: 272256f5d6f4748aed680256589a67713e517383 (diff)
download: zsh-4a67f2479892fda348546404216270aaaff523ea.tar.gz
zsh-4a67f2479892fda348546404216270aaaff523ea.tar.xz
zsh-4a67f2479892fda348546404216270aaaff523ea.zip
18 files changed, 270 insertions, 110 deletions
diff --git a/ChangeLog b/ChangeLog
index 7ab600179..a51b5909b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2006-07-10  Peter Stephenson  <pws@csr.com>
+
+	* 22544: README, Doc/Zsh/options.yo, Src/builtin.c, Src/glob.c,
+	Src/lex.c, Src/math.c, Src/module.c, Src/options.c, Src/params.c,
+	Src/parse.c, Src/subst.c, Src/utils.c, Src/zsh.h, Src/ztype.h,
+	Src/Zle/compcore.c, Src/Zle/zle_tricky.c, Test/D07multibyte.ztst:
+	Improve use of ztype tests for multibyte characters.  Add
+	POSIX_IDENTIFIERS option to control allowability of multibyte
+	alphanumeric characters in parameter and module names.
+
 2006-07-09  Clint Adams  <clint@zsh.org>
 
 	* 22543: Completion/Unix/Command/_cssh: completion for
diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo
index 0fb87302e..589ed79cb 100644
--- a/Doc/Zsh/options.yo
+++ b/Doc/Zsh/options.yo
@@ -1204,6 +1204,27 @@ tt(times),
 tt(trap) and
 tt(unset).
 )
+pindex(POSIX_IDENTIFIERS)
+cindex(identifiers, non-portable characters in)
+cindex(parameter names, non-portable characters in)
+item(tt(POSIX_IDENTIFIERS) <K> <S>)(
+When this option is set, only the ASCII characters tt(a) to tt(z), tt(A) to
+tt(Z), tt(0) to tt(9) and tt(_) may be used in identifiers (names
+of shell parameters and modules).
+
+When the option is unset and multibyte character support is enabled (i.e. it
+is compiled in and the option tt(MULTIBYTE) is set), then additionally any
+alphanumeric characters in the local character set may be used in
+identifiers.  Note that scripts and functions written with this feature are
+not portable, and also that both options must be set before the script
+or function is parsed; setting them during execution is not sufficient
+as the syntax var(variable)tt(=)var(value) has already been parsed as
+a command rather than an assignment.
+
+If multibyte character support is not compiled into the shell this option is
+ignored; all octets with the top bit set may be used in identifiers.
+This is non-standard but is the traditional zsh behaviour.
+)
 pindex(SH_FILE_EXPANSION)
 cindex(sh, expansion style)
 cindex(expansion style, sh)
diff --git a/README b/README
index 91e1c1c40..c2d4dff88 100644
--- a/README
+++ b/README
@@ -50,11 +50,23 @@ other shell at startup; it must be present in the environment or set
 subsequently by the user.  It is valid for the variable to be unset.
 
 Zsh has previously been lax about whether it allows octets with the
-top bit set to be part of a shell identifier.  With --enable-multibyte set,
-this is now completely disabled.  This is a temporary fix until the main
-shell handles multibyte characters properly and the appropriate library
-tests can be used.  This change may be reviewed if no such permanent fix
-is forthcoming.
+top bit set to be part of a shell identifier.  Older versions of the shell
+assumed all such octets were allowed in identifiers, however the POSIX
+standard does not allow such characters in identifiers.  The older
+behaviour is still obtained with --disable-multibyte in effect.
+With --enable-multibyte set there are three possible cases:
+  MULTIBYTE option unset:  only ASCII characters are allowed; the
+    shell does not attempt to identify non-ASCII characters at all.
+  MULTIBYTE option set, POSIX_IDENTIFIERS option unset: in addition
+    to the POSIX characters, any alphanumeric characters in the
+    local character set are allowed.  Note that scripts and functions that
+    take advantage of this are non-portable; however, this is in the spirit
+    of previous versions of the shell.  Note also that the options must
+    be set before the shell parses the script or function; setting
+    them during execution is not sufficient.
+  MULITBYTE option set, POSIX_IDENTIFIERS set:  only ASCII characters
+    are allowed in identifiers even though the shell will recognise
+    alphanumeric multibyte characters.
 
 The completion style pine-directory must now be set to use completion
 for PINE mailbox folders; previously it had the default ~/mail.  This
diff --git a/Src/Zle/compcore.c b/Src/Zle/compcore.c
index 008f49185..38b1934e2 100644
--- a/Src/Zle/compcore.c
+++ b/Src/Zle/compcore.c
@@ -1081,7 +1081,7 @@ check_param(char *s, int set, int test)
     }
     if ((*p == String || *p == Qstring) && p[1] != Inpar && p[1] != Inbrack) {
 	/* This is really a parameter expression (not $(...) or $[...]). */
-	char *b = p + 1, *e = b;
+	char *b = p + 1, *e = b, *ie;
 	int n = 0, br = 1, nest = 0;
 
 	if (*b == Inbrace) {
@@ -1124,10 +1124,16 @@ check_param(char *s, int set, int test)
 	else if (idigit(*e))
 	    while (idigit(*e))
 		e++;
-	else if (iident(*e))
-	    while (iident(*e) ||
-		   (comppatmatch && *comppatmatch && (*e == Star || *e == Quest)))
-		e++;
+	else if ((ie = itype_end(e, IIDENT, 0)) != e) {
+	    do {
+		e = ie;
+		if (comppatmatch && *comppatmatch &&
+		    (*e == Star || *e == Quest))
+		    ie = e + 1;
+		else
+		    ie = itype_end(e, IIDENT, 0);
+	    } while (ie != e);
+	}
 
 	/* Now make sure that the cursor is inside the name. */
 	if (offs <= e - s && offs >= b - s && n <= 0) {
diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c
index 250804648..28857b03e 100644
--- a/Src/Zle/zle_tricky.c
+++ b/Src/Zle/zle_tricky.c
@@ -551,9 +551,8 @@ parambeg(char *s)
 	else if (idigit(*e))
 	    while (idigit(*e))
 		e++;
-	else if (iident(*e))
-	    while (iident(*e))
-		e++;
+	else
+	    e = itype_end(e, IIDENT, 0);
 
 	/* Now make sure that the cursor is inside the name. */
 	if (offs <= e - s && offs >= b - s && n <= 0) {
@@ -740,8 +739,7 @@ docomplete(int lst)
 			    else if (idigit(*q))
 				do q++; while (idigit(*q));
 			    else
-				while (iident(*q))
-				    q++;
+				q = itype_end(q, IIDENT, 0);
 			    sav = *q;
 			    *q = '\0';
 			    if (zlemetacs - wb == q - s &&
@@ -1293,7 +1291,7 @@ get_comp_string(void)
 	if (varq)
 	    tt = clwords[clwpos];
 
-	for (s = tt; iident(*s); s++);
+	s = itype_end(tt, IIDENT, 0);
 	sav = *s;
 	*s = '\0';
 	zsfree(varname);
@@ -1360,17 +1358,29 @@ get_comp_string(void)
      * as being in math.                                              */
     if (inwhat != IN_MATH) {
 	int i = 0;
-	char *nnb = (iident(*s) ? s : s + 1), *nb = NULL, *ne = NULL;
-	
-	for (tt = s; ++tt < s + zlemetacs - wb;)
+	char *nnb, *nb = NULL, *ne = NULL;
+
+	MB_METACHARINIT();
+	if (itype_end(s, IIDENT, 1) == s)
+	    nnb = s + MB_METACHARLEN(s);
+	else
+	    nnb = s;
+	for (tt = s; tt < s + zlemetacs - wb;) {
 	    if (*tt == Inbrack) {
 		i++;
 		nb = nnb;
 		ne = tt;
-	    } else if (i && *tt == Outbrack)
+		tt++;
+	    } else if (i && *tt == Outbrack) {
 		i--;
-	    else if (!iident(*tt))
-		nnb = tt + 1;
+		tt++;
+	    } else {
+		int nclen = MB_METACHARLEN(tt);
+		if (itype_end(tt, IIDENT, 1) == tt)
+		    nnb = tt + nclen;
+		tt += nclen;
+	    }
+	}
 	if (i) {
 	    inwhat = IN_MATH;
 	    insubscr = 1;
@@ -1415,33 +1425,59 @@ get_comp_string(void)
 	    /* In mathematical expression, we complete parameter names  *
 	     * (even if they don't have a `$' in front of them).  So we *
 	     * have to find that name.                                  */
-	    for (we = zlemetacs; iident(zlemetaline[we]); we++);
-	    for (wb = zlemetacs; --wb >= 0 && iident(zlemetaline[wb]););
-	    wb++;
+	    char *cspos = zlemetaline + zlemetacs, *wptr, *cptr;
+	    we = itype_end(cspos, IIDENT, 0) - cspos;
+
+	    /*
+	     * With multibyte characters we need to go forwards,
+	     * so start at the beginning of the line and continue
+	     * until cspos.
+	     */
+	    wptr = cptr = zlemetaline;
+	    for (;;) {
+		cptr = itype_end(wptr, IIDENT, 0);
+		if (cptr == wptr) {
+		    /* not an ident character */
+		    wptr = (cptr += MB_METACHARLEN(cptr));
+		}
+		if (cptr >= cspos) {
+		    wb = wptr - zlemetaline;
+		    break;
+		}
+	    }
 	}
 	zsfree(s);
 	s = zalloc(we - wb + 1);
 	strncpy(s, zlemetaline + wb, we - wb);
 	s[we - wb] = '\0';
-	if (wb > 2 && zlemetaline[wb - 1] == '[' &&
-	    iident(zlemetaline[wb - 2])) {
-	    int i = wb - 3;
-	    char sav = zlemetaline[wb - 1];
 
-	    while (i >= 0 && iident(zlemetaline[i]))
-		i--;
+	if (wb > 2 && zlemetaline[wb - 1] == '[') {
+	    char *sqbr = zlemetaline + wb - 1, *cptr, *wptr;
 
-	    zlemetaline[wb - 1] = '\0';
-	    zsfree(varname);
-	    varname = ztrdup(zlemetaline + i + 1);
-	    zlemetaline[wb - 1] = sav;
-	    if ((keypm = (Param) paramtab->getnode(paramtab, varname)) &&
-		(keypm->node.flags & PM_HASHED)) {
-		if (insubscr != 3)
-		    insubscr = 2;
-	    } else
-		insubscr = 1;
+	    /* Need to search forward for word characters */
+	    cptr = wptr = zlemetaline;
+	    for (;;) {
+		cptr = itype_end(wptr, IIDENT, 0);
+		if (cptr == wptr) {
+		    /* not an ident character */
+		    wptr = (cptr += MB_METACHARLEN(cptr));
+		}
+		if (cptr >= sqbr)
+		    break;
+	    }
+
+	    if (wptr < sqbr) {
+		zsfree(varname);
+		varname = ztrduppfx(wptr, sqbr - wptr);
+		if ((keypm = (Param) paramtab->getnode(paramtab, varname)) &&
+		    (keypm->node.flags & PM_HASHED)) {
+		    if (insubscr != 3)
+			insubscr = 2;
+		} else
+		    insubscr = 1;
+	    }
 	}
+
 	parse_subst_string(s);
     }
     /* This variable will hold the current word in quoted form. */
@@ -1562,12 +1598,12 @@ get_comp_string(void)
 			*tp == '@')
 			p++, i++;
 		    else {
+			char *ie;
 			if (idigit(*tp))
 			    while (idigit(*tp))
 				tp++;
-			else if (iident(*tp))
-			    while (iident(*tp))
-				tp++;
+			else if ((ie = itype_end(tp, IIDENT, 0)) != tp)
+			    tp = ie;
 			else {
 			    tt = NULL;
 			    break;
diff --git a/Src/builtin.c b/Src/builtin.c
index ff396fb47..71dcbffc3 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -2629,9 +2629,7 @@ bin_functions(char *name, char **argv, Options ops, int func)
 	    char *modname = NULL;
 	    char *ptr;
 
-	    for (ptr = funcname; *ptr; ptr++)
-		if (!iident(*ptr))
-		    break;
+	    ptr = itype_end(funcname, IIDENT, 0);
 	    if (idigit(*funcname) || funcname == ptr || *ptr) {
 		zwarnnam(name, "-M %s: bad math function name", funcname);
 		return 1;
diff --git a/Src/glob.c b/Src/glob.c
index 3a1b28784..26b288efc 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -1443,9 +1443,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 
 		    if (s[-1] == '+') {
 			plus = 0;
-			tt = s;
-			while (iident(*tt))
-			    tt++;
+			tt = itype_end(s, IIDENT, 0);
 			if (tt == s)
 			{
 			    zerr("missing identifier after `+'");
diff --git a/Src/lex.c b/Src/lex.c
index 635e847d2..57b752309 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -1135,10 +1135,13 @@ gettokstr(int c, int sub)
 		if (idigit(*t))
 		    while (++t < bptr && idigit(*t));
 		else {
-		    while (iident(*t) && ++t < bptr);
+		    int sav = *bptr;
+		    *bptr = '\0';
+		    t = itype_end(t, IIDENT, 0);
 		    if (t < bptr) {
-			*bptr = '\0';
 			skipparens(Inbrack, Outbrack, &t);
+		    } else {
+			*bptr = sav;
 		    }
 		}
 		if (*t == '+')
diff --git a/Src/math.c b/Src/math.c
index bd48288ec..fe5f7b74b 100644
--- a/Src/math.c
+++ b/Src/math.c
@@ -265,11 +265,12 @@ zzlex(void)
 {
     int cct = 0;
     yyval.type = MN_INTEGER;
+    char *ie;
 
     for (;; cct = 0)
 	switch (*ptr++) {
 	case '+':
-	    if (*ptr == '+' && (unary || !ialnum(*ptr))) {
+	    if (*ptr == '+') {
 		ptr++;
 		return (unary) ? PREPLUS : POSTPLUS;
 	    }
@@ -279,7 +280,7 @@ zzlex(void)
 	    }
 	    return (unary) ? UPLUS : PLUS;
 	case '-':
-	    if (*ptr == '-' && (unary || !ialnum(*ptr))) {
+	    if (*ptr == '-') {
 		ptr++;
 		return (unary) ? PREMINUS : POSTMINUS;
 	    }
@@ -469,12 +470,12 @@ zzlex(void)
 		}
 		cct = 1;
 	    }
-	    if (iident(*ptr)) {
+	    if ((ie = itype_end(ptr, IIDENT, 0)) != ptr) {
 		int func = 0;
 		char *p;
 
 		p = ptr;
-		while (iident(*++ptr));
+		ptr = ie;
 		if (*ptr == '[' || (!cct && *ptr == '(')) {
 		    char op = *ptr, cp = ((*ptr == '[') ? ']' : ')');
 		    int l;
diff --git a/Src/module.c b/Src/module.c
index fde78ac1d..398958cce 100644
--- a/Src/module.c
+++ b/Src/module.c
@@ -734,12 +734,8 @@ static int
 modname_ok(char const *p)
 {
     do {
-	if(*p != '_' && !ialnum(*p))
-	    return 0;
-	do {
-	    p++;
-	} while(*p == '_' || ialnum(*p));
-	if(!*p)
+	p = itype_end(p, IIDENT, 0);
+	if (!*p)
 	    return 1;
     } while(*p++ == '/');
     return 0;
diff --git a/Src/options.c b/Src/options.c
index 6c3fb26d1..307bd5430 100644
--- a/Src/options.c
+++ b/Src/options.c
@@ -176,6 +176,7 @@ static struct optname optns[] = {
 {{NULL, "overstrike",	      0},			 OVERSTRIKE},
 {{NULL, "pathdirs",	      OPT_EMULATE},		 PATHDIRS},
 {{NULL, "posixbuiltins",      OPT_EMULATE|OPT_BOURNE},	 POSIXBUILTINS},
+{{NULL, "posixidentifiers",   OPT_EMULATE|OPT_BOURNE},	 POSIXIDENTIFIERS},
 {{NULL, "printeightbit",      0},                        PRINTEIGHTBIT},
 {{NULL, "printexitvalue",     0},			 PRINTEXITVALUE},
 {{NULL, "privileged",	      OPT_SPECIAL},		 PRIVILEGED},
diff --git a/Src/params.c b/Src/params.c
index f589a740e..17ce2c54d 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -899,9 +899,7 @@ isident(char *s)
 		break;
     } else {
 	/* Find the first character in `s' not in the iident type table */
-	for (ss = s; *ss; ss++)
-	    if (!iident(*ss))
-		break;
+	ss = itype_end(s, IIDENT, 0);
     }
 
     /* If the next character is not [, then it is *
@@ -1653,7 +1651,7 @@ getvalue(Value v, char **pptr, int bracks)
 mod_export Value
 fetchvalue(Value v, char **pptr, int bracks, int flags)
 {
-    char *s, *t;
+    char *s, *t, *ie;
     char sav, c;
     int ppar = 0;
 
@@ -1665,9 +1663,8 @@ fetchvalue(Value v, char **pptr, int bracks, int flags)
 	else
 	    ppar = *s++ - '0';
     }
-    else if (iident(c))
-	while (iident(*s))
-	    s++;
+    else if ((ie = itype_end(s, IIDENT, 0)) != s)
+	s = ie;
     else if (c == Quest)
 	*s++ = '?';
     else if (c == Pound)
@@ -1732,7 +1729,7 @@ fetchvalue(Value v, char **pptr, int bracks, int flags)
 		return v;
 	    }
 	} else if (!(flags & SCANPM_ASSIGNING) && v->isarr &&
-		   iident(*t) && isset(KSHARRAYS))
+		   itype_end(t, IIDENT, 1) != t && isset(KSHARRAYS))
 	    v->end = 1, v->isarr = 0;
     }
     if (!bracks && *s)
diff --git a/Src/parse.c b/Src/parse.c
index 61ef9dcdc..17a24f2e9 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -1603,10 +1603,7 @@ par_simple(int *complex, int nr)
 
 		if (*ptr == Outbrace && ptr > tokstr + 1)
 		{
-		    while (--ptr > tokstr)
-			if (!iident(*ptr))
-			    break;
-		    if (ptr == tokstr)
+		    if (itype_end(tokstr, IIDENT, 0) >= ptr - 1)
 		    {
 			char *toksave = tokstr;
 			char *idstring = dupstrpfx(tokstr+1, eptr-tokstr-1);
diff --git a/Src/subst.c b/Src/subst.c
index 12df115a0..821c1c79a 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -475,15 +475,14 @@ filesubstr(char **namptr, int assign)
 		return 0;
 	    *namptr = dyncat(ds, ptr);
 	    return 1;
-	} else if (iuser(str[1])) {   /* ~foo */
-	    char *ptr, *hom, save;
+	} else if ((ptr = itype_end(str+1, IUSER, 0)) != str+1) {   /* ~foo */
+	    char *hom, save;
 
-	    for (ptr = ++str; *ptr && iuser(*ptr); ptr++);
 	    save = *ptr;
 	    if (!isend(save))
 		return 0;
 	    *ptr = 0;
-	    if (!(hom = getnameddir(str))) {
+	    if (!(hom = getnameddir(++str))) {
 		if (isset(NOMATCH))
 		    zerr("no such user or named directory: %s", str);
 		*ptr = save;
@@ -1146,9 +1145,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
      * Shouldn't this be a table or something?  We test for all
      * these later on, too.
      */
-    if (!ialnum(c = *s) && c != '#' && c != Pound && c != '-' &&
-	c != '!' && c != '$' && c != String && c != Qstring &&
-	c != '?' && c != Quest && c != '_' &&
+    c = *s;
+    if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound &&
+	c != '-' && c != '!' && c != '$' && c != String && c != Qstring &&
+	c != '?' && c != Quest &&
 	c != '*' && c != Star && c != '@' && c != '{' &&
 	c != Inbrace && c != '=' && c != Equals && c != Hat &&
 	c != '^' && c != '~' && c != Tilde && c != '+') {
@@ -1446,8 +1446,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    } else
 		spbreak = 2;
 	} else if ((c == '#' || c == Pound) &&
-		   (iident(cc = s[1])
-		    || cc == '*' || cc == Star || cc == '@'
+		   (itype_end(s+1, IIDENT, 0) != s + 1
+		    || (cc = s[1]) == '*' || cc == Star || cc == '@'
 		    || cc == '-' || (cc == ':' && s[2] == '-')
 		    || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) {
 	    getlen = 1 + whichlen, s++;
@@ -1471,7 +1471,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	     * Try to handle this when parameter is named
 	     * by (P) (second part of test).
 	     */
-	    if (iident(s[1]) || (aspar && isstring(s[1]) &&
+	    if (itype_end(s+1, IIDENT, 0) != s+1 || (aspar && isstring(s[1]) &&
 				 (s[2] == Inbrace || s[2] == Inpar)))
 		chkset = 1, s++;
 	    else if (!inbrace) {
diff --git a/Src/utils.c b/Src/utils.c
index 75a736596..0d6cd8866 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -1921,7 +1921,7 @@ spckword(char **s, int hist, int cmd, int ask)
 	return;
     if (**s == String && !*t) {
 	guess = *s + 1;
-	if (*t || !ialpha(*guess))
+	if (itype_end(guess, IIDENT, 1) == guess)
 	    return;
 	ic = String;
 	d = 100;
@@ -2750,11 +2750,8 @@ wcsiword(wchar_t c)
  * iident() macro extended to support wide characters.
  *
  * The macro is intended to test if a character is allowed in an
- * internal zsh identifier.  Until the main shell handles multibyte
- * characters it's not a good idea to allow characters other than
- * ASCII characters; it would cause zle to allow characters that
- * the main shell would reject.  Eventually we should be able
- * to allow all alphanumerics.
+ * internal zsh identifier.  We allow all alphanumerics outside
+ * the ASCII range unless POSIXIDENTIFIERS is set.
  *
  * Otherwise similar to wcsiword.
  */
@@ -2774,14 +2771,90 @@ wcsiident(wchar_t c)
     } else if (len == 1 && iascii(*outstr)) {
 	return iident(*outstr);
     } else {
-	/* TODO: not currently allowed, see above */
-	return 0;
+	return !isset(POSIXIDENTIFIERS) && iswalnum(c);
     }
 }
 /**/
 #endif
 
 
+/*
+ * Find the end of a set of characters in the set specified by itype;
+ * one of IALNUM, IIDENT, IWORD or IUSER.  For non-ASCII characters, we assume
+ * alphanumerics are part of the set, with the exception that
+ * identifiers are not treated that way if POSIXIDENTIFIERS is set.
+ *
+ * See notes above for identifiers.
+ * Returns the same pointer as passed if not on an identifier character.
+ * If "once" is set, just test the first character, i.e. (outptr !=
+ * inptr) tests whether the first character is valid in an identifier.
+ *
+ * Currently this is only called with itype IIDENT or IUSER.
+ */
+
+/**/
+mod_export char *
+itype_end(const char *ptr, int itype, int once)
+{
+#ifdef MULTIBYTE_SUPPORT
+    if (isset(MULTIBYTE) &&
+	(itype != IIDENT || !isset(POSIXIDENTIFIERS))) {
+	mb_metacharinit();
+	while (*ptr) {
+	    wint_t wc;
+	    int len = mb_metacharlenconv(ptr, &wc);
+
+	    if (!len)
+		break;
+
+	    if (wc == WEOF) {
+		/* invalid, treat as single character */
+		int chr = STOUC(*ptr == Meta ? ptr[1] ^ 32 : *ptr);
+		/* in this case non-ASCII characters can't match */
+		if (chr > 127 || !zistype(chr,itype))
+		    break;
+	    } else if (len == 1 && iascii(*ptr)) {
+		/* ASCII: can't be metafied, use standard test */
+		if (!zistype(*ptr,itype))
+		    break;
+	    } else {
+		/*
+		 * Valid non-ASCII character.  Allow all alphanumerics;
+		 * if testing for words, allow all wordchars.
+		 */
+		if (!(iswalnum(wc) ||
+		      (itype == IWORD && wcschr(wordchars_wide, wc))))
+		    break;
+	    }
+	    ptr += len;
+
+	    if (once)
+		break;
+	}
+    } else
+#endif
+	for (;;) {
+	    int chr = STOUC(*ptr == Meta ? ptr[1] ^ 32 : *ptr);
+	    if (!zistype(chr,itype))
+		break;
+	    ptr += (*ptr == Meta) ? 2 : 1;
+
+	    if (once)
+		break;
+	}
+
+    /*
+     * Nasty.  The first argument is const char * because we
+     * don't modify it here.  However, we really want to pass
+     * back the same type as was passed down, to allow idioms like
+     *   p = itype_end(p, IIDENT, 0);
+     * So returning a const char * isn't really the right thing to do.
+     * Without having two different functions the following seems
+     * to be the best we can do.
+     */
+    return (char *)ptr;
+}
+
 /**/
 mod_export char **
 arrdup(char **s)
@@ -3710,9 +3783,10 @@ mb_metacharinit(void)
 
 /**/
 int
-mb_metacharlenconv(char *s, wint_t *wcp)
+mb_metacharlenconv(const char *s, wint_t *wcp)
 {
-    char inchar, *ptr;
+    char inchar;
+    const char *ptr;
     size_t ret;
     wchar_t wc;
 
diff --git a/Src/zsh.h b/Src/zsh.h
index 8554b5c96..b5f675db5 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1610,6 +1610,7 @@ enum {
     OVERSTRIKE,
     PATHDIRS,
     POSIXBUILTINS,
+    POSIXIDENTIFIERS,
     PRINTEIGHTBIT,
     PRINTEXITVALUE,
     PRIVILEGED,
diff --git a/Src/ztype.h b/Src/ztype.h
index b8bc584db..7b7973602 100644
--- a/Src/ztype.h
+++ b/Src/ztype.h
@@ -42,22 +42,22 @@
 #define IMETA    (1 << 12)
 #define IWSEP    (1 << 13)
 #define INULL    (1 << 14)
-#define _icom(X,Y) (typtab[STOUC(X)] & Y)
-#define idigit(X) _icom(X,IDIGIT)
-#define ialnum(X) _icom(X,IALNUM)
-#define iblank(X) _icom(X,IBLANK)	/* blank, not including \n */
-#define inblank(X) _icom(X,INBLANK)	/* blank or \n */
-#define itok(X) _icom(X,ITOK)
-#define isep(X) _icom(X,ISEP)
-#define ialpha(X) _icom(X,IALPHA)
-#define iident(X) _icom(X,IIDENT)
-#define iuser(X) _icom(X,IUSER)	/* username char */
-#define icntrl(X) _icom(X,ICNTRL)
-#define iword(X) _icom(X,IWORD)
-#define ispecial(X) _icom(X,ISPECIAL)
-#define imeta(X) _icom(X,IMETA)
-#define iwsep(X) _icom(X,IWSEP)
-#define inull(X) _icom(X,INULL)
+#define zistype(X,Y) (typtab[STOUC(X)] & Y)
+#define idigit(X) zistype(X,IDIGIT)
+#define ialnum(X) zistype(X,IALNUM)
+#define iblank(X) zistype(X,IBLANK)	/* blank, not including \n */
+#define inblank(X) zistype(X,INBLANK)	/* blank or \n */
+#define itok(X) zistype(X,ITOK)
+#define isep(X) zistype(X,ISEP)
+#define ialpha(X) zistype(X,IALPHA)
+#define iident(X) zistype(X,IIDENT)
+#define iuser(X) zistype(X,IUSER)	/* username char */
+#define icntrl(X) zistype(X,ICNTRL)
+#define iword(X) zistype(X,IWORD)
+#define ispecial(X) zistype(X,ISPECIAL)
+#define imeta(X) zistype(X,IMETA)
+#define iwsep(X) zistype(X,IWSEP)
+#define inull(X) zistype(X,INULL)
 
 #define iascii(X) isascii(STOUC(X))
 #define ilower(X) islower(STOUC(X))
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index c30bf1172..85aa24ade 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -165,3 +165,12 @@
 >165
 >163
 >945 945
+
+  unsetopt posix_identifiers
+  expr='hähä=3 || exit 1; print $hähä'
+  eval $expr
+  setopt posix_identifiers
+  (eval $expr)
+1:POSIX_IDENTIFIERS option
+>3
+?(eval):1: command not found: hähä=3
author	Peter Stephenson <pws@users.sourceforge.net>	2006-07-10 13:08:22 +0000
committer	Peter Stephenson <pws@users.sourceforge.net>	2006-07-10 13:08:22 +0000
commit	4a67f2479892fda348546404216270aaaff523ea (patch)
tree	3157f967e4324cdf147aa656c021e55e96f29731
parent	272256f5d6f4748aed680256589a67713e517383 (diff)
download	zsh-4a67f2479892fda348546404216270aaaff523ea.tar.gz zsh-4a67f2479892fda348546404216270aaaff523ea.tar.xz zsh-4a67f2479892fda348546404216270aaaff523ea.zip