25138(? mailing list stuck): rewrite of completion matching.

Will one day use multibyte/wide characters, doesn't yet.
author: Peter Stephenson <pws@users.sourceforge.net> 2008-06-08 17:53:53 +0000
committer: Peter Stephenson <pws@users.sourceforge.net> 2008-06-08 17:53:53 +0000
commit: bb68ee8db7971b683fba7dd7bf404186872ba7cf (patch)
tree: ce1f5ebe661c12386675eb3ca98521280d521a0b /Src
parent: 2dcc8627c9f4be547e6e49c76d45bea70c714a71 (diff)
download: zsh-bb68ee8db7971b683fba7dd7bf404186872ba7cf.tar.gz
zsh-bb68ee8db7971b683fba7dd7bf404186872ba7cf.tar.xz
zsh-bb68ee8db7971b683fba7dd7bf404186872ba7cf.zip
6 files changed, 1468 insertions, 287 deletions
diff --git a/Src/Zle/comp.h b/Src/Zle/comp.h
index bcad3df05..7ac051c25 100644
--- a/Src/Zle/comp.h
+++ b/Src/Zle/comp.h
@@ -162,12 +162,49 @@ struct cmatcher {
 #define CMF_RIGHT 4
 #define CMF_INTER 8
 
+/*
+ * Types of cpattern structure.
+ * Note freecpattern() assumes any <= CPAT_EQUIV have string.
+ */
+enum {
+    CPAT_CCLASS,		/* [...]: ordinary character class */
+    CPAT_NCLASS,		/* [!...]: ordinary character class, negated */
+    CPAT_EQUIV,			/* {...}: equivalence class */
+    CPAT_ANY,			/* ?: any character */
+    CPAT_CHAR			/* Single character given explicitly */
+};
+
+/*
+ * A pattern element in a matcher specification.
+ * Unlike normal patterns this only presents one character in
+ * either the test completion or the word on the command line.
+ */
 struct cpattern {
     Cpattern next;		/* next sub-pattern */
-    unsigned char tab[256];	/* table of matched characters */
-    int equiv;			/* if this is a {...} class */
+    int tp;			/* type of object as above */
+    union {
+	char *str;		/* if a character class, the objects
+				 * in it in a similar form to normal
+				 * pattern matching (a metafied string
+				 * with tokens).
+				 * Note the allocated length may be longer
+				 * than the null-terminated string.
+				 */
+	int chr;		/* if a single character, it
+				 * TODO: eventually should be a
+				 * convchar_t.
+				 */
+    } u;
 };
 
+/*
+ * For now this just handles single-byte characters.
+ * TODO: this will change.
+ */
+#define PATMATCHRANGE(r, c, ip, mtp)	patmatchrange(r, c, ip, mtp)
+#define PATMATCHINDEX(r, i, cp, mtp)	patmatchindex(r, i, cp, mtp)
+#define CONVCAST(c)	(c)
+
 /* This is a special return value for parse_cmatcher(), *
  * signalling an error. */
 
diff --git a/Src/Zle/complete.c b/Src/Zle/complete.c
index 36c4c8a3d..4b947834b 100644
--- a/Src/Zle/complete.c
+++ b/Src/Zle/complete.c
@@ -122,13 +122,15 @@ freecpattern(Cpattern p)
 
     while (p) {
 	n = p->next;
+	if (p->tp <= CPAT_EQUIV)
+	    free(p->u.str);
 	zfree(p, sizeof(struct cpattern));
 
 	p = n;
     }
 }
 
-/* Copy a completion matcher list. */
+/* Copy a completion matcher list into permanent storage. */
 
 /**/
 mod_export Cmatcher
@@ -157,22 +159,51 @@ cpcmatcher(Cmatcher m)
     return r;
 }
 
+/*
+ * Copy a single entry in a matcher pattern.
+ * If useheap is 1, it comes from the heap.
+ */
+
+/**/
+mod_export Cpattern
+cp_cpattern_element(Cpattern o)
+{
+    Cpattern n = zalloc(sizeof(struct cpattern));
+
+    n->next = NULL;
+
+    n->tp = o->tp;
+    switch (o->tp)
+    {
+    case CPAT_CCLASS:
+    case CPAT_NCLASS:
+    case CPAT_EQUIV:
+	n->u.str = ztrdup(o->u.str);
+	break;
+
+    case CPAT_CHAR:
+	n->u.chr = o->u.chr;
+	break;
+
+    default:
+	/* just to keep compiler quiet */
+	break;
+    }
+
+    return n;
+}
+
 /* Copy a completion matcher pattern. */
 
 /**/
 static Cpattern
 cpcpattern(Cpattern o)
 {
-    Cpattern r = NULL, *p = &r, n;
+    Cpattern r = NULL, *p = &r;
 
     while (o) {
-	*p = n = (Cpattern) zalloc(sizeof(struct cpattern));
-
-	n->next = NULL;
-	memcpy(n->tab, o->tab, 256);
-	n->equiv = o->equiv;
-
-	p = &(n->next);
+	*p = cp_cpattern_element(o);
+	p = &((*p)->next);
 	o = o->next;
     }
     return r;
@@ -331,14 +362,26 @@ parse_cmatcher(char *name, char *s)
     return ret;
 }
 
-/* Parse a pattern for matcher control. */
+/*
+ * Parse a pattern for matcher control. 
+ * name is the name of the builtin from which this is called, for errors.
+ * *sp is the input string and will be updated to the end of the parsed
+ *   pattern.
+ * *lp will be set to the number of characters (possibly multibyte)
+ *   that the pattern will match.  This must be deterministic, given
+ *   the syntax allowed here.
+ * e, if non-zero, is the ASCII end character to match; if zero,
+ *   stop on a blank.
+ * *err is set to 1 to indicate an error, else to 0.
+ */
 
 /**/
 static Cpattern
 parse_pattern(char *name, char **sp, int *lp, char e, int *err)
 {
     Cpattern ret = NULL, r = NULL, n;
-    unsigned char *s = (unsigned char *) *sp;
+    char *s = *sp;
+    int inchar;
     int l = 0;
 
     *err = 0;
@@ -346,25 +389,18 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
     while (*s && (e ? (*s != e) : !inblank(*s))) {
 	n = (Cpattern) hcalloc(sizeof(*n));
 	n->next = NULL;
-	n->equiv = 0;
 
-	if (*s == '[') {
-	    s = parse_class(n, s + 1, ']');
-	    if (!*s) {
-		*err = 1;
-		zwarnnam(name, "unterminated character class");
-		return NULL;
-	    }
-	} else if (*s == '{') {
-	    n->equiv = 1;
-	    s = parse_class(n, s + 1, '}');
+	if (*s == '[' || *s == '{') {
+	    s = parse_class(n, s);
 	    if (!*s) {
 		*err = 1;
 		zwarnnam(name, "unterminated character class");
 		return NULL;
 	    }
+	    s++;
 	} else if (*s == '?') {
-	    memset(n->tab, 1, 256);
+	    n->tp = CPAT_ANY;
+	    s++;
 	} else if (*s == '*' || *s == '(' || *s == ')' || *s == '=') {
 	    *err = 1;
 	    zwarnnam(name, "invalid pattern character `%c'", *s);
@@ -373,8 +409,13 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
 	    if (*s == '\\' && s[1])
 		s++;
 
-	    memset(n->tab, 0, 256);
-	    n->tab[*s] = 1;
+	    if (*s == Meta)
+		inchar = STOUC(*++s) ^ 32;
+	    else
+		inchar = STOUC(*s);
+	    s++;
+	    n->tp = CPAT_CHAR;
+	    n->u.chr = inchar;
 	}
 	if (ret)
 	    r->next = n;
@@ -384,7 +425,6 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
 	r = n;
 
 	l++;
-	s++;
     }
     *sp = (char *) s;
     *lp = l;
@@ -394,28 +434,86 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
 /* Parse a character class for matcher control. */
 
 /**/
-static unsigned char *
-parse_class(Cpattern p, unsigned char *s, unsigned char e)
+static char *
+parse_class(Cpattern p, char *iptr)
 {
-    int n = 0, i = 1, j, eq = (e == '}'), k = 1;
-
-    if (!eq && (*s == '!' || *s == '^') && s[1] != e) { n = 1; s++; }
-
-    memset(p->tab, n, 256);
-
-    n = !n;
-    while (*s && (k || *s != e)) {
-	if (s[1] == '-' && s[2] && s[2] != e) {
-	    /* a run of characters */
-	    for (j = (int) *s; j <= (int) s[2]; j++)
-		p->tab[j] = (eq ? i++ : n);
-
-	    s += 3;
+    int endchar, firsttime = 1;
+    char *optr, *nptr;
+
+    if (*iptr++ == '[') {
+	endchar = ']';
+	/* TODO: surely [^]] is valid? */
+	if ((*iptr == '!' || *iptr == '^') && iptr[1] != ']') {
+	    p->tp = CPAT_NCLASS;
+	    iptr++;
 	} else
-	    p->tab[*s++] = (eq ? i++ : n);
-	k = 0;
+	    p->tp = CPAT_CCLASS;
+    } else {
+	endchar = '}';
+	p->tp = CPAT_EQUIV;
     }
-    return s;
+
+    /* find end of class.  End character can appear literally first. */
+    for (optr = iptr; optr == iptr || *optr != endchar; optr++)
+	if (!*optr)
+	    return optr;
+    /*
+     * We can always fit the parsed class within the same length
+     * because of the tokenization (including a null byte).
+     *
+     * As the input string is metafied, but shouldn't contain shell
+     * tokens, we can just add our own tokens willy nilly.
+     */
+    optr = p->u.str = zalloc((optr-iptr) + 1);
+
+    while (firsttime || *iptr != endchar) {
+	int ch;
+
+	if (*iptr == '[' && iptr[1] == ':' &&
+	    (nptr = strchr((char *)iptr + 2, ':')) && nptr[1] == ']') {
+	    /* Range type */
+	    iptr += 2;
+	    ch = range_type((char *)iptr, nptr-iptr);
+	    iptr = nptr + 2;
+	    if (ch != PP_UNKWN)
+		*optr++ = STOUC(Meta) + ch;
+	} else {
+	    /* characters stay metafied */
+	    char *ptr1 = iptr;
+	    if (*iptr == Meta)
+		iptr++;
+	    iptr++;
+	    if (*iptr == '-' && iptr[1] && iptr[1] != endchar) {
+		/* a run of characters */
+		iptr++;
+		/* range token */
+		*optr++ = Meta + PP_RANGE;
+
+		/* start of range character */
+		if (*ptr1 == Meta) {
+		    *optr++ = Meta;
+		    *optr++ = ptr1[1] ^ 32;
+		} else
+		    *optr++ = *ptr1;
+
+		if (*iptr == Meta) {
+		    *optr++ = *iptr++;
+		    *optr++ = *iptr++;
+		} else
+		    *optr++ = *iptr++;
+	    } else {
+		if (*ptr1 == Meta) {
+		    *optr++ = Meta;
+		    *optr++ = ptr1[1] ^ 32;
+		} else
+		    *optr++ = *ptr1;
+	    }
+	}
+	firsttime = 0;
+    }
+
+    *optr = '\0';
+    return iptr;
 }
 
 /**/
diff --git a/Src/Zle/compmatch.c b/Src/Zle/compmatch.c
index bf9d2cb0d..e59443c01 100644
--- a/Src/Zle/compmatch.c
+++ b/Src/Zle/compmatch.c
@@ -30,37 +30,68 @@
 #include "complete.mdh"
 #include "compmatch.pro"
 
-/* This compares two cpattern lists and returns non-zero if they are
- * equal. */
+/*
+ * This compares two cpattern lists and returns non-zero if they are
+ * equal (N.B. opposite sense to usual *cmp()).
+ *
+ * The old version of this didn't worry about whether the lists
+ * were the same length.  This one does.  It's hard to see how
+ * that can be wrong even if it's unnecessary.
+ */
 
 /**/
 static int
-cmp_cpatterns(Cpattern a, Cpattern b)
+cpatterns_same(Cpattern a, Cpattern b)
 {
     while (a) {
-	if (a->equiv != b->equiv || memcmp(a->tab, b->tab, 256))
+	if (!b)
+	    return 0;
+	if (a->tp != b->tp)
 	    return 0;
+	switch (a->tp) {
+	case CPAT_CCLASS:
+	case CPAT_NCLASS:
+	case CPAT_EQUIV:
+	    /*
+	     * Patterns can actually match the same even if
+	     * the range strings don't compare differently, but
+	     * I don't think we need to handle that subtlety.
+	     */
+	    if (strcmp(a->u.str, b->u.str) != 0)
+		return 0;
+	    break;
+
+	case CPAT_CHAR:
+	    if (a->u.chr != b->u.chr)
+		return 0;
+	    break;
+
+	default:
+	    /* here to silence compiler */
+	    break;
+	}
+
 	a = a->next;
 	b = b->next;
     }
-    return 1;
+    return !b;
 }
 
 /* This compares two cmatchers and returns non-zero if they are equal. */
 
 /**/
 static int
-cmp_cmatchers(Cmatcher a, Cmatcher b)
+cmatchers_same(Cmatcher a, Cmatcher b)
 {
     return (a == b ||
 	    (a->flags == b->flags &&
 	     a->llen == b->llen && a->wlen == b->wlen &&
-	     (!a->llen || cmp_cpatterns(a->line, b->line)) &&
-	     (a->wlen <= 0 || cmp_cpatterns(a->word, b->word)) &&
+	     (!a->llen || cpatterns_same(a->line, b->line)) &&
+	     (a->wlen <= 0 || cpatterns_same(a->word, b->word)) &&
 	     (!(a->flags & (CMF_LEFT | CMF_RIGHT)) ||
 	      (a->lalen == b->lalen && a->ralen == b->ralen &&
-	       (!a->lalen || cmp_cpatterns(a->left, b->left)) &&
-	       (!a->ralen || cmp_cpatterns(a->right, b->right))))));
+	       (!a->lalen || cpatterns_same(a->left, b->left)) &&
+	       (!a->ralen || cpatterns_same(a->right, b->right))))));
 }
 
 /* Add the given matchers to the bmatcher list. */
@@ -97,7 +128,7 @@ update_bmatchers(void)
 	t = 0;
 	for (ms = mstack; ms && !t; ms = ms->next)
 	    for (mp = ms->matcher; mp && !t; mp = mp->next)
-		t = cmp_cmatchers(mp, p->matcher);
+		t = cmatchers_same(mp, p->matcher);
 
 	p = p->next;
 	if (!t) {
@@ -449,7 +480,7 @@ add_match_sub(Cmatcher m, char *l, int ll, char *w, int wl)
     }
 }
 
-/* This tests if the string from the line l matches the word w. In bp
+/* This tests if the string from the line l matches the word w. In *bpp
  * the offset for the brace is returned, in rwlp the length of the
  * matched prefix or suffix, not including the stuff before or after
  * the last anchor is given. When sfx is non-zero matching is done from
@@ -1113,55 +1144,330 @@ comp_match(char *pfx, char *sfx, char *w, Patprog cp, Cline *clp, int qu,
     return r;
 }
 
-/* Check if the given pattern matches the given string.             *
+
+/*
+ * Guts of a single pattern for pattern_match().
+ * Return non-zero if match successful.
+ * If the class was an equivalence, return 1 + the index into
+ * the equivalence class (see pattern.c for how this is calculated).
+ */
+
+/**/
+mod_export int
+pattern_match1(Cpattern p, int c, int *mtp)
+{
+    /* TODO: should become convchar_t */
+    int ind;
+
+    *mtp = 0;
+    switch (p->tp) {
+    case CPAT_CCLASS:
+	return PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
+
+    case CPAT_NCLASS:
+	return !PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
+
+    case CPAT_EQUIV:
+	if (PATMATCHRANGE(p->u.str, CONVCAST(c), &ind, mtp))
+	    return ind + 1;
+	else
+	    return 0;
+
+    case CPAT_ANY:
+	return 1;
+
+    case CPAT_CHAR:
+	return (p->u.chr == c);
+
+    default:
+	DPUTS(1, "bad matcher pattern type");
+	return 0;
+    }
+}
+
+
+/*
+ * Use an equivalence to deduce the line character from the word, or
+ * vice versa.  (If vice versa, then "line" and "word" are reversed
+ * in what follows.  The logic is symmetric.)
+ * lp is the line pattern.
+ * wind is the index returned by a pattern match on the word pattern,
+ * with type wmtp.
+ * wchr is the word character.
+ * Return -1 if no matching character, else the character.
+ *
+ * Only makes sense if lp->tp == CPAT_EQUIV and the (unseen) word
+ * pattern also has that type.
+ */
+static int
+pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
+{
+    int lchr, lmtp;
+
+    if (!PATMATCHINDEX(lp->u.str, wind-1, &lchr, &lmtp)) {
+	/*
+	 * No equivalent.  No possible match; give up.
+	 */
+	return -1;
+    }
+    /*
+     * If we matched an exact character rather than a range
+     * type, return it.
+     */
+    if (lchr != -1)
+	return lchr;
+
+    /*
+     * Check the match types.  We may want a case-changed
+     * version of the word character.
+     */
+    if (wmtp == PP_UPPER && lmtp == PP_LOWER)
+	return tulower(wchr);
+    else if (wmtp == PP_LOWER && lmtp == PP_UPPER)
+	return tuupper(wchr);
+    else if (wmtp == lmtp) {
+	/*
+	 * Be lenient and allow identical replacements
+	 * for character classes, although in fact this
+	 * doesn't give special functionality for equivalence
+	 * classes.
+	 */
+	return wchr;
+    } else {
+	/*
+	 * Non-matching generic types; this can't work.
+	 */
+	return -1;
+    }
+}
+
+/*
+ * Check if the given pattern matches the given string.
  *  p and  s are either anchor or line pattern and string;
  * wp and ws are word (candidate) pattern and string
  *
- * If only one pattern is given, we just check if characters match
+ * If only one pattern is given, we just check if characters match.
  * If both line and word are given, we check that characters match
- * for {...} classes by comparing relative numbers in sequence.
+ * for {...} classes by comparing positions in the strings.
  *
  * Patterns and strings are always passed in pairs, so it is enough
  * to check for non-NULL wp. p should always be present.
+ *
+ * If prestrict is not NULL, it is a chain of patterns at least as long
+ * as the line string.  In this case we are still assembling the line at
+ * s (which has been allocated but doesn't yet contain anything useful)
+ * and must continue to do so as we go along; prestrict gives
+ * restrictions on the line character to be applied along side the other
+ * patterns.  In the simple case a restriction is a character to be put
+ * in place; otherwise it is a set of possible characters and we have to
+ * deduce an actual matching character.  Note prestrict is never an
+ * equivalence class.  In extreme cases we can't deduce a unique
+ * character; then the match fails.
  */
 
 /**/
 mod_export int
-pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
+pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
+		       Cpattern prestrict)
 {
-    unsigned char c;
-    unsigned char wc;
+    int c, ind;
+    int wc, wind;
+    int len, wlen, mt, wmt;
 
     while (p && wp && *s && *ws) {
-	c = p->tab[*((unsigned char *) s)];
-	wc = wp->tab[*((unsigned char *) ws)];
-
-	if (!c || !wc || c != wc)
+	/* First test the word character */
+	if (*ws == Meta) {
+	    wc = STOUC(ws[1]) ^ 32;
+	    wlen = 2;
+	} else {
+	    wc = STOUC(*ws);
+	    wlen = 1;
+	}
+	wind = pattern_match1(wp, wc, &wmt);
+	if (!wind)
 	    return 0;
 
-	s++;
-	ws++;
+	/*
+	 * Now the line character; deal with the case where
+	 * we don't yet have it, only a restriction on it.
+	 */
+	if (prestrict) {
+	    if (prestrict->tp == CPAT_CHAR) {
+		/*
+		 * Easy case: restricted to an exact character on
+		 * the line.  Procede as normal.
+		 */
+		c = prestrict->u.chr;
+	    } else {
+		if (p->tp == CPAT_CHAR) {
+		    /*
+		     * Normal line pattern is an exact character:  as
+		     * long as this matches prestrict, we can proceed
+		     * as usual.
+		     */
+		    c = p->u.chr;
+		} else if (p->tp == CPAT_EQUIV) {
+		    /*
+		     * An equivalence, so we can deduce the character
+		     * backwards from the word pattern and see if it
+		     * matches prestrict.
+		     */
+		    if ((c = pattern_match_equivalence(p, wind, wmt, wc)) == -1)
+			return 0;
+		} else {
+		    /*
+		     * Not an equivalence, so that means we must match
+		     * the word (not just the word pattern), so grab it
+		     * and make sure it fulfills our needs.  I think.
+		     * Not 100% sure about that, but what else can
+		     * we do?  We haven't actually been passed a string
+		     * from the command line.
+		     */
+		    c = wc;
+		}
+		/* Character so deduced must match the restriction. */
+		if (!pattern_match1(prestrict, c, &mt))
+		    return 0;
+	    }
+	    len = imeta(c) ? 2 : 1;
+	} else {
+	    /* We have the character itself. */
+	    if (*s == Meta) {
+		c = STOUC(s[1]) ^ 32;
+		len = 2;
+	    } else {
+		c = STOUC(*s);
+		len = 1;
+	    }
+	}
+	/*
+	 * If either is "?", they match each other; no further tests.
+	 * Apply this even if the character wasn't convertable;
+	 * there's no point trying to be clever in that case.
+	 */
+	if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
+	{
+	    ind = pattern_match1(p, c, &mt);
+	    if (!ind)
+		return 0;
+	    if (ind != wind)
+		return 0;
+	    if (mt != wmt) {
+		/*
+		 * Special case if matching lower vs. upper or
+		 * vice versa.  The transformed characters must match.
+		 * We don't need to check the transformation is
+		 * the appropriate one for each character separately,
+		 * since that was done in pattern_match1(), so just
+		 * compare lower-cased versions of both.
+		 */
+		if ((mt == PP_LOWER || mt == PP_UPPER) &&
+		    (wmt == PP_LOWER || wmt == PP_UPPER)) {
+		    if (tulower(c) != tulower(wc))
+			return 0;
+		} else {
+		    /* Other different classes can't match. */
+		    return 0;
+		}
+	    }
+	}
+
+	if (prestrict) {
+	    /* We need to assemble the line */
+	    if (imeta(c)) {
+		*s++ = Meta;
+		*s++ = c ^ 32;
+	    } else {
+		*s++ = c;
+	    }
+	    prestrict = prestrict->next;
+	} else
+	    s += len;
+	ws += wlen;
 	p = p->next;
 	wp = wp->next;
     }
 
     while (p && *s) {
-	if (!p->tab[*((unsigned char *) s)])
+	if (prestrict) {
+	    /*
+	     * As above, but with even less info to go on.
+	     * (Can this happen?)  At least handle the cases where
+	     * one of our patterns has given us a specific character.
+	     */
+	    if (prestrict->tp == CPAT_CHAR) {
+		c = prestrict->u.chr;
+	    } else {
+		if (p->tp == CPAT_CHAR) {
+		    c = p->u.chr;
+		} else {
+		    /*
+		     * OK.  Here we are in a function with just a line
+		     * pattern and another pattern to restrict the
+		     * characters that can go on the line, and no actual
+		     * characters.  We're matching two patterns against
+		     * one another to generate a character to insert.
+		     * This is a bit too psychedelic, so I'm going to
+		     * bale out now.  See you on the ground.
+		     */
+		    return 0;
+		}
+		if (!pattern_match1(prestrict, c, &mt))
+		    return 0;
+	    }
+	} else {
+	    if (*s == Meta) {
+		c = STOUC(s[1]) ^ 32;
+		len = 2;
+	    } else {
+		c = STOUC(*s);
+		len = 1;
+	    }
+	}
+	if (!pattern_match1(p, c, &mt))
 	    return 0;
 	p = p->next;
-	s++;
+	if (prestrict) {
+	    if (imeta(c)) {
+		*s++ = Meta;
+		*s++ = c ^ 32;
+	    } else {
+		*s++ = c;
+	    }
+	    prestrict = prestrict->next;
+	} else
+	    s += len;
     }
 
     while (wp && *ws) {
-	if (!wp->tab[*((unsigned char *) ws)])
+	/* No funny business when we only have the word pattern. */
+	if (*ws == Meta) {
+	    wc = STOUC(ws[1]) ^ 32;
+	    wlen = 2;
+	} else {
+	    wc = STOUC(*ws);
+	    wlen = 1;
+	}
+	if (!pattern_match1(wp, wc, &wmt))
 	    return 0;
 	wp = wp->next;
-	ws++;
+	ws += wlen;
     }
 
     return 1;
 }
 
+/*
+ * The usual version of pattern matching, without the line string
+ * being handled by restriction.
+ */
+/**/
+mod_export int
+pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
+{
+    return pattern_match_restrict(p, s, wp, ws, NULL);
+}
+
 /* This splits the given string into a list of cline structs, separated
  * at those places where one of the anchors of an `*' pattern was found.
  * plen gives the number of characters on the line that matched this
@@ -1256,11 +1562,11 @@ bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
     return ret;
 }
 
-/* This builds all the possible line patterns for the pattern pat in the
- * buffer line. Initially line is the same as lp, but during recursive
- * calls lp is incremented for storing successive characters. Whenever
- * a full possible string is build, we test if this line matches the
- * string given by wlen and word.
+
+/*
+ * This builds all the possible line patterns for the pattern pat in the
+ * buffer line.  Then we test if this line matches the string given by
+ * wlen and word.
  *
  * wpat contains pattern that matched previously
  * lpat contains the pattern for line we build
@@ -1269,91 +1575,297 @@ bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
  *
  * The return value is the length of the string matched in the word, it
  * is zero if we couldn't build a line that matches the word.
+ *
+ * TODO: a lot of the nastiness associated with variable string
+ * lengths can go when we switch to wide characters.  (Why didn't
+ * I just keep line unmetafied and metafy into place at the end?  Er...)
  */
 
-
 /**/
 static int
-bld_line(Cpattern wpat, Cpattern lpat, char *line, char *lp,
-	 char *mword, char *word, int wlen, int sfx)
+bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
 {
-    if (lpat) {
-	/* Still working on the pattern. */
-
-	int i, l;
-	unsigned char c = 0;
-
-	/* Get the number of the character for a correspondence class
-	 * if it has a corresponding class. */
-	if (lpat->equiv)
-	    if (wpat && *mword) {
-		c = wpat->tab[STOUC(*mword)];
-		wpat = wpat->next;
-		mword++;
+    Cpattern lpat = mp->line;
+    Cpattern wpat = mp->word;
+    Cpattern curgenpat;
+    VARARR(struct cpattern, genpatarr, mp->llen);
+    Cmlist ms;
+    int llen, rl;
+    char *oword = word, *line = *linep;
+
+    /*
+     * Loop over all characters.  At this stage, line is an empty
+     * space of length llen (not counting the null byte) which we assemble as
+     * we go along.
+     *
+     * However, first we need to know what characters can appear at each
+     * point in the line.  For this we assemble an list genpatarr of the
+     * same length as the line.  (It's convenient to store this as an
+     * array but it's linked as a list, too.)  If there are equivalences
+     * we use mword to derive the equivalent character; when we've
+     * reached the end of mword, equivalences are treated just like
+     * ordinary character classes.  For character classes we just attach
+     * the class to the genpatarr list and apply it as a restriction
+     * when we finally match the line against the set of matchers.
+     */
+    curgenpat = genpatarr;
+    while (lpat) {
+	int wchr = (*mword == Meta) ? STOUC(mword[1]) ^ 32 : STOUC(*mword);
+	int wmtp, wind;
+	/*
+	 * If the line pattern is an equivalence, query wpat to find the
+	 * word part of the equivalence.  If we don't find one we don't try
+	 * equivalencing but use lpat as an ordinary match.  (It's not
+	 * entirely clear to me this is the correct behaviour on a
+	 * failed character match within the equivalence, but that was
+	 * the behaviour of the old logic that this replaces.)
+	 */
+	if (lpat->tp == CPAT_EQUIV && wpat && *mword) {
+	    wind = pattern_match1(wpat, wchr, &wmtp);
+	    wpat = wpat->next;
+	    mword += (*mword == Meta) ? 2 : 1;
+	} else
+	    wind = 0;
+	if (wind) {
+	    /*
+	     * Successful match for word side of equivalence.
+	     * Find the line equivalent.
+	     */
+	    int lchr;
+	    if ((lchr = pattern_match_equivalence(lpat, wind, wmtp, wchr))
+		== -1) {
+		/*
+		 * No equivalent.  No possible match; give up.
+		 */
+		return 0;
+	    }
+	    /*
+	     * We now have an exact character to match,
+	     * so make up a pattern element for it.
+	     */
+	    curgenpat->tp = CPAT_CHAR;
+	    curgenpat->u.chr = lchr;
+	} else {
+	    /*
+	     * Not an equivalence class, so we just keep the
+	     * test in the lpat as it is.
+	     */
+	    curgenpat->tp = lpat->tp;
+	    if (lpat->tp == CPAT_CHAR)
+		curgenpat->u.chr = lpat->u.chr;
+	    else if (lpat->tp != CPAT_ANY) {
+		/*
+		 * The string isn't modified and is only needed in calls from
+		 * this function, so we don't even need to copy it.
+		 */
+		curgenpat->u.str = lpat->u.str;
 	    }
+	}
+	lpat = lpat->next;
+	/*
+	 * This linked list is defined above as an array.
+	 * We could get away with just keeping it as an array
+	 * and passing it down as such, but that's a bit icky
+	 * since the generic linkage of Cpatterns is as a linked
+	 * list and we should keep our local memory management
+	 * problems to ourselvess.
+	 */
+	if (lpat)
+	    curgenpat->next = curgenpat+1;
+	else
+	    curgenpat->next = NULL;
+	curgenpat++;
+    }
 
+    /*
+     * We now know how to match the word with the line patterns; let's
+     * see if it does.  We will use the information in curgenpat if we
+     * are successful to work out what character goes on the line.  This
+     * is a bit hairy, as in "the Yeti is a creature that is a bit
+     * hairy".
+     */
+    llen = mp->llen;
+    rl = 0;
 
-	/* Walk through the table in the pattern and try the characters
-	 * that may appear in the current position. */
-	for (i = 0; i < 256; i++)
-	    if ((lpat->equiv && c) ? (c == lpat->tab[i]) : lpat->tab[i]) {
-		*lp = i;
-		/* We stored the character, now call ourselves to build
-		 * the rest. */
-		if ((l = bld_line(wpat, lpat->next, line, lp + 1,
-				  mword, word, wlen, sfx)))
-		    return l;
-	    }
-    } else {
-	/* We reached the end, i.e. the line string is fully build, now
-	 * see if it matches the given word. */
+    if (sfx)
+    {
+	/*
+	 * We need to work backwards from the end of both the
+	 * word and the line strings.
+	 *
+	 * Position at the end of the word by counting characters.
+	 */
+	int l = wlen;
+	while (l--)
+	    word += (*word == Meta) ? 2 : 1;
 
-	Cmlist ms;
-	Cmatcher mp;
-	int l = lp - line, t, rl = 0, ind, add;
+	/*
+	 * We construct the line from the end.  We've left
+	 * enough space for possible Meta's.
+	 */
+	line += 2 * llen;
+	*line = '\0';
+	curgenpat = genpatarr + llen;
+    } else
+	curgenpat = genpatarr;
 
-	/* Quick test if the strings are exactly the same. */
-	if (l == wlen && !strncmp(line, word, l))
-	    return l;
+    /* we now reuse mp, lpat, wpat for the global matchers */
+    while (llen && wlen) {
+	int wchr, wmtp;
+	char *wp;
+	Cpattern tmpgenpat;
 
 	if (sfx) {
-	    line = lp; word += wlen;
-	    ind = -1; add = -1;
-	} else {
-	    ind = 0; add = 1;
-	}
-	/* We loop through the whole line string built. */
-	while (l && wlen) {
-	    if (word[ind] == line[ind]) {
-		/* The same character in both strings, skip over. */
-		line += add; word += add;
-		l--; wlen--; rl++;
+	    if (word > oword + 1 && word[-2] == Meta)
+		wp = word - 2;
+	    else
+		wp = word - 1;
+	    curgenpat--;
+	} else
+	    wp = word;
+	if (*wp == Meta)
+	    wchr = STOUC(wp[1]) ^ 32;
+	else
+	    wchr = STOUC(*wp);
+	if (pattern_match1(curgenpat, wchr, &wmtp))
+	{
+	    int lchr;
+	    /*
+	     * We can match the line character directly with the word
+	     * character.  If the line character is a fixed one,
+	     * keep it, since we went to all that trouble above,
+	     * else if it's generic, keep the word character,
+	     * since we have no choice.
+	     */
+	    if (curgenpat->tp == CPAT_CHAR)
+		lchr = curgenpat->u.chr;
+	    else
+		lchr = wchr;
+	    if (imeta(lchr)) {
+		if (sfx)
+		    line -= 2;
+		line[0] = Meta;
+		line[1] = lchr ^ 32;
+		if (!sfx)
+		    line += 2;
 	    } else {
-		t = 0;
-		for (ms = bmatchers; ms && !t; ms = ms->next) {
-		    mp = ms->matcher;
-		    if (mp && !mp->flags && mp->wlen <= wlen && mp->llen <= l &&
-			pattern_match(mp->line, (sfx ? line - mp->llen : line),
-				      mp->word, (sfx ? word - mp->wlen : word))) {
-			/* Both the line and the word pattern matched,
-			 * now skip over the matched portions. */
+		if (sfx)
+		    line--;
+		line[0] = lchr;
+		if (!sfx)
+		    line++;
+	    }
+
+	    llen--;
+	    wlen--;
+	    rl++;
+
+	    if (sfx)
+		word = wp;
+	    else {
+		if (llen)
+		    curgenpat++;
+		word += (*word == Meta) ? 2 : 1;
+	    }
+	}
+	else
+	{
+	    char *lp;
+	    /*
+	     * Need to loop over pattern matchers.
+	     */
+	    for (ms = bmatchers; ms; ms = ms->next) {
+		mp = ms->matcher;
+		/*
+		 * This is the nightmare case: we have line and
+		 * and word matchers and some pattern which restricts
+		 * the value on the line without us knowing exactly
+		 * what it is.  Despatch to the special function
+		 * for that.
+		 */
+		if (mp && !mp->flags && mp->wlen <= wlen &&
+		    mp->llen <= llen)
+		{
+		    if (sfx) {
+			/*
+			 * We haven't assembled the line yet, and with
+			 * Meta characters we don't yet know the length.
+			 * We'll fix this up later.
+			 */
+			lp = line - 2 * mp->llen;
+		    } else
+			lp = line;
+		    wp = word;
+		    if (sfx) {
+			int l = mp->wlen;
+			while (l--) {
+			    if (wp > oword + 1 && wp[-2] == Meta)
+				wp -= 2;
+			    else
+				wp--;
+			}
+
+			tmpgenpat = curgenpat - mp->llen;
+		    } else
+			tmpgenpat = curgenpat;
+		    if (pattern_match_restrict(mp->line, lp,
+					       mp->word, wp, tmpgenpat)) {
+			/*
+			 * Matched: advance over as many characters
+			 * of the patterns and strings as
+			 * we've done matches.
+			 */
 			if (sfx) {
-			    line -= mp->llen; word -= mp->wlen;
+			    int imove = mp->llen, nchar;
+			    char *pmove = lp;
+			    word = wp;
+			    
+			    /* Close the gap we left in the line string */
+			    while (imove--)
+				pmove += (*pmove == Meta) ? 2 : 1;
+			    /* Number of bytes to move */
+			    nchar = (int)(pmove - lp);
+			    /* The size of the gap */
+			    imove = 2 * mp->llen - nchar;
+			    if (imove) {
+				lp = line - imove;
+				/* Moving up, so start at the top */
+				while (nchar--)
+				    *--line = *--lp;
+				/* line is at the start of the moved text */
+			    }
+
+			    curgenpat = tmpgenpat;
 			} else {
-			    line += mp->llen; word += mp->wlen;
+			    int cnt = mp->llen;
+			    while (cnt--) {
+				line += (*line == Meta) ? 2 : 1;
+			    }
+
+			    cnt = mp->wlen;
+			    while (cnt--)
+				word += (*word == Meta) ? 2 : 1;
+
+			    curgenpat += mp->llen;
 			}
-			l -= mp->llen; wlen -= mp->wlen; rl += mp->wlen;
-			t = 1;
+			llen -= mp->llen;
+			wlen -= mp->wlen;
+			rl += mp->wlen;
+			break;
 		    }
 		}
-		if (!t)
-		    /* Didn't match, give up. */
-		    return 0;
 	    }
+	    if (!ms)
+		return 0;	/* Didn't match, give up */
 	}
-	if (!l)
-	    /* Unmatched portion in the line built, return matched length. */
-	    return rl;
+    }
+    if (!llen) {
+	/* Unmatched portion in the line built, return matched length. */
+	if (sfx)
+	    *linep = line;
+	else
+	    *line = '\0';
+	return rl;
     }
     return 0;
 }
@@ -1386,8 +1898,10 @@ join_strs(int la, char *sa, int lb, char *sb)
 		    if ((t = pattern_match(mp->word, sa, NULL, NULL)) ||
 			pattern_match(mp->word, sb, NULL, NULL)) {
 			/* It matched one of the strings, t says which one. */
-			VARARR(char, line, mp->llen + 1);
-			char **ap, **bp;
+			/* TODO: double to allow Meta, not necessary
+			   when properly unmetafied */
+			VARARR(char, linearr, 2*mp->llen + 1);
+			char **ap, **bp, *line = linearr;
 			int *alp, *blp;
 
 			if (t) {
@@ -1399,10 +1913,8 @@ join_strs(int la, char *sa, int lb, char *sb)
 			}
 			/* Now try to build a string that matches the other
 			 * string. */
-			if ((bl = bld_line(mp->word, mp->line, line, line,
-					   *ap, *bp, *blp, 0))) {
+			if ((bl = bld_line(mp, &line, *ap, *bp, *blp, 0))) {
 			    /* Found one, put it into the return string. */
-			    line[mp->llen] = '\0';
 			    if (rr <= mp->llen) {
 				char *or = rs;
 
@@ -1444,7 +1956,11 @@ join_strs(int la, char *sa, int lb, char *sb)
     return rs;
 }
 
-/* This compares the anchors stored in two top-level clines. */
+/*
+ * This compares the anchors stored in two top-level clines.
+ * It returns 1 if the anchors are the same, 2 if they are
+ * compatible (and have been combined in "o"), 0 otherwise.
+ */
 
 /**/
 static int
@@ -1591,9 +2107,11 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
 				       NULL, NULL)) ||
 		     pattern_match(mp->word, nw - (sfx ? mp->wlen : 0),
 				   NULL, NULL))) {
-		    VARARR(char, line, mp->llen + 1);
+		    /* TODO: doubled to allow Meta, not necessary
+		     * when properly unmetafied */
+		    VARARR(char, linearr, 2*mp->llen + 1);
 		    int bl;
-		    char *mw;
+		    char *mw, *line = linearr;
 
 		    /* Then build all the possible lines and see
 		     * if one of them matches the other string. */
@@ -1602,11 +2120,10 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
 		    else
 			mw = nw - (sfx ? mp->wlen : 0);
 
-		    if ((bl = bld_line(mp->word, mp->line, line, line,
-				       mw, (t ? nw : ow), (t ? nl : ol), sfx)))  {
+		    if ((bl = bld_line(mp, &line, mw, (t ? nw : ow),
+				       (t ? nl : ol), sfx)))  {
 			/* Yep, one of the lines matched the other
 			 * string. */
-			line[mp->llen] = '\0';
 
 			if (t) {
 			    ol = mp->wlen; nl = bl;
diff --git a/Src/Zle/computil.c b/Src/Zle/computil.c
index 9d116b93a..2b3efa776 100644
--- a/Src/Zle/computil.c
+++ b/Src/Zle/computil.c
@@ -3997,6 +3997,239 @@ cfp_test_exact(LinkList names, char **accept, char *skipped)
     return (found ? ret : NULL);
 }
 
+
+/*
+ * This code constructs (from heap) and returns a string that
+ * corresponds to a series of matches; when compiled as a pattern, at
+ * each position it matches either the character from the string "add"
+ * or the corresponding single-character match from the set of matchers.
+ * To take a simple case, if add is "a" and the single matcher for the
+ * character position matches "[0-9]", the pattern returned is "[0-9a]".
+ * We take account of equivalences between the word and line, too.
+ *
+ * As there are virtually no comments in this file, I don't really
+ * know why we're doing this, but it's to do with a matcher which
+ * is passed as an argument to the utility compfiles -p/-P.
+ */
+static char *
+cfp_matcher_range(Cmatcher *ms, char *add)
+{
+    Cmatcher *mp, m;
+    int len = 0, mt;
+    char *ret = NULL, *p = NULL, *adds = add;
+
+    /*
+     * Do this twice:  once to work out the length of the
+     * string in len, the second time to build it in ret.
+     * This is probably worthwhile because otherwise memory
+     * management is difficult.
+     */
+    for (;;) {
+	for (mp = ms; *add; add++, mp++) {
+	    if (!(m = *mp)) {
+		/*
+		 * No matcher, so just match the character
+		 * itself.
+		 *
+		 * TODO: surely this needs quoting if it's a
+		 * metacharacter?
+		 */
+		if (ret) {
+		    if (imeta(*add)) {
+			*p++ = Meta;
+			*p++ = *add ^ 32;
+		    } else
+			*p++ = *add;
+		} else
+		    len += imeta(*add) ? 2 : 1;
+	    } else if (m->flags & CMF_RIGHT) {
+		/*
+		 * Right-anchored:  match anything followed
+		 * by the character itself.
+		 */
+		if (ret) {
+		    *p++ = '*';
+		    /* TODO: quote again? */
+		    if (imeta(*add)) {
+			*p++ = Meta;
+			*p++ = *add ^ 32;
+		    } else
+			*p++ = *add;
+		} else
+		    len += imeta(*add) ? 3 : 2;
+	    } else {
+		/* The usual set of matcher possibilities. */
+		int ind;
+		if (m->line->tp == CPAT_EQUIV &&
+		    m->word->tp == CPAT_EQUIV) {
+		    /*
+		     * Genuine equivalence.  Add the character to match
+		     * and the equivalent character from the word
+		     * pattern.
+		     *
+		     * TODO: we could be more careful here with special
+		     * cases as we are in the basic character class
+		     * code below.
+		     */
+		    if (ret) {
+			*p++ = '[';
+			if (imeta(*add)) {
+			    *p++ = Meta;
+			    *p++ = *add ^ 32;
+			} else
+			    *p++ = *add;
+		    } else
+			len += imeta(*add) ? 3 : 2;
+		    if (PATMATCHRANGE(m->line->u.str, CONVCAST(*add),
+				      &ind, &mt)) {
+			/*
+			 * Find the equivalent match for ind in the
+			 * word pattern.
+			 */
+			if ((ind = pattern_match_equivalence
+			     (m->word, ind, mt, CONVCAST(*add))) != -1) {
+			    if (ret) {
+				if (imeta(ind)) {
+				    *p++ = Meta;
+				    *p++ = ind ^ 32;
+				} else
+				    *p++ = ind;
+			    } else
+				len += imeta(ind) ? 2 : 1;
+			}
+		    }
+		    if (ret)
+			*p++ = ']';
+		    else
+			len++;
+		} else {
+		    int newlen, addadd;
+
+		    switch (m->word->tp) {
+		    case CPAT_NCLASS:
+			/*
+			 * TODO: the old logic implies that we need to
+			 * match *add, i.e. it should be deleted from
+			 * the set of character's we're not allowed to
+			 * match.  That's too much like hard work for
+			 * now.  Indeed, in general it's impossible
+			 * without trickery.  Consider *add == 'A',
+			 * range == "[^[:upper:]]": we would have to
+			 * resort to something like "(A|[^[:upper:]])";
+			 * and in an expression like that *add may or
+			 * may not need backslashing.  So we're deep
+			 * into see-if-we-can-get-away-without
+			 * territory.
+			 */
+			if (ret) {
+			    *p++ = '[';
+			    *p++ = '^';
+			} else
+			    len += 2;
+			/*
+			 * Convert the compiled range string back
+			 * to an ordinary string.
+			 */
+			newlen =
+			    pattern_range_to_string(m->word->u.str, p);
+			DPUTS(!newlen, "empty character range");
+			if (ret) {
+			    p += newlen;
+			    *p++ = ']';
+			} else
+			    len += newlen + 1;
+			break;
+			    
+		    case CPAT_CCLASS:
+			/*
+			 * If there is an equivalence only on one
+			 * side it's not equivalent to anything.
+			 * Treat it as an ordinary character class.
+			 */ 
+		    case CPAT_EQUIV:
+		    case CPAT_CHAR:
+			if (ret)
+			    *p++ = '[';
+			else
+			    len++;
+			/*
+			 * We needed to add *add specially only if
+			 * it is not covered by the range.  This
+			 * is necessary for correct syntax---consider
+			 * if *add is ] and ] is also the first
+			 * character in the range.
+			 */
+			addadd = !pattern_match1(m->word, CONVCAST(*add), &mt);
+			if (addadd && *add == ']') {
+			    if (ret)
+				*p++ = *add;
+			    else
+				len++;
+			}
+			if (m->word->tp == CPAT_CHAR) {
+			    /*
+			     * The matcher just matches a single
+			     * character, but we need to be able
+			     * to match *add, too, hence we do
+			     * this as a [...].
+			     */
+			    if (ret) {
+				if (imeta(m->word->u.chr)) {
+				    *p++ = Meta;
+				    *p++ = m->word->u.chr ^ 32;
+				} else
+				    *p++ = m->word->u.chr;
+			    } else
+				len += imeta(m->word->u.chr) ? 2 : 1;
+			} else {
+			    /*
+			     * Convert the compiled range string back
+			     * to an ordinary string.
+			     */
+			    newlen =
+				pattern_range_to_string(m->word->u.str, p);
+			    DPUTS(!newlen, "empty character range");
+			    if (ret)
+				p += newlen;
+			    else
+				len += newlen;
+			}
+			if (addadd && *add != ']') {
+			    if (ret) {
+				if (imeta(*add)) {
+				    *p++ = Meta;
+				    *p++ = *add ^ 32;
+				} else
+				    *p++ = *add;
+			    } else
+				len += imeta(*add) ? 2 : 1;
+			}
+			if (ret)
+			    *p++ = ']';
+			else
+			    len++;
+			break;
+
+		    case CPAT_ANY:
+			if (ret)
+			    *p++ = '?';
+			else
+			    len++;
+			break;
+		    }
+		}
+	    }
+	}
+	if (ret) {
+	    *p = '\0';
+	    return ret;
+	}
+	p = ret = zhalloc(len + 1);
+	add = adds;
+    }
+}
+
+
 static char *
 cfp_matcher_pats(char *matcher, char *add)
 {
@@ -4064,64 +4297,8 @@ cfp_matcher_pats(char *matcher, char *add)
 			break;
 		    }
 	}
-	if (*add) {
-	    char *ret = "", buf[259];
-
-	    for (mp = ms; *add; add++, mp++) {
-		if (!(m = *mp)) {
-		    buf[0] = *add;
-		    buf[1] = '\0';
-		} else if (m->flags & CMF_RIGHT) {
-		    buf[0] = '*';
-		    buf[1] = *add;
-		    buf[2] = '\0';
-		} else {
-		    unsigned char *t, c;
-		    char *p = buf;
-		    int i;
-
-		    for (i = 256, t = m->word->tab; i--; t++)
-			if (*t)
-			    break;
-		    if (i) {
-			t = m->word->tab;
-			*p++ = '[';
-			if (m->line->equiv && m->word->equiv) {
-			    *p++ = *add;
-			    c = m->line->tab[STOUC(*add)];
-			    for (i = 0; i < 256; i++)
-				if (m->word->tab[i] == c) {
-				    *p++ = (char) i;
-				    break;
-				}
-			} else {
-			    if (*add == ']' || t[STOUC(']')])
-				*p++ = ']';
-			    for (i = 0; i < 256; i++, t++)
-				if (*t && ((char) i) != *add &&
-				    i != ']' && i != '-' &&
-				    i != '^' && i != '!')
-				    *p++ = (char) i;
-			    *p++ = *add;
-			    t = m->word->tab;
-			    if (*add != '^' && t[STOUC('^')])
-				*p++ = '^';
-			    if (*add != '!' && t[STOUC('!')])
-				*p++ = '!';
-			    if (*add != '-' && t[STOUC('-')])
-				*p++ = '-';
-			}
-			*p++ = ']';
-			*p = '\0';
-		    } else {
-			*p = '?';
-			p[1] = '\0';
-		    }
-		}
-		ret = dyncat(ret, buf);
-	    }
-	    return ret;
-	}
+	if (*add)
+	    return cfp_matcher_range(ms, add);
     }
     return add;
 }
diff --git a/Src/pattern.c b/Src/pattern.c
index f7ef7774e..81e4bce8b 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -193,25 +193,6 @@ typedef union upat *Upat;
  *	     	    v      v  	  	    ^
  *	            ------------------------
  */
-#define PP_ALPHA  1
-#define PP_ALNUM  2
-#define PP_ASCII  3
-#define PP_BLANK  4
-#define PP_CNTRL  5
-#define PP_DIGIT  6
-#define PP_GRAPH  7
-#define PP_LOWER  8
-#define PP_PRINT  9
-#define PP_PUNCT  10
-#define PP_SPACE  11
-#define PP_UPPER  12
-#define PP_XDIGIT 13
-#define PP_IDENT  14
-#define PP_IFS    15
-#define PP_IFSSPACE   16
-#define PP_WORD   17
-#define PP_UNKWN  18
-#define PP_RANGE  19
 
 #define	P_OP(p)		((p)->l & 0xff)
 #define	P_NEXT(p)	((p)->l >> 8)
@@ -1057,6 +1038,127 @@ patgetglobflags(char **strp, long *assertp, int *ignore)
     return 1;
 }
 
+
+static const char *colon_stuffs[]  = {
+    "alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph", 
+    "lower", "print", "punct", "space", "upper", "xdigit", "IDENT",
+    "IFS", "IFSSPACE", "WORD", NULL
+};
+
+/*
+ * Handle the guts of a [:stuff:] character class element.
+ * start is the beginning of "stuff" and len is its length.
+ * This code is exported for the benefit of completion matching.
+ */
+
+/**/
+mod_export int
+range_type(char *start, int len)
+{
+    const char **csp;
+
+    for (csp = colon_stuffs; *csp; csp++) {
+	if (!strncmp(start, *csp, len))
+	    return (csp - colon_stuffs) + PP_FIRST;
+    }
+
+    return PP_UNKWN;
+}
+
+
+/*
+ * Convert the contents of a [...] or [^...] expression (just the
+ * ... part) back into a string.  This is used by compfiles -p/-P
+ * for some reason.  The compiled form (a metafied string) is
+ * passed in rangestr.
+ *
+ * If outstr is non-NULL the compiled form is placed there.  It
+ * must be sufficiently long.  A terminating NULL is appended.
+ *
+ * Return the length required, not including the terminating NULL.
+ *
+ * TODO: this is non-multibyte for now.  It will need to be defined
+ * appropriately with MULTIBYTE_SUPPORT when the completion matching
+ * code catches up.
+ */
+
+/**/
+mod_export int
+pattern_range_to_string(char *rangestr, char *outstr)
+{
+    int len = 0;
+
+    while (*rangestr) {
+	if (imeta(STOUC(*rangestr))) {
+	    int swtype = STOUC(*rangestr) - STOUC(Meta);
+
+	    if (swtype == 0) {
+		/* Ordindary metafied character */
+		if (outstr)
+		{
+		    *outstr++ = Meta;
+		    *outstr++ = rangestr[1] ^ 32;
+		}
+		len += 2;
+		rangestr += 2;
+	    } else if (swtype == PP_RANGE) {
+		/* X-Y range */
+		int i;
+
+		for (i = 0; i < 2; i++) {
+		    if (*rangestr == Meta) {
+			if (outstr) {
+			    *outstr++ = Meta;
+			    *outstr++ = rangestr[1];
+			}
+			len += 2;
+			rangestr += 2;
+		    } else {
+			if (outstr)
+			    *outstr++ = *rangestr;
+			len++;
+			rangestr++;
+		    }
+
+		    if (i == 0) {
+			if (outstr)
+			    *outstr++ = '-';
+			len++;
+		    }
+		}
+	    } else if (swtype >= PP_FIRST && swtype <= PP_LAST) {
+		/* [:stuff:]; we need to output [: and :] */
+		const char *found = colon_stuffs[swtype - PP_FIRST];
+		int newlen = strlen(found);
+		if (outstr) {
+		    strcpy(outstr, "[:");
+		    outstr += 2;
+		    memcpy(outstr, found, newlen);
+		    outstr += newlen;
+		    strcpy(outstr, ":]");
+		    outstr += 2;
+		}
+		len += newlen + 4;
+		rangestr++;
+	    } else {
+		/* shouldn't happen */
+		DPUTS(1, "BUG: unknown PP_ code in pattern range");
+		rangestr++;
+	    }
+	} else {
+	    /* ordinary character, guaranteed no Meta handling needed */
+	    if (outstr)
+		*outstr++ = *rangestr;
+	    len++;
+	    rangestr++;
+	}
+    }
+
+    if (outstr)
+	*outstr = '\0';
+    return len;
+}
+
 /*
  * compile a chunk such as a literal string or a [...] followed
  * by a possible hash operator
@@ -1230,45 +1332,10 @@ patcomppiece(int *flagp)
 			/* Posix range. */
 			patparse += 2;
 			len = nptr - patparse;
-			if (!strncmp(patparse, "alpha", len))
-			    ch = PP_ALPHA;
-			else if (!strncmp(patparse, "alnum", len))
-			    ch = PP_ALNUM;
-			else if (!strncmp(patparse, "ascii", len))
-			    ch = PP_ASCII;
-			else if (!strncmp(patparse, "blank", len))
-			    ch = PP_BLANK;
-			else if (!strncmp(patparse, "cntrl", len))
-			    ch = PP_CNTRL;
-			else if (!strncmp(patparse, "digit", len))
-			    ch = PP_DIGIT;
-			else if (!strncmp(patparse, "graph", len))
-			    ch = PP_GRAPH;
-			else if (!strncmp(patparse, "lower", len))
-			    ch = PP_LOWER;
-			else if (!strncmp(patparse, "print", len))
-			    ch = PP_PRINT;
-			else if (!strncmp(patparse, "punct", len))
-			    ch = PP_PUNCT;
-			else if (!strncmp(patparse, "space", len))
-			    ch = PP_SPACE;
-			else if (!strncmp(patparse, "upper", len))
-			    ch = PP_UPPER;
-			else if (!strncmp(patparse, "xdigit", len))
-			    ch = PP_XDIGIT;
-			else if (!strncmp(patparse, "IDENT", len))
-			    ch = PP_IDENT;
-			else if (!strncmp(patparse, "IFS", len))
-			    ch = PP_IFS;
-			else if (!strncmp(patparse, "IFSSPACE", len))
-			    ch = PP_IFSSPACE;
-			else if (!strncmp(patparse, "WORD", len))
-			    ch = PP_WORD;
-			else
-			    ch = PP_UNKWN;
+			ch = range_type(patparse, len);
 			patparse = nptr + 2;
 			if (ch != PP_UNKWN)
-			    patadd(NULL, STOUC(Meta+ch), 1, PA_NOALIGN);
+			    patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN);
 			continue;
 		}
 		charstart = patparse;
@@ -1276,7 +1343,7 @@ patcomppiece(int *flagp)
 
 		if (*patparse == '-' && patparse[1] &&
 		    patparse[1] != Outbrack) {
-		    patadd(NULL, STOUC(Meta+PP_RANGE), 1, PA_NOALIGN);
+		    patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN);
 		    if (itok(*charstart)) {
 			patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
 			       PA_NOALIGN);
@@ -2369,19 +2436,19 @@ patmatch(Upat prog)
 		wchar_t cr = CHARREF(patinput, patinend);
 		char *scanop = (char *)P_OPERAND(scan);
 		if (patglobflags & GF_MULTIBYTE) {
-		    if (mb_patmatchrange(scanop, cr) ^
+		    if (mb_patmatchrange(scanop, cr, NULL, NULL) ^
 			(P_OP(scan) == P_ANYOF))
 			fail = 1;
 		    else
 			CHARINC(patinput, patinend);
-		} else if (patmatchrange(scanop, (int)cr) ^
+		} else if (patmatchrange(scanop, (int)cr, NULL, NULL) ^
 			   (P_OP(scan) == P_ANYOF))
 		    fail = 1;
 		else
 		    CHARINC(patinput, patinend);
 #else
 		if (patmatchrange((char *)P_OPERAND(scan),
-				   CHARREF(patinput, patinend)) ^
+				  CHARREF(patinput, patinend), NULL, NULL) ^
 		    (P_OP(scan) == P_ANYOF))
 		    fail = 1;
 		else
@@ -3122,12 +3189,33 @@ patmatch(Upat prog)
 /**/
 #ifdef MULTIBYTE_SUPPORT
 
+/*
+ * See if character ch matches a pattern range specification.
+ * The null-terminated specification is in range; the test
+ * character is in ch.
+ *
+ * indptr is used by completion matching, which is why this
+ * function is exported.  If indptr is not NULL we set *indptr
+ * to the index of the character in the range string, adjusted
+ * in the case of "A-B" ranges such that A would count as its
+ * normal index (say IA), B would count as IA + (B-A), and any
+ * character within the range as appropriate.  We're not strictly
+ * guaranteed this fits within a wint_t, but if this is Unicode
+ * in 32 bits we have a fair amount of distance left over.
+ *
+ * mtp is used in the same circumstances.  *mtp returns the match type:
+ * 0 for a standard character, else the PP_ index.  It's not
+ * useful if the match failed.
+ */
+
 /**/
-static int
-mb_patmatchrange(char *range, wchar_t ch)
+mod_export int
+mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
 {
     wchar_t r1, r2;
 
+    if (indptr)
+	*indptr = 0;
     /*
      * Careful here: unlike other strings, range is a NULL-terminated,
      * metafied string, because we need to treat the Posix and hyphenated
@@ -3135,7 +3223,10 @@ mb_patmatchrange(char *range, wchar_t ch)
      */
     while (*range) {
 	if (imeta(STOUC(*range))) {
-	    switch (STOUC(*range++) - STOUC(Meta)) {
+	    int swtype = STOUC(*range++) - STOUC(Meta);
+	    if (mtp)
+		*mtp = swtype;
+	    switch (swtype) {
 	    case 0:
 		/* ordinary metafied character */
 		range--;
@@ -3214,8 +3305,19 @@ mb_patmatchrange(char *range, wchar_t ch)
 	    case PP_RANGE:
 		r1 = metacharinc(&range);
 		r2 = metacharinc(&range);
-		if (r1 <= ch && ch <= r2)
+		if (r1 <= ch && ch <= r2) {
+		    if (indptr)
+			*indptr += ch - r1;
 		    return 1;
+		}
+		/* Careful not to screw up counting with bogus range */
+		if (indptr && r1 < r2) {
+		    /*
+		     * This gets incremented again below to get
+		     * us past the range end.  This is correct.
+		     */
+		    *indptr += r2 - r1;
+		}
 		break;
 	    case PP_UNKWN:
 		DPUTS(1, "BUG: unknown posix range passed through.\n");
@@ -3224,21 +3326,130 @@ mb_patmatchrange(char *range, wchar_t ch)
 		DPUTS(1, "BUG: unknown metacharacter in range.");
 		break;
 	    }
-	} else if (metacharinc(&range) == ch)
+	} else if (metacharinc(&range) == ch) {
+	    if (mtp)
+		*mtp = 0;
 	    return 1;
+	}
+	if (indptr)
+	    (*indptr)++;
     }
     return 0;
 }
 
+
+#if 0
+/*
+ * This is effectively the reverse of mb_patmatchrange().
+ * Given a range descriptor of the same form, and an index into it,
+ * try to determine the character that is matched.  If the index
+ * points to a [:...:] generic style match, set chr to WEOF and
+ * return the type in mtp instead.  Return 1 if successful, 0 if
+ * there was no corresponding index.  Note all pointer arguments
+ * must be non-null.
+ *
+ * TODO: for now the completion matching code does not handle
+ * multibyte.  When it does, we will need either this, or
+ * patmatchindex(), but not both---unlike user-initiated pattern
+ * matching, multibyte mode in the line editor is always on when available.
+ */
+
 /**/
+mod_export int
+mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
+{
+    wchar_t r1, r2, rchr;
+    wint_t rdiff;
+
+    *chr = WEOF;
+    *mtp = 0;
+
+    while (*range) {
+	if (imeta(STOUC(*range))) {
+	    int swtype = STOUC(*range++) - STOUC(Meta);
+	    switch (swtype) {
+	    case 0:
+		range--;
+		rchr = metacharinc(&range);
+		if (!ind) {
+		    *chr = (wint_t) rchr;
+		    return 1;
+		}
+		break;
+
+	    case PP_ALPHA:
+	    case PP_ALNUM:
+	    case PP_ASCII:
+	    case PP_BLANK:
+	    case PP_CNTRL:
+	    case PP_DIGIT:
+	    case PP_GRAPH:
+	    case PP_LOWER:
+	    case PP_PRINT:
+	    case PP_PUNCT:
+	    case PP_SPACE:
+	    case PP_UPPER:
+	    case PP_XDIGIT:
+	    case PP_IDENT:
+	    case PP_IFS:
+	    case PP_IFSSPACE:
+	    case PP_WORD:
+		if (!ind) {
+		    *mtp = swtype;
+		    return 1;
+		}
+		break;
+
+	    case PP_RANGE:
+		r1 = metacharinc(&range);
+		r2 = metacharinc(&range);
+		rdiff = (wint_t)r2 - (wint_t)r1; 
+		if (rdiff >= ind) {
+		    *chr = (wint_t)r1 + ind;
+		    return 1;
+		}
+		/* note the extra decrement to ind below */
+		ind -= rdiff;
+		break;
+	    case PP_UNKWN:
+		DPUTS(1, "BUG: unknown posix range passed through.\n");
+		break;
+	    default:
+		DPUTS(1, "BUG: unknown metacharacter in range.");
+		break;
+	    }
+	} else {
+	    rchr = metacharinc(&range);
+	    if (!ind) {
+		*chr = (wint_t)rchr;
+		return 1;
+	    }
+	}
+	if (!ind--)
+	    break;
+    }
+
+    /* No corresponding index. */
+    return 0;
+}
 #endif
 
 /**/
-static int
-patmatchrange(char *range, int ch)
+#endif
+
+/*
+ * Identical function to mb_patmatchrange() above for single-byte
+ * characters.
+ */
+
+/**/
+mod_export int
+patmatchrange(char *range, int ch, int *indptr, int *mtp)
 {
     int r1, r2;
 
+    if (indptr)
+	*indptr = 0;
     /*
      * Careful here: unlike other strings, range is a NULL-terminated,
      * metafied string, because we need to treat the Posix and hyphenated
@@ -3246,7 +3457,10 @@ patmatchrange(char *range, int ch)
      */
     for (; *range; range++) {
 	if (imeta(STOUC(*range))) {
-	    switch (STOUC(*range)-STOUC(Meta)) {
+	    int swtype = STOUC(*range) - STOUC(Meta);
+	    if (mtp)
+		*mtp = swtype;
+	    switch (swtype) {
 	    case 0:
 		if (STOUC(*++range ^ 32) == ch)
 		    return 1;
@@ -3326,8 +3540,13 @@ patmatchrange(char *range, int ch)
 		r2 = STOUC(UNMETA(range));
 		if (*range == Meta)
 		    range++;
-		if (r1 <= ch && ch <= r2)
+		if (r1 <= ch && ch <= r2) {
+		    if (indptr)
+			*indptr += ch - r1;
 		    return 1;
+		}
+		if (indptr && r1 < r2)
+		    *indptr += r2 - r1;
 		break;
 	    case PP_UNKWN:
 		DPUTS(1, "BUG: unknown posix range passed through.\n");
@@ -3336,9 +3555,100 @@ patmatchrange(char *range, int ch)
 		DPUTS(1, "BUG: unknown metacharacter in range.");
 		break;
 	    }
-	} else if (STOUC(*range) == ch)
+	} else if (STOUC(*range) == ch) {
+	    if (mtp)
+		*mtp = 0;
 	    return 1;
+	}
+	if (indptr)
+	    (*indptr)++;
+    }
+    return 0;
+}
+
+/*
+ * Identical function to mb_patmatchindex() above for single-byte
+ * characters.  Here -1 represents a character that needs a special type.
+ */
+
+/**/
+mod_export int
+patmatchindex(char *range, int ind, int *chr, int *mtp)
+{
+    int r1, r2, rdiff, rchr;
+
+    *chr = -1;
+    *mtp = 0;
+
+    for (; *range; range++) {
+	if (imeta(STOUC(*range))) {
+	    int swtype = STOUC(*range) - STOUC(Meta);
+	    switch (swtype) {
+	    case 0:
+		/* ordinary metafied character */
+		rchr = STOUC(*++range) ^ 32;
+		if (!ind) {
+		    *chr = rchr;
+		    return 1;
+		}
+		break;
+
+	    case PP_ALPHA:
+	    case PP_ALNUM:
+	    case PP_ASCII:
+	    case PP_BLANK:
+	    case PP_CNTRL:
+	    case PP_DIGIT:
+	    case PP_GRAPH:
+	    case PP_LOWER:
+	    case PP_PRINT:
+	    case PP_PUNCT:
+	    case PP_SPACE:
+	    case PP_UPPER:
+	    case PP_XDIGIT:
+	    case PP_IDENT:
+	    case PP_IFS:
+	    case PP_IFSSPACE:
+	    case PP_WORD:
+		if (!ind) {
+		    *mtp = swtype;
+		    return 1;
+		}
+		break;
+
+	    case PP_RANGE:
+		range++;
+		r1 = STOUC(UNMETA(range));
+		METACHARINC(range);
+		r2 = STOUC(UNMETA(range));
+		if (*range == Meta)
+		    range++;
+		rdiff = r2 - r1; 
+		if (rdiff >= ind) {
+		    *chr = r1 + ind;
+		    return 1;
+		}
+		/* note the extra decrement to ind below */
+		ind -= rdiff;
+		break;
+	    case PP_UNKWN:
+		DPUTS(1, "BUG: unknown posix range passed through.\n");
+		break;
+	    default:
+		DPUTS(1, "BUG: unknown metacharacter in range.");
+		break;
+	    }
+	} else {
+	    if (!ind) {
+		*chr = STOUC(*range);
+		return 1;
+	    }
+	}
+	if (!ind--)
+	    break;
     }
+
+    /* No corresponding index. */
     return 0;
 }
 
@@ -3382,14 +3692,14 @@ static int patrepeat(Upat p, char *charstart)
 #ifdef MULTIBYTE_SUPPORT
 	    wchar_t cr = CHARREF(scan, patinend);
 	    if (patglobflags & GF_MULTIBYTE) {
-		if (mb_patmatchrange(opnd, cr) ^
+		if (mb_patmatchrange(opnd, cr, NULL, NULL) ^
 		    (P_OP(p) == P_ANYOF))
 		    break;
-	    } else if (patmatchrange(opnd, (int)cr) ^
+	    } else if (patmatchrange(opnd, (int)cr, NULL, NULL) ^
 		       (P_OP(p) == P_ANYOF))
 		break;
 #else
-	    if (patmatchrange(opnd, CHARREF(scan, patinend)) ^
+	    if (patmatchrange(opnd, CHARREF(scan, patinend), NULL, NULL) ^
 		(P_OP(p) == P_ANYOF))
 		break;
 #endif
diff --git a/Src/zsh.h b/Src/zsh.h
index e40761199..b5de54bc9 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1307,6 +1307,48 @@ struct patprog {
 #define PAT_HAS_EXCLUDP	0x0800	/* (internal): top-level path1~path2. */
 #define PAT_LCMATCHUC   0x1000  /* equivalent to setting (#l) */
 
+/*
+ * Special match types used in character classes.  These
+ * are represented as tokens, with Meta added.  The character
+ * class is represented as a metafied string, with only these
+ * tokens special.  Note that an active leading "!" or "^" for
+ * negation is not part of the string but is flagged in the
+ * surrounding context.
+ *
+ * These types are also used in character and equivalence classes
+ * in completion matching.
+ *
+ * This must be kept ordered by the array colon_stuffs in pattern.c.
+ */
+/* Special value for first definition */
+#define PP_FIRST  1
+/* POSIX-defined types:  [:alpha:] etc. */
+#define PP_ALPHA  1
+#define PP_ALNUM  2
+#define PP_ASCII  3
+#define PP_BLANK  4
+#define PP_CNTRL  5
+#define PP_DIGIT  6
+#define PP_GRAPH  7
+#define PP_LOWER  8
+#define PP_PRINT  9
+#define PP_PUNCT  10
+#define PP_SPACE  11
+#define PP_UPPER  12
+#define PP_XDIGIT 13
+/* Zsh additions:  [:IDENT:] etc. */
+#define PP_IDENT  14
+#define PP_IFS    15
+#define PP_IFSSPACE   16
+#define PP_WORD   17
+/* Special value for last definition */
+#define PP_LAST   17
+
+/* Unknown type.  Not used in a valid token. */
+#define PP_UNKWN  18
+/* Range: token followed by the (possibly multibyte) start and end */
+#define PP_RANGE  19
+
 /* Globbing flags: lower 8 bits gives approx count */
 #define GF_LCMATCHUC	0x0100
 #define GF_IGNCASE	0x0200
author	Peter Stephenson <pws@users.sourceforge.net>	2008-06-08 17:53:53 +0000
committer	Peter Stephenson <pws@users.sourceforge.net>	2008-06-08 17:53:53 +0000
commit	bb68ee8db7971b683fba7dd7bf404186872ba7cf (patch)
tree	ce1f5ebe661c12386675eb3ca98521280d521a0b /Src
parent	2dcc8627c9f4be547e6e49c76d45bea70c714a71 (diff)
download	zsh-bb68ee8db7971b683fba7dd7bf404186872ba7cf.tar.gz zsh-bb68ee8db7971b683fba7dd7bf404186872ba7cf.tar.xz zsh-bb68ee8db7971b683fba7dd7bf404186872ba7cf.zip