about summary refs log tree commit diff
diff options
context:
space:
mode:
authorTanaka Akira <akr@users.sourceforge.net>1999-09-01 14:15:12 +0000
committerTanaka Akira <akr@users.sourceforge.net>1999-09-01 14:15:12 +0000
commit2d4fc9a653546ea9bfee9fe22cc2a79da1b3c1f8 (patch)
treedbb6316942e937c80d7bafa3be6e88226600fe80
parentafea70b88ee7918b0d2e9648d5be4c6982f29777 (diff)
downloadzsh-2d4fc9a653546ea9bfee9fe22cc2a79da1b3c1f8.tar.gz
zsh-2d4fc9a653546ea9bfee9fe22cc2a79da1b3c1f8.tar.xz
zsh-2d4fc9a653546ea9bfee9fe22cc2a79da1b3c1f8.zip
zsh-workers/7611
-rw-r--r--Doc/Zsh/expn.yo38
-rwxr-xr-xMisc/globtests2
-rw-r--r--Src/parse.c6
-rw-r--r--Src/pattern.c698
4 files changed, 413 insertions, 331 deletions
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index f78c959bb..ebb95a833 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -1089,13 +1089,16 @@ not in the given set.
 item(tt(<)[var(x)]tt(-)[var(y)]tt(>))(
 Matches any number in the range var(x) to var(y), inclusive.
 Either of the numbers may be omitted to make the range open-ended;
-hence `tt(<->)' matches any number.
+hence `tt(<->)' matches any number.  To match individual digits, the
+tt([)...tt(]) form is more efficient.
 )
 item(tt(LPAR())...tt(RPAR()))(
 Matches the enclosed pattern.  This is used for grouping.
 If the tt(KSH_GLOB) option is set, then a
 `tt(@)', `tt(*)', `tt(+)', `tt(?)' or `tt(!)' immediately preceding
-the `tt(LPAR())' is treated specially, as detailed below.
+the `tt(LPAR())' is treated specially, as detailed below. The option
+tt(SH_GLOB) prevents bare parentheses from being used in this way, though
+the tt(KSH_GLOB) option is still available.
 Note that grouping cannot currently extend over multiple directories:
 a `tt(/)' separating a directory terminates processing of the current
 group; processing resumes after the end of the group.
@@ -1120,7 +1123,7 @@ This has lower precedence than any operator except `tt(|)', so
 `tt(*/*~foo/bar)' will search for all files in all directories in `tt(.)'
 and then exclude `tt(foo/bar)' if there was such a match.
 Multiple patterns can be excluded by
-`var(foo)tt(~LPAR())var(bar)tt(|)var(baz)tt(RPAR())'.
+`var(foo)tt(~)var(bar)tt(~)var(baz)'.
 In the exclusion pattern (var(y)), `tt(/)' and `tt(.)' are not treated
 specially the way they usually are in globbing.
 )
@@ -1128,13 +1131,19 @@ item(var(x)tt(#))(
 (Requires tt(EXTENDED_GLOB) to be set.)
 Matches zero or more occurrences of the pattern var(x).
 This operator has high precedence; `tt(12#)' is equivalent to `tt(1(2#))',
-rather than `tt((12)#)'.
+rather than `tt((12)#)'.  It is an error for an unquoted `tt(#)' to follow
+something which cannot be repeated; this includes an empty string, a
+pattern already followed by `tt(##)', or parentheses when part of a
+tt(KSH_GLOB) pattern (for example, `tt(!LPAR())var(foo)tt(RPAR()#)' is
+invalid and must be replaced by
+`tt(*LPAR()!LPAR())var(foo)tt(RPAR()RPAR())').
 )
 item(var(x)tt(##))(
 (Requires tt(EXTENDED_GLOB) to be set.)
 Matches one or more occurrences of the pattern var(x).
 This operator has high precedence; `tt(12##)' is equivalent to `tt(1(2##))',
-rather than `tt((12)##)'.
+rather than `tt((12)##)'.  No more than two active `tt(#)' characters may
+appear together.
 )
 enditem()
 subsect(ksh-like Glob Operators)
@@ -1162,6 +1171,20 @@ Match anything but the expression in parentheses.
 (Like `tt(LPAR()^LPAR())...tt(RPAR()RPAR())'.)
 )
 enditem()
+subsect(Precedence)
+cindex(precedence of glob operators)
+The precedence of the operators given above is (highest) `tt(^)', `tt(/)',
+`tt(~)', `tt(|)' (lowest); the
+remaining operators are simply treated from left to right as part of a
+string, with `tt(#)' and `tt(##)' applying to the shortest possible
+preceeding unit (i.e. a character, `tt(?)', `tt([)...tt(])',
+`tt(<)...tt(>)', or a parenthesised expression).  As mentioned
+above, a `tt(/)' used as a directory separator may not appear inside
+parentheses, while a `tt(|)' must do so; in patterns used in other contexts
+than filename generation (for example, in tt(case) statements and tests
+within `tt([[)...tt(]])'), a `tt(/)' is not special; and `tt(/)' is also
+not special after a `tt(~)' appearing outside parentheses in a filename
+pattern.
 subsect(Globbing Flags)
 There are various flags which affect any text to their right up to the
 end of the enclosing group or to the end of the pattern; they require
@@ -1258,7 +1281,10 @@ tt(LPAR()#a1)tt(RPAR()cat)tt(LPAR()LPAR()#a0)tt(RPAR()dog)tt(RPAR()fox)
 allows one error in total, which may not occur in the tt(dog) section, and
 the pattern
 tt(LPAR()#a1)tt(RPAR()cat)tt(LPAR()#a0)tt(RPAR()dog)tt(LPAR()#a1)tt(RPAR()fox)
-is equivalent.
+is equivalent.  Note that the point at which an error is first found is the
+crucial one for establishing whether to use approximation; for example,
+tt((#a1)abc(#a0)xyz) will not match tt(abcdxyz), because the error occurs
+at the `tt(x)', where approximation is turned off.
 
 subsect(Recursive Globbing)
 A pathname component of the form `tt(LPAR())var(foo)tt(/RPAR()#)'
diff --git a/Misc/globtests b/Misc/globtests
index b2fe8d8ee..a0f8184ec 100755
--- a/Misc/globtests
+++ b/Misc/globtests
@@ -16,6 +16,8 @@ while read res str pat; do
 done <<EOT
 # a few simple things certain nameless idiots have been known to mess up
 t foo~                foo~
+t foo~                (foo~)
+t foo~                (foo~|)
 t foo.c               *.c~boo*
 f foo.c               *.c~boo*~foo*
 # closures
diff --git a/Src/parse.c b/Src/parse.c
index 658a66660..d0a9bc45f 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -623,6 +623,12 @@ par_case(Cmd c)
 		    }
 		    if (*s || pct || s == str + 1)
 			YYERRORV;
+		    /* Simplify pattern by removing surrounding (...) */
+		    sl = strlen(str);
+		    DPUTS(str[1] != Inpar || str[sl-1] != Outpar,
+			  "BUG: strange case pattern");
+		    str[sl-1] = '\0';
+		    chuck(str+1);
 		    break;
 		} else {
 		    char *str2;
diff --git a/Src/pattern.c b/Src/pattern.c
index 048e3d3ec..bfc9102a5 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -1,5 +1,5 @@
 /*
- * glob.c - filename generation
+ * pattern.c - pattern matching
  *
  * This file is part of zsh, the Z shell.
  *
@@ -50,6 +50,24 @@
  */
 
 #include "zsh.mdh"
+
+/*
+ * The following union is used mostly for alignment purposes.
+ * Normal nodes are longs, while certain nodes take a char * as an argument;
+ * here we make sure that they both work out to the same length.
+ * The compiled regexp we construct consists of upats stuck together;
+ * anything else to be added (strings, numbers) is stuck after and
+ * then aligned to a whole number of upat units.
+ *
+ * Note also that offsets are in terms of the sizes of these things.
+ */
+union upat {
+    long l;
+    unsigned char *p;
+};
+
+typedef union upat *Upat;
+
 #include "pattern.pro"
 
 /*
@@ -154,36 +172,17 @@
 #define PP_UNKWN  13
 #define PP_RANGE  14
 
-/* Align everything to the pointer type. */
-typedef char *zalign_t;
-
-#define	P_OP(p)		(*(long *)(p) & 0xff)
-#define	P_NEXT(p)	(*(long *)(p) >> 8)
-#define	P_OPERAND(p)	((p) + sizeof(zalign_t))
-#define P_ISBRANCH(p)   (*(long *)(p) & 0x20)
-#define P_ISEXCLUDE(p)	((*(long *)(p) & 0x30) == 0x30)
-#define P_NOTDOT(p)	(*(long *)(p) & 0x40)
+#define	P_OP(p)		((p)->l & 0xff)
+#define	P_NEXT(p)	((p)->l >> 8)
+#define	P_OPERAND(p)	((p) + 1)
+#define P_ISBRANCH(p)   ((p)->l & 0x20)
+#define P_ISEXCLUDE(p)	(((p)->l & 0x30) == 0x30)
+#define P_NOTDOT(p)	((p)->l & 0x40)
 
 #define P_SIMPLE        0x01	/* Simple enough to be #/## operand. */
 #define P_HSTART        0x02	/* Starts with # or ##'d pattern. */
 #define P_PURESTR	0x04	/* Can be matched with a strcmp */
 
-/*
- * pointer is end string to be parsed?
- * a bit dire because of extendedglob possibilities:
- * we need to make sure a ~ at the end of a string isn't mistaken
- * for an excluder or lots of emacs users get very cross.
- */
-#define ISENDCHAR(X) (!*(X) || ((patflags & PAT_FILE) && *(X) == '/') || \
-		      *(X) == Bar || *(X) == Outpar || \
-		      (isset(EXTENDEDGLOB) && \
-		       (*(X) == Hat || \
-			(*(X) == Tilde && \
-			 !(!(X)[1] || ((patflags & PAT_FILE) && \
-				       (X)[1] == '/') || \
-			   (X)[1] == Bar || (X)[1] == Outpar || \
-			   (X)[1] == Tilde)))))
-
 /* Next character after one which may be a Meta (x is any char *) */
 #define METANEXT(x)	(*(x) == Meta ? (x)+2 : (x)+1)
 /*
@@ -202,6 +201,29 @@ typedef zlong zrange_t;
 typedef unsigned long zrange_t;
 #endif
 
+/*
+ * Characters which terminate a pattern segment.  We actually use
+ * a pointer patendseg which skips the first character if we are not
+ * parsing a file pattern.
+ * Note that the size of this and the next array are hard-wired into
+ * patcompile.
+ */
+
+static char endseg[] = {
+    '/',			/* file only */
+    '\0', Bar, Outpar,		/* all patterns */
+    Tilde			/* extended glob only */
+};
+
+/* Characters which terminate a simple string */
+
+static char endstr[] = {
+    '/',			/* file only */
+    '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang,
+				/* all patterns */
+    Tilde, Hat, Pound		/* extended glob only */
+};
+
 /* Default size for pattern buffer */
 #define P_DEF_ALLOC 256
 
@@ -212,6 +234,10 @@ static char *patcode;		/* point of code emission */
 static long patsize;		/* size of code */
 static char *patout;		/* start of code emission string */
 static long patalloc;		/* size allocated for same */
+static char *patendseg;		/* characters ending segment */
+static int patendseglen;	/* length of same */
+static char *patendstr;		/* characters ending plain string */
+static int patendstrlen;	/* length of sameo */
 
 /* Flags used in both compilation and execution */
 static int patflags;		    /* flags passed down to patcompile */
@@ -226,8 +252,8 @@ patadd(char *add, int ch, long n, int noalgn)
     /* Make sure everything gets aligned unless we get noalgn. */
     long newpatsize = patsize + n;
     if (!noalgn)
-	newpatsize = (newpatsize + sizeof(zalign_t) - 1) &
-		      ~(sizeof(zalign_t) - 1);
+	newpatsize = (newpatsize + sizeof(union upat) - 1) &
+		      ~(sizeof(union upat) - 1);
     if (patalloc < newpatsize) {
 	long newpatalloc =
 	    2*(newpatsize > patalloc ? newpatsize : patalloc);
@@ -245,7 +271,7 @@ patadd(char *add, int ch, long n, int noalgn)
 }
 
 static long rn_offs;
-/* operates on poiners, returns a pointer */
+/* operates on pointers to union upat, returns a pointer */
 #define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
 		    (P_OP(p) == P_BACK) ? \
 		    ((p)-rn_offs) : ((p)+rn_offs) : NULL)
@@ -267,11 +293,10 @@ patcompile(char *exp, int inflags, char **endexp)
 {
     int flags, len;
     long startoff;
-    char *pscan, *lng;
+    Upat pscan;
+    char *lng;
     Patprog p;
 
-    DPUTS(sizeof(long) > sizeof(zalign_t), "BUG: patprog alignment too small");
-
 #ifdef BACKREFERENCES
     startoff = (inflags & PAT_BACKR) ? sizeof(struct patprog) :
 	sizeof(struct patprog_short);
@@ -279,7 +304,7 @@ patcompile(char *exp, int inflags, char **endexp)
     startoff = sizeof(struct patprog);
 #endif
     /* Ensure alignment of start of program string */
-    startoff = (startoff + sizeof(zalign_t) - 1) & ~(sizeof(zalign_t) - 1);
+    startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1);
 
     /* Allocate reasonable sized chunk if none, reduce size if too big */
     if (patalloc != P_DEF_ALLOC)
@@ -290,7 +315,16 @@ patcompile(char *exp, int inflags, char **endexp)
     patnpar = 1;
     patflags = inflags;
 
+    patendseg = endseg;
+    patendseglen = isset(EXTENDEDGLOB) ? 5 : 4;
+    patendstr = endstr;
+    patendstrlen = isset(EXTENDEDGLOB) ? 12 : 9;
+
     if (!(patflags & PAT_FILE)) {
+	patendseg++;
+	patendstr++;
+	patendseglen--;
+	patendstrlen--;
 	remnulargs(exp);
 	patglobflags = 0;
     }
@@ -313,7 +347,7 @@ patcompile(char *exp, int inflags, char **endexp)
     p->mustoff = 0;
     p->size = patsize;
     p->patmlen = 0;
-    pscan = patout + startoff;
+    pscan = (Upat)(patout + startoff);
 
     if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) {
 	/* only one top level choice */
@@ -324,24 +358,25 @@ patcompile(char *exp, int inflags, char **endexp)
 	     * The pattern can be matched with a simple strncmp/strcmp.
 	     * Careful in case we've overwritten the node for the next ptr.
 	     */
-	    char *dst = patout + startoff, *next;
+	    char *dst = patout + startoff;
+	    Upat next;
 	    p->flags |= PAT_PURES;
 	    for (; pscan; pscan = next) {
 		next = PATNEXT(pscan);
 		if (P_OP(pscan) == P_EXACTLY) {
-		    char *opnd = P_OPERAND(pscan);
+		    char *opnd = (char *)P_OPERAND(pscan);
 		    while ((*dst = *opnd++))
 			dst++;
 		}
 	    }
 	    *dst++ = '\0';
 	    p->size = dst - patout;
-	    /* patmlen is reall strlen, don't include null byte */
+	    /* patmlen is really strlen, don't include null byte */
 	    p->patmlen = p->size - startoff - 1;
 	} else {
 	    /* starting point info */
 	    if (P_OP(pscan) == P_EXACTLY && !p->globflags)
-		p->patstartch = *P_OPERAND(pscan);
+		p->patstartch = *(char *)P_OPERAND(pscan);
 	    /* Find the longest literal string in something expensive.
 	     * This is itself not all that cheap if we have case-insensitive
 	     * matching or approximation, so don't.
@@ -352,8 +387,8 @@ patcompile(char *exp, int inflags, char **endexp)
 		for (; pscan; pscan = PATNEXT(pscan))
 		    if (P_OP(pscan) == P_EXACTLY &&
 			strlen((char *)P_OPERAND(pscan)) >= len) {
-			lng = P_OPERAND(pscan);
-			len = strlen((char *)P_OPERAND(pscan));
+			lng = (char *)P_OPERAND(pscan);
+			len = strlen(lng);
 		    }
 		if (lng) {
 		    p->mustoff = lng - patout;
@@ -393,7 +428,7 @@ patcompswitch(int paren, int *flagp)
     int parno = 0;
 #endif
     int flags, gfchanged = 0, savflags = patflags, savglobflags = patglobflags;
-    char *ptr;
+    Upat ptr;
 
     *flagp = 0;
 
@@ -424,17 +459,15 @@ patcompswitch(int paren, int *flagp)
     *flagp |= flags & (P_HSTART|P_PURESTR);
 
     while (*patparse == Bar ||
-	   (isset(EXTENDEDGLOB) &&
-	    *patparse == Tilde && patparse[1] && patparse[1] != Bar &&
-	    patparse[1] != Outpar && patparse[1] != Tilde &&
-	    !((patflags & PAT_FILE) && patparse[1] == '/'))) {
+	   (isset(EXTENDEDGLOB) && *patparse == Tilde &&
+	    !memchr(patendseg, patparse[1], patendseglen))) {
 	int tilde = *patparse++ == Tilde;
 	long gfnode = 0, newbr;
 
 	*flagp &= ~P_PURESTR;
 
 	if (tilde) {
-	    unsigned char *unull = NULL;
+	    union upat up;
 	    /* excsync remembers the P_EXCSYNC node before a chain of
 	     * exclusions:  all point back to this.  only the
 	     * original (non-excluded) branch gets a trailing P_EXCSYNC.
@@ -455,7 +488,8 @@ patcompswitch(int paren, int *flagp)
 	    patglobflags &= ~0xff;
 	    br = patnode(!(patflags & PAT_FILET) || paren ?
 			 P_EXCLUDE : P_EXCLUDP);
-	    patadd((char *)&unull, 0, sizeof(unull), 0);
+	    up.p = NULL;
+	    patadd((char *)&up, 0, sizeof(up), 0);
 	    /* / is not treated as special if we are at top level */
 	    if (!paren)
 		patflags &= ~PAT_FILE;
@@ -485,9 +519,10 @@ patcompswitch(int paren, int *flagp)
 		     * No gfchanged, as nothing to follow branch at top
 		     * level. 
 		     */
+		    union upat up;
 		    gfnode = patnode(P_GFLAGS);
-		    patadd((char *)&patglobflags, 0, sizeof(long),
-			   0);
+		    up.l = patglobflags;
+		    patadd((char *)&up, 0, sizeof(union upat), 0);
 		}
 	    } else {
 		patglobflags = savglobflags;
@@ -525,9 +560,9 @@ patcompswitch(int paren, int *flagp)
      * Hook the tails of the branches to the closing node,
      * except for exclusions which terminate where they are.
      */
-    for (ptr = patout + starter; ptr; ptr = PATNEXT(ptr))
+    for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr))
 	if (!P_ISEXCLUDE(ptr))
-	    patoptail(ptr-patout, ender);
+	    patoptail(ptr-(Upat)patout, ender);
 
     /* check for proper termination */
     if ((paren && *patparse++ != Outpar) ||
@@ -566,12 +601,9 @@ patcompbranch(int *flagp)
     *flagp = P_PURESTR;
 
     starter = chain = 0;
-    while (*patparse && !((patflags & PAT_FILE) && *patparse == '/') &&
-	   *patparse != Bar && *patparse != Outpar &&
-	   (!isset(EXTENDEDGLOB) || *patparse != Tilde ||
-	    !patparse[1] || patparse[1] == Bar || patparse[1] == Outpar
-	    || patparse[1] == Tilde ||
-	    ((patflags & PAT_FILE) && patparse[1] == '/'))) {
+    while (!memchr(patendseg, *patparse, patendseglen) &&
+	   (*patparse != Tilde ||
+	    memchr(patendseg, patparse[1], patendseglen))) {
 	if (isset(EXTENDEDGLOB) &&
 	    ((!isset(SHGLOB) &&
 	      (*patparse == Inpar && patparse[1] == Pound)) ||
@@ -601,8 +633,10 @@ patcompbranch(int *flagp)
 	     */
 	    if (oldglobflags != patglobflags) {
 		/* Flags changed */
+		union upat up;
 		latest = patnode(P_GFLAGS);
-		patadd((char *)&patglobflags, 0, sizeof(int), 0);
+		up.l = patglobflags;
+		patadd((char *)&up, 0, sizeof(union upat), 0);
 	    } else {
 		/* No effect. */
 		continue;
@@ -696,156 +730,104 @@ static long
 patcomppiece(int *flagp)
 {
     long starter, next, pound, op;
-    int flags, kshchar;
-    unsigned char *ptr;
-
-    starter = patcompatom(&flags, &kshchar);
-    if (!starter)
-	return 0;
-
-    if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
-	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
-	*flagp = flags;
-	return starter;
-    }
-
-    /* too much at once doesn't currently work */
-    if (kshchar && pound)
-	return 0;
-
-    if (kshchar == '*') {
-	op = P_ONEHASH;
-	*flagp = P_HSTART;
-    } else if (kshchar == '+') {
-	op = P_TWOHASH;
-	*flagp = P_HSTART;
-    } else if (kshchar == '?') {
-	op = 0;
-	*flagp = 0;
-    } else if (*++patparse == Pound) {
-	op = P_TWOHASH;
-	patparse++;
-	*flagp = P_HSTART;
-    } else {
-	op = P_ONEHASH;
-	*flagp = P_HSTART;
-    }
-
-    /*
-     * Note optimizations with pointers into P_NOTHING branches:  some
-     * should logically point to next node after current piece.
-     *
-     * Backtracking is also encoded in a slightly obscure way:  the
-     * code emitted ensures we test the non-empty branch of complex
-     * patterns before the empty branch on each repetition.  Hence
-     * each time we fail on a non-empty branch, we try the empty branch,
-     * which is equivalent to backtracking.
-     */
-    if ((flags & P_SIMPLE) && op == P_ONEHASH &&
-	P_OP(patout+starter) == P_ANY) {
-	/* Optimize ?# to *.  Silly thing to do, since who would use
-	 * use ?# ? But it makes the later code shorter.
-	 */
-	long *lptr = (long *)(patout + starter);
-	*lptr = (*lptr & ~0xff) | P_STAR;
-    } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
-	/* Don't simplify if we need to look for approximations. */
-	patinsert(op, starter, NULL, 0);
-    } else if (op == P_ONEHASH) {
-	/* Emit x# as (x&|), where & means "self". */
-	ptr = NULL;
-	patinsert(P_WBRANCH, starter, (char *)&ptr, sizeof(ptr));
-	                                      /* Either x */
-	patoptail(starter, patnode(P_BACK));  /* and loop */
-	patoptail(starter, starter);	      /* back */
-	pattail(starter, patnode(P_BRANCH));  /* or */
-	pattail(starter, patnode(P_NOTHING)); /* null. */
-    } else if (op == P_TWOHASH) {
-	/* Emit x## as x(&|) where & means "self". */
-	next = patnode(P_WBRANCH);	      /* Either */
-	ptr = NULL;
-	patadd((char *)&ptr , 0, sizeof(ptr), 0);
-	pattail(starter, next);
-	pattail(patnode(P_BACK), starter);    /* loop back */
-	pattail(next, patnode(P_BRANCH));     /* or */
-	pattail(starter, patnode(P_NOTHING)); /* null. */
-    } else if (kshchar == '?') {
-	/* Emit ?(x) as (x|) */
-	patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
-	pattail(starter, patnode(P_BRANCH));   /* or */
-	next = patnode(P_NOTHING);	       /* null */
-	pattail(starter, next);
-	patoptail(starter, next);
-    }
-    if (*patparse == Pound)
-	return 0;
-
-    return starter;
-}
-
-/*
- * Parse lowest level pattern.  If doing ordinary characters, we
- * gobble a complete string as far as we can get.
- * kshcharp returns a character found before an Inpar, for handling
- * as a closure.
- */
-
-/**/
-static long
-patcompatom(int *flagp, int *kshcharp)
-{
-    long starter;
-    int patch, flags, len, ch;
+    int flags, flags2, kshchar, len, ch, patch;
+    union upat up;
     char *nptr, *str0, cbuf[2];
     zrange_t from, to;
 
-    *flagp = 0;
+    flags = 0;
     str0 = patparse;
     for (;;) {
-	/* check kshglob here */
-	*kshcharp = '\0';
+	/*
+	 * Check if we have a string. First, we need to make sure
+	 * the string doesn't introduce a ksh-like parenthesised expression.
+	 */
+	kshchar = '\0';
 	if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) {
-	    if (strchr("?*+!@", (char)*patparse))
-		*kshcharp = STOUC(*patparse);
+	    if (strchr("?*+!@", *patparse))
+		kshchar = STOUC(*patparse);
 	    else if (*patparse == Star || *patparse == Quest)
-		*kshcharp = STOUC(ztokens[*patparse - Pound]);
+		kshchar = STOUC(ztokens[*patparse - Pound]);
 	}
 
-	if (patparse > str0) {
-	    /*
-	     * This is up here instead of at the end to simplify the
-	     * kshglob bracket testing.  Note patparse doesn't
-	     * get incremented till afterwards.
-	     */
-	    if (ISENDCHAR(patparse) || *kshcharp || *patparse == Quest ||
-		*patparse == Star || *patparse == Inbrack ||
-		(*patparse == Inpar && !isset(SHGLOB)) ||
-		*patparse == Inang ||
-		(isset(EXTENDEDGLOB) && *patparse == Pound))
-		break;
-	    else {
-		METAINC(patparse);
-		continue;
-	    }
-	}
+	/*
+	 * End of string (or no string at all) if ksh-type parentheses,
+	 * or special character, unless that character is a tilde and
+	 * the character following is an end-of-segment character.  Thus
+	 * tildes are not special if there is nothing following to
+	 * be excluded.
+	 */
+	if (kshchar || (memchr(patendstr, *patparse, patendstrlen) &&
+			(*patparse != Tilde ||
+			 !memchr(patendseg, patparse[1], patendseglen))))
+	    break;
+
+	METAINC(patparse);
+    }
 
-	if (*kshcharp)
+    if (patparse > str0) {
+	/* Ordinary string: cancel kshchar lookahead */
+	kshchar = '\0';
+	/*
+	 * Assume it matches a simple string until we find otherwise.
+	 */
+	flags |= P_PURESTR;
+	DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
+	/* more than one character matched? */
+	len = str0 + (*str0 == Meta ? 2 : 1) < patparse;
+	/*
+	 * If we have more than one character, a following hash only
+	 * applies to the last, so decrement.
+	 */
+	if (isset(EXTENDEDGLOB) && *patparse == Pound && len)
+	    patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1;
+	/*
+	 * If len is 1, we can't have an active # following, so doesn't
+	 * matter that we don't make X in `XX#' simple.
+	 */
+	if (!len)
+	    flags |= P_SIMPLE;
+	starter = patnode(P_EXACTLY);
+	/* add enough space including null byte */
+	len = patparse - str0;
+	patadd(str0, 0, len + 1, 0);
+	nptr = (char *)P_OPERAND((Upat)patout + starter);
+	nptr[len] = '\0';
+	/*
+	 * It's much simpler to turn off pure string mode for
+	 * any case-insensitive or approximate matching; usually,
+	 * that is correct, or they wouldn't have been turned on.
+	 * However, we need to make sure we match a "." or ".."
+	 * in a file name as a pure string.  There's a minor bug
+	 * that this will also apply to something like
+	 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
+	 * going to write funny patterns, you get no sympathy from me.
+	 */
+	if (patglobflags &&
+	    (!(patflags & PAT_FILE) || (strcmp(nptr, ".") &&
+					strcmp(nptr, ".."))))
+	    flags &= ~P_PURESTR;
+	for (; *nptr; METAINC(nptr))
+	    if (itok(*nptr))
+		*nptr = ztokens[*nptr - Pound];
+    } else {
+	if (kshchar)
 	    patparse++;
 
 	patch = *patparse;
 	METAINC(patparse);
 	switch(patch) {
 	case Quest:
-	    *flagp |= P_SIMPLE;
-	    return patnode(P_ANY);
+	    flags |= P_SIMPLE;
+	    starter = patnode(P_ANY);
 	    break;
 	case Star:
 	    /* kshchar is used as a sign that we can't have #'s. */
-	    *kshcharp = -1;
-	    return patnode(P_STAR);
+	    kshchar = -1;
+	    starter = patnode(P_STAR);
 	    break;
 	case Inbrack:
-	    *flagp |= P_SIMPLE;
+	    flags |= P_SIMPLE;
 	    if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
 		patparse++;
 		starter = patnode(P_ANYBUT);
@@ -919,15 +901,14 @@ patcompatom(int *flagp, int *kshcharp)
 	    patparse++;
 	    /* terminate null string and fix alignment */
 	    patadd(NULL, 0, 1, 0);
-	    return starter;
 	    break;
 	case Inpar:
 	    /* is this how to treat parentheses in SHGLOB? */
-	    if (isset(SHGLOB) && !*kshcharp)
+	    if (isset(SHGLOB) && !kshchar)
 		return 0;
-	    if (*kshcharp == '!') {
+	    if (kshchar == '!') {
 		/* This is nasty, we should really either handle all
-		 * kshglobbing upstairs or down here.  But most of the
+		 * kshglobbing below or here.  But most of the
 		 * others look like non-ksh patterns, while this one
 		 * doesn't, so we handle it here and leave the rest.
 		 * We treat it like an extendedglob ^, except that
@@ -939,12 +920,11 @@ patcompatom(int *flagp, int *kshcharp)
 		 * the expense of allowing the user to do things
 		 * they shouldn't.
 		 */
-		if (!(starter = patcompnot(1, &flags)))
+		if (!(starter = patcompnot(1, &flags2)))
 		    return 0;
-	    } else if (!(starter = patcompswitch(1, &flags)))
+	    } else if (!(starter = patcompswitch(1, &flags2)))
 		return 0;
-	    *flagp |= flags & P_HSTART;
-	    return starter;
+	    flags |= flags2 & P_HSTART;
 	    break;
 	case Inang:
 	    /* Numeric glob */
@@ -990,68 +970,101 @@ patcompatom(int *flagp, int *kshcharp)
 	     * Mention in manual that matching digits with [...]
 	     * is more efficient.
 	     */
-	    return starter;
 	    break;
 	case Pound:
-	    if (!isset(EXTENDEDGLOB))
-		break;
+	    DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
+	    /*
+	     * A hash here is an error; it should follow something
+	     * repeatable.
+	     */
 	    return 0;
 	    break;
 #ifdef DEBUG
-	case Bar:
-	case Outpar:
-	case '\0':
-	    dputs("BUG: wrong character in patcompatom.");
+	default:
+	    dputs("BUG: character not handled in patcomppiece");
 	    return 0;
 	    break;
 #endif
 	}
     }
 
-    /* Simple string: cancel kshchar lookahead */
-    *kshcharp = '\0';
-    /*
-     * Assume it matches a simple string until we find otherwise.
-     */
-    *flagp |= P_PURESTR;
-    DPUTS(patparse == str0, "BUG: matched nothing in patcompatom.");
-    /* more than one character matched? */
-    len = str0 + (*str0 == Meta ? 2 : 1) < patparse;
+    if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
+	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
+	*flagp = flags;
+	return starter;
+    }
+
+    /* too much at once doesn't currently work */
+    if (kshchar && pound)
+	return 0;
+
+    if (kshchar == '*') {
+	op = P_ONEHASH;
+	*flagp = P_HSTART;
+    } else if (kshchar == '+') {
+	op = P_TWOHASH;
+	*flagp = P_HSTART;
+    } else if (kshchar == '?') {
+	op = 0;
+	*flagp = 0;
+    } else if (*++patparse == Pound) {
+	op = P_TWOHASH;
+	patparse++;
+	*flagp = P_HSTART;
+    } else {
+	op = P_ONEHASH;
+	*flagp = P_HSTART;
+    }
+
     /*
-     * Ordinary string of characters.
-     * If we have more than one character, a following hash only
-     * applies to the last, so decrement.
-     */
-    if (isset(EXTENDEDGLOB) && *patparse == Pound && len)
-	patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1;
-    /* if len is 1, we can't have an active # following, so doesn't
-     * matter that we don't make X in `XX#' simple.
+     * Note optimizations with pointers into P_NOTHING branches:  some
+     * should logically point to next node after current piece.
+     *
+     * Backtracking is also encoded in a slightly obscure way:  the
+     * code emitted ensures we test the non-empty branch of complex
+     * patterns before the empty branch on each repetition.  Hence
+     * each time we fail on a non-empty branch, we try the empty branch,
+     * which is equivalent to backtracking.
      */
-    if (!len)
-	*flagp |= P_SIMPLE;
-    starter = patnode(P_EXACTLY);
-    while (str0 < patparse) {
-	if (*str0 == Meta) {
-	    cbuf[0] = *str0++;
-	    cbuf[1] = *str0++;
-	} else {
-	    cbuf[0] = itok(*str0) ? ztokens[*str0 - Pound] : *str0;
-	    str0++;
-	}
-	ch = UNMETA(cbuf);
-	/*
-	 * HACK: this cause a string consisting of any number of
-	 * dots in files to be matched exactly, even with approximation.
-	 * We just want to limit it to the first two.
+    if ((flags & P_SIMPLE) && op == P_ONEHASH &&
+	P_OP((Upat)patout+starter) == P_ANY) {
+	/* Optimize ?# to *.  Silly thing to do, since who would use
+	 * use ?# ? But it makes the later code shorter.
 	 */
-	if (((patglobflags & C_IGNCASE) && (islower(ch) || isupper(ch))) ||
-	    ((patglobflags & C_LCMATCHUC) && islower(ch)) ||
-	    ((patglobflags & 0xff) && !((patflags & PAT_FILE) && ch == '.')))
-	    *flagp &= ~P_PURESTR;
-	patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, 1);
+	Upat uptr = (Upat)patout + starter;
+	uptr->l = (uptr->l & ~0xff) | P_STAR;
+    } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
+	/* Don't simplify if we need to look for approximations. */
+	patinsert(op, starter, NULL, 0);
+    } else if (op == P_ONEHASH) {
+	/* Emit x# as (x&|), where & means "self". */
+	up.p = NULL;
+	patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up));
+	                                      /* Either x */
+	patoptail(starter, patnode(P_BACK));  /* and loop */
+	patoptail(starter, starter);	      /* back */
+	pattail(starter, patnode(P_BRANCH));  /* or */
+	pattail(starter, patnode(P_NOTHING)); /* null. */
+    } else if (op == P_TWOHASH) {
+	/* Emit x## as x(&|) where & means "self". */
+	next = patnode(P_WBRANCH);	      /* Either */
+	up.p = NULL;
+	patadd((char *)&up, 0, sizeof(up), 0);
+	pattail(starter, next);
+	pattail(patnode(P_BACK), starter);    /* loop back */
+	pattail(next, patnode(P_BRANCH));     /* or */
+	pattail(starter, patnode(P_NOTHING)); /* null. */
+    } else if (kshchar == '?') {
+	/* Emit ?(x) as (x|) */
+	patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
+	pattail(starter, patnode(P_BRANCH));   /* or */
+	next = patnode(P_NOTHING);	       /* null */
+	pattail(starter, next);
+	patoptail(starter, next);
     }
-    /* null terminate and fix alignment */
-    patadd(NULL, 0, 1, 0);
+    if (*patparse == Pound)
+	return 0;
+
     return starter;
 }
 
@@ -1064,7 +1077,7 @@ patcompatom(int *flagp, int *kshcharp)
 static long
 patcompnot(int paren, int *flagsp)
 {
-    unsigned char *unull = NULL;
+    union upat up;
     long excsync, br, excl, n, starter;
     int dummy;
 
@@ -1076,7 +1089,8 @@ patcompnot(int paren, int *flagsp)
     excsync = patnode(P_EXCSYNC);
     pattail(br, excsync);
     pattail(starter, excl = patnode(P_EXCLUDE));
-    patadd((char *)&unull, 0, sizeof(unull), 0);
+    up.p = NULL;
+    patadd((char *)&up, 0, sizeof(up), 0);
     if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy))))
 	return 0;
     pattail(br, patnode(P_EXCEND));
@@ -1093,9 +1107,11 @@ patcompnot(int paren, int *flagsp)
 static long
 patnode(long op)
 {
-    long starter = patcode - patout;
+    long starter = (Upat)patcode - (Upat)patout;
+    union upat up;
 
-    patadd((char *)&op, 0, sizeof(long), 0);
+    up.l = op;
+    patadd((char *)&up, 0, sizeof(union upat), 0);
     return starter;
 }
 
@@ -1109,22 +1125,22 @@ static void
 patinsert(long op, int opnd, char *xtra, int sz)
 {
     char *src, *dst, *opdst;
-    long buf, *lptr;
+    union upat buf, *lptr;
 
-    buf = 0;
-    patadd((char *)&buf, 0, sizeof(long), 0);
+    buf.l = 0;
+    patadd((char *)&buf, 0, sizeof(buf), 0);
     if (sz)
 	patadd(xtra, 0, sz, 0);
-    src = patcode - sizeof(long) - sz;
+    src = patcode - sizeof(union upat) - sz;
     dst = patcode;
-    opdst = patout + opnd;
+    opdst = patout + opnd * sizeof(union upat);
     while (src > opdst)
 	*--dst = *--src;
 
     /* A cast can't be an lvalue */
-    lptr = (long *)opdst;
-    *lptr = op;
-    opdst += sizeof(long);
+    lptr = (Upat)opdst;
+    lptr->l = op;
+    opdst += sizeof(union upat);
     while (sz--)
 	*opdst++ = *xtra++;
 }
@@ -1135,10 +1151,10 @@ patinsert(long op, int opnd, char *xtra, int sz)
 static void
 pattail(long p, long val)
 {
-    char *scan, *temp;
-    long offset, *lptr;
+    Upat scan, temp;
+    long offset;
 
-    scan = patout + p;
+    scan = (Upat)patout + p;
     for (;;) {
 	if (!(temp = PATNEXT(scan)))
 	    break;
@@ -1146,10 +1162,9 @@ pattail(long p, long val)
     }
 
     offset = (P_OP(scan) == P_BACK)
-	? (scan - patout) - val : val - (scan - patout);
+	? (scan - (Upat)patout) - val : val - (scan - (Upat)patout);
 
-    lptr = (long *)scan;
-    *lptr |= offset << 8;
+    scan->l |= offset << 8;
 }
 
 /* do pattail, but on operand of first argument; nop if operandless */
@@ -1157,14 +1172,14 @@ pattail(long p, long val)
 /**/
 static void patoptail(long p, long val)
 {
-    char *ptr = patout + p;
+    Upat ptr = (Upat)patout + p;
     int op = P_OP(ptr);
     if (!p || !P_ISBRANCH(ptr))
 	return;
     if (op == P_BRANCH)
 	pattail(P_OPERAND(p), val);
     else
-	pattail(P_OPERAND(p) + sizeof(char *), val);
+	pattail(P_OPERAND(p) + 1, val);
 }
 
 
@@ -1258,7 +1273,7 @@ pattry(Patprog prog, char *string)
 	}
 #endif
 
-	if (patmatch(progstr)) {
+	if (patmatch((Upat)progstr)) {
 #ifdef BACKREFERENCES
 	    if (patflags & PAT_BACKR) {
 		prog->ppStartp[0] = string;
@@ -1314,11 +1329,12 @@ static char *exactpos;
  */
 
 /**/
-int
-patmatch(char *prog)
+static int
+patmatch(Upat prog)
 {
     /* Current and next nodes */
-    char *scan = prog, *next, *opnd, *save, *start;
+    Upat scan = prog, next, opnd;
+    char *start, *save, *chrop;
     int savglobflags, op, no, min, nextch, fail = 0, saverrsfound;
     zrange_t from, to, comp;
 
@@ -1338,36 +1354,38 @@ patmatch(char *prog)
 	    break;
 	case P_EXACTLY:
 	    /*
-	     * acts as nothing if *opnd is null:  this is used by
+	     * acts as nothing if *chrop is null:  this is used by
 	     * approx code.
 	     */
-	    opnd = exactpos ? exactpos : P_OPERAND(scan);
+	    chrop = exactpos ? exactpos : (char *)P_OPERAND(scan);
 	    exactpos = NULL;
-	    while (*opnd && *patinput) {
+	    while (*chrop && *patinput) {
 		int chin = STOUC(UNMETA(patinput));
-		int chpa = STOUC(UNMETA(opnd));
+		int chpa = STOUC(UNMETA(chrop));
 		if (!CHARMATCH(chin, chpa)) {
 		    fail = 1;
 		    break;
 		}
-		METAINC(opnd);
+		METAINC(chrop);
 		METAINC(patinput);
 	    }
-	    if (*opnd) {
-		exactpos = opnd;
+	    if (*chrop) {
+		exactpos = chrop;
 		fail = 1;
 	    }
 	    break;
 	case P_ANYOF:
 	    if (!*patinput ||
-		!patmatchrange(P_OPERAND(scan), STOUC(UNMETA(patinput))))
+		!patmatchrange((char *)P_OPERAND(scan),
+			       STOUC(UNMETA(patinput))))
 		fail = 1;
 	    else
 		METAINC(patinput);
 	    break;
 	case P_ANYBUT:
 	    if (!*patinput ||
-		patmatchrange(P_OPERAND(scan), STOUC(UNMETA(patinput))))
+		patmatchrange((char *)P_OPERAND(scan),
+			      STOUC(UNMETA(patinput))))
 		fail = 1;
 	    else
 		METAINC(patinput);
@@ -1384,12 +1402,12 @@ patmatch(char *prog)
 	     * the first attempt.
 	     */
 	    op = P_OP(scan);
-	    start = P_OPERAND(scan);
+	    start = (char *)P_OPERAND(scan);
 	    from = to = 0;
 	    if (op != P_NUMTO) {
 #ifdef ZSH_64_BIT_TYPE
 		/* We can't rely on pointer alignment being good enough. */
-		memcpy((char *)&from, (char *)start, sizeof(zrange_t));
+		memcpy((char *)&from, start, sizeof(zrange_t));
 #else
 		from = *((zrange_t *) start);
 #endif
@@ -1414,7 +1432,8 @@ patmatch(char *prog)
 		    return 1;
 		}
 		if (!no && P_OP(next) == P_EXACTLY &&
-		    !idigit(STOUC(*P_OPERAND(next))) && !(patglobflags & 0xff))
+		    !idigit(STOUC(*(char *)P_OPERAND(next))) &&
+		    !(patglobflags & 0xff))
 		    return 0;
 		patinput = --save;
 		no++;
@@ -1434,7 +1453,8 @@ patmatch(char *prog)
 		if (patmatch(next))
 		    return 1;
 		if (!no && P_OP(next) == P_EXACTLY &&
-		    !idigit(STOUC(*P_OPERAND(next))) && !(patglobflags & 0xff))
+		    !idigit(STOUC(*(char *)P_OPERAND(next))) &&
+		    !(patglobflags & 0xff))
 		    return 0;
 		patinput = --save;
 		no++;
@@ -1447,7 +1467,7 @@ patmatch(char *prog)
 	case P_BACK:
 	    break;
 	case P_GFLAGS:
-	    patglobflags = *(int *)P_OPERAND(scan);
+	    patglobflags = P_OPERAND(scan)->l;
 	    break;
 #ifdef BACKREFERENCES
 	case P_OPEN:
@@ -1502,16 +1522,16 @@ patmatch(char *prog)
 	    break;
 #endif
 	case P_EXCSYNC:
-	    /* See the P_EXCLUDE code below for where syncstrp comes from */
+	    /* See the P_EXCLUDE code below for where syncptr comes from */
 	    {
-		unsigned char **syncstrp, *syncptr;
-		char *after;
+		unsigned char *syncptr;
+		Upat after;
 		after = P_OPERAND(scan);
 		DPUTS(!P_ISEXCLUDE(after),
 		      "BUG: EXCSYNC not followed by EXCLUDE.");
-		syncstrp = (unsigned char **)P_OPERAND(after);
-		DPUTS(!*syncstrp, "BUG: EXCSYNC not handled by EXCLUDE");
-		syncptr = *syncstrp + (patinput - patinstart);
+		DPUTS(!P_OPERAND(after)->p,
+		      "BUG: EXCSYNC not handled by EXCLUDE");
+		syncptr = P_OPERAND(after)->p + (patinput - patinstart);
 		/*
 		 * If we already matched from here, this time we fail.
 		 * See WBRANCH code for story about error count.
@@ -1570,7 +1590,8 @@ patmatch(char *prog)
 			 * The pointer also tells us where the asserted
 			 * pattern matched for use by the exclusion.
 			 */
-			unsigned char **syncstrp, *oldsyncstr;
+			Upat syncstrp;
+			unsigned char *oldsyncstr;
 			char *matchpt = NULL;
 			int ret, savglobdots, matchederrs = 0;
 #ifdef BACKREFERENCES
@@ -1578,7 +1599,7 @@ patmatch(char *prog)
 #endif
 			DPUTS(P_OP(scan) == P_WBRANCH,
 			      "BUG: excluded WBRANCH");
-			syncstrp = (unsigned char **)P_OPERAND(next);
+			syncstrp = P_OPERAND(next);
 			/*
 			 * Unlike WBRANCH, each test at the same exclude
 			 * sync point (due to an external loop) is separate,
@@ -1586,10 +1607,10 @@ patmatch(char *prog)
 			 * (foo~bar)(foo~bar)... from the exclusion point
 			 * of view, so we use a different sync string.
 			 */
-			oldsyncstr = *syncstrp;
+			oldsyncstr = syncstrp->p;
 			if (!patinlen)
 			    patinlen = strlen(patinstart)+1;
-			*syncstrp = (unsigned char *)zcalloc(patinlen);
+			syncstrp->p = (unsigned char *)zcalloc(patinlen);
 			while ((ret = patmatch(P_OPERAND(scan)))) {
 			    unsigned char *syncpt;
 			    char savchar, *testptr;
@@ -1607,9 +1628,9 @@ patmatch(char *prog)
 			     * possibilities for approximation have been
 			     * checked.)
 			     */
-			    for (syncpt = *syncstrp; !*syncpt; syncpt++)
+			    for (syncpt = syncstrp->p; !*syncpt; syncpt++)
 				;
-			    testptr = patinstart + (syncpt - *syncstrp);
+			    testptr = patinstart + (syncpt - syncstrp->p);
 			    DPUTS(testptr > matchpt, "BUG: EXCSYNC failed");
 			    savchar = *testptr;
 			    *testptr = '\0';
@@ -1625,7 +1646,7 @@ patmatch(char *prog)
 				 */
 				patglobflags &= ~0xff;
 				errsfound = 0;
-				opnd = P_OPERAND(next) + sizeof(char *);
+				opnd = P_OPERAND(next) + 1;
 				if (P_OP(next) == P_EXCLUDP && pathpos) {
 				    /*
 				     * top level exclusion with a file,
@@ -1666,8 +1687,8 @@ patmatch(char *prog)
 			    patglobflags = savglobflags;
 			    errsfound = saverrsfound;
 			}
-			zfree((char *)*syncstrp, patinlen);
-			*syncstrp = oldsyncstr;
+			zfree((char *)syncstrp->p, patinlen);
+			syncstrp->p = oldsyncstr;
 			if (ret) {
 			    patinput = matchpt;
 			    errsfound = matchederrs;
@@ -1678,7 +1699,8 @@ patmatch(char *prog)
 			    ;
 		    } else {
 			int ret = 1, pfree = 0;
-			unsigned char **ptrp = NULL, *ptr;
+			Upat ptrp = NULL;
+			unsigned char *ptr;
 			if (P_OP(scan) == P_WBRANCH) {
 			    /*
 			     * This is where we make sure that we are not
@@ -1696,15 +1718,14 @@ patmatch(char *prog)
 			     * there is already a 1, then the test fails.
 			     */
 			    opnd = P_OPERAND(scan);
-			    ptrp = (unsigned char **)opnd;
-			    opnd += sizeof(unsigned char *);
-			    if (!*ptrp) {
+			    ptrp = opnd++;
+			    if (!ptrp->p) {
 				if (!patinlen)
 				    patinlen = strlen((char *)patinstart)+1;
-				*ptrp = (unsigned char *)zcalloc(patinlen);
+				ptrp->p = (unsigned char *)zcalloc(patinlen);
 				pfree = 1;
 			    }
-			    ptr = *ptrp + (patinput - patinstart);
+			    ptr = ptrp->p + (patinput - patinstart);
 
 			    /*
 			     * Without approximation, this is just a
@@ -1712,7 +1733,7 @@ patmatch(char *prog)
 			     * need to know how many errors there were
 			     * last time we made the test.  If errsfound
 			     * is now smaller than it was, hence we can
-			     * maker more approximations in the remaining
+			     * make more approximations in the remaining
 			     * code, we continue with the test.
 			     * (This is why the max number of errors is
 			     * 254, not 255.)
@@ -1725,8 +1746,8 @@ patmatch(char *prog)
 			if (ret)
 			    ret = patmatch(opnd);
 			if (pfree) {
-			    zfree((char *)*ptrp, patinlen);
-			    *ptrp = NULL;
+			    zfree((char *)ptrp->p, patinlen);
+			    ptrp->p = NULL;
 			}
 			if (ret)
 			    return 1;
@@ -1769,12 +1790,38 @@ patmatch(char *prog)
 		    return 0;
 		no = patrepeat(P_OPERAND(scan));
 	    }
-	    /* Lookahead to avoid useless matches */
-	    if (P_OP(next) == P_EXACTLY && !(patglobflags & 0xff))
-		nextch = STOUC(UNMETA(P_OPERAND(next)));
-	    else
-		nextch = -1;
 	    min = (op == P_TWOHASH) ? 1 : 0;
+	    /*
+	     * Lookahead to avoid useless matches. This is not possible
+	     * with approximation.
+	     */
+	    if (P_OP(next) == P_EXACTLY && !(patglobflags & 0xff)) {
+		char *nextop = (char *)P_OPERAND(next);
+		/*
+		 * If that P_EXACTLY is last (common in simple patterns,
+		 * such as *.c), then it can be only be matched at one
+		 * point in the test string, so record that.
+		 */
+		if (P_OP(PATNEXT(next)) == P_END &&
+		    !(patflags & PAT_NOANCH)) {
+		    int ptlen = strlen(patinput);
+		    int oplen = strlen(nextop);
+		    /* Are we in the right range? */
+		    if (oplen > strlen(min ? METANEXT(start) : start) ||
+			oplen < ptlen)
+			return 0;
+		    /* Yes, just position appropriately and test. */
+		    patinput += ptlen - oplen;
+		    if (patinput > start && patinput[-1] == Meta) {
+			/* doesn't align properly, no go */
+			return 0;
+		    }
+		    /* Continue loop with P_EXACTLY test. */
+		    break;
+		}
+		nextch = STOUC(UNMETA(nextop));
+	    } else
+		nextch = -1;
 	    save = patinput;
 	    savglobflags = patglobflags;
 	    saverrsfound = errsfound;
@@ -2010,13 +2057,13 @@ patmatchrange(char *range, int ch)
 /* repeatedly match something simple and say how many times */
 
 /**/
-static int patrepeat(char *p)
+static int patrepeat(Upat p)
 {
     int count = 0, tch, inch;
     char *scan, *opnd;
 
     scan = patinput;
-    opnd = P_OPERAND(p);
+    opnd = (char *)P_OPERAND(p);
 
     switch(P_OP(p)) {
 #ifdef DEBUG
@@ -2067,7 +2114,8 @@ static int patrepeat(char *p)
 static void
 patdump(Patprog r)
 {
-    char *s, *next, *codestart, *base, op = P_EXACTLY;
+    char *s, *base, op = P_EXACTLY;
+    Upat up, codestart, next;
 
     base = (char *)r;
     s = base + r->startoff;
@@ -2075,13 +2123,14 @@ patdump(Patprog r)
     if (r->flags & PAT_PURES) {
 	printf("STRING:%s\n", (char *)s);
     } else {
-	codestart = s;
+	codestart = (Upat)s;
 	while (op != P_END) {
-	    op = P_OP(s);
-	    printf("%2d%s", s-codestart, patprop(s));
-	    next = PATNEXT(s);
-	    printf("(%d)", next ? (s-codestart)+(next-s) : 0);
-	    s += sizeof(zalign_t);
+	    up = (Upat)s;
+	    op = P_OP(up);
+	    printf("%2d%s", up-codestart, patprop(up));
+	    next = PATNEXT(up);
+	    printf("(%d)", next ? next-codestart : 0);
+	    s += sizeof(union upat);
 	    if (op == P_ANYOF || op == P_ANYBUT || op == P_EXACTLY) {
 		while (*s != '\0') {
 		    if (itok(*s)) {
@@ -2107,16 +2156,15 @@ patdump(Patprog r)
 		    s += sizeof(zrange_t);
 		}
 	    } else if (op == P_GFLAGS) {
-		long *lptr = (long *)s;
-		printf("%ld, %ld", *lptr & ~0xff, *lptr & 0xff);
-		s += sizeof(long);
+		printf("%ld, %ld", (++up)->l & ~0xff, (++up)->l & 0xff);
+		s += sizeof(union upat);
 	    } else if (op == P_WBRANCH || op == P_EXCLUDE ||
 		       op == P_EXCLUDP) {
-		s += sizeof(char *);
+		s += sizeof(union upat);
 	    }
 	    putchar('\n');
-	    s = base + (((s - base) + sizeof(zalign_t) - 1) &
-			~(sizeof(zalign_t) - 1));
+	    s = base + (((s - base) + sizeof(union upat) - 1) &
+			~(sizeof(union upat) - 1));
 	}
     }
 
@@ -2146,7 +2194,7 @@ patdump(Patprog r)
 
 /**/
 static char *
-patprop(char *op)
+patprop(Upat op)
 {
     char *p = NULL;
     static char buf[50];