about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 6
-rw-r--r-- Src/Zle/complist.c 4
-rw-r--r-- Src/glob.c 49
-rw-r--r-- Src/pattern.c 27
4 files changed, 53 insertions, 33 deletions
diff --git a/ChangeLog b/ChangeLog
index 8654c70dc..80eb9ae71 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2005-04-24  Peter Stephenson  <pws@pwstephenson.fsnet.co.uk>
+
+	* 21170: Src/glob.c, Src/pattern.c: optimise length calculations
+	which were causing inefficiency in ${...//.../...} on large
+	strings.
+
 2005-04-22  Geoff Wing  <gcw@zsh.org>
 
 	* 20162: Src/Zle/compresult.c: in printlist() don't output new line
diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c
index 51b160919..3a3d77971 100644
--- a/Src/Zle/complist.c
+++ b/Src/Zle/complist.c
@@ -601,7 +601,7 @@ putmatchcol(Listcols c, char *group, char *n)
 
     for (pc = c->pats; pc; pc = pc->next)
 	if ((!pc->prog || !group || pattry(pc->prog, group)) &&
-	    pattryrefs(pc->pat, n, -1, 0, &nrefs, begpos, endpos)) {
+	    pattryrefs(pc->pat, n, -1, -1, 0, &nrefs, begpos, endpos)) {
 	    if (pc->cols[1]) {
 		patcols = pc->cols;
 
@@ -639,7 +639,7 @@ putfilecol(Listcols c, char *group, char *n, mode_t m)
 
     for (pc = c->pats; pc; pc = pc->next)
 	if ((!pc->prog || !group || pattry(pc->prog, group)) &&
-	    pattryrefs(pc->pat, n, -1, 0, &nrefs, begpos, endpos)) {
+	    pattryrefs(pc->pat, n, -1, -1, 0, &nrefs, begpos, endpos)) {
 	    if (pc->cols[1]) {
 		patcols = pc->cols;
 
diff --git a/Src/glob.c b/Src/glob.c
index 5334f70fa..06f956ed0 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2223,11 +2223,12 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 {
     char *s = *sp, *t;
     /*
-     * Note that ioff and ml count characters in the character
+     * Note that ioff and uml count characters in the character
      * set (Meta's are not included), while l counts characters in the
-     * string.
+     * metafied string.  umlen is a running count of unmetafied
+     * characters, passed to pattrylen() to avoid recounting.
      */
-    int ioff, l = strlen(*sp), ml = ztrlen(*sp), matched = 1;
+    int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen;
 
     repllist = NULL;
 
@@ -2273,9 +2274,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 		     * ... now we know whether it's worth looking for the
 		     * shortest, which we do by brute force.
 		     */
-		    for (t = s; t < s + mlen; METAINC(t)) {
+		    for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) {
 			set_pat_end(p, *t);
-			if (pattrylen(p, s, t - s, 0)) {
+			if (pattrylen(p, s, t - s, umlen, 0)) {
 			    mlen = patmatchlen();
 			    break;
 			}
@@ -2290,9 +2291,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 	    /* Smallest possible match at tail of string:  *
 	     * move back down string until we get a match. *
 	     * There's no optimization here.               */
-	    for (ioff = ml, t = s + l; t >= s; t--, ioff--) {
+	    for (ioff = uml, t = s + l, umlen = 0; t >= s;
+		 t--, ioff--, umlen++) {
 		set_pat_start(p, t-s);
-		if (pattrylen(p, t, -1, ioff)) {
+		if (pattrylen(p, t, s + l - t, umlen, ioff)) {
 		    *sp = get_match_ret(*sp, t - s, l, fl, replstr);
 		    return 1;
 		}
@@ -2305,9 +2307,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 	    /* Largest possible match at tail of string:       *
 	     * move forward along string until we get a match. *
 	     * Again there's no optimisation.                  */
-	    for (ioff = 0, t = s; t < s + l; ioff++, t++) {
+	    for (ioff = 0, t = s, umlen = uml; t < s + l;
+		 ioff++, t++, umlen--) {
 		set_pat_start(p, t-s);
-		if (pattrylen(p, t, -1, ioff)) {
+		if (pattrylen(p, t, s + l - t, umlen, ioff)) {
 		    *sp = get_match_ret(*sp, t-s, l, fl, replstr);
 		    return 1;
 		}
@@ -2329,19 +2332,22 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 	    if (fl & SUB_GLOBAL)
 		repllist = newlinklist();
 	    ioff = 0;		/* offset into string */
+	    umlen = uml;
 	    do {
 		/* loop over all matches for global substitution */
 		matched = 0;
-		for (; t < s + l; t++, ioff++) {
+		for (; t < s + l; t++, ioff++, umlen--) {
 		    /* Find the longest match from this position. */
 		    set_pat_start(p, t-s);
-		    if (pattrylen(p, t, -1, ioff)) {
+		    if (pattrylen(p, t, s + l - t, umlen, ioff)) {
 			char *mpos = t + patmatchlen();
 			if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
 			    char *ptr;
-			    for (ptr = t; ptr < mpos; METAINC(ptr)) {
+			    int umlen2;
+			    for (ptr = t, umlen2 = 0; ptr < mpos;
+				 METAINC(ptr), umlen2++) {
 				set_pat_end(p, *ptr);
-				if (pattrylen(p, t, ptr - t, ioff)) {
+				if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
 				    mpos = t + patmatchlen();
 				    break;
 				}
@@ -2370,7 +2376,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 			 * which is already marked for replacement.
 			 */
 			matched = 1;
-			for ( ; t < mpos; t++, ioff++)
+			for ( ; t < mpos; t++, ioff++, umlen--)
 			    if (*t == Meta)
 				t++;
 			break;
@@ -2397,23 +2403,26 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 	    /* Longest/shortest at end, matching substrings.       */
 	    if (!(fl & SUB_LONG)) {
 		set_pat_start(p, l);
-		if (pattrylen(p, s + l, -1, ml) && !--n) {
+		if (pattrylen(p, s + l, 0, 0, uml) && !--n) {
 		    *sp = get_match_ret(*sp, l, l, fl, replstr);
 		    return 1;
 		}
 	    }
-	    for (ioff = ml - 1, t = s + l - 1; t >= s; t--, ioff--) {
+	    for (ioff = uml - 1, t = s + l - 1, umlen = 1; t >= s;
+		 t--, ioff--, umlen++) {
 		if (t > s && t[-1] == Meta)
 		    t--;
 		set_pat_start(p, t-s);
-		if (pattrylen(p, t, -1, ioff) && !--n) {
+		if (pattrylen(p, t, s + l - t, umlen, ioff) && !--n) {
 		    /* Found the longest match */
 		    char *mpos = t + patmatchlen();
 		    if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
 			char *ptr;
-			for (ptr = t; ptr < mpos; METAINC(ptr)) {
+			int umlen2;
+			for (ptr = t, umlen2 = 0; ptr < mpos;
+			     METAINC(ptr), umlen2++) {
 			    set_pat_end(p, *ptr);
-			    if (pattrylen(p, t, ptr - t, ioff)) {
+			    if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
 				mpos = t + patmatchlen();
 				break;
 			    }
@@ -2424,7 +2433,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
 		}
 	    }
 	    set_pat_start(p, l);
-	    if ((fl & SUB_LONG) && pattrylen(p, s + l, -1, ml) && !--n) {
+	    if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, uml) && !--n) {
 		*sp = get_match_ret(*sp, l, l, fl, replstr);
 		return 1;
 	    }
diff --git a/Src/pattern.c b/Src/pattern.c
index 679a8399e..1033c776f 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -1496,7 +1496,7 @@ pattrystart(void)
 mod_export int
 pattry(Patprog prog, char *string)
 {
-    return pattryrefs(prog, string, -1, 0, NULL, NULL, NULL);
+    return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL);
 }
 
 /*
@@ -1507,19 +1507,22 @@ pattry(Patprog prog, char *string)
 
 /**/
 mod_export int
-pattrylen(Patprog prog, char *string, int len, int offset)
+pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset)
 {
-    return pattryrefs(prog, string, len, offset, NULL, NULL, NULL);
+    return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL);
 }
 
 /*
- * Test prog against string with given length stringlen, which
- * may be -1 to indicate a null-terminated string.  The input
- * string is metafied; the length is the raw string length, not the
- * number of possibly metafied characters.
+ * Test prog against string with given lengths.  The input
+ * string is metafied; stringlen is the raw string length, and
+ * unmetalen the number of characters in the original string (some
+ * of which may now be metafied).  Either value may be -1
+ * to indicate a null-terminated string which will be counted.  Note
+ * there may be a severe penalty for this if a lot of matching is done
+ * on one string.
  *
  * offset is the position in the original string (not seen by
- * the patter module) at which we are trying to match.
+ * the pattern module) at which we are trying to match.
  * This is added in to the positions recorded in patbeginp and patendp
  * when we are looking for substrings.  Currently this only happens
  * in the parameter substitution code.
@@ -1535,10 +1538,11 @@ pattrylen(Patprog prog, char *string, int len, int offset)
 
 /**/
 mod_export int
-pattryrefs(Patprog prog, char *string, int stringlen, int patoffset,
+pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
+	   int patoffset,
 	   int *nump, int *begp, int *endp)
 {
-    int i, maxnpos = 0, ret, needfullpath, unmetalen, unmetalenp;
+    int i, maxnpos = 0, ret, needfullpath, unmetalenp;
     int origlen;
     char **sp, **ep, *tryalloced, *ptr;
     char *progstr = (char *)prog + prog->startoff;
@@ -1564,7 +1568,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int patoffset,
     needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos;
 
     /* Get the length of the full string when unmetafied. */
-    unmetalen = ztrsub(string + stringlen, string);
+    if (unmetalen < 0)
+	unmetalen = ztrsub(string + stringlen, string);
     if (needfullpath)
 	unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
     else