From 5b947bf671944cea660dc87f6b3cbea741b68b87 Mon Sep 17 00:00:00 2001 From: Bart Schaefer Date: Sun, 24 Apr 2005 17:13:18 +0000 Subject: 21170: optimize length calculations in pattern matching --- Src/Zle/complist.c | 1994 ++++++++++++++++++++++++++++++++++++++++++++++++---- Src/glob.c | 51 +- Src/pattern.c | 1196 ++++++++++++++++++++++--------- 3 files changed, 2768 insertions(+), 473 deletions(-) (limited to 'Src') diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c index a7cab9807..311a5679f 100644 --- a/Src/Zle/complist.c +++ b/Src/Zle/complist.c @@ -37,7 +37,7 @@ static Widget w_menuselect; -static Keymap mskeymap; +static Keymap mskeymap, lskeymap; /* Indixes into the terminal string arrays. */ @@ -76,8 +76,8 @@ static char *colnames[] = { /* Default values. */ static char *defcols[] = { - "0", "0", "1;34", "1;36", "33", "1;35", "1;33", "1;33", "1;32", NULL, - "\033[", "m", NULL, "0", "0", "7", "0", "0" + "0", "0", "1;31", "1;36", "33", "1;35", "1;33", "1;33", "1;32", NULL, + "\033[", "m", NULL, "0", "0", "7", NULL, NULL }; /* This describes a terminal string for a file type. */ @@ -122,13 +122,17 @@ struct listcols { Extcol exts; /* strings for extensions */ }; +/* Combined length of LC and RC, maximum length of capability strings. */ + +static int lr_caplen, max_caplen; + /* This parses the value of a definition (the part after the `='). * The return value is a pointer to the character after it. */ static char * getcolval(char *s, int multi) { - char *p; + char *p, *o = s; for (p = s; *s && *s != ':' && (!multi || *s != '='); p++, s++) { if (*s == '\\' && s[1]) { @@ -172,6 +176,8 @@ getcolval(char *s, int multi) } if (p != s) *p = '\0'; + if ((s - o) > max_caplen) + max_caplen = s - o; return s; } @@ -325,10 +331,6 @@ filecol(char *col) return fc; } -/* Combined length of LC and RC, maximum length of capability strings. */ - -static int lr_caplen, max_caplen; - /* This initializes the given terminal color structure. */ static void @@ -337,6 +339,8 @@ getcols(Listcols c) char *s; int i, l; + max_caplen = lr_caplen = 0; + queue_signals(); if (!(s = getsparam("ZLS_COLORS")) && !(s = getsparam("ZLS_COLOURS"))) { for (i = 0; i < NUM_COLS; i++) @@ -348,21 +352,25 @@ getcols(Listcols c) c->files[COL_MA] = filecol(s); c->files[COL_EC] = filecol(tcstr[TCSTANDOUTEND]); } else - c->files[COL_MA] = filecol(""); + c->files[COL_MA] = filecol(defcols[COL_MA]); lr_caplen = 0; if ((max_caplen = strlen(c->files[COL_MA]->col)) < (l = strlen(c->files[COL_EC]->col))) max_caplen = l; + unqueue_signals(); return; } /* We have one of the parameters, use it. */ memset(c, 0, sizeof(*c)); s = dupstring(s); while (*s) - s = getcoldef(c, s); + if (*s == ':') + s++; + else + s = getcoldef(c, s); + unqueue_signals(); /* Use default values for those that aren't set explicitly. */ - max_caplen = lr_caplen = 0; for (i = 0; i < NUM_COLS; i++) { if (!c->files[i] || !c->files[i]->col) c->files[i] = filecol(defcols[i]); @@ -382,11 +390,24 @@ getcols(Listcols c) /* Information about the list shown. */ static int noselect, mselect, inselect, mcol, mline, mcols, mlines, mmlen; -static int selected; +static int selected, mlbeg = -1, mlend = 9999999, mscroll, mrestlines; +static int mnew, mlastcols, mlastlines, mhasstat, mfirstl, mlastm; +static int mlprinted, molbeg = -2, mocol = 0, moline = 0, mstatprinted; +static char *mstatus, *mlistp; static Cmatch **mtab, **mmtabp; +static int mtab_been_reallocated; static Cmgroup *mgtab, *mgtabp; static struct listcols mcolors; +/* Used in mtab/mgtab, for explanations. */ + +#define MMARK ((unsigned long) 1) +#define mmarked(v) (((unsigned long) (v)) & MMARK) +#define mtmark(v) ((Cmatch *) (((unsigned long) (v)) | MMARK)) +#define mtunmark(v) ((Cmatch *) (((unsigned long) (v)) & ~MMARK)) +#define mgmark(v) ((Cmgroup) (((unsigned long) (v)) | MMARK)) +#define mgunmark(v) ((Cmgroup) (((unsigned long) (v)) & ~MMARK)) + /* Information for in-string colours. */ static int nrefs; @@ -428,6 +449,7 @@ zcputs(Listcols c, char *group, int colour) return; } + zlrputs(c, "0"); } /* Turn off colouring. */ @@ -481,7 +503,7 @@ doiscol(Listcols c, int pos) /* insert e in sendpos */ for (i = curissend; sendpos[i] <= e; ++i) ; - for (j = i + 1; j < MAX_POS; ++j) + for (j = MAX_POS - 1; j > i; --j) sendpos[j] = sendpos[j-1]; sendpos[i] = e; @@ -496,10 +518,10 @@ doiscol(Listcols c, int pos) /* Stripped-down version of printfmt(). But can do in-string colouring. */ -static void -clprintfmt(Listcols c, char *p) +static int +clprintfmt(Listcols c, char *p, int ml) { - int cc = 0, i = 0; + int cc = 0, i = 0, ask, beg; initiscol(c); @@ -507,34 +529,32 @@ clprintfmt(Listcols c, char *p) doiscol(c, i++); cc++; if (*p == '\n') { - if (tccan(TCCLEAREOL)) + if (mlbeg >= 0 && tccan(TCCLEAREOL)) tcout(TCCLEAREOL); - else { - int s = columns - 1 - (cc % columns); - - while (s-- > 0) - putc(' ', shout); - } cc = 0; } + if (ml == mlend - 1 && (cc % columns) == columns - 1) + return 0; + putc(*p, shout); + if ((beg = !(cc % columns))) + ml++; + if (mscroll && !(cc % columns) && + !--mrestlines && (ask = asklistscroll(ml))) + return ask; } - if (tccan(TCCLEAREOL)) + if (mlbeg >= 0 && tccan(TCCLEAREOL)) tcout(TCCLEAREOL); - else { - int s = columns - 1 - (cc % columns); - - while (s-- > 0) - putc(' ', shout); - } + return 0; } /* Local version of nicezputs() with in-string colouring. */ -static void -clnicezputs(Listcols c, char *s) +static int +clnicezputs(Listcols c, char *s, int ml) { - int cc, i = 0; + int cc, i = 0, col = 0, ask, oml = ml; + char *t; initiscol(c); @@ -548,8 +568,26 @@ clnicezputs(Listcols c, char *s) } if (cc == Meta) cc = *s++ ^ 32; - fputs(nicechar(cc), shout); + + for (t = nicechar(cc); *t; t++) { + if (ml == mlend - 1 && col == columns - 1) { + mlprinted = ml - oml; + return 0; + } + putc(*t, shout); + if (++col == columns) { + ml++; + if (mscroll && !--mrestlines && (ask = asklistscroll(ml))) { + mlprinted = ml - oml; + return ask; + } + col = 0; + fputs(" \010", shout); + } + } } + mlprinted = ml - oml; + return 0; } /* Get the terminal color string for the given match. */ @@ -563,7 +601,7 @@ putmatchcol(Listcols c, char *group, char *n) for (pc = c->pats; pc; pc = pc->next) if ((!pc->prog || !group || pattry(pc->prog, group)) && - pattryrefs(pc->pat, n, &nrefs, begpos, endpos)) { + pattryrefs(pc->pat, n, -1, -1, 0, &nrefs, begpos, endpos)) { if (pc->cols[1]) { patcols = pc->cols; @@ -601,7 +639,7 @@ putfilecol(Listcols c, char *group, char *n, mode_t m) for (pc = c->pats; pc; pc = pc->next) if ((!pc->prog || !group || pattry(pc->prog, group)) && - pattryrefs(pc->pat, n, &nrefs, begpos, endpos)) { + pattryrefs(pc->pat, n, -1, -1, 0, &nrefs, begpos, endpos)) { if (pc->cols[1]) { patcols = pc->cols; @@ -636,12 +674,694 @@ putfilecol(Listcols c, char *group, char *n, mode_t m) static Cmgroup last_group; -static void -clprintm(Cmgroup g, Cmatch *mp, int mc, int ml, int lastc, int width, - char *path, struct stat *buf) +/**/ +static int +asklistscroll(int ml) +{ + Thingy cmd; + int i, ret = 0; + + compprintfmt(NULL, 1, 1, 1, ml, NULL); + + fflush(shout); + zsetterm(); + selectlocalmap(lskeymap); + if (!(cmd = getkeycmd()) || cmd == Th(z_sendbreak)) + ret = 1; + else if (cmd == Th(z_acceptline) || + cmd == Th(z_downhistory) || + cmd == Th(z_downlineorhistory) || + cmd == Th(z_downlineorsearch) || + cmd == Th(z_vidownlineorhistory)) + mrestlines = 1; + else if (cmd == Th(z_completeword) || + cmd == Th(z_expandorcomplete) || + cmd == Th(z_expandorcompleteprefix) || + cmd == Th(z_menucomplete) || + cmd == Th(z_menuexpandorcomplete) || + !strcmp(cmd->nam, "menu-select") || + !strcmp(cmd->nam, "complete-word") || + !strcmp(cmd->nam, "expand-or-complete") || + !strcmp(cmd->nam, "expand-or-complete-prefix") || + !strcmp(cmd->nam, "menu-complete") || + !strcmp(cmd->nam, "menu-expand-or-complete")) + mrestlines = lines - 1; + else { + ungetkeycmd(); + ret = 1; + } + selectlocalmap(NULL); + settyinfo(&shttyinfo); + putc('\r', shout); + for (i = columns - 1; i--; ) + putc(' ', shout); + putc('\r', shout); + + return ret; +} + +#define dolist(X) ((X) >= mlbeg && (X) < mlend) +#define dolistcl(X) ((X) >= mlbeg && (X) < mlend + 1) +#define dolistnl(X) ((X) >= mlbeg && (X) < mlend - 1) + +/**/ +static int +compprintnl(int ml) +{ + int ask; + + if (mlbeg >= 0 && tccan(TCCLEAREOL)) + tcout(TCCLEAREOL); + putc('\n', shout); + + if (mscroll && !--mrestlines && (ask = asklistscroll(ml))) + return ask; + + return 0; +} + +/* This is used to print the strings (e.g. explanations). * + * It returns the number of lines printed. */ + +/**/ +static int +compprintfmt(char *fmt, int n, int dopr, int doesc, int ml, int *stop) +{ + char *p, nc[2*DIGBUFSIZE + 12], nbuf[2*DIGBUFSIZE + 12]; + int l = 0, cc = 0, b = 0, s = 0, u = 0, m, ask, beg, stat; + + if ((stat = !fmt)) { + if (mlbeg >= 0) { + if (!(fmt = mstatus)) { + mlprinted = 0; + return 0; + } + cc = -1; + } else + fmt = mlistp; + } + for (p = fmt; *p; p++) { + if (doesc && *p == '%') { + if (*++p) { + m = 0; + switch (*p) { + case '%': + if (dopr == 1) + putc('%', shout); + cc++; + break; + case 'n': + if (!stat) { + sprintf(nc, "%d", n); + if (dopr == 1) + fputs(nc, shout); + cc += strlen(nc); + } + break; + case 'B': + b = 1; + if (dopr) + tcout(TCBOLDFACEBEG); + break; + case 'b': + b = 0; m = 1; + if (dopr) + tcout(TCALLATTRSOFF); + break; + case 'S': + s = 1; + if (dopr) + tcout(TCSTANDOUTBEG); + break; + case 's': + s = 0; m = 1; + if (dopr) + tcout(TCSTANDOUTEND); + break; + case 'U': + u = 1; + if (dopr) + tcout(TCUNDERLINEBEG); + break; + case 'u': + u = 0; m = 1; + if (dopr) + tcout(TCUNDERLINEEND); + break; + case '{': + for (p++; *p && (*p != '%' || p[1] != '}'); p++) + if (dopr) + putc(*p, shout); + if (*p) + p++; + else + p--; + break; + case 'm': + if (stat) { + sprintf(nc, "%d/%d", (n ? mlastm : mselect), + listdat.nlist); + m = 2; + } + break; + case 'M': + if (stat) { + sprintf(nbuf, "%d/%d", (n ? mlastm : mselect), + listdat.nlist); + sprintf(nc, "%-9s", nbuf); + m = 2; + } + break; + case 'l': + if (stat) { + sprintf(nc, "%d/%d", ml + 1, listdat.nlines); + m = 2; + } + break; + case 'L': + if (stat) { + sprintf(nbuf, "%d/%d", ml + 1, listdat.nlines); + sprintf(nc, "%-9s", nbuf); + m = 2; + } + break; + case 'p': + if (stat) { + if (ml == listdat.nlines - 1) + strcpy(nc, "Bottom"); + else if (n ? mfirstl : (mlbeg > 0 || ml != mfirstl)) + sprintf(nc, "%d%%", + ((ml + 1) * 100) / listdat.nlines); + else + strcpy(nc, "Top"); + m = 2; + } + break; + case 'P': + if (stat) { + if (ml == listdat.nlines - 1) + strcpy(nc, "Bottom"); + else if (n ? mfirstl : (mlbeg > 0 || ml != mfirstl)) + sprintf(nc, "%2d%% ", + ((ml + 1) * 100) / listdat.nlines); + else + strcpy(nc, "Top "); + m = 2; + } + break; + } + if (m == 2 && dopr == 1) { + int l = strlen(nc); + + if (l + cc > columns - 2) + nc[l -= l + cc - (columns - 2)] = '\0'; + fputs(nc, shout); + cc += l; + } else if (dopr && m == 1) { + if (b) + tcout(TCBOLDFACEBEG); + if (s) + tcout(TCSTANDOUTBEG); + if (u) + tcout(TCUNDERLINEBEG); + } + } else + break; + } else { + if ((++cc == columns - 2 || *p == '\n') && stat) + dopr = 2; + if (*p == '\n') { + if (dopr == 1 && mlbeg >= 0 && tccan(TCCLEAREOL)) + tcout(TCCLEAREOL); + l += 1 + ((cc - 1) / columns); + cc = 0; + } + if (dopr == 1) { + if (ml == mlend - 1 && (cc % columns) == columns - 1) { + dopr = 0; + continue; + } + putc(*p, shout); + if ((beg = !(cc % columns)) && !stat) { + ml++; + fputs(" \010", shout); + } + if (mscroll && beg && !--mrestlines && (ask = asklistscroll(ml))) { + *stop = 1; + if (stat && n) + mfirstl = -1; + return (mlprinted = l + (cc / columns)); + } + } + } + } + if (dopr) { + if (!(cc % columns)) + fputs(" \010", shout); + if (mlbeg >= 0 && tccan(TCCLEAREOL)) + tcout(TCCLEAREOL); + } + if (stat && n) + mfirstl = -1; + + return (mlprinted = l + (cc / columns)); +} + +/* This is like zputs(), but allows scrolling. */ + +/**/ +static int +compzputs(char const *s, int ml) +{ + int c, col = 0, ask; + + while (*s) { + if (*s == Meta) + c = *++s ^ 32; + else if(itok(*s)) { + s++; + continue; + } else + c = *s; + s++; + putc(c, shout); + if (c == '\n' && mlbeg >= 0 && tccan(TCCLEAREOL)) + tcout(TCCLEAREOL); + if (mscroll && (++col == columns || c == '\n')) { + ml++; + if (!--mrestlines && (ask = asklistscroll(ml))) + return ask; + + col = 0; + } + } + return 0; +} + +/* This is like nicezputs(), but allows scrolling. */ + +/**/ +static int +compnicezputs(char *s, int ml) +{ + int c, col = 0, ask, oml = ml; + char *t; + + while ((c = *s++)) { + if (itok(c)) { + if (c <= Comma) + c = ztokens[c - Pound]; + else + continue; + } + if (c == Meta) + c = *s++ ^ 32; + + for (t = nicechar(c); *t; t++) { + if (ml == mlend - 1 && col == columns - 1) { + mlprinted = ml - oml; + return 0; + } + putc(*t, shout); + if (++col == columns) { + ml++; + if (mscroll && !--mrestlines && (ask = asklistscroll(ml))) { + mlprinted = ml - oml; + return ask; + } + col = 0; + } + } + } + mlprinted = ml - oml; + return 0; +} + +/**/ +static int +compprintlist(int showall) +{ + static int lasttype = 0, lastbeg = 0, lastml = 0, lastinvcount = -1; + static int lastn = 0, lastnl = 0, lastnlnct = -1; + static Cmgroup lastg = NULL; + static Cmatch *lastp = NULL; + static Cexpl *lastexpl = NULL; + + Cmgroup g; + Cmatch *p, m; + Cexpl *e; + int pnl = 0, cl, mc = 0, ml = 0, printed = 0, stop = 0, asked = 1; + int lastused = 0; + + mfirstl = -1; + if (mnew || lastinvcount != invcount || lastbeg != mlbeg || mlbeg < 0) { + lasttype = 0; + lastg = NULL; + lastexpl = NULL; + lastml = 0; + lastnlnct = -1; + } + cl = (listdat.nlines > lines - nlnct - mhasstat ? + lines - nlnct - mhasstat : listdat.nlines) - (lastnlnct > nlnct); + lastnlnct = nlnct; + mrestlines = lines - 1; + lastinvcount = invcount; + + if (cl < 2) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } else if (mlbeg >= 0 && !tccan(TCCLEAREOL) && tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + + g = ((lasttype && lastg) ? lastg : amatches); + while (g) { + char **pp = g->ylist; + + if ((e = g->expls)) { + int l; + + if (!lastused && lasttype == 1) { + e = lastexpl; + ml = lastml; + lastused = 1; + } + while (*e) { + if (((*e)->count || (*e)->always) && + (!listdat.onlyexpl || + (listdat.onlyexpl & ((*e)->always > 0 ? 2 : 1)))) { + if (pnl) { + if (dolistnl(ml) && compprintnl(ml)) + goto end; + pnl = 0; + ml++; + if (dolistcl(ml) && cl >= 0 && --cl <= 1) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } + } + if (mlbeg < 0 && mfirstl < 0) + mfirstl = ml; + l = compprintfmt((*e)->str, + ((*e)->always ? -1 : (*e)->count), + dolist(ml), 1, ml, &stop); + if (mselect >= 0) { + int mm = (mcols * ml), i; + + for (i = mcols; i--; ) { + mtab[mm + i] = mtmark(NULL); + mgtab[mm + i] = mgmark(NULL); + } + } + if (stop) + goto end; + if (!lasttype && ml >= mlbeg) { + lasttype = 1; + lastg = g; + lastbeg = mlbeg; + lastml = ml; + lastexpl = e; + lastp = NULL; + lastused = 1; + } + ml += mlprinted; + if (dolistcl(ml) && cl >= 0 && (cl -= mlprinted) <= 1) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } + pnl = 1; + } + e++; + if (!mnew && ml > mlend) + goto end; + } + } + if (!listdat.onlyexpl && mlbeg < 0 && pp && *pp) { + if (pnl) { + if (dolistnl(ml) && compprintnl(ml)) + goto end; + pnl = 0; + ml++; + if (cl >= 0 && --cl <= 1) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } + } + if (mlbeg < 0 && mfirstl < 0) + mfirstl = ml; + if (g->flags & CGF_LINES) { + while (*pp) { + if (compzputs(*pp, ml)) + goto end; + if (*++pp && compprintnl(ml)) + goto end; + } + } else { + int n = g->lcount, nl, nc, i, a; + char **pq; + + nl = nc = g->lins; + + while (n && nl--) { + i = g->cols; + mc = 0; + pq = pp; + while (n && i--) { + if (pq - g->ylist >= g->lcount) + break; + if (compzputs(*pq, mscroll)) + goto end; + if (i) { + a = (g->widths ? g->widths[mc] : g->width) - + strlen(*pq); + while (a--) + putc(' ', shout); + } + pq += ((g->flags & CGF_ROWS) ? 1 : nc); + mc++; + n--; + } + if (n) { + if (compprintnl(ml)) + goto end; + ml++; + if (cl >= 0 && --cl <= 1) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } + } + pp += ((g->flags & CGF_ROWS) ? g->cols : 1); + } + } + } else if (!listdat.onlyexpl && + (g->lcount || (showall && g->mcount))) { + int n = g->dcount, nl, nc, i, j, wid; + Cmatch *q; + + nl = nc = g->lins; + + if ((g->flags & CGF_HASDL) && + (lastused || !lasttype || lasttype == 2)) { + if (!lastused && lasttype == 2) { + p = lastp; + ml = lastml; + n = lastn; + nl = lastnl; + lastused = 1; + pnl = 0; + } else + p = g->matches; + + for (; (m = *p); p++) { + if (m->disp && (m->flags & CMF_DISPLINE) && + (showall || !(m->flags & (CMF_HIDE|CMF_NOLIST)))) { + if (pnl) { + if (dolistnl(ml) && compprintnl(ml)) + goto end; + pnl = 0; + ml++; + if (dolistcl(ml) && cl >= 0 && --cl <= 1) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } + } + if (!lasttype && ml >= mlbeg) { + lasttype = 2; + lastg = g; + lastbeg = mlbeg; + lastml = ml; + lastp = p; + lastn = n; + lastnl = nl; + lastused = 1; + } + if (mfirstl < 0) + mfirstl = ml; + if (dolist(ml)) + printed++; + if (clprintm(g, p, 0, ml, 1, 0)) + goto end; + ml += mlprinted; + if (dolistcl(ml) && (cl -= mlprinted) <= 1) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } + pnl = 1; + } + if (!mnew && ml > mlend) + goto end; + } + } + if (n && pnl) { + if (dolistnl(ml) && compprintnl(ml)) + goto end; + pnl = 0; + ml++; + if (dolistcl(ml) && cl >= 0 && --cl <= 1) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } + } + if (!lastused && lasttype == 3) { + p = lastp; + n = lastn; + nl = lastnl; + ml = lastml; + lastused = 1; + } else + p = skipnolist(g->matches, showall); + + while (n && nl--) { + if (!lasttype && ml >= mlbeg) { + lasttype = 3; + lastg = g; + lastbeg = mlbeg; + lastml = ml; + lastp = p; + lastn = n; + lastnl = nl + 1; + lastused = 1; + } + i = g->cols; + mc = 0; + q = p; + while (n && i--) { + wid = (g->widths ? g->widths[mc] : g->width); + if (!(m = *q)) { + if (clprintm(g, NULL, mc, ml, (!i), wid)) + goto end; + break; + } + if (clprintm(g, q, mc, ml, (!i), wid)) + goto end; + + if (dolist(ml)) + printed++; + ml += mlprinted; + if (dolistcl(ml) && (cl -= mlprinted) < 1) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } + if (mfirstl < 0) + mfirstl = ml; + + if (--n) + for (j = ((g->flags & CGF_ROWS) ? 1 : nc); + j && *q; j--) + q = skipnolist(q + 1, showall); + mc++; + } + while (i-- > 0) { + if (clprintm(g, NULL, mc, ml, (!i), + (g->widths ? g->widths[mc] : g->width))) + goto end; + mc++; + } + if (n) { + if (dolistnl(ml) && compprintnl(ml)) + goto end; + ml++; + if (dolistcl(ml) && cl >= 0 && --cl <= 1) { + cl = -1; + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + } + if (nl) + for (j = ((g->flags & CGF_ROWS) ? g->cols : 1); + j && *p; j--) + p = skipnolist(p + 1, showall); + } + if (!mnew && ml > mlend) + goto end; + } + } + if (g->lcount || (showall && g->mcount)) + pnl = 1; + g = g->next; + } + asked = 0; + end: + mstatprinted = 0; + lastlistlen = 0; + if (nlnct <= 1) + mscroll = 0; + if (clearflag) { + int nl; + + /* Move the cursor up to the prompt, if always_last_prompt * + * is set and all that... */ + if (mlbeg >= 0) { + if ((nl = listdat.nlines + nlnct) >= lines) { + if (mhasstat) { + putc('\n', shout); + compprintfmt(NULL, 0, 1, 1, mline, NULL); + mstatprinted = 1; + } + nl = lines - 1; + } else + nl--; + tcmultout(TCUP, TCMULTUP, nl); + showinglist = -1; + + lastlistlen = listdat.nlines; + } else if ((nl = listdat.nlines + nlnct - 1) < lines) { + if (mlbeg >= 0 && tccan(TCCLEAREOL)) + tcout(TCCLEAREOL); + tcmultout(TCUP, TCMULTUP, nl); + showinglist = -1; + + lastlistlen = listdat.nlines; + } else { + clearflag = 0; + if (!asked) { + mrestlines = (ml + nlnct > lines); + compprintnl(ml); + } + } + } else if (!asked) { + mrestlines = (ml + nlnct > lines); + compprintnl(ml); + } + listshown = (clearflag ? 1 : -1); + mnew = 0; + + return printed; +} + +/**/ +static int +clprintm(Cmgroup g, Cmatch *mp, int mc, int ml, int lastc, int width) { Cmatch m; - int len, subcols = 0; + int len, subcols = 0, stop = 0, ret = 0; if (g != last_group) *last_cap = '\0'; @@ -649,22 +1369,41 @@ clprintm(Cmgroup g, Cmatch *mp, int mc, int ml, int lastc, int width, last_group = g; if (!mp) { - zcputs(&mcolors, g->name, COL_SP); - len = width - 2; - while (len-- > 0) - putc(' ', shout); - zcoff(); - return; + if (dolist(ml)) { + zcputs(&mcolors, g->name, COL_SP); + len = width - 2; + while (len-- > 0) + putc(' ', shout); + zcoff(); + } + mlprinted = 0; + return 0; } m = *mp; + + if ((m->flags & CMF_ALL) && (!m->disp || !m->disp[0])) + bld_all_str(m); + + mlastm = m->gnum; if (m->disp && (m->flags & CMF_DISPLINE)) { if (mselect >= 0) { int mm = (mcols * ml), i; - for (i = mcols; i--; ) { - mtab[mm + i] = mp; - mgtab[mm + i] = g; - } + if (m->flags & CMF_DUMMY) { + for (i = mcols; i--; ) { + mtab[mm + i] = mtmark(mp); + mgtab[mm + i] = mgmark(g); + } + } else { + for (i = mcols; i--; ) { + mtab[mm + i] = mp; + mgtab[mm + i] = g; + } + } + } + if (!dolist(ml)) { + mlprinted = printfmt(m->disp, 0, 0, 0) / columns; + return 0; } if (m->gnum == mselect) { int mm = (mcols * ml); @@ -674,16 +1413,21 @@ clprintm(Cmgroup g, Cmatch *mp, int mc, int ml, int lastc, int width, mgtabp = mgtab + mm; mmlen = mcols; zcputs(&mcolors, g->name, COL_MA); - } else if (m->flags & CMF_NOLIST) + } else if ((m->flags & CMF_NOLIST) && + mcolors.files[COL_HI] && mcolors.files[COL_HI]->col) zcputs(&mcolors, g->name, COL_HI); - else if (mselect >= 0 && (m->flags & (CMF_MULT | CMF_FMULT))) + else if (mselect >= 0 && (m->flags & (CMF_MULT | CMF_FMULT)) && + mcolors.files[COL_DU] && mcolors.files[COL_DU]->col) zcputs(&mcolors, g->name, COL_DU); else subcols = putmatchcol(&mcolors, g->name, m->disp); if (subcols) - clprintfmt(&mcolors, m->disp); - else - printfmt(m->disp, 0, 1, 0); + ret = clprintfmt(&mcolors, m->disp, ml); + else { + compprintfmt(m->disp, 0, 1, 0, ml, &stop); + if (stop) + ret = 1; + } zcoff(); } else { int mx; @@ -699,10 +1443,21 @@ clprintm(Cmgroup g, Cmatch *mp, int mc, int ml, int lastc, int width, if (mselect >= 0) { int mm = mcols * ml, i; - for (i = (width ? width : mcols); i--; ) { - mtab[mx + mm + i] = mp; - mgtab[mx + mm + i] = g; - } + if (m->flags & CMF_DUMMY) { + for (i = (width ? width : mcols); i--; ) { + mtab[mx + mm + i] = mtmark(mp); + mgtab[mx + mm + i] = mgmark(g); + } + } else { + for (i = (width ? width : mcols); i--; ) { + mtab[mx + mm + i] = mp; + mgtab[mx + mm + i] = g; + } + } + } + if (!dolist(ml)) { + mlprinted = niceztrlen(m->disp ? m->disp : m->str) / columns; + return 0; } if (m->gnum == mselect) { int mm = mcols * ml; @@ -717,23 +1472,28 @@ clprintm(Cmgroup g, Cmatch *mp, int mc, int ml, int lastc, int width, zcputs(&mcolors, g->name, COL_HI); else if (mselect >= 0 && (m->flags & (CMF_MULT | CMF_FMULT))) zcputs(&mcolors, g->name, COL_DU); - else if (buf) - subcols = putfilecol(&mcolors, g->name, m->str, buf->st_mode); + else if (m->mode) + subcols = putfilecol(&mcolors, g->name, m->str, m->mode); else subcols = putmatchcol(&mcolors, g->name, (m->disp ? m->disp : m->str)); if (subcols) - clnicezputs(&mcolors, (m->disp ? m->disp : m->str)); + ret = clnicezputs(&mcolors, (m->disp ? m->disp : m->str), ml); else - nicezputs((m->disp ? m->disp : m->str), shout); + ret = compnicezputs((m->disp ? m->disp : m->str), ml); + if (ret) { + zcoff(); + return 1; + } len = niceztrlen(m->disp ? m->disp : m->str); + mlprinted = len / columns; - if (isset(LISTTYPES) && buf) { + if ((g->flags & CGF_FILES) && m->modec) { if (m->gnum != mselect) { zcoff(); zcputs(&mcolors, g->name, COL_TC); } - putc(file_type(buf->st_mode), shout); + putc(m->modec, shout); len++; } if ((len = width - len - 2) > 0) { @@ -751,43 +1511,155 @@ clprintm(Cmgroup g, Cmatch *mp, int mc, int ml, int lastc, int width, zcoff(); } } + return ret; } static int -complistmatches(Hookdef dummy, Chdata dat) +singlecalc(int *cp, int l, int *lcp) +{ + int c = *cp, n, j, first = 1; + Cmatch **p, *op, *mp = mtab[l * columns + c]; + + for (n = 0, j = c, p = mtab + l * columns + c, op = NULL; j >= 0; j--, p--) { + if (*p == mp) + c = j; + if (!first && *p != op) + n++; + op = *p; + first = 0; + } + *cp = c; + *lcp = 1; + for (p = mtab + l * columns + c; c < columns; c++, p++) + if (*p && mp != *p) + *lcp = 0; + + return n; +} + +static void +singledraw() { + Cmgroup g; + int mc1, mc2, ml1, ml2, md1, md2, mcc1, mcc2, lc1, lc2, t1, t2; + + t1 = mline - mlbeg; + t2 = moline - molbeg; + + if (t2 < t1) { + mc1 = mocol; ml1 = moline; md1 = t2; + mc2 = mcol; ml2 = mline; md2 = t1; + } else { + mc1 = mcol; ml1 = mline; md1 = t1; + mc2 = mocol; ml2 = moline; md2 = t2; + } + mcc1 = singlecalc(&mc1, ml1, &lc1); + mcc2 = singlecalc(&mc2, ml2, &lc2); + + if (md1) + tc_downcurs(md1); + if (mc1) + tcmultout(TCRIGHT, TCMULTRIGHT, mc1); + g = mgtab[ml1 * columns + mc1]; + clprintm(g, mtab[ml1 * columns + mc1], mcc1, ml1, lc1, + (g->widths ? g->widths[mcc1] : g->width)); + putc('\r', shout); + + if (md2 != md1) + tc_downcurs(md2 - md1); + if (mc2) + tcmultout(TCRIGHT, TCMULTRIGHT, mc2); + g = mgtab[ml2 * columns + mc2]; + clprintm(g, mtab[ml2 * columns + mc2], mcc2, ml2, lc2, + (g->widths ? g->widths[mcc2] : g->width)); + putc('\r', shout); + + if (mstatprinted) { + int i = lines - md2 - nlnct; + + tc_downcurs(i - 1); + compprintfmt(NULL, 0, 1, 1, mline, NULL); + tcmultout(TCUP, TCMULTUP, lines - 1); + } else + tcmultout(TCUP, TCMULTUP, md2 + nlnct); + + showinglist = -1; + listshown = 1; +} + +static int +complistmatches(UNUSED(Hookdef dummy), Chdata dat) +{ + static int onlnct = -1; + Cmgroup oamatches = amatches; amatches = dat->matches; - if (minfo.asked == 2) { + noselect = 0; + + if ((minfo.asked == 2 && mselect < 0) || nlnct >= lines) { showinglist = 0; amatches = oamatches; return (noselect = 1); } getcols(&mcolors); - calclist(mselect >= 0); + mnew = ((calclist(mselect >= 0) || mlastcols != columns || + mlastlines != listdat.nlines) && mselect >= 0); if (!listdat.nlines || (mselect >= 0 && - (!(isset(USEZLE) && !termflags && - complastprompt && *complastprompt) || - (listdat.nlines + nlnct - 1) >= lines))) { + !(isset(USEZLE) && !termflags && + complastprompt && *complastprompt))) { showinglist = listshown = 0; noselect = 1; amatches = oamatches; return 1; } - if (inselect) + if (inselect || mlbeg >= 0) clearflag = 0; - if (asklist()) { - amatches = oamatches; - return (noselect = 1); + mscroll = 0; + mlistp = NULL; + + queue_signals(); + if (mselect >= 0 || mlbeg >= 0 || + (mlistp = dupstring(getsparam("LISTPROMPT")))) { + unqueue_signals(); + if (mlistp && !*mlistp) + mlistp = "%SAt %p: Hit TAB for more, or the character to insert%s"; + trashzle(); + showinglist = listshown = 0; + + lastlistlen = 0; + + if (mlistp) { + clearflag = (isset(USEZLE) && !termflags && dolastprompt); + mscroll = 1; + } else { + clearflag = 1; + minfo.asked = (listdat.nlines + nlnct <= lines); + } + } else { + unqueue_signals(); + mlistp = NULL; + if (asklist()) { + amatches = oamatches; + return (noselect = 1); + } } - if (mselect >= 0) { + if (mlbeg >= 0) { + mlend = mlbeg + lines - nlnct - mhasstat; + while (mline >= mlend) + mlbeg++, mlend++; + } else + mlend = 9999999; + + if (mnew) { int i; + mtab_been_reallocated = 1; + i = columns * listdat.nlines; free(mtab); mtab = (Cmatch **) zalloc(i * sizeof(Cmatch **)); @@ -795,16 +1667,22 @@ complistmatches(Hookdef dummy, Chdata dat) free(mgtab); mgtab = (Cmgroup *) zalloc(i * sizeof(Cmgroup)); memset(mgtab, 0, i * sizeof(Cmgroup)); - mcols = columns; - mlines = listdat.nlines; + mlastcols = mcols = columns; + mlastlines = mlines = listdat.nlines; } last_cap = (char *) zhalloc(max_caplen + 1); *last_cap = '\0'; - if (!printlist(1, clprintm, (mselect >= 0)) || listdat.nlines >= lines || - !clearflag) + if (!mnew && inselect && onlnct == nlnct && mlbeg >= 0 && mlbeg == molbeg) + singledraw(); + else if (!compprintlist(mselect >= 0) || !clearflag) noselect = 1; + onlnct = nlnct; + molbeg = mlbeg; + mocol = mcol; + moline = mline; + amatches = oamatches; return noselect; @@ -818,8 +1696,8 @@ adjust_mcol(int wish, Cmatch ***tabp, Cmgroup **grp) tab -= mcol; - for (p = wish; p >= 0 && !tab[p]; p--); - for (n = wish; n < mcols && !tab[n]; n++); + for (p = wish; p >= 0 && (!tab[p] || mmarked(tab[p])); p--); + for (n = wish; n < mcols && (!tab[n] || mmarked(tab[n])); n++); if (n == mcols) n = -1; @@ -849,41 +1727,402 @@ struct menustack { Brinfo brbeg; Brinfo brend; int nbrbeg, nbrend; - int cs, acc, nmatches; + int cs, acc, nmatches, mline, mlbeg, nolist; struct menuinfo info; Cmgroup amatches, pmatches, lastmatches, lastlmatches; + /* + * Status for how line looked like previously. + */ + char *origline; + int origcs, origll; + /* + * Status for interactive mode. status is the line + * printed above the matches saying what the interactive + * completion prefix is. mode says whether we are in + * interactive or some search mode. + * typed. + */ + char *status; + int mode; +}; + +typedef struct menusearch *Menusearch; + +struct menusearch { + Menusearch prev; + char *str; + int line; + int col; + int back; + int state; + Cmatch **ptr; }; +#define MS_OK 0 +#define MS_FAILED 1 +#define MS_WRAPPED 2 + +#define MAX_STATUS 128 + +static char * +setmstatus(char *status, char *sline, int sll, int scs, + int *csp, int *llp, int *lenp) +{ + char *p, *s, *ret = NULL; + int pl, sl, max; + + if (csp) { + *csp = cs; + *llp = ll; + *lenp = lastend - wb; + + ret = dupstring((char *) line); + + p = (char *) zhalloc(cs - wb + 1); + strncpy(p, (char *) line + wb, cs - wb); + p[cs - wb] = '\0'; + if (lastend < cs) + s = ""; + else { + s = (char *) zhalloc(lastend - cs + 1); + strncpy(s, (char *) line + cs, lastend - cs); + s[lastend - cs] = '\0'; + } + cs = 0; + foredel(ll); + spaceinline(sll); + memcpy(line, sline, sll); + cs = scs; + } else { + p = complastprefix; + s = complastsuffix; + } + pl = strlen(p); + sl = strlen(s); + max = (columns < MAX_STATUS ? columns : MAX_STATUS) - 14; + + if (max > 12) { + int h = (max - 2) >> 1; + + strcpy(status, "interactive: "); + if (pl > h - 3) { + strcat(status, "..."); + strcat(status, p + pl - h - 3); + } else + strcat(status, p); + + strcat(status, "[]"); + if (sl > h - 3) { + strncat(status, s, h - 3); + strcat(status, "..."); + } else + strcat(status, s); + } + return ret; +} + +static Menusearch msearchstack; +static char *msearchstr = NULL; +static int msearchstate; + +static void +msearchpush(Cmatch **p, int back) +{ + Menusearch s = (Menusearch) zhalloc(sizeof(struct menusearch)); + + s->prev = msearchstack; + msearchstack = s; + s->str = dupstring(msearchstr); + s->line = mline; + s->col = mcol; + s->back = back; + s->state = msearchstate; + s->ptr = p; +} + +static Cmatch ** +msearchpop(int *backp) +{ + Menusearch s = msearchstack; + + if (s->prev) + msearchstack = s->prev; + + msearchstr = s->str; + mline = s->line; + mcol = s->col; + msearchstate = s->state; + + *backp = s->back; + + return s->ptr; +} + +static Cmatch ** +msearch(Cmatch **ptr, int ins, int back, int rep, int *wrapp) +{ + char s[2]; + Cmatch **p, *l = NULL, m; + int x = mcol, y = mline; + int ex, ey, wrap = 0, owrap = (msearchstate & MS_WRAPPED); + + msearchpush(ptr, back); + + if (ins) { + s[0] = lastchar; + s[1] = '\0'; + + msearchstr = dyncat(msearchstr, s); + } + if (back) { + ex = mcols - 1; + ey = -1; + } else { + ex = 0; + ey = listdat.nlines; + } + p = mtab + (mline * mcols) + mcol; + if (rep) + l = *p; + while (1) { + if (!rep && mtunmark(*p) && *p != l) { + l = *p; + m = *mtunmark(*p); + + if (strstr((m->disp ? m->disp : m->str), msearchstr)) { + mcol = x; + mline = y; + + return p; + } + } + rep = 0; + + if (back) { + p--; + if (--x < 0) { + x = mcols - 1; + y--; + } + } else { + p++; + if (++x == mcols) { + x = 0; + y++; + } + } + if (x == ex && y == ey) { + if (wrap) { + msearchstate = MS_FAILED | owrap; + break; + } + msearchstate |= MS_WRAPPED; + + if (back) { + x = mcols - 1; + y = listdat.nlines - 1; + p = mtab + (y * mcols) + x; + } else { + x = y = 0; + p = mtab; + } + ex = mcol; + ey = mline; + wrap = 1; + *wrapp = 1; + } + } + return NULL; +} + +/* + * Values to assign to mode: interactive, etc. + */ +#define MM_INTER 1 +#define MM_FSEARCH 2 +#define MM_BSEARCH 3 + static int domenuselect(Hookdef dummy, Chdata dat) { static Chdata fdat = NULL; + static char *lastsearch = NULL; Cmatch **p; Cmgroup *pg; - Thingy cmd; + Thingy cmd = 0; + int do_last_key = 0; Menustack u = NULL; int i = 0, acc = 0, wishcol = 0, setwish = 0, oe = onlyexpl, wasnext = 0; + int space, lbeg = 0, step = 1, wrap, pl = nlnct, broken = 0, first = 1; + int nolist = 0, mode = 0, modecs, modell, modelen; char *s; + char status[MAX_STATUS], *modeline = NULL; + + msearchstack = NULL; + msearchstr = ""; + msearchstate = MS_OK; - if (fdat || (dummy && (!(s = getsparam("SELECTMIN")) || + status[0] = '\0'; + queue_signals(); + if (fdat || (dummy && (!(s = getsparam("MENUSELECT")) || (dat && dat->num < atoi(s))))) { if (fdat) { fdat->matches = dat->matches; fdat->num = dat->num; + fdat->nmesg = dat->nmesg; } + unqueue_signals(); return 0; } + if ((s = getsparam("MENUSCROLL"))) { + if (!(step = mathevali(s))) + step = (lines - nlnct) >> 1; + else if (step < 0) + if ((step += lines - nlnct) < 0) + step = 1; + } + if ((s = getsparam("MENUMODE"))) { + if (!strcmp(s, "interactive")) { + int l = strlen(origline); + + /* + * In interactive completion mode we don't insert + * the completion onto the command line, instead + * we show just what the user has typed and + * the match so far underneath (stored in "status"). + * So put the command line back to how it + * was before completion started. + */ + mode = MM_INTER; + cs = 0; + foredel(ll); + spaceinline(l); + strncpy((char *) line, origline, l); + cs = origcs; + setmstatus(status, NULL, 0 , 0, NULL, NULL, NULL); + } else if (strpfx("search", s)) { + mode = (strstr(s, "back") ? MM_BSEARCH : MM_FSEARCH); + } + } + if ((mstatus = dupstring(getsparam("MENUPROMPT"))) && !*mstatus) + mstatus = "%SScrolling active: current selection at %p%s"; + unqueue_signals(); + mhasstat = (mstatus && *mstatus); fdat = dat; selectlocalmap(mskeymap); - noselect = 0; + noselect = 1; + while ((menuacc && + !hasbrpsfx(*(minfo.cur), minfo.prebr, minfo.postbr)) || + ((*minfo.cur)->flags & CMF_DUMMY) || + (((*minfo.cur)->flags & (CMF_NOLIST | CMF_MULT)) && + (!(*minfo.cur)->str || !*(*minfo.cur)->str))) + do_menucmp(0); + mselect = (*(minfo.cur))->gnum; + mline = 0; + mlines = 999999; + mlbeg = 0; + molbeg = -42; for (;;) { - onlyexpl = 0; - showinglist = -2; - zrefresh(); - inselect = 1; - if (noselect) - break; + mtab_been_reallocated = 0; + if (mline < 0) { + int x, y; + Cmatch **p = mtab; + + for (y = 0; y < mlines; y++) { + for (x = mcols; x; x--, p++) + if (*p && !mmarked(*p) && **p && mselect == (**p)->gnum) + break; + if (x) { + mcol = mcols - x; + break; + } + } + if (y < mlines) + mline = y; + } + while (mline < mlbeg) + if ((mlbeg -= step) < 0) + mlbeg = 0; + + if (mlbeg && lbeg != mlbeg) { + Cmatch **p = mtab + ((mlbeg - 1) * columns), **q; + int c; + + while (mlbeg) { + for (q = p, c = columns; c; q++, c--) + if (*q && !mmarked(*q)) + break; + if (c) + break; + p -= columns; + mlbeg--; + } + } + if ((space = lines - pl - mhasstat)) + while (mline >= mlbeg + space) + if ((mlbeg += step) + space > mlines) + mlbeg = mlines - space; + if (lbeg != mlbeg) { + Cmatch **p = mtab + (mlbeg * columns), **q; + int c; + + while (mlbeg < mlines) { + for (q = p, c = columns; c; q++, c--) + if (*q) + break; + if (c) + break; + p += columns; + mlbeg++; + } + } + lbeg = mlbeg; + onlyexpl = 0; + showinglist = -2; + if (first && !listshown && isset(LISTBEEP)) + zbeep(); + if (first) { + /* + * remember the original data that we will use when + * performing interactive completion to restore the + * command line when a menu completion is inserted. + * this is because menu completion will insert + * the next match in the loop; for interactive + * completion we don't want that, we always want to + * be able to type the next character. + */ + modeline = dupstring(line); + modecs = cs; + modell = ll; + modelen = minfo.len; + } + first = 0; + if (mode == MM_INTER) { + statusline = status; + statusll = strlen(status); + } else if (mode) { + int l = sprintf(status, "%s%sisearch%s: ", + ((msearchstate & MS_FAILED) ? "failed " : ""), + ((msearchstate & MS_WRAPPED) ? "wrapped " : ""), + (mode == MM_FSEARCH ? "" : " backward")); + + strncat(status, msearchstr, MAX_STATUS - l - 1); + + statusline = status; + statusll = strlen(status); + } else { + statusline = NULL; + statusll = 0; + } + zrefresh(); + statusline = NULL; + statusll = 0; + inselect = 1; + if (noselect) { + broken = 1; + break; + } selected = 1; if (!i) { i = mcols * mlines; @@ -911,73 +2150,224 @@ domenuselect(Hookdef dummy, Chdata dat) getk: - if (!(cmd = getkeycmd()) || cmd == Th(z_sendbreak)) + if (!do_last_key) { + cmd = getkeycmd(); + if (mtab_been_reallocated) { + do_last_key = 1; + continue; + } + } + do_last_key = 0; + + if (!cmd || cmd == Th(z_sendbreak)) { + zbeep(); + molbeg = -1; break; - else if (cmd == Th(z_acceptline)) { + } else if (nolist && cmd != Th(z_undo) && + (!mode || (cmd != Th(z_backwarddeletechar) && + cmd != Th(z_selfinsert) && + cmd != Th(z_selfinsertunmeta)))) { + ungetkeycmd(); + break; + } else if (cmd == Th(z_acceptline)) { + if (mode == MM_FSEARCH || mode == MM_BSEARCH) { + mode = 0; + continue; + } acc = 1; break; - } else if (cmd == Th(z_acceptandinfernexthistory)) { + } else if (cmd == Th(z_viinsert)) { + if (mode == MM_INTER) + mode = 0; + else { + int l = strlen(origline); + + /* + * Entering interactive completion mode: + * same code as when we enter it on menu selection + * start. + */ + mode = MM_INTER; + cs = 0; + foredel(ll); + spaceinline(l); + strncpy((char *) line, origline, l); + cs = origcs; + setmstatus(status, NULL, 0, 0, NULL, NULL, NULL); + + continue; + } + } else if (cmd == Th(z_acceptandinfernexthistory) || + (mode == MM_INTER && (cmd == Th(z_selfinsert) || + cmd == Th(z_selfinsertunmeta)))) { + char *saveline = NULL; + int savell = 0; + int savecs = 0; Menustack s = (Menustack) zhalloc(sizeof(*s)); s->prev = u; u = s; s->line = dupstring((char *) line); s->cs = cs; + s->mline = mline; + s->mlbeg = mlbeg; memcpy(&(s->info), &minfo, sizeof(struct menuinfo)); s->amatches = amatches; s->pmatches = pmatches; s->lastmatches = lastmatches; s->lastlmatches = lastlmatches; + s->nolist = nolist; s->acc = menuacc; s->brbeg = dupbrinfo(brbeg, NULL, 1); s->brend = dupbrinfo(brend, NULL, 1); s->nbrbeg = nbrbeg; s->nbrend = nbrend; s->nmatches = nmatches; + s->origline = origline; + s->origcs = origcs; + s->origll = origll; + s->status = dupstring(status); + s->mode = mode; menucmp = menuacc = hasoldlist = 0; + minfo.cur = NULL; fixsuffix(); + handleundo(); validlist = 0; amatches = pmatches = lastmatches = NULL; invalidate_list(); + iforcemenu = 1; + comprecursive = 1; + if (cmd != Th(z_acceptandinfernexthistory)) { + int l = strlen(origline); + + /* + * Interactive mode: we need to restore the + * line, add the character, then remember how + * this new line looks in order to keep + * the command line as it is with just the + * characters typed by the user. + */ + cs = 0; + foredel(ll); + spaceinline(l); + strncpy((char *) line, origline, l); + cs = origcs; + + if (cmd == Th(z_selfinsert)) + selfinsert(zlenoargs); + else + selfinsertunmeta(zlenoargs); + + saveline = (char *) zhalloc(ll); + memcpy(saveline, line, ll); + savell = ll; + savecs = cs; + iforcemenu = -1; + } else + mode = 0; menucomplete(zlenoargs); - if (dat->num < 2 || !minfo.cur || !*(minfo.cur)) { - noselect = clearlist = listshown = 1; - onlyexpl = 0; - zrefresh(); - break; + iforcemenu = 0; + + if (cmd != Th(z_acceptandinfernexthistory)) + modeline = setmstatus(status, saveline, savell, savecs, + &modecs, &modell, &modelen); + + if (nmatches < 1 || !minfo.cur || !*(minfo.cur)) { + nolist = 1; + if (mode == MM_INTER) { + statusline = status; + statusll = strlen(status); + } + if (nmessages) { + showinglist = -2; + zrefresh(); + } else { + trashzle(); + zsetterm(); + if (tccan(TCCLEAREOD)) + tcout(TCCLEAREOD); + fputs("no matches\r", shout); + fflush(shout); + tcmultout(TCUP, TCMULTUP, nlnct); + showinglist = clearlist = 0; + clearflag = 1; + zrefresh(); + showinglist = clearlist = 0; + } + statusline = NULL; + statusll = 0; + + goto getk; } clearlist = listshown = 1; mselect = (*(minfo.cur))->gnum; setwish = wasnext = 1; + mline = 0; + molbeg = -42; continue; } else if (cmd == Th(z_acceptandhold) || cmd == Th(z_acceptandmenucomplete)) { Menustack s = (Menustack) zhalloc(sizeof(*s)); + int ol; + mode = 0; s->prev = u; u = s; s->line = dupstring((char *) line); s->cs = cs; + s->mline = mline; + s->mlbeg = mlbeg; memcpy(&(s->info), &minfo, sizeof(struct menuinfo)); s->amatches = s->pmatches = s->lastmatches = s->lastlmatches = NULL; + s->nolist = nolist; s->acc = menuacc; s->brbeg = dupbrinfo(brbeg, NULL, 1); s->brend = dupbrinfo(brend, NULL, 1); s->nbrbeg = nbrbeg; s->nbrend = nbrend; s->nmatches = nmatches; + s->origline = origline; + s->origcs = origcs; + s->origll = origll; + s->status = dupstring(status); + s->mode = mode; accept_last(); + handleundo(); + comprecursive = 1; do_menucmp(0); mselect = (*(minfo.cur))->gnum; + + p -= mcol; + mcol = 0; + ol = mline; + do { + for (mcol = 0; mcol < mcols; mcol++, p++) + if (*p == minfo.cur) + break; + if (mcol != mcols) + break; + if (++mline == mlines) { + mline = 0; + p -= mlines * mcols; + } + } while (mline != ol); + if (*p != minfo.cur) { + noselect = clearlist = listshown = 1; + onlyexpl = 0; + zrefresh(); + break; + } setwish = 1; continue; - } else if (cmd == Th(z_undo)) { + } else if (cmd == Th(z_undo) || + (mode == MM_INTER && cmd == Th(z_backwarddeletechar))) { int l; if (!u) - goto getk; + break; + handleundo(); cs = 0; foredel(ll); spaceinline(l = strlen(u->line)); @@ -986,15 +2376,17 @@ domenuselect(Hookdef dummy, Chdata dat) menuacc = u->acc; memcpy(&minfo, &(u->info), sizeof(struct menuinfo)); p = &(minfo.cur); + mline = u->mline; + mlbeg = u->mlbeg; if (u->lastmatches && lastmatches != u->lastmatches) { if (lastmatches) - freematches(lastmatches); + freematches(lastmatches, 0); amatches = u->amatches; pmatches = u->pmatches; lastmatches = u->lastmatches; lastlmatches = u->lastlmatches; nmatches = u->nmatches; - hasoldlist = 1; + hasoldlist = validlist = 1; } freebrinfo(brbeg); freebrinfo(brend); @@ -1002,82 +2394,288 @@ domenuselect(Hookdef dummy, Chdata dat) brend = dupbrinfo(u->brend, &lastbrend, 0); nbrbeg = u->nbrbeg; nbrend = u->nbrend; + origline = u->origline; + origcs = u->origcs; + origll = u->origll; + strcpy(status, u->status); + mode = u->mode; + nolist = u->nolist; u = u->prev; clearlist = 1; setwish = 1; listdat.valid = 0; + molbeg = -42; + + if (nolist) { + if (mode == MM_INTER) { + statusline = status; + statusll = strlen(status); + } + zrefresh(); + statusline = NULL; + statusll = 0; + goto getk; + } + if (mode) + continue; } else if (cmd == Th(z_redisplay)) { redisplay(zlenoargs); + molbeg = -42; continue; } else if (cmd == Th(z_clearscreen)) { clearscreen(zlenoargs); + molbeg = -42; continue; } else if (cmd == Th(z_downhistory) || cmd == Th(z_downlineorhistory) || cmd == Th(z_downlineorsearch) || cmd == Th(z_vidownlineorhistory)) { + int omline; + Cmatch **op; + + mode = 0; + wrap = 0; + + down: + + omline = mline; + op = p; + do { if (mline == mlines - 1) { + if (wrap & 2) { + mline = omline; + p = op; + break; + } p -= mline * mcols; mline = 0; + wrap |= 1; } else { mline++; p += mcols; } if (adjust_mcol(wishcol, &p, NULL)) continue; - } while (!*p); + } while (!*p || mmarked(*p)); + + if (wrap == 1) + goto right; } else if (cmd == Th(z_uphistory) || cmd == Th(z_uplineorhistory) || cmd == Th(z_uplineorsearch) || cmd == Th(z_viuplineorhistory)) { + int omline; + Cmatch **op; + + mode = 0; + wrap = 0; + + up: + + omline = mline; + op = p; + do { if (!mline) { + if (wrap & 2) { + mline = omline; + p = op; + break; + } mline = mlines - 1; p += mline * mcols; + wrap |= 1; } else { mline--; p -= mcols; } if (adjust_mcol(wishcol, &p, NULL)) continue; - } while (!*p); + } while (!*p || mmarked(*p)); + + if (wrap == 1) { + if (mcol == wishcol) + goto left; + + wishcol = mcol; + } + } else if (cmd == Th(z_emacsforwardword) || + cmd == Th(z_viforwardword) || + cmd == Th(z_viforwardwordend) || + cmd == Th(z_forwardword)) { + int i = lines - pl - 1, oi = i, ll = 0; + Cmatch **lp = NULL; + + mode = 0; + if (mline == mlines - 1) + goto top; + while (i > 0) { + if (mline == mlines - 1) { + if (i != oi && lp) + break; + goto top; + } else { + mline++; + p += mcols; + } + if (adjust_mcol(wishcol, &p, NULL)) + continue; + if (*p && !mmarked(*p)) { + i--; + lp = p; + ll = mline; + } + } + p = lp; + mline = ll; + } else if (cmd == Th(z_emacsbackwardword) || + cmd == Th(z_vibackwardword) || + cmd == Th(z_backwardword)) { + int i = lines - pl - 1, oi = i, ll = 0; + Cmatch **lp = NULL; + + mode = 0; + if (!mline) + goto bottom; + while (i > 0) { + if (!mline) { + if (i != oi && lp) + break; + goto bottom; + } else { + mline--; + p -= mcols; + } + if (adjust_mcol(wishcol, &p, NULL)) + continue; + if (*p || !mmarked(*p)) { + i--; + lp = p; + ll = mline; + } + } + p = lp; + mline = ll; + } else if (cmd == Th(z_beginningofhistory)) { + int ll; + Cmatch **lp; + + mode = 0; + + top: + + ll = mline; + lp = p; + while (mline) { + mline--; + p -= mcols; + if (adjust_mcol(wishcol, &p, NULL)) + continue; + if (*p && !mmarked(*p)) { + lp = p; + ll = mline; + } + } + mline = ll; + p = lp; + } else if (cmd == Th(z_endofhistory)) { + int ll; + Cmatch **lp; + + mode = 0; + + bottom: + + ll = mline; + lp = p; + while (mline < mlines - 1) { + mline++; + p += mcols; + if (adjust_mcol(wishcol, &p, NULL)) + continue; + if (*p && !mmarked(*p)) { + lp = p; + ll = mline; + } + } + mline = ll; + p = lp; } else if (cmd == Th(z_forwardchar) || cmd == Th(z_viforwardchar)) { - int omcol = mcol; - Cmatch *op = *p; + int omcol; + Cmatch **op; + + mode = 0; + wrap = 0; + + right: + + omcol = mcol; + op = p; do { if (mcol == mcols - 1) { + if (wrap & 1) { + p = op; + mcol = omcol; + break; + } p -= mcol; mcol = 0; + wrap |= 2; } else { mcol++; p++; } - } while (!*p || (mcol != omcol && *p == op)); + } while (!*p || mmarked(*p) || (mcol != omcol && *p == *op)); wishcol = mcol; + + if (wrap == 2) + goto down; } else if (cmd == Th(z_backwardchar) || cmd == Th(z_vibackwardchar)) { - int omcol = mcol; - Cmatch *op = *p; + int omcol; + Cmatch **op; + + mode = 0; + wrap = 0; + + left: + + omcol = mcol; + op = p; do { if (!mcol) { + if (wrap & 1) { + p = op; + mcol = omcol; + break; + } mcol = mcols - 1; p += mcol; + wrap |= 2; } else { mcol--; p--; } - } while (!*p || (mcol != omcol && *p == op)); + } while (!*p || mmarked(*p) || (mcol != omcol && *p == *op)); wishcol = mcol; + + if (wrap == 2) { + p += mcols - 1 - mcol; + wishcol = mcol = mcols - 1; + adjust_mcol(wishcol, &p, NULL); + goto up; + } } else if (cmd == Th(z_beginningofbufferorhistory) || cmd == Th(z_beginningofline) || cmd == Th(z_beginningoflinehist) || cmd == Th(z_vibeginningofline)) { + mode = 0; p -= mcol; mcol = 0; - while (!*p) { + while (!*p || mmarked(*p)) { mcol++; p++; } @@ -1086,20 +2684,20 @@ domenuselect(Hookdef dummy, Chdata dat) cmd == Th(z_endofline) || cmd == Th(z_endoflinehist) || cmd == Th(z_viendofline)) { + mode = 0; p += mcols - mcol - 1; mcol = mcols - 1; - while (!*p) { + while (!*p || mmarked(*p)) { mcol--; p--; } wishcol = mcols - 1; - } else if (cmd == Th(z_forwardword) || - cmd == Th(z_emacsforwardword) || - cmd == Th(z_viforwardword) || - cmd == Th(z_viforwardwordend)) { + } else if (cmd == Th(z_viforwardblankword) || + cmd == Th(z_viforwardblankwordend)) { Cmgroup g = *pg; int ol = mline; + mode = 0; do { if (mline == mlines - 1) { p -= mline * mcols; @@ -1112,13 +2710,12 @@ domenuselect(Hookdef dummy, Chdata dat) } if (adjust_mcol(wishcol, &p, &pg)) continue; - } while (ol != mline && (*pg == g || !*pg)); - } else if (cmd == Th(z_backwardword) || - cmd == Th(z_emacsbackwardword) || - cmd == Th(z_vibackwardword)) { + } while (ol != mline && (*pg == g || !*pg || mmarked(*pg))); + } else if (cmd == Th(z_vibackwardblankword)) { Cmgroup g = *pg; int ol = mline; + mode = 0; do { if (!mline) { mline = mlines - 1; @@ -1131,7 +2728,7 @@ domenuselect(Hookdef dummy, Chdata dat) } if (adjust_mcol(wishcol, &p, &pg)) continue; - } while (ol != mline && (*pg == g || !*pg)); + } while (ol != mline && (*pg == g || !*pg || mmarked(*pg))); } else if (cmd == Th(z_completeword) || cmd == Th(z_expandorcomplete) || cmd == Th(z_expandorcompleteprefix) || @@ -1143,50 +2740,160 @@ domenuselect(Hookdef dummy, Chdata dat) !strcmp(cmd->nam, "expand-or-complete-prefix") || !strcmp(cmd->nam, "menu-complete") || !strcmp(cmd->nam, "menu-expand-or-complete")) { - do_menucmp(0); - mselect = (*(minfo.cur))->gnum; - setwish = 1; + if (mode == MM_INTER) { + /* + * do_menucmp() has inserted the completion onto + * the command line. In interactive mode we + * don't want that, just what the user typed, + * so restore the information. + */ + origline = modeline; + origcs = modecs; + origll = modell; + cs = 0; + foredel(ll); + spaceinline(origll); + strncpy((char *) line, origline, origll); + cs = origcs; + minfo.len = modelen; + } else { + mode = 0; + comprecursive = 1; + do_menucmp(0); + mselect = (*(minfo.cur))->gnum; + setwish = 1; + mline = -1; + } continue; } else if (cmd == Th(z_reversemenucomplete) || !strcmp(cmd->nam, "reverse-menu-complete")) { + mode = 0; + comprecursive = 1; reversemenucomplete(zlenoargs); mselect = (*(minfo.cur))->gnum; setwish = 1; + mline = -1; + continue; + } else if (cmd == Th(z_historyincrementalsearchforward) || + cmd == Th(z_historyincrementalsearchbackward) || + ((mode == MM_FSEARCH || mode == MM_BSEARCH) && + (cmd == Th(z_selfinsert) || + cmd == Th(z_selfinsertunmeta)))) { + Cmatch **np, **op = p; + int was = (mode == MM_FSEARCH || mode == MM_BSEARCH); + int ins = (cmd == Th(z_selfinsert) || cmd == Th(z_selfinsertunmeta)); + int back = (cmd == Th(z_historyincrementalsearchbackward)); + int wrap; + + do { + if (was) { + p += wishcol - mcol; + mcol = wishcol; + } + if (!ins) { + if (was) { + if (!*msearchstr && lastsearch) { + msearchstr = dupstring(lastsearch); + mode = 0; + } + } else { + msearchstr = ""; + msearchstack = NULL; + } + } + if (cmd == Th(z_selfinsertunmeta)) { + lastchar &= 0x7f; + if (lastchar == '\r') + lastchar = '\n'; + } + wrap = 0; + np = msearch(p, ins, (ins ? (mode == MM_BSEARCH) : back), + (was && !ins), &wrap); + + if (!ins) + mode = (back ? MM_BSEARCH : MM_FSEARCH); + + if (*msearchstr) { + zsfree(lastsearch); + lastsearch = ztrdup(msearchstr); + } + if (np) { + wishcol = mcol; + p = np; + } + adjust_mcol(wishcol, &p, NULL); + + } while ((back || cmd == Th(z_historyincrementalsearchforward)) && + np && !wrap && was && **p == **op); + + } else if ((mode == MM_FSEARCH || mode == MM_BSEARCH) && + cmd == Th(z_backwarddeletechar)) { + int back; + Cmatch **np = msearchpop(&back); + + mode = (back ? MM_BSEARCH : MM_FSEARCH); + wishcol = mcol; + if (np) { + p = np; + adjust_mcol(wishcol, &p, NULL); + } + } else if (cmd == Th(z_undefinedkey)) { + mode = 0; continue; } else { ungetkeycmd(); + if (cmd->widget && (cmd->widget->flags & WIDGET_NCOMP)) { + acc = 0; + broken = 2; + } else + acc = 1; break; } + metafy_line(); do_single(**p); + unmetafy_line(); mselect = (**p)->gnum; } if (u) for (; u; u = u->prev) if (u->lastmatches != lastmatches) - freematches(u->lastmatches); + freematches(u->lastmatches, 0); selectlocalmap(NULL); - mselect = -1; - inselect = 0; - if (acc) { + mselect = mlastcols = mlastlines = -1; + mstatus = NULL; + inselect = mhasstat = 0; + if (nolist) + clearlist = listshown = 1; + if (acc && validlist && minfo.cur) { menucmp = lastambig = hasoldlist = 0; + metafy_line(); do_single(*(minfo.cur)); + unmetafy_line(); } - if (wasnext) { + if (wasnext || broken) { menucmp = 2; - showinglist = -2; + showinglist = ((validlist && !nolist) ? -2 : 0); minfo.asked = 0; + if (!noselect) { + int nos = noselect; + + zrefresh(); + noselect = nos; + } } - if (!noselect) { - showinglist = -2; + if (!noselect && (!dat || acc)) { + showinglist = ((validlist && !nolist) ? -2 : 0); onlyexpl = oe; if (!smatches) - clearlist = 1; + clearlist = listshown = 1; zrefresh(); } + mlbeg = -1; fdat = NULL; - return (!noselect ^ acc); + return (broken == 2 ? 3 : + ((dat && !broken) ? (acc ? 1 : 2) : (!noselect ^ acc))); } /* The widget function. */ @@ -1211,7 +2918,7 @@ menuselect(char **args) /**/ int -setup_(Module m) +setup_(UNUSED(Module m)) { return 0; } @@ -1247,12 +2954,20 @@ boot_(Module m) bindkey(mskeymap, "\33OB", refthingy(t_downlineorhistory), NULL); bindkey(mskeymap, "\33OC", refthingy(t_forwardchar), NULL); bindkey(mskeymap, "\33OD", refthingy(t_backwardchar), NULL); + lskeymap = newkeymap(NULL, "listscroll"); + linkkeymap(lskeymap, "listscroll", 1); + bindkey(lskeymap, "\t", refthingy(t_completeword), NULL); + bindkey(lskeymap, " ", refthingy(t_completeword), NULL); + bindkey(lskeymap, "\n", refthingy(t_acceptline), NULL); + bindkey(lskeymap, "\r", refthingy(t_acceptline), NULL); + bindkey(lskeymap, "\33[B", refthingy(t_downlineorhistory), NULL); + bindkey(lskeymap, "\33OB", refthingy(t_downlineorhistory), NULL); return 0; } /**/ int -cleanup_(Module m) +cleanup_(UNUSED(Module m)) { free(mtab); free(mgtab); @@ -1261,12 +2976,13 @@ cleanup_(Module m) deletehookfunc("comp_list_matches", (Hookfn) complistmatches); deletehookfunc("menu_start", (Hookfn) domenuselect); unlinkkeymap("menuselect", 1); + unlinkkeymap("listscroll", 1); return 0; } /**/ int -finish_(Module m) +finish_(UNUSED(Module m)) { return 0; } diff --git a/Src/glob.c b/Src/glob.c index 5334f70fa..4af70053e 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2223,11 +2223,12 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) { char *s = *sp, *t; /* - * Note that ioff and ml count characters in the character + * Note that ioff and uml count characters in the character * set (Meta's are not included), while l counts characters in the - * string. + * metafied string. umlen is a counter for (unmetafied) character + * lengths. */ - int ioff, l = strlen(*sp), ml = ztrlen(*sp), matched = 1; + int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen; repllist = NULL; @@ -2273,9 +2274,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) * ... now we know whether it's worth looking for the * shortest, which we do by brute force. */ - for (t = s; t < s + mlen; METAINC(t)) { + for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) { set_pat_end(p, *t); - if (pattrylen(p, s, t - s, 0)) { + if (pattrylen(p, s, t - s, umlen, 0)) { mlen = patmatchlen(); break; } @@ -2290,9 +2291,12 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) /* Smallest possible match at tail of string: * * move back down string until we get a match. * * There's no optimization here. */ - for (ioff = ml, t = s + l; t >= s; t--, ioff--) { + for (ioff = uml, t = s + l, umlen = 0; t >= s; + t--, ioff--, umlen++) { + if (t > s && t[-1] == Meta) + t--; set_pat_start(p, t-s); - if (pattrylen(p, t, -1, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, ioff)) { *sp = get_match_ret(*sp, t - s, l, fl, replstr); return 1; } @@ -2305,9 +2309,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) /* Largest possible match at tail of string: * * move forward along string until we get a match. * * Again there's no optimisation. */ - for (ioff = 0, t = s; t < s + l; ioff++, t++) { + for (ioff = 0, t = s, umlen = uml; t < s + l; + ioff++, METAINC(t), umlen--) { set_pat_start(p, t-s); - if (pattrylen(p, t, -1, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, ioff)) { *sp = get_match_ret(*sp, t-s, l, fl, replstr); return 1; } @@ -2329,19 +2334,22 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) if (fl & SUB_GLOBAL) repllist = newlinklist(); ioff = 0; /* offset into string */ + umlen = uml; do { /* loop over all matches for global substitution */ matched = 0; - for (; t < s + l; t++, ioff++) { + for (; t < s + l; METAINC(t), ioff++, umlen--) { /* Find the longest match from this position. */ set_pat_start(p, t-s); - if (pattrylen(p, t, -1, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, ioff)) { char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { char *ptr; - for (ptr = t; ptr < mpos; METAINC(ptr)) { + int umlen2; + for (ptr = t, umlen2 = 0; ptr < mpos; + METAINC(ptr), umlen2++) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, ioff)) { + if (pattrylen(p, t, ptr - t, umlen2, ioff)) { mpos = t + patmatchlen(); break; } @@ -2370,7 +2378,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) * which is already marked for replacement. */ matched = 1; - for ( ; t < mpos; t++, ioff++) + for ( ; t < mpos; t++, ioff++, umlen--) if (*t == Meta) t++; break; @@ -2397,23 +2405,26 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) /* Longest/shortest at end, matching substrings. */ if (!(fl & SUB_LONG)) { set_pat_start(p, l); - if (pattrylen(p, s + l, -1, ml) && !--n) { + if (pattrylen(p, s + l, 0, 0, uml) && !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr); return 1; } } - for (ioff = ml - 1, t = s + l - 1; t >= s; t--, ioff--) { + for (ioff = uml - 1, t = s + l - 1, umlen = 1; t >= s; + t--, ioff--, umlen++) { if (t > s && t[-1] == Meta) t--; set_pat_start(p, t-s); - if (pattrylen(p, t, -1, ioff) && !--n) { + if (pattrylen(p, t, s + l - t, umlen, ioff) && !--n) { /* Found the longest match */ char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { char *ptr; - for (ptr = t; ptr < mpos; METAINC(ptr)) { + int umlen2; + for (ptr = t, umlen2 = 0; ptr < mpos; + METAINC(ptr), umlen2++) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, ioff)) { + if (pattrylen(p, t, ptr - t, umlen2, ioff)) { mpos = t + patmatchlen(); break; } @@ -2424,7 +2435,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) } } set_pat_start(p, l); - if ((fl & SUB_LONG) && pattrylen(p, s + l, -1, ml) && !--n) { + if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, uml) && !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr); return 1; } diff --git a/Src/pattern.c b/Src/pattern.c index 4cfdc1336..1033c776f 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -47,6 +47,26 @@ * * Eagle-eyed readers will notice this is an altered version. Incredibly * sharp-eyed readers might even find bits that weren't altered. + * + * + * And I experienced a sense that, like certain regular + * expressions, seemed to match the day from beginning to end, so + * that I did not need to identify the parenthesised subexpression + * that told of dawn, nor the group of characters that indicated + * the moment when my grandfather returned home with news of + * Swann's departure for Paris; and the whole length of the month + * of May, as if matched by a closure, fitted into the buffer of my + * life with no sign of overflowing, turning the days, like a + * procession of insects that could consist of this or that + * species, into a random and unstructured repetition of different + * sequences, anchored from the first day of the month to the last + * in the same fashion as the weeks when I knew I would not see + * Gilberte and would search in vain for any occurrences of the + * string in the avenue of hawthorns by Tansonville, without my + * having to delimit explicitly the start or finish of the pattern. + * + * M. Proust, "In Search of Lost Files", + * bk I, "The Walk by Bourne's Place". */ #include "zsh.mdh" @@ -70,7 +90,7 @@ typedef union upat *Upat; #include "pattern.pro" -/* Number of active parenthesised expressions allowed in backreferencing */ +/* Number of active parenthesized expressions allowed in backreferencing */ #define NSUBEXP 9 /* definition number opnd? meaning */ @@ -78,11 +98,13 @@ typedef union upat *Upat; #define P_EXCSYNC 0x01 /* no Test if following exclude already failed */ #define P_EXCEND 0x02 /* no Test if exclude matched orig branch */ #define P_BACK 0x03 /* no Match "", "next" ptr points backward. */ -#define P_EXACTLY 0x04 /* str Match this string. */ +#define P_EXACTLY 0x04 /* lstr Match this string. */ #define P_NOTHING 0x05 /* no Match empty string. */ #define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */ #define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. */ #define P_GFLAGS 0x08 /* long Match nothing and set globbing flags */ +#define P_ISSTART 0x09 /* no Match start of string. */ +#define P_ISEND 0x0a /* no Match end of string. */ /* numbered so we can test bit 5 for a branch */ #define P_BRANCH 0x20 /* node Match this alternative, or the next... */ #define P_WBRANCH 0x21 /* uc* node P_BRANCH, but match at least 1 char */ @@ -101,10 +123,14 @@ typedef union upat *Upat; /* spaces left for P_OPEN+n,... for backreferences */ #define P_OPEN 0x80 /* no Mark this point in input as start of n. */ #define P_CLOSE 0x90 /* no Analogous to OPEN. */ -/* zl is the range type zrange_t: may be zlong or unsigned long - * char is a single char - * uc* is a pointer to unsigned char, used at run time and initialised +/* + * no no argument + * zr the range type zrange_t: may be zlong or unsigned long + * char a single char + * uc* a pointer to unsigned char, used at run time and initialised * to NULL. + * str null-terminated, metafied string + * lstr length as long then string, not null-terminated, unmetafied. */ /* @@ -117,7 +143,7 @@ typedef union upat *Upat; * * P_ANY, P_ANYOF: the operand is a null terminated * string. Normal characters match as expected. Characters - * in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the approprate + * in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate * Posix range tests. This relies on imeta returning true for these * characters. We treat unknown POSIX ranges as never matching. * PP_RANGE means the next two (possibly metafied) characters form @@ -156,18 +182,19 @@ typedef union upat *Upat; */ #define PP_ALPHA 1 #define PP_ALNUM 2 -#define PP_BLANK 3 -#define PP_CNTRL 4 -#define PP_DIGIT 5 -#define PP_GRAPH 6 -#define PP_LOWER 7 -#define PP_PRINT 8 -#define PP_PUNCT 9 -#define PP_SPACE 10 -#define PP_UPPER 11 -#define PP_XDIGIT 12 -#define PP_UNKWN 13 -#define PP_RANGE 14 +#define PP_ASCII 3 +#define PP_BLANK 4 +#define PP_CNTRL 5 +#define PP_DIGIT 6 +#define PP_GRAPH 7 +#define PP_LOWER 8 +#define PP_PRINT 9 +#define PP_PUNCT 10 +#define PP_SPACE 11 +#define PP_UPPER 12 +#define PP_XDIGIT 13 +#define PP_UNKWN 14 +#define PP_RANGE 15 #define P_OP(p) ((p)->l & 0xff) #define P_NEXT(p) ((p)->l >> 8) @@ -176,15 +203,24 @@ typedef union upat *Upat; #define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30) #define P_NOTDOT(p) ((p)->l & 0x40) +/* Specific to lstr type, i.e. P_EXACTLY. */ +#define P_LS_LEN(p) ((p)[1].l) /* can be used as lvalue */ +#define P_LS_STR(p) ((char *)((p) + 2)) + +/* Flags needed when pattern is executed */ #define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */ #define P_HSTART 0x02 /* Starts with # or ##'d pattern. */ #define P_PURESTR 0x04 /* Can be matched with a strcmp */ -/* Next character after one which may be a Meta (x is any char *) */ -#define METANEXT(x) (*(x) == Meta ? (x)+2 : (x)+1) /* * Increment pointer which may be on a Meta (x is a pointer variable), * returning the incremented value (i.e. like pre-increment). + * + * In future the following will need to refer to metafied multibyte + * characters. References to invidual characters are not turned + * into a macro when the characters is metafied (c.f. CHARREF() + * below then the character is not metafied) and will need tracking + * down. */ #define METAINC(x) ((x) += (*(x) == Meta) ? 2 : 1) /* @@ -194,6 +230,7 @@ typedef union upat *Upat; #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT) typedef zlong zrange_t; +#define ZRANGE_T_IS_SIGNED (1) #else typedef unsigned long zrange_t; #endif @@ -249,13 +286,18 @@ static int patglobflags; /* globbing flags & approx */ /* Add n more characters, ensuring there is enough space. */ +enum { + PA_NOALIGN = 1, + PA_UNMETA = 2 +}; + /**/ static void -patadd(char *add, int ch, long n, int noalgn) +patadd(char *add, int ch, long n, int paflags) { - /* Make sure everything gets aligned unless we get noalgn. */ + /* Make sure everything gets aligned unless we get PA_NOALIGN. */ long newpatsize = patsize + n; - if (!noalgn) + if (!(paflags & PA_NOALIGN)) newpatsize = (newpatsize + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1); if (patalloc < newpatsize) { @@ -267,8 +309,25 @@ patadd(char *add, int ch, long n, int noalgn) } patsize = newpatsize; if (add) { - while (n--) - *patcode++ = *add++; + if (paflags & PA_UNMETA) { + /* + * Unmetafy and untokenize the string as we go. + * The Meta characters in add aren't counted in n. + */ + while (n--) { + if (itok(*add)) + *patcode++ = ztokens[*add++ - Pound]; + else if (*add == Meta) { + add++; + *patcode++ = *add++ ^ 32; + } else { + *patcode++ = *add++; + } + } + } else { + while (n--) + *patcode++ = *add++; + } } else *patcode++ = ch; patcode = patout + patsize; @@ -286,16 +345,28 @@ static long rn_offs; void patcompstart(void) { - patglobflags = 0; + if (isset(CASEGLOB)) + patglobflags = 0; + else + patglobflags = GF_IGNCASE; } -/* Top level pattern compilation subroutine */ +/* + * Top level pattern compilation subroutine + * exp is a null-terminated, metafied string. + * inflags is an or of some PAT_* flags. + * endexp, if non-null, is set to a pointer to the end of the + * part of exp which was compiled. This is used when + * compiling patterns for directories which must be + * matched recursively. + */ /**/ mod_export Patprog patcompile(char *exp, int inflags, char **endexp) { - int flags = 0, len = 0; + int flags = 0; + long len = 0; long startoff; Upat pscan; char *lng, *strp = NULL; @@ -316,7 +387,7 @@ patcompile(char *exp, int inflags, char **endexp) * in struct is actual count of parentheses. */ patnpar = 1; - patflags = inflags & ~PAT_PURES; + patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP); patendseg = endseg; patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM; @@ -328,7 +399,7 @@ patcompile(char *exp, int inflags, char **endexp) patendstr++; patendseglen--; patendstrlen--; - remnulargs(exp); + remnulargs(patparse); patglobflags = 0; } /* @@ -338,18 +409,42 @@ patcompile(char *exp, int inflags, char **endexp) if (!(patflags & PAT_ANY)) { /* Look for a really pure string, with no tokens at all. */ - if (!patglobflags) + if (!patglobflags +#ifdef __CYGWIN__ + /* + * If the OS treats files case-insensitively and we + * are looking at files, we don't need to use pattern + * matching to find the file. + */ + || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE)) +#endif + ) + { + /* + * Waah! I wish I understood this. + * Empty metafied strings have an initial Nularg. + * This never corresponds to a real character in + * a glob pattern or string, so skip it. + */ + if (*exp == Nularg) + exp++; for (strp = exp; *strp && (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp); strp++) ; + } if (!strp || (*strp && *strp != '/')) { /* No, do normal compilation. */ strp = NULL; if (patcompswitch(0, &flags) == 0) return NULL; } else { - /* Yes, copy the string and skip compilation altogether */ + /* + * Yes, copy the string, and skip compilation altogether. + * Null terminate for the benefit of globbing. + * Leave metafied both for globbing and for our own + * efficiency. + */ patparse = strp; len = strp - exp; patadd(exp, 0, len + 1, 0); @@ -387,19 +482,52 @@ patcompile(char *exp, int inflags, char **endexp) for (; pscan; pscan = next) { next = PATNEXT(pscan); if (P_OP(pscan) == P_EXACTLY) { - char *opnd = (char *)P_OPERAND(pscan); - while ((*dst = *opnd++)) - dst++; + char *opnd = P_LS_STR(pscan), *mtest; + long oplen = P_LS_LEN(pscan), ilen; + int nmeta = 0; + /* + * Unfortunately we unmetafied the string + * and we need to put any metacharacters + * back now we know it's a pure string. + * This shouldn't happen too often, it's + * just that there are some cases such + * as . and .. in files where we really + * need a pure string even if there are + * pattern characters flying around. + */ + for (mtest = opnd, ilen = oplen; ilen; + mtest++, ilen--) + if (imeta(*mtest)) + nmeta++; + if (nmeta) { + char *oldpatout = patout; + patadd(NULL, 0, nmeta, 0); + /* + * Yuk. + */ + p = (Patprog)patout; + opnd = patout + (opnd - oldpatout); + dst = patout + startoff; + } + + while (oplen--) { + if (imeta(*opnd)) { + *dst++ = Meta; + *dst++ = *opnd ^ 32; + } else { + *dst++ = *opnd++; + } + } } } - *dst++ = '\0'; p->size = dst - patout; - /* patmlen is really strlen, don't include null byte */ - p->patmlen = p->size - startoff - 1; + /* patmlen is really strlen. We don't need a null. */ + p->patmlen = p->size - startoff; } else { /* starting point info */ - if (P_OP(pscan) == P_EXACTLY && !p->globflags) - p->patstartch = *(char *)P_OPERAND(pscan); + if (P_OP(pscan) == P_EXACTLY && !p->globflags && + P_LS_LEN(pscan)) + p->patstartch = *P_LS_STR(pscan); /* * Find the longest literal string in something expensive. * This is itself not all that cheap if we have @@ -410,9 +538,9 @@ patcompile(char *exp, int inflags, char **endexp) len = 0; for (; pscan; pscan = PATNEXT(pscan)) if (P_OP(pscan) == P_EXACTLY && - strlen((char *)P_OPERAND(pscan)) >= len) { - lng = (char *)P_OPERAND(pscan); - len = strlen(lng); + P_LS_LEN(pscan) >= len) { + lng = P_LS_STR(pscan); + len = P_LS_LEN(pscan); } if (lng) { p->mustoff = lng - patout; @@ -427,7 +555,7 @@ patcompile(char *exp, int inflags, char **endexp) * The pattern was compiled in a fixed buffer: unless told otherwise, * we stick the compiled pattern on the heap. This is necessary * for files where we will often be compiling multiple segments at once. - * But if we get the ZDUP flag w always put it in zalloc()ed memory. + * But if we get the ZDUP flag we always put it in zalloc()ed memory. */ if (patflags & PAT_ZDUP) { Patprog newp = (Patprog)zalloc(patsize); @@ -445,7 +573,7 @@ patcompile(char *exp, int inflags, char **endexp) } /* - * Main body or parenthesised subexpression in pattern + * Main body or parenthesized subexpression in pattern * Parenthesis (and any ksh_glob gubbins) will have been removed. */ @@ -486,7 +614,8 @@ patcompswitch(int paren, int *flagp) while (*patparse == Bar || (isset(EXTENDEDGLOB) && *patparse == Tilde && - !memchr(patendseg, patparse[1], patendseglen))) { + (patparse[1] == '/' || + !memchr(patendseg, patparse[1], patendseglen)))) { int tilde = *patparse++ == Tilde; long gfnode = 0, newbr; @@ -507,13 +636,20 @@ patcompswitch(int paren, int *flagp) * we need to do this here as otherwise the code compiling * the exclusion doesn't know if the flags have really * changed if the error count gets restored. - * - * At top level (paren == 0) in a file glob !(patflags &PAT_FILET) - * do the exclusion prepending the file path so far, else don't. */ patglobflags &= ~0xff; - br = patnode(!(patflags & PAT_FILET) || paren ? - P_EXCLUDE : P_EXCLUDP); + if (!(patflags & PAT_FILET) || paren) { + br = patnode(P_EXCLUDE); + } else { + /* + * At top level (paren == 0) in a file glob !(patflags + * &PAT_FILET) do the exclusion prepending the file path + * so far. We need to flag this to avoid unnecessarily + * copying the path. + */ + br = patnode(P_EXCLUDP); + patflags |= PAT_HAS_EXCLUDP; + } up.p = NULL; patadd((char *)&up, 0, sizeof(up), 0); /* / is not treated as special if we are at top level */ @@ -627,14 +763,14 @@ patcompswitch(int paren, int *flagp) static long patcompbranch(int *flagp) { - long chain, latest, starter; - int flags; + long chain, latest = 0, starter; + int flags = 0; *flagp = P_PURESTR; starter = chain = 0; while (!memchr(patendseg, *patparse, patendseglen) || - (*patparse == Tilde && + (*patparse == Tilde && patparse[1] != '/' && memchr(patendseg, patparse[1], patendseglen))) { if (isset(EXTENDEDGLOB) && ((!isset(SHGLOB) && @@ -643,36 +779,51 @@ patcompbranch(int *flagp) patparse[2] == Pound))) { /* Globbing flags. */ char *pp1 = patparse; - int oldglobflags = patglobflags; + int oldglobflags = patglobflags, ignore; + long assert; patparse += (*patparse == '@') ? 3 : 2; - if (!patgetglobflags(&patparse)) - return 0; - if (pp1 == patstart) { - /* Right at start of pattern, the simplest case. - * Put them into the flags and don't emit anything. - */ - ((Patprog)patout)->globflags = patglobflags; - continue; - } else if (!*patparse) { - /* Right at the end, so just leave the flags for - * the next Patprog in the chain to pick up. - */ + if (!patgetglobflags(&patparse, &assert, &ignore)) + return 0; + if (!ignore) { + if (assert) { + /* + * Start/end assertion looking like flags, but + * actually handled as a normal node + */ + latest = patnode(assert); + flags = 0; + } else { + if (pp1 == patstart) { + /* Right at start of pattern, the simplest case. + * Put them into the flags and don't emit anything. + */ + ((Patprog)patout)->globflags = patglobflags; + continue; + } else if (!*patparse) { + /* Right at the end, so just leave the flags for + * the next Patprog in the chain to pick up. + */ + break; + } + /* + * Otherwise, we have to stick them in as a pattern + * matching nothing. + */ + if (oldglobflags != patglobflags) { + /* Flags changed */ + union upat up; + latest = patnode(P_GFLAGS); + up.l = patglobflags; + patadd((char *)&up, 0, sizeof(union upat), 0); + } else { + /* No effect. */ + continue; + } + } + } else if (!*patparse) break; - } - /* - * Otherwise, we have to stick them in as a pattern - * matching nothing. - */ - if (oldglobflags != patglobflags) { - /* Flags changed */ - union upat up; - latest = patnode(P_GFLAGS); - up.l = patglobflags; - patadd((char *)&up, 0, sizeof(union upat), 0); - } else { - /* No effect. */ + else continue; - } } else if (isset(EXTENDEDGLOB) && *patparse == Hat) { /* * ^pat: anything but pat. For proper backtracking, @@ -706,68 +857,90 @@ patcompbranch(int *flagp) /**/ int -patgetglobflags(char **strp) +patgetglobflags(char **strp, long *assertp, int *ignore) { char *nptr, *ptr = *strp; zlong ret; + + *assertp = 0; + *ignore = 1; /* (#X): assumes we are still positioned on the first X */ for (; *ptr && *ptr != Outpar; ptr++) { - switch (*ptr) { - case 'a': - /* Approximate matching, max no. of errors follows */ - ret = zstrtol(++ptr, &nptr, 10); - /* - * We can't have more than 254, because we need 255 to - * mark 254 errors in wbranch and exclude sync strings - * (hypothetically --- hope no-one tries it). - */ - if (ret < 0 || ret > 254 || ptr == nptr) - return 0; - patglobflags = (patglobflags & ~0xff) | (ret & 0xff); - ptr = nptr-1; + if (*ptr == 'q') { + /* Glob qualifiers, ignored in pattern code */ + while (*ptr && *ptr != Outpar) + ptr++; break; + } else { + *ignore = 0; + switch (*ptr) { + case 'a': + /* Approximate matching, max no. of errors follows */ + ret = zstrtol(++ptr, &nptr, 10); + /* + * We can't have more than 254, because we need 255 to + * mark 254 errors in wbranch and exclude sync strings + * (hypothetically --- hope no-one tries it). + */ + if (ret < 0 || ret > 254 || ptr == nptr) + return 0; + patglobflags = (patglobflags & ~0xff) | (ret & 0xff); + ptr = nptr-1; + break; - case 'l': - /* Lowercase in pattern matches lower or upper in target */ - patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC; - break; + case 'l': + /* Lowercase in pattern matches lower or upper in target */ + patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC; + break; - case 'i': - /* Fully case insensitive */ - patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE; - break; + case 'i': + /* Fully case insensitive */ + patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE; + break; - case 'I': - /* Restore case sensitivity */ - patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE); - break; + case 'I': + /* Restore case sensitivity */ + patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE); + break; - case 'b': - /* Make backreferences */ - patglobflags |= GF_BACKREF; - break; + case 'b': + /* Make backreferences */ + patglobflags |= GF_BACKREF; + break; - case 'B': - /* Don't make backreferences */ - patglobflags &= ~GF_BACKREF; - break; + case 'B': + /* Don't make backreferences */ + patglobflags &= ~GF_BACKREF; + break; - case 'm': - /* Make references to complete match */ - patglobflags |= GF_MATCHREF; - break; + case 'm': + /* Make references to complete match */ + patglobflags |= GF_MATCHREF; + break; - case 'M': - /* Don't */ - patglobflags &= ~GF_MATCHREF; - break; + case 'M': + /* Don't */ + patglobflags &= ~GF_MATCHREF; + break; - default: - return 0; + case 's': + *assertp = P_ISSTART; + break; + + case 'e': + *assertp = P_ISEND; + break; + + default: + return 0; + } } } if (*ptr != Outpar) return 0; + /* Start/end assertions must appear on their own. */ + if (*assertp && (*strp)[1] != Outpar) + return 0; *strp = ptr + 1; return 1; } @@ -781,10 +954,10 @@ patgetglobflags(char **strp) static long patcomppiece(int *flagp) { - long starter, next, pound, op; - int flags, flags2, kshchar, len, ch, patch; + long starter = 0, next, pound, op; + int flags, flags2, kshchar, len, ch, patch, nmeta; union upat up; - char *nptr, *str0, cbuf[2]; + char *nptr, *str0, *ptr, cbuf[2]; zrange_t from, to; flags = 0; @@ -792,7 +965,7 @@ patcomppiece(int *flagp) for (;;) { /* * Check if we have a string. First, we need to make sure - * the string doesn't introduce a ksh-like parenthesised expression. + * the string doesn't introduce a ksh-like parenthesized expression. */ kshchar = '\0'; if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) { @@ -811,6 +984,7 @@ patcomppiece(int *flagp) */ if (kshchar || (memchr(patendstr, *patparse, patendstrlen) && (*patparse != Tilde || + patparse[1] == '/' || !memchr(patendseg, patparse[1], patendseglen)))) break; @@ -818,6 +992,9 @@ patcomppiece(int *flagp) } if (patparse > str0) { + long slen = patparse - str0; + int morelen; + /* Ordinary string: cancel kshchar lookahead */ kshchar = '\0'; /* @@ -826,25 +1003,43 @@ patcomppiece(int *flagp) flags |= P_PURESTR; DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece."); /* more than one character matched? */ - len = str0 + (*str0 == Meta ? 2 : 1) < patparse; + morelen = str0 + (*str0 == Meta ? 2 : 1) < patparse; /* * If we have more than one character, a following hash only * applies to the last, so decrement. */ - if (isset(EXTENDEDGLOB) && *patparse == Pound && len) + if (isset(EXTENDEDGLOB) && *patparse == Pound && morelen) patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1; /* * If len is 1, we can't have an active # following, so doesn't * matter that we don't make X in `XX#' simple. */ - if (!len) + if (!morelen) flags |= P_SIMPLE; starter = patnode(P_EXACTLY); - /* add enough space including null byte */ - len = patparse - str0; - patadd(str0, 0, len + 1, 0); - nptr = (char *)P_OPERAND((Upat)patout + starter); - nptr[len] = '\0'; + + /* Get length of string without metafication. */ + nmeta = 0; + /* inherited from domatch, but why, exactly? */ + if (*str0 == Nularg) + str0++; + for (ptr = str0; ptr < patparse; ptr++) { + if (*ptr == Meta) { + nmeta++; + ptr++; + } + } + slen = (patparse - str0) - nmeta; + /* First add length, which is a long */ + patadd((char *)&slen, 0, sizeof(long), 0); + /* + * Then the string, not null terminated. + * Unmetafy and untokenize; pass the final length, + * which is what we need to allocate, i.e. not including + * a count for each Meta in the string. + */ + patadd(str0, 0, slen, PA_UNMETA); + nptr = P_LS_STR((Upat)patout + starter); /* * It's much simpler to turn off pure string mode for * any case-insensitive or approximate matching; usually, @@ -855,13 +1050,13 @@ patcomppiece(int *flagp) * ..(#a1).. (i.e. the (#a1) has no effect), but if you're * going to write funny patterns, you get no sympathy from me. */ - if (patglobflags && - (!(patflags & PAT_FILE) || (strcmp(nptr, ".") && - strcmp(nptr, "..")))) - flags &= ~P_PURESTR; - for (; *nptr; METAINC(nptr)) - if (itok(*nptr)) - *nptr = ztokens[*nptr - Pound]; + if (patglobflags) { + if (!(patflags & PAT_FILE)) + flags &= ~P_PURESTR; + else if (!(nptr[0] == '.' && + (slen == 1 || (nptr[1] == '.' && slen == 2)))) + flags &= ~P_PURESTR; + } } else { if (kshchar) patparse++; @@ -887,7 +1082,7 @@ patcomppiece(int *flagp) starter = patnode(P_ANYOF); if (*patparse == Outbrack) { patparse++; - patadd(NULL, ']', 1, 1); + patadd(NULL, ']', 1, PA_NOALIGN); } while (*patparse && *patparse != Outbrack) { /* Meta is not a token */ @@ -901,6 +1096,8 @@ patcomppiece(int *flagp) ch = PP_ALPHA; else if (!strncmp(patparse, "alnum", len)) ch = PP_ALNUM; + else if (!strncmp(patparse, "ascii", len)) + ch = PP_ASCII; else if (!strncmp(patparse, "blank", len)) ch = PP_BLANK; else if (!strncmp(patparse, "cntrl", len)) @@ -925,7 +1122,7 @@ patcomppiece(int *flagp) ch = PP_UNKWN; patparse = nptr + 2; if (ch != PP_UNKWN) - patadd(NULL, STOUC(Meta+ch), 1, 1); + patadd(NULL, STOUC(Meta+ch), 1, PA_NOALIGN); continue; } if (itok(*patparse)) { @@ -938,15 +1135,17 @@ patcomppiece(int *flagp) patparse++; if (*patparse == '-' && patparse[1] != Outbrack) { - patadd(NULL, STOUC(Meta+PP_RANGE), 1, 1); - patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, 1); + patadd(NULL, STOUC(Meta+PP_RANGE), 1, PA_NOALIGN); + patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN); if (itok(*++patparse)) { - patadd(0, STOUC(ztokens[*patparse - Pound]), 1, 1); + patadd(0, STOUC(ztokens[*patparse - Pound]), 1, + PA_NOALIGN); } else - patadd(patparse, 0, (*patparse == Meta) ? 2 : 1, 1); + patadd(patparse, 0, (*patparse == Meta) ? 2 : 1, + PA_NOALIGN); METAINC(patparse); } else - patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, 1); + patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN); } if (*patparse != Outbrack) return 0; @@ -987,19 +1186,17 @@ patcomppiece(int *flagp) patparse = nptr; len |= 1; } - if (*patparse == '-') { - patparse++; - if (idigit(*patparse)) { - to = (zrange_t) zstrtol((char *)patparse, - (char **)&nptr, 10); - patparse = nptr; - len |= 2; - } + DPUTS(*patparse != '-', "BUG: - missing from numeric glob"); + patparse++; + if (idigit(*patparse)) { + to = (zrange_t) zstrtol((char *)patparse, + (char **)&nptr, 10); + patparse = nptr; + len |= 2; } if (*patparse != Outang) return 0; patparse++; - starter = 0; /* shut compiler up */ switch(len) { case 3: starter = patnode(P_NUMRNG); @@ -1078,13 +1275,19 @@ patcomppiece(int *flagp) * each time we fail on a non-empty branch, we try the empty branch, * which is equivalent to backtracking. */ - if ((flags & P_SIMPLE) && op == P_ONEHASH && + if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) && P_OP((Upat)patout+starter) == P_ANY) { /* Optimize ?# to *. Silly thing to do, since who would use * use ?# ? But it makes the later code shorter. */ Upat uptr = (Upat)patout + starter; - uptr->l = (uptr->l & ~0xff) | P_STAR; + if (op == P_TWOHASH) { + /* ?## becomes ?* */ + uptr->l = (uptr->l & ~0xff) | P_ANY; + pattail(starter, patnode(P_STAR)); + } else { + uptr->l = (uptr->l & ~0xff) | P_STAR; + } } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) { /* Don't simplify if we need to look for approximations. */ patinsert(op, starter, NULL, 0); @@ -1239,21 +1442,12 @@ static void patoptail(long p, long val) * Run a pattern. */ static char *patinstart; /* Start of input string */ - -/**/ -char *patinput; /* String input pointer */ - -/* Length of input string, plus null byte, if needed */ -static int patinlen; - -/* - * Offset of string at which we are trying to match. - * This is added in to the positions recorded in patbeginp and patendp - * when we are looking for substrings. Currently this only happens - * in the parameter substitution code. - */ -/**/ -int patoffset; +static char *patinend; /* End of input string */ +static char *patinput; /* String input pointer */ +static char *patinpath; /* Full path for use with ~ exclusions */ +static int patinlen; /* Length of last successful match. + * Includes count of Meta characters. + */ static char *patbeginp[NSUBEXP]; /* Pointer to backref beginnings */ static char *patendp[NSUBEXP]; /* Pointer to backref ends */ @@ -1261,9 +1455,24 @@ static int parsfound; /* parentheses (with backrefs) found */ static int globdots; /* Glob initial dots? */ +/* + * Macros which are currently trivial but are likely to be less + * so when we handle multibyte characters. They operate on + * umetafied strings. + */ + +/* Get a character from the start point in a string */ +#define CHARREF(x) (STOUC(*x)) +/* Get a pointer to the next character */ +#define CHARNEXT(x) (x+1) +/* Increment a pointer past the current character. */ +#define CHARINC(x) (x++) +/* Counter the number of characters between two pointers, largest first */ +#define CHARSUB(x,y) (x-y) + /* * The following need to be accessed in the globbing scanner for - * a multi-component file path. See horror story there. + * a multi-component file path. See horror story in glob.c. */ /**/ int errsfound; /* Total error count so far */ @@ -1279,23 +1488,63 @@ pattrystart(void) errsfound = 0; } +/* + * Test prog against null-terminated, metafied string. + */ + /**/ mod_export int pattry(Patprog prog, char *string) { - return pattryrefs(prog, string, NULL, NULL, NULL); + return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL); +} + +/* + * Test prog against string of given length, no null termination + * but still metafied at this point. offset gives an offset + * to include in reported match indices + */ + +/**/ +mod_export int +pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) +{ + return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL); } -/* The last three arguments are used to report the positions for the +/* + * Test prog against string with given lengths. The input + * string is metafied; stringlen is the raw string length, and + * unmetalen the number of characters in the original string (some + * of which may now be metafied). Either value may be -1 + * to indicate a null-terminated string which will be counted. Note + * there may be a severe penalty for this if a lot of matching is done + * on one string. + * + * offset is the position in the original string (not seen by + * the pattern module) at which we are trying to match. + * This is added in to the positions recorded in patbeginp and patendp + * when we are looking for substrings. Currently this only happens + * in the parameter substitution code. + * + * Note this is a character offset, i.e. a metafied character + * counts as 1. + * + * The last three arguments are used to report the positions for the * backreferences. On entry, *nump should contain the maximum number - * positions to report. */ + * of positions to report. In this case the match, mbegin, mend + * arrays are not altered. + */ /**/ mod_export int -pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) +pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, + int patoffset, + int *nump, int *begp, int *endp) { - int i, maxnpos = 0; - char **sp, **ep; + int i, maxnpos = 0, ret, needfullpath, unmetalenp; + int origlen; + char **sp, **ep, *tryalloced, *ptr; char *progstr = (char *)prog + prog->startoff; if (nump) { @@ -1306,31 +1555,183 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) if (*string == Nularg) string++; - patinstart = patinput = string; + if (stringlen < 0) + stringlen = strlen(string); + origlen = stringlen; + + patflags = prog->flags; + /* + * For a top-level ~-exclusion, we will need the full + * path to exclude, so copy the path so far and append the + * current test string. + */ + needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos; + + /* Get the length of the full string when unmetafied. */ + if (unmetalen < 0) + unmetalen = ztrsub(string + stringlen, string); + if (needfullpath) + unmetalenp = ztrsub(pathbuf + pathpos, pathbuf); + else + unmetalenp = 0; + + DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)), + "rum sort of file exclusion"); + /* + * Partly for efficiency, and partly for the convenience of + * globbing, we don't unmetafy pure string patterns, and + * there's no reason to if the pattern is just a *. + */ + if (!(patflags & (PAT_PURES|PAT_ANY)) + && (needfullpath || unmetalen != stringlen)) { + /* + * We need to copy if we need to prepend the path so far + * (in which case we copy both chunks), or if we have + * Meta characters. + */ + char *dst; + int icopy, ncopy; + + dst = tryalloced = zalloc(unmetalen + unmetalenp); + + if (needfullpath) { + /* loop twice, copy path buffer first time */ + ptr = pathbuf; + ncopy = unmetalenp; + } else { + /* just loop once, copy string with unmetafication */ + ptr = string; + ncopy = unmetalen; + } + for (icopy = 0; icopy < 2; icopy++) { + for (i = 0; i < ncopy; i++) { + if (*ptr == Meta) { + ptr++; + *dst++ = *ptr++ ^ 32; + } else { + *dst++ = *ptr++; + } + } + if (!needfullpath) + break; + /* next time append test string to path so far */ + ptr = string; + ncopy = unmetalen; + } + + if (needfullpath) { + patinstart = tryalloced + unmetalenp; + patinpath = tryalloced; + } else { + patinstart = tryalloced; + patinpath = NULL; + } + stringlen = unmetalen; + } else { + patinstart = string; + tryalloced = patinpath = NULL; + } + + patinend = patinstart + stringlen; + /* + * From now on we do not require NULL termination of + * the test string. There should also be no more references + * to the variable string. + */ if (prog->flags & (PAT_PURES|PAT_ANY)) { - if ((prog->flags & PAT_ANY) || - ((prog->flags & PAT_NOANCH) ? - !strncmp(progstr, string, prog->patmlen) - : !strcmp(progstr, string))) { - /* in case used for ${..#..} etc. */ - patinput = string + prog->patmlen; - /* if matching files, must update globbing flags */ - patglobflags = prog->globend; - return 1; - } else - return 0; + /* + * Either we are testing against a pure string, + * or we can match anything at all. + */ + int ret; + if (prog->flags & PAT_ANY) { + /* + * Optimisation for a single "*": always matches + * (except for no_glob_dots, see below). + */ + ret = 1; + } else { + /* + * Testing a pure string. See if initial + * components match. + */ + int lendiff = stringlen - prog->patmlen; + if (lendiff < 0) { + /* No, the pattern string is too long. */ + ret = 0; + } else if (!memcmp(progstr, patinstart, prog->patmlen)) { + /* + * Initial component matches. Matches either + * if lengths are the same or we are not anchored + * to the end of the string. + */ + ret = !lendiff || (prog->flags & PAT_NOANCH); + } else { + /* No match. */ + ret = 0; + } + } + if (ret) { + /* + * For files, we won't match initial "."s unless + * glob_dots is set. + */ + if ((prog->flags & PAT_NOGLD) && *patinstart == '.') { + ret = 0; + } else { + /* + * Remember the length in case used for ${..#..} etc. + * In this case, we didn't unmetafy the string. + */ + patinlen = (int)prog->patmlen; + /* if matching files, must update globbing flags */ + patglobflags = prog->globend; + } + } + + if (tryalloced) + zfree(tryalloced, unmetalen + unmetalenp); + + return ret; } else { /* * Test for a `must match' string, unless we're scanning for a match * in which case we don't need to do this each time. */ - if (!(prog->flags & PAT_SCAN) && prog->mustoff && - !strstr(string, (char *)prog + prog->mustoff)) + ret = 1; + if (!(prog->flags & PAT_SCAN) && prog->mustoff) + { + char *testptr; /* start pointer into test string */ + char *teststop; /* last point from which we can match */ + char *patptr = (char *)prog + prog->mustoff; + int patlen = prog->patmlen; + int found = 0; + + if (patlen > stringlen) { + /* Too long, can't match. */ + ret = 0; + } else { + teststop = patinend - patlen; + + for (testptr = patinstart; testptr <= teststop; testptr++) + { + if (!memcmp(testptr, patptr, patlen)) { + found = 1; + break; + } + } + + if (!found) + ret = 0; + } + } + if (!ret) { + if (tryalloced) + zfree(tryalloced, unmetalen + unmetalenp); return 0; + } - patinlen = 0; /* don't calculate length unless needed */ - patflags = prog->flags; patglobflags = prog->globflags; if (!(patflags & PAT_FILE)) { forceerrs = -1; @@ -1339,12 +1740,31 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) globdots = !(patflags & PAT_NOGLD); parsfound = 0; + patinput = patinstart; + if (patmatch((Upat)progstr)) { /* * we were lazy and didn't save the globflags if an exclusion * failed, so set it now */ patglobflags = prog->globend; + + /* + * Record length of successful match, including Meta + * characters. Do it here so that patmatchlen() can return + * it even if we delete the pattern strings. + */ + patinlen = patinput - patinstart; + /* + * Optimization: if we didn't find any Meta characters + * to begin with, we don't need to look for them now. + */ + if (unmetalen != origlen) { + for (ptr = patinstart; ptr < patinput; ptr++) + if (imeta(*ptr)) + patinlen++; + } + /* * Should we clear backreferences and matches on a failed * match? @@ -1353,15 +1773,18 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) /* * m flag: for global match. This carries no overhead * in the pattern matching part. + * + * Remember the test pattern is already unmetafied. */ char *str; - int mlen = ztrsub(patinput, patinstart); + int mlen = CHARSUB(patinput, patinstart); - str = ztrduppfx(patinstart, patinput - patinstart); + str = metafy(patinstart, patinput - patinstart, META_DUP); setsparam("MATCH", str); setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS))); setiparam("MEND", - (zlong)(mlen + patoffset + !isset(KSHARRAYS) - 1)); + (zlong)(mlen + patoffset + + !isset(KSHARRAYS) - 1)); } if (prog->patnpar && nump) { /* @@ -1376,9 +1799,10 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) for (i = 0; i < prog->patnpar && i < maxnpos; i++) { if (parsfound & (1 << i)) { if (begp) - *begp++ = ztrsub(*sp, patinstart) + patoffset; + *begp++ = CHARSUB(*sp, patinstart) + patoffset; if (endp) - *endp++ = ztrsub(*ep, patinstart) + patoffset - 1; + *endp++ = CHARSUB(*ep, patinstart) + patoffset + - 1; } else { if (begp) *begp++ = -1; @@ -1397,16 +1821,16 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) char **matcharr, **mbeginarr, **mendarr; char numbuf[DIGBUFSIZE]; - matcharr = zcalloc(palen*sizeof(char *)); - mbeginarr = zcalloc(palen*sizeof(char *)); - mendarr = zcalloc(palen*sizeof(char *)); + matcharr = zshcalloc(palen*sizeof(char *)); + mbeginarr = zshcalloc(palen*sizeof(char *)); + mendarr = zshcalloc(palen*sizeof(char *)); sp = patbeginp; ep = patendp; for (i = 0; i < prog->patnpar; i++) { if (parsfound & (1 << i)) { - matcharr[i] = ztrduppfx(*sp, *ep - *sp); + matcharr[i] = metafy(*sp, *ep - *sp, META_DUP); /* * mbegin and mend give indexes into the string * in the standard notation, i.e. respecting @@ -1417,12 +1841,12 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) * corresponds to indexing as ${foo[1,1]}. */ sprintf(numbuf, "%ld", - (long)(ztrsub(*sp, patinstart) + + (long)(CHARSUB(*sp, patinstart) + patoffset + !isset(KSHARRAYS))); mbeginarr[i] = ztrdup(numbuf); sprintf(numbuf, "%ld", - (long)(ztrsub(*ep, patinstart) + + (long)(CHARSUB(*ep, patinstart) + patoffset + !isset(KSHARRAYS) - 1)); mendarr[i] = ztrdup(numbuf); @@ -1442,12 +1866,31 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) setaparam("mbegin", mbeginarr); setaparam("mend", mendarr); } - return 1; + + ret = 1; } else - return 0; + ret = 0; + + if (tryalloced) + zfree(tryalloced, unmetalen + unmetalenp); + + return ret; } } +/* + * Return length of previous succesful match. This is + * in metafied bytes, i.e. includes a count of Meta characters. + * Unusual and futile attempt at modular encapsulation. + */ + +/**/ +int +patmatchlen(void) +{ + return patinlen; +} + /* * Match literal characters with case insensitivity test: the first * comes from the input string, the second the current pattern. @@ -1458,13 +1901,22 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp) (isupper(chpa) ? tolower(chpa) : chpa)) : \ (patglobflags & GF_LCMATCHUC) ? \ (islower(chpa) && toupper(chpa) == chin) : 0)) +/* + * The same but caching an expression from the first argument, + * Requires local charmatch_cache definition. + */ +#define CHARMATCH_EXPR(expr, chpa) \ + (charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa)) /* * exactpos is used to remember how far down an exact string we have * matched, if we are doing approximation and can therefore redo from * the same point; we never need to otherwise. + * + * exactend is a pointer to the end of the string, which isn't + * null-terminated. */ -static char *exactpos; +static char *exactpos, *exactend; /* * Main matching routine. @@ -1479,7 +1931,7 @@ patmatch(Upat prog) { /* Current and next nodes */ Upat scan = prog, next, opnd; - char *start, *save, *chrop; + char *start, *save, *chrop, *chrend, *compend; int savglobflags, op, no, min, nextch, fail = 0, saverrsfound; zrange_t from, to, comp; @@ -1487,60 +1939,67 @@ patmatch(Upat prog) next = PATNEXT(scan); if (!globdots && P_NOTDOT(scan) && patinput == patinstart && - *patinput == '.') + patinput < patinend && *patinput == '.') return 0; switch (P_OP(scan)) { case P_ANY: - if (!*patinput) + if (patinput == patinend) fail = 1; else - METAINC(patinput); + CHARINC(patinput); break; case P_EXACTLY: /* * acts as nothing if *chrop is null: this is used by * approx code. */ - chrop = exactpos ? exactpos : (char *)P_OPERAND(scan); + if (exactpos) { + chrop = exactpos; + chrend = exactend; + } else { + chrop = P_LS_STR(scan); + chrend = chrop + P_LS_LEN(scan); + } exactpos = NULL; - while (*chrop && *patinput) { - int chin = STOUC(UNMETA(patinput)); - int chpa = STOUC(UNMETA(chrop)); + while (chrop < chrend && patinput < patinend) { + int chin = CHARREF(patinput); + int chpa = CHARREF(chrop); if (!CHARMATCH(chin, chpa)) { fail = 1; break; } - METAINC(chrop); - METAINC(patinput); + CHARINC(chrop); + CHARINC(patinput); } - if (*chrop) { + if (chrop < chrend) { exactpos = chrop; + exactend = chrend; fail = 1; } break; case P_ANYOF: - if (!*patinput || + if (patinput == patinend || !patmatchrange((char *)P_OPERAND(scan), - STOUC(UNMETA(patinput)))) + CHARREF(patinput))) fail = 1; else - METAINC(patinput); + CHARINC(patinput); break; case P_ANYBUT: - if (!*patinput || + if (patinput == patinend || patmatchrange((char *)P_OPERAND(scan), - STOUC(UNMETA(patinput)))) + CHARREF(patinput))) fail = 1; else - METAINC(patinput); + CHARINC(patinput); break; case P_NUMRNG: case P_NUMFROM: case P_NUMTO: /* * To do this properly, we really have to treat numbers as - * closures: that's so things like like <1-1000>33 will + * closures: that's so things like <1-1000>33 will * match 633 (they didn't up to 3.1.6). To avoid making this * too inefficient, we see if there's an exact match next: * if there is, and it's not a digit, we return 1 after @@ -1565,24 +2024,62 @@ patmatch(Upat prog) to = *((zrange_t *) start); #endif } - start = patinput; - comp = (zrange_t) zstrtol(patinput, &save, 10); - patinput = save; + start = compend = patinput; + comp = 0; + while (patinput < patinend && idigit(*patinput)) { + if (comp) + comp *= 10; + comp += *patinput - '0'; + patinput++; + compend++; + + if (comp & ((zrange_t)1 << (sizeof(comp)*8 - +#ifdef ZRANGE_T_IS_SIGNED + 2 +#else + 1 +#endif + ))) { + /* + * Out of range (allowing for signedness, which + * we need if we are using zlongs). + * This is as far as we can go. + * If we're doing a range "from", skip all the + * remaining numbers. Otherwise, we can't + * match beyond the previous point anyway. + * Leave the pointer to the last calculated + * position (compend) where it was before. + */ + if (op == P_NUMFROM) { + while (patinput < patinend && idigit(*patinput)) + patinput++; + } + } + } + save = patinput; no = 0; while (patinput > start) { /* if already too small, no power on earth can save it */ - if (comp < from) + if (comp < from && patinput <= compend) break; if ((op == P_NUMFROM || comp <= to) && patmatch(next)) { return 1; } if (!no && P_OP(next) == P_EXACTLY && - !idigit(STOUC(*(char *)P_OPERAND(next))) && + (!P_LS_LEN(next) || + !idigit(STOUC(*P_LS_STR(next)))) && !(patglobflags & 0xff)) return 0; patinput = --save; no++; - comp /= 10; + /* + * With a range start and an unrepresentable test + * number, we just back down the test string without + * changing the number until we get to a representable + * one. + */ + if (patinput < compend) + comp /= 10; } patinput = start; fail = 1; @@ -1590,7 +2087,7 @@ patmatch(Upat prog) case P_NUMANY: /* This is <->: any old set of digits, don't bother comparing */ start = patinput; - while (idigit(STOUC(*patinput))) + while (patinput < patinend && idigit(CHARREF(patinput))) patinput++; save = patinput; no = 0; @@ -1598,7 +2095,8 @@ patmatch(Upat prog) if (patmatch(next)) return 1; if (!no && P_OP(next) == P_EXACTLY && - !idigit(STOUC(*(char *)P_OPERAND(next))) && + (!P_LS_LEN(next) || + !idigit(CHARREF(P_LS_STR(next)))) && !(patglobflags & 0xff)) return 0; patinput = --save; @@ -1698,7 +2196,7 @@ patmatch(Upat prog) * to the end of the asserted pattern; the endpoint * in the target string is nulled out. */ - if (!(fail = (*patinput != '\0'))) + if (!(fail = (patinput < patinend))) return 1; break; case P_BRANCH: @@ -1718,7 +2216,8 @@ patmatch(Upat prog) /* * The strategy is to test the asserted pattern, * recording via P_EXCSYNC how far the part to - * be excluded matched. We then null out that + * be excluded matched. We then set the + * length of the test string to that * point and see if the exclusion as far as * P_EXCEND also matches that string. * We need to keep testing the asserted pattern @@ -1731,8 +2230,16 @@ patmatch(Upat prog) * over, that doesn't matter: we should fail anyway. * The pointer also tells us where the asserted * pattern matched for use by the exclusion. + * + * It's hard to allocate space for this + * beforehand since we may need to do it + * recursively. + * + * P.S. in case you were wondering, this code + * is horrible. */ Upat syncstrp; + char *origpatinend; unsigned char *oldsyncstr; char *matchpt = NULL; int ret, savglobdots, matchederrs = 0; @@ -1748,13 +2255,14 @@ patmatch(Upat prog) * of view, so we use a different sync string. */ oldsyncstr = syncstrp->p; - if (!patinlen) - patinlen = strlen(patinstart)+1; - syncstrp->p = (unsigned char *)zcalloc(patinlen); + syncstrp->p = (unsigned char *) + zshcalloc((patinend - patinstart) + 1); + origpatinend = patinend; while ((ret = patmatch(P_OPERAND(scan)))) { unsigned char *syncpt; - char savchar, *testptr; + char *savpatinstart; int savforce = forceerrs; + int savpatflags = patflags, synclen; forceerrs = -1; savglobdots = globdots; matchederrs = errsfound; @@ -1770,13 +2278,25 @@ patmatch(Upat prog) */ for (syncpt = syncstrp->p; !*syncpt; syncpt++) ; - testptr = patinstart + (syncpt - syncstrp->p); - DPUTS(testptr > matchpt, "BUG: EXCSYNC failed"); - savchar = *testptr; - *testptr = '\0'; + synclen = syncpt - syncstrp->p; + if (patinstart + synclen != patinend) { + /* + * Temporarily mark the string as + * ending at this point. + */ + DPUTS(patinstart + synclen > matchpt, + "BUG: EXCSYNC failed"); + + patinend = patinstart + synclen; + /* + * If this isn't really the end of the string, + * remember this for the (#e) assertion. + */ + patflags |= PAT_NOTEND; + } + savpatinstart = patinstart; next = PATNEXT(scan); while (next && P_ISEXCLUDE(next)) { - char *buf = NULL; patinput = save; /* * turn off approximations in exclusions: @@ -1787,19 +2307,18 @@ patmatch(Upat prog) patglobflags &= ~0xff; errsfound = 0; opnd = P_OPERAND(next) + 1; - if (P_OP(next) == P_EXCLUDP && pathpos) { + if (P_OP(next) == P_EXCLUDP && patinpath) { /* - * top level exclusion with a file, + * Top level exclusion with a file, * applies to whole path so add the - * segments already matched + * segments already matched. + * We copied these in front of the + * test pattern, so patinend doesn't + * need moving. */ DPUTS(patinput != patinstart, "BUG: not at start excluding path"); - buf = (char *) - zalloc(pathpos + patinlen); - strcpy(buf, pathbuf); - strcpy(buf + pathpos, patinput); - patinput = buf; + patinput = patinstart = patinpath; } if (patmatch(opnd)) { ret = 0; @@ -1810,13 +2329,20 @@ patmatch(Upat prog) */ parsfound = savparsfound; } - if (buf) - zfree(buf, pathpos + patinlen); + if (patinpath) { + patinput = savpatinstart + + (patinput - patinstart); + patinstart = savpatinstart; + } if (!ret) break; next = PATNEXT(next); } - *testptr = savchar; + /* + * Restore original end position. + */ + patinend = origpatinend; + patflags = savpatflags; globdots = savglobdots; forceerrs = savforce; if (ret) @@ -1825,7 +2351,8 @@ patmatch(Upat prog) patglobflags = savglobflags; errsfound = saverrsfound; } - zfree((char *)syncstrp->p, patinlen); + zfree((char *)syncstrp->p, + (patinend - patinstart) + 1); syncstrp->p = oldsyncstr; if (ret) { patinput = matchpt; @@ -1858,9 +2385,8 @@ patmatch(Upat prog) opnd = P_OPERAND(scan); ptrp = opnd++; if (!ptrp->p) { - if (!patinlen) - patinlen = strlen((char *)patinstart)+1; - ptrp->p = (unsigned char *)zcalloc(patinlen); + ptrp->p = (unsigned char *) + zshcalloc((patinend - patinstart) + 1); pfree = 1; } ptr = ptrp->p + (patinput - patinstart); @@ -1884,7 +2410,8 @@ patmatch(Upat prog) if (ret) ret = patmatch(opnd); if (pfree) { - zfree((char *)ptrp->p, patinlen); + zfree((char *)ptrp->p, + (patinend - patinstart) + 1); ptrp->p = NULL; } if (ret) @@ -1909,13 +2436,13 @@ patmatch(Upat prog) * This is just simple cases, matching one character. * With approximations, we still handle * this way, since * no approximation is ever necessary, but other closures - * are handled by the more compicated branching method + * are handled by the more complicated branching method */ op = P_OP(scan); /* Note that no counts possibly metafied characters */ start = patinput; if (op == P_STAR) { - for (no = 0; *patinput; METAINC(patinput)) + for (no = 0; patinput < patinend; CHARINC(patinput)) no++; /* simple optimization for reasonably common case */ if (P_OP(next) == P_END) @@ -1924,7 +2451,8 @@ patmatch(Upat prog) DPUTS(patglobflags & 0xff, "BUG: wrong backtracking with approximation."); if (!globdots && P_NOTDOT(P_OPERAND(scan)) && - patinput == patinstart && *patinput == '.') + patinput == patinstart && patinput < patinend && + CHARREF(patinput) == '.') return 0; no = patrepeat(P_OPERAND(scan)); } @@ -1933,8 +2461,9 @@ patmatch(Upat prog) * Lookahead to avoid useless matches. This is not possible * with approximation. */ - if (P_OP(next) == P_EXACTLY && !(patglobflags & 0xff)) { - char *nextop = (char *)P_OPERAND(next); + if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) && + !(patglobflags & 0xff)) { + char *nextop = P_LS_STR(next); /* * If that P_EXACTLY is last (common in simple patterns, * such as *.c), then it can be only be matched at one @@ -1942,37 +2471,41 @@ patmatch(Upat prog) */ if (P_OP(PATNEXT(next)) == P_END && !(patflags & PAT_NOANCH)) { - int ptlen = strlen(patinput); - int oplen = strlen(nextop); + int ptlen = patinend - patinput; + int lenmatch = patinend - (min ? CHARNEXT(start) : start); /* Are we in the right range? */ - if (oplen > strlen(min ? METANEXT(start) : start) || - oplen < ptlen) + if (P_LS_LEN(next) > lenmatch || P_LS_LEN(next) < ptlen) return 0; /* Yes, just position appropriately and test. */ - patinput += ptlen - oplen; - if (patinput > start && patinput[-1] == Meta) { - /* doesn't align properly, no go */ - return 0; - } + patinput += ptlen - P_LS_LEN(next); + /* + * Here we will need to be careful that patinput is not + * in the middle of a multibyte character. + */ /* Continue loop with P_EXACTLY test. */ break; } - nextch = STOUC(UNMETA(nextop)); + nextch = CHARREF(nextop); } else nextch = -1; save = patinput; savglobflags = patglobflags; saverrsfound = errsfound; while (no >= min) { - int chin; - if ((nextch < 0 || (chin = STOUC(UNMETA(patinput)), - CHARMATCH(chin, nextch))) && - patmatch(next)) - return 1; + int charmatch_cache; + if (nextch < 0 || + (patinput < patinend && + CHARMATCH_EXPR(CHARREF(patinput), nextch))) { + if (patmatch(next)) + return 1; + } no--; save--; - if (save > start && save[-1] == Meta) - save--; + /* + * Here we will need to make sure save is + * decremented properly to the start of + * the preceeding multibyte character. + */ patinput = save; patglobflags = savglobflags; errsfound = saverrsfound; @@ -1983,8 +2516,16 @@ patmatch(Upat prog) * anything here. */ return 0; + case P_ISSTART: + if (patinput != patinstart || (patflags & PAT_NOTSTART)) + fail = 1; + break; + case P_ISEND: + if (patinput < patinend || (patflags & PAT_NOTEND)) + fail = 1; + break; case P_END: - if (!(fail = (*patinput && !(patflags & PAT_NOANCH)))) + if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH)))) return 1; break; #ifdef DEBUG @@ -2027,8 +2568,8 @@ patmatch(Upat prog) "BUG: non-exact match has set exactpos"); /* Try omitting a character from the input string */ - if (*patinput) { - METAINC(patinput); + if (patinput < patinend) { + CHARINC(patinput); /* If we are not on an exact match, then this is * our last gasp effort, so we can optimize out * the recursive call. @@ -2041,13 +2582,13 @@ patmatch(Upat prog) if (P_OP(scan) == P_EXACTLY) { char *nextexact = savexact; - DPUTS(!savexact || !*savexact, + DPUTS(!savexact, "BUG: exact match has not set exactpos"); - METAINC(nextexact); + CHARINC(nextexact); - if (*save) { + if (save < patinend) { char *nextin = save; - METAINC(nextin); + CHARINC(nextin); patglobflags = savglobflags; errsfound = saverrsfound; exactpos = savexact; @@ -2056,18 +2597,19 @@ patmatch(Upat prog) * Try swapping two characters in patinput and * exactpos */ - if (*save && *nextin && *nextexact) { - int cin0 = UNMETA(save); - int cpa0 = UNMETA(exactpos); - int cin1 = UNMETA(nextin); - int cpa1 = UNMETA(nextexact); + if (save < patinend && nextin < patinend && + nextexact < exactend) { + int cin0 = CHARREF(save); + int cpa0 = CHARREF(exactpos); + int cin1 = CHARREF(nextin); + int cpa1 = CHARREF(nextexact); if (CHARMATCH(cin0, cpa1) && CHARMATCH(cin1, cpa0)) { patinput = nextin; - METAINC(patinput); + CHARINC(patinput); exactpos = nextexact; - METAINC(exactpos); + CHARINC(exactpos); if (patmatch(scan)) return 1; @@ -2090,12 +2632,13 @@ patmatch(Upat prog) exactpos = savexact; } + DPUTS(exactpos == exactend, "approximating too far"); /* * Try moving up the exact match pattern. * This must be the last attempt, so just loop * instead of calling recursively. */ - METAINC(exactpos); + CHARINC(exactpos); continue; } } @@ -2115,6 +2658,11 @@ patmatchrange(char *range, int ch) { int r1, r2; + /* + * Careful here: unlike other strings, range is a NULL-terminated, + * metafied string, because we need to treat the Posix and hyphenated + * ranges specially. + */ for (; *range; range++) { if (imeta(STOUC(*range))) { switch (STOUC(*range)-STOUC(Meta)) { @@ -2130,6 +2678,10 @@ patmatchrange(char *range, int ch) if (isalnum(ch)) return 1; break; + case PP_ASCII: + if ((ch & ~0x7f) == 0) + return 1; + break; case PP_BLANK: if (ch == ' ' || ch == '\t') return 1; @@ -2169,6 +2721,7 @@ patmatchrange(char *range, int ch) case PP_XDIGIT: if (isxdigit(ch)) return 1; + break; case PP_RANGE: range++; r1 = STOUC(UNMETA(range)); @@ -2186,7 +2739,7 @@ patmatchrange(char *range, int ch) DPUTS(1, "BUG: unknown metacharacter in range."); break; } - } else if (*range == ch) + } else if (STOUC(*range) == ch) return 1; } return 0; @@ -2197,7 +2750,7 @@ patmatchrange(char *range, int ch) /**/ static int patrepeat(Upat p) { - int count = 0, tch, inch; + int count = 0, tch, charmatch_cache; char *scan, *opnd; scan = patinput; @@ -2211,22 +2764,24 @@ static int patrepeat(Upat p) break; #endif case P_EXACTLY: - tch = STOUC(UNMETA(opnd)); - while (*scan && (inch = STOUC(UNMETA(scan)), CHARMATCH(inch, tch))) { + DPUTS(P_LS_LEN(p) != 1, "closure following more than one character"); + tch = CHARREF(P_LS_STR(p)); + while (scan < patinend && + CHARMATCH_EXPR(CHARREF(scan), tch)) { count++; - METAINC(scan); + CHARINC(scan); } break; case P_ANYOF: - while (*scan && patmatchrange(opnd, STOUC(UNMETA(scan)))) { + while (scan < patinend && patmatchrange(opnd, CHARREF(scan))) { count++; - METAINC(scan); + CHARINC(scan); } break; case P_ANYBUT: - while (*scan && !patmatchrange(opnd, STOUC(UNMETA(scan)))) { + while (scan < patinend && !patmatchrange(opnd, CHARREF(scan))) { count++; - METAINC(scan); + CHARINC(scan); } break; #ifdef DEBUG @@ -2279,7 +2834,14 @@ patdump(Patprog r) next = PATNEXT(up); printf("(%d)", next ? next-codestart : 0); s += sizeof(union upat); - if (op == P_ANYOF || op == P_ANYBUT || op == P_EXACTLY) { + if (op == P_EXACTLY) { + long llen = *(long *)s; + s += sizeof(long); + while (llen--) { + putchar(CHARREF(s)); + CHARINC(s); + } + } else if (op == P_ANYOF || op == P_ANYBUT) { while (*s != '\0') { if (itok(*s)) { if (*s == Meta + PP_RANGE) { @@ -2381,6 +2943,12 @@ patprop(Upat op) case P_GFLAGS: p = "GFLAGS"; break; + case P_ISSTART: + p = "ISSTART"; + break; + case P_ISEND: + p = "ISEND"; + break; case P_NOTHING: p = "NOTHING"; break; -- cgit 1.4.1