diff options
Diffstat (limited to 'Src/pattern.c')
-rw-r--r-- | Src/pattern.c | 104 |
1 files changed, 62 insertions, 42 deletions
diff --git a/Src/pattern.c b/Src/pattern.c index e947d1216..1e0ae88d9 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -239,7 +239,7 @@ typedef unsigned long zrange_t; * a bit tricky... */ #define WCHAR_INVALID(ch) \ - ((wchar_t) (0xDC00 + STOUC(ch))) + ((wchar_t) (0xDC00 + (unsigned char) ch)) #endif /* MULTIBYTE_SUPPORT */ /* @@ -346,7 +346,7 @@ metacharinc(char **x) * set doesn't have the property that all bytes with the 8th * bit clear are single characters then we are stuffed. */ - if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*inptr) & 0x80)) + if (!(patglobflags & GF_MULTIBYTE) || !((unsigned char) *inptr & 0x80)) { if (itok(*inptr)) inchar = ztokens[*inptr++ - Pound]; @@ -357,7 +357,7 @@ metacharinc(char **x) inchar = *inptr++; } *x = inptr; - return (wchar_t)STOUC(inchar); + return (wchar_t)(unsigned char) inchar; } while (*inptr) { @@ -1181,8 +1181,8 @@ pattern_range_to_string(char *rangestr, char *outstr) int len = 0; while (*rangestr) { - if (imeta(STOUC(*rangestr))) { - int swtype = STOUC(*rangestr) - STOUC(Meta); + if (imeta((unsigned char) *rangestr)) { + int swtype = (unsigned char) *rangestr - (unsigned char) Meta; if (swtype == 0) { /* Ordindary metafied character */ @@ -1278,17 +1278,17 @@ patcomppiece(int *flagp, int paren) kshchar = '\0'; if (*patparse && patparse[1] == Inpar) { if (*patparse == zpc_special[ZPC_KSH_PLUS]) - kshchar = STOUC('+'); + kshchar = (unsigned char) '+'; else if (*patparse == zpc_special[ZPC_KSH_BANG]) - kshchar = STOUC('!'); + kshchar = (unsigned char) '!'; else if (*patparse == zpc_special[ZPC_KSH_BANG2]) - kshchar = STOUC('!'); + kshchar = (unsigned char) '!'; else if (*patparse == zpc_special[ZPC_KSH_AT]) - kshchar = STOUC('@'); + kshchar = (unsigned char) '@'; else if (*patparse == zpc_special[ZPC_KSH_STAR]) - kshchar = STOUC('*'); + kshchar = (unsigned char) '*'; else if (*patparse == zpc_special[ZPC_KSH_QUEST]) - kshchar = STOUC('?'); + kshchar = (unsigned char) '?'; } /* @@ -1468,7 +1468,8 @@ patcomppiece(int *flagp, int paren) ch = range_type(patparse, len); patparse = nptr + 2; if (ch != PP_UNKWN) - patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN); + patadd(NULL, (unsigned char) Meta + ch, 1, + PA_NOALIGN); continue; } charstart = patparse; @@ -1476,10 +1477,10 @@ patcomppiece(int *flagp, int paren) if (*patparse == Dash && patparse[1] && patparse[1] != Outbrack) { - patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN); + patadd(NULL, (unsigned char) Meta+PP_RANGE, 1, PA_NOALIGN); if (itok(*charstart)) { - patadd(0, STOUC(ztokens[*charstart - Pound]), 1, - PA_NOALIGN); + patadd(0, (unsigned char) ztokens[*charstart - Pound], + 1, PA_NOALIGN); } else { patadd(charstart, 0, patparse-charstart, PA_NOALIGN); } @@ -1487,7 +1488,7 @@ patcomppiece(int *flagp, int paren) METACHARINC(patparse); } if (itok(*charstart)) { - patadd(0, STOUC(ztokens[*charstart - Pound]), 1, + patadd(0, (unsigned char) ztokens[*charstart - Pound], 1, PA_NOALIGN); } else { patadd(charstart, 0, patparse-charstart, PA_NOALIGN); @@ -1910,8 +1911,8 @@ charref(char *x, char *y, int *zmb_ind) wchar_t wc; size_t ret; - if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80)) - return (wchar_t) STOUC(*x); + if (!(patglobflags & GF_MULTIBYTE) || !((unsigned char) *x & 0x80)) + return (wchar_t) (unsigned char) *x; ret = mbrtowc(&wc, x, y-x, &shiftstate); @@ -1937,7 +1938,7 @@ charnext(char *x, char *y) wchar_t wc; size_t ret; - if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80)) + if (!(patglobflags & GF_MULTIBYTE) || !((unsigned char) *x & 0x80)) return x + 1; ret = mbrtowc(&wc, x, y-x, &shiftstate); @@ -1965,8 +1966,8 @@ charrefinc(char **x, char *y, int *z) wchar_t wc; size_t ret; - if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80)) - return (wchar_t) STOUC(*(*x)++); + if (!(patglobflags & GF_MULTIBYTE) || !((unsigned char) **x & 0x80)) + return (wchar_t) (unsigned char) *(*x)++; ret = mbrtowc(&wc, *x, y-*x, &shiftstate); @@ -2025,13 +2026,13 @@ charsub(char *x, char *y) #else /* no MULTIBYTE_SUPPORT */ /* Get a character from the start point in a string */ -#define CHARREF(x, y) (STOUC(*(x))) +#define CHARREF(x, y) ((unsigned char) (*(x))) /* Get a pointer to the next character */ #define CHARNEXT(x, y) ((x)+1) /* Increment a pointer past the current character. */ #define CHARINC(x, y) ((x)++) /* Get a character and increment */ -#define CHARREFINC(x, y, z) (STOUC(*(x)++)) +#define CHARREFINC(x, y, z) ((unsigned char) (*(x)++)) /* Counter the number of characters between two pointers, smaller first */ #define CHARSUB(x,y) ((y) - (x)) @@ -2890,7 +2891,7 @@ patmatch(Upat prog) } if (!no && P_OP(next) == P_EXACTLY && (!P_LS_LEN(next) || - !idigit(STOUC(*P_LS_STR(next)))) && + !idigit((unsigned char) (*P_LS_STR(next)))) && !(patglobflags & 0xff)) return 0; patinput = --save; @@ -2986,14 +2987,15 @@ patmatch(Upat prog) case P_EXCSYNC: /* See the P_EXCLUDE code below for where syncptr comes from */ { - unsigned char *syncptr; + unsigned char *syncstart, *syncptr, *ptr; Upat after; after = P_OPERAND(scan); DPUTS(!P_ISEXCLUDE(after), "BUG: EXCSYNC not followed by EXCLUDE."); DPUTS(!P_OPERAND(after)->p, "BUG: EXCSYNC not handled by EXCLUDE"); - syncptr = P_OPERAND(after)->p + (patinput - patinstart); + syncstart = P_OPERAND(after)->p; + syncptr = syncstart + (patinput - patinstart); /* * If we already matched from here, this time we fail. * See WBRANCH code for story about error count. @@ -3008,6 +3010,23 @@ patmatch(Upat prog) * failed anyway. */ *syncptr = errsfound + 1; + /* + * Because of backtracking, any match before this point + * can't apply to the current branch we're on so is now + * a failure --- this can happen if, on a previous + * branch, we initially marked a success before failing + * on a later part of the pattern after marking up the + * P_EXCSYNC (even an end anchor will have this effect). + * To make sure we record the current match point + * correctly, mark those down now. + * + * This might have side effects on the efficiency of + * pathological cases involving nested branches. To + * fix that we'd probably need to record matches on + * different branches separately. + */ + for (ptr = syncstart; ptr < syncptr; ++ptr) + *ptr = 0; } break; case P_EXCEND: @@ -3600,8 +3619,8 @@ mb_patmatchrange(char *range, wchar_t ch, int zmb_ind, wint_t *indptr, int *mtp) * ranges specially. */ while (*range) { - if (imeta(STOUC(*range))) { - int swtype = STOUC(*range++) - STOUC(Meta); + if (imeta((unsigned char) *range)) { + int swtype = (unsigned char) *range++ - (unsigned char) Meta; if (mtp) *mtp = swtype; switch (swtype) { @@ -3672,6 +3691,7 @@ mb_patmatchrange(char *range, wchar_t ch, int zmb_ind, wint_t *indptr, int *mtp) return 1; break; case PP_IDENT: + /* Could use INAMESPC here? */ if (wcsitype(ch, IIDENT)) return 1; break; @@ -3753,8 +3773,8 @@ mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp) *mtp = 0; while (*range) { - if (imeta(STOUC(*range))) { - int swtype = STOUC(*range++) - STOUC(Meta); + if (imeta((unsigned char) *range)) { + int swtype = (unsigned char) *range++ - (unsigned char) Meta; switch (swtype) { case 0: range--; @@ -3845,13 +3865,13 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp) * ranges specially. */ for (; *range; range++) { - if (imeta(STOUC(*range))) { - int swtype = STOUC(*range) - STOUC(Meta); + if (imeta((unsigned char) *range)) { + int swtype = (unsigned char) *range - (unsigned char) Meta; if (mtp) *mtp = swtype; switch (swtype) { case 0: - if (STOUC(*++range ^ 32) == ch) + if ((unsigned char) (*++range ^ 32) == ch) return 1; break; case PP_ALPHA: @@ -3931,9 +3951,9 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp) break; case PP_RANGE: range++; - r1 = STOUC(UNMETA(range)); + r1 = (unsigned char) UNMETA(range); METACHARINC(range); - r2 = STOUC(UNMETA(range)); + r2 = (unsigned char) UNMETA(range); if (*range == Meta) range++; if (r1 <= ch && ch <= r2) { @@ -3955,7 +3975,7 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp) DPUTS(1, "BUG: unknown metacharacter in range."); break; } - } else if (STOUC(*range) == ch) { + } else if ((unsigned char) *range == ch) { if (mtp) *mtp = 0; return 1; @@ -3989,12 +4009,12 @@ patmatchindex(char *range, int ind, int *chr, int *mtp) *mtp = 0; for (; *range; range++) { - if (imeta(STOUC(*range))) { - int swtype = STOUC(*range) - STOUC(Meta); + if (imeta((unsigned char) *range)) { + int swtype = (unsigned char) *range - (unsigned char) Meta; switch (swtype) { case 0: /* ordinary metafied character */ - rchr = STOUC(*++range) ^ 32; + rchr = (unsigned char) *++range ^ 32; if (!ind) { *chr = rchr; return 1; @@ -4028,9 +4048,9 @@ patmatchindex(char *range, int ind, int *chr, int *mtp) case PP_RANGE: range++; - r1 = STOUC(UNMETA(range)); + r1 = (unsigned char) UNMETA(range); METACHARINC(range); - r2 = STOUC(UNMETA(range)); + r2 = (unsigned char) UNMETA(range); if (*range == Meta) range++; rdiff = r2 - r1; @@ -4050,7 +4070,7 @@ patmatchindex(char *range, int ind, int *chr, int *mtp) } } else { if (!ind) { - *chr = STOUC(*range); + *chr = (unsigned char) *range; return 1; } } |