From b62e911341c8ec7446378b477c47da4256053dc0 Mon Sep 17 00:00:00 2001 From: Oliver Kiddle Date: Sat, 13 May 2023 00:53:32 +0200 Subject: 51723: migrate pcre module to pcre2 --- Src/Modules/pcre.c | 223 +++++++++++++++++++++-------------------------------- 1 file changed, 87 insertions(+), 136 deletions(-) (limited to 'Src/Modules/pcre.c') diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index 46875a59b..079ecc2c5 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -34,11 +34,11 @@ #define CPCRE_PLAIN 0 /**/ -#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) -#include +#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H) +#define PCRE2_CODE_UNIT_WIDTH 8 +#include -static pcre *pcre_pattern; -static pcre_extra *pcre_hints; +static pcre2_code *pcre_pattern; /**/ static int @@ -54,8 +54,8 @@ zpcre_utf8_enabled(void) return 0; if ((have_utf8_pcre == -1) && - (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))) { - have_utf8_pcre = -2; /* erk, failed to ask */ + (pcre2_config(PCRE2_CONFIG_UNICODE, &have_utf8_pcre))) { + have_utf8_pcre = -2; /* erk, failed to ask */ } return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8")); @@ -69,115 +69,87 @@ zpcre_utf8_enabled(void) static int bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func)) { - int pcre_opts = 0, pcre_errptr, target_len; - const char *pcre_error; + uint32_t pcre_opts = 0; + int target_len; + int pcre_error; + PCRE2_SIZE pcre_offset; char *target; - if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED; - if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS; - if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE; - if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED; - if(OPT_ISSET(ops,'s')) pcre_opts |= PCRE_DOTALL; + if (OPT_ISSET(ops, 'a')) pcre_opts |= PCRE2_ANCHORED; + if (OPT_ISSET(ops, 'i')) pcre_opts |= PCRE2_CASELESS; + if (OPT_ISSET(ops, 'm')) pcre_opts |= PCRE2_MULTILINE; + if (OPT_ISSET(ops, 'x')) pcre_opts |= PCRE2_EXTENDED; + if (OPT_ISSET(ops, 's')) pcre_opts |= PCRE2_DOTALL; if (zpcre_utf8_enabled()) - pcre_opts |= PCRE_UTF8; - -#ifdef HAVE_PCRE_STUDY - if (pcre_hints) -#ifdef PCRE_CONFIG_JIT - pcre_free_study(pcre_hints); -#else - pcre_free(pcre_hints); -#endif - pcre_hints = NULL; -#endif + pcre_opts |= PCRE2_UTF; if (pcre_pattern) - pcre_free(pcre_pattern); + pcre2_code_free(pcre_pattern); pcre_pattern = NULL; target = ztrdup(*args); unmetafy(target, &target_len); - if ((int)strlen(target) != target_len) { - zwarnnam(nam, "embedded NULs in PCRE pattern terminate pattern"); - } - - pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL); + pcre_pattern = pcre2_compile((PCRE2_SPTR) target, (PCRE2_SIZE) target_len, + pcre_opts, &pcre_error, &pcre_offset, NULL); free(target); if (pcre_pattern == NULL) { - zwarnnam(nam, "error in regex: %s", pcre_error); + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(pcre_error, buffer, sizeof(buffer)); + zwarnnam(nam, "error in regex: %s", buffer); return 1; } return 0; } -/**/ -#ifdef HAVE_PCRE_STUDY - /**/ static int bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int func)) { - const char *pcre_error; - if (pcre_pattern == NULL) { zwarnnam(nam, "no pattern has been compiled for study"); return 1; } - - if (pcre_hints) -#ifdef PCRE_CONFIG_JIT - pcre_free_study(pcre_hints); -#else - pcre_free(pcre_hints); -#endif - pcre_hints = NULL; - pcre_hints = pcre_study(pcre_pattern, 0, &pcre_error); - if (pcre_error != NULL) - { - zwarnnam(nam, "error while studying regex: %s", pcre_error); - return 1; + int jit = 0; + if (!pcre2_config(PCRE2_CONFIG_JIT, &jit) && jit) { + if (pcre2_jit_compile(pcre_pattern, PCRE2_JIT_COMPLETE) < 0) { + zwarnnam(nam, "error while studying regex"); + return 1; + } } return 0; } -/**/ -#else /* !HAVE_PCRE_STUDY */ - -# define bin_pcre_study bin_notavail - -/**/ -#endif /* !HAVE_PCRE_STUDY */ - -/**/ static int -zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, - char *substravar, int want_offset_pair, int matchedinarr, - int want_begin_end) +zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count, + char *matchvar, char *substravar, int want_offset_pair, + int matchedinarr, int want_begin_end) { - char **captures, *match_all, **matches; + PCRE2_SIZE *ovec; + char *match_all, **matches; char offset_all[50]; int capture_start = 1; if (matchedinarr) { - /* bash-style captures[0] entire-matched string in the array */ + /* bash-style ovec[0] entire-matched string in the array */ capture_start = 0; } - /* captures[0] will be entire matched string, [1] first substring */ - if (!pcre_get_substring_list(arg, ovec, captured_count, (const char ***)&captures)) { - int nelem = arrlen(captures)-1; + /* ovec[0] will be entire matched string, [1] first substring */ + ovec = pcre2_get_ovector_pointer(mdata); + if (ovec) { + int nelem = captured_count - 1; /* Set to the offsets of the complete match */ if (want_offset_pair) { - sprintf(offset_all, "%d %d", ovec[0], ovec[1]); + sprintf(offset_all, "%ld %ld", ovec[0], ovec[1]); setsparam("ZPCRE_OP", ztrdup(offset_all)); } /* @@ -186,7 +158,7 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, * ovec is length 2*(1+capture_list_length) */ if (matchvar) { - match_all = metafy(captures[0], ovec[1] - ovec[0], META_DUP); + match_all = metafy(arg + ovec[0], ovec[1] - ovec[0], META_DUP); setsparam(matchvar, match_all); } /* @@ -201,16 +173,12 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, */ if (substravar && (!want_begin_end || nelem)) { - char **x, **y; + char **x; int vec_off, i; - y = &captures[capture_start]; matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start)); - for (i = capture_start; i < captured_count; i++, y++) { + for (i = capture_start; i < captured_count; i++) { vec_off = 2*i; - if (*y) - *x++ = metafy(*y, ovec[vec_off+1]-ovec[vec_off], META_DUP); - else - *x++ = NULL; + *x++ = metafy(arg + ovec[vec_off], ovec[vec_off+1]-ovec[vec_off], META_DUP); } *x = NULL; setaparam(substravar, matches); @@ -247,7 +215,8 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, setiparam("MEND", offs + !isset(KSHARRAYS) - 1); if (nelem) { char **mbegin, **mend, **bptr, **eptr; - int i, *ipair; + int i; + size_t *ipair; bptr = mbegin = zalloc(sizeof(char*)*(nelem+1)); eptr = mend = zalloc(sizeof(char*)*(nelem+1)); @@ -287,8 +256,6 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, setaparam("mend", mend); } } - - pcre_free_substring_list((const char **)captures); } return 0; @@ -314,7 +281,8 @@ getposint(char *instr, char *nam) static int bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) { - int ret, capcount, *ovec, ovecsize, c; + int ret, c; + pcre2_match_data *pcre_mdata = NULL; char *matched_portion = NULL; char *plaintext = NULL; char *receptacle = NULL; @@ -344,36 +312,30 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) /* For the entire match, 'Return' the offset byte positions instead of the matched string */ if(OPT_ISSET(ops,'b')) want_offset_pair = 1; - if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount))) - { - zwarnnam(nam, "error %d in fullinfo", ret); - return 1; - } - - ovecsize = (capcount+1)*3; - ovec = zalloc(ovecsize*sizeof(int)); - plaintext = ztrdup(*args); unmetafy(plaintext, &subject_len); if (offset_start > 0 && offset_start >= subject_len) - ret = PCRE_ERROR_NOMATCH; - else - ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize); + ret = PCRE2_ERROR_NOMATCH; + else { + pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL); + ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, + offset_start, 0, pcre_mdata, NULL); + } if (ret==0) return_value = 0; - else if (ret==PCRE_ERROR_NOMATCH) /* no match */; + else if (ret == PCRE2_ERROR_NOMATCH) /* no match */; else if (ret>0) { - zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle, + zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle, want_offset_pair, 0, 0); return_value = 0; } else { - zwarnnam(nam, "error in pcre_exec [%d]", ret); + zwarnnam(nam, "error in pcre2_match [%d]", ret); } - if (ovec) - zfree(ovec, ovecsize*sizeof(int)); + if (pcre_mdata) + pcre2_match_data_free(pcre_mdata); zsfree(plaintext); return return_value; @@ -383,17 +345,19 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) static int cond_pcre_match(char **a, int id) { - pcre *pcre_pat; - const char *pcre_err; + pcre2_code *pcre_pat = NULL; + int pcre_err; + PCRE2_SIZE pcre_erroff; char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar, *svar; - int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize; + int r = 0, pcre_opts = 0; + pcre2_match_data *pcre_mdata = NULL; int lhstr_plain_len, rhre_plain_len; int return_value = 0; if (zpcre_utf8_enabled()) - pcre_opts |= PCRE_UTF8; + pcre_opts |= PCRE2_UTF; if (isset(REMATCHPCRE) && !isset(CASEMATCH)) - pcre_opts |= PCRE_CASELESS; + pcre_opts |= PCRE2_CASELESS; lhstr = cond_str(a,0,0); rhre = cond_str(a,1,0); @@ -401,9 +365,6 @@ cond_pcre_match(char **a, int id) rhre_plain = ztrdup(rhre); unmetafy(lhstr_plain, &lhstr_plain_len); unmetafy(rhre_plain, &rhre_plain_len); - pcre_pat = NULL; - ov = NULL; - ovsize = 0; if (isset(BASHREMATCH)) { svar = NULL; @@ -415,27 +376,27 @@ cond_pcre_match(char **a, int id) switch(id) { case CPCRE_PLAIN: - if ((int)strlen(rhre_plain) != rhre_plain_len) { - zwarn("embedded NULs in PCRE pattern terminate pattern"); - } - pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL); - if (pcre_pat == NULL) { - zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err); + if (!(pcre_pat = pcre2_compile((PCRE2_SPTR) rhre_plain, + (PCRE2_SIZE) rhre_plain_len, pcre_opts, + &pcre_err, &pcre_erroff, NULL))) + { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(pcre_err, buffer, sizeof(buffer)); + zwarn("failed to compile regexp /%s/: %s", rhre, buffer); break; } - pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt); - ovsize = (capcnt+1)*3; - ov = zalloc(ovsize*sizeof(int)); - r = pcre_exec(pcre_pat, NULL, lhstr_plain, lhstr_plain_len, 0, 0, ov, ovsize); - /* r < 0 => error; r==0 match but not enough size in ov + pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pat, NULL); + r = pcre2_match(pcre_pat, (PCRE2_SPTR8) lhstr_plain, lhstr_plain_len, + 0, 0, pcre_mdata, NULL); + /* r < 0 => error; r==0 match but not enough size in match data * r > 0 => (r-1) substrings found; r==1 => no substrings */ if (r==0) { - zwarn("reportable zsh problem: pcre_exec() returned 0"); + zwarn("reportable zsh problem: pcre2_match() returned 0"); return_value = 1; break; } - else if (r==PCRE_ERROR_NOMATCH) { + else if (r == PCRE2_ERROR_NOMATCH) { return_value = 0; /* no match */ break; } @@ -444,7 +405,7 @@ cond_pcre_match(char **a, int id) break; } else if (r>0) { - zpcre_get_substrings(lhstr_plain, ov, r, svar, avar, 0, + zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0, isset(BASHREMATCH), !isset(BASHREMATCH)); return_value = 1; @@ -457,10 +418,10 @@ cond_pcre_match(char **a, int id) free(lhstr_plain); if(rhre_plain) free(rhre_plain); + if (pcre_mdata) + pcre2_match_data_free(pcre_mdata); if (pcre_pat) - pcre_free(pcre_pat); - if (ov) - zfree(ov, ovsize*sizeof(int)); + pcre2_code_free(pcre_pat); return return_value; } @@ -489,11 +450,11 @@ static struct builtin bintab[] = { static struct features module_features = { bintab, sizeof(bintab)/sizeof(*bintab), -#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) +#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H) cotab, sizeof(cotab)/sizeof(*cotab), -#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ +#else /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */ NULL, 0, -#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ +#endif /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */ NULL, 0, NULL, 0, 0 @@ -540,19 +501,9 @@ cleanup_(Module m) int finish_(UNUSED(Module m)) { -#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) -#ifdef HAVE_PCRE_STUDY - if (pcre_hints) -#ifdef PCRE_CONFIG_JIT - pcre_free_study(pcre_hints); -#else - pcre_free(pcre_hints); -#endif - pcre_hints = NULL; -#endif - +#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H) if (pcre_pattern) - pcre_free(pcre_pattern); + pcre2_code_free(pcre_pattern); pcre_pattern = NULL; #endif -- cgit 1.4.1