diff options
author | Peter Stephenson <pws@users.sourceforge.net> | 2007-05-01 22:05:03 +0000 |
---|---|---|
committer | Peter Stephenson <pws@users.sourceforge.net> | 2007-05-01 22:05:03 +0000 |
commit | 7f03c3d851c571d86d18c701d882c13ad5d06c6c (patch) | |
tree | 17edf1709daf6247f505f82d12bbd5c67fa69906 /Src/Modules | |
parent | eb4c3d4bf2e6b13078afd0820375dc02a2eb2a1f (diff) | |
download | zsh-7f03c3d851c571d86d18c701d882c13ad5d06c6c.tar.gz zsh-7f03c3d851c571d86d18c701d882c13ad5d06c6c.tar.xz zsh-7f03c3d851c571d86d18c701d882c13ad5d06c6c.zip |
23375: Phil Pennock: =~, zsh/regex etc. etc.
Diffstat (limited to 'Src/Modules')
-rw-r--r-- | Src/Modules/pcre.c | 162 | ||||
-rw-r--r-- | Src/Modules/regex.c | 161 | ||||
-rw-r--r-- | Src/Modules/regex.mdd | 10 |
3 files changed, 294 insertions, 39 deletions
diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index c19c8c137..21988e05c 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -3,7 +3,7 @@ * * This file is part of zsh, the Z shell. * - * Copyright (c) 2001, 2002, 2003, 2004 Clint Adams + * Copyright (c) 2001, 2002, 2003, 2004, 2007 Clint Adams * All rights reserved. * * Permission is hereby granted, without written agreement and without @@ -42,6 +42,37 @@ static pcre_extra *pcre_hints; /**/ static int +zpcre_utf8_enabled(void) +{ +#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET) + static int have_utf8_pcre = -1; + + /* value can toggle based on MULTIBYTE, so don't + * be too eager with caching */ + if (have_utf8_pcre < -1) + return 0; + + if (!isset(MULTIBYTE)) + return 0; + + if ((have_utf8_pcre == -1) && + (!strcmp(nl_langinfo(CODESET), "UTF-8"))) { + + if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre)) + have_utf8_pcre = -2; /* erk, failed to ask */ + } + + if (have_utf8_pcre < 0) + return 0; + return have_utf8_pcre; + +#else + return 0; +#endif +} + +/**/ +static int bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func)) { int pcre_opts = 0, pcre_errptr; @@ -52,8 +83,14 @@ bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func)) if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE; if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED; + if (zpcre_utf8_enabled()) + pcre_opts |= PCRE_UTF8; + pcre_hints = NULL; /* Is this necessary? */ + if (pcre_pattern) + pcre_free(pcre_pattern); + pcre_pattern = pcre_compile(*args, pcre_opts, &pcre_error, &pcre_errptr, NULL); if (pcre_pattern == NULL) @@ -100,37 +137,52 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f /**/ static int -zpcre_get_substrings(char *arg, int *ovec, int ret, char *receptacle) +zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substravar, int matchedinarr) { - char **captures, **matches; - - if(!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) { - - matches = zarrdup(&captures[1]); /* first one would be entire string */ - if (receptacle == NULL) - setaparam("match", matches); - else - setaparam(receptacle, matches); - - pcre_free_substring_list((const char **)captures); - } + char **captures, **match_all, **matches; + int capture_start = 1; - return 0; + if (matchedinarr) + capture_start = 0; + if (matchvar == NULL) + matchvar = "MATCH"; + if (substravar == NULL) + substravar = "match"; + + /* captures[0] will be entire matched string, [1] first substring */ + if(!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) { + match_all = ztrdup(captures[0]); + setsparam(matchvar, match_all); + matches = zarrdup(&captures[capture_start]); + setaparam(substravar, matches); + pcre_free_substring_list((const char **)captures); + } + + return 0; } /**/ static int bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) { - int ret, capcount, *ovec, ovecsize; + int ret, capcount, *ovec, ovecsize, c; + char *matched_portion = NULL; char *receptacle = NULL; + int return_value = 1; + + if (pcre_pattern == NULL) { + zwarnnam(nam, "no pattern has been compiled"); + return 1; + } - if(OPT_ISSET(ops,'a')) { - receptacle = *args++; - if(!*args) { - zwarnnam(nam, "not enough arguments"); - return 1; - } + if(OPT_HASARG(ops,c='a')) { + receptacle = OPT_ARG(ops,c); + } + if(OPT_HASARG(ops,c='v')) { + matched_portion = OPT_ARG(ops,c); + } + if(!*args) { + zwarnnam(nam, "not enough arguments"); } if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount))) @@ -144,18 +196,20 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) ret = pcre_exec(pcre_pattern, pcre_hints, *args, strlen(*args), 0, 0, ovec, ovecsize); - if (ret==0) return 0; - else if (ret==PCRE_ERROR_NOMATCH) return 1; /* no match */ + if (ret==0) return_value = 0; + else if (ret==PCRE_ERROR_NOMATCH) /* no match */; else if (ret>0) { - zpcre_get_substrings(*args, ovec, ret, receptacle); - return 0; + zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle, 0); + return_value = 0; } else { zwarnnam(nam, "error in pcre_exec"); - return 1; } - return 1; + if (ovec) + zfree(ovec, ovecsize*sizeof(int)); + + return return_value; } /**/ @@ -164,33 +218,63 @@ cond_pcre_match(char **a, int id) { pcre *pcre_pat; const char *pcre_err; - char *lhstr, *rhre; + char *lhstr, *rhre, *avar=NULL; int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize; + int return_value = 0; + + if (zpcre_utf8_enabled()) + pcre_opts |= PCRE_UTF8; lhstr = cond_str(a,0,0); rhre = cond_str(a,1,0); + pcre_pat = ov = NULL; + + if (isset(BASHREMATCH)) + avar="BASH_REMATCH"; switch(id) { case CPCRE_PLAIN: - pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL); - pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt); - ovsize = (capcnt+1)*3; - ov = zalloc(ovsize*sizeof(int)); - r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize); - if (r==0) return 1; + pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL); + if (pcre_pat == NULL) { + zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err); + break; + } + pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt); + ovsize = (capcnt+1)*3; + ov = zalloc(ovsize*sizeof(int)); + r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize); + /* r < 0 => error; r==0 match but not enough size in ov + * r > 0 => (r-1) substrings found; r==1 => no substrings + */ + if (r==0) { + zwarn("reportable zsh problem: pcre_exec() returned 0"); + return_value = 1; + break; + } else if (r==PCRE_ERROR_NOMATCH) return 0; /* no match */ + else if (r<0) { + zwarn("pcre_exec() error: %d", r); + break; + } else if (r>0) { - zpcre_get_substrings(lhstr, ov, r, NULL); - return 1; + zpcre_get_substrings(lhstr, ov, r, NULL, avar, isset(BASHREMATCH)); + return_value = 1; + break; } break; } - return 0; + if (pcre_pat) + pcre_free(pcre_pat); + if (ov) + zfree(ov, ovsize*sizeof(int)); + + return return_value; } static struct conddef cotab[] = { CONDDEF("pcre-match", CONDF_INFIX, cond_pcre_match, 0, 0, CPCRE_PLAIN) + /* CONDDEF can register =~ but it won't be found */ }; /**/ @@ -206,7 +290,7 @@ static struct conddef cotab[] = { static struct builtin bintab[] = { BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimx", NULL), BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL), - BUILTIN("pcre_match", 0, bin_pcre_match, 1, 2, 0, "a", NULL) + BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:", NULL) }; diff --git a/Src/Modules/regex.c b/Src/Modules/regex.c new file mode 100644 index 000000000..44019a1b9 --- /dev/null +++ b/Src/Modules/regex.c @@ -0,0 +1,161 @@ +/* + * regex.c + * + * This file is part of zsh, the Z shell. + * + * Copyright (c) 2007 Phil Pennock + * All Rights Reserved. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and to distribute modified versions of this software for any + * purpose, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * In no event shall Phil Pennock or the Zsh Development Group be liable + * to any party for direct, indirect, special, incidental, or consequential + * damages arising out of the use of this software and its documentation, + * even if Phil Pennock and the Zsh Development Group have been advised of + * the possibility of such damage. + * + * Phil Pennock and the Zsh Development Group specifically disclaim any + * warranties, including, but not limited to, the implied warranties of + * merchantability and fitness for a particular purpose. The software + * provided hereunder is on an "as is" basis, and Phil Pennock and the + * Zsh Development Group have no obligation to provide maintenance, + * support, updates, enhancements, or modifications. + * + */ + +#include "regex.mdh" +#include "regex.pro" + +#include <regex.h> + +/* we default to a vaguely modern syntax and set of capabilities */ +#define ZREGEX_EXTENDED 0 +/* if you want Basic syntax, make it an alternative options */ + +static void +zregex_regerrwarn(int r, regex_t *re, char *msg) +{ + char *errbuf; + size_t errbufsz; + + errbufsz = regerror(r, re, NULL, 0); + errbuf = zalloc(errbufsz*sizeof(char)); + regerror(r, re, errbuf, errbufsz); + zwarn("%s: %s", msg, errbuf); + zfree(errbuf, errbufsz); +} + +/**/ +static int +zcond_regex_match(char **a, int id) +{ + regex_t re; + regmatch_t *m, *matches = NULL; + size_t matchessz; + char *lhstr, *rhre, *s, **arr, **x; + int r, n, return_value, rcflags, reflags, nelem, start; + + lhstr = cond_str(a,0,0); + rhre = cond_str(a,1,0); + rcflags = reflags = 0; + return_value = 0; /* 1 => matched successfully */ + + switch(id) { + case ZREGEX_EXTENDED: + rcflags |= REG_EXTENDED; + if (!isset(CASEMATCH)) + rcflags |= REG_ICASE; + r = regcomp(&re, rhre, rcflags); + if (r) { + zregex_regerrwarn(r, &re, "failed to compile regex"); + break; + } + /* re.re_nsub is number of parenthesized groups, we also need + * 1 for the 0 offset, which is the entire matched portion + */ + if (re.re_nsub < 0) { + zwarn("INTERNAL ERROR: regcomp() returned " + "negative subpattern count %d", re.re_nsub); + break; + } + matchessz = (re.re_nsub + 1) * sizeof(regmatch_t); + matches = zalloc(matchessz); + r = regexec(&re, lhstr, re.re_nsub+1, matches, reflags); + if (r == REG_NOMATCH) /**/; + else if (r == 0) { + return_value = 1; + if (isset(BASHREMATCH)) { + start = 0; + nelem = re.re_nsub + 1; + } else { + start = 1; + nelem = re.re_nsub; + } + arr = NULL; /* bogus gcc warning of used uninitialised */ + /* entire matched portion + re_nsub substrings + NULL */ + if (nelem) { + arr = x = (char **) zalloc(sizeof(char *) * (nelem + 1)); + for (m = matches + start, n = start; n <= re.re_nsub; ++n, ++m, ++x) { + *x = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so); + } + *x = NULL; + } + if (isset(BASHREMATCH)) { + setaparam("BASH_REMATCH", arr); + } else { + m = matches; + s = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so); + setsparam("MATCH", s); + if (nelem) + setaparam("match", arr); + } + } + else zregex_regerrwarn(r, &re, "regex matching error"); + break; + default: + DPUTS(1, "bad regex option"); + break; + } + + if (matches) + zfree(matches, matchessz); + regfree(&re); + return return_value; +} + +static struct conddef cotab[] = { + CONDDEF("regex-match", CONDF_INFIX, zcond_regex_match, 0, 0, ZREGEX_EXTENDED) +}; + +/**/ +int +setup_(UNUSED(Module m)) +{ + return 0; +} + +/**/ +int +boot_(Module m) +{ + return !addconddefs(m->nam, cotab, sizeof(cotab)/sizeof(*cotab)); +} + +/**/ +int +cleanup_(Module m) +{ + deleteconddefs(m->nam, cotab, sizeof(cotab)/sizeof(*cotab)); + return 0; +} + +/**/ +int +finish_(UNUSED(Module m)) +{ + return 0; +} diff --git a/Src/Modules/regex.mdd b/Src/Modules/regex.mdd new file mode 100644 index 000000000..d025cabde --- /dev/null +++ b/Src/Modules/regex.mdd @@ -0,0 +1,10 @@ +name=zsh/regex +link=`if test x$ac_cv_func_regcomp = xyes && \ + test x$ac_cv_func_regexec = xyes && \ + test x$ac_cv_func_regerror = xyes && \ + test x$ac_cv_func_regfree = xyes; then echo dynamic; else echo no; fi` +load=no + +autobins="" + +objects="regex.o" |