about summary refs log tree commit diff
path: root/Src/Modules
diff options
context:
space:
mode:
Diffstat (limited to 'Src/Modules')
-rw-r--r-- Src/Modules/pcre.c 79
-rw-r--r-- Src/Modules/regex.c 56
2 files changed, 128 insertions, 7 deletions
diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 08205d144..f8b79adea 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -138,8 +138,9 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f
 
 /**/
 static int
-zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substravar, 
-    int want_offset_pair, int matchedinarr)
+zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
+		     char *substravar, int want_offset_pair, int matchedinarr,
+		     int want_begin_end)
 {
     char **captures, *match_all, **matches;
     char offset_all[50];
@@ -154,6 +155,7 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substr
     
     /* captures[0] will be entire matched string, [1] first substring */
     if (!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
+	int nelem = arrlen(captures)-1;
 	/* Set to the offsets of the complete match */
 	if (want_offset_pair) {
 	    sprintf(offset_all, "%d %d", ovec[0], ovec[1]);
@@ -161,8 +163,70 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substr
 	}
 	match_all = ztrdup(captures[0]);
 	setsparam(matchvar, match_all);
-	matches = zarrdup(&captures[capture_start]);
-	setaparam(substravar, matches);
+	/*
+	 * When setting match, mbegin and mend, only assign the
+	 * arrays if there were parenthesised matches, for
+	 * consistency with regex.c.
+	 */
+	if (!want_begin_end || nelem) {
+	    matches = zarrdup(&captures[capture_start]);
+	    setaparam(substravar, matches);
+	}
+
+	if (want_begin_end) {
+	    char *ptr = arg;
+	    zlong offs = 0;
+
+	    /* Count the characters before the match */
+	    MB_METACHARINIT();
+	    while (ptr < arg + ovec[0]) {
+		offs++;
+		ptr += MB_METACHARLEN(ptr);
+	    }
+	    setiparam("MBEGIN", offs + !isset(KSHARRAYS));
+	    /* Add on the characters in the match */
+	    while (ptr < arg + ovec[1]) {
+		offs++;
+		ptr += MB_METACHARLEN(ptr);
+	    }
+	    setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
+	    if (nelem) {
+		char **mbegin, **mend, **bptr, **eptr;
+		int i, *ipair;
+
+		bptr = mbegin = zalloc(nelem+1);
+		eptr = mend = zalloc(nelem+1);
+
+		for (ipair = ovec + 2, i = 0;
+		     i < nelem;
+		     ipair += 2, i++, bptr++, eptr++)
+		{
+		    char buf[DIGBUFSIZE];
+		    ptr = arg;
+		    offs = 0;
+		    /* Find the start offset */
+		    MB_METACHARINIT();
+		    while (ptr < arg + ipair[0]) {
+			offs++;
+			ptr += MB_METACHARLEN(ptr);
+		    }
+		    convbase(buf, offs + !isset(KSHARRAYS), 10);
+		    *bptr = ztrdup(buf);
+		    /* Continue to the end offset */
+		    while (ptr < arg + ipair[1]) {
+			offs++;
+			ptr += MB_METACHARLEN(ptr);
+		    }
+		    convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
+		    *eptr = ztrdup(buf);
+		}
+		*bptr = *eptr = NULL;
+
+		setaparam("mbegin", mbegin);
+		setaparam("mend", mend);
+	    }
+	}
+
 	pcre_free_substring_list((const char **)captures);
     }
 
@@ -238,7 +302,8 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
     if (ret==0) return_value = 0;
     else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
     else if (ret>0) {
-	zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle, want_offset_pair, 0);
+	zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle,
+			     want_offset_pair, 0, 0);
 	return_value = 0;
     }
     else {
@@ -297,7 +362,9 @@ cond_pcre_match(char **a, int id)
 		    break;
 		}
                 else if (r>0) {
-		    zpcre_get_substrings(lhstr, ov, r, NULL, avar, 0, isset(BASHREMATCH));
+		    zpcre_get_substrings(lhstr, ov, r, NULL, avar, 0,
+					 isset(BASHREMATCH),
+					 !isset(BASHREMATCH));
 		    return_value = 1;
 		    break;
 		}
diff --git a/Src/Modules/regex.c b/Src/Modules/regex.c
index 8a9f3e608..25dbddf07 100644
--- a/Src/Modules/regex.c
+++ b/Src/Modules/regex.c
@@ -108,11 +108,65 @@ zcond_regex_match(char **a, int id)
 	    if (isset(BASHREMATCH)) {
 		setaparam("BASH_REMATCH", arr);
 	    } else {
+		zlong offs;
+		char *ptr;
+
 		m = matches;
 		s = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
 		setsparam("MATCH", s);
-		if (nelem)
+		/*
+		 * Count the characters before the match.
+		 */
+		ptr = lhstr;
+		offs = 0;
+		MB_METACHARINIT();
+		while (ptr < lhstr + m->rm_so) {
+		    offs++;
+		    ptr += MB_METACHARLEN(ptr);
+		}
+		setiparam("MBEGIN", offs + !isset(KSHARRAYS));
+		/*
+		 * Add on the characters in the match.
+		 */
+		while (ptr < lhstr + m->rm_eo) {
+		    offs++;
+		    ptr += MB_METACHARLEN(ptr);
+		}
+		setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
+		if (nelem) {
+		    char **mbegin, **mend, **bptr, **eptr;
+		    bptr = mbegin = (char **)zalloc(nelem+1);
+		    eptr = mend = (char **)zalloc(nelem+1);
+
+		    for (m = matches + start, n = start;
+			 n <= (int)re.re_nsub;
+			 ++n, ++m, ++bptr, ++eptr)
+		    {
+			char buf[DIGBUFSIZE];
+			ptr = lhstr;
+			offs = 0;
+			/* Find the start offset */
+			MB_METACHARINIT();
+			while (ptr < lhstr + m->rm_so) {
+			    offs++;
+			    ptr += MB_METACHARLEN(ptr);
+			}
+			convbase(buf, offs + !isset(KSHARRAYS), 10);
+			*bptr = ztrdup(buf);
+			/* Continue to the end offset */
+			while (ptr < lhstr + m->rm_eo) {
+			    offs++;
+			    ptr += MB_METACHARLEN(ptr);
+			}
+			convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
+			*eptr = ztrdup(buf);
+		    }
+		    *bptr = *eptr = NULL;
+
 		    setaparam("match", arr);
+		    setaparam("mbegin", mbegin);
+		    setaparam("mend", mend);
+		}
 	    }
 	}
 	else