summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog3
-rw-r--r--Src/Modules/pcre.c33
-rw-r--r--Test/V07pcre.ztst11
3 files changed, 37 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index dc091bf0a..d666f21ce 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2016-01-08  Barton E. Schaefer  <schaefer@zsh.org>
 
+	* Jun T.: 37515: Src/Modules/pcre.c, Test/V07pcre.ztst: multibyte
+	handling as per 35448.
+
 	* unposted (cf. Jun T.: 37516): Src/builtin.c: refine READ_MSTREAM
 	to avoid unsequenced evaluation
 
diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 2393cd1e7..aa5c8ed5b 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -190,18 +190,25 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
 	if (want_begin_end) {
 	    char *ptr = arg;
 	    zlong offs = 0;
+	    int clen, leftlen;
 
 	    /* Count the characters before the match */
-	    MB_METACHARINIT();
-	    while (ptr < arg + ovec[0]) {
+	    MB_CHARINIT();
+	    leftlen = ovec[0];
+	    while (leftlen) {
 		offs++;
-		ptr += MB_METACHARLEN(ptr);
+		clen = MB_CHARLEN(ptr, leftlen);
+		ptr += clen;
+		leftlen -= clen;
 	    }
 	    setiparam("MBEGIN", offs + !isset(KSHARRAYS));
 	    /* Add on the characters in the match */
-	    while (ptr < arg + ovec[1]) {
+	    leftlen = ovec[1] - ovec[0];
+	    while (leftlen) {
 		offs++;
-		ptr += MB_METACHARLEN(ptr);
+		clen = MB_CHARLEN(ptr, leftlen);
+		ptr += clen;
+		leftlen -= clen;
 	    }
 	    setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
 	    if (nelem) {
@@ -219,17 +226,23 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
 		    ptr = arg;
 		    offs = 0;
 		    /* Find the start offset */
-		    MB_METACHARINIT();
-		    while (ptr < arg + ipair[0]) {
+		    MB_CHARINIT();
+		    leftlen = ipair[0];
+		    while (leftlen) {
 			offs++;
-			ptr += MB_METACHARLEN(ptr);
+			clen = MB_CHARLEN(ptr, leftlen);
+			ptr += clen;
+			leftlen -= clen;
 		    }
 		    convbase(buf, offs + !isset(KSHARRAYS), 10);
 		    *bptr = ztrdup(buf);
 		    /* Continue to the end offset */
-		    while (ptr < arg + ipair[1]) {
+		    leftlen = ipair[1] - ipair[0];
+		    while (leftlen) {
 			offs++;
-			ptr += MB_METACHARLEN(ptr);
+			clen = MB_CHARLEN(ptr, leftlen);
+			ptr += clen;
+			leftlen -= clen;
 		    }
 		    convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
 		    *eptr = ztrdup(buf);
diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst
index ddfd3f5cd..39077564c 100644
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -37,6 +37,17 @@
 >o→b
 >→
 
+  unset match mend
+  s=$'\u00a0'
+  [[ $s =~ '^.$' ]] && print OK
+  [[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
+  [[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
+  unset s
+0:Raw IMETA characters in input string
+>OK
+>OK
+>OK
+
   [[ foo =~ f.+ ]] ; print $?
   [[ foo =~ x.+ ]] ; print $?
   [[ ! foo =~ f.+ ]] ; print $?