From 5eae5b58b1b99946e14ac8ddc54dc14189a56a6c Mon Sep 17 00:00:00 2001
From: "Barton E. Schaefer"
Date: Fri, 8 Jan 2016 20:42:00 -0800
Subject: Jun T.: 37515: multibyte handling as per 35448.

---
 ChangeLog          |  3 +++
 Src/Modules/pcre.c | 33 +++++++++++++++++++++++----------
 Test/V07pcre.ztst  | 11 +++++++++++
 3 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index dc091bf0a..d666f21ce 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2016-01-08 Barton E. Schaefer
 
+	* Jun T.: 37515: Src/Modules/pcre.c, Test/V07pcre.ztst: multibyte
+	handling as per 35448.
+
 	* unposted (cf. Jun T.: 37516): Src/builtin.c: refine READ_MSTREAM
 	to avoid unsequenced evaluation
 
diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 2393cd1e7..aa5c8ed5b 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -190,18 +190,25 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
     if (want_begin_end) {
 	char *ptr = arg;
 	zlong offs = 0;
+	int clen, leftlen;
 
 	/* Count the characters before the match */
-	MB_METACHARINIT();
-	while (ptr < arg + ovec[0]) {
+	MB_CHARINIT();
+	leftlen = ovec[0];
+	while (leftlen) {
 	    offs++;
-	    ptr += MB_METACHARLEN(ptr);
+	    clen = MB_CHARLEN(ptr, leftlen);
+	    ptr += clen;
+	    leftlen -= clen;
 	}
 	setiparam("MBEGIN", offs + !isset(KSHARRAYS));
 	/* Add on the characters in the match */
-	while (ptr < arg + ovec[1]) {
+	leftlen = ovec[1] - ovec[0];
+	while (leftlen) {
 	    offs++;
-	    ptr += MB_METACHARLEN(ptr);
+	    clen = MB_CHARLEN(ptr, leftlen);
+	    ptr += clen;
+	    leftlen -= clen;
 	}
 	setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
 	if (nelem) {
@@ -219,17 +226,23 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
 		ptr = arg;
 		offs = 0;
 		/* Find the start offset */
-		MB_METACHARINIT();
-		while (ptr < arg + ipair[0]) {
+		MB_CHARINIT();
+		leftlen = ipair[0];
+		while (leftlen) {
 		    offs++;
-		    ptr += MB_METACHARLEN(ptr);
+		    clen = MB_CHARLEN(ptr, leftlen);
+		    ptr += clen;
+		    leftlen -= clen;
 		}
 		convbase(buf, offs + !isset(KSHARRAYS), 10);
 		*bptr = ztrdup(buf);
 		/* Continue to the end offset */
-		while (ptr < arg + ipair[1]) {
+		leftlen = ipair[1] - ipair[0];
+		while (leftlen) {
 		    offs++;
-		    ptr += MB_METACHARLEN(ptr);
+		    clen = MB_CHARLEN(ptr, leftlen);
+		    ptr += clen;
+		    leftlen -= clen;
 		}
 		convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
 		*eptr = ztrdup(buf);
diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst
index ddfd3f5cd..39077564c 100644
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -37,6 +37,17 @@
 >o→b
 >→
 
+  unset match mend
+  s=$'\u00a0'
+  [[ $s =~ '^.$' ]] && print OK
+  [[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
+  [[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
+  unset s
+0:Raw IMETA characters in input string
+>OK
+>OK
+>OK
+
   [[ foo =~ f.+ ]] ; print $?
   [[ foo =~ x.+ ]] ; print $?
   [[ ! foo =~ f.+ ]] ; print $?
-- 
cgit 1.4.1