blob: 9feeb47fb7ac7c21d860fd3639a6c6a46776bb7b (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
%prep
# Skip the whole file when the zsh/pcre module cannot be loaded.  Only the
# pcre-match condition is requested at this point; that is enough to find
# out whether the module is available.
if ! zmodload -F zsh/pcre C:pcre-match 2>/dev/null
then
ZTST_unimplemented="the zsh/pcre module is not available"
return 0
fi
# Load the rest of the builtins
zmodload zsh/pcre
setopt rematch_pcre
# Find a UTF-8 locale.
setopt multibyte
# Don't let LC_* override our choice of locale.
unset -m LC_\*
mb_ok=
# Candidates: a few common English UTF-8 locale names first, then whatever
# "locale -a" reports.  grep -E is used instead of egrep: POSIX does not
# specify egrep, and GNU grep 3.8 and later print a warning on stderr
# whenever it is invoked.
langs=(en_{US,GB}.{UTF-,utf}8 en.UTF-8
$(locale -a 2>/dev/null | grep -E 'utf8|UTF-8'))
# A candidate is accepted when "é" matches the single-character pattern "?".
# LANG is the loop variable, so it stays set to the first locale that passes.
for LANG in $langs; do
if [[ é = ? ]]; then
mb_ok=1
break;
fi
done
# Either mark the tests as unimplemented, or report the chosen locale and
# move into a scratch directory.
if [[ -z $mb_ok ]]; then
ZTST_unimplemented="no UTF-8 locale or multibyte mode is not implemented"
else
print -u $ZTST_fd Testing PCRE multibyte with locale $LANG
mkdir multibyte.tmp && cd multibyte.tmp
fi
%test
# Match one non-ASCII character (→) with one arbitrary character on each
# side, then print the whole match ($MATCH) followed by the first capture
# group ($match[1]).  rematch_pcre is set in %prep.
[[ 'foo→bar' =~ .([^[:ascii:]]). ]]
print $MATCH
print $match[1]
0:Basic non-ASCII regexp matching
>o→b
>→
# $s holds the single character U+00A0.  Three checks, each printing OK:
#   1. $s alone matches '^.$', i.e. it is treated as exactly one character;
#   2. $s between A and B is captured unchanged ($match[1] equals $s);
#   3. two copies between A and B are captured with $mend[1] equal to 3.
# match and mend are unset first, presumably so that values left over from
# the previous test cannot satisfy the comparisons.
unset match mend
s=$'\u00a0'
[[ $s =~ '^.$' ]] && print OK
[[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
[[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
unset s
0:Raw IMETA characters in input string
>OK
>OK
>OK
# Exit-status checks for =~ combined with "!" negation, "&&" conjunction and
# parenthesised grouping inside [[ ]].  "foo =~ f.+" and "bar =~ b.+" are the
# matching cases; the patterns beginning with x are the non-matching ones.
# Every line prints $?, and the expected values appear in the same order
# below the description line.
[[ foo =~ f.+ ]] ; print $?
[[ foo =~ x.+ ]] ; print $?
[[ ! foo =~ f.+ ]] ; print $?
[[ ! foo =~ x.+ ]] ; print $?
[[ foo =~ f.+ && bar =~ b.+ ]] ; print $?
[[ foo =~ x.+ && bar =~ b.+ ]] ; print $?
[[ foo =~ f.+ && bar =~ x.+ ]] ; print $?
[[ ! foo =~ f.+ && bar =~ b.+ ]] ; print $?
[[ foo =~ f.+ && ! bar =~ b.+ ]] ; print $?
[[ ! ( foo =~ f.+ && bar =~ b.+ ) ]] ; print $?
[[ ! foo =~ x.+ && bar =~ b.+ ]] ; print $?
[[ foo =~ x.+ && ! bar =~ b.+ ]] ; print $?
[[ ! ( foo =~ x.+ && bar =~ b.+ ) ]] ; print $?
0:Regex result inversion detection
>0
>1
>1
>0
>0
>1
>1
>1
>1
>1
>0
>1
>0
# Note that PCRE_ANCHORED only means anchored at the start
# Also note that we don't unset MATCH/match on failed match (and it's an
# open issue as to whether or not we should)
#
# Sequence exercised:
#   - compile '.(→.)' and match four subjects;
#   - call pcre_study and repeat the first match;
#   - recompile with -a and match three subjects, the first of which has
#     extra leading text and is expected to fail;
#   - compile 'x.(→.)' without and then with -i, matching two subjects that
#     differ only in the case of the leading x.
# After every pcre_match the exit status, $MATCH and $match are printed and
# MATCH/match are unset, so a failed match prints only its status.
pcre_compile '.(→.)'
pcre_match foo→bar
print $? $MATCH $match ; unset MATCH match
pcre_match foo.bar
print $? $MATCH $match ; unset MATCH match
pcre_match foo†bar
print $? $MATCH $match ; unset MATCH match
pcre_match foo→†ar
print $? $MATCH $match ; unset MATCH match
pcre_study
pcre_match foo→bar
print $? $MATCH $match ; unset MATCH match
pcre_compile -a '.(→.)'
pcre_match foo→bar
print $? $MATCH $match ; unset MATCH match
pcre_match o→bar
print $? $MATCH $match ; unset MATCH match
pcre_match o→b
print $? $MATCH $match ; unset MATCH match
pcre_compile 'x.(→.)'
pcre_match xo→t
print $? $MATCH $match ; unset MATCH match
pcre_match Xo→t
print $? $MATCH $match ; unset MATCH match
pcre_compile -i 'x.(→.)'
pcre_match xo→t
print $? $MATCH $match ; unset MATCH match
pcre_match Xo→t
print $? $MATCH $match ; unset MATCH match
0:pcre_compile interface testing: basic, anchored & case-insensitive
>0 o→b →b
>1
>1
>0 o→† →†
>0 o→b →b
>1
>0 o→b →b
>0 o→b →b
>0 xo→t →t
>1
>0 xo→t →t
>0 Xo→t →t
# The pattern matches a run of five digits.  After the first pcre_match -b,
# $ZPCRE_OP is printed; the expected output shows it as a "start end" pair
# ("25 30" for the first zip code).  The second call passes the second word
# of $ZPCRE_OP to -n, and the expected output shows the next zip code being
# found ("31 36").
string="The following zip codes: 78884 90210 99513"
pcre_compile -m "\d{5}"
pcre_match -b -- $string && print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
pcre_match -b -n $ZPCRE_OP[(w)2] -- $string || print failed
print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
0:pcre_match -b and pcre_match -n
>78884; ZPCRE_OP: 25 30
>90210; ZPCRE_OP: 31 36
# Embedded NULs allowed in plaintext, but not in RE (although \0 as two-chars allowed)
# The subject is six characters long, two of them NUL.  The pattern is in
# single quotes, so \0 reaches the regex engine as a backslash followed by 0.
# The lengths of $MATCH and of both capture groups are printed to show that
# the NULs survive the round trip: 6, 3 and 3.
[[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]]
print "${#MATCH}; ${#match[1]}; ${#match[2]}"
0:ensure ASCII NUL passes in and out of matched plaintext
>6; 3; 3
# Ensure the long-form infix operator works
# Prints $? for a matching pattern, a non-matching pattern, and the negation
# of the non-matching pattern; expected 0, 1, 0.
[[ foo -pcre-match ^f..$ ]]
print $?
[[ foo -pcre-match ^g..$ ]]
print $?
[[ ! foo -pcre-match ^g..$ ]]
print $?
0:infix -pcre-match works
>0
>1
>0
# Bash mode; note zsh documents that variables not updated on match failure,
# which remains different from bash
# With bash_rematch set, the checks read their results from the
# BASH_REMATCH array.  "print -l" puts the exit status on the first line and
# then one array element per line; ${^...} attaches the "_" prefix to each
# element separately, as the expected output shows.  The option is turned
# off again at the end of the chunk.
setopt bash_rematch
[[ "goo" -pcre-match ^f.+$ ]] ; print $?
[[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
[[ "foot" -pcre-match ^f([aeiou]+)(.)$ ]]; print -l $? _${^BASH_REMATCH[@]}
[[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
[[ ! "goo" -pcre-match ^f.+$ ]] ; print $?
unsetopt bash_rematch
0:bash-compatibility works
>1
>0
>_foo
>0
>_foot
>_oo
>_t
>0
>_foo
>0
# Subshell because crash on failure
# The subject contains no "_", so the optional first group (.*_)? takes no
# part in the match; the test then prints the second group, expected to be
# "test".  The option is spelled re_match_pcre here and rematch_pcre in
# %prep; zsh ignores underscores in option names, so both name the same
# option.
( setopt re_match_pcre
[[ test.txt =~ '^(.*_)?(test)' ]]
echo $match[2] )
0:regression for segmentation fault, workers/38307
>test
|