about summary refs log tree commit diff
path: root/Test/V07pcre.ztst
blob: ab67f3d8084cd1ddc89519a986235ee925746cfc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
%prep

# Preparation for every test in this file: skip the file when configure
# disabled the zsh/pcre module, otherwise load the module, turn on
# rematch_pcre so that =~ uses PCRE, and pick a working UTF-8 locale.
  if grep '^name=zsh/pcre .* link=no ' $ZTST_testdir/../config.modules >/dev/null
  then
    ZTST_unimplemented="the zsh/pcre module was disabled by configure (see config.modules)"
    return 0
  fi
  zmodload zsh/pcre
  setopt rematch_pcre
# Find a UTF-8 locale.
  setopt multibyte
# Don't let LC_* override our choice of locale.
  unset -m LC_\*
  mb_ok=
# Candidates: a few common names, then whatever UTF-8 locales the system
# reports.  Use grep -E rather than egrep: egrep is obsolescent and GNU
# grep 3.8 and later print a warning on stderr each time it is invoked.
  langs=(en_{US,GB}.{UTF-,utf}8 en.UTF-8
	 $(locale -a 2>/dev/null | grep -E 'utf8|UTF-8'))
# Keep the first candidate under which é matches the one-character
# pattern ?; the loop variable is LANG itself, so the chosen locale
# stays in effect afterwards.
  for LANG in $langs; do
    if [[ é = ? ]]; then
      mb_ok=1
      break
    fi
  done
  if [[ -z $mb_ok ]]; then
    ZTST_unimplemented="no UTF-8 locale or multibyte mode is not implemented"
  else
    print -u $ZTST_fd Testing PCRE multibyte with locale $LANG
    mkdir multibyte.tmp && cd multibyte.tmp
  fi

%test

  [[ 'foo→bar' =~ .([^[:ascii:]]). ]]
# MATCH is expected to hold the whole matched text and match[1] the first
# capture group, here the single non-ASCII character between o and b.
  print $MATCH
  print $match[1]
0:Basic non-ASCII regexp matching
>o→b
>→

  unset match mend
# s is the single character U+00A0.  The three checks below expect that
# . matches it as one character, that a capture group returns it
# unchanged, and that mend[1] is 3 when the group captures the two
# copies of it that follow "A".
  s=$'\u00a0'
  [[ $s =~ '^.$' ]] && print OK
  [[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
  [[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
  unset s
0:Raw IMETA characters in input string
>OK
>OK
>OK

  [[ foo =~ f.+ ]] ; print $?
# Every line in this test prints the exit status of one [[ ... ]] test.
# foo matches f.+ but not x.+, and bar matches b.+ but not x.+; the
# remaining lines combine those matches with !, && and a parenthesised
# group to check that negation is applied to the right part.
  [[ foo =~ x.+ ]] ; print $?
  [[ ! foo =~ f.+ ]] ; print $?
  [[ ! foo =~ x.+ ]] ; print $?
  [[ foo =~ f.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ x.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ f.+ && bar =~ x.+ ]] ; print $?
  [[ ! foo =~ f.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ f.+ && ! bar =~ b.+ ]] ; print $?
  [[ ! ( foo =~ f.+ && bar =~ b.+ ) ]] ; print $?
  [[ ! foo =~ x.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ x.+ && ! bar =~ b.+ ]] ; print $?
  [[ ! ( foo =~ x.+ && bar =~ b.+ ) ]] ; print $?
0:Regex result inversion detection
>0
>1
>1
>0
>0
>1
>1
>1
>1
>1
>0
>1
>0

# Exercise the pcre_compile, pcre_match and pcre_study builtins directly.
# Each print shows the exit status of the preceding pcre_match followed
# by MATCH and match.
# Note that PCRE_ANCHORED (pcre_compile -a) only means anchored at the
# start.
# Also note that we don't unset MATCH/match on a failed match (and it's an
# open issue as to whether or not we should), which is why the test
# unsets them itself after every print.
  pcre_compile '.(→.)'
  pcre_match foo→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match foo.bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match foo†bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match foo→†ar
  print $? $MATCH $match ; unset MATCH match
  pcre_study
  pcre_match foo→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_compile -a '.(→.)'
  pcre_match foo→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match o→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match o→b
  print $? $MATCH $match ; unset MATCH match
  pcre_compile 'x.(→.)'
  pcre_match xo→t
  print $? $MATCH $match ; unset MATCH match
  pcre_match Xo→t
  print $? $MATCH $match ; unset MATCH match
  pcre_compile -i 'x.(→.)'
  pcre_match xo→t
  print $? $MATCH $match ; unset MATCH match
  pcre_match Xo→t
  print $? $MATCH $match ; unset MATCH match
0:pcre_compile interface testing: basic, anchored & case-insensitive
>0 o→b →b
>1
>1
>0 o→† →†
>0 o→b →b
>1
>0 o→b →b
>0 o→b →b
>0 xo→t →t
>1
>0 xo→t →t
>0 Xo→t →t

  string="The following zip codes: 78884 90210 99513"
# With -b, pcre_match records the start and end offsets of the match in
# ZPCRE_OP.  The second call passes the end offset of the first match
# (the second word of ZPCRE_OP) to -n as the position to start from, so
# it is expected to find the next zip code.
  pcre_compile -m "\d{5}"
  pcre_match -b -- $string && print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
  pcre_match -b -n $ZPCRE_OP[(w)2] -- $string || print failed
  print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
0:pcre_match -b and pcre_match -n
>78884; ZPCRE_OP: 25 30
>90210; ZPCRE_OP: 31 36

# Embedded NULs are allowed in the plaintext, but not in the RE (although
# \0 written as the two characters backslash and zero is allowed in the RE).
# The printed lengths count the NUL bytes as characters of the match.
  [[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]]
  print "${#MATCH}; ${#match[1]}; ${#match[2]}"
0:ensure ASCII NUL passes in and out of matched plaintext
>6; 3; 3

# Ensure the long-form infix operator -pcre-match works: a matching
# pattern, a non-matching pattern, and a non-matching pattern negated
# with !.
  [[ foo -pcre-match ^f..$ ]]
  print $?
  [[ foo -pcre-match ^g..$ ]]
  print $?
  [[ ! foo -pcre-match ^g..$ ]]
  print $?
0:infix -pcre-match works
>0
>1
>0

# Bash mode; note that zsh documents that the variables are not updated on
# match failure, which remains different from bash.
# With bash_rematch set, the results are read from the BASH_REMATCH array:
# the whole match first, then the capture groups.  print -l writes the
# exit status and then each array element, prefixed with _ by the ${^...}
# expansion, on a line of its own.
  setopt bash_rematch
  [[ "goo" -pcre-match ^f.+$ ]] ; print $?
  [[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
  [[ "foot" -pcre-match ^f([aeiou]+)(.)$ ]]; print -l $? _${^BASH_REMATCH[@]}
  [[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
  [[ ! "goo" -pcre-match ^f.+$ ]] ; print $?
  unsetopt bash_rematch
0:bash-compatibility works
>1
>0
>_foo
>0
>_foot
>_oo
>_t
>0
>_foo
>0

# Run in a subshell because the failure this guards against is a crash
# (segmentation fault): only the subshell would die, not the shell
# running the tests.  The optional first group (.*_)? does not take part
# in the match here, and match[2] must still come back as "test".
# zsh ignores underscores in option names, so re_match_pcre is the same
# option as rematch_pcre.
  ( setopt re_match_pcre
    [[ test.txt =~ '^(.*_)?(test)' ]]
    echo $match[2] )
0:regression for segmentation fault, workers/38307
>test