%prep # Find a UTF-8 locale. setopt multibyte mb_ok= langs=(en_US.UTF-8 en_GB.UTF-8 en.UTF-8 $(locale -a 2>/dev/null | sed -e 's/utf8/UTF-8/' | grep UTF-8)) for LANG in $langs; do if [[ é = ? ]]; then mb_ok=1 break; fi done if [[ -z $mb_ok ]]; then ZTST_unimplemented="no UTF-8 locale or multibyte mode is not implemented" else print Testing multibyte with locale $LANG >&8 fi %test a=ténébreux for i in {1..9}; do print ${a[i]} for j in {$i..9}; do print $i $j ${a[i,j]} ${a[-j,-i]} done done 0:Basic indexing with multibyte characters >t >1 1 t x >1 2 té ux >1 3 tén eux >1 4 téné reux >1 5 ténéb breux >1 6 ténébr ébreux >1 7 ténébre nébreux >1 8 ténébreu énébreux >1 9 ténébreux ténébreux >é >2 2 é u >2 3 én eu >2 4 éné reu >2 5 énéb breu >2 6 énébr ébreu >2 7 énébre nébreu >2 8 énébreu énébreu >2 9 énébreux ténébreu >n >3 3 n e >3 4 né re >3 5 néb bre >3 6 nébr ébre >3 7 nébre nébre >3 8 nébreu énébre >3 9 nébreux ténébre >é >4 4 é r >4 5 éb br >4 6 ébr ébr >4 7 ébre nébr >4 8 ébreu énébr >4 9 ébreux ténébr >b >5 5 b b >5 6 br éb >5 7 bre néb >5 8 breu énéb >5 9 breux ténéb >r >6 6 r é >6 7 re né >6 8 reu éné >6 9 reux téné >e >7 7 e n >7 8 eu én >7 9 eux tén >u >8 8 u é >8 9 ux té >x >9 9 x t s=é print A${s[-2]}A B${s[-1]}B C${s[0]}C D${s[1]}D E${s[2]}E 0:Out of range subscripts with multibyte characters >AA BéB CéC DéD EE print ${a[(i)é]} ${a[(I)é]} ${a[${a[(i)é]},${a[(I)é]}]} 0:Reverse indexing with multibyte characters >2 4 éné print ${a[(r)én,(r)éb]} 0:Subscript searching with multibyte characters >énéb print ${a[(rb:1:)é,-1]} print ${a[(rb:2:)é,-1]} print ${a[(rb:3:)é,-1]} print ${a[(rb:4:)é,-1]} print ${a[(rb:5:)é,-1]} 0:Subscript searching with initial offset >énébreux >énébreux >ébreux >ébreux > print ${a[(rn:1:)é,-1]} print ${a[(rn:2:)é,-1]} print ${a[(rn:3:)é,-1]} 0:Subscript searching with count >énébreux >ébreux > print ${a[(R)én,(R)éb]} 0:Backward subscript searching with multibyte characters >énéb # Starting offsets with (R) seem to be so strange as to be hardly # worth testing. setopt extendedglob [[ $a = (#b)t(én)(éb)reux ]] || print "Failed to match." >&2 for i in {1..${#match}}; do print $match[i] $mbegin[i] $mend[i] ${a[$mbegin[i],$mend[i]]} done 0:Multibyte offsets in pattern tests >én 2 3 én >éb 4 5 éb b=${(U)a} print $b print ${(L)b} desdichado="Je suis le $a, le veuf, l'inconsolé" print ${(C)desdichado} lxiv="l'état c'est moi" print ${(C)lxiv} 0:Case modification of multibyte strings >TÉNÉBREUX >ténébreux >Je Suis Le Ténébreux, Le Veuf, L'Inconsolé >L'État C'Est Moi array=(ølaf ødd øpened án encyclopædia) barray=(${(U)array}) print $barray print ${(L)barray} print ${(C)array} print ${(C)barray} 0:Case modification of arrays with multibyte strings >ØLAF ØDD ØPENED ÁN ENCYCLOPÆDIA >ølaf ødd øpened án encyclopædia >Ølaf Ødd Øpened Án Encyclopædia >Ølaf Ødd Øpened Án Encyclopædia print $(( ##¥ )) pound=£ print $(( #pound )) alpha=α print $(( ##α )) $(( #alpha )) 0:Conversion to Unicode in mathematical expressions >165 >163 >945 945 unsetopt posix_identifiers expr='hähä=3 || exit 1; print $hähä' eval $expr setopt posix_identifiers (eval $expr) 1:POSIX_IDENTIFIERS option >3 ?(eval):1: command not found: hähä=3 foo="Ølaf«Ødd«øpénëd«ån«àpple" print -l ${(s.«.)foo} ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος." print -l ${=ioh} print ${(w)#ioh} 0:Splitting with multibyte characters >Ølaf >Ødd >øpénëd >ån >àpple >Ἐν >ἀρχῇ >ἦν >ὁ >λόγος, >καὶ >ὁ >λόγος >ἦν >πρὸς >τὸν >θεόν, >καὶ >θεὸς >ἦν >ὁ >λόγος. >17 read -d £ one read -d £ two print $one print $two 0:read with multibyte delimiter first >second (IFS=« read -d » -A array print -l $array) 0:read -A with multibyte IFS dominus >illuminatio >mea read -k2 -u0 twochars print $twochars 0:read multibyte characters <«»ignored >«»