diff options
author | Ulrich Drepper <drepper@redhat.com> | 2000-05-24 20:22:51 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2000-05-24 20:22:51 +0000 |
commit | acb5ee2e561276d64c6e26ef4b82f59a4db5ae90 (patch) | |
tree | 1f7ebfcaf8bf2874ae5cdb6348205dccfd9499c2 | |
parent | b7cbee1cb029f6471aa069552a69f04a3d1b4d70 (diff) | |
download | glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.tar.gz glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.tar.xz glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.zip |
Update.
2000-05-24 Ulrich Drepper <drepper@redhat.com> * locale/programs/ld-collate.c (struct element_t): Add mbseqorder and wcseqorder members. (struct locale_collate_t): Likewise. (collate_finish): Assign collation sequence value to each character. Create tables for output. (collate_output): Write out tables with collation sequence information. * locale/C-collate.c: Provide C locale data for collation sequence table. * locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC. * locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC. * posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before including fnmatch_loop.c. * posix/fnmatch_loop.c: Don't use strcoll while determining whether a character is matched by a range expression. Use collation sequence table. Outside glibc, fall back on simple character value comparison.
-rw-r--r-- | ChangeLog | 20 | ||||
-rw-r--r-- | locale/C-collate.c | 82 | ||||
-rw-r--r-- | locale/categories.def | 2 | ||||
-rw-r--r-- | locale/langinfo.h | 2 | ||||
-rw-r--r-- | localedata/ChangeLog | 4 | ||||
-rw-r--r-- | localedata/locales/iso14651_t1 | 1406 | ||||
-rw-r--r-- | posix/fnmatch.c | 15 | ||||
-rw-r--r-- | posix/fnmatch_loop.c | 135 |
8 files changed, 1644 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog index 9440cba153..5ce40be794 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2000-05-24 Ulrich Drepper <drepper@redhat.com> + + * locale/programs/ld-collate.c (struct element_t): Add mbseqorder + and wcseqorder members. + (struct locale_collate_t): Likewise. + (collate_finish): Assign collation sequence value to each character. + Create tables for output. + (collate_output): Write out tables with collation sequence information. + * locale/C-collate.c: Provide C locale data for collation sequence + table. + * locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and + _NL_COLLATE_COLLSEQWC. + * locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and + _NL_COLLATE_COLLSEQWC. + * posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before + include fnmatch_loop.c. + * posix/fnmatch_loop.c: Don't use strcoll while determining whether + character is matched by range expression. Use collation sequence + table. Outside glibc fall back on simple character value comparison. + 2000-05-24 Andreas Jaeger <aj@suse.de> * sysdeps/mips/elf/start.S (ENTRY_POINT): Align stack for double diff --git a/locale/C-collate.c b/locale/C-collate.c index 679ed30871..0ad0efe271 100644 --- a/locale/C-collate.c +++ b/locale/C-collate.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. 
@@ -20,12 +20,84 @@ #include <endian.h> #include "localeinfo.h" +static const char collseqmb[] = +{ + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', + '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', + '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', + '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', + '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', + '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', + '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', + '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', + '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', + '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', + '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', + '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', + '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', 
'\xe6', '\xe7', + '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', + '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', + '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff' +}; + +static const uint32_t collseqwc[] = +{ + L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07', + L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f', + L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17', + L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f', + L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27', + L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', L'\x2e', L'\x2f', + L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37', + L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f', + L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47', + L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f', + L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57', + L'\x58', L'\x59', L'\x5a', L'\x5b', L'\x5c', L'\x5d', L'\x5e', L'\x5f', + L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67', + L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f', + L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77', + L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f', + L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87', + L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f', + L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97', + L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f', + L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7', + L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf', + L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7', + L'\xb8', L'\xb9', L'\xba', L'\xbb', 
L'\xbc', L'\xbd', L'\xbe', L'\xbf', + L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7', + L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf', + L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7', + L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf', + L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7', + L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef', + L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7', + L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff' +}; + const struct locale_data _nl_C_LC_COLLATE = { _nl_C_name, NULL, 0, 0, /* no file mapped */ UNDELETABLE, - 16, + 18, { { word: 0 }, { string: NULL }, @@ -40,8 +112,10 @@ const struct locale_data _nl_C_LC_COLLATE = { string: NULL }, { string: NULL }, { string: NULL }, - { word: 0 }, { string: NULL }, - { string: NULL } + { string: NULL }, + { string: NULL }, + { string: collseqmb }, + { wstr: collseqwc } } }; diff --git a/locale/categories.def b/locale/categories.def index b02c1cac58..19e06879fd 100644 --- a/locale/categories.def +++ b/locale/categories.def @@ -58,6 +58,8 @@ DEFINE_CATEGORY DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZEMB, "collate-symb-hash-sizemb", std, word) DEFINE_ELEMENT (_NL_COLLATE_SYMB_TABLEMB, "collate-symb-tablemb", std, string) DEFINE_ELEMENT (_NL_COLLATE_SYMB_EXTRAMB, "collate-symb-extramb", std, string) + DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, string) + DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, string) ), NO_POSTLOAD) diff --git a/locale/langinfo.h b/locale/langinfo.h index 7d1183434a..b5ccac6b88 100644 --- a/locale/langinfo.h +++ b/locale/langinfo.h @@ -248,6 +248,8 @@ enum _NL_COLLATE_SYMB_HASH_SIZEMB, _NL_COLLATE_SYMB_TABLEMB, _NL_COLLATE_SYMB_EXTRAMB, + _NL_COLLATE_COLLSEQMB, + _NL_COLLATE_COLLSEQWC, _NL_NUM_LC_COLLATE, /* LC_CTYPE category: character classification. 
diff --git a/localedata/ChangeLog b/localedata/ChangeLog index e59ba8317a..3f40616dd3 100644 --- a/localedata/ChangeLog +++ b/localedata/ChangeLog @@ -1,3 +1,7 @@ +2000-05-24 Ulrich Drepper <drepper@redhat.com> + + * locales/iso14651_t1: New file. + 2000-05-15 Andreas Jaeger <aj@suse.de> * tst-fmon.data: Change testcase following fixes for diff --git a/localedata/locales/iso14651_t1 b/localedata/locales/iso14651_t1 new file mode 100644 index 0000000000..0402a1f510 --- /dev/null +++ b/localedata/locales/iso14651_t1 @@ -0,0 +1,1406 @@ +LC_COLLATE + +COLL_WEIGHT_MAX=4 + +# Déclaration des systèmes d'écriture / Declaration of scripts +script <SPECIAL> +script <LATIN> +script <ARABINT> +script <ARABFOR> +script <HEBREU> +script <GREC> +script <CYRIL> +script <HAN> + +# Déclaration des symboles internes / Declaration of internal symbols +# +# SYMB N° Expl. +# +collating-symbol <RES-1> +# +# <ARABINT>/<ARABFOR> +# +# +collating-symbol <ANO> # 2 normal --> voir/see <MIN> +collating-symbol <AIS> # 3 isol. +collating-symbol <AFI> # 4 final +collating-symbol <AII> # 5 initial +collating-symbol <AME> # 6 medial/m<e'>dian +# +collating-symbol <MIN> # 7 minuscule/minuscule (bas de casse/lower case) +collating-symbol <IMI> # 8 inférieur min./subscript min. (indice/index) +collating-symbol <EMI> # 9 supér. min./superscript min. 
(exposant/exponent) +collating-symbol <CAP> # 10 capitale/capital (haut de casse/upper case) +collating-symbol <AMI> # 8 minuscule grecque/Greek lower case +collating-symbol <ICA> # 11 inférieur en capitale/subscript capital +collating-symbol <ECA> # 12 supérieur en capitale/superscript capital +# +# <ARABINT>/<ARABFOR> +# +collating-symbol <AMA> # 13 accent madda +collating-symbol <AHA> # 14 accent hamza +collating-symbol <AHW> # 14-1 accent hamza/waw +collating-symbol <AHS> # 14-2 accent hamza under / hamza souscrit +collating-symbol <AYE> # 14-3 accent under yeh / accent souscrit du ya' +collating-symbol <YBA> # 14-4 accent hamza/yeh barree +# +collating-symbol <BAS> # 15 de base/basic (non accentué/non-accented) +# +collating-symbol <PCL> # 16 particulier/peculiar +collating-symbol <LIG> # 17 ligature/ligature +collating-symbol <ACA> # 18 accent aigu/acute accent +collating-symbol <GRA> # 20 accent grave/grave accent +collating-symbol <BRE> # 21 brève/breve +collating-symbol <CIR> # 22 accent circonflexe/circumflex accent +collating-symbol <CAR> # 23 caron/caron +collating-symbol <RNE> # 24 rond supérieur/ring above +collating-symbol <REU> # 25 tréma/diaeresis (ou/or umlaut) +collating-symbol <DAC> # 26 double ac. aigu/double acute ac. 
+collating-symbol <TIL> # 27 tilde/tilde +collating-symbol <PCT> # 28 point/dot +collating-symbol <OBL> # 29 barre oblique/oblique +collating-symbol <CDI> # 30 cédille/cedilla +collating-symbol <OGO> # 31 ogonek/ogonek +collating-symbol <MAC> # 32 macron/macron +# +# GREC +# +collating-symbol <TNS> # accent aigu/tonos/acute accent +collating-symbol <DLT> # tr<e'>ma/dialytica/diaeresis +collating-symbol <DTT> # dialytika tonos +# +collating-symbol <0> +collating-symbol <1> +collating-symbol <2> +collating-symbol <3> +collating-symbol <4> +collating-symbol <5> +collating-symbol <6> +collating-symbol <7> +collating-symbol <8> +collating-symbol <9> +# +collating-symbol <a> +collating-symbol <b> +collating-symbol <c> +collating-symbol <d> +collating-symbol <e> +collating-symbol <f> +collating-symbol <g> +collating-symbol <h> +collating-symbol <i> +collating-symbol <j> +collating-symbol <k> +collating-symbol <l> +collating-symbol <m> +collating-symbol <n> +collating-symbol <o> +collating-symbol <p> +collating-symbol <q> +collating-symbol <r> +collating-symbol <s> +collating-symbol <t> +collating-symbol <u> +collating-symbol <v> +collating-symbol <w> +collating-symbol <x> +collating-symbol <y> +collating-symbol <z> +# +# <ARABINT>/<ARABFOR> +# +collating-symbol <hamza> +collating-symbol <alef> +collating-symbol <beh> +collating-symbol <peh> +collating-symbol <teh_marbuta> +collating-symbol <teh> +collating-symbol <tteh> +collating-symbol <theh> +collating-symbol <jeem> +collating-symbol <tcheh> +collating-symbol <hah> +collating-symbol <khah> +collating-symbol <dal> +collating-symbol <ddal> +collating-symbol <thal> +collating-symbol <reh> +collating-symbol <rreh> +collating-symbol <zain> +collating-symbol <jeh> +collating-symbol <seen> +collating-symbol <sheen> +collating-symbol <sad> +collating-symbol <dad> +collating-symbol <tah> +collating-symbol <zah> +collating-symbol <ain> +collating-symbol <ghain> +collating-symbol <feh> +collating-symbol <qaf> +collating-symbol 
<kaf> +collating-symbol <keheh> +collating-symbol <gaf> +collating-symbol <lam> +collating-symbol <meem> +collating-symbol <noon> +collating-symbol <noon_ghunna> +collating-symbol <heh> +collating-symbol <heh_yeh> +collating-symbol <waw> +collating-symbol <alef_maksura> +collating-symbol <yeh_barree> +# +# <HEBREU> +# +collating-symbol <alef> +collating-symbol <bet> +collating-symbol <gimel> +collating-symbol <dalet> +collating-symbol <he> +collating-symbol <vav> +collating-symbol <zayin> +collating-symbol <het> +collating-symbol <tet> +collating-symbol <yod> +collating-symbol <kaf_fin> +collating-symbol <kaf> +collating-symbol <lamed> +collating-symbol <mem_fin> +collating-symbol <mem> +collating-symbol <nun_fin> +collating-symbol <nun> +collating-symbol <samekh> +collating-symbol <ayin> +collating-symbol <pe_fin> +collating-symbol <pe> +collating-symbol <tsad_fin> +collating-symbol <tsadi> +collating-symbol <qof> +collating-symbol <resh> +collating-symbol <shin> +collating-symbol <tav> +# +# GREC +# +collating-symbol <ALPHA> +collating-symbol <BETA> +collating-symbol <GAMMA> +collating-symbol <DELTA> +collating-symbol <EPSILON> +collating-symbol <ZETA> +collating-symbol <ETA> +collating-symbol <THETA> +collating-symbol <IOTA> +collating-symbol <KAPPA> +collating-symbol <LAMBDA> +collating-symbol <MU> +collating-symbol <NU> +collating-symbol <XI> +collating-symbol <OMICRON> +collating-symbol <PI> +collating-symbol <RHO> +collating-symbol <SIGMA> +collating-symbol <TAU> +collating-symbol <UPSILON> +collating-symbol <PHI> +collating-symbol <KHI> +collating-symbol <PSI> +collating-symbol <OMEGA> +# +# CYRIL +# +collating-symbol <CYR-A> +collating-symbol <CYR-BE> +collating-symbol <CYR-VE> +collating-symbol <CYR-GHE> +collating-symbol <CYR-DE> +collating-symbol <CYR-GZHE> +collating-symbol <CYR-DJE> +collating-symbol <CYR-IE> +collating-symbol <UKR-IE> +collating-symbol <CYR-IO> +collating-symbol <CYR-ZHE> +collating-symbol <CYR-ZE> +collating-symbol <CYR-DZE> 
+collating-symbol <CYR-I> +collating-symbol <UKR-I> +collating-symbol <UKR-YI> +collating-symbol <CYR-IBRE> +collating-symbol <CYR-JE> +collating-symbol <CYR-KA> +collating-symbol <CYR-EL> +collating-symbol <CYR-LJE> +collating-symbol <CYR-EM> +collating-symbol <CYR-EN> +collating-symbol <CYR-NJE> +collating-symbol <CYR-O> +collating-symbol <CYR-PE> +collating-symbol <CYR-ER> +collating-symbol <CYR-ES> +collating-symbol <CYR-TE> +collating-symbol <CYR-KJE> +collating-symbol <CYR-TSHE> +collating-symbol <CYR-OU> +collating-symbol <CYR-OUBRE> +collating-symbol <CYR-EF> +collating-symbol <CYR-HA> +collating-symbol <CYR-TSE> +collating-symbol <CYR-TSHE> +collating-symbol <CYR-DCHE> +collating-symbol <CYR-SHA> +collating-symbol <CYR-SHTSHA> +collating-symbol <CYR-SIGDUR> +collating-symbol <CYR-YEROU> +collating-symbol <CYR-SIGMOUIL> +collating-symbol <CYR-E> +collating-symbol <CYR-YOU> +collating-symbol <CYR-YA> + +# Ordre des symboles internes / Order of internal symbols +# +# SYMB. N° +# +<RES-1> +<MIN> +# forme de base (bas de casse, arabe intrinsèque, +# hébreu intrinsèque, etc. +# basic form (lower case, intrinsic Arabic +# intrinsic Hebrew and so on) +# +# <ARABINT>/<ARABFOR> +# +# +<ANO> # voir +<MIN> +<AIS> # isol. 
+# 3 +<AFI> # final +# 4 +<AII> # initial +# 5 +<AME> # medial/m<e'>dian +# 6 +# +<IMI> # 7 +<EMI> # 8 +<CAP> # 9 +<ICA> # 10 +<ECA> # 11 +<AMI> +#alternate lower case/ +# 12 +# +#minuscules spéciales après majuscules +# <ARABINT>/<ARABFOR> +# +<AMA> # accent madda #13 +<AHA> # accent hamza #14 +<AHW> # accent hamza/waw #14 1 +<AHS> # accent hamza under / hamza souscrit #14 2 +<AYE> # accent under yeh / accent souscrit du ya' #14 3 +<YBA> # accent hamza/yeh barree #14 4 +# +<BAS> # 15 +# +<PCL> # 16 +<LIG> # 17 +<ACA> # 18 +<GRA> # 19 +<BRE> # 20 +<CIR> # 21 +<CAR> # 22 +<RNE> # 23 +<REU> # 24 +<DAC> # 25 +<TIL> # 26 +<PCT> # 27 +<OBL> # 28 +<CDI> # 29 +<OGO> # 30 +<MAC> # 31 +# +# GREC +# +<TNS> # accent aigu/tonos/acute accent +<DLT> # tr<e'>ma/dialytica/diaeresis +<DTT> # dialytika tonos +# +<0> # 48 +<1> # 49 +<2> # 50 +<3> # 51 +<4> # 52 +<5> # 53 +<6> # 54 +<7> # 55 +<8> # 56 +<9> # 57 +# +<a> # 97 +<b> # 98 +<c> # 99 +<d> # 100 +<e> # 101 +<f> # 102 +<g> # 103 +<h> # 104 +<i> # 105 +<j> # 106 +<k> # 107 +<l> # 108 +<m> # 109 +<n> # 110 +<o> # 111 +<p> # 112 +<q> # 113 +<r> # 114 +<s> # 115 +<t> # 116 +<u> # 117 +<v> # 118 +<w> # 119 +<x> # 120 +<y> # 121 +<z> # 122 +<th># 122b +# +# <ARABINT>/<ARABFOR> +# +<hamza> +<alef> +<beh> +<peh> +<teh_marbuta> +<teh> +<tteh> +<theh> +<jeem> +<tcheh> +<hah> +<khah> +<dal> +<ddal> +<thal> +<reh> +<rreh> +<zain> +<jeh> +<seen> +<sheen> +<sad> +<dad> +<tah> +<zah> +<ain> +<ghain> +<feh> +<qaf> +<kaf> +<keheh> +<gaf> +<lam> +<meem> +<noon> +<noon_ghunna> +<heh> +<heh_yeh> +<waw> +<alef_maksura> +<yeh_barree> +# +# <HEBREU> +# +<alef> +<bet> +<gimel> +<dalet> +<he> +<vav> +<zayin> +<het> +<tet> +<yod> +<kaf_fin> +<kaf> +<lamed> +<mem_fin> +<mem> +<nun_fin> +<nun> +<samekh> +<ayin> +<pe_fin> +<pe> +<tsad_fin> +<tsadi> +<qof> +<resh> +<shin> +<tav> +# +#GREC +# +<ALPHA> +<BETA> +<GAMMA> +<DELTA> +<EPSILON> +<ZETA> +<ETA> +<THETA> +<IOTA> +<KAPPA> +<LAMBDA> +<MU> +<NU> +<XI> +<OMICRON> +<PI> +<RHO> +<SIGMA> +<TAU> +<UPSILON> 
+<PHI> +<CHI> +<PSI> +<OMEGA> +# +#CYRIL +# +<CYR-A> +<CYR-BE> +<CYR-VE> +<CYR-GHE> +<CYR-DE> +<CYR-GZHE> +<CYR-DJE> +<CYR-IE> +<UKR-IE> +<CYR-IO> +<CYR-ZHE> +<CYR-ZE> +<CYR-DZE> +<CYR-I> +<UKR-I> +<UKR-YI> +<CYR-IBRE> +<CYR-JE> +<CYR-KA> +<CYR-EL> +<CYR-LJE> +<CYR-EM> +<CYR-EN> +<CYR-NJE> +<CYR-O> +<CYR-PE> +<CYR-ER> +<CYR-ES> +<CYR-TE> +<CYR-KJE> +<CYR-TSHE> +<CYR-OU> +<CYR-OUBRE> +<CYR-EF> +<CYR-HA> +<CYR-TSE> +<CYR-TSHE> +<CYR-DCHE> +<CYR-SHA> +<CYR-SHTSHA> +<CYR-SIGDUR> +<CYR-YEROU> +<CYR-SIGMOUIL> +<CYR-E> +<CYR-YOU> +<CYR-YA> + +order_start <SPECIAL>;forward;backward;forward;forward,position +# +# Tout caractère non précisément défini sera considéré comme caractère spécial +# et considéré uniquement au dernier niveau. +# +# Any character not precisely specified will be considered as a special +# character and considered only at the last level. +# <U0000>......<U7FFFFFFF> IGNORE;IGNORE;IGNORE;<U0000>......<U7FFFFFFF> +# +# SYMB. N° GLY +# +<U0020> IGNORE;IGNORE;IGNORE;<U0020> # 32 <SP> +<U005F> IGNORE;IGNORE;IGNORE;<U005F> # 33 _ +<U0332> IGNORE;IGNORE;IGNORE;<U0332> # 34 <"_> +<U00AF> IGNORE;IGNORE;IGNORE;<U00AF> # 35 - (MACRON) +<U00AD> IGNORE;IGNORE;IGNORE;<U00AD> # 36 <SHY> +<U002D> IGNORE;IGNORE;IGNORE;<U002D> # 37 - +<U002C> IGNORE;IGNORE;IGNORE;<U002C> # 38 , +<U003B> IGNORE;IGNORE;IGNORE;<U003B> # 39 ; +<U003A> IGNORE;IGNORE;IGNORE;<U003A> # 40 : +<U0021> IGNORE;IGNORE;IGNORE;<U0021> # 41 ! +<U00A1> IGNORE;IGNORE;IGNORE;<U00A1> # 42 ¡ +<U003F> IGNORE;IGNORE;IGNORE;<U003F> # 43 ? +<U00BF> IGNORE;IGNORE;IGNORE;<U00BF> # 44 ¿ +<U002F> IGNORE;IGNORE;IGNORE;<U002F> # 45 / +<U0338> IGNORE;IGNORE;IGNORE;<U0338> # 46 <"/> +<U002E> IGNORE;IGNORE;IGNORE;<U002E> # 47 . 
+<U00B7> IGNORE;IGNORE;IGNORE;<U00B7> # 58 · +<U00B8> IGNORE;IGNORE;IGNORE;<U00B8> # 59 ¸ +<U0328> IGNORE;IGNORE;IGNORE;<U0328> # 60 <";> +<U0027> IGNORE;IGNORE;IGNORE;<U0027> # 61 ' +<U2018> IGNORE;IGNORE;IGNORE;<U2018> # 62 <'6> +<U2019> IGNORE;IGNORE;IGNORE;<U2019> # 63 <'9> +<U0022> IGNORE;IGNORE;IGNORE;<U0022> # 64 " +<U201C> IGNORE;IGNORE;IGNORE;<U201C> # 65 <"6> +<U201D> IGNORE;IGNORE;IGNORE;<U201D> # 66 <"9> +<U00AB> IGNORE;IGNORE;IGNORE;<U00AB> # 67 « +<U00BB> IGNORE;IGNORE;IGNORE;<U00BB> # 68 » +<U0028> IGNORE;IGNORE;IGNORE;<U0028> # 69 ( +<U207D> IGNORE;IGNORE;IGNORE;<U207D> # 70 <(S> +<U0029> IGNORE;IGNORE;IGNORE;<U0029> # 71 ) +<U207E> IGNORE;IGNORE;IGNORE;<U207E> # 72 <)S> +<U005B> IGNORE;IGNORE;IGNORE;<U005B> # 73 [ +<U005D> IGNORE;IGNORE;IGNORE;<U005D> # 74 ] +<U007B> IGNORE;IGNORE;IGNORE;<U007B> # 75 { +<U007D> IGNORE;IGNORE;IGNORE;<U007D> # 76 } +<U00A7> IGNORE;IGNORE;IGNORE;<U00A7> # 77 § +<U00B6> IGNORE;IGNORE;IGNORE;<U00B6> # 78 ¶ +<U00A9> IGNORE;IGNORE;IGNORE;<U00A9> # 79 © +<U00AE> IGNORE;IGNORE;IGNORE;<U00AE> # 80 ® +<U2122> IGNORE;IGNORE;IGNORE;<U2122> # 81 <TM> +<U0040> IGNORE;IGNORE;IGNORE;<U0040> # 82 @ +<U00A4> IGNORE;IGNORE;IGNORE;<U00A4> # 83 ¤ +<U00A2> IGNORE;IGNORE;IGNORE;<U00A2> # 84 ¢ +<U0024> IGNORE;IGNORE;IGNORE;<U0024> # 85 $ +<U00A3> IGNORE;IGNORE;IGNORE;<U00A3> # 86 £ +<U00A5> IGNORE;IGNORE;IGNORE;<U00A5> # 87 ¥ +<U002A> IGNORE;IGNORE;IGNORE;<U002A> # 88 * +<U005C> IGNORE;IGNORE;IGNORE;<U005C> # 89 \ +<U0026> IGNORE;IGNORE;IGNORE;<U0026> # 90 & +<U0023> IGNORE;IGNORE;IGNORE;<U0023> # 91 # +<U0025> IGNORE;IGNORE;IGNORE;<U0025> # 92 % +<U207B> IGNORE;IGNORE;IGNORE;<U207B> # 93 <-S> +<U002B> IGNORE;IGNORE;IGNORE;<U002B> # 94 + +<U207A> IGNORE;IGNORE;IGNORE;<U207A> # 95 <+S> +<U00B1> IGNORE;IGNORE;IGNORE;<U00B1> # 96 ± +<U00B4> IGNORE;IGNORE;IGNORE;<0> # 123 ´ +<U0060> IGNORE;IGNORE;IGNORE;<1> # 124 ` +<U0306> IGNORE;IGNORE;IGNORE;<2> # 125 <"(> +<U005E> IGNORE;IGNORE;IGNORE;<3> # 126 ^ +<U030C> IGNORE;IGNORE;IGNORE;<4> # 127 
<"<> +<U030A> IGNORE;IGNORE;IGNORE;<5> # 128 <"0> +<U00A8> IGNORE;IGNORE;IGNORE;<6> # 129 ¨ +<U030B> IGNORE;IGNORE;IGNORE;<7> # 130 <""> +<U007E> IGNORE;IGNORE;IGNORE;<8> # 131 ~ +<U0307> IGNORE;IGNORE;IGNORE;<9> # 132 <".> +<U00F7> IGNORE;IGNORE;IGNORE;<a> # 133 ¸ +<U00D7> IGNORE;IGNORE;IGNORE;<b> # 134 ´ +<U2260> IGNORE;IGNORE;IGNORE;<c> # 135 <!=> +<U003C> IGNORE;IGNORE;IGNORE;<d> # 136 < +<U2264> IGNORE;IGNORE;IGNORE;<e> # 137 <=<> +<U003D> IGNORE;IGNORE;IGNORE;<f> # 138 = +<U2265> IGNORE;IGNORE;IGNORE;<g> # 139 </>=> +<U003E> IGNORE;IGNORE;IGNORE;<h> # 140 > +<U00AC> IGNORE;IGNORE;IGNORE;<i> # 141 ¬ +<U007C> IGNORE;IGNORE;IGNORE;<j> # 142 | +<U00A6> IGNORE;IGNORE;IGNORE;<k> # 143 | +<U00B0> IGNORE;IGNORE;IGNORE;<l> # 144 ° +<U00B5> IGNORE;IGNORE;IGNORE;<m> # 145 m +<U2126> IGNORE;IGNORE;IGNORE;<n> # 146 <Om> +<U220E> IGNORE;IGNORE;IGNORE;<o> # 147 <FP> +<U250C> IGNORE;IGNORE;IGNORE;<p> # 148 <_V/>> +<U252C> IGNORE;IGNORE;IGNORE;<q> # 149 <_V-> +<U2510> IGNORE;IGNORE;IGNORE;<r> # 150 <_V<w> +<U251C> IGNORE;IGNORE;IGNORE;<s> # 151 <_!/>> +<U253C> IGNORE;IGNORE;IGNORE;<t> # 152 <_!-> +<U2524> IGNORE;IGNORE;IGNORE;<u> # 153 <_!<> +<U2514> IGNORE;IGNORE;IGNORE;<v> # 154 <_A/>> +<U2534> IGNORE;IGNORE;IGNORE;<w> # 155 <_-A> +<U2518> IGNORE;IGNORE;IGNORE;<x> # 156 <_A<> +<U2502> IGNORE;IGNORE;IGNORE;<y> # 157 <_!> +<U2500> IGNORE;IGNORE;IGNORE;<z> # 158 <_-> # +<U2501> IGNORE;IGNORE;IGNORE;<U2501> # 159 <_=> +<U2190> IGNORE;IGNORE;IGNORE;<U2190> # 160 <<-> +<U2192> IGNORE;IGNORE;IGNORE;<U2192> # 161 <-/>> +<U20D1> IGNORE;IGNORE;IGNORE;<U20D1> # 162 <"7> +<U2191> IGNORE;IGNORE;IGNORE;<U2191> # 163 <-!> +<U2193> IGNORE;IGNORE;IGNORE;<U2193> # 164 <-v> +<U266A> IGNORE;IGNORE;IGNORE;<U266A> # 165 <_d!> +<U2571> IGNORE;IGNORE;IGNORE;<U2571> # 166 <_/>//> +<U2572> IGNORE;IGNORE;IGNORE;<U2572> # 167 <_<\> +<U25E2> IGNORE;IGNORE;IGNORE;<U25E2> # 168 <_./>//> +<U25E3> IGNORE;IGNORE;IGNORE;<U25E3> # 169 <_.<\> # # <ARABINT>/<ARABFOR> # +<U060C> IGNORE;IGNORE;IGNORE;<U060C> 
+<U061B> IGNORE;IGNORE;IGNORE;<U061B> +<U061F> IGNORE;IGNORE;IGNORE;<U061F> +<U0640> IGNORE;IGNORE;IGNORE;<U0640> +<U066A> IGNORE;IGNORE;IGNORE;<U066A> +<U066B> IGNORE;IGNORE;IGNORE;<U066B> +<U066C> IGNORE;IGNORE;IGNORE;<U066C> +<U066D> IGNORE;IGNORE;IGNORE;<U066D> +<U064B> IGNORE;IGNORE;IGNORE;<U064B> #<fathatan_no> +<UFE70> IGNORE;IGNORE;IGNORE;<UFE70> #<fathatan_is> +<UFE71> IGNORE;IGNORE;IGNORE;<UFE71> #<fathatan_me> +<U064C> IGNORE;IGNORE;IGNORE;<U064C> #<dammatan_no> +<UFE72> IGNORE;IGNORE;IGNORE;<UFE72> #<dammatan_is> +<U064D> IGNORE;IGNORE;IGNORE;<U064D> #<kasratan_no> +<UFE74> IGNORE;IGNORE;IGNORE;<UFE74> #<kasratan_is> +<U064E> IGNORE;IGNORE;IGNORE;<U064E> #<fatha_no> +<UFE76> IGNORE;IGNORE;IGNORE;<UFE76> #<fatha_is> +<UFE77> IGNORE;IGNORE;IGNORE;<UFE77> #<fatha_me> +<U064F> IGNORE;IGNORE;IGNORE;<U064F> #<damma_no> +<UFE78> IGNORE;IGNORE;IGNORE;<UFE78> #<damma_is> +<UFE79> IGNORE;IGNORE;IGNORE;<UFE79> #<damma_me> +<U0650> IGNORE;IGNORE;IGNORE;<U0650> #<kasra_no> +<UFE7A> IGNORE;IGNORE;IGNORE;<UFE7A> #<kasra_is> +<UFE7B> IGNORE;IGNORE;IGNORE;<UFE7B> #<kasra_me> +<U0651> IGNORE;IGNORE;IGNORE;<U0651> #<shadda_no> +<UFE7C> IGNORE;IGNORE;IGNORE;<UFE7C> #<shadda_is> +<UFE7D> IGNORE;IGNORE;IGNORE;<UFE7D> #<shadda_me> +<U0652> IGNORE;IGNORE;IGNORE;<U0652> #<sukun_no> +<UFE7E> IGNORE;IGNORE;IGNORE;<UFE7E> #<sukun_is> +<UFE7F> IGNORE;IGNORE;IGNORE;<UFE7F> #<sukun_me> # # <HEBREU> # +<U05B0> IGNORE;IGNORE;IGNORE;<U05B0> #point_sheva +<U05B1> IGNORE;IGNORE;IGNORE;<U05B1> #point_hataf_segol +<U05B2> IGNORE;IGNORE;IGNORE;<U05B2> #point_hataf_patah +<U05B3> IGNORE;IGNORE;IGNORE;<U05B3> #point_hataf_qamats +<U05B4> IGNORE;IGNORE;IGNORE;<U05B4> #point_hiriq +<U05B5> IGNORE;IGNORE;IGNORE;<U05B5> #point_tsere +<U05B6> IGNORE;IGNORE;IGNORE;<U05B6> #point_segol +<U05B7> IGNORE;IGNORE;IGNORE;<U05B7> #point_patah +<U05B8> IGNORE;IGNORE;IGNORE;<U05B8> #point_qamats +<U05B9> IGNORE;IGNORE;IGNORE;<U05B9> #point_holam +<U05BB> IGNORE;IGNORE;IGNORE;<U05BB> #point_qubuts +<U05BC> 
IGNORE;IGNORE;IGNORE;<U05BC> #point_dagesh +<U05BD> IGNORE;IGNORE;IGNORE;<U05BD> #point_meteg +<U05BE> IGNORE;IGNORE;IGNORE;<U05BE> #maqaf +<U05BF> IGNORE;IGNORE;IGNORE;<U05BF> #point_rafe +<U05C0> IGNORE;IGNORE;IGNORE;<U05C0> #paseq +<U05C1> IGNORE;IGNORE;IGNORE;<U05C1> #point_shin_dot +<U05C2> IGNORE;IGNORE;IGNORE;<U05C2> #point_sin_dot +<U05C3> IGNORE;IGNORE;IGNORE;<U05C3> #sof pasuq + +order_start <LATIN>;forward;backward;forward;forward,position +# +<U00A0> U0020;<BAS>;<MIN>;IGNORE # 170<NBSP> +# +<U0030> <0>;<BAS>;<MIN>;IGNORE # 171 0 +<U0031> <1>;<BAS>;<MIN>;IGNORE # 172 1 +<U0032> <2>;<BAS>;<MIN>;IGNORE # 173 2 +<U0033> <3>;<BAS>;<MIN>;IGNORE # 174 3 +<U0034> <4>;<BAS>;<MIN>;IGNORE # 175 4 +<U0035> <5>;<BAS>;<MIN>;IGNORE # 176 5 +<U0036> <6>;<BAS>;<MIN>;IGNORE # 177 6 +<U0037> <7>;<BAS>;<MIN>;IGNORE # 178 7 +<U0038> <8>;<BAS>;<MIN>;IGNORE # 179 8 +<U0039> <9>;<BAS>;<MIN>;IGNORE # 180 9 +# +<U215B> <0>;<GRA>;<MIN>;IGNORE # 181 <18> +<U00BC> <0>;<BRE>;<MIN>;IGNORE # 182 ¼ +<U215C> <0>;<CIR>;<MIN>;IGNORE # 183 <38> +<U215D> <0>;<RNE>;<MIN>;IGNORE # 184 <58> +<U215E> <0>;<DAC>;<MIN>;IGNORE # 185 <78> +<U00BD> <0>;<CAR>;<MIN>;IGNORE # 186 ½ +<U00BE> <0>;<REU>;<MIN>;IGNORE # 187 ¾ +<U2070> <0>;<BAS>;<EMI>;IGNORE # 188 <0S> +<U00B9> <1>;<BAS>;<EMI>;IGNORE # 189 ¹ +<U00B2> <2>;<BAS>;<EMI>;IGNORE # 190 ² +<U00B3> <3>;<BAS>;<EMI>;IGNORE # 191 ³ +<U2074> <4>;<BAS>;<EMI>;IGNORE # 192 <4S> +<U2075> <5>;<BAS>;<EMI>;IGNORE # 193 <5S> +<U2076> <6>;<BAS>;<EMI>;IGNORE # 194 <6S> +<U2077> <7>;<BAS>;<EMI>;IGNORE # 195 <7S> +<U2078> <8>;<BAS>;<EMI>;IGNORE # 196 <8S> +<U2079> <9>;<BAS>;<EMI>;IGNORE # 197 <9S> +# +<U0061> <a>;<BAS>;<MIN>;IGNORE # 198 a +<U00AA> <a>;<PCL>;<EMI>;IGNORE # 199 ª +<U00E1> <a>;<ACA>;<MIN>;IGNORE # 200 á +<U00E0> <a>;<GRA>;<MIN>;IGNORE # 201 à +<U00E2> <a>;<CIR>;<MIN>;IGNORE # 202 â +<U00E3> <a>;<TIL>;<MIN>;IGNORE # 203 ã +<U00E4> <a>;<REU>;<MIN>;IGNORE # 204 ä +<U00E5> <a>;<RNE>;<MIN>;IGNORE # 205 å +<U0103> <a>;<BRE>;<MIN>;IGNORE # 206 <a(> +<U0105> 
<a>;<OGO>;<MIN>;IGNORE # 207 <a;> +<U0101> <a>;<MAC>;<MIN>;IGNORE # 208 <a-> +<U00E6> <a><e>;<LIG><LIG>;<MIN><MIN>;IGNORE # 209 æ +<U0062> <b>;<BAS>;<MIN>;IGNORE # 210 b +<U0063> <c>;<BAS>;<MIN>;IGNORE # 211 c +<U00E7> <c>;<CDI>;<MIN>;IGNORE # 212 ç +<U0107> <c>;<ACA>;<MIN>;IGNORE # 213 <c'> +<U0109> <c>;<CIR>;<MIN>;IGNORE # 214 <c/>> +<U010D> <c>;<CAR>;<MIN>;IGNORE # 215 <c<> +<U010B> <c>;<PCT>;<MIN>;IGNORE # 216 <c.> +<U0064> <d>;<BAS>;<MIN>;IGNORE # 217 d +<U00F0> <d>;<PCL>;<MIN>;IGNORE # 218 ð +<U010F> <d>;<CAR>;<MIN>;IGNORE # 219 <d<> +<U0111> <d>;<OBL>;<MIN>;IGNORE # 220 <d//> +<U0065> <e>;<BAS>;<MIN>;IGNORE # 221 e +<U00E9> <e>;<ACA>;<MIN>;IGNORE # 222 é +<U00E8> <e>;<GRA>;<MIN>;IGNORE # 223 è +<U00EA> <e>;<CIR>;<MIN>;IGNORE # 224 ê +<U00EB> <e>;<REU>;<MIN>;IGNORE # 225 ë +<U011B> <e>;<CAR>;<MIN>;IGNORE # 226 <e<> +<U0117> <e>;<PCT>;<MIN>;IGNORE # 227 <e.> +<U0119> <e>;<OGO>;<MIN>;IGNORE # 228 <e;> +<U0113> <e>;<MAC>;<MIN>;IGNORE # 229 <e-> +<U0066> <f>;<BAS>;<MIN>;IGNORE # 230 f +<U0067> <g>;<BAS>;<MIN>;IGNORE # 231 g +<U011F> <g>;<BRE>;<MIN>;IGNORE # 232 <g(> +<U011D> <g>;<CIR>;<MIN>;IGNORE # 233 <g/>> +<U0121> <g>;<PCT>;<MIN>;IGNORE # 234 <g.> +<U0123> <g>;<CDI>;<MIN>;IGNORE # 235 <g,> +<U0068> <h>;<BAS>;<MIN>;IGNORE # 236 h +<U0125> <h>;<CIR>;<MIN>;IGNORE # 237 <h/>> +<U0127> <h>;<OBL>;<MIN>;IGNORE # 238 <h//> +<U0069> <i>;<BAS>;<MIN>;IGNORE # 239 i +<U00ED> <i>;<ACA>;<MIN>;IGNORE # 240 í +<U00EC> <i>;<GRA>;<MIN>;IGNORE # 241 ì +<U00EE> <i>;<CIR>;<MIN>;IGNORE # 242 î +<U00EF> <i>;<REU>;<MIN>;IGNORE # 243 ï +<U0131> <i>;<PCL>;<MIN>;IGNORE # 244 <i.> +<U0129> <i>;<TIL>;<MIN>;IGNORE # 245 <i?> +<U012F> <i>;<OGO>;<MIN>;IGNORE # 246 <i;> +<U012B> <i>;<MAC>;<MIN>;IGNORE # 247 <i-> +<U0133> <i><j>;<LIG><LIG>;<MIN><MIN>;IGNORE # 248 <ij> +<U006A> <j>;<BAS>;<MIN>;IGNORE # 249 j +<U0135> <j>;<CIR>;<MIN>;IGNORE # 250 <j/>> +<U006B> <k>;<BAS>;<MIN>;IGNORE # 251 k +<U0138> <k>;<PCL>;<MIN>;IGNORE # 252 <kk> +<U0137> <k>;<CDI>;<MIN>;IGNORE # 253 <k,> +<U006C> 
<l>;<BAS>;<MIN>;IGNORE # 254 l +<U013A> <l>;<ACA>;<MIN>;IGNORE # 255 <l'> +<U013E> <l>;<CAR>;<MIN>;IGNORE # 256 <l<> +<U0142> <l>;<OBL>;<MIN>;IGNORE # 257 <l//> +<U013C> <l>;<CDI>;<MIN>;IGNORE # 258 <l,> +<U0140> <l>;<PCT>;<MIN>;IGNORE # 259 <l.> +<U006D> <m>;<BAS>;<MIN>;IGNORE # 260 m +<U006E> <n>;<BAS>;<MIN>;IGNORE # 261 n +<U00F1> <n>;<TIL>;<MIN>;IGNORE # 262 ñ +<U0149> <n>;<PCL>;<MIN>;IGNORE # 263 <'n> +<U0144> <n>;<ACA>;<MIN>;IGNORE # 264 <n'> +<U0148> <n>;<CAR>;<MIN>;IGNORE # 265 <n<> +<U0146> <n>;<CDI>;<MIN>;IGNORE # 266 <n,> +<U014B> <n><g>;<LIG><LIG>;<MIN><MIN>;IGNORE # 267 <ng> +<U006F> <o>;<BAS>;<MIN>;IGNORE # 268 o +<U00BA> <o>;<PCL>;<EMI>;IGNORE # 269 º +<U00F3> <o>;<ACA>;<MIN>;IGNORE # 270 ó +<U00F2> <o>;<GRA>;<MIN>;IGNORE # 271 ò +<U00F4> <o>;<CIR>;<MIN>;IGNORE # 272 ô +<U00F5> <o>;<TIL>;<MIN>;IGNORE # 273 õ +<U00F6> <o>;<REU>;<MIN>;IGNORE # 274 ö +<U00F8> <o>;<OBL>;<MIN>;IGNORE # 275 ø +<U0151> <o>;<DAC>;<MIN>;IGNORE # 276 <o"> +<U014D> <o>;<MAC>;<MIN>;IGNORE # 277 <o-> +<U0153> <o><e>;<LIG><LIG>;<MIN><MIN>;IGNORE # 278 <oe> +<U0070> <p>;<BAS>;<MIN>;IGNORE # 279 p +<U0071> <q>;<BAS>;<MIN>;IGNORE # 280 q +<U0072> <r>;<BAS>;<MIN>;IGNORE # 281 r +<U0155> <r>;<ACA>;<MIN>;IGNORE # 282 <r'> +<U0159> <r>;<CAR>;<MIN>;IGNORE # 283 <r<> +<U0157> <r>;<CDI>;<MIN>;IGNORE # 284 <r,> +<U0073> <s>;<BAS>;<MIN>;IGNORE # 285 s +<U015B> <s>;<ACA>;<MIN>;IGNORE # 286 <s'> +<U015D> <s>;<CIR>;<MIN>;IGNORE # 287 <s/>> +<U0161> <s>;<CAR>;<MIN>;IGNORE # 288 <s<> +<U015F> <s>;<CDI>;<MIN>;IGNORE # 289 <s,> +<U00DF> <s><s>;<LIG><LIG>;<MIN><MIN>;IGNORE # 290 ß +<U0074> <t>;<BAS>;<MIN>;IGNORE # 291 t +<U0165> <t>;<CAR>;<MIN>;IGNORE # 292 <t<> +<U0167> <t>;<OBL>;<MIN>;IGNORE # 293 <t//> +<U0163> <t>;<CDI>;<MIN>;IGNORE # 294 <t,> +<U0075> <u>;<BAS>;<MIN>;IGNORE # 296 u +<U00FA> <u>;<ACA>;<MIN>;IGNORE # 297 ú +<U00F9> <u>;<GRA>;<MIN>;IGNORE # 298 ù +<U00FB> <u>;<CIR>;<MIN>;IGNORE # 299 û +<U00FC> <u>;<REU>;<MIN>;IGNORE # 300 ü +<U016D> <u>;<BRE>;<MIN>;IGNORE # 301 <u(> +<U016F> 
<u>;<RNE>;<MIN>;IGNORE # 302 <u0> +<U0171> <u>;<DAC>;<MIN>;IGNORE # 303 <u"> +<U0169> <u>;<TIL>;<MIN>;IGNORE # 304 <u?> +<U0173> <u>;<OGO>;<MIN>;IGNORE # 305 <u;> +<U016B> <u>;<MAC>;<MIN>;IGNORE # 306 <u-> +<U0076> <v>;<BAS>;<MIN>;IGNORE # 307 v +<U0077> <w>;<BAS>;<MIN>;IGNORE # 308 w +<U0175> <w>;<CIR>;<MIN>;IGNORE # 309 <w/>> +<U0078> <x>;<BAS>;<MIN>;IGNORE # 310 x +<U0079> <y>;<BAS>;<MIN>;IGNORE # 311 y +<U00FD> <y>;<ACA>;<MIN>;IGNORE # 312 ý +<U00FF> <y>;<REU>;<MIN>;IGNORE # 313 _ +<U0177> <y>;<CIR>;<MIN>;IGNORE # 314 <y/>> +<U007A> <z>;<BAS>;<MIN>;IGNORE # 315 z +<U017A> <z>;<ACA>;<MIN>;IGNORE # 316 <z'> +<U017E> <z>;<CAR>;<MIN>;IGNORE # 317 <z<> +<U017C> <z>;<PCT>;<MIN>;IGNORE # 318 <z.> +<U00FE> <th>;<BAS>;<MIN>;IGNORE # 318b Þ # +<U0041> <a>;<BAS>;<CAP>;IGNORE # 319 A +<U00C1> <a>;<ACA>;<CAP>;IGNORE # 320 Á +<U00C0> <a>;<GRA>;<CAP>;IGNORE # 321 À +<U00C2> <a>;<CIR>;<CAP>;IGNORE # 322 Â +<U00C3> <a>;<TIL>;<CAP>;IGNORE # 323 Ã +<U00C4> <a>;<REU>;<CAP>;IGNORE # 324 Ä +<U00C5> <a>;<RNE>;<CAP>;IGNORE # 325 Å +<U0102> <a>;<BRE>;<CAP>;IGNORE # 326 <A(> +<U0104> <a>;<OGO>;<CAP>;IGNORE # 327 <A;> +<U0100> <a>;<MAC>;<CAP>;IGNORE # 328 <A-> +<U00C6> <a><e>;<LIG><LIG>;<CAP><CAP>;IGNORE # 329 Æ +<U0042> <b>;<BAS>;<CAP>;IGNORE # 330 B +<U0043> <c>;<BAS>;<CAP>;IGNORE # 331 C +<U00C7> <c>;<CDI>;<CAP>;IGNORE # 332 Ç +<U0106> <c>;<ACA>;<CAP>;IGNORE # 333 <C'> +<U0108> <c>;<CIR>;<CAP>;IGNORE # 334 <C/>> +<U010C> <c>;<CAR>;<CAP>;IGNORE # 335 <C>> +<U010A> <c>;<PCT>;<CAP>;IGNORE # 336 <C.> +<U0044> <d>;<BAS>;<CAP>;IGNORE # 337 D +<U00D0> <d>;<PCL>;<CAP>;IGNORE # 338 Ð +<U010E> <d>;<CAR>;<CAP>;IGNORE # 339 <D<> +<U0110> <d>;<OBL>;<CAP>;IGNORE # 340 <D//> +<U0045> <e>;<BAS>;<CAP>;IGNORE # 341 E +<U00C9> <e>;<ACA>;<CAP>;IGNORE # 342 É +<U00C8> <e>;<GRA>;<CAP>;IGNORE # 343 È +<U00CA> <e>;<CIR>;<CAP>;IGNORE # 344 Ê +<U00CB> <e>;<REU>;<CAP>;IGNORE # 345 Ë +<U011A> <e>;<CAR>;<CAP>;IGNORE # 346 <E<> +<U0116> <e>;<PCT>;<CAP>;IGNORE # 347 <E.> +<U0118> <e>;<OGO>;<CAP>;IGNORE # 348 <E;> 
+<U0112> <e>;<MAC>;<CAP>;IGNORE # 349 <E-> +<U0046> <f>;<BAS>;<CAP>;IGNORE # 350 F +<U0047> <g>;<BAS>;<CAP>;IGNORE # 351 G +<U011E> <g>;<BRE>;<CAP>;IGNORE # 352 <G(> +<U011C> <g>;<CIR>;<CAP>;IGNORE # 353 <G/>> +<U0120> <g>;<PCT>;<CAP>;IGNORE # 354 <G.> +<U0122> <g>;<CDI>;<CAP>;IGNORE # 355 <G,> +<U0048> <h>;<BAS>;<CAP>;IGNORE # 356 H +<U0124> <h>;<CIR>;<CAP>;IGNORE # 357 <H/>> +<U0126> <h>;<OBL>;<CAP>;IGNORE # 358 <H//> +<U0049> <i>;<BAS>;<CAP>;IGNORE # 359 I +<U00CD> <i>;<ACA>;<CAP>;IGNORE # 360 Í +<U00CC> <i>;<GRA>;<CAP>;IGNORE # 361 Ì +<U00CE> <i>;<CIR>;<CAP>;IGNORE # 362 Î +<U00CF> <i>;<REU>;<CAP>;IGNORE # 363 Ï +<U0130> <i>;<PCL>;<CAP>;IGNORE # 364 <I.> +<U0128> <i>;<TIL>;<CAP>;IGNORE # 365 <I?> +<U012E> <i>;<OGO>;<CAP>;IGNORE # 366 <I;> +<U012A> <i>;<MAC>;<CAP>;IGNORE # 367 <I-> +<U0132> <i><j>;<LIG><LIG>;<CAP><CAP>;IGNORE # 368 <IJ> +<U004A> <j>;<BAS>;<CAP>;IGNORE # 369 J +<U0134> <j>;<CIR>;<CAP>;IGNORE # 370 <J/>> +<U004B> <k>;<BAS>;<CAP>;IGNORE # 371 K +<U0136> <k>;<CDI>;<CAP>;IGNORE # 372 <K,> +<U004C> <l>;<BAS>;<CAP>;IGNORE # 373 L +<U0139> <l>;<ACA>;<CAP>;IGNORE # 374 <L'> +<U013D> <l>;<CAR>;<CAP>;IGNORE # 375 <L<> +<U0141> <l>;<OBL>;<CAP>;IGNORE # 376 <L//> +<U013B> <l>;<CDI>;<CAP>;IGNORE # 377 <L,> +<U013F> <l>;<PCT>;<CAP>;IGNORE # 378 <L.> +<U004D> <m>;<BAS>;<CAP>;IGNORE # 379 M +<U004E> <n>;<BAS>;<CAP>;IGNORE # 380 N +<U00D1> <n>;<TIL>;<CAP>;IGNORE # 381 Ñ +<U0143> <n>;<ACA>;<CAP>;IGNORE # 382 <N'> +<U0147> <n>;<CAR>;<CAP>;IGNORE # 383 <N<> +<U0145> <n>;<CDI>;<CAP>;IGNORE # 384 <N,> +<U014A> <n><g>;<LIG><LIG>;<CAP><CAP>;IGNORE # 385 <NG> +<U004F> <o>;<BAS>;<CAP>;IGNORE # 386 O +<U00D3> <o>;<ACA>;<CAP>;IGNORE # 387 Ó +<U00D2> <o>;<GRA>;<CAP>;IGNORE # 388 Ò +<U00D4> <o>;<CIR>;<CAP>;IGNORE # 389 Ô +<U00D5> <o>;<TIL>;<CAP>;IGNORE # 390 Õ +<U00D6> <o>;<REU>;<CAP>;IGNORE # 391 Ö +<U00D8> <o>;<OBL>;<CAP>;IGNORE # 392 Ø +<U0150> <o>;<DAC>;<CAP>;IGNORE # 393 <O"> +<U014C> <o>;<MAC>;<CAP>;IGNORE # 394 <O-> +<U0152> <o><e>;<LIG><LIG>;<CAP><CAP>;IGNORE # 395 
<OE> +<U0050> <p>;<BAS>;<CAP>;IGNORE # 396 P +<U0051> <q>;<BAS>;<CAP>;IGNORE # 397 Q +<U0052> <r>;<BAS>;<CAP>;IGNORE # 398 R +<U0154> <r>;<ACA>;<CAP>;IGNORE # 399 <R'> +<U0158> <r>;<CAR>;<CAP>;IGNORE # 400 <R<> +<U0156> <r>;<CDI>;<CAP>;IGNORE # 401 <R,> +<U0053> <s>;<BAS>;<CAP>;IGNORE # 402 S +<U015A> <s>;<ACA>;<CAP>;IGNORE # 403 <S'> +<U015C> <s>;<CIR>;<CAP>;IGNORE # 404 <S/>> +<U0160> <s>;<CAR>;<CAP>;IGNORE # 405 <S<> +<U015E> <s>;<CDI>;<CAP>;IGNORE # 406 <S,> +<U0054> <t>;<BAS>;<CAP>;IGNORE # 407 T +<U0164> <t>;<CAR>;<CAP>;IGNORE # 408 <T<> +<U0166> <t>;<OBL>;<CAP>;IGNORE # 409 <T//> +<U0162> <t>;<CDI>;<CAP>;IGNORE # 410 <T,> +<U0055> <u>;<BAS>;<CAP>;IGNORE # 412 U +<U00DA> <u>;<ACA>;<CAP>;IGNORE # 413 Ú +<U00D9> <u>;<GRA>;<CAP>;IGNORE # 414 Ù +<U00DB> <u>;<CIR>;<CAP>;IGNORE # 415 Û +<U00DC> <u>;<REU>;<CAP>;IGNORE # 416 Ü +<U016C> <u>;<BRE>;<CAP>;IGNORE # 417 <U(> +<U016E> <u>;<RNE>;<CAP>;IGNORE # 418 <U0> +<U0170> <u>;<DAC>;<CAP>;IGNORE # 419 <U"> +<U0168> <u>;<TIL>;<CAP>;IGNORE # 420 <U?> +<U0172> <u>;<OGO>;<CAP>;IGNORE # 421 <U;> +<U016A> <u>;<MAC>;<CAP>;IGNORE # 422 <U-> +<U0056> <v>;<BAS>;<CAP>;IGNORE # 423 V +<U0057> <w>;<BAS>;<CAP>;IGNORE # 424 W +<U0174> <w>;<CIR>;<CAP>;IGNORE # 425 <W/>> +<U0058> <x>;<BAS>;<CAP>;IGNORE # 426 X +<U0059> <y>;<BAS>;<CAP>;IGNORE # 427 Y +<U00DD> <y>;<ACA>;<CAP>;IGNORE # 428 Ý +<U0176> <y>;<CIR>;<CAP>;IGNORE # 429 <Y/>> +<U0178> <y>;<REU>;<CAP>;IGNORE # 430 <Y:> +<U005A> <z>;<BAS>;<CAP>;IGNORE # 431 Z +<U0179> <z>;<ACA>;<CAP>;IGNORE # 432 <Z'> +<U017D> <z>;<CAR>;<CAP>;IGNORE # 433 <Z<> +<U017B> <z>;<PCT>;<CAP>;IGNORE # 434 <Z.> +<U00DE> <th>;<BAS>;<CAP>;IGNORE # 411 þ + +order_start <ARABINT>;forward;forward;forward;forward,position +<U0660> <0>;<BAS>;<MIN>;IGNORE +<U06F0> <0>;<PCL>;<MIN>;IGNORE +<U0661> <1>;<BAS>;<MIN>;IGNORE +<U06F1> <1>;<PCL>;<MIN>;IGNORE +<U0662> <2>;<BAS>;<MIN>;IGNORE +<U06F2> <2>;<PCL>;<MIN>;IGNORE +<U0663> <3>;<BAS>;<MIN>;IGNORE +<U06F3> <3>;<PCL>;<MIN>;IGNORE +<U0664> <4>;<BAS>;<MIN>;IGNORE +<U06F4> 
<4>;<PCL>;<MIN>;IGNORE +<U0665> <5>;<BAS>;<MIN>;IGNORE +<U06F5> <5>;<PCL>;<MIN>;IGNORE +<U0666> <6>;<BAS>;<MIN>;IGNORE +<U06F6> <6>;<PCL>;<MIN>;IGNORE +<U0667> <7>;<BAS>;<MIN>;IGNORE +<U06F7> <7>;<PCL>;<MIN>;IGNORE +<U0668> <8>;<BAS>;<MIN>;IGNORE +<U06F8> <8>;<PCL>;<MIN>;IGNORE +<U0669> <9>;<BAS>;<MIN>;IGNORE +<U06F9> <9>;<PCL>;<MIN>;IGNORE +<U0621> <hamza>;<BAS>;<MIN>;IGNORE +<U0622> <alef>;<AMA>;<MIN>;IGNORE +<U0623> <alef>;<AHA>;<MIN>;IGNORE +<U0625> <alef>;<AHS>;<MIN>;IGNORE +<U0627> <alef>;<BAS>;<MIN>;IGNORE +<U0628> <beh>;<BAS>;<MIN>;IGNORE +<U067E> <peh>;<BAS>;<MIN>;IGNORE +<U0629> <teh_marbuta>;<BAS>;<MIN>;IGNORE +<U062A> <teh>;<BAS>;<MIN>;IGNORE +<U0679> <tteh>;<BAS>;<MIN>;IGNORE +<U062B> <theh>;<BAS>;<MIN>;IGNORE +<U062C> <jeem>;<BAS>;<MIN>;IGNORE +<U0686> <tcheh>;<BAS>;<MIN>;IGNORE +<U062D> <hah>;<BAS>;<MIN>;IGNORE +<U062E> <khah>;<BAS>;<MIN>;IGNORE +<U062F> <dal>;<BAS>;<MIN>;IGNORE +<U0688> <ddal>;<BAS>;<MIN>;IGNORE +<U0630> <thal>;<BAS>;<MIN>;IGNORE +<U0631> <reh>;<BAS>;<MIN>;IGNORE +<U0691> <rreh>;<BAS>;<MIN>;IGNORE +<U0632> <zain>;<BAS>;<MIN>;IGNORE +<U0698> <jeh>;<BAS>;<MIN>;IGNORE +<U0633> <seen>;<BAS>;<MIN>;IGNORE +<U0634> <sheen>;<BAS>;<MIN>;IGNORE +<U0635> <sad>;<BAS>;<MIN>;IGNORE +<U0636> <dad>;<BAS>;<MIN>;IGNORE +<U0637> <tah>;<BAS>;<MIN>;IGNORE +<U0638> <zah>;<BAS>;<MIN>;IGNORE +<U0639> <ain>;<BAS>;<MIN>;IGNORE +<U063A> <ghain>;<BAS>;<MIN>;IGNORE +<U0641> <feh>;<BAS>;<MIN>;IGNORE +<U0642> <qaf>;<BAS>;<MIN>;IGNORE +<U0643> <kaf>;<BAS>;<MIN>;IGNORE +<U06A9> <keheh>;<BAS>;<MIN>;IGNORE +<U06AF> <gaf>;<BAS>;<MIN>;IGNORE +<U0644> <lam>;<BAS>;<MIN>;IGNORE +<U0645> <meem>;<BAS>;<MIN>;IGNORE +<U0646> <noon>;<BAS>;<MIN>;IGNORE +<U06BA> <noon_ghunna>;<BAS>;<MIN>;IGNORE +<U0647> <heh>;<BAS>;<MIN>;IGNORE +<U06C0> <heh_yeh>;<BAS>;<MIN>;IGNORE +<U0624> <waw>;<AHW>;<MIN>;IGNORE +<U0648> <waw>;<BAS>;<MIN>;IGNORE +<U0649> <alef_maksura>;<BAS>;<MIN>;IGNORE +<U0626> <alef_maksura><hamza>;<BAS><BAS>;<MIN><MIN>;IGNORE +<U064A> <alef_maksura>;<AYE>;<MIN>;IGNORE
+<U06D3> <yeh_barree>;<YBA>;<MIN>;IGNORE +<U06D2> <yeh_barree>;<BAS>;<MIN>;IGNORE + +order_start <ARABFOR>;backward;backward;backward;forward,position +<UFE80> <hamza>;<BAS>;<AIS>;IGNORE +<UFE81> <alef>;<AMA>;<AIS>;IGNORE +<UFE82> <alef>;<AMA>;<AFI>;IGNORE +<UFE83> <alef>;<AHA>;<AIS>;IGNORE +<UFE84> <alef>;<AHA>;<AFI>;IGNORE +<UFE87> <alef>;<AHS>;<AIS>;IGNORE +<UFE88> <alef>;<AHS>;<AFI>;IGNORE +<UFE8D> <alef>;<BAS>;<AIS>;IGNORE +<UFE8E> <alef>;<BAS>;<AFI>;IGNORE +<UFE8F> <beh>;<BAS>;<AIS>;IGNORE +<UFE90> <beh>;<BAS>;<AFI>;IGNORE +<UFE91> <beh>;<BAS>;<AII>;IGNORE +<UFE92> <beh>;<BAS>;<AME>;IGNORE +<UFB56> <peh>;<BAS>;<AIS>;IGNORE +<UFB57> <peh>;<BAS>;<AFI>;IGNORE +<UFB58> <peh>;<BAS>;<AII>;IGNORE +<UFB59> <peh>;<BAS>;<AME>;IGNORE +<UFE93> <teh_marbuta>;<BAS>;<AIS>;IGNORE +<UFE94> <teh_marbuta>;<BAS>;<AFI>;IGNORE +<UFE95> <teh>;<BAS>;<AIS>;IGNORE +<UFE96> <teh>;<BAS>;<AFI>;IGNORE +<UFE97> <teh>;<BAS>;<AII>;IGNORE +<UFE98> <teh>;<BAS>;<AME>;IGNORE +<UFB66> <tteh>;<BAS>;<AIS>;IGNORE +<UFB67> <tteh>;<BAS>;<AFI>;IGNORE +<UFB68> <tteh>;<BAS>;<AII>;IGNORE +<UFB69> <tteh>;<BAS>;<AME>;IGNORE +<UFE99> <theh>;<BAS>;<AIS>;IGNORE +<UFE9A> <theh>;<BAS>;<AFI>;IGNORE +<UFE9B> <theh>;<BAS>;<AII>;IGNORE +<UFE9C> <theh>;<BAS>;<AME>;IGNORE +<UFE9D> <jeem>;<BAS>;<AIS>;IGNORE +<UFE9E> <jeem>;<BAS>;<AFI>;IGNORE +<UFE9F> <jeem>;<BAS>;<AII>;IGNORE +<UFEA0> <jeem>;<BAS>;<AME>;IGNORE +<UFB7A> <tcheh>;<BAS>;<AIS>;IGNORE +<UFB7B> <tcheh>;<BAS>;<AFI>;IGNORE +<UFB7C> <tcheh>;<BAS>;<AII>;IGNORE +<UFB7D> <tcheh>;<BAS>;<AME>;IGNORE +<UFEA1> <hah>;<BAS>;<AIS>;IGNORE +<UFEA2> <hah>;<BAS>;<AFI>;IGNORE +<UFEA3> <hah>;<BAS>;<AII>;IGNORE +<UFEA4> <hah>;<BAS>;<AME>;IGNORE +<UFEA5> <khah>;<BAS>;<AIS>;IGNORE +<UFEA6> <khah>;<BAS>;<AFI>;IGNORE +<UFEA7> <khah>;<BAS>;<AII>;IGNORE +<UFEA8> <khah>;<BAS>;<AME>;IGNORE +<UFEA9> <dal>;<BAS>;<AIS>;IGNORE +<UFEAA> <dal>;<BAS>;<AFI>;IGNORE +<UFB88> <ddal>;<BAS>;<AIS>;IGNORE +<UFB89> <ddal>;<BAS>;<AFI>;IGNORE +<UFEAB> <thal>;<BAS>;<AIS>;IGNORE +<UFEAC> 
<thal>;<BAS>;<AFI>;IGNORE +<UFEAD> <reh>;<BAS>;<AIS>;IGNORE +<UFEAE> <reh>;<BAS>;<AFI>;IGNORE +<UFB8C> <rreh>;<BAS>;<AIS>;IGNORE +<UFB8D> <rreh>;<BAS>;<AFI>;IGNORE +<UFEAF> <zain>;<BAS>;<AIS>;IGNORE +<UFEB0> <zain>;<BAS>;<AFI>;IGNORE +<UFB8A> <jeh>;<BAS>;<AIS>;IGNORE +<UFB8B> <jeh>;<BAS>;<AFI>;IGNORE +<UFEB1> <seen>;<BAS>;<AIS>;IGNORE +<UFEB2> <seen>;<BAS>;<AFI>;IGNORE +<UFEB3> <seen>;<BAS>;<AII>;IGNORE +<UFEB4> <seen>;<BAS>;<AME>;IGNORE +<UFEB5> <sheen>;<BAS>;<AIS>;IGNORE +<UFEB6> <sheen>;<BAS>;<AFI>;IGNORE +<UFEB7> <sheen>;<BAS>;<AII>;IGNORE +<UFEB8> <sheen>;<BAS>;<AME>;IGNORE +<UFEB9> <sad>;<BAS>;<AIS>;IGNORE +<UFEBA> <sad>;<BAS>;<AFI>;IGNORE +<UFEBB> <sad>;<BAS>;<AII>;IGNORE +<UFEBC> <sad>;<BAS>;<AME>;IGNORE +<UFEBD> <dad>;<BAS>;<AIS>;IGNORE +<UFEBE> <dad>;<BAS>;<AFI>;IGNORE +<UFEBF> <dad>;<BAS>;<AII>;IGNORE +<UFEC0> <dad>;<BAS>;<AME>;IGNORE +<UFEC1> <tah>;<BAS>;<AIS>;IGNORE +<UFEC2> <tah>;<BAS>;<AFI>;IGNORE +<UFEC3> <tah>;<BAS>;<AII>;IGNORE +<UFEC4> <tah>;<BAS>;<AME>;IGNORE +<UFEC5> <zah>;<BAS>;<AIS>;IGNORE +<UFEC6> <zah>;<BAS>;<AFI>;IGNORE +<UFEC7> <zah>;<BAS>;<AII>;IGNORE +<UFEC8> <zah>;<BAS>;<AME>;IGNORE +<UFEC9> <ain>;<BAS>;<AIS>;IGNORE +<UFECA> <ain>;<BAS>;<AFI>;IGNORE +<UFECB> <ain>;<BAS>;<AII>;IGNORE +<UFECC> <ain>;<BAS>;<AME>;IGNORE +<UFECD> <ghain>;<BAS>;<AIS>;IGNORE +<UFECE> <ghain>;<BAS>;<AFI>;IGNORE +<UFECF> <ghain>;<BAS>;<AII>;IGNORE +<UFED0> <ghain>;<BAS>;<AME>;IGNORE +<UFED1> <feh>;<BAS>;<AIS>;IGNORE +<UFED2> <feh>;<BAS>;<AFI>;IGNORE +<UFED3> <feh>;<BAS>;<AII>;IGNORE +<UFED4> <feh>;<BAS>;<AME>;IGNORE +<UFED5> <qaf>;<BAS>;<AIS>;IGNORE +<UFED6> <qaf>;<BAS>;<AFI>;IGNORE +<UFED7> <qaf>;<BAS>;<AII>;IGNORE +<UFED8> <qaf>;<BAS>;<AME>;IGNORE +<UFED9> <kaf>;<BAS>;<AIS>;IGNORE +<UFEDA> <kaf>;<BAS>;<AFI>;IGNORE +<UFEDB> <kaf>;<BAS>;<AII>;IGNORE +<UFEDC> <kaf>;<BAS>;<AME>;IGNORE +<UFB8E> <keheh>;<BAS>;<AIS>;IGNORE +<UFB8F> <keheh>;<BAS>;<AFI>;IGNORE +<UFB90> <keheh>;<BAS>;<AII>;IGNORE +<UFB91> <keheh>;<BAS>;<AME>;IGNORE +<UFB92> <gaf>;<BAS>;<AIS>;IGNORE 
+<UFB93> <gaf>;<BAS>;<AFI>;IGNORE +<UFB94> <gaf>;<BAS>;<AII>;IGNORE +<UFB95> <gaf>;<BAS>;<AME>;IGNORE +<UFEDD> <lam>;<BAS>;<AIS>;IGNORE +<UFEDE> <lam>;<BAS>;<AFI>;IGNORE +<UFEDF> <lam>;<BAS>;<AII>;IGNORE +<UFEE0> <lam>;<BAS>;<AME>;IGNORE +<UFEE1> <meem>;<BAS>;<AIS>;IGNORE +<UFEE2> <meem>;<BAS>;<AFI>;IGNORE +<UFEE3> <meem>;<BAS>;<AII>;IGNORE +<UFEE4> <meem>;<BAS>;<AME>;IGNORE +<UFEE5> <noon>;<BAS>;<AIS>;IGNORE +<UFEE6> <noon>;<BAS>;<AFI>;IGNORE +<UFEE7> <noon>;<BAS>;<AII>;IGNORE +<UFEE8> <noon>;<BAS>;<AME>;IGNORE +<UFB9E> <noon_ghunna>;<BAS>;<AIS>;IGNORE +<UFB9F> <noon_ghunna>;<BAS>;<AFI>;IGNORE +<UFEE9> <heh>;<BAS>;<AIS>;IGNORE <UFEEA> <heh>;<BAS>;<AFI>;IGNORE +<UFEEB> <heh>;<BAS>;<AII>;IGNORE <UFEEC> <heh>;<BAS>;<AME>;IGNORE +<UFBA4> <heh_yeh>;<BAS>;<AIS>;IGNORE <UFBA5> <heh_yeh>;<BAS>;<AFI>;IGNORE +<UFE85> <waw>;<AHW>;<AIS>;IGNORE <UFE86> <waw>;<AHW>;<AFI>;IGNORE +<UFEED> <waw>;<BAS>;<AIS>;IGNORE <UFEEE> <waw>;<BAS>;<AFI>;IGNORE +<UFEEF> <alef_maksura>;<BAS>;<AIS>;IGNORE +<UFEF0> <alef_maksura>;<BAS>;<AFI>;IGNORE +<UFE89> <alef_maksura><hamza>;<BAS><BAS>;<AIS><AIS>;IGNORE +<UFE8A> <alef_maksura><hamza>;<BAS><BAS>;<AFI><AIS>;IGNORE +<UFE8B> <alef_maksura><hamza>;<BAS><BAS>;<AII><AIS>;IGNORE +<UFE8C> <alef_maksura><hamza>;<BAS><BAS>;<AME><AIS>;IGNORE +<UFEF1> <alef_maksura>;<AYE>;<AIS>;IGNORE +<UFEF2> <alef_maksura>;<AYE>;<AFI>;IGNORE +<UFEF3> <alef_maksura>;<AYE>;<AII>;IGNORE +<UFEF4> <alef_maksura>;<AYE>;<AME>;IGNORE +<UFBB0> <yeh_barree>;<YBA>;<AIS>;IGNORE +<UFBB1> <yeh_barree>;<YBA>;<AFI>;IGNORE +<UFBAE> <yeh_barree>;<BAS>;<AIS>;IGNORE +<UFBAF> <yeh_barree>;<BAS>;<AFI>;IGNORE +<UFEF5> <lam><alef>;<BAS><AMA>;<AIS><AFI>;IGNORE +<UFEF6> <lam><alef>;<BAS><AMA>;<AFI><AFI>;IGNORE +<UFEF7> <lam><alef>;<BAS><AHA>;<AIS><AFI>;IGNORE +<UFEF8> <lam><alef>;<BAS><AHA>;<AFI><AFI>;IGNORE +<UFEF9> <lam><alef>;<BAS><AHS>;<AIS><AFI>;IGNORE +<UFEFA> <lam><alef>;<BAS><AHS>;<AFI><AFI>;IGNORE +<UFEFB> <lam><alef>;<BAS><BAS>;<AIS><AFI>;IGNORE +<UFEFC>
<lam><alef>;<BAS><BAS>;<AFI><AFI>;IGNORE + +order_start <HEBREU>;forward;forward;forward;forward,position +<U05D0> <alef>;<BAS>;IGNORE;IGNORE +<U05D1> <bet>;<BAS>;IGNORE;IGNORE +<U05D2> <gimel>;<BAS>;IGNORE;IGNORE +<U05D3> <dalet>;<BAS>;IGNORE;IGNORE +<U05D4> <he>;<BAS>;IGNORE;IGNORE +<U05D5> <vav>;<BAS>;IGNORE;IGNORE +<U05D6> <zayin>;<BAS>;IGNORE;IGNORE +<U05D7> <het>;<BAS>;IGNORE;IGNORE +<U05D8> <tet>;<BAS>;IGNORE;IGNORE +<U05D9> <yod>;<BAS>;IGNORE;IGNORE +<U05DA> <kaf_fin>;<BAS>;IGNORE;IGNORE +<U05DB> <kaf>;<BAS>;IGNORE;IGNORE +<U05DC> <lamed>;<BAS>;IGNORE;IGNORE +<U05DD> <mem_fin>;<BAS>;IGNORE;IGNORE +<U05DE> <mem>;<BAS>;IGNORE;IGNORE +<U05DF> <nun_fin>;<BAS>;IGNORE;IGNORE +<U05E0> <nun>;<BAS>;IGNORE;IGNORE +<U05E1> <samekh>;<BAS>;IGNORE;IGNORE +<U05E2> <ayin>;<BAS>;IGNORE;IGNORE +<U05E3> <pe_fin>;<BAS>;IGNORE;IGNORE +<U05E4> <pe>;<BAS>;IGNORE;IGNORE +<U05E5> <tsadi_fin>;<BAS>;IGNORE;IGNORE +<U05E6> <tsadi>;<BAS>;IGNORE;IGNORE +<U05E7> <qof>;<BAS>;IGNORE;IGNORE +<U05E8> <resh>;<BAS>;IGNORE;IGNORE +<U05E9> <shin>;<BAS>;IGNORE;IGNORE +<U05EA> <tav>;<BAS>;IGNORE;IGNORE + +order_start <GREC>;forward;backward;forward +<U0391> <ALPHA>;<BAS>;<CAP>;IGNORE +<U03B1> <ALPHA>;<BAS>;<AMI>;IGNORE +<U0386> <ALPHA>;<TNS>;<CAP>;IGNORE +<U03AC> <ALPHA>;<TNS>;<AMI>;IGNORE +<U0392> <BETA>;<BAS>;<CAP>;IGNORE +<U03B2> <BETA>;<BAS>;<AMI>;IGNORE +<U03D0> <BETA>;<PCL>;<AMI>;IGNORE +<U0393> <GAMMA>;<BAS>;<CAP>;IGNORE +<U03B3> <GAMMA>;<BAS>;<AMI>;IGNORE +<U03DC> <GAMMA>;<PCL>;<CAP>;IGNORE # digamma copte +<U0394> <DELTA>;<BAS>;<CAP>;IGNORE +<U03B4> <DELTA>;<BAS>;<AMI>;IGNORE +<U03EA> <DELTA>;<PCL>;<CAP>;IGNORE # GANGIA COPTE +<U03EB> <DELTA>;<PCL>;<AMI>;IGNORE # gangia copte +<U0395> <EPSILON>;<BAS>;<CAP>;IGNORE +<U03B5> <EPSILON>;<BAS>;<AMI>;IGNORE +<U0388> <EPSILON>;<TNS>;<CAP>;IGNORE +<U03AD> <EPSILON>;<TNS>;<AMI>;IGNORE +<U0396> <ZETA>;<BAS>;<CAP>;IGNORE +<U03B6> <ZETA>;<BAS>;<AMI>;IGNORE +<U03E8> <ZETA>;<PCL>;<CAP>;IGNORE # HORI COPTE +<U03E9> <ZETA>;<PCL>;<AMI>;IGNORE # hori copte
+<U0397> <ETA>;<BAS>;<CAP>;IGNORE +<U03B7> <ETA>;<BAS>;<AMI>;IGNORE +<U0389> <ETA>;<TNS>;<CAP>;IGNORE +<U03AE> <ETA>;<TNS>;<AMI>;IGNORE +<U0398> <THETA>;<BAS>;<CAP>;IGNORE +<U03B8> <THETA>;<BAS>;<AMI>;IGNORE +<U03D1> <THETA>;<PCL>;<AMI>;IGNORE +<U0399> <IOTA>;<BAS>;<CAP>;IGNORE +<U03B9> <IOTA>;<BAS>;<AMI>;IGNORE +<U038A> <IOTA>;<TNS>;<CAP>;IGNORE +<U03AF> <IOTA>;<TNS>;<AMI>;IGNORE +<U03AA> <IOTA>;<DLT>;<CAP>;IGNORE +<U03CA> <IOTA>;<DLT>;<AMI>;IGNORE +<U0390> <IOTA>;<DTT>;<AMI>;IGNORE +<U03F3> <IOTA>;<OGO>;<AMI>;IGNORE # yot +<U039A> <KAPPA>;<BAS>;<CAP>;IGNORE +<U03BA> <KAPPA>;<BAS>;<AMI>;IGNORE +<U03DE> <KAPPA>;<PCL>;<CAP>;IGNORE # koppa copte +<U03F0> <KAPPA>;<PCL>;<AMI>;IGNORE +<U03E6> <KAPPA>;<LIG>;<CAP>;IGNORE # KHEI COPTE +<U03E7> <KAPPA>;<LIG>;<AMI>;IGNORE # khei copte +<U039B> <LAMBDA>;<BAS>;<CAP>;IGNORE +<U03BB> <LAMBDA>;<BAS>;<AMI>;IGNORE +<U039C> <MU>;<BAS>;<CAP>;IGNORE +<U03BC> <MU>;<BAS>;<AMI>;IGNORE +<U039D> <NU>;<BAS>;<CAP>;IGNORE +<U03BD> <NU>;<BAS>;<AMI>;IGNORE +<U039E> <XI>;<BAS>;<CAP>;IGNORE +<U03BE> <XI>;<BAS>;<AMI>;IGNORE +<U039F> <OMICRON>;<BAS>;<CAP>;IGNORE +<U03BF> <OMICRON>;<BAS>;<AMI>;IGNORE +<U038C> <OMICRON>;<TNS>;<CAP>;IGNORE +<U03CC> <OMICRON>;<TNS>;<AMI>;IGNORE +<U03A0> <PI>;<BAS>;<CAP>;IGNORE +<U03C0> <PI>;<BAS>;<AMI>;IGNORE +<U03D6> <PI>;<PCL>;<AMI>;IGNORE +<U03A1> <RHO>;<BAS>;<CAP>;IGNORE +<U03C1> <RHO>;<BAS>;<AMI>;IGNORE +<U03F1> <RHO>;<PCL>;<AMI>;IGNORE +<U03A3> <SIGMA>;<BAS>;<CAP>;IGNORE +<U03C3> <SIGMA>;<BAS>;<AMI>;IGNORE +<U03C2> <SIGMA>;<PCL>;<AMI>;IGNORE +<U03DA> <SIGMA>;<PCL>;<CAP>;IGNORE # STIGMA ARCH. 
+<U03EC> <SIGMA>;<LIG>;<CAP>;IGNORE # SHIMA COPTE +<U03ED> <SIGMA>;<LIG>;<AMI>;IGNORE # shima copte +<U03F2> <SIGMA>;<OGO>;<AMI>;IGNORE +<U03A4> <TAU>;<BAS>;<CAP>;IGNORE +<U03C4> <TAU>;<BAS>;<AMI>;IGNORE +<U03EE> <TAU>;<PCL>;<CAP>;IGNORE # DEI COPTE +<U03EF> <TAU>;<PCL>;<AMI>;IGNORE # dei copte +<U03A5> <UPSILON>;<BAS>;<CAP>;IGNORE +<U03C5> <UPSILON>;<BAS>;<AMI>;IGNORE +<U038E> <UPSILON>;<TNS>;<CAP>;IGNORE +<U03CD> <UPSILON>;<TNS>;<AMI>;IGNORE +<U03AB> <UPSILON>;<DLT>;<CAP>;IGNORE +<U03CB> <UPSILON>;<DLT>;<AMI>;IGNORE +<U03B0> <UPSILON>;<DTT>;<AMI>;IGNORE +<U03D4> <UPSILON>;<DTT>;<CAP>;IGNORE +<U03D2> <UPSILON>;<OGO>;<CAP>;IGNORE +<U03D3> <UPSILON>;<MAC>;<CAP>;IGNORE +<U03A6> <PHI>;<BAS>;<CAP>;IGNORE +<U03C6> <PHI>;<BAS>;<AMI>;IGNORE +<U03D5> <PHI>;<PCL>;<AMI>;IGNORE +<U03E4> <PHI>;<LIG>;<CAP>;IGNORE # FEI COPTE +<U03E5> <PHI>;<LIG>;<AMI>;IGNORE # fei copte +<U03A7> <KHI>;<BAS>;<CAP>;IGNORE +<U03C7> <KHI>;<BAS>;<AMI>;IGNORE +<U03E0> <KHI>;<PCL>;<CAP>;IGNORE # sampi copte +<U03A8> <PSI>;<BAS>;<CAP>;IGNORE +<U03C8> <PSI>;<BAS>;<AMI>;IGNORE +<U03E2> <PSI>;<PCL>;<CAP>;IGNORE # SHEI COPTE +<U03E3> <PSI>;<PCL>;<AMI>;IGNORE # shei copte +<U03A9> <OMEGA>;<BAS>;<CAP>;IGNORE +<U03C9> <OMEGA>;<BAS>;<AMI>;IGNORE +<U038F> <OMEGA>;<TNS>;<CAP>;IGNORE +<U03CE> <OMEGA>;<TNS>;<AMI>;IGNORE + +order_start <CYRIL>;forward;forward;forward;forward,position +<U0430> <CYR-A>;<BAS>;<MIN>;IGNORE +<U0410> <CYR-A>;<BAS>;<CAP>;IGNORE +<U0431> <CYR-BE>;<BAS>;<MIN>;IGNORE +<U0411> <CYR-BE>;<BAS>;<CAP>;IGNORE +<U0432> <CYR-VE>;<BAS>;<MIN>;IGNORE +<U0412> <CYR-VE>;<BAS>;<CAP>;IGNORE +<U0433> <CYR-GHE>;<BAS>;<MIN>;IGNORE +<U0413> <CYR-GHE>;<BAS>;<CAP>;IGNORE +<U0434> <CYR-DE>;<BAS>;<MIN>;IGNORE +<U0414> <CYR-DE>;<BAS>;<CAP>;IGNORE +<U0453> <CYR-GZHE>;<BAS>;<MIN>;IGNORE +<U0403> <CYR-GZHE>;<BAS>;<CAP>;IGNORE +<U0452> <CYR-DJE>;<BAS>;<MIN>;IGNORE +<U0402> <CYR-DJE>;<BAS>;<CAP>;IGNORE +<U0435> <CYR-IE>;<BAS>;<MIN>;IGNORE +<U0415> <CYR-IE>;<BAS>;<CAP>;IGNORE +<U0454> <UKR-IE>;<BAS>;<MIN>;IGNORE +<U0404> 
<UKR-IE>;<BAS>;<CAP>;IGNORE +<U0451> <CYR-IO>;<BAS>;<MIN>;IGNORE +<U0401> <CYR-IO>;<BAS>;<CAP>;IGNORE +<U0436> <CYR-ZHE>;<BAS>;<MIN>;IGNORE +<U0416> <CYR-ZHE>;<BAS>;<CAP>;IGNORE +<U0437> <CYR-ZE>;<BAS>;<MIN>;IGNORE +<U0417> <CYR-ZE>;<BAS>;<CAP>;IGNORE +<U0455> <CYR-DZE>;<BAS>;<MIN>;IGNORE +<U0405> <CYR-DZE>;<BAS>;<CAP>;IGNORE +<U0438> <CYR-I>;<BAS>;<MIN>;IGNORE +<U0418> <CYR-I>;<BAS>;<CAP>;IGNORE +<U0456> <UKR-I>;<BAS>;<MIN>;IGNORE +<U0406> <UKR-I>;<BAS>;<CAP>;IGNORE +<U0457> <UKR-YI>;<BAS>;<MIN>;IGNORE +<U0407> <UKR-YI>;<BAS>;<CAP>;IGNORE +<U0439> <CYR-IBRE>;<BAS>;<MIN>;IGNORE +<U0419> <CYR-IBRE>;<BAS>;<CAP>;IGNORE +<U0458> <CYR-JE>;<BAS>;<MIN>;IGNORE +<U0408> <CYR-JE>;<BAS>;<CAP>;IGNORE +<U043A> <CYR-KA>;<BAS>;<MIN>;IGNORE +<U041A> <CYR-KA>;<BAS>;<CAP>;IGNORE +<U043B> <CYR-EL>;<BAS>;<MIN>;IGNORE +<U041B> <CYR-EL>;<BAS>;<CAP>;IGNORE +<U0459> <CYR-LJE>;<BAS>;<MIN>;IGNORE +<U0409> <CYR-LJE>;<BAS>;<CAP>;IGNORE +<U043C> <CYR-EM>;<BAS>;<MIN>;IGNORE +<U041C> <CYR-EM>;<BAS>;<CAP>;IGNORE +<U043D> <CYR-EN>;<BAS>;<MIN>;IGNORE +<U041D> <CYR-EN>;<BAS>;<CAP>;IGNORE +<U045A> <CYR-NJE>;<BAS>;<MIN>;IGNORE +<U040A> <CYR-NJE>;<BAS>;<CAP>;IGNORE +<U043E> <CYR-O>;<BAS>;<MIN>;IGNORE +<U041E> <CYR-O>;<BAS>;<CAP>;IGNORE +<U043F> <CYR-PE>;<BAS>;<MIN>;IGNORE +<U041F> <CYR-PE>;<BAS>;<CAP>;IGNORE +<U0440> <CYR-ER>;<BAS>;<MIN>;IGNORE +<U0420> <CYR-ER>;<BAS>;<CAP>;IGNORE +<U0441> <CYR-ES>;<BAS>;<MIN>;IGNORE +<U0421> <CYR-ES>;<BAS>;<CAP>;IGNORE +<U0442> <CYR-TE>;<BAS>;<MIN>;IGNORE +<U0422> <CYR-TE>;<BAS>;<CAP>;IGNORE +<U045C> <CYR-KJE>;<BAS>;<MIN>;IGNORE +<U040C> <CYR-KJE>;<BAS>;<CAP>;IGNORE +<U045B> <CYR-TSHE>;<BAS>;<MIN>;IGNORE +<U040B> <CYR-TSHE>;<BAS>;<CAP>;IGNORE +<U0443> <CYR-OU>;<BAS>;<MIN>;IGNORE +<U0423> <CYR-OU>;<BAS>;<CAP>;IGNORE +<U045E> <CYR-OUBRE>;<BAS>;<MIN>;IGNORE +<U040E> <CYR-OUBRE>;<BAS>;<CAP>;IGNORE +<U0444> <CYR-EF>;<BAS>;<MIN>;IGNORE +<U0424> <CYR-EF>;<BAS>;<CAP>;IGNORE +<U0445> <CYR-HA>;<BAS>;<MIN>;IGNORE +<U0425> <CYR-HA>;<BAS>;<CAP>;IGNORE +<U0446>
<CYR-TSE>;<BAS>;<MIN>;IGNORE +<U0426> <CYR-TSE>;<BAS>;<CAP>;IGNORE +<U0447> <CYR-TSHE>;<BAS>;<MIN>;IGNORE +<U0427> <CYR-TSHE>;<BAS>;<CAP>;IGNORE +<U045F> <CYR-DCHE>;<BAS>;<MIN>;IGNORE +<U040F> <CYR-DCHE>;<BAS>;<CAP>;IGNORE +<U0448> <CYR-SHA>;<BAS>;<MIN>;IGNORE +<U0428> <CYR-SHA>;<BAS>;<CAP>;IGNORE +<U0449> <CYR-SHTSHA>;<BAS>;<MIN>;IGNORE +<U0429> <CYR-SHTSHA>;<BAS>;<CAP>;IGNORE +<U044A> <CYR-SIGDUR>;<BAS>;<MIN>;IGNORE +<U042A> <CYR-SIGDUR>;<BAS>;<CAP>;IGNORE +<U044B> <CYR-YEROU>;<BAS>;<MIN>;IGNORE +<U042B> <CYR-YEROU>;<BAS>;<CAP>;IGNORE +<U044C> <CYR-SIGMOUIL>;<BAS>;<MIN>;IGNORE +<U042C> <CYR-SIGMOUIL>;<BAS>;<CAP>;IGNORE +<U044D> <CYR-E>;<BAS>;<MIN>;IGNORE +<U042D> <CYR-E>;<BAS>;<CAP>;IGNORE +<U044E> <CYR-YOU>;<BAS>;<MIN>;IGNORE +<U042E> <CYR-YOU>;<BAS>;<CAP>;IGNORE +<U044F> <CYR-YA>;<BAS>;<MIN>;IGNORE +<U042F> <CYR-YA>;<BAS>;<CAP>;IGNORE + +order_start <HAN>;forward;forward;forward;forward,position +<U4E00>......<U9FA5> <U4E00>......<U9FA5>;IGNORE;IGNORE;IGNORE +# +order_end +# +END LC_COLLATE diff --git a/posix/fnmatch.c b/posix/fnmatch.c index 18abf5da27..c4b11080fe 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -48,6 +48,15 @@ # include <wctype.h> #endif +/* We need some of the locale data (the collation sequence information) + but there is no interface to get this information in general. Therefore + we support a correct implementation only in glibc. */ +#ifdef _LIBC +# include "../locale/localeinfo.h" + +# define CONCAT(a,b) __CONCAT(a,b) +#endif + /* Comment out all this code if we are using the GNU C Library, and are not actually compiling the library itself. This code is part of the GNU C Library, but also included in many other GNU distributions. 
Compiling @@ -192,6 +201,7 @@ __wcschrnul (s, c) # define STRCHR(S, C) strchr (S, C) # define STRCHRNUL(S, C) __strchrnul (S, C) # define STRCOLL(S1, S2) strcoll (S1, S2) +# define SUFFIX MB # include "fnmatch_loop.c" @@ -209,7 +219,10 @@ __wcschrnul (s, c) # define BTOWC(C) (C) # define STRCHR(S, C) wcschr (S, C) # define STRCHRNUL(S, C) __wcschrnul (S, C) -# define STRCOLL(S1, S2) wcscoll (S1, S2) +# define STRCOLL(S1, S2) wcscoll (S1, S2) +# define SUFFIX WC +# define WIDE_CHAR_VERSION 1 + # undef IS_CHAR_CLASS # ifdef _LIBC diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c index 5f6c05710e..831bd0631a 100644 --- a/posix/fnmatch_loop.c +++ b/posix/fnmatch_loop.c @@ -31,6 +31,16 @@ FCT (pattern, string, no_leading_period, flags) { register const CHAR *p = pattern, *n = string; register UCHAR c; +#ifdef _LIBC + const UCHAR *collseq = (const UCHAR *) + _NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX)); +# ifdef WIDE_CHAR_VERSION + const wint_t *names = (const wint_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES); + size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE); + size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS); +# endif +#endif while ((c = *p++) != L('\0')) { @@ -210,9 +220,9 @@ FCT (pattern, string, no_leading_period, flags) /* Leave room for the null. */ CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; size_t c1 = 0; -# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) wctype_t wt; -# endif +#endif const CHAR *startp = p; for (;;) @@ -240,7 +250,7 @@ FCT (pattern, string, no_leading_period, flags) } str[c1] = L('\0'); -# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) wt = IS_CHAR_CLASS (str); if (wt == 0) /* Invalid character class name. 
*/ @@ -248,7 +258,7 @@ FCT (pattern, string, no_leading_period, flags) if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) goto matched; -# else +#else if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n)) || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n)) || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n)) @@ -262,7 +272,7 @@ FCT (pattern, string, no_leading_period, flags) || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n)) || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n))) goto matched; -# endif +#endif } else if (c == L('\0')) /* [ (unterminated) loses. */ @@ -279,27 +289,117 @@ FCT (pattern, string, no_leading_period, flags) if (c == L('-') && *p != L(']')) { - /* It is a range. */ - CHAR lo[2]; - CHAR fc[2]; +#if _LIBC + /* We have to find the collation sequence + value for C. Collation sequence is nothing + we can regularly access. The sequence + value is defined by the order in which the + definitions of the collation values for the + various characters appear in the source + file. A strange concept, nowhere + documented. */ + int32_t fseqidx; + int32_t lseqidx; UCHAR cend = *p++; +# ifdef WIDE_CHAR_VERSION + size_t cnt; +# endif + if (!(flags & FNM_NOESCAPE) && cend == L('\\')) cend = *p++; if (cend == L('\0')) return FNM_NOMATCH; - lo[0] = cold; - lo[1] = L('\0'); - fc[0] = fn; - fc[1] = L('\0'); - if (STRCOLL (lo, fc) <= 0) +# ifdef WIDE_CHAR_VERSION + /* Search in the `names' array for the characters. */ + fseqidx = fn % size; + cnt = 0; + while (names[fseqidx] != fn) { - CHAR hi[2]; - hi[0] = FOLD (cend); - hi[1] = L('\0'); - if (STRCOLL (fc, hi) <= 0) + if (++cnt == layers) + /* XXX We don't know anything about + the character we are supposed to + match. This means we are failing. 
*/ + goto range_not_matched; + + fseqidx += size; + } + lseqidx = cold % size; + cnt = 0; + while (names[lseqidx] != cold) + { + if (++cnt == layers) + { + lseqidx = -1; + break; + } + lseqidx += size; + } +# else + fseqidx = fn; + lseqidx = cold; +# endif + + /* XXX It is not entirely clear to me how to handle + characters which are not mentioned in the + collation specification. */ + if ( +# ifdef WIDE_CHAR_VERSION + lseqidx == -1 || +# endif + collseq[lseqidx] <= collseq[fseqidx]) + { + /* We have to look at the upper bound. */ + int32_t hseqidx; + + cend = FOLD (cend); +# ifdef WIDE_CHAR_VERSION + hseqidx = cend % size; + cnt = 0; + while (names[hseqidx] != cend) + { + if (++cnt == layers) + { + /* Hum, no information about the upper + bound. The matching succeeds if the + lower bound is matched exactly. */ + if (lseqidx == -1 || cold != fn) + goto range_not_matched; + + goto matched; + } + } +# else + hseqidx = cend; +# endif + + if ( +# ifdef WIDE_CHAR_VERSION + (lseqidx == -1 + && collseq[fseqidx] == collseq[hseqidx]) || +# endif + collseq[fseqidx] <= collseq[hseqidx]) goto matched; } +# ifdef WIDE_CHAR_VERSION + range_not_matched: +# endif +#else + /* We use a boring value comparison of the character + values. This is better than comparing using + `strcoll' since the latter would have surprising + and sometimes fatal consequences. */ + UCHAR cend = *p++; + + if (!(flags & FNM_NOESCAPE) && cend == L('\\')) + cend = *p++; + if (cend == L('\0')) + return FNM_NOMATCH; + + /* It is a range. */ + if (cold <= fc && fc <= c) + goto matched; +#endif c = *p++; } @@ -371,3 +471,4 @@ FCT (pattern, string, no_leading_period, flags) #undef STRCOLL #undef L #undef BTOWC +#undef SUFFIX |