From dd8e8e547647bf7a3f6feb816a848a846feeaf14 Mon Sep 17 00:00:00 2001 From: Carlos O'Donell Date: Wed, 9 Dec 2015 22:27:41 -0500 Subject: Update transliteration support to Unicode 7.0.0. The transliteration files are now autogenerated from upstream Unicode data. --- localedata/locales/translit_combining | 636 +++++++++++++++++++++++++++++++++- 1 file changed, 618 insertions(+), 18 deletions(-) (limited to 'localedata/locales/translit_combining') diff --git a/localedata/locales/translit_combining b/localedata/locales/translit_combining index 44c62f9d09..67497ec32d 100644 --- a/localedata/locales/translit_combining +++ b/localedata/locales/translit_combining @@ -3,7 +3,7 @@ comment_char % % Transliterations that remove all combining characters (accents, % pronounciation marks, etc.). -% Generated from UnicodeData.txt. +% Generated automatically from UnicodeData.txt by gen_translit_combining.py on 2015-12-09 for Unicode 7.0.0. LC_CTYPE @@ -167,6 +167,40 @@ translit_start "" % COMBINING UPWARDS ARROW BELOW "" +% COMBINING GRAPHEME JOINER + "" +% COMBINING RIGHT ARROWHEAD ABOVE + "" +% COMBINING LEFT HALF RING ABOVE + "" +% COMBINING FERMATA + "" +% COMBINING X BELOW + "" +% COMBINING LEFT ARROWHEAD BELOW + "" +% COMBINING RIGHT ARROWHEAD BELOW + "" +% COMBINING RIGHT ARROWHEAD AND UP ARROWHEAD BELOW + "" +% COMBINING RIGHT HALF RING ABOVE + "" +% COMBINING DOT ABOVE RIGHT + "" +% COMBINING ASTERISK BELOW + "" +% COMBINING DOUBLE RING BELOW + "" +% COMBINING ZIGZAG ABOVE + "" +% COMBINING DOUBLE BREVE BELOW + "" +% COMBINING DOUBLE BREVE + "" +% COMBINING DOUBLE MACRON + "" +% COMBINING DOUBLE MACRON BELOW + "" % COMBINING DOUBLE TILDE "" % COMBINING DOUBLE INVERTED BREVE @@ -199,6 +233,68 @@ translit_start "" % COMBINING LATIN SMALL LETTER X "" +% HEBREW ACCENT ETNAHTA + "" +% HEBREW ACCENT SEGOL + "" +% HEBREW ACCENT SHALSHELET + "" +% HEBREW ACCENT ZAQEF QATAN + "" +% HEBREW ACCENT ZAQEF GADOL + "" +% HEBREW ACCENT TIPEHA + "" +% HEBREW ACCENT REVIA + "" +% HEBREW ACCENT ZARQA + "" +% HEBREW ACCENT PASHTA + "" +% HEBREW ACCENT YETIV + "" +% HEBREW ACCENT TEVIR + "" +% HEBREW ACCENT GERESH + "" +% HEBREW ACCENT GERESH MUQDAM + "" +% HEBREW ACCENT GERSHAYIM + "" +% HEBREW ACCENT QARNEY PARA + "" +% HEBREW ACCENT TELISHA GEDOLA + "" +% HEBREW ACCENT PAZER + "" +% HEBREW ACCENT ATNAH HAFUKH + "" +% HEBREW ACCENT MUNAH + "" +% HEBREW ACCENT MAHAPAKH + "" +% HEBREW ACCENT MERKHA + "" +% HEBREW ACCENT MERKHA KEFULA + "" +% HEBREW ACCENT DARGA + "" +% HEBREW ACCENT QADMA + "" +% HEBREW ACCENT TELISHA QETANA + "" +% HEBREW ACCENT YERAH BEN YOMO + "" +% HEBREW ACCENT OLE + "" +% HEBREW ACCENT ILUY + "" +% HEBREW ACCENT DEHI + "" +% HEBREW ACCENT ZINOR + "" +% HEBREW MARK MASORA CIRCLE + "" % HEBREW POINT SHEVA "" % HEBREW POINT HATAF SEGOL @@ -219,6 +315,8 @@ translit_start "" % HEBREW POINT HOLAM "" +% HEBREW POINT HOLAM HASER FOR VAV + "" % HEBREW POINT QUBUTS "" % HEBREW POINT DAGESH OR MAPIQ @@ -231,12 +329,358 @@ translit_start "" % HEBREW POINT SIN DOT "" +% HEBREW MARK UPPER DOT + "" +% HEBREW MARK LOWER DOT + "" +% HEBREW POINT QAMATS QATAN + "" +% ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM + "" +% ARABIC SIGN ALAYHE ASSALLAM + "" +% ARABIC SIGN RAHMATULLAH ALAYHE + "" +% ARABIC SIGN RADI ALLAHOU ANHU + "" +% ARABIC SIGN TAKHALLUS + "" +% ARABIC SMALL HIGH TAH + "" +% ARABIC SMALL HIGH LIGATURE ALEF WITH LAM WITH YEH + "" +% ARABIC SMALL HIGH ZAIN + "" +% ARABIC SMALL FATHA + "" +% ARABIC SMALL DAMMA + "" +% ARABIC SMALL KASRA + "" +% ARABIC FATHATAN + "" +% ARABIC DAMMATAN + "" +% ARABIC KASRATAN + "" +% ARABIC FATHA + "" +% ARABIC DAMMA + "" +% ARABIC KASRA + "" +% ARABIC SHADDA + "" +% ARABIC SUKUN + "" % ARABIC MADDAH ABOVE "" % ARABIC HAMZA ABOVE "" % ARABIC HAMZA BELOW "" +% ARABIC SUBSCRIPT ALEF + "" +% ARABIC INVERTED DAMMA + "" +% ARABIC MARK NOON GHUNNA + "" +% ARABIC ZWARAKAY + "" +% ARABIC VOWEL SIGN SMALL V ABOVE + "" +% ARABIC VOWEL SIGN INVERTED SMALL V ABOVE + "" +% ARABIC VOWEL SIGN DOT BELOW + "" +% ARABIC REVERSED DAMMA + "" +% ARABIC FATHA WITH TWO DOTS + "" +% ARABIC WAVY HAMZA BELOW + "" +% ARABIC LETTER SUPERSCRIPT ALEF + "" +% ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA + "" +% ARABIC SMALL HIGH LIGATURE QAF WITH LAM WITH ALEF MAKSURA + "" +% ARABIC SMALL HIGH MEEM INITIAL FORM + "" +% ARABIC SMALL HIGH LAM ALEF + "" +% ARABIC SMALL HIGH JEEM + "" +% ARABIC SMALL HIGH THREE DOTS + "" +% ARABIC SMALL HIGH SEEN + "" +% ARABIC SMALL HIGH ROUNDED ZERO + "" +% ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO + "" +% ARABIC SMALL HIGH DOTLESS HEAD OF KHAH + "" +% ARABIC SMALL HIGH MEEM ISOLATED FORM + "" +% ARABIC SMALL LOW SEEN + "" +% ARABIC SMALL HIGH MADDA + "" +% ARABIC SMALL HIGH YEH + "" +% ARABIC SMALL HIGH NOON + "" +% ARABIC EMPTY CENTRE LOW STOP + "" +% ARABIC EMPTY CENTRE HIGH STOP + "" +% ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE + "" +% ARABIC SMALL LOW MEEM + "" +% ARABIC CURLY FATHA + "" +% ARABIC CURLY DAMMA + "" +% ARABIC CURLY KASRA + "" +% ARABIC CURLY FATHATAN + "" +% ARABIC CURLY DAMMATAN + "" +% ARABIC CURLY KASRATAN + "" +% ARABIC TONE ONE DOT ABOVE + "" +% ARABIC TONE TWO DOTS ABOVE + "" +% ARABIC TONE LOOP ABOVE + "" +% ARABIC TONE ONE DOT BELOW + "" +% ARABIC TONE TWO DOTS BELOW + "" +% ARABIC TONE LOOP BELOW + "" +% ARABIC OPEN FATHATAN + "" +% ARABIC OPEN DAMMATAN + "" +% ARABIC OPEN KASRATAN + "" +% ARABIC SMALL HIGH WAW + "" +% ARABIC FATHA WITH RING + "" +% ARABIC FATHA WITH DOT ABOVE + "" +% ARABIC KASRA WITH DOT BELOW + "" +% ARABIC LEFT ARROWHEAD ABOVE + "" +% ARABIC RIGHT ARROWHEAD ABOVE + "" +% ARABIC LEFT ARROWHEAD BELOW + "" +% ARABIC RIGHT ARROWHEAD BELOW + "" +% ARABIC DOUBLE RIGHT ARROWHEAD ABOVE + "" +% ARABIC DOUBLE RIGHT ARROWHEAD ABOVE WITH DOT + "" +% ARABIC RIGHT ARROWHEAD ABOVE WITH DOT + "" +% ARABIC DAMMA WITH DOT + "" +% ARABIC MARK SIDEWAYS NOON GHUNNA + "" +% COMBINING DOUBLED CIRCUMFLEX ACCENT + "" +% COMBINING DIAERESIS-RING + "" +% COMBINING INFINITY + "" +% COMBINING DOWNWARDS ARROW + "" +% COMBINING TRIPLE DOT + "" +% COMBINING X-X BELOW + "" +% COMBINING WIGGLY LINE BELOW + "" +% COMBINING OPEN MARK BELOW + "" +% COMBINING DOUBLE OPEN MARK BELOW + "" +% COMBINING LIGHT CENTRALIZATION STROKE BELOW + "" +% COMBINING STRONG CENTRALIZATION STROKE BELOW + "" +% COMBINING PARENTHESES ABOVE + "" +% COMBINING DOUBLE PARENTHESES ABOVE + "" +% COMBINING PARENTHESES BELOW + "" +% COMBINING PARENTHESES OVERLAY + "" +% COMBINING DOTTED GRAVE ACCENT + "" +% COMBINING DOTTED ACUTE ACCENT + "" +% COMBINING SNAKE BELOW + "" +% COMBINING SUSPENSION MARK + "" +% COMBINING MACRON-ACUTE + "" +% COMBINING GRAVE-MACRON + "" +% COMBINING MACRON-GRAVE + "" +% COMBINING ACUTE-MACRON + "" +% COMBINING GRAVE-ACUTE-GRAVE + "" +% COMBINING ACUTE-GRAVE-ACUTE + "" +% COMBINING LATIN SMALL LETTER R BELOW + "" +% COMBINING BREVE-MACRON + "" +% COMBINING MACRON-BREVE + "" +% COMBINING DOUBLE CIRCUMFLEX ABOVE + "" +% COMBINING OGONEK ABOVE + "" +% COMBINING ZIGZAG BELOW + "" +% COMBINING IS BELOW + "" +% COMBINING UR ABOVE + "" +% COMBINING US ABOVE + "" +% COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE + "" +% COMBINING LATIN SMALL LETTER AE + "" +% COMBINING LATIN SMALL LETTER AO + "" +% COMBINING LATIN SMALL LETTER AV + "" +% COMBINING LATIN SMALL LETTER C CEDILLA + "" +% COMBINING LATIN SMALL LETTER INSULAR D + "" +% COMBINING LATIN SMALL LETTER ETH + "" +% COMBINING LATIN SMALL LETTER G + "" +% COMBINING LATIN LETTER SMALL CAPITAL G + "" +% COMBINING LATIN SMALL LETTER K + "" +% COMBINING LATIN SMALL LETTER L + "" +% COMBINING LATIN LETTER SMALL CAPITAL L + "" +% COMBINING LATIN LETTER SMALL CAPITAL M + "" +% COMBINING LATIN SMALL LETTER N + "" +% COMBINING LATIN LETTER SMALL CAPITAL N + "" +% COMBINING LATIN LETTER SMALL CAPITAL R + "" +% COMBINING LATIN SMALL LETTER R ROTUNDA + "" +% COMBINING LATIN SMALL LETTER S + "" +% COMBINING LATIN SMALL LETTER LONG S + "" +% COMBINING LATIN SMALL LETTER Z + "" +% COMBINING LATIN SMALL LETTER ALPHA + "" +% COMBINING LATIN SMALL LETTER B + "" +% COMBINING LATIN SMALL LETTER BETA + "" +% COMBINING LATIN SMALL LETTER SCHWA + "" +% COMBINING LATIN SMALL LETTER F + "" +% COMBINING LATIN SMALL LETTER L WITH DOUBLE MIDDLE TILDE + "" +% COMBINING LATIN SMALL LETTER O WITH LIGHT CENTRALIZATION STROKE + "" +% COMBINING LATIN SMALL LETTER P + "" +% COMBINING LATIN SMALL LETTER ESH + "" +% COMBINING LATIN SMALL LETTER U WITH LIGHT CENTRALIZATION STROKE + "" +% COMBINING LATIN SMALL LETTER W + "" +% COMBINING LATIN SMALL LETTER A WITH DIAERESIS + "" +% COMBINING LATIN SMALL LETTER O WITH DIAERESIS + "" +% COMBINING LATIN SMALL LETTER U WITH DIAERESIS + "" +% COMBINING UP TACK ABOVE + "" +% COMBINING DOUBLE INVERTED BREVE BELOW + "" +% COMBINING ALMOST EQUAL TO BELOW + "" +% COMBINING LEFT ARROWHEAD ABOVE + "" +% COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW + "" +% COMBINING LEFT HARPOON ABOVE + "" +% COMBINING RIGHT HARPOON ABOVE + "" +% COMBINING LONG VERTICAL LINE OVERLAY + "" +% COMBINING SHORT VERTICAL LINE OVERLAY + "" +% COMBINING ANTICLOCKWISE ARROW ABOVE + "" +% COMBINING CLOCKWISE ARROW ABOVE + "" +% COMBINING LEFT ARROW ABOVE + "" +% COMBINING RIGHT ARROW ABOVE + "" +% COMBINING RING OVERLAY + "" +% COMBINING CLOCKWISE RING OVERLAY + "" +% COMBINING ANTICLOCKWISE RING OVERLAY + "" +% COMBINING THREE DOTS ABOVE + "" +% COMBINING FOUR DOTS ABOVE + "" +% COMBINING ENCLOSING CIRCLE + "" +% COMBINING ENCLOSING SQUARE + "" +% COMBINING ENCLOSING DIAMOND + "" +% COMBINING ENCLOSING CIRCLE BACKSLASH + "" +% COMBINING LEFT RIGHT ARROW ABOVE + "" +% COMBINING ENCLOSING SCREEN + "" +% COMBINING ENCLOSING KEYCAP + "" % COMBINING ENCLOSING UPWARD POINTING TRIANGLE "" % COMBINING REVERSE SOLIDUS OVERLAY @@ -251,10 +695,70 @@ translit_start "" % COMBINING LEFTWARDS ARROW OVERLAY "" +% COMBINING LONG DOUBLE SOLIDUS OVERLAY + "" +% COMBINING RIGHTWARDS HARPOON WITH BARB DOWNWARDS + "" +% COMBINING LEFTWARDS HARPOON WITH BARB DOWNWARDS + "" +% COMBINING LEFT ARROW BELOW + "" +% COMBINING RIGHT ARROW BELOW + "" +% COMBINING ASTERISK ABOVE + "" % COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK "" % COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK "" +% HEBREW POINT JUDEO-SPANISH VARIKA + "" +% COMBINING LIGATURE LEFT HALF + "" +% COMBINING LIGATURE RIGHT HALF + "" +% COMBINING DOUBLE TILDE LEFT HALF + "" +% COMBINING DOUBLE TILDE RIGHT HALF + "" +% COMBINING MACRON LEFT HALF + "" +% COMBINING MACRON RIGHT HALF + "" +% COMBINING CONJOINING MACRON + "" +% COMBINING LIGATURE LEFT HALF BELOW + "" +% COMBINING LIGATURE RIGHT HALF BELOW + "" +% COMBINING TILDE LEFT HALF BELOW + "" +% COMBINING TILDE RIGHT HALF BELOW + "" +% COMBINING MACRON LEFT HALF BELOW + "" +% COMBINING MACRON RIGHT HALF BELOW + "" +% COMBINING CONJOINING MACRON BELOW + "" +% PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE + "" +% COMBINING OLD PERMIC LETTER AN + "" +% COMBINING OLD PERMIC LETTER DOI + "" +% COMBINING OLD PERMIC LETTER ZATA + "" +% COMBINING OLD PERMIC LETTER NENOE + "" +% COMBINING OLD PERMIC LETTER SII + "" +% COMBINING GREEK MUSICAL TRISEME + "" +% COMBINING GREEK MUSICAL TETRASEME + "" +% COMBINING GREEK MUSICAL PENTASEME + "" % LATIN CAPITAL LETTER A WITH GRAVE @@ -268,6 +772,8 @@ translit_start % LATIN CAPITAL LETTER A WITH RING ABOVE +% LATIN CAPITAL LETTER AE + "" % LATIN CAPITAL LETTER C WITH CEDILLA % LATIN CAPITAL LETTER E WITH GRAVE @@ -298,6 +804,8 @@ translit_start % LATIN CAPITAL LETTER O WITH DIAERESIS +% LATIN CAPITAL LETTER O WITH STROKE + % LATIN CAPITAL LETTER U WITH GRAVE % LATIN CAPITAL LETTER U WITH ACUTE @@ -320,6 +828,8 @@ translit_start % LATIN SMALL LETTER A WITH RING ABOVE +% LATIN SMALL LETTER AE + "" % LATIN SMALL LETTER C WITH CEDILLA % LATIN SMALL LETTER E WITH GRAVE @@ -350,6 +860,8 @@ translit_start % LATIN SMALL LETTER O WITH DIAERESIS +% LATIN SMALL LETTER O WITH STROKE + % LATIN SMALL LETTER U WITH GRAVE % LATIN SMALL LETTER U WITH ACUTE @@ -472,10 +984,6 @@ translit_start % LATIN SMALL LETTER L WITH CARON -% LATIN CAPITAL LETTER L WITH STROKE - -% LATIN SMALL LETTER L WITH STROKE - % LATIN CAPITAL LETTER N WITH ACUTE % LATIN SMALL LETTER N WITH ACUTE @@ -673,9 +1181,9 @@ translit_start % LATIN SMALL LETTER AE WITH ACUTE ;"" % LATIN CAPITAL LETTER O WITH STROKE AND ACUTE - + ; % LATIN SMALL LETTER O WITH STROKE AND ACUTE - + ; % LATIN CAPITAL LETTER A WITH DOUBLE GRAVE % LATIN SMALL LETTER A WITH DOUBLE GRAVE @@ -764,14 +1272,6 @@ translit_start % LATIN SMALL LETTER Y WITH MACRON -% COMBINING GRAVE TONE MARK - -% COMBINING ACUTE TONE MARK - -% COMBINING GREEK KORONIS - -% COMBINING GREEK DIALYTIKA TONOS - % GREEK NUMERAL SIGN % GREEK QUESTION MARK @@ -928,6 +1428,8 @@ translit_start % CYRILLIC SMALL LETTER YERU WITH DIAERESIS +% HEBREW LIGATURE YIDDISH DOUBLE YOD + "" % ARABIC LETTER ALEF WITH MADDA ABOVE % ARABIC LETTER ALEF WITH HAMZA ABOVE @@ -1017,7 +1519,7 @@ translit_start % KANNADA VOWEL SIGN O "" % KANNADA VOWEL SIGN OO - "" + "" % MALAYALAM VOWEL SIGN O "" % MALAYALAM VOWEL SIGN OO @@ -1029,7 +1531,7 @@ translit_start % SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA "" % SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA - "" + "" % SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA "" % TIBETAN LETTER GHA @@ -2020,16 +2522,114 @@ translit_start ; % EM QUAD ; +% EN SPACE + +% EM SPACE + % OHM SIGN % KELVIN SIGN % ANGSTROM SIGN - + +% LEFTWARDS ARROW WITH STROKE + +% RIGHTWARDS ARROW WITH STROKE + +% LEFT RIGHT ARROW WITH STROKE + "" +% LEFTWARDS DOUBLE ARROW WITH STROKE + "" +% LEFT RIGHT DOUBLE ARROW WITH STROKE + "" +% RIGHTWARDS DOUBLE ARROW WITH STROKE + "" +% THERE DOES NOT EXIST + "" +% NOT AN ELEMENT OF + "" +% DOES NOT CONTAIN AS MEMBER + "" +% DOES NOT DIVIDE + "" +% NOT PARALLEL TO + "" +% NOT TILDE + "" +% NOT ASYMPTOTICALLY EQUAL TO + "" +% NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO + "" +% NOT ALMOST EQUAL TO + "" +% NOT EQUAL TO + "" +% NOT IDENTICAL TO + "" +% NOT EQUIVALENT TO + "" +% NOT LESS-THAN + "" +% NOT GREATER-THAN + "" +% NEITHER LESS-THAN NOR EQUAL TO + "" +% NEITHER GREATER-THAN NOR EQUAL TO + "" +% NEITHER LESS-THAN NOR EQUIVALENT TO + "" +% NEITHER GREATER-THAN NOR EQUIVALENT TO + "" +% NEITHER LESS-THAN NOR GREATER-THAN + "" +% NEITHER GREATER-THAN NOR LESS-THAN + "" +% DOES NOT PRECEDE + "" +% DOES NOT SUCCEED + "" +% NOT A SUBSET OF + "" +% NOT A SUPERSET OF + "" +% NEITHER A SUBSET OF NOR EQUAL TO + "" +% NEITHER A SUPERSET OF NOR EQUAL TO + "" +% DOES NOT PROVE + "" +% NOT TRUE + "" +% DOES NOT FORCE + "" +% NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE + "" +% DOES NOT PRECEDE OR EQUAL + "" +% DOES NOT SUCCEED OR EQUAL + "" +% NOT SQUARE IMAGE OF OR EQUAL TO + "" +% NOT SQUARE ORIGINAL OF OR EQUAL TO + "" +% NOT NORMAL SUBGROUP OF + "" +% DOES NOT CONTAIN AS NORMAL SUBGROUP + "" +% NOT NORMAL SUBGROUP OF OR EQUAL TO + "" +% DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL + "" % LEFT-POINTING ANGLE BRACKET ; % RIGHT-POINTING ANGLE BRACKET ; +% FORKING + "" +% LEFT ANGLE BRACKET + +% RIGHT ANGLE BRACKET + % HIRAGANA LETTER GA % HIRAGANA LETTER GI -- cgit 1.4.1