From dd8e8e547647bf7a3f6feb816a848a846feeaf14 Mon Sep 17 00:00:00 2001 From: Carlos O'Donell Date: Wed, 9 Dec 2015 22:27:41 -0500 Subject: Update transliteration support to Unicode 7.0.0. The transliteration files are now autogenerated from upstream Unicode data. --- localedata/locales/translit_compat | 578 ++++++++++++++++++++++++++++++++++++- 1 file changed, 571 insertions(+), 7 deletions(-) (limited to 'localedata/locales/translit_compat') diff --git a/localedata/locales/translit_compat b/localedata/locales/translit_compat index bb9d660d05..bf8d19181b 100644 --- a/localedata/locales/translit_compat +++ b/localedata/locales/translit_compat @@ -2,18 +2,24 @@ escape_char / comment_char % % Transliterations of compatibility characters and ligatures. -% Generated through -% $ grep '^[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;' UnicodeData.txt | \ -% sed -e 's/^\([^;]*\);\([^;]*\);[^;]*;[^;]*;[^;]*; \([^;]*\);.*$/ ""% \2/' | grep -v '0020 03[0-6][0-9A-F]' | sed -e 'h' -e 's/^\([^%]*\)% .*$/\1/' -e 's/\([0-9A-F]\) \([0-9A-F]\)/\1> "" +% SUPERSCRIPT TWO + "" +% SUPERSCRIPT THREE + "" % MICRO SIGN - ""; + "";"" +% SUPERSCRIPT ONE + "" +% MASCULINE ORDINAL INDICATOR + "" % LATIN CAPITAL LIGATURE IJ "" % LATIN SMALL LIGATURE IJ @@ -54,6 +60,38 @@ translit_start "" % LATIN SMALL LETTER DZ "" +% MODIFIER LETTER SMALL H + "" +% MODIFIER LETTER SMALL H WITH HOOK + "" +% MODIFIER LETTER SMALL J + "" +% MODIFIER LETTER SMALL R + "" +% MODIFIER LETTER SMALL TURNED R + "" +% MODIFIER LETTER SMALL TURNED R WITH HOOK + "" +% MODIFIER LETTER SMALL CAPITAL INVERTED R + "" +% MODIFIER LETTER SMALL W + "" +% MODIFIER LETTER SMALL Y + "" +% MODIFIER LETTER APOSTROPHE + "" +% MODIFIER LETTER SMALL GAMMA + "" +% MODIFIER LETTER SMALL L + "" +% MODIFIER LETTER SMALL S + "" +% MODIFIER LETTER SMALL X + "" +% MODIFIER LETTER SMALL REVERSED GLOTTAL STOP + "" +% GREEK SMALL LETTER MU + "" % GREEK BETA SYMBOL "" % GREEK THETA SYMBOL @@ -74,6 +112,20 @@ translit_start "" % GREEK LUNATE EPSILON SYMBOL "" +% GREEK CAPITAL LUNATE SIGMA SYMBOL + "" +% CYRILLIC CAPITAL LIGATURE EN GHE + "" +% CYRILLIC SMALL LIGATURE EN GHE + "" +% CYRILLIC CAPITAL LIGATURE TE TSE + "" +% CYRILLIC SMALL LIGATURE TE TSE + "" +% CYRILLIC CAPITAL LIGATURE A IE + "" +% CYRILLIC SMALL LIGATURE A IE + "" % ARMENIAN SMALL LIGATURE ECH YIWN "" % HEBREW LIGATURE YIDDISH DOUBLE VAV @@ -102,6 +154,204 @@ translit_start "" % TIBETAN VOWEL SIGN VOCALIC LL "" +% MODIFIER LETTER GEORGIAN NAR + "" +% MODIFIER LETTER CAPITAL A + "" +% MODIFIER LETTER CAPITAL AE + "" +% MODIFIER LETTER CAPITAL B + "" +% MODIFIER LETTER CAPITAL D + "" +% MODIFIER LETTER CAPITAL E + "" +% MODIFIER LETTER CAPITAL REVERSED E + "" +% MODIFIER LETTER CAPITAL G + "" +% MODIFIER LETTER CAPITAL H + "" +% MODIFIER LETTER CAPITAL I + "" +% MODIFIER LETTER CAPITAL J + "" +% MODIFIER LETTER CAPITAL K + "" +% MODIFIER LETTER CAPITAL L + "" +% MODIFIER LETTER CAPITAL M + "" +% MODIFIER LETTER CAPITAL N + "" +% MODIFIER LETTER CAPITAL O + "" +% MODIFIER LETTER CAPITAL OU + "" +% MODIFIER LETTER CAPITAL P + "" +% MODIFIER LETTER CAPITAL R + "" +% MODIFIER LETTER CAPITAL T + "" +% MODIFIER LETTER CAPITAL U + "" +% MODIFIER LETTER CAPITAL W + "" +% MODIFIER LETTER SMALL A + "" +% MODIFIER LETTER SMALL TURNED A + "" +% MODIFIER LETTER SMALL ALPHA + "" +% MODIFIER LETTER SMALL TURNED AE + "" +% MODIFIER LETTER SMALL B + "" +% MODIFIER LETTER SMALL D + "" +% MODIFIER LETTER SMALL E + "" +% MODIFIER LETTER SMALL SCHWA + "" +% MODIFIER LETTER SMALL OPEN E + "" +% MODIFIER LETTER SMALL TURNED OPEN E + "" +% MODIFIER LETTER SMALL G + "" +% MODIFIER LETTER SMALL K + "" +% MODIFIER LETTER SMALL M + "" +% MODIFIER LETTER SMALL ENG + "" +% MODIFIER LETTER SMALL O + "" +% MODIFIER LETTER SMALL OPEN O + "" +% MODIFIER LETTER SMALL TOP HALF O + "" +% MODIFIER LETTER SMALL BOTTOM HALF O + "" +% MODIFIER LETTER SMALL P + "" +% MODIFIER LETTER SMALL T + "" +% MODIFIER LETTER SMALL U + "" +% MODIFIER LETTER SMALL SIDEWAYS U + "" +% MODIFIER LETTER SMALL TURNED M + "" +% MODIFIER LETTER SMALL V + "" +% MODIFIER LETTER SMALL AIN + "" +% MODIFIER LETTER SMALL BETA + "" +% MODIFIER LETTER SMALL GREEK GAMMA + "" +% MODIFIER LETTER SMALL DELTA + "" +% MODIFIER LETTER SMALL GREEK PHI + "" +% MODIFIER LETTER SMALL CHI + "" +% LATIN SUBSCRIPT SMALL LETTER I + "" +% LATIN SUBSCRIPT SMALL LETTER R + "" +% LATIN SUBSCRIPT SMALL LETTER U + "" +% LATIN SUBSCRIPT SMALL LETTER V + "" +% GREEK SUBSCRIPT SMALL LETTER BETA + "" +% GREEK SUBSCRIPT SMALL LETTER GAMMA + "" +% GREEK SUBSCRIPT SMALL LETTER RHO + "" +% GREEK SUBSCRIPT SMALL LETTER PHI + "" +% GREEK SUBSCRIPT SMALL LETTER CHI + "" +% MODIFIER LETTER CYRILLIC EN + "" +% MODIFIER LETTER SMALL TURNED ALPHA + "" +% MODIFIER LETTER SMALL C + "" +% MODIFIER LETTER SMALL C WITH CURL + "" +% MODIFIER LETTER SMALL ETH + "" +% MODIFIER LETTER SMALL REVERSED OPEN E + "" +% MODIFIER LETTER SMALL F + "" +% MODIFIER LETTER SMALL DOTLESS J WITH STROKE + "" +% MODIFIER LETTER SMALL SCRIPT G + "" +% MODIFIER LETTER SMALL TURNED H + "" +% MODIFIER LETTER SMALL I WITH STROKE + "" +% MODIFIER LETTER SMALL IOTA + "" +% MODIFIER LETTER SMALL CAPITAL I + "" +% MODIFIER LETTER SMALL CAPITAL I WITH STROKE + "" +% MODIFIER LETTER SMALL J WITH CROSSED-TAIL + "" +% MODIFIER LETTER SMALL L WITH RETROFLEX HOOK + "" +% MODIFIER LETTER SMALL L WITH PALATAL HOOK + "" +% MODIFIER LETTER SMALL CAPITAL L + "" +% MODIFIER LETTER SMALL M WITH HOOK + "" +% MODIFIER LETTER SMALL TURNED M WITH LONG LEG + "" +% MODIFIER LETTER SMALL N WITH LEFT HOOK + "" +% MODIFIER LETTER SMALL N WITH RETROFLEX HOOK + "" +% MODIFIER LETTER SMALL CAPITAL N + "" +% MODIFIER LETTER SMALL BARRED O + "" +% MODIFIER LETTER SMALL PHI + "" +% MODIFIER LETTER SMALL S WITH HOOK + "" +% MODIFIER LETTER SMALL ESH + "" +% MODIFIER LETTER SMALL T WITH PALATAL HOOK + "" +% MODIFIER LETTER SMALL U BAR + "" +% MODIFIER LETTER SMALL UPSILON + "" +% MODIFIER LETTER SMALL CAPITAL U + "" +% MODIFIER LETTER SMALL V WITH HOOK + "" +% MODIFIER LETTER SMALL TURNED V + "" +% MODIFIER LETTER SMALL Z + "" +% MODIFIER LETTER SMALL Z WITH RETROFLEX HOOK + "" +% MODIFIER LETTER SMALL Z WITH CURL + "" +% MODIFIER LETTER SMALL EZH + "" +% MODIFIER LETTER SMALL THETA + "" % LATIN SMALL LETTER A WITH RIGHT HALF RING "" % EN SPACE @@ -146,6 +396,90 @@ translit_start "" % MEDIUM MATHEMATICAL SPACE "" +% SUPERSCRIPT ZERO + "" +% SUPERSCRIPT LATIN SMALL LETTER I + "" +% SUPERSCRIPT FOUR + "" +% SUPERSCRIPT FIVE + "" +% SUPERSCRIPT SIX + "" +% SUPERSCRIPT SEVEN + "" +% SUPERSCRIPT EIGHT + "" +% SUPERSCRIPT NINE + "" +% SUPERSCRIPT PLUS SIGN + "" +% SUPERSCRIPT MINUS + "" +% SUPERSCRIPT EQUALS SIGN + "" +% SUPERSCRIPT LEFT PARENTHESIS + "" +% SUPERSCRIPT RIGHT PARENTHESIS + "" +% SUPERSCRIPT LATIN SMALL LETTER N + "" +% SUBSCRIPT ZERO + "" +% SUBSCRIPT ONE + "" +% SUBSCRIPT TWO + "" +% SUBSCRIPT THREE + "" +% SUBSCRIPT FOUR + "" +% SUBSCRIPT FIVE + "" +% SUBSCRIPT SIX + "" +% SUBSCRIPT SEVEN + "" +% SUBSCRIPT EIGHT + "" +% SUBSCRIPT NINE + "" +% SUBSCRIPT PLUS SIGN + "" +% SUBSCRIPT MINUS + "" +% SUBSCRIPT EQUALS SIGN + "" +% SUBSCRIPT LEFT PARENTHESIS + "" +% SUBSCRIPT RIGHT PARENTHESIS + "" +% LATIN SUBSCRIPT SMALL LETTER A + "" +% LATIN SUBSCRIPT SMALL LETTER E + "" +% LATIN SUBSCRIPT SMALL LETTER O + "" +% LATIN SUBSCRIPT SMALL LETTER X + "" +% LATIN SUBSCRIPT SMALL LETTER SCHWA + "" +% LATIN SUBSCRIPT SMALL LETTER H + "" +% LATIN SUBSCRIPT SMALL LETTER K + "" +% LATIN SUBSCRIPT SMALL LETTER L + "" +% LATIN SUBSCRIPT SMALL LETTER M + "" +% LATIN SUBSCRIPT SMALL LETTER N + "" +% LATIN SUBSCRIPT SMALL LETTER P + "" +% LATIN SUBSCRIPT SMALL LETTER S + "" +% LATIN SUBSCRIPT SMALL LETTER T + "" % RUPEE SIGN "" % ACCOUNT OF @@ -164,8 +498,12 @@ translit_start "" % NUMERO SIGN "" +% SERVICE MARK + "" % TELEPHONE SIGN "" +% TRADE MARK SIGN + "" % ALEF SYMBOL "" % BET SYMBOL @@ -174,6 +512,8 @@ translit_start "" % DALET SYMBOL "" +% FACSIMILE SIGN + "" % ROMAN NUMERAL ONE "" % ROMAN NUMERAL TWO @@ -386,6 +726,12 @@ translit_start "" % THREE CONSECUTIVE EQUALS SIGNS "" +% LATIN SUBSCRIPT SMALL LETTER J + "" +% MODIFIER LETTER CAPITAL V + "" +% TIFINAGH MODIFIER LETTER LABIALIZATION MARK + "" % CJK RADICAL MOTHER "" % CJK RADICAL C-SIMPLIFIED TURTLE @@ -830,6 +1176,10 @@ translit_start "" % KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK "" +% HIRAGANA DIGRAPH YORI + "" +% KATAKANA DIGRAPH KOTO + "" % HANGUL LETTER KIYEOK "" % HANGUL LETTER SSANGKIYEOK @@ -1018,6 +1368,34 @@ translit_start "" % HANGUL LETTER ARAEAE "" +% IDEOGRAPHIC ANNOTATION ONE MARK + "" +% IDEOGRAPHIC ANNOTATION TWO MARK + "" +% IDEOGRAPHIC ANNOTATION THREE MARK + "" +% IDEOGRAPHIC ANNOTATION FOUR MARK + "" +% IDEOGRAPHIC ANNOTATION TOP MARK + "" +% IDEOGRAPHIC ANNOTATION MIDDLE MARK + "" +% IDEOGRAPHIC ANNOTATION BOTTOM MARK + "" +% IDEOGRAPHIC ANNOTATION FIRST MARK + "" +% IDEOGRAPHIC ANNOTATION SECOND MARK + "" +% IDEOGRAPHIC ANNOTATION THIRD MARK + "" +% IDEOGRAPHIC ANNOTATION FOURTH MARK + "" +% IDEOGRAPHIC ANNOTATION HEAVEN MARK + "" +% IDEOGRAPHIC ANNOTATION EARTH MARK + "" +% IDEOGRAPHIC ANNOTATION MAN MARK + "" % PARENTHESIZED HANGUL KIYEOK "" % PARENTHESIZED HANGUL NIEUN @@ -1076,6 +1454,10 @@ translit_start "" % PARENTHESIZED HANGUL CIEUC U "" +% PARENTHESIZED KOREAN CHARACTER OJEON + "" +% PARENTHESIZED KOREAN CHARACTER O HU + "" % PARENTHESIZED IDEOGRAPH ONE "" % PARENTHESIZED IDEOGRAPH TWO @@ -1284,6 +1666,24 @@ translit_start "" % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE "" +% MODIFIER LETTER CYRILLIC HARD SIGN + "" +% MODIFIER LETTER CYRILLIC SOFT SIGN + "" +% MODIFIER LETTER US + "" +% MODIFIER LETTER CAPITAL H WITH STROKE + "" +% MODIFIER LETTER SMALL LIGATURE OE + "" +% MODIFIER LETTER SMALL HENG + "" +% MODIFIER LETTER SMALL L WITH INVERTED LAZY S + "" +% MODIFIER LETTER SMALL L WITH MIDDLE TILDE + "" +% MODIFIER LETTER SMALL U WITH LEFT HOOK + "" % LATIN SMALL LIGATURE FF "" % LATIN SMALL LIGATURE FI @@ -1295,7 +1695,7 @@ translit_start % LATIN SMALL LIGATURE FFL "" % LATIN SMALL LIGATURE LONG S T - "" + "" % LATIN SMALL LIGATURE ST "" % ARMENIAN SMALL LIGATURE MEN NOW @@ -1310,6 +1710,72 @@ translit_start "" % HEBREW LIGATURE ALEF LAMED "" +% PRESENTATION FORM FOR VERTICAL COMMA + "" +% PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA + "" +% PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP + "" +% PRESENTATION FORM FOR VERTICAL COLON + "" +% PRESENTATION FORM FOR VERTICAL SEMICOLON + "" +% PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK + "" +% PRESENTATION FORM FOR VERTICAL QUESTION MARK + "" +% PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET + "" +% PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET + "" +% PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS + "" +% PRESENTATION FORM FOR VERTICAL TWO DOT LEADER + "" +% PRESENTATION FORM FOR VERTICAL EM DASH + "" +% PRESENTATION FORM FOR VERTICAL EN DASH + "" +% PRESENTATION FORM FOR VERTICAL LOW LINE + "" +% PRESENTATION FORM FOR VERTICAL WAVY LOW LINE + "" +% PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS + "" +% PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS + "" +% PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET + "" +% PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET + "" +% PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET + "" +% PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET + "" +% PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET + "" +% PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET + "" +% PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET + "" +% PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET + "" +% PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET + "" +% PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET + "" +% PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET + "" +% PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET + "" +% PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET + "" +% PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET + "" +% PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET + "" +% PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET + "" % DASHED OVERLINE "" % CENTRELINE OVERLINE @@ -1324,6 +1790,104 @@ translit_start "" % WAVY LOW LINE "" +% DIGIT ZERO FULL STOP + "" +% DIGIT ZERO COMMA + "" +% DIGIT ONE COMMA + "" +% DIGIT TWO COMMA + "" +% DIGIT THREE COMMA + "" +% DIGIT FOUR COMMA + "" +% DIGIT FIVE COMMA + "" +% DIGIT SIX COMMA + "" +% DIGIT SEVEN COMMA + "" +% DIGIT EIGHT COMMA + "" +% DIGIT NINE COMMA + "" +% PARENTHESIZED LATIN CAPITAL LETTER A + "" +% PARENTHESIZED LATIN CAPITAL LETTER B + "" +% PARENTHESIZED LATIN CAPITAL LETTER C + "" +% PARENTHESIZED LATIN CAPITAL LETTER D + "" +% PARENTHESIZED LATIN CAPITAL LETTER E + "" +% PARENTHESIZED LATIN CAPITAL LETTER F + "" +% PARENTHESIZED LATIN CAPITAL LETTER G + "" +% PARENTHESIZED LATIN CAPITAL LETTER H + "" +% PARENTHESIZED LATIN CAPITAL LETTER I + "" +% PARENTHESIZED LATIN CAPITAL LETTER J + "" +% PARENTHESIZED LATIN CAPITAL LETTER K + "" +% PARENTHESIZED LATIN CAPITAL LETTER L + "" +% PARENTHESIZED LATIN CAPITAL LETTER M + "" +% PARENTHESIZED LATIN CAPITAL LETTER N + "" +% PARENTHESIZED LATIN CAPITAL LETTER O + "" +% PARENTHESIZED LATIN CAPITAL LETTER P + "" +% PARENTHESIZED LATIN CAPITAL LETTER Q + "" +% PARENTHESIZED LATIN CAPITAL LETTER R + "" +% PARENTHESIZED LATIN CAPITAL LETTER S + "" +% PARENTHESIZED LATIN CAPITAL LETTER T + "" +% PARENTHESIZED LATIN CAPITAL LETTER U + "" +% PARENTHESIZED LATIN CAPITAL LETTER V + "" +% PARENTHESIZED LATIN CAPITAL LETTER W + "" +% PARENTHESIZED LATIN CAPITAL LETTER X + "" +% PARENTHESIZED LATIN CAPITAL LETTER Y + "" +% PARENTHESIZED LATIN CAPITAL LETTER Z + "" +% TORTOISE SHELL BRACKETED LATIN CAPITAL LETTER S + "" +% RAISED MC SIGN + "" +% RAISED MD SIGN + "" +% TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C + "" +% TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E09 + "" +% TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E8C + "" +% TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-5B89 + "" +% TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-70B9 + "" +% TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6253 + "" +% TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-76D7 + "" +% TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-52DD + "" +% TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 + "" translit_end -- cgit 1.4.1