diff options
author | Mike FABIAN <mfabian@redhat.com> | 2021-09-27 15:27:36 +0200 |
---|---|---|
committer | Mike FABIAN <mfabian@redhat.com> | 2021-10-04 08:54:27 +0200 |
commit | b5172560152052ad265c3f11d58cf5e33e98e478 (patch) | |
tree | 9763f4d60f1acb4a129abaf52292e1fc71a00650 /localedata/unicode-gen/PropList.txt | |
parent | eae81d70574e923ce3c59078b8df857ae192efa6 (diff) | |
download | glibc-b5172560152052ad265c3f11d58cf5e33e98e478.tar.gz glibc-b5172560152052ad265c3f11d58cf5e33e98e478.tar.xz glibc-b5172560152052ad265c3f11d58cf5e33e98e478.zip |
Update to Unicode 14.0.0 [BZ #28390]
Unicode 14.0.0 Support: Character encoding, character type info, and transliteration tables are all updated to Unicode 14.0.0, using the generator scripts contributed by Mike FABIAN (Red Hat).

Total added characters in newly generated CHARMAP: 838

Total removed characters in newly generated WIDTH: 1
(Characters not in WIDTH get width 1 by default, i.e. these have width 1 now.)
    removed: <U1734> 0 : eaw=N category=Mc bidi=L name=HANUNOO SIGN PAMUDPOD

    That seems intentional: the character had category Mn (Mark, nonspacing) before and now has Mc (Mark, spacing combining).

Total changed characters in newly generated WIDTH: 0

Total added characters in newly generated WIDTH: 175
Diffstat (limited to 'localedata/unicode-gen/PropList.txt')
-rw-r--r-- | localedata/unicode-gen/PropList.txt | 100 |
1 files changed, 73 insertions, 27 deletions
diff --git a/localedata/unicode-gen/PropList.txt b/localedata/unicode-gen/PropList.txt index 7d2f44c56f..0a5a934682 100644 --- a/localedata/unicode-gen/PropList.txt +++ b/localedata/unicode-gen/PropList.txt @@ -1,6 +1,6 @@ -# PropList-13.0.0.txt -# Date: 2019-11-27, 03:13:28 GMT -# © 2019 Unicode®, Inc. +# PropList-14.0.0.txt +# Date: 2021-08-12, 23:13:05 GMT +# © 2021 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -54,6 +54,7 @@ 2E1A ; Dash # Pd HYPHEN WITH DIAERESIS 2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH 2E40 ; Dash # Pd DOUBLE HYPHEN +2E5D ; Dash # Pd OBLIQUE HYPHEN 301C ; Dash # Pd WAVE DASH 3030 ; Dash # Pd WAVY DASH 30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN @@ -63,7 +64,7 @@ FE63 ; Dash # Pd SMALL HYPHEN-MINUS FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS 10EAD ; Dash # Pd YEZIDI HYPHENATION MARK -# Total code points: 29 +# Total code points: 30 # ================================================ @@ -126,7 +127,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET 05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ 060C ; Terminal_Punctuation # Po ARABIC COMMA 061B ; Terminal_Punctuation # Po ARABIC SEMICOLON -061E..061F ; Terminal_Punctuation # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK +061D..061F ; Terminal_Punctuation # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK 06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP 0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION 070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS @@ -150,6 +151,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET 1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU 1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA 1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK 
PAMUNGKAH..BALINESE CARIK PAREREN +1B7D..1B7E ; Terminal_Punctuation # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG 1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK 1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD 203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG @@ -159,6 +161,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET 2E41 ; Terminal_Punctuation # Po REVERSED COMMA 2E4C ; Terminal_Punctuation # Po MEDIEVAL COMMA 2E4E..2E4F ; Terminal_Punctuation # Po [2] PUNCTUS ELEVATUS MARK..CORNISH VERSE DIVIDER +2E53..2E54 ; Terminal_Punctuation # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK 3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK @@ -189,6 +192,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT 10F55..10F59 ; Terminal_Punctuation # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F86..10F89 ; Terminal_Punctuation # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS 11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS 110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA 11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK @@ -220,7 +224,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; 
Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 267 +# Total code points: 276 # ================================================ @@ -600,6 +604,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI 1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG 1ABF..1AC0 ; Other_Alphabetic # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW +1ACC..1ACE ; Other_Alphabetic # Mn [3] COMBINING LATIN SMALL LETTER INSULAR G..COMBINING LATIN SMALL LETTER INSULAR T 1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH 1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG @@ -686,10 +691,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA 11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA 11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU +11073..11074 ; Other_Alphabetic # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O 11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA 110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110C2 ; Other_Alphabetic # Mn KAITHI VOWEL SIGN VOCALIC R 11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA 11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU 1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E @@ -815,7 +822,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL 
LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1398 +# Total code points: 1404 # ================================================ @@ -824,7 +831,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE 3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 3400..4DBF ; Ideographic # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF -4E00..9FFC ; Ideographic # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC +4E00..9FFF ; Ideographic # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER @@ -832,15 +839,15 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18D00..18D08 ; Ideographic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB -20000..2A6DD ; Ideographic # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD -2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; Ideographic # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED 
IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -# Total code points: 101652 +# Total code points: 101661 # ================================================ @@ -885,6 +892,9 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE 07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE 0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH +0898..089F ; Diacritic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08C9 ; Diacritic # Lm ARABIC SMALL FARSI YEH +08CA..08D2 ; Diacritic # Mn [9] ARABIC SMALL HIGH FARSI YEH..ARABIC LARGE ROUND DOT INSIDE CIRCLE BELOW 08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT 093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA 094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA @@ -901,6 +911,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA 0B55 ; Diacritic # Mn ORIYA SIGN OVERLINE 0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA +0C3C ; Diacritic # Mn TELUGU SIGN NUKTA 0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA 0CBC ; Diacritic # Mn KANNADA SIGN NUKTA 0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA @@ -928,12 +939,16 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3 135D..135F ; Diacritic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1714 ; Diacritic # Mn TAGALOG SIGN VIRAMA +1715 ; 
Diacritic # Mc TAGALOG SIGN PAMUDPOD 17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT 17DD ; Diacritic # Mn KHMER SIGN ATTHACAN 1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I 1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Diacritic # Me COMBINING PARENTHESES OVERLAY +1AC1..1ACB ; Diacritic # Mn [11] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING TRIPLE ACUTE ACCENT 1B34 ; Diacritic # Mn BALINESE SIGN REREKAN 1B44 ; Diacritic # Mc BALINESE ADEG ADEG 1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG @@ -952,8 +967,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE 1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW -1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW -1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1DF5..1DFF ; Diacritic # Mn [11] COMBINING UP TACK ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 1FBD ; Diacritic # Sk GREEK KORONIS 1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI 1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI @@ -1008,10 +1022,16 @@ FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK FFE3 ; Diacritic # Sk FULLWIDTH MACRON 102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK 
+10780..10785 ; Diacritic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Diacritic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Diacritic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11046 ; Diacritic # Mn BRAHMI VIRAMA +11070 ; Diacritic # Mn BRAHMI SIGN OLD TAMIL VIRAMA 110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA 11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA 11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA @@ -1049,18 +1069,24 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FF0..16FF1 ; Diacritic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1AFF0..1AFF3 ; Diacritic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Diacritic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Diacritic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1CF00..1CF2D ; Diacritic # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Diacritic # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1D167..1D169 ; Diacritic # Mn [3] MUSICAL 
SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 882 +# Total code points: 1064 # ================================================ @@ -1088,6 +1114,7 @@ AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION AADD ; Extender # Lm TAI VIET SYMBOL SAM AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +10781..10782 ; Extender # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON 1135D ; Extender # Lo GRANTHA SIGN PLUTA 115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 11A98 ; Extender # Mn SOYOMBO GEMINATION MARK @@ -1097,7 +1124,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -# Total code points: 48 +# Total code points: 50 # 
================================================ @@ -1121,8 +1148,12 @@ A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..M A770 ; Other_Lowercase # Lm MODIFIER LETTER US A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +10780 ; Other_Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA +10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL -# Total code points: 189 +# Total code points: 244 # ================================================ @@ -1211,7 +1242,7 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG # ================================================ 3400..4DBF ; Unified_Ideograph # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF -4E00..9FFC ; Unified_Ideograph # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC +4E00..9FFF ; Unified_Ideograph # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 @@ -1219,14 +1250,14 @@ FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 -20000..2A6DD ; 
Unified_Ideograph # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD -2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +20000..2A6DF ; Unified_Ideograph # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; Unified_Ideograph # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -# Total code points: 92856 +# Total code points: 92865 # ================================================ @@ -1291,8 +1322,9 @@ E0001 ; Deprecated # Cf LANGUAGE TAG 1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J 1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J 1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J +1DF1A ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE AND RETROFLEX HOOK -# Total code points: 46 +# Total code points: 47 # ================================================ @@ -1330,7 +1362,7 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET 002E ; Sentence_Terminal # Po FULL STOP 003F ; Sentence_Terminal # Po QUESTION MARK 0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP -061E..061F ; Sentence_Terminal # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK +061D..061F ; Sentence_Terminal # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK 06D4 ; Sentence_Terminal # Po ARABIC FULL STOP 0700..0702 ; Sentence_Terminal # Po [3] SYRIAC 
END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP 07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK @@ -1349,12 +1381,14 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET 1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU 1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA 1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN +1B7D..1B7E ; Sentence_Terminal # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG 1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL 1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD 203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG 2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK 2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK 2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP +2E53..2E54 ; Sentence_Terminal # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK 3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK @@ -1375,6 +1409,7 @@ FF1F ; Sentence_Terminal # Po FULLWIDTH QUESTION MARK FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP 10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA 10F55..10F59 ; Sentence_Terminal # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F86..10F89 ; Sentence_Terminal # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS 11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA 11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK @@ -1403,15 
+1438,16 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP 1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP -# Total code points: 143 +# Total code points: 152 # ================================================ 180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Variation_Selector # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 259 +# Total code points: 260 # ================================================ @@ -1644,8 +1680,17 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK 2E43..2E4F ; Pattern_Syntax # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER 2E50..2E51 ; Pattern_Syntax # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR -2E52 ; Pattern_Syntax # Po TIRONIAN SIGN CAPITAL ET -2E53..2E7F ; Pattern_Syntax # Cn [45] <reserved-2E53>..<reserved-2E7F> +2E52..2E54 ; Pattern_Syntax # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; Pattern_Syntax # Ps TOP HALF LEFT PARENTHESIS +2E5A ; Pattern_Syntax # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; Pattern_Syntax # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; Pattern_Syntax # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; Pattern_Syntax # Pd OBLIQUE HYPHEN +2E5E..2E7F ; Pattern_Syntax # Cn [34] <reserved-2E5E>..<reserved-2E7F> 3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK 
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET 3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET @@ -1682,11 +1727,12 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT 0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE 06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH 070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK +0890..0891 ; Prepended_Concatenation_Mark # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 08E2 ; Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH 110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN 110CD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN ABOVE -# Total code points: 11 +# Total code points: 13 # ================================================ |