about summary refs log tree commit diff
path: root/localedata/locales/ml_IN
diff options
context:
space:
mode:
Diffstat (limited to 'localedata/locales/ml_IN')
-rw-r--r--localedata/locales/ml_IN158
1 files changed, 157 insertions, 1 deletions
diff --git a/localedata/locales/ml_IN b/localedata/locales/ml_IN
index 32b467f96d..2e6cfe52ca 100644
--- a/localedata/locales/ml_IN
+++ b/localedata/locales/ml_IN
@@ -65,8 +65,164 @@ END LC_CTYPE
 %
 %
 LC_COLLATE
-% Copy the template from ISO/IEC 14651
+% CLDR collation rules for Malayalam:
+% (see: https://unicode.org/cldr/trac/browser/trunk/common/collation/ml.xml)
+%
+% <collation type="standard" references="Sabdatharavali Malayalam Dictionary 23rd Ed. by Sahithya Pravarthaka Co-operative Society Ltd.">
+%   <cr><![CDATA[
+%     [reorder Mlym Latn Deva Arab Taml Knda Telu Beng Guru Gujr Orya Sinh]  # native speaker's special list
+%     #
+%     #  Avagraha and Visarga are primary ignorables.
+%     #
+%     &ഃ<<ഽ
+%     #
+%     #  Vowel sign AU ( ൌ) and AU length mark ( ൗ) need to differ
+%     #  only on secondary level, not primary.
+%     #
+%     &\u0D4C<<\u0D57
+%     #
+%     #  Pre-5.1 Chillus secondary equal to 5.1 chillus.
+%     #  Chillus primary equal to their consonant_dead form.
+%     #
+%     &ക്<<ക്\u200D<<<ൿ
+%     &ണ്<<ണ്\u200D<<<ൺ
+%     &ന്<<ന്\u200D<<<ൻ
+%     &ര്<<ര്\u200D<<<ർ
+%     &ല്<<ല്\u200D<<<ൽ
+%     &ള്<<ള്\u200D<<<ൾ
+%     #
+%     #  Anuswara primary equal to MA_dead.
+%     #
+%     &മ്<<ം
+%     #
+%     #  /nta/ is sorted as <NA, Virama, RRA>.
+%     #
+%     &ന്<<<ൻ്
+%   ]]></cr>
+% </collation>
+%
+% And CLDR also lists the following
+% index characters:
+% (see: https://unicode.org/cldr/trac/browser/trunk/common/main/ml.xml)
+%
+% <exemplarCharacters type="index" draft="contributed">[അ ആ ഇ ഈ ഉ ഊ ഋ എ ഏ ഐ ഒ ഓ ഔ ക ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഡ ഢ ണ ത ഥ ദ ധ ന പ ഫ ബ ഭ മ യ ര ല വ ശ ഷ സ ഹ ള ഴ റ]</exemplarCharacters>
+%
+% The following rules implement the same order for glibc.
 copy "iso14651_t1"
+%     &ക്<<ക്\u200D<<<ൿ
+collating-element <e0d15-0d4d> from "<U0D15><U0D4D>" % MALAYALAM LETTER KA + SIGN VIRAMA
+collating-symbol <s0d15-0d4d>
+collating-element <e0d15-0d4d-200d> from "<U0D15><U0D4D><U200D>" % KA + VIRAMA + ZERO WIDTH JOINER
+collating-symbol <s0d15-0d4d-200d>
+%     &ണ്<<ണ്\u200D<<<ൺ
+collating-element <e0d23-0d4d> from "<U0D23><U0D4D>" % MALAYALAM LETTER NNA + SIGN VIRAMA
+collating-symbol <s0d23-0d4d>
+collating-element <e0d23-0d4d-200d> from "<U0D23><U0D4D><U200D>" % NNA + VIRAMA + ZERO WIDTH JOINER
+collating-symbol <s0d23-0d4d-200d>
+%     &ന്<<ന്\u200D<<<ൻ
+collating-element <e0d28-0d4d> from "<U0D28><U0D4D>" % MALAYALAM LETTER NA + SIGN VIRAMA
+collating-symbol <s0d28-0d4d>
+collating-element <e0d28-0d4d-200d> from "<U0D28><U0D4D><U200D>" % NA + VIRAMA + ZERO WIDTH JOINER
+collating-symbol <s0d28-0d4d-200d>
+%     &ര്<<ര്\u200D<<<ർ
+collating-element <e0d30-0d4d> from "<U0D30><U0D4D>" % MALAYALAM LETTER RA + SIGN VIRAMA
+collating-symbol <s0d30-0d4d>
+collating-element <e0d30-0d4d-200d> from "<U0D30><U0D4D><U200D>" % RA + VIRAMA + ZERO WIDTH JOINER
+collating-symbol <s0d30-0d4d-200d>
+%     &ല്<<ല്\u200D<<<ൽ
+collating-element <e0d32-0d4d> from "<U0D32><U0D4D>" % MALAYALAM LETTER LA + SIGN VIRAMA
+collating-symbol <s0d32-0d4d>
+collating-element <e0d32-0d4d-200d> from "<U0D32><U0D4D><U200D>" % LA + VIRAMA + ZERO WIDTH JOINER
+collating-symbol <s0d32-0d4d-200d>
+%     &ള്<<ള്\u200D<<<ൾ
+collating-element <e0d33-0d4d> from "<U0D33><U0D4D>" % MALAYALAM LETTER LLA + SIGN VIRAMA
+collating-symbol <s0d33-0d4d>
+collating-element <e0d33-0d4d-200d> from "<U0D33><U0D4D><U200D>" % LLA + VIRAMA + ZERO WIDTH JOINER
+collating-symbol <s0d33-0d4d-200d>
+%     #
+%     #  Anuswara primary equal to MA_dead.
+%     #
+%     &മ്<<ം
+collating-element <e0d2e-0d4d> from "<U0D2E><U0D4D>" % MALAYALAM LETTER MA + SIGN VIRAMA
+collating-symbol <s0d2e-0d4d>
+%     #
+%     #  /nta/ is sorted as <NA, Virama, RRA>.
+%     #
+%     &ന്<<<ൻ്
+% already defined:
+% collating-element <e0d28-0d4d> from "<U0D28><U0D4D>"
+% already defined:
+% collating-symbol <s0d28-0d4d>
+collating-element <e0d7b-0d4d> from "<U0D7B><U0D4D>" % MALAYALAM LETTER CHILLU N + SIGN VIRAMA
+collating-symbol <s0d7b-0d4d>
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Finished defining collating-elements and collating-symbols
+%
+% A dummy reorder-after statement is needed here to avoid a syntax error,
+% because the reordering rules below would otherwise start without one:
+collating-symbol <dummy>
+reorder-after <AFTER-A>
+<dummy>
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% # Avagraha and Visarga are primary ignorables.
+% &ഃ<<ഽ
+<U0D03> IGNORE;<VISARGA>;<MIN>;<U0D03> % MALAYALAM SIGN VISARGA (first-level weight IGNORE)
+<U0D3D> IGNORE;<VRNT1>;<MIN>;<U0D3D> % MALAYALAM SIGN AVAGRAHA (first-level weight IGNORE)
+% #  Vowel sign AU ( ൌ) and AU length mark ( ൗ) need to differ
+% #  only on secondary level, not primary.
+% #
+% &\u0D4C<<\u0D57
+<U0D4C> <S0D4C>;<BASE>;<MIN>;<U0D4C> % MALAYALAM VOWEL SIGN AU
+<U0D57> <S0D4C>;<VRNT1>;<MIN>;<U0D57> % MALAYALAM AU LENGTH MARK (same first-level weight <S0D4C>; differs at the second level)
+% &ക്<<ക്\u200D<<<ൿ
+<e0d15-0d4d>      "<S0D15><S0D4D>";"<BASE><BASE>";"<MIN><MIN>";IGNORE % KA + VIRAMA
+<e0d15-0d4d-200d> "<S0D15><S0D4D>";"<BASE><VRNT1>";"<MIN><MIN>";IGNORE % KA + VIRAMA + ZWJ (pre-5.1 chillu spelling)
+<U0D7F>           "<S0D15><S0D4D>";"<BASE><VRNT1>";"<COMPAT><COMPAT>";<U0D7F> % MALAYALAM LETTER CHILLU K
+% &ണ്<<ണ്\u200D<<<ൺ
+<e0d23-0d4d>      "<S0D23><S0D4D>";"<BASE><BASE>";"<MIN><MIN>";IGNORE % NNA + VIRAMA
+<e0d23-0d4d-200d> "<S0D23><S0D4D>";"<BASE><VRNT1>";"<MIN><MIN>";IGNORE % NNA + VIRAMA + ZWJ (pre-5.1 chillu spelling)
+<U0D7A>           "<S0D23><S0D4D>";"<BASE><VRNT1>";"<COMPAT><COMPAT>";<U0D7A> % MALAYALAM LETTER CHILLU NN
+% &ന്<<ന്\u200D<<<ൻ
+<e0d28-0d4d>      "<S0D28><S0D4D>";"<BASE><BASE>";"<MIN><MIN>";IGNORE % ന്
+<e0d28-0d4d-200d> "<S0D28><S0D4D>";"<BASE><VRNT1>";"<MIN><MIN>";IGNORE % ന്‍
+<U0D7B>           "<S0D28><S0D4D>";"<BASE><VRNT1>";"<COMPATCAP><COMPATCAP>";<U0D7B> % ൻ MALAYALAM LETTER CHILLU N; NOTE(review): <COMPATCAP> here vs <COMPAT> for the other chillus, presumably so it sorts after <e0d7b-0d4d> - confirm
+% &ര്<<ര്\u200D<<<ർ
+<e0d30-0d4d>      "<S0D30><S0D4D>";"<BASE><BASE>";"<MIN><MIN>";IGNORE % RA + VIRAMA
+<e0d30-0d4d-200d> "<S0D30><S0D4D>";"<BASE><VRNT1>";"<MIN><MIN>";IGNORE % RA + VIRAMA + ZWJ (pre-5.1 chillu spelling)
+<U0D7C>           "<S0D30><S0D4D>";"<BASE><VRNT1>";"<COMPAT><COMPAT>";<U0D7C> % ർ MALAYALAM LETTER CHILLU RR
+% &ല്<<ല്\u200D<<<ൽ
+<e0d32-0d4d>      "<S0D32><S0D4D>";"<BASE><BASE>";"<MIN><MIN>";IGNORE % LA + VIRAMA
+<e0d32-0d4d-200d> "<S0D32><S0D4D>";"<BASE><VRNT1>";"<MIN><MIN>";IGNORE % LA + VIRAMA + ZWJ (pre-5.1 chillu spelling)
+<U0D7D>           "<S0D32><S0D4D>";"<BASE><VRNT1>";"<COMPAT><COMPAT>";<U0D7D> % MALAYALAM LETTER CHILLU L
+% &ള്<<ള്\u200D<<<ൾ
+<e0d33-0d4d>      "<S0D33><S0D4D>";"<BASE><BASE>";"<MIN><MIN>";IGNORE % LLA + VIRAMA
+<e0d33-0d4d-200d> "<S0D33><S0D4D>";"<BASE><VRNT1>";"<MIN><MIN>";IGNORE % LLA + VIRAMA + ZWJ (pre-5.1 chillu spelling)
+<U0D7E>           "<S0D33><S0D4D>";"<BASE><VRNT1>";"<COMPAT><COMPAT>";<U0D7E> % MALAYALAM LETTER CHILLU LL
+% #
+% #  Anuswara primary equal to MA_dead.
+% #
+% &മ്<<ം
+<e0d2e-0d4d> "<S0D2E><S0D4D>";"<BASE><BASE>";"<MIN><MIN>";IGNORE % മ് (MA + VIRAMA)
+<U0D02>      "<S0D2E><S0D4D>";"<BASE><VRNT1>";"<MIN><MIN>";IGNORE % MALAYALAM SIGN ANUSVARA (same first-level weights as MA + VIRAMA; differs at the second level)
+% #
+% #  /nta/ is sorted as <NA, Virama, RRA>.
+% #
+% &ന്<<<ൻ്
+%
+% It looks to me as if the above line
+% contradicts the earlier rule: &ന്<<ന്\u200D<<<ൻ
+% I experimented with libicu to see how libicu sorts given these rules.
+% And the end result seems to be the same as if the above two rules had been
+% combined in a rule like this:
+%
+%  &ന്<<ന്\u200D<<<ൻ്<<<ൻ
+%
+% So I write the glibc rules to reproduce that behaviour.
+<e0d28-0d4d> "<S0D28><S0D4D>";"<BASE><BASE>";"<MIN><MIN>";<U0D28> % ന് ; NOTE(review): repeats the earlier <e0d28-0d4d> line but with <U0D28> instead of IGNORE as the fourth weight - confirm this is intended
+<e0d7b-0d4d> "<S0D28><S0D4D>";"<BASE><VRNT1>";"<COMPAT><COMPAT>";<U0D7B> % ൻ് (CHILLU N + VIRAMA)
+
+reorder-end
+
 END LC_COLLATE
 %
 LC_MONETARY