diff options
Diffstat (limited to 'REORG.TODO/locale')
109 files changed, 33629 insertions, 0 deletions
diff --git a/REORG.TODO/locale/C-address.c b/REORG.TODO/locale/C-address.c new file mode 100644 index 0000000000..6512524c8b --- /dev/null +++ b/REORG.TODO/locale/C-address.c @@ -0,0 +1,49 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> + +#include "localeinfo.h" + +/* This table's entries are taken from ISO 14652, the table in section + 4.10 "LC_ADDRESS". The initializer below declares 13 values: one format string, ten empty strings, a single zero word (seventh position) and, last, the codeset name _nl_C_codeset. */ + +const struct __locale_data _nl_C_LC_ADDRESS attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 13, + { + { .string = "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .word = 0 }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-collate.c b/REORG.TODO/locale/C-collate.c new file mode 100644 index 0000000000..6e395b6966 --- /dev/null +++ b/REORG.TODO/locale/C-collate.c @@ -0,0 +1,149 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> +#include <stdint.h> +#include "localeinfo.h" +/* Identity table: 256 entries, entry i is written as the byte value i (0x00..0xff). */ +static const char collseqmb[] = +{ + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', + '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', + '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', + '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', + '\x88', '\x89', '\x8a', '\x8b', 
'\x8c', '\x8d', '\x8e', '\x8f', + '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', + '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', + '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', + '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', + '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', + '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', + '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', + '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', + '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', + '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', + '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff' +}; +/* Wide-character counterpart of collseqmb, stored as a flat uint32_t array: five leading words, then word 5 holds 6 * sizeof (uint32_t) (the byte position of word 6), word 6 holds 7 * sizeof (uint32_t) (the byte position of word 7), and from word 7 on come 256 identity entries 0x00..0xff. The meaning of the five leading words is defined by the table reader, which is not visible here. */ +static const uint32_t collseqwc[] = +{ + 8, 1, 8, 0x0, 0xff, + /* 1st-level table */ + 6 * sizeof (uint32_t), + /* 2nd-level table */ + 7 * sizeof (uint32_t), + /* 3rd-level table */ + L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07', + L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f', + L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17', + L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f', + L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27', + L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', L'\x2e', L'\x2f', + L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37', + L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f', + L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47', + L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f', + L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57', + L'\x58', L'\x59', L'\x5a', L'\x5b', 
L'\x5c', L'\x5d', L'\x5e', L'\x5f', + L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67', + L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f', + L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77', + L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f', + L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87', + L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f', + L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97', + L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f', + L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7', + L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf', + L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7', + L'\xb8', L'\xb9', L'\xba', L'\xbb', L'\xbc', L'\xbd', L'\xbe', L'\xbf', + L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7', + L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf', + L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7', + L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf', + L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7', + L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef', + L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7', + L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff' +}; +/* C locale LC_COLLATE data with 19 values: the rule count is 0, every pointer value before _NL_COLLATE_COLLSEQMB is NULL, and the last three values are the collseqmb and collseqwc tables from this file followed by the codeset name. */ +const struct __locale_data _nl_C_LC_COLLATE attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 19, + { + /* _NL_COLLATE_NRULES */ + { .word = 0 }, + /* _NL_COLLATE_RULESETS */ + { .string = NULL }, + /* _NL_COLLATE_TABLEMB */ + { .string = NULL }, + /* _NL_COLLATE_WEIGHTMB */ + { .string = NULL }, + /* _NL_COLLATE_EXTRAMB */ + { .string = NULL }, + /* _NL_COLLATE_INDIRECTMB */ + { .string = NULL }, 
+ /* _NL_COLLATE_GAP1 */ + { .string = NULL }, + /* _NL_COLLATE_GAP2 */ + { .string = NULL }, + /* _NL_COLLATE_GAP3 */ + { .string = NULL }, + /* _NL_COLLATE_TABLEWC */ + { .string = NULL }, + /* _NL_COLLATE_WEIGHTWC */ + { .string = NULL }, + /* _NL_COLLATE_EXTRAWC */ + { .string = NULL }, + /* _NL_COLLATE_INDIRECTWC */ + { .string = NULL }, + /* _NL_COLLATE_SYMB_HASH_SIZEMB */ + { .string = NULL }, + /* _NL_COLLATE_SYMB_TABLEMB */ + { .string = NULL }, + /* _NL_COLLATE_SYMB_EXTRAMB */ + { .string = NULL }, + /* _NL_COLLATE_COLLSEQMB */ + { .string = collseqmb }, + /* _NL_COLLATE_COLLSEQWC */ + { .string = (const char *) collseqwc }, + /* _NL_COLLATE_CODESET */ + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-ctype.c b/REORG.TODO/locale/C-ctype.c new file mode 100644 index 0000000000..06418436b8 --- /dev/null +++ b/REORG.TODO/locale/C-ctype.c @@ -0,0 +1,676 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" +#include <endian.h> +#include <stdint.h> + +#include "C-translit.h" + +/* This table's entries are taken from POSIX.2 Table 2-6 + ``LC_CTYPE Category Definition in the POSIX Locale''. 
+ + The `_nl_C_LC_CTYPE_width' array is a GNU extension. + + In the `_nl_C_LC_CTYPE_class' array the value for EOF (== -1) + is set to always return 0 and the conversion arrays return EOF. */ +/* 768 bytes = 384 two-byte entries, six per row; the row labels run 0x80..0xff and then 0x00..0xff. */ +const char _nl_C_LC_CTYPE_class[768] attribute_hidden = + /* 0x80 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0x86 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0x8c */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0x92 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0x98 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0x9e */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xa4 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xaa */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xb0 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xb6 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xbc */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xc2 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xc8 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xce */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xd4 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xda */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xe0 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xe6 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xec */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xf2 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xf8 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xfe */ "\000\000" "\000\000" "\002\000" "\002\000" "\002\000" "\002\000" + /* 0x04 
*/ "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" "\003\040" + /* 0x0a */ "\002\040" "\002\040" "\002\040" "\002\040" "\002\000" "\002\000" + /* 0x10 */ "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" + /* 0x16 */ "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" + /* 0x1c */ "\002\000" "\002\000" "\002\000" "\002\000" "\001\140" "\004\300" + /* 0x22 */ "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" + /* 0x28 */ "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" + /* 0x2e */ "\004\300" "\004\300" "\010\330" "\010\330" "\010\330" "\010\330" + /* 0x34 */ "\010\330" "\010\330" "\010\330" "\010\330" "\010\330" "\010\330" + /* 0x3a */ "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" + /* 0x40 */ "\004\300" "\010\325" "\010\325" "\010\325" "\010\325" "\010\325" + /* 0x46 */ "\010\325" "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" + /* 0x4c */ "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" + /* 0x52 */ "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" + /* 0x58 */ "\010\305" "\010\305" "\010\305" "\004\300" "\004\300" "\004\300" + /* 0x5e */ "\004\300" "\004\300" "\004\300" "\010\326" "\010\326" "\010\326" + /* 0x64 */ "\010\326" "\010\326" "\010\326" "\010\306" "\010\306" "\010\306" + /* 0x6a */ "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" + /* 0x70 */ "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" + /* 0x76 */ "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" "\004\300" + /* 0x7c */ "\004\300" "\004\300" "\004\300" "\002\000" "\000\000" "\000\000" + /* 0x82 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0x88 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0x8e */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0x94 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0x9a */ "\000\000" "\000\000" 
"\000\000" "\000\000" "\000\000" "\000\000" + /* 0xa0 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xa6 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xac */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xb2 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xb8 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xbe */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xc4 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xca */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xd0 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xd6 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xdc */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xe2 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xe8 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xee */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xf4 */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + /* 0xfa */ "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" +;/* 1024 bytes = 256 four-byte entries, three per row, labelled 0x00..0xff. */ +const char _nl_C_LC_CTYPE_class32[1024] attribute_hidden = + /* 0x00 */ "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + /* 0x03 */ "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + /* 0x06 */ "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + /* 0x09 */ "\000\000\003\040" "\000\000\002\040" "\000\000\002\040" + /* 0x0c */ "\000\000\002\040" "\000\000\002\040" "\000\000\002\000" + /* 0x0f */ "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + /* 0x12 */ "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + /* 0x15 */ "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + /* 0x18 */ "\000\000\002\000" "\000\000\002\000" 
"\000\000\002\000" + /* 0x1b */ "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + /* 0x1e */ "\000\000\002\000" "\000\000\002\000" "\000\000\001\140" + /* 0x21 */ "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + /* 0x24 */ "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + /* 0x27 */ "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + /* 0x2a */ "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + /* 0x2d */ "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + /* 0x30 */ "\000\000\010\330" "\000\000\010\330" "\000\000\010\330" + /* 0x33 */ "\000\000\010\330" "\000\000\010\330" "\000\000\010\330" + /* 0x36 */ "\000\000\010\330" "\000\000\010\330" "\000\000\010\330" + /* 0x39 */ "\000\000\010\330" "\000\000\004\300" "\000\000\004\300" + /* 0x3c */ "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + /* 0x3f */ "\000\000\004\300" "\000\000\004\300" "\000\000\010\325" + /* 0x42 */ "\000\000\010\325" "\000\000\010\325" "\000\000\010\325" + /* 0x45 */ "\000\000\010\325" "\000\000\010\325" "\000\000\010\305" + /* 0x48 */ "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + /* 0x4b */ "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + /* 0x4e */ "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + /* 0x51 */ "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + /* 0x54 */ "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + /* 0x57 */ "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + /* 0x5a */ "\000\000\010\305" "\000\000\004\300" "\000\000\004\300" + /* 0x5d */ "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + /* 0x60 */ "\000\000\004\300" "\000\000\010\326" "\000\000\010\326" + /* 0x63 */ "\000\000\010\326" "\000\000\010\326" "\000\000\010\326" + /* 0x66 */ "\000\000\010\326" "\000\000\010\306" "\000\000\010\306" + /* 0x69 */ "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + /* 0x6c */ "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + /* 0x6f */ 
"\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + /* 0x72 */ "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + /* 0x75 */ "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + /* 0x78 */ "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + /* 0x7b */ "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + /* 0x7e */ "\000\000\004\300" "\000\000\002\000" "\000\000\000\000" + /* 0x81 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x84 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x87 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x8a */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x8d */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x90 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x93 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x96 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x99 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x9c */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0x9f */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xa2 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xa5 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xa8 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xab */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xae */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xb1 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xb4 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xb7 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xba */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xbd */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xc0 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xc3 */ "\000\000\000\000" "\000\000\000\000" 
"\000\000\000\000" + /* 0xc6 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xc9 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xcc */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xcf */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xd2 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xd5 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xd8 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xdb */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xde */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xe1 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xe4 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xe7 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xea */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xed */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xf0 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xf3 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xf6 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xf9 */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xfc */ "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + /* 0xff */ "\000\000\000\000" +;/* 384 entries labelled 0x80..0xff then 0x00..0xff. Each slot holds its own label except the slot just before 0x00, which holds 0xffffffff, and slots 0x61..0x7a, which hold 0x41..0x5a. */ +const uint32_t _nl_C_LC_CTYPE_toupper[384] attribute_hidden = +{ + /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /* 0x88 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + /* 0x98 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + /* 0xa8 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + /* 0xb8 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + /* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 
+ /* 0xc8 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + /* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + /* 0xd8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + /* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + /* 0xe8 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + /* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xffffffff, + /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + /* 0x08 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + /* 0x18 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + /* 0x28 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + /* 0x38 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + /* 0x48 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + /* 0x58 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + /* 0x60 */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + /* 0x68 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + /* 0x70 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + /* 0x78 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /* 0x88 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + /* 0x98 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + /* 0xa8 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + /* 0xb8 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + /* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + /* 0xc8 */ 0xc8, 0xc9, 0xca, 0xcb, 
0xcc, 0xcd, 0xce, 0xcf, + /* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + /* 0xd8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + /* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + /* 0xe8 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + /* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +};/* Same layout as the toupper table; here slots 0x41..0x5a hold 0x61..0x7a and the slot just before 0x00 again holds 0xffffffff. */ +const uint32_t _nl_C_LC_CTYPE_tolower[384] attribute_hidden = +{ + /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /* 0x88 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + /* 0x98 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + /* 0xa8 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + /* 0xb8 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + /* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + /* 0xc8 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + /* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + /* 0xd8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + /* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + /* 0xe8 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + /* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xffffffff, + /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + /* 0x08 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + /* 0x18 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + /* 0x28 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + /* 0x38 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + /* 0x40 */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + /* 0x48 
*/ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + /* 0x50 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + /* 0x58 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + /* 0x68 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + /* 0x78 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /* 0x88 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + /* 0x98 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + /* 0xa8 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + /* 0xb8 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + /* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + /* 0xc8 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + /* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + /* 0xd8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + /* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + /* 0xe8 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + /* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +}; +/* Anonymous struct type shared by the class tables below: eight isctype_data words, five header words, one level1 word, 1 << q level2 words and 1 << p level3 words. */ +#define STRUCT_CTYPE_CLASS(p, q) \ + struct \ + { \ + uint32_t isctype_data[8]; \ + uint32_t header[5]; \ + uint32_t level1[1]; \ + uint32_t level2[1 << q]; \ + uint32_t level3[1 << p]; \ + } +/* In each class table below the 3rd-level words repeat a two- or four-word slice of that table's isctype_data words. */ +const STRUCT_CTYPE_CLASS(1, 1) _nl_C_LC_CTYPE_class_upper attribute_hidden = +{ + { 0x00000000, 0x00000000, 0x07fffffe, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 6, 1, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 8 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x07fffffe, 0x00000000 } +}; +const STRUCT_CTYPE_CLASS(1, 1) 
_nl_C_LC_CTYPE_class_lower attribute_hidden = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x07fffffe, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 6, 1, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 8 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x07fffffe } +}; +const STRUCT_CTYPE_CLASS(1, 1) _nl_C_LC_CTYPE_class_alpha attribute_hidden = +{ + { 0x00000000, 0x00000000, 0x07fffffe, 0x07fffffe, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 6, 1, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 8 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x07fffffe, 0x07fffffe } +}; +const STRUCT_CTYPE_CLASS(1, 0) _nl_C_LC_CTYPE_class_digit attribute_hidden = +{ + { 0x00000000, 0x03ff0000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 6, 1, 6, 0, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x03ff0000 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_xdigit attribute_hidden = +{ + { 0x00000000, 0x03ff0000, 0x0000007e, 0x0000007e, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x03ff0000, 0x0000007e, 0x0000007e } +}; +const STRUCT_CTYPE_CLASS(1, 0) _nl_C_LC_CTYPE_class_space attribute_hidden = +{ + { 0x00003e00, 0x00000001, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 6, 1, 6, 0, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00003e00, 0x00000001 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_print attribute_hidden = +{ + { 0x00000000, 0xffffffff, 0xffffffff, 0x7fffffff, + 
0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0xffffffff, 0xffffffff, 0x7fffffff } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_graph attribute_hidden = +{ + { 0x00000000, 0xfffffffe, 0xffffffff, 0x7fffffff, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0xfffffffe, 0xffffffff, 0x7fffffff } +}; +const STRUCT_CTYPE_CLASS(1, 0) _nl_C_LC_CTYPE_class_blank attribute_hidden = +{ + { 0x00000200, 0x00000001, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 6, 1, 6, 0, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000200, 0x00000001 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_cntrl attribute_hidden = +{ + { 0xffffffff, 0x00000000, 0x00000000, 0x80000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0xffffffff, 0x00000000, 0x00000000, 0x80000000 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_punct attribute_hidden = +{ + { 0x00000000, 0xfc00fffe, 0xf8000001, 0x78000001, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0xfc00fffe, 0xf8000001, 0x78000001 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_alnum attribute_hidden = +{ + { 0x00000000, 0x03ff0000, 0x07fffffe, 0x07fffffe, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + 
}, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x03ff0000, 0x07fffffe, 0x07fffffe } +}; +/* Two mapping tables declared with one anonymous struct type whose level3 array is signed. Slots 1..26 of level3 hold 0xffffffe0 (-32 in two's complement) in the toupper table and 0x00000020 (+32) in the tolower table; the other six slots are 0. */ +const struct +{ + uint32_t header[5]; + uint32_t level1[1]; + uint32_t level2[4]; + int32_t level3[32]; +} +_nl_C_LC_CTYPE_map_toupper attribute_hidden = +{ + { 7, 1, 5, 3, 31 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 0, 0, 10 * sizeof (uint32_t) }, + /* 3rd-level table */ + { + 0x00000000, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + } +}, +_nl_C_LC_CTYPE_map_tolower attribute_hidden = +{ + { 7, 1, 5, 3, 31 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 0, 10 * sizeof (uint32_t), 0 }, + /* 3rd-level table */ + { + 0x00000000, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + } +}; +/* Width table: level3 holds 33 signed bytes, namely one 0, fifteen -1, sixteen 1 and a final -1; the level2 words are byte offsets written relative to 14 * sizeof (uint32_t). */ +const struct +{ + uint32_t header[5]; + uint32_t level1[1]; + uint32_t level2[8]; + int8_t level3[33]; +} +_nl_C_LC_CTYPE_width attribute_hidden = +{ + { 7, 1, 4, 7, 15 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { + 14 * sizeof (uint32_t) + 0, 0, + 14 * sizeof (uint32_t) + 16, 14 * sizeof (uint32_t) + 16, + 14 * sizeof (uint32_t) + 16, 14 * sizeof (uint32_t) + 16, + 14 * sizeof 
(uint32_t) + 16, 14 * sizeof (uint32_t) + 17 + }, + /* 3rd-level table */ + { + 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + -1 + } +}; + +/* Number of fields with fixed meanings, starting at 0. */ +#define NR_FIXED 72 +/* Number of class fields, starting at CLASS_OFFSET. */ +#define NR_CLASSES 12 +/* Number of map fields, starting at MAP_OFFSET. */ +#define NR_MAPS 2 + +/* Compile time verification of + NR_FIXED == _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1). */ +typedef int assertion1[1 - 2 * (NR_FIXED != _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))]; +/* C locale LC_CTYPE data: NR_FIXED + NR_CLASSES + NR_MAPS (72 + 12 + 2) values, i.e. the fixed-meaning fields, then one table per class name, then one table per map name. */ +const struct __locale_data _nl_C_LC_CTYPE attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 1, /* Enable transliteration by default. */ + NR_FIXED + NR_CLASSES + NR_MAPS, + { + /* _NL_CTYPE_CLASS */ + { .string = _nl_C_LC_CTYPE_class }, + /* _NL_CTYPE_TOUPPER */ + { .string = (const char *) _nl_C_LC_CTYPE_toupper }, + /* _NL_CTYPE_GAP1 */ + { .string = NULL }, + /* _NL_CTYPE_TOLOWER */ + { .string = (const char *) _nl_C_LC_CTYPE_tolower }, + /* _NL_CTYPE_GAP2 */ + { .string = NULL }, + /* _NL_CTYPE_CLASS32 */ + { .string = _nl_C_LC_CTYPE_class32 }, + /* _NL_CTYPE_GAP3 */ + { .string = NULL }, + /* _NL_CTYPE_GAP4 */ + { .string = NULL }, + /* _NL_CTYPE_GAP5 */ + { .string = NULL }, + /* _NL_CTYPE_GAP6 */ + { .string = NULL }, + /* _NL_CTYPE_CLASS_NAMES */ + { .string = "upper\0" "lower\0" "alpha\0" "digit\0" "xdigit\0" "space\0" + "print\0" "graph\0" "blank\0" "cntrl\0" "punct\0" "alnum\0" }, + /* _NL_CTYPE_MAP_NAMES */ + { .string = "toupper\0" "tolower\0" }, + /* _NL_CTYPE_WIDTH */ + { .string = (const char *) _nl_C_LC_CTYPE_width.header }, + /* _NL_CTYPE_MB_CUR_MAX */ + { .word = 1 }, + /* _NL_CTYPE_CODESET_NAME */ + { .string = _nl_C_codeset }, + /* _NL_CTYPE_TOUPPER32 */ + { .string = (const char *) &_nl_C_LC_CTYPE_toupper[128] }, + /* _NL_CTYPE_TOLOWER32 */ + { .string = (const 
char *) &_nl_C_LC_CTYPE_tolower[128] }, + /* _NL_CTYPE_CLASS_OFFSET */ + { .word = NR_FIXED }, + /* _NL_CTYPE_MAP_OFFSET */ + { .word = NR_FIXED + NR_CLASSES }, + /* _NL_CTYPE_INDIGITS_MB_LEN */ + { .word = 1 }, + /* _NL_CTYPE_INDIGITS0_MB .. _NL_CTYPE_INDIGITS9_MB */ + { .string = "0" }, + { .string = "1" }, + { .string = "2" }, + { .string = "3" }, + { .string = "4" }, + { .string = "5" }, + { .string = "6" }, + { .string = "7" }, + { .string = "8" }, + { .string = "9" }, + /* _NL_CTYPE_INDIGITS_WC_LEN */ + { .word = 1 }, + /* _NL_CTYPE_INDIGITS0_WC .. _NL_CTYPE_INDIGITS9_WC */ + { .wstr = (uint32_t *) L"0" }, + { .wstr = (uint32_t *) L"1" }, + { .wstr = (uint32_t *) L"2" }, + { .wstr = (uint32_t *) L"3" }, + { .wstr = (uint32_t *) L"4" }, + { .wstr = (uint32_t *) L"5" }, + { .wstr = (uint32_t *) L"6" }, + { .wstr = (uint32_t *) L"7" }, + { .wstr = (uint32_t *) L"8" }, + { .wstr = (uint32_t *) L"9" }, + /* _NL_CTYPE_OUTDIGIT0_MB .. _NL_CTYPE_OUTDIGIT9_MB */ + { .string = "0" }, + { .string = "1" }, + { .string = "2" }, + { .string = "3" }, + { .string = "4" }, + { .string = "5" }, + { .string = "6" }, + { .string = "7" }, + { .string = "8" }, + { .string = "9" }, + /* _NL_CTYPE_OUTDIGIT0_WC .. _NL_CTYPE_OUTDIGIT9_WC */ + { .word = L'0' }, + { .word = L'1' }, + { .word = L'2' }, + { .word = L'3' }, + { .word = L'4' }, + { .word = L'5' }, + { .word = L'6' }, + { .word = L'7' }, + { .word = L'8' }, + { .word = L'9' }, + /* _NL_CTYPE_TRANSLIT_TAB_SIZE */ + { .word = NTRANSLIT }, + /* _NL_CTYPE_TRANSLIT_FROM_IDX */ + { .wstr = translit_from_idx }, + /* _NL_CTYPE_TRANSLIT_FROM_TBL */ + { .wstr = (uint32_t *) translit_from_tbl }, + /* _NL_CTYPE_TRANSLIT_TO_IDX */ + { .wstr = translit_to_idx }, + /* _NL_CTYPE_TRANSLIT_TO_TBL */ + { .wstr = (uint32_t *) translit_to_tbl }, + /* _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN */ + { .word = 1 }, + /* _NL_CTYPE_TRANSLIT_DEFAULT_MISSING */ + { .wstr = (uint32_t *) L"?" 
}, + /* _NL_CTYPE_TRANSLIT_IGNORE_LEN */ + { .word = 0 }, + /* _NL_CTYPE_TRANSLIT_IGNORE */ + { .wstr = NULL }, + /* _NL_CTYPE_MAP_TO_NONASCII */ + { .word = 0 }, + /* _NL_CTYPE_NONASCII_CASE */ + { .word = 0 }, + /* NR_CLASSES wctype_tables */ + { .string = (const char *) _nl_C_LC_CTYPE_class_upper.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_lower.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_alpha.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_digit.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_xdigit.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_space.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_print.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_graph.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_blank.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_cntrl.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_punct.header }, + { .string = (const char *) _nl_C_LC_CTYPE_class_alnum.header }, + /* NR_MAPS wctrans_tables */ + { .string = (const char *) _nl_C_LC_CTYPE_map_toupper.header }, + { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header } + } +}; diff --git a/REORG.TODO/locale/C-identification.c b/REORG.TODO/locale/C-identification.c new file mode 100644 index 0000000000..0e782fdbaf --- /dev/null +++ b/REORG.TODO/locale/C-identification.c @@ -0,0 +1,56 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> + +#include "localeinfo.h" + +/* This table's entries are taken from ISO 14652, the table in section + 4.12 "LC_IDENTIFICATION". */ + +const struct __locale_data _nl_C_LC_IDENTIFICATION attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 16, + { + { .string = "ISO/IEC 14652 i18n FDCC-set" }, + { .string = "ISO/IEC JTC1/SC22/WG20 - internationalization" }, + { .string = "C/o Keld Simonsen, Skt. Jorgens Alle 8, DK-1615 Kobenhavn V" }, + { .string = "Keld Simonsen" }, + { .string = "keld@dkuug.dk" }, + { .string = "+45 3122-6543" }, + { .string = "+45 3325-6543" }, + { .string = "" }, + { .string = "ISO" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "1.0" }, + { .string = "1997-12-20" }, + { .string = "i18n:1999\0" "i18n:1999\0" "i18n:1999\0" "i18n:1999\0" + "i18n:1999\0" "i18n:1999\0" "\0" "i18n:1999\0" + "i18n:1999\0" "i18n:1999\0" "i18n:1999\0" "i18n:1999\0" + "i18n:1999\0" "i18n:1999\0" "i18n:1999\0" "i18n:1999\0" + "i18n:1999" }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-measurement.c b/REORG.TODO/locale/C-measurement.c new file mode 100644 index 0000000000..385674ca6e --- /dev/null +++ b/REORG.TODO/locale/C-measurement.c @@ -0,0 +1,38 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> + +#include "localeinfo.h" + +/* This table's entries are taken from ISO 14652, the table in section + 4.12 "LC_MEASUREMENT". */ + +const struct __locale_data _nl_C_LC_MEASUREMENT attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 2, + { + { .string = "\1" }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-messages.c b/REORG.TODO/locale/C-messages.c new file mode 100644 index 0000000000..33a15cf040 --- /dev/null +++ b/REORG.TODO/locale/C-messages.c @@ -0,0 +1,41 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +/* This table's entries are taken from POSIX.2 Table 2-12 + ``LC_MESSAGES Category Definition in the POSIX Locale''. + + The last two fields are a GNU extension. */ + +const struct __locale_data _nl_C_LC_MESSAGES attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 5, + { + { .string = "^[yY]" }, + { .string = "^[nN]" }, + { .string = "" }, + { .string = "" }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-monetary.c b/REORG.TODO/locale/C-monetary.c new file mode 100644 index 0000000000..df4abf477c --- /dev/null +++ b/REORG.TODO/locale/C-monetary.c @@ -0,0 +1,83 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +/* This table's entries are taken from POSIX.2 Table 2-9 + ``LC_MONETARY Category Definition in the POSIX Locale'', + with additions from ISO 14652, section 4.4. 
*/ +static const char not_available[] = "\377"; +static const uint32_t conversion_rate[] = { 1, 1 }; + +const struct __locale_data _nl_C_LC_MONETARY attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 46, + { + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = "-" }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = "" }, + { .string = "" }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .string = not_available }, + { .word = 10101 }, + { .word = 99991231 }, + { .word = 10101 }, + { .word = 99991231 }, + { .string = (const char *) conversion_rate }, + { .word = (unsigned int) L'\0' }, + { .word = (unsigned int) L'\0' }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-name.c b/REORG.TODO/locale/C-name.c new file mode 100644 index 0000000000..e5923857ca --- /dev/null +++ b/REORG.TODO/locale/C-name.c @@ -0,0 +1,43 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> + +#include "localeinfo.h" + +/* This table's entries are taken from ISO 14652, the table in section + 4.9 "LC_NAME". */ + +const struct __locale_data _nl_C_LC_NAME attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 7, + { + { .string = "%p%t%g%t%m%t%f" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-numeric.c b/REORG.TODO/locale/C-numeric.c new file mode 100644 index 0000000000..a035e76b8c --- /dev/null +++ b/REORG.TODO/locale/C-numeric.c @@ -0,0 +1,39 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +/* This table's entries are taken from POSIX.2 Table 2-10 + ``LC_NUMERIC Category Definition in the POSIX Locale''. */ +const struct __locale_data _nl_C_LC_NUMERIC attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 6, + { + { .string = "." }, + { .string = "" }, + { .string = "" }, + { .word = (unsigned int) L'.' }, + { .word = (unsigned int) L'\0' }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-paper.c b/REORG.TODO/locale/C-paper.c new file mode 100644 index 0000000000..eef3ab345b --- /dev/null +++ b/REORG.TODO/locale/C-paper.c @@ -0,0 +1,39 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> + +#include "localeinfo.h" + +/* This table's entries are taken from ISO 14652, the table in section + 4.8 "LC_PAPER". 
*/ + +const struct __locale_data _nl_C_LC_PAPER attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 3, + { + { .word = 297 }, + { .word = 210 }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-telephone.c b/REORG.TODO/locale/C-telephone.c new file mode 100644 index 0000000000..03d467960a --- /dev/null +++ b/REORG.TODO/locale/C-telephone.c @@ -0,0 +1,41 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> + +#include "localeinfo.h" + +/* This table's entries are taken from ISO 14652, the table in section + 4.11 "LC_TELEPHONE". */ + +const struct __locale_data _nl_C_LC_TELEPHONE attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 5, + { + { .string = "+%c %a %l" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-time.c b/REORG.TODO/locale/C-time.c new file mode 100644 index 0000000000..31d8704fd8 --- /dev/null +++ b/REORG.TODO/locale/C-time.c @@ -0,0 +1,147 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> +#include "localeinfo.h" + +/* This table's entries are taken from POSIX.2 Table 2-11 + ``LC_TIME Category Definition in the POSIX Locale'', + with additions from ISO 14652, section 4.6. */ + +const struct __locale_data _nl_C_LC_TIME attribute_hidden = +{ + _nl_C_name, + NULL, 0, 0, /* no file mapped */ + { NULL, }, /* no cached data */ + UNDELETABLE, + 0, + 111, + { + { .string = "Sun" }, + { .string = "Mon" }, + { .string = "Tue" }, + { .string = "Wed" }, + { .string = "Thu" }, + { .string = "Fri" }, + { .string = "Sat" }, + { .string = "Sunday" }, + { .string = "Monday" }, + { .string = "Tuesday" }, + { .string = "Wednesday" }, + { .string = "Thursday" }, + { .string = "Friday" }, + { .string = "Saturday" }, + { .string = "Jan" }, + { .string = "Feb" }, + { .string = "Mar" }, + { .string = "Apr" }, + { .string = "May" }, + { .string = "Jun" }, + { .string = "Jul" }, + { .string = "Aug" }, + { .string = "Sep" }, + { .string = "Oct" }, + { .string = "Nov" }, + { .string = "Dec" }, + { .string = "January" }, + { .string = "February" }, + { .string = "March" }, + { .string = "April" }, + { .string = "May" }, + { .string = "June" }, + { .string = "July" }, + { .string = "August" }, + 
{ .string = "September" }, + { .string = "October" }, + { .string = "November" }, + { .string = "December" }, + { .string = "AM" }, + { .string = "PM" }, + { .string = "%a %b %e %H:%M:%S %Y" }, + { .string = "%m/%d/%y" }, + { .string = "%H:%M:%S" }, + { .string = "%I:%M:%S %p" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .string = "" }, + { .word = 0 }, + { .string = "" }, + { .wstr = (const uint32_t *) L"Sun" }, + { .wstr = (const uint32_t *) L"Mon" }, + { .wstr = (const uint32_t *) L"Tue" }, + { .wstr = (const uint32_t *) L"Wed" }, + { .wstr = (const uint32_t *) L"Thu" }, + { .wstr = (const uint32_t *) L"Fri" }, + { .wstr = (const uint32_t *) L"Sat" }, + { .wstr = (const uint32_t *) L"Sunday" }, + { .wstr = (const uint32_t *) L"Monday" }, + { .wstr = (const uint32_t *) L"Tuesday" }, + { .wstr = (const uint32_t *) L"Wednesday" }, + { .wstr = (const uint32_t *) L"Thursday" }, + { .wstr = (const uint32_t *) L"Friday" }, + { .wstr = (const uint32_t *) L"Saturday" }, + { .wstr = (const uint32_t *) L"Jan" }, + { .wstr = (const uint32_t *) L"Feb" }, + { .wstr = (const uint32_t *) L"Mar" }, + { .wstr = (const uint32_t *) L"Apr" }, + { .wstr = (const uint32_t *) L"May" }, + { .wstr = (const uint32_t *) L"Jun" }, + { .wstr = (const uint32_t *) L"Jul" }, + { .wstr = (const uint32_t *) L"Aug" }, + { .wstr = (const uint32_t *) L"Sep" }, + { .wstr = (const uint32_t *) L"Oct" }, + { .wstr = (const uint32_t *) L"Nov" }, + { .wstr = (const uint32_t *) L"Dec" }, + { .wstr = (const uint32_t *) L"January" }, + { .wstr = (const uint32_t *) L"February" }, + { .wstr = (const uint32_t *) L"March" }, + { .wstr = (const uint32_t *) L"April" }, + { .wstr = (const uint32_t *) L"May" }, + { .wstr = (const uint32_t *) L"June" }, + { .wstr = (const uint32_t *) L"July" }, + { .wstr = (const uint32_t *) L"August" }, + { .wstr = (const uint32_t *) L"September" }, + { .wstr = (const uint32_t *) L"October" }, + { .wstr = (const uint32_t 
*) L"November" }, + { .wstr = (const uint32_t *) L"December" }, + { .wstr = (const uint32_t *) L"AM" }, + { .wstr = (const uint32_t *) L"PM" }, + { .wstr = (const uint32_t *) L"%a %b %e %H:%M:%S %Y" }, + { .wstr = (const uint32_t *) L"%m/%d/%y" }, + { .wstr = (const uint32_t *) L"%H:%M:%S" }, + { .wstr = (const uint32_t *) L"%I:%M:%S %p" }, + { .wstr = (const uint32_t *) L"" }, + { .wstr = (const uint32_t *) L"" }, + { .wstr = (const uint32_t *) L"" }, + { .wstr = (const uint32_t *) L"" }, + { .wstr = (const uint32_t *) L"" }, + { .string = "\7" }, + { .word = 19971130 }, + { .string = "\4" }, + { .string = "\1" }, + { .string = "\2" }, + { .string = "\1" }, + { .string = "" }, + { .string = "%a %b %e %H:%M:%S %Z %Y" }, + { .wstr = (const uint32_t *) L"%a %b %e %H:%M:%S %Z %Y" }, + { .string = _nl_C_codeset } + } +}; diff --git a/REORG.TODO/locale/C-translit.h b/REORG.TODO/locale/C-translit.h new file mode 100644 index 0000000000..c0f1fdbd4f --- /dev/null +++ b/REORG.TODO/locale/C-translit.h @@ -0,0 +1,836 @@ +#include <stdint.h> +#define NTRANSLIT 1353 +static const uint32_t translit_from_idx[] = +{ + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, + 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, + 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, + 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, + 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, + 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, + 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, + 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, + 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, + 240, 242, 244, 246, 248, 250, 252, 254, 256, 258, 260, 262, + 264, 266, 268, 270, 272, 274, 276, 278, 280, 282, 284, 286, + 288, 290, 292, 294, 296, 298, 300, 302, 304, 306, 308, 310, + 312, 314, 316, 318, 320, 322, 324, 326, 328, 330, 332, 334, + 336, 338, 340, 342, 344, 346, 348, 350, 352, 354, 356, 358, + 360, 
362, 364, 366, 368, 370, 372, 374, 376, 378, 380, 382, + 384, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, + 408, 410, 412, 414, 416, 418, 420, 422, 424, 426, 428, 430, + 432, 434, 436, 438, 440, 442, 444, 446, 448, 450, 452, 454, + 456, 458, 460, 462, 464, 466, 468, 470, 472, 474, 476, 478, + 480, 482, 484, 486, 488, 490, 492, 494, 496, 498, 500, 502, + 504, 506, 508, 510, 512, 514, 516, 518, 520, 522, 524, 526, + 528, 530, 532, 534, 536, 538, 540, 542, 544, 546, 548, 550, + 552, 554, 556, 558, 560, 562, 564, 566, 568, 570, 572, 574, + 576, 578, 580, 582, 584, 586, 588, 590, 592, 594, 596, 598, + 600, 602, 604, 606, 608, 610, 612, 614, 616, 618, 620, 622, + 624, 626, 628, 630, 632, 634, 636, 638, 640, 642, 644, 646, + 648, 650, 652, 654, 656, 658, 660, 662, 664, 666, 668, 670, + 672, 674, 676, 678, 680, 682, 684, 686, 688, 690, 692, 694, + 696, 698, 700, 702, 704, 706, 708, 710, 712, 714, 716, 718, + 720, 722, 724, 726, 728, 730, 732, 734, 736, 738, 740, 742, + 744, 746, 748, 750, 752, 754, 756, 758, 760, 762, 764, 766, + 768, 770, 772, 774, 776, 778, 780, 782, 784, 786, 788, 790, + 792, 794, 796, 798, 800, 802, 804, 806, 808, 810, 812, 814, + 816, 818, 820, 822, 824, 826, 828, 830, 832, 834, 836, 838, + 840, 842, 844, 846, 848, 850, 852, 854, 856, 858, 860, 862, + 864, 866, 868, 870, 872, 874, 876, 878, 880, 882, 884, 886, + 888, 890, 892, 894, 896, 898, 900, 902, 904, 906, 908, 910, + 912, 914, 916, 918, 920, 922, 924, 926, 928, 930, 932, 934, + 936, 938, 940, 942, 944, 946, 948, 950, 952, 954, 956, 958, + 960, 962, 964, 966, 968, 970, 972, 974, 976, 978, 980, 982, + 984, 986, 988, 990, 992, 994, 996, 998, 1000, 1002, 1004, 1006, + 1008, 1010, 1012, 1014, 1016, 1018, 1020, 1022, 1024, 1026, 1028, 1030, + 1032, 1034, 1036, 1038, 1040, 1042, 1044, 1046, 1048, 1050, 1052, 1054, + 1056, 1058, 1060, 1062, 1064, 1066, 1068, 1070, 1072, 1074, 1076, 1078, + 1080, 1082, 1084, 1086, 1088, 1090, 1092, 1094, 1096, 1098, 1100, 1102, + 1104, 1106, 1108, 1110, 1112, 
1114, 1116, 1118, 1120, 1122, 1124, 1126, + 1128, 1130, 1132, 1134, 1136, 1138, 1140, 1142, 1144, 1146, 1148, 1150, + 1152, 1154, 1156, 1158, 1160, 1162, 1164, 1166, 1168, 1170, 1172, 1174, + 1176, 1178, 1180, 1182, 1184, 1186, 1188, 1190, 1192, 1194, 1196, 1198, + 1200, 1202, 1204, 1206, 1208, 1210, 1212, 1214, 1216, 1218, 1220, 1222, + 1224, 1226, 1228, 1230, 1232, 1234, 1236, 1238, 1240, 1242, 1244, 1246, + 1248, 1250, 1252, 1254, 1256, 1258, 1260, 1262, 1264, 1266, 1268, 1270, + 1272, 1274, 1276, 1278, 1280, 1282, 1284, 1286, 1288, 1290, 1292, 1294, + 1296, 1298, 1300, 1302, 1304, 1306, 1308, 1310, 1312, 1314, 1316, 1318, + 1320, 1322, 1324, 1326, 1328, 1330, 1332, 1334, 1336, 1338, 1340, 1342, + 1344, 1346, 1348, 1350, 1352, 1354, 1356, 1358, 1360, 1362, 1364, 1366, + 1368, 1370, 1372, 1374, 1376, 1378, 1380, 1382, 1384, 1386, 1388, 1390, + 1392, 1394, 1396, 1398, 1400, 1402, 1404, 1406, 1408, 1410, 1412, 1414, + 1416, 1418, 1420, 1422, 1424, 1426, 1428, 1430, 1432, 1434, 1436, 1438, + 1440, 1442, 1444, 1446, 1448, 1450, 1452, 1454, 1456, 1458, 1460, 1462, + 1464, 1466, 1468, 1470, 1472, 1474, 1476, 1478, 1480, 1482, 1484, 1486, + 1488, 1490, 1492, 1494, 1496, 1498, 1500, 1502, 1504, 1506, 1508, 1510, + 1512, 1514, 1516, 1518, 1520, 1522, 1524, 1526, 1528, 1530, 1532, 1534, + 1536, 1538, 1540, 1542, 1544, 1546, 1548, 1550, 1552, 1554, 1556, 1558, + 1560, 1562, 1564, 1566, 1568, 1570, 1572, 1574, 1576, 1578, 1580, 1582, + 1584, 1586, 1588, 1590, 1592, 1594, 1596, 1598, 1600, 1602, 1604, 1606, + 1608, 1610, 1612, 1614, 1616, 1618, 1620, 1622, 1624, 1626, 1628, 1630, + 1632, 1634, 1636, 1638, 1640, 1642, 1644, 1646, 1648, 1650, 1652, 1654, + 1656, 1658, 1660, 1662, 1664, 1666, 1668, 1670, 1672, 1674, 1676, 1678, + 1680, 1682, 1684, 1686, 1688, 1690, 1692, 1694, 1696, 1698, 1700, 1702, + 1704, 1706, 1708, 1710, 1712, 1714, 1716, 1718, 1720, 1722, 1724, 1726, + 1728, 1730, 1732, 1734, 1736, 1738, 1740, 1742, 1744, 1746, 1748, 1750, + 1752, 1754, 1756, 1758, 1760, 
1762, 1764, 1766, 1768, 1770, 1772, 1774, + 1776, 1778, 1780, 1782, 1784, 1786, 1788, 1790, 1792, 1794, 1796, 1798, + 1800, 1802, 1804, 1806, 1808, 1810, 1812, 1814, 1816, 1818, 1820, 1822, + 1824, 1826, 1828, 1830, 1832, 1834, 1836, 1838, 1840, 1842, 1844, 1846, + 1848, 1850, 1852, 1854, 1856, 1858, 1860, 1862, 1864, 1866, 1868, 1870, + 1872, 1874, 1876, 1878, 1880, 1882, 1884, 1886, 1888, 1890, 1892, 1894, + 1896, 1898, 1900, 1902, 1904, 1906, 1908, 1910, 1912, 1914, 1916, 1918, + 1920, 1922, 1924, 1926, 1928, 1930, 1932, 1934, 1936, 1938, 1940, 1942, + 1944, 1946, 1948, 1950, 1952, 1954, 1956, 1958, 1960, 1962, 1964, 1966, + 1968, 1970, 1972, 1974, 1976, 1978, 1980, 1982, 1984, 1986, 1988, 1990, + 1992, 1994, 1996, 1998, 2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, + 2016, 2018, 2020, 2022, 2024, 2026, 2028, 2030, 2032, 2034, 2036, 2038, + 2040, 2042, 2044, 2046, 2048, 2050, 2052, 2054, 2056, 2058, 2060, 2062, + 2064, 2066, 2068, 2070, 2072, 2074, 2076, 2078, 2080, 2082, 2084, 2086, + 2088, 2090, 2092, 2094, 2096, 2098, 2100, 2102, 2104, 2106, 2108, 2110, + 2112, 2114, 2116, 2118, 2120, 2122, 2124, 2126, 2128, 2130, 2132, 2134, + 2136, 2138, 2140, 2142, 2144, 2146, 2148, 2150, 2152, 2154, 2156, 2158, + 2160, 2162, 2164, 2166, 2168, 2170, 2172, 2174, 2176, 2178, 2180, 2182, + 2184, 2186, 2188, 2190, 2192, 2194, 2196, 2198, 2200, 2202, 2204, 2206, + 2208, 2210, 2212, 2214, 2216, 2218, 2220, 2222, 2224, 2226, 2228, 2230, + 2232, 2234, 2236, 2238, 2240, 2242, 2244, 2246, 2248, 2250, 2252, 2254, + 2256, 2258, 2260, 2262, 2264, 2266, 2268, 2270, 2272, 2274, 2276, 2278, + 2280, 2282, 2284, 2286, 2288, 2290, 2292, 2294, 2296, 2298, 2300, 2302, + 2304, 2306, 2308, 2310, 2312, 2314, 2316, 2318, 2320, 2322, 2324, 2326, + 2328, 2330, 2332, 2334, 2336, 2338, 2340, 2342, 2344, 2346, 2348, 2350, + 2352, 2354, 2356, 2358, 2360, 2362, 2364, 2366, 2368, 2370, 2372, 2374, + 2376, 2378, 2380, 2382, 2384, 2386, 2388, 2390, 2392, 2394, 2396, 2398, + 2400, 2402, 2404, 2406, 2408, 
2410, 2412, 2414, 2416, 2418, 2420, 2422, + 2424, 2426, 2428, 2430, 2432, 2434, 2436, 2438, 2440, 2442, 2444, 2446, + 2448, 2450, 2452, 2454, 2456, 2458, 2460, 2462, 2464, 2466, 2468, 2470, + 2472, 2474, 2476, 2478, 2480, 2482, 2484, 2486, 2488, 2490, 2492, 2494, + 2496, 2498, 2500, 2502, 2504, 2506, 2508, 2510, 2512, 2514, 2516, 2518, + 2520, 2522, 2524, 2526, 2528, 2530, 2532, 2534, 2536, 2538, 2540, 2542, + 2544, 2546, 2548, 2550, 2552, 2554, 2556, 2558, 2560, 2562, 2564, 2566, + 2568, 2570, 2572, 2574, 2576, 2578, 2580, 2582, 2584, 2586, 2588, 2590, + 2592, 2594, 2596, 2598, 2600, 2602, 2604, 2606, 2608, 2610, 2612, 2614, + 2616, 2618, 2620, 2622, 2624, 2626, 2628, 2630, 2632, 2634, 2636, 2638, + 2640, 2642, 2644, 2646, 2648, 2650, 2652, 2654, 2656, 2658, 2660, 2662, + 2664, 2666, 2668, 2670, 2672, 2674, 2676, 2678, 2680, 2682, 2684, 2686, + 2688, 2690, 2692, 2694, 2696, 2698, 2700, 2702, 2704 +}; +static const wchar_t translit_from_tbl[] = + L"\x00a0" L"\0" L"\x00a9" L"\0" L"\x00ab" L"\0" L"\x00ad" L"\0" L"\x00ae" + L"\0" L"\x00b5" L"\0" L"\x00b8" L"\0" L"\x00bb" L"\0" L"\x00bc" L"\0" + L"\x00bd" L"\0" L"\x00be" L"\0" L"\x00c6" L"\0" L"\x00d7" L"\0" L"\x00df" + L"\0" L"\x00e6" L"\0" L"\x0132" L"\0" L"\x0133" L"\0" L"\x0149" L"\0" + L"\x0152" L"\0" L"\x0153" L"\0" L"\x017f" L"\0" L"\x01c7" L"\0" L"\x01c8" + L"\0" L"\x01c9" L"\0" L"\x01ca" L"\0" L"\x01cb" L"\0" L"\x01cc" L"\0" + L"\x01f1" L"\0" L"\x01f2" L"\0" L"\x01f3" L"\0" L"\x02bc" L"\0" L"\x02c6" + L"\0" L"\x02c8" L"\0" L"\x02cb" L"\0" L"\x02cd" L"\0" L"\x02d0" L"\0" + L"\x02dc" L"\0" L"\x2002" L"\0" L"\x2003" L"\0" L"\x2004" L"\0" L"\x2005" + L"\0" L"\x2006" L"\0" L"\x2008" L"\0" L"\x2009" L"\0" L"\x200a" L"\0" + L"\x200b" L"\0" L"\x2010" L"\0" L"\x2011" L"\0" L"\x2012" L"\0" L"\x2013" + L"\0" L"\x2014" L"\0" L"\x2015" L"\0" L"\x2018" L"\0" L"\x2019" L"\0" + L"\x201a" L"\0" L"\x201b" L"\0" L"\x201c" L"\0" L"\x201d" L"\0" L"\x201e" + L"\0" L"\x201f" L"\0" L"\x2020" L"\0" L"\x2022" L"\0" L"\x2024" L"\0" + 
L"\x2025" L"\0" L"\x2026" L"\0" L"\x202f" L"\0" L"\x2035" L"\0" L"\x2036" + L"\0" L"\x2037" L"\0" L"\x2039" L"\0" L"\x203a" L"\0" L"\x203c" L"\0" + L"\x2044" L"\0" L"\x2047" L"\0" L"\x2048" L"\0" L"\x2049" L"\0" L"\x205f" + L"\0" L"\x2060" L"\0" L"\x2061" L"\0" L"\x2062" L"\0" L"\x2063" L"\0" + L"\x20a1" L"\0" L"\x20a8" L"\0" L"\x20ac" L"\0" L"\x20b9" L"\0" L"\x2100" + L"\0" L"\x2101" L"\0" L"\x2102" L"\0" L"\x2105" L"\0" L"\x2106" L"\0" + L"\x210a" L"\0" L"\x210b" L"\0" L"\x210c" L"\0" L"\x210d" L"\0" L"\x210e" + L"\0" L"\x2110" L"\0" L"\x2111" L"\0" L"\x2112" L"\0" L"\x2113" L"\0" + L"\x2115" L"\0" L"\x2116" L"\0" L"\x2119" L"\0" L"\x211a" L"\0" L"\x211b" + L"\0" L"\x211c" L"\0" L"\x211d" L"\0" L"\x2121" L"\0" L"\x2122" L"\0" + L"\x2124" L"\0" L"\x2126" L"\0" L"\x2128" L"\0" L"\x212c" L"\0" L"\x212d" + L"\0" L"\x212e" L"\0" L"\x212f" L"\0" L"\x2130" L"\0" L"\x2131" L"\0" + L"\x2133" L"\0" L"\x2134" L"\0" L"\x2139" L"\0" L"\x2145" L"\0" L"\x2146" + L"\0" L"\x2147" L"\0" L"\x2148" L"\0" L"\x2149" L"\0" L"\x2153" L"\0" + L"\x2154" L"\0" L"\x2155" L"\0" L"\x2156" L"\0" L"\x2157" L"\0" L"\x2158" + L"\0" L"\x2159" L"\0" L"\x215a" L"\0" L"\x215b" L"\0" L"\x215c" L"\0" + L"\x215d" L"\0" L"\x215e" L"\0" L"\x215f" L"\0" L"\x2160" L"\0" L"\x2161" + L"\0" L"\x2162" L"\0" L"\x2163" L"\0" L"\x2164" L"\0" L"\x2165" L"\0" + L"\x2166" L"\0" L"\x2167" L"\0" L"\x2168" L"\0" L"\x2169" L"\0" L"\x216a" + L"\0" L"\x216b" L"\0" L"\x216c" L"\0" L"\x216d" L"\0" L"\x216e" L"\0" + L"\x216f" L"\0" L"\x2170" L"\0" L"\x2171" L"\0" L"\x2172" L"\0" L"\x2173" + L"\0" L"\x2174" L"\0" L"\x2175" L"\0" L"\x2176" L"\0" L"\x2177" L"\0" + L"\x2178" L"\0" L"\x2179" L"\0" L"\x217a" L"\0" L"\x217b" L"\0" L"\x217c" + L"\0" L"\x217d" L"\0" L"\x217e" L"\0" L"\x217f" L"\0" L"\x2190" L"\0" + L"\x2192" L"\0" L"\x2194" L"\0" L"\x21d0" L"\0" L"\x21d2" L"\0" L"\x21d4" + L"\0" L"\x2212" L"\0" L"\x2215" L"\0" L"\x2216" L"\0" L"\x2217" L"\0" + L"\x2223" L"\0" L"\x2236" L"\0" L"\x223c" L"\0" L"\x2264" L"\0" L"\x2265" + 
L"\0" L"\x226a" L"\0" L"\x226b" L"\0" L"\x22d8" L"\0" L"\x22d9" L"\0" + L"\x2400" L"\0" L"\x2401" L"\0" L"\x2402" L"\0" L"\x2403" L"\0" L"\x2404" + L"\0" L"\x2405" L"\0" L"\x2406" L"\0" L"\x2407" L"\0" L"\x2408" L"\0" + L"\x2409" L"\0" L"\x240a" L"\0" L"\x240b" L"\0" L"\x240c" L"\0" L"\x240d" + L"\0" L"\x240e" L"\0" L"\x240f" L"\0" L"\x2410" L"\0" L"\x2411" L"\0" + L"\x2412" L"\0" L"\x2413" L"\0" L"\x2414" L"\0" L"\x2415" L"\0" L"\x2416" + L"\0" L"\x2417" L"\0" L"\x2418" L"\0" L"\x2419" L"\0" L"\x241a" L"\0" + L"\x241b" L"\0" L"\x241c" L"\0" L"\x241d" L"\0" L"\x241e" L"\0" L"\x241f" + L"\0" L"\x2420" L"\0" L"\x2421" L"\0" L"\x2423" L"\0" L"\x2424" L"\0" + L"\x2460" L"\0" L"\x2461" L"\0" L"\x2462" L"\0" L"\x2463" L"\0" L"\x2464" + L"\0" L"\x2465" L"\0" L"\x2466" L"\0" L"\x2467" L"\0" L"\x2468" L"\0" + L"\x2469" L"\0" L"\x246a" L"\0" L"\x246b" L"\0" L"\x246c" L"\0" L"\x246d" + L"\0" L"\x246e" L"\0" L"\x246f" L"\0" L"\x2470" L"\0" L"\x2471" L"\0" + L"\x2472" L"\0" L"\x2473" L"\0" L"\x2474" L"\0" L"\x2475" L"\0" L"\x2476" + L"\0" L"\x2477" L"\0" L"\x2478" L"\0" L"\x2479" L"\0" L"\x247a" L"\0" + L"\x247b" L"\0" L"\x247c" L"\0" L"\x247d" L"\0" L"\x247e" L"\0" L"\x247f" + L"\0" L"\x2480" L"\0" L"\x2481" L"\0" L"\x2482" L"\0" L"\x2483" L"\0" + L"\x2484" L"\0" L"\x2485" L"\0" L"\x2486" L"\0" L"\x2487" L"\0" L"\x2488" + L"\0" L"\x2489" L"\0" L"\x248a" L"\0" L"\x248b" L"\0" L"\x248c" L"\0" + L"\x248d" L"\0" L"\x248e" L"\0" L"\x248f" L"\0" L"\x2490" L"\0" L"\x2491" + L"\0" L"\x2492" L"\0" L"\x2493" L"\0" L"\x2494" L"\0" L"\x2495" L"\0" + L"\x2496" L"\0" L"\x2497" L"\0" L"\x2498" L"\0" L"\x2499" L"\0" L"\x249a" + L"\0" L"\x249b" L"\0" L"\x249c" L"\0" L"\x249d" L"\0" L"\x249e" L"\0" + L"\x249f" L"\0" L"\x24a0" L"\0" L"\x24a1" L"\0" L"\x24a2" L"\0" L"\x24a3" + L"\0" L"\x24a4" L"\0" L"\x24a5" L"\0" L"\x24a6" L"\0" L"\x24a7" L"\0" + L"\x24a8" L"\0" L"\x24a9" L"\0" L"\x24aa" L"\0" L"\x24ab" L"\0" L"\x24ac" + L"\0" L"\x24ad" L"\0" L"\x24ae" L"\0" L"\x24af" L"\0" L"\x24b0" L"\0" + 
L"\x24b1" L"\0" L"\x24b2" L"\0" L"\x24b3" L"\0" L"\x24b4" L"\0" L"\x24b5" + L"\0" L"\x24b6" L"\0" L"\x24b7" L"\0" L"\x24b8" L"\0" L"\x24b9" L"\0" + L"\x24ba" L"\0" L"\x24bb" L"\0" L"\x24bc" L"\0" L"\x24bd" L"\0" L"\x24be" + L"\0" L"\x24bf" L"\0" L"\x24c0" L"\0" L"\x24c1" L"\0" L"\x24c2" L"\0" + L"\x24c3" L"\0" L"\x24c4" L"\0" L"\x24c5" L"\0" L"\x24c6" L"\0" L"\x24c7" + L"\0" L"\x24c8" L"\0" L"\x24c9" L"\0" L"\x24ca" L"\0" L"\x24cb" L"\0" + L"\x24cc" L"\0" L"\x24cd" L"\0" L"\x24ce" L"\0" L"\x24cf" L"\0" L"\x24d0" + L"\0" L"\x24d1" L"\0" L"\x24d2" L"\0" L"\x24d3" L"\0" L"\x24d4" L"\0" + L"\x24d5" L"\0" L"\x24d6" L"\0" L"\x24d7" L"\0" L"\x24d8" L"\0" L"\x24d9" + L"\0" L"\x24da" L"\0" L"\x24db" L"\0" L"\x24dc" L"\0" L"\x24dd" L"\0" + L"\x24de" L"\0" L"\x24df" L"\0" L"\x24e0" L"\0" L"\x24e1" L"\0" L"\x24e2" + L"\0" L"\x24e3" L"\0" L"\x24e4" L"\0" L"\x24e5" L"\0" L"\x24e6" L"\0" + L"\x24e7" L"\0" L"\x24e8" L"\0" L"\x24e9" L"\0" L"\x24ea" L"\0" L"\x2500" + L"\0" L"\x2502" L"\0" L"\x250c" L"\0" L"\x2510" L"\0" L"\x2514" L"\0" + L"\x2518" L"\0" L"\x251c" L"\0" L"\x2524" L"\0" L"\x252c" L"\0" L"\x2534" + L"\0" L"\x253c" L"\0" L"\x25e6" L"\0" L"\x2a74" L"\0" L"\x2a75" L"\0" + L"\x2a76" L"\0" L"\x3000" L"\0" L"\x30a0" L"\0" L"\x3251" L"\0" L"\x3252" + L"\0" L"\x3253" L"\0" L"\x3254" L"\0" L"\x3255" L"\0" L"\x3256" L"\0" + L"\x3257" L"\0" L"\x3258" L"\0" L"\x3259" L"\0" L"\x325a" L"\0" L"\x325b" + L"\0" L"\x325c" L"\0" L"\x325d" L"\0" L"\x325e" L"\0" L"\x325f" L"\0" + L"\x32b1" L"\0" L"\x32b2" L"\0" L"\x32b3" L"\0" L"\x32b4" L"\0" L"\x32b5" + L"\0" L"\x32b6" L"\0" L"\x32b7" L"\0" L"\x32b8" L"\0" L"\x32b9" L"\0" + L"\x32ba" L"\0" L"\x32bb" L"\0" L"\x32bc" L"\0" L"\x32bd" L"\0" L"\x32be" + L"\0" L"\x32bf" L"\0" L"\x3371" L"\0" L"\x3372" L"\0" L"\x3373" L"\0" + L"\x3374" L"\0" L"\x3375" L"\0" L"\x3376" L"\0" L"\x3380" L"\0" L"\x3381" + L"\0" L"\x3382" L"\0" L"\x3383" L"\0" L"\x3384" L"\0" L"\x3385" L"\0" + L"\x3386" L"\0" L"\x3387" L"\0" L"\x3388" L"\0" L"\x3389" L"\0" L"\x338a" + 
L"\0" L"\x338b" L"\0" L"\x338c" L"\0" L"\x338d" L"\0" L"\x338e" L"\0" + L"\x338f" L"\0" L"\x3390" L"\0" L"\x3391" L"\0" L"\x3392" L"\0" L"\x3393" + L"\0" L"\x3394" L"\0" L"\x3395" L"\0" L"\x3396" L"\0" L"\x3397" L"\0" + L"\x3398" L"\0" L"\x3399" L"\0" L"\x339a" L"\0" L"\x339b" L"\0" L"\x339c" + L"\0" L"\x339d" L"\0" L"\x339e" L"\0" L"\x339f" L"\0" L"\x33a0" L"\0" + L"\x33a1" L"\0" L"\x33a2" L"\0" L"\x33a3" L"\0" L"\x33a4" L"\0" L"\x33a5" + L"\0" L"\x33a6" L"\0" L"\x33a7" L"\0" L"\x33a8" L"\0" L"\x33a9" L"\0" + L"\x33aa" L"\0" L"\x33ab" L"\0" L"\x33ac" L"\0" L"\x33ad" L"\0" L"\x33ae" + L"\0" L"\x33af" L"\0" L"\x33b0" L"\0" L"\x33b1" L"\0" L"\x33b2" L"\0" + L"\x33b3" L"\0" L"\x33b4" L"\0" L"\x33b5" L"\0" L"\x33b6" L"\0" L"\x33b7" + L"\0" L"\x33b8" L"\0" L"\x33b9" L"\0" L"\x33ba" L"\0" L"\x33bb" L"\0" + L"\x33bc" L"\0" L"\x33bd" L"\0" L"\x33be" L"\0" L"\x33bf" L"\0" L"\x33c2" + L"\0" L"\x33c3" L"\0" L"\x33c4" L"\0" L"\x33c5" L"\0" L"\x33c6" L"\0" + L"\x33c7" L"\0" L"\x33c8" L"\0" L"\x33c9" L"\0" L"\x33ca" L"\0" L"\x33cb" + L"\0" L"\x33cc" L"\0" L"\x33cd" L"\0" L"\x33ce" L"\0" L"\x33cf" L"\0" + L"\x33d0" L"\0" L"\x33d1" L"\0" L"\x33d2" L"\0" L"\x33d3" L"\0" L"\x33d4" + L"\0" L"\x33d5" L"\0" L"\x33d6" L"\0" L"\x33d7" L"\0" L"\x33d8" L"\0" + L"\x33d9" L"\0" L"\x33da" L"\0" L"\x33db" L"\0" L"\x33dc" L"\0" L"\x33dd" + L"\0" L"\xfb00" L"\0" L"\xfb01" L"\0" L"\xfb02" L"\0" L"\xfb03" L"\0" + L"\xfb04" L"\0" L"\xfb06" L"\0" L"\xfb29" L"\0" L"\xfe00" L"\0" L"\xfe01" + L"\0" L"\xfe02" L"\0" L"\xfe03" L"\0" L"\xfe04" L"\0" L"\xfe05" L"\0" + L"\xfe06" L"\0" L"\xfe07" L"\0" L"\xfe08" L"\0" L"\xfe09" L"\0" L"\xfe0a" + L"\0" L"\xfe0b" L"\0" L"\xfe0c" L"\0" L"\xfe0d" L"\0" L"\xfe0e" L"\0" + L"\xfe0f" L"\0" L"\xfe4d" L"\0" L"\xfe4e" L"\0" L"\xfe4f" L"\0" L"\xfe50" + L"\0" L"\xfe52" L"\0" L"\xfe54" L"\0" L"\xfe55" L"\0" L"\xfe56" L"\0" + L"\xfe57" L"\0" L"\xfe59" L"\0" L"\xfe5a" L"\0" L"\xfe5b" L"\0" L"\xfe5c" + L"\0" L"\xfe5f" L"\0" L"\xfe60" L"\0" L"\xfe61" L"\0" L"\xfe62" L"\0" + 
L"\xfe63" L"\0" L"\xfe64" L"\0" L"\xfe65" L"\0" L"\xfe66" L"\0" L"\xfe68" + L"\0" L"\xfe69" L"\0" L"\xfe6a" L"\0" L"\xfe6b" L"\0" L"\xfeff" L"\0" + L"\xff01" L"\0" L"\xff02" L"\0" L"\xff03" L"\0" L"\xff04" L"\0" L"\xff05" + L"\0" L"\xff06" L"\0" L"\xff07" L"\0" L"\xff08" L"\0" L"\xff09" L"\0" + L"\xff0a" L"\0" L"\xff0b" L"\0" L"\xff0c" L"\0" L"\xff0d" L"\0" L"\xff0e" + L"\0" L"\xff0f" L"\0" L"\xff10" L"\0" L"\xff11" L"\0" L"\xff12" L"\0" + L"\xff13" L"\0" L"\xff14" L"\0" L"\xff15" L"\0" L"\xff16" L"\0" L"\xff17" + L"\0" L"\xff18" L"\0" L"\xff19" L"\0" L"\xff1a" L"\0" L"\xff1b" L"\0" + L"\xff1c" L"\0" L"\xff1d" L"\0" L"\xff1e" L"\0" L"\xff1f" L"\0" L"\xff20" + L"\0" L"\xff21" L"\0" L"\xff22" L"\0" L"\xff23" L"\0" L"\xff24" L"\0" + L"\xff25" L"\0" L"\xff26" L"\0" L"\xff27" L"\0" L"\xff28" L"\0" L"\xff29" + L"\0" L"\xff2a" L"\0" L"\xff2b" L"\0" L"\xff2c" L"\0" L"\xff2d" L"\0" + L"\xff2e" L"\0" L"\xff2f" L"\0" L"\xff30" L"\0" L"\xff31" L"\0" L"\xff32" + L"\0" L"\xff33" L"\0" L"\xff34" L"\0" L"\xff35" L"\0" L"\xff36" L"\0" + L"\xff37" L"\0" L"\xff38" L"\0" L"\xff39" L"\0" L"\xff3a" L"\0" L"\xff3b" + L"\0" L"\xff3c" L"\0" L"\xff3d" L"\0" L"\xff3e" L"\0" L"\xff3f" L"\0" + L"\xff40" L"\0" L"\xff41" L"\0" L"\xff42" L"\0" L"\xff43" L"\0" L"\xff44" + L"\0" L"\xff45" L"\0" L"\xff46" L"\0" L"\xff47" L"\0" L"\xff48" L"\0" + L"\xff49" L"\0" L"\xff4a" L"\0" L"\xff4b" L"\0" L"\xff4c" L"\0" L"\xff4d" + L"\0" L"\xff4e" L"\0" L"\xff4f" L"\0" L"\xff50" L"\0" L"\xff51" L"\0" + L"\xff52" L"\0" L"\xff53" L"\0" L"\xff54" L"\0" L"\xff55" L"\0" L"\xff56" + L"\0" L"\xff57" L"\0" L"\xff58" L"\0" L"\xff59" L"\0" L"\xff5a" L"\0" + L"\xff5b" L"\0" L"\xff5c" L"\0" L"\xff5d" L"\0" L"\xff5e" L"\0" + L"\x0001d400" L"\0" L"\x0001d401" L"\0" L"\x0001d402" L"\0" L"\x0001d403" + L"\0" L"\x0001d404" L"\0" L"\x0001d405" L"\0" L"\x0001d406" L"\0" + L"\x0001d407" L"\0" L"\x0001d408" L"\0" L"\x0001d409" L"\0" L"\x0001d40a" + L"\0" L"\x0001d40b" L"\0" L"\x0001d40c" L"\0" L"\x0001d40d" L"\0" + L"\x0001d40e" 
L"\0" L"\x0001d40f" L"\0" L"\x0001d410" L"\0" L"\x0001d411" + L"\0" L"\x0001d412" L"\0" L"\x0001d413" L"\0" L"\x0001d414" L"\0" + L"\x0001d415" L"\0" L"\x0001d416" L"\0" L"\x0001d417" L"\0" L"\x0001d418" + L"\0" L"\x0001d419" L"\0" L"\x0001d41a" L"\0" L"\x0001d41b" L"\0" + L"\x0001d41c" L"\0" L"\x0001d41d" L"\0" L"\x0001d41e" L"\0" L"\x0001d41f" + L"\0" L"\x0001d420" L"\0" L"\x0001d421" L"\0" L"\x0001d422" L"\0" + L"\x0001d423" L"\0" L"\x0001d424" L"\0" L"\x0001d425" L"\0" L"\x0001d426" + L"\0" L"\x0001d427" L"\0" L"\x0001d428" L"\0" L"\x0001d429" L"\0" + L"\x0001d42a" L"\0" L"\x0001d42b" L"\0" L"\x0001d42c" L"\0" L"\x0001d42d" + L"\0" L"\x0001d42e" L"\0" L"\x0001d42f" L"\0" L"\x0001d430" L"\0" + L"\x0001d431" L"\0" L"\x0001d432" L"\0" L"\x0001d433" L"\0" L"\x0001d434" + L"\0" L"\x0001d435" L"\0" L"\x0001d436" L"\0" L"\x0001d437" L"\0" + L"\x0001d438" L"\0" L"\x0001d439" L"\0" L"\x0001d43a" L"\0" L"\x0001d43b" + L"\0" L"\x0001d43c" L"\0" L"\x0001d43d" L"\0" L"\x0001d43e" L"\0" + L"\x0001d43f" L"\0" L"\x0001d440" L"\0" L"\x0001d441" L"\0" L"\x0001d442" + L"\0" L"\x0001d443" L"\0" L"\x0001d444" L"\0" L"\x0001d445" L"\0" + L"\x0001d446" L"\0" L"\x0001d447" L"\0" L"\x0001d448" L"\0" L"\x0001d449" + L"\0" L"\x0001d44a" L"\0" L"\x0001d44b" L"\0" L"\x0001d44c" L"\0" + L"\x0001d44d" L"\0" L"\x0001d44e" L"\0" L"\x0001d44f" L"\0" L"\x0001d450" + L"\0" L"\x0001d451" L"\0" L"\x0001d452" L"\0" L"\x0001d453" L"\0" + L"\x0001d454" L"\0" L"\x0001d456" L"\0" L"\x0001d457" L"\0" L"\x0001d458" + L"\0" L"\x0001d459" L"\0" L"\x0001d45a" L"\0" L"\x0001d45b" L"\0" + L"\x0001d45c" L"\0" L"\x0001d45d" L"\0" L"\x0001d45e" L"\0" L"\x0001d45f" + L"\0" L"\x0001d460" L"\0" L"\x0001d461" L"\0" L"\x0001d462" L"\0" + L"\x0001d463" L"\0" L"\x0001d464" L"\0" L"\x0001d465" L"\0" L"\x0001d466" + L"\0" L"\x0001d467" L"\0" L"\x0001d468" L"\0" L"\x0001d469" L"\0" + L"\x0001d46a" L"\0" L"\x0001d46b" L"\0" L"\x0001d46c" L"\0" L"\x0001d46d" + L"\0" L"\x0001d46e" L"\0" L"\x0001d46f" L"\0" L"\x0001d470" L"\0" 
+ L"\x0001d471" L"\0" L"\x0001d472" L"\0" L"\x0001d473" L"\0" L"\x0001d474" + L"\0" L"\x0001d475" L"\0" L"\x0001d476" L"\0" L"\x0001d477" L"\0" + L"\x0001d478" L"\0" L"\x0001d479" L"\0" L"\x0001d47a" L"\0" L"\x0001d47b" + L"\0" L"\x0001d47c" L"\0" L"\x0001d47d" L"\0" L"\x0001d47e" L"\0" + L"\x0001d47f" L"\0" L"\x0001d480" L"\0" L"\x0001d481" L"\0" L"\x0001d482" + L"\0" L"\x0001d483" L"\0" L"\x0001d484" L"\0" L"\x0001d485" L"\0" + L"\x0001d486" L"\0" L"\x0001d487" L"\0" L"\x0001d488" L"\0" L"\x0001d489" + L"\0" L"\x0001d48a" L"\0" L"\x0001d48b" L"\0" L"\x0001d48c" L"\0" + L"\x0001d48d" L"\0" L"\x0001d48e" L"\0" L"\x0001d48f" L"\0" L"\x0001d490" + L"\0" L"\x0001d491" L"\0" L"\x0001d492" L"\0" L"\x0001d493" L"\0" + L"\x0001d494" L"\0" L"\x0001d495" L"\0" L"\x0001d496" L"\0" L"\x0001d497" + L"\0" L"\x0001d498" L"\0" L"\x0001d499" L"\0" L"\x0001d49a" L"\0" + L"\x0001d49b" L"\0" L"\x0001d49c" L"\0" L"\x0001d49e" L"\0" L"\x0001d49f" + L"\0" L"\x0001d4a2" L"\0" L"\x0001d4a5" L"\0" L"\x0001d4a6" L"\0" + L"\x0001d4a9" L"\0" L"\x0001d4aa" L"\0" L"\x0001d4ab" L"\0" L"\x0001d4ac" + L"\0" L"\x0001d4ae" L"\0" L"\x0001d4af" L"\0" L"\x0001d4b0" L"\0" + L"\x0001d4b1" L"\0" L"\x0001d4b2" L"\0" L"\x0001d4b3" L"\0" L"\x0001d4b4" + L"\0" L"\x0001d4b5" L"\0" L"\x0001d4b6" L"\0" L"\x0001d4b7" L"\0" + L"\x0001d4b8" L"\0" L"\x0001d4b9" L"\0" L"\x0001d4bb" L"\0" L"\x0001d4bd" + L"\0" L"\x0001d4be" L"\0" L"\x0001d4bf" L"\0" L"\x0001d4c0" L"\0" + L"\x0001d4c2" L"\0" L"\x0001d4c3" L"\0" L"\x0001d4c5" L"\0" L"\x0001d4c6" + L"\0" L"\x0001d4c7" L"\0" L"\x0001d4c8" L"\0" L"\x0001d4c9" L"\0" + L"\x0001d4ca" L"\0" L"\x0001d4cb" L"\0" L"\x0001d4cc" L"\0" L"\x0001d4cd" + L"\0" L"\x0001d4ce" L"\0" L"\x0001d4cf" L"\0" L"\x0001d4d0" L"\0" + L"\x0001d4d1" L"\0" L"\x0001d4d2" L"\0" L"\x0001d4d3" L"\0" L"\x0001d4d4" + L"\0" L"\x0001d4d5" L"\0" L"\x0001d4d6" L"\0" L"\x0001d4d7" L"\0" + L"\x0001d4d8" L"\0" L"\x0001d4d9" L"\0" L"\x0001d4da" L"\0" L"\x0001d4db" + L"\0" L"\x0001d4dc" L"\0" L"\x0001d4dd" L"\0" 
L"\x0001d4de" L"\0" + L"\x0001d4df" L"\0" L"\x0001d4e0" L"\0" L"\x0001d4e1" L"\0" L"\x0001d4e2" + L"\0" L"\x0001d4e3" L"\0" L"\x0001d4e4" L"\0" L"\x0001d4e5" L"\0" + L"\x0001d4e6" L"\0" L"\x0001d4e7" L"\0" L"\x0001d4e8" L"\0" L"\x0001d4e9" + L"\0" L"\x0001d4ea" L"\0" L"\x0001d4eb" L"\0" L"\x0001d4ec" L"\0" + L"\x0001d4ed" L"\0" L"\x0001d4ee" L"\0" L"\x0001d4ef" L"\0" L"\x0001d4f0" + L"\0" L"\x0001d4f1" L"\0" L"\x0001d4f2" L"\0" L"\x0001d4f3" L"\0" + L"\x0001d4f4" L"\0" L"\x0001d4f5" L"\0" L"\x0001d4f6" L"\0" L"\x0001d4f7" + L"\0" L"\x0001d4f8" L"\0" L"\x0001d4f9" L"\0" L"\x0001d4fa" L"\0" + L"\x0001d4fb" L"\0" L"\x0001d4fc" L"\0" L"\x0001d4fd" L"\0" L"\x0001d4fe" + L"\0" L"\x0001d4ff" L"\0" L"\x0001d500" L"\0" L"\x0001d501" L"\0" + L"\x0001d502" L"\0" L"\x0001d503" L"\0" L"\x0001d504" L"\0" L"\x0001d505" + L"\0" L"\x0001d507" L"\0" L"\x0001d508" L"\0" L"\x0001d509" L"\0" + L"\x0001d50a" L"\0" L"\x0001d50d" L"\0" L"\x0001d50e" L"\0" L"\x0001d50f" + L"\0" L"\x0001d510" L"\0" L"\x0001d511" L"\0" L"\x0001d512" L"\0" + L"\x0001d513" L"\0" L"\x0001d514" L"\0" L"\x0001d516" L"\0" L"\x0001d517" + L"\0" L"\x0001d518" L"\0" L"\x0001d519" L"\0" L"\x0001d51a" L"\0" + L"\x0001d51b" L"\0" L"\x0001d51c" L"\0" L"\x0001d51e" L"\0" L"\x0001d51f" + L"\0" L"\x0001d520" L"\0" L"\x0001d521" L"\0" L"\x0001d522" L"\0" + L"\x0001d523" L"\0" L"\x0001d524" L"\0" L"\x0001d525" L"\0" L"\x0001d526" + L"\0" L"\x0001d527" L"\0" L"\x0001d528" L"\0" L"\x0001d529" L"\0" + L"\x0001d52a" L"\0" L"\x0001d52b" L"\0" L"\x0001d52c" L"\0" L"\x0001d52d" + L"\0" L"\x0001d52e" L"\0" L"\x0001d52f" L"\0" L"\x0001d530" L"\0" + L"\x0001d531" L"\0" L"\x0001d532" L"\0" L"\x0001d533" L"\0" L"\x0001d534" + L"\0" L"\x0001d535" L"\0" L"\x0001d536" L"\0" L"\x0001d537" L"\0" + L"\x0001d538" L"\0" L"\x0001d539" L"\0" L"\x0001d53b" L"\0" L"\x0001d53c" + L"\0" L"\x0001d53d" L"\0" L"\x0001d53e" L"\0" L"\x0001d540" L"\0" + L"\x0001d541" L"\0" L"\x0001d542" L"\0" L"\x0001d543" L"\0" L"\x0001d544" + L"\0" L"\x0001d546" L"\0" 
L"\x0001d54a" L"\0" L"\x0001d54b" L"\0" + L"\x0001d54c" L"\0" L"\x0001d54d" L"\0" L"\x0001d54e" L"\0" L"\x0001d54f" + L"\0" L"\x0001d550" L"\0" L"\x0001d552" L"\0" L"\x0001d553" L"\0" + L"\x0001d554" L"\0" L"\x0001d555" L"\0" L"\x0001d556" L"\0" L"\x0001d557" + L"\0" L"\x0001d558" L"\0" L"\x0001d559" L"\0" L"\x0001d55a" L"\0" + L"\x0001d55b" L"\0" L"\x0001d55c" L"\0" L"\x0001d55d" L"\0" L"\x0001d55e" + L"\0" L"\x0001d55f" L"\0" L"\x0001d560" L"\0" L"\x0001d561" L"\0" + L"\x0001d562" L"\0" L"\x0001d563" L"\0" L"\x0001d564" L"\0" L"\x0001d565" + L"\0" L"\x0001d566" L"\0" L"\x0001d567" L"\0" L"\x0001d568" L"\0" + L"\x0001d569" L"\0" L"\x0001d56a" L"\0" L"\x0001d56b" L"\0" L"\x0001d56c" + L"\0" L"\x0001d56d" L"\0" L"\x0001d56e" L"\0" L"\x0001d56f" L"\0" + L"\x0001d570" L"\0" L"\x0001d571" L"\0" L"\x0001d572" L"\0" L"\x0001d573" + L"\0" L"\x0001d574" L"\0" L"\x0001d575" L"\0" L"\x0001d576" L"\0" + L"\x0001d577" L"\0" L"\x0001d578" L"\0" L"\x0001d579" L"\0" L"\x0001d57a" + L"\0" L"\x0001d57b" L"\0" L"\x0001d57c" L"\0" L"\x0001d57d" L"\0" + L"\x0001d57e" L"\0" L"\x0001d57f" L"\0" L"\x0001d580" L"\0" L"\x0001d581" + L"\0" L"\x0001d582" L"\0" L"\x0001d583" L"\0" L"\x0001d584" L"\0" + L"\x0001d585" L"\0" L"\x0001d586" L"\0" L"\x0001d587" L"\0" L"\x0001d588" + L"\0" L"\x0001d589" L"\0" L"\x0001d58a" L"\0" L"\x0001d58b" L"\0" + L"\x0001d58c" L"\0" L"\x0001d58d" L"\0" L"\x0001d58e" L"\0" L"\x0001d58f" + L"\0" L"\x0001d590" L"\0" L"\x0001d591" L"\0" L"\x0001d592" L"\0" + L"\x0001d593" L"\0" L"\x0001d594" L"\0" L"\x0001d595" L"\0" L"\x0001d596" + L"\0" L"\x0001d597" L"\0" L"\x0001d598" L"\0" L"\x0001d599" L"\0" + L"\x0001d59a" L"\0" L"\x0001d59b" L"\0" L"\x0001d59c" L"\0" L"\x0001d59d" + L"\0" L"\x0001d59e" L"\0" L"\x0001d59f" L"\0" L"\x0001d5a0" L"\0" + L"\x0001d5a1" L"\0" L"\x0001d5a2" L"\0" L"\x0001d5a3" L"\0" L"\x0001d5a4" + L"\0" L"\x0001d5a5" L"\0" L"\x0001d5a6" L"\0" L"\x0001d5a7" L"\0" + L"\x0001d5a8" L"\0" L"\x0001d5a9" L"\0" L"\x0001d5aa" L"\0" L"\x0001d5ab" + L"\0" 
L"\x0001d5ac" L"\0" L"\x0001d5ad" L"\0" L"\x0001d5ae" L"\0" + L"\x0001d5af" L"\0" L"\x0001d5b0" L"\0" L"\x0001d5b1" L"\0" L"\x0001d5b2" + L"\0" L"\x0001d5b3" L"\0" L"\x0001d5b4" L"\0" L"\x0001d5b5" L"\0" + L"\x0001d5b6" L"\0" L"\x0001d5b7" L"\0" L"\x0001d5b8" L"\0" L"\x0001d5b9" + L"\0" L"\x0001d5ba" L"\0" L"\x0001d5bb" L"\0" L"\x0001d5bc" L"\0" + L"\x0001d5bd" L"\0" L"\x0001d5be" L"\0" L"\x0001d5bf" L"\0" L"\x0001d5c0" + L"\0" L"\x0001d5c1" L"\0" L"\x0001d5c2" L"\0" L"\x0001d5c3" L"\0" + L"\x0001d5c4" L"\0" L"\x0001d5c5" L"\0" L"\x0001d5c6" L"\0" L"\x0001d5c7" + L"\0" L"\x0001d5c8" L"\0" L"\x0001d5c9" L"\0" L"\x0001d5ca" L"\0" + L"\x0001d5cb" L"\0" L"\x0001d5cc" L"\0" L"\x0001d5cd" L"\0" L"\x0001d5ce" + L"\0" L"\x0001d5cf" L"\0" L"\x0001d5d0" L"\0" L"\x0001d5d1" L"\0" + L"\x0001d5d2" L"\0" L"\x0001d5d3" L"\0" L"\x0001d5d4" L"\0" L"\x0001d5d5" + L"\0" L"\x0001d5d6" L"\0" L"\x0001d5d7" L"\0" L"\x0001d5d8" L"\0" + L"\x0001d5d9" L"\0" L"\x0001d5da" L"\0" L"\x0001d5db" L"\0" L"\x0001d5dc" + L"\0" L"\x0001d5dd" L"\0" L"\x0001d5de" L"\0" L"\x0001d5df" L"\0" + L"\x0001d5e0" L"\0" L"\x0001d5e1" L"\0" L"\x0001d5e2" L"\0" L"\x0001d5e3" + L"\0" L"\x0001d5e4" L"\0" L"\x0001d5e5" L"\0" L"\x0001d5e6" L"\0" + L"\x0001d5e7" L"\0" L"\x0001d5e8" L"\0" L"\x0001d5e9" L"\0" L"\x0001d5ea" + L"\0" L"\x0001d5eb" L"\0" L"\x0001d5ec" L"\0" L"\x0001d5ed" L"\0" + L"\x0001d5ee" L"\0" L"\x0001d5ef" L"\0" L"\x0001d5f0" L"\0" L"\x0001d5f1" + L"\0" L"\x0001d5f2" L"\0" L"\x0001d5f3" L"\0" L"\x0001d5f4" L"\0" + L"\x0001d5f5" L"\0" L"\x0001d5f6" L"\0" L"\x0001d5f7" L"\0" L"\x0001d5f8" + L"\0" L"\x0001d5f9" L"\0" L"\x0001d5fa" L"\0" L"\x0001d5fb" L"\0" + L"\x0001d5fc" L"\0" L"\x0001d5fd" L"\0" L"\x0001d5fe" L"\0" L"\x0001d5ff" + L"\0" L"\x0001d600" L"\0" L"\x0001d601" L"\0" L"\x0001d602" L"\0" + L"\x0001d603" L"\0" L"\x0001d604" L"\0" L"\x0001d605" L"\0" L"\x0001d606" + L"\0" L"\x0001d607" L"\0" L"\x0001d608" L"\0" L"\x0001d609" L"\0" + L"\x0001d60a" L"\0" L"\x0001d60b" L"\0" L"\x0001d60c" L"\0" 
L"\x0001d60d" + L"\0" L"\x0001d60e" L"\0" L"\x0001d60f" L"\0" L"\x0001d610" L"\0" + L"\x0001d611" L"\0" L"\x0001d612" L"\0" L"\x0001d613" L"\0" L"\x0001d614" + L"\0" L"\x0001d615" L"\0" L"\x0001d616" L"\0" L"\x0001d617" L"\0" + L"\x0001d618" L"\0" L"\x0001d619" L"\0" L"\x0001d61a" L"\0" L"\x0001d61b" + L"\0" L"\x0001d61c" L"\0" L"\x0001d61d" L"\0" L"\x0001d61e" L"\0" + L"\x0001d61f" L"\0" L"\x0001d620" L"\0" L"\x0001d621" L"\0" L"\x0001d622" + L"\0" L"\x0001d623" L"\0" L"\x0001d624" L"\0" L"\x0001d625" L"\0" + L"\x0001d626" L"\0" L"\x0001d627" L"\0" L"\x0001d628" L"\0" L"\x0001d629" + L"\0" L"\x0001d62a" L"\0" L"\x0001d62b" L"\0" L"\x0001d62c" L"\0" + L"\x0001d62d" L"\0" L"\x0001d62e" L"\0" L"\x0001d62f" L"\0" L"\x0001d630" + L"\0" L"\x0001d631" L"\0" L"\x0001d632" L"\0" L"\x0001d633" L"\0" + L"\x0001d634" L"\0" L"\x0001d635" L"\0" L"\x0001d636" L"\0" L"\x0001d637" + L"\0" L"\x0001d638" L"\0" L"\x0001d639" L"\0" L"\x0001d63a" L"\0" + L"\x0001d63b" L"\0" L"\x0001d63c" L"\0" L"\x0001d63d" L"\0" L"\x0001d63e" + L"\0" L"\x0001d63f" L"\0" L"\x0001d640" L"\0" L"\x0001d641" L"\0" + L"\x0001d642" L"\0" L"\x0001d643" L"\0" L"\x0001d644" L"\0" L"\x0001d645" + L"\0" L"\x0001d646" L"\0" L"\x0001d647" L"\0" L"\x0001d648" L"\0" + L"\x0001d649" L"\0" L"\x0001d64a" L"\0" L"\x0001d64b" L"\0" L"\x0001d64c" + L"\0" L"\x0001d64d" L"\0" L"\x0001d64e" L"\0" L"\x0001d64f" L"\0" + L"\x0001d650" L"\0" L"\x0001d651" L"\0" L"\x0001d652" L"\0" L"\x0001d653" + L"\0" L"\x0001d654" L"\0" L"\x0001d655" L"\0" L"\x0001d656" L"\0" + L"\x0001d657" L"\0" L"\x0001d658" L"\0" L"\x0001d659" L"\0" L"\x0001d65a" + L"\0" L"\x0001d65b" L"\0" L"\x0001d65c" L"\0" L"\x0001d65d" L"\0" + L"\x0001d65e" L"\0" L"\x0001d65f" L"\0" L"\x0001d660" L"\0" L"\x0001d661" + L"\0" L"\x0001d662" L"\0" L"\x0001d663" L"\0" L"\x0001d664" L"\0" + L"\x0001d665" L"\0" L"\x0001d666" L"\0" L"\x0001d667" L"\0" L"\x0001d668" + L"\0" L"\x0001d669" L"\0" L"\x0001d66a" L"\0" L"\x0001d66b" L"\0" + L"\x0001d66c" L"\0" L"\x0001d66d" L"\0" 
L"\x0001d66e" L"\0" L"\x0001d66f" + L"\0" L"\x0001d670" L"\0" L"\x0001d671" L"\0" L"\x0001d672" L"\0" + L"\x0001d673" L"\0" L"\x0001d674" L"\0" L"\x0001d675" L"\0" L"\x0001d676" + L"\0" L"\x0001d677" L"\0" L"\x0001d678" L"\0" L"\x0001d679" L"\0" + L"\x0001d67a" L"\0" L"\x0001d67b" L"\0" L"\x0001d67c" L"\0" L"\x0001d67d" + L"\0" L"\x0001d67e" L"\0" L"\x0001d67f" L"\0" L"\x0001d680" L"\0" + L"\x0001d681" L"\0" L"\x0001d682" L"\0" L"\x0001d683" L"\0" L"\x0001d684" + L"\0" L"\x0001d685" L"\0" L"\x0001d686" L"\0" L"\x0001d687" L"\0" + L"\x0001d688" L"\0" L"\x0001d689" L"\0" L"\x0001d68a" L"\0" L"\x0001d68b" + L"\0" L"\x0001d68c" L"\0" L"\x0001d68d" L"\0" L"\x0001d68e" L"\0" + L"\x0001d68f" L"\0" L"\x0001d690" L"\0" L"\x0001d691" L"\0" L"\x0001d692" + L"\0" L"\x0001d693" L"\0" L"\x0001d694" L"\0" L"\x0001d695" L"\0" + L"\x0001d696" L"\0" L"\x0001d697" L"\0" L"\x0001d698" L"\0" L"\x0001d699" + L"\0" L"\x0001d69a" L"\0" L"\x0001d69b" L"\0" L"\x0001d69c" L"\0" + L"\x0001d69d" L"\0" L"\x0001d69e" L"\0" L"\x0001d69f" L"\0" L"\x0001d6a0" + L"\0" L"\x0001d6a1" L"\0" L"\x0001d6a2" L"\0" L"\x0001d6a3" L"\0" + L"\x0001d7ce" L"\0" L"\x0001d7cf" L"\0" L"\x0001d7d0" L"\0" L"\x0001d7d1" + L"\0" L"\x0001d7d2" L"\0" L"\x0001d7d3" L"\0" L"\x0001d7d4" L"\0" + L"\x0001d7d5" L"\0" L"\x0001d7d6" L"\0" L"\x0001d7d7" L"\0" L"\x0001d7d8" + L"\0" L"\x0001d7d9" L"\0" L"\x0001d7da" L"\0" L"\x0001d7db" L"\0" + L"\x0001d7dc" L"\0" L"\x0001d7dd" L"\0" L"\x0001d7de" L"\0" L"\x0001d7df" + L"\0" L"\x0001d7e0" L"\0" L"\x0001d7e1" L"\0" L"\x0001d7e2" L"\0" + L"\x0001d7e3" L"\0" L"\x0001d7e4" L"\0" L"\x0001d7e5" L"\0" L"\x0001d7e6" + L"\0" L"\x0001d7e7" L"\0" L"\x0001d7e8" L"\0" L"\x0001d7e9" L"\0" + L"\x0001d7ea" L"\0" L"\x0001d7eb" L"\0" L"\x0001d7ec" L"\0" L"\x0001d7ed" + L"\0" L"\x0001d7ee" L"\0" L"\x0001d7ef" L"\0" L"\x0001d7f0" L"\0" + L"\x0001d7f1" L"\0" L"\x0001d7f2" L"\0" L"\x0001d7f3" L"\0" L"\x0001d7f4" + L"\0" L"\x0001d7f5" L"\0" L"\x0001d7f6" L"\0" L"\x0001d7f7" L"\0" + L"\x0001d7f8" L"\0" 
L"\x0001d7f9" L"\0" L"\x0001d7fa" L"\0" L"\x0001d7fb" + L"\0" L"\x0001d7fc" L"\0" L"\x0001d7fd" L"\0" L"\x0001d7fe" L"\0" + L"\x0001d7ff"; +static const uint32_t translit_to_idx[] = +{}; +static const wchar_t translit_to_tbl[] = + L" \0" L"\0" L"(C)\0" L"\0" L"<<\0" L"\0" L"-\0" L"\0" L"(R)\0" L"\0" L"u\0" + L"\0" L",\0" L"\0" L">>\0" L"\0" L" 1/4 \0" L"\0" L" 1/2 \0" L"\0" + L" 3/4 \0" L"\0" L"AE\0" L"\0" L"x\0" L"\0" L"ss\0" L"\0" L"ae\0" L"\0" + L"IJ\0" L"\0" L"ij\0" L"\0" L"'n\0" L"\0" L"OE\0" L"\0" L"oe\0" L"\0" L"s\0" + L"\0" L"LJ\0" L"\0" L"Lj\0" L"\0" L"lj\0" L"\0" L"NJ\0" L"\0" L"Nj\0" L"\0" + L"nj\0" L"\0" L"DZ\0" L"\0" L"Dz\0" L"\0" L"dz\0" L"\0" L"'\0" L"\0" L"^\0" + L"\0" L"'\0" L"\0" L"`\0" L"\0" L"_\0" L"\0" L":\0" L"\0" L"~\0" L"\0" + L" \0" L"\0" L" \0" L"\0" L" \0" L"\0" L" \0" L"\0" L" \0" L"\0" L" \0" + L"\0" L" \0" L"\0" L" \0" L"\0" L"\0" L"\0" L"-\0" L"\0" L"-\0" L"\0" L"-\0" + L"\0" L"-\0" L"\0" L"--\0" L"\0" L"-\0" L"\0" L"'\0" L"\0" L"'\0" L"\0" + L",\0" L"\0" L"'\0" L"\0" L"\"\0" L"\0" L"\"\0" L"\0" L",,\0" L"\0" L"\"\0" + L"\0" L"+\0" L"\0" L"o\0" L"\0" L".\0" L"\0" L"..\0" L"\0" L"...\0" L"\0" + L" \0" L"\0" L"`\0" L"\0" L"``\0" L"\0" L"```\0" L"\0" L"<\0" L"\0" L">\0" + L"\0" L"!!\0" L"\0" L"/\0" L"\0" L"??\0" L"\0" L"?!\0" L"\0" L"!?\0" L"\0" + L" \0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"C=\0" L"\0" + L"Rs\0" L"\0" L"EUR\0" L"\0" L"INR\0" L"\0" L"a/c\0" L"\0" L"a/s\0" L"\0" + L"C\0" L"\0" L"c/o\0" L"\0" L"c/u\0" L"\0" L"g\0" L"\0" L"H\0" L"\0" L"H\0" + L"\0" L"H\0" L"\0" L"h\0" L"\0" L"I\0" L"\0" L"I\0" L"\0" L"L\0" L"\0" + L"l\0" L"\0" L"N\0" L"\0" L"No\0" L"\0" L"P\0" L"\0" L"Q\0" L"\0" L"R\0" + L"\0" L"R\0" L"\0" L"R\0" L"\0" L"TEL\0" L"\0" L"(TM)\0" L"\0" L"Z\0" L"\0" + L"Ohm\0" L"\0" L"Z\0" L"\0" L"B\0" L"\0" L"C\0" L"\0" L"e\0" L"\0" L"e\0" + L"\0" L"E\0" L"\0" L"F\0" L"\0" L"M\0" L"\0" L"o\0" L"\0" L"i\0" L"\0" + L"D\0" L"\0" L"d\0" L"\0" L"e\0" L"\0" L"i\0" L"\0" L"j\0" L"\0" L" 1/3 \0" + L"\0" L" 2/3 \0" 
L"\0" L" 1/5 \0" L"\0" L" 2/5 \0" L"\0" L" 3/5 \0" L"\0" + L" 4/5 \0" L"\0" L" 1/6 \0" L"\0" L" 5/6 \0" L"\0" L" 1/8 \0" L"\0" + L" 3/8 \0" L"\0" L" 5/8 \0" L"\0" L" 7/8 \0" L"\0" L" 1/\0" L"\0" L"I\0" + L"\0" L"II\0" L"\0" L"III\0" L"\0" L"IV\0" L"\0" L"V\0" L"\0" L"VI\0" L"\0" + L"VII\0" L"\0" L"VIII\0" L"\0" L"IX\0" L"\0" L"X\0" L"\0" L"XI\0" L"\0" + L"XII\0" L"\0" L"L\0" L"\0" L"C\0" L"\0" L"D\0" L"\0" L"M\0" L"\0" L"i\0" + L"\0" L"ii\0" L"\0" L"iii\0" L"\0" L"iv\0" L"\0" L"v\0" L"\0" L"vi\0" L"\0" + L"vii\0" L"\0" L"viii\0" L"\0" L"ix\0" L"\0" L"x\0" L"\0" L"xi\0" L"\0" + L"xii\0" L"\0" L"l\0" L"\0" L"c\0" L"\0" L"d\0" L"\0" L"m\0" L"\0" L"<-\0" + L"\0" L"->\0" L"\0" L"<->\0" L"\0" L"<=\0" L"\0" L"=>\0" L"\0" L"<=>\0" + L"\0" L"-\0" L"\0" L"/\0" L"\0" L"\\\0" L"\0" L"*\0" L"\0" L"|\0" L"\0" + L":\0" L"\0" L"~\0" L"\0" L"<=\0" L"\0" L">=\0" L"\0" L"<<\0" L"\0" L">>\0" + L"\0" L"<<<\0" L"\0" L">>>\0" L"\0" L"NUL\0" L"\0" L"SOH\0" L"\0" L"STX\0" + L"\0" L"ETX\0" L"\0" L"EOT\0" L"\0" L"ENQ\0" L"\0" L"ACK\0" L"\0" L"BEL\0" + L"\0" L"BS\0" L"\0" L"HT\0" L"\0" L"LF\0" L"\0" L"VT\0" L"\0" L"FF\0" L"\0" + L"CR\0" L"\0" L"SO\0" L"\0" L"SI\0" L"\0" L"DLE\0" L"\0" L"DC1\0" L"\0" + L"DC2\0" L"\0" L"DC3\0" L"\0" L"DC4\0" L"\0" L"NAK\0" L"\0" L"SYN\0" L"\0" + L"ETB\0" L"\0" L"CAN\0" L"\0" L"EM\0" L"\0" L"SUB\0" L"\0" L"ESC\0" L"\0" + L"FS\0" L"\0" L"GS\0" L"\0" L"RS\0" L"\0" L"US\0" L"\0" L"SP\0" L"\0" + L"DEL\0" L"\0" L"_\0" L"\0" L"NL\0" L"\0" L"(1)\0" L"\0" L"(2)\0" L"\0" + L"(3)\0" L"\0" L"(4)\0" L"\0" L"(5)\0" L"\0" L"(6)\0" L"\0" L"(7)\0" L"\0" + L"(8)\0" L"\0" L"(9)\0" L"\0" L"(10)\0" L"\0" L"(11)\0" L"\0" L"(12)\0" + L"\0" L"(13)\0" L"\0" L"(14)\0" L"\0" L"(15)\0" L"\0" L"(16)\0" L"\0" + L"(17)\0" L"\0" L"(18)\0" L"\0" L"(19)\0" L"\0" L"(20)\0" L"\0" L"(1)\0" + L"\0" L"(2)\0" L"\0" L"(3)\0" L"\0" L"(4)\0" L"\0" L"(5)\0" L"\0" L"(6)\0" + L"\0" L"(7)\0" L"\0" L"(8)\0" L"\0" L"(9)\0" L"\0" L"(10)\0" L"\0" L"(11)\0" + L"\0" L"(12)\0" L"\0" L"(13)\0" L"\0" L"(14)\0" L"\0" 
L"(15)\0" L"\0" + L"(16)\0" L"\0" L"(17)\0" L"\0" L"(18)\0" L"\0" L"(19)\0" L"\0" L"(20)\0" + L"\0" L"1.\0" L"\0" L"2.\0" L"\0" L"3.\0" L"\0" L"4.\0" L"\0" L"5.\0" L"\0" + L"6.\0" L"\0" L"7.\0" L"\0" L"8.\0" L"\0" L"9.\0" L"\0" L"10.\0" L"\0" + L"11.\0" L"\0" L"12.\0" L"\0" L"13.\0" L"\0" L"14.\0" L"\0" L"15.\0" L"\0" + L"16.\0" L"\0" L"17.\0" L"\0" L"18.\0" L"\0" L"19.\0" L"\0" L"20.\0" L"\0" + L"(a)\0" L"\0" L"(b)\0" L"\0" L"(c)\0" L"\0" L"(d)\0" L"\0" L"(e)\0" L"\0" + L"(f)\0" L"\0" L"(g)\0" L"\0" L"(h)\0" L"\0" L"(i)\0" L"\0" L"(j)\0" L"\0" + L"(k)\0" L"\0" L"(l)\0" L"\0" L"(m)\0" L"\0" L"(n)\0" L"\0" L"(o)\0" L"\0" + L"(p)\0" L"\0" L"(q)\0" L"\0" L"(r)\0" L"\0" L"(s)\0" L"\0" L"(t)\0" L"\0" + L"(u)\0" L"\0" L"(v)\0" L"\0" L"(w)\0" L"\0" L"(x)\0" L"\0" L"(y)\0" L"\0" + L"(z)\0" L"\0" L"(A)\0" L"\0" L"(B)\0" L"\0" L"(C)\0" L"\0" L"(D)\0" L"\0" + L"(E)\0" L"\0" L"(F)\0" L"\0" L"(G)\0" L"\0" L"(H)\0" L"\0" L"(I)\0" L"\0" + L"(J)\0" L"\0" L"(K)\0" L"\0" L"(L)\0" L"\0" L"(M)\0" L"\0" L"(N)\0" L"\0" + L"(O)\0" L"\0" L"(P)\0" L"\0" L"(Q)\0" L"\0" L"(R)\0" L"\0" L"(S)\0" L"\0" + L"(T)\0" L"\0" L"(U)\0" L"\0" L"(V)\0" L"\0" L"(W)\0" L"\0" L"(X)\0" L"\0" + L"(Y)\0" L"\0" L"(Z)\0" L"\0" L"(a)\0" L"\0" L"(b)\0" L"\0" L"(c)\0" L"\0" + L"(d)\0" L"\0" L"(e)\0" L"\0" L"(f)\0" L"\0" L"(g)\0" L"\0" L"(h)\0" L"\0" + L"(i)\0" L"\0" L"(j)\0" L"\0" L"(k)\0" L"\0" L"(l)\0" L"\0" L"(m)\0" L"\0" + L"(n)\0" L"\0" L"(o)\0" L"\0" L"(p)\0" L"\0" L"(q)\0" L"\0" L"(r)\0" L"\0" + L"(s)\0" L"\0" L"(t)\0" L"\0" L"(u)\0" L"\0" L"(v)\0" L"\0" L"(w)\0" L"\0" + L"(x)\0" L"\0" L"(y)\0" L"\0" L"(z)\0" L"\0" L"(0)\0" L"\0" L"-\0" L"\0" + L"|\0" L"\0" L"+\0" L"\0" L"+\0" L"\0" L"+\0" L"\0" L"+\0" L"\0" L"+\0" + L"\0" L"+\0" L"\0" L"+\0" L"\0" L"+\0" L"\0" L"+\0" L"\0" L"o\0" L"\0" + L"::=\0" L"\0" L"==\0" L"\0" L"===\0" L"\0" L" \0" L"\0" L"=\0" L"\0" + L"(21)\0" L"\0" L"(22)\0" L"\0" L"(23)\0" L"\0" L"(24)\0" L"\0" L"(25)\0" + L"\0" L"(26)\0" L"\0" L"(27)\0" L"\0" L"(28)\0" L"\0" L"(29)\0" L"\0" + 
L"(30)\0" L"\0" L"(31)\0" L"\0" L"(32)\0" L"\0" L"(33)\0" L"\0" L"(34)\0" + L"\0" L"(35)\0" L"\0" L"(36)\0" L"\0" L"(37)\0" L"\0" L"(38)\0" L"\0" + L"(39)\0" L"\0" L"(40)\0" L"\0" L"(41)\0" L"\0" L"(42)\0" L"\0" L"(43)\0" + L"\0" L"(44)\0" L"\0" L"(45)\0" L"\0" L"(46)\0" L"\0" L"(47)\0" L"\0" + L"(48)\0" L"\0" L"(49)\0" L"\0" L"(50)\0" L"\0" L"hPa\0" L"\0" L"da\0" L"\0" + L"AU\0" L"\0" L"bar\0" L"\0" L"oV\0" L"\0" L"pc\0" L"\0" L"pA\0" L"\0" + L"nA\0" L"\0" L"uA\0" L"\0" L"mA\0" L"\0" L"kA\0" L"\0" L"KB\0" L"\0" + L"MB\0" L"\0" L"GB\0" L"\0" L"cal\0" L"\0" L"kcal\0" L"\0" L"pF\0" L"\0" + L"nF\0" L"\0" L"uF\0" L"\0" L"ug\0" L"\0" L"mg\0" L"\0" L"kg\0" L"\0" + L"Hz\0" L"\0" L"kHz\0" L"\0" L"MHz\0" L"\0" L"GHz\0" L"\0" L"THz\0" L"\0" + L"ul\0" L"\0" L"ml\0" L"\0" L"dl\0" L"\0" L"kl\0" L"\0" L"fm\0" L"\0" + L"nm\0" L"\0" L"um\0" L"\0" L"mm\0" L"\0" L"cm\0" L"\0" L"km\0" L"\0" + L"mm^2\0" L"\0" L"cm^2\0" L"\0" L"m^2\0" L"\0" L"km^2\0" L"\0" L"mm^3\0" + L"\0" L"cm^3\0" L"\0" L"m^3\0" L"\0" L"km^3\0" L"\0" L"m/s\0" L"\0" + L"m/s^2\0" L"\0" L"Pa\0" L"\0" L"kPa\0" L"\0" L"MPa\0" L"\0" L"GPa\0" L"\0" + L"rad\0" L"\0" L"rad/s\0" L"\0" L"rad/s^2\0" L"\0" L"ps\0" L"\0" L"ns\0" + L"\0" L"us\0" L"\0" L"ms\0" L"\0" L"pV\0" L"\0" L"nV\0" L"\0" L"uV\0" L"\0" + L"mV\0" L"\0" L"kV\0" L"\0" L"MV\0" L"\0" L"pW\0" L"\0" L"nW\0" L"\0" + L"uW\0" L"\0" L"mW\0" L"\0" L"kW\0" L"\0" L"MW\0" L"\0" L"a.m.\0" L"\0" + L"Bq\0" L"\0" L"cc\0" L"\0" L"cd\0" L"\0" L"C/kg\0" L"\0" L"Co.\0" L"\0" + L"dB\0" L"\0" L"Gy\0" L"\0" L"ha\0" L"\0" L"HP\0" L"\0" L"in\0" L"\0" + L"KK\0" L"\0" L"KM\0" L"\0" L"kt\0" L"\0" L"lm\0" L"\0" L"ln\0" L"\0" + L"log\0" L"\0" L"lx\0" L"\0" L"mb\0" L"\0" L"mil\0" L"\0" L"mol\0" L"\0" + L"PH\0" L"\0" L"p.m.\0" L"\0" L"PPM\0" L"\0" L"PR\0" L"\0" L"sr\0" L"\0" + L"Sv\0" L"\0" L"Wb\0" L"\0" L"ff\0" L"\0" L"fi\0" L"\0" L"fl\0" L"\0" + L"ffi\0" L"\0" L"ffl\0" L"\0" L"st\0" L"\0" L"+\0" L"\0" L"\0" L"\0" L"\0" + L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" 
+ L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" L"\0" + L"\0" L"\0" L"\0" L"\0" L"\0" L"_\0" L"\0" L"_\0" L"\0" L"_\0" L"\0" L",\0" + L"\0" L".\0" L"\0" L";\0" L"\0" L":\0" L"\0" L"?\0" L"\0" L"!\0" L"\0" + L"(\0" L"\0" L")\0" L"\0" L"{\0" L"\0" L"}\0" L"\0" L"#\0" L"\0" L"&\0" + L"\0" L"*\0" L"\0" L"+\0" L"\0" L"-\0" L"\0" L"<\0" L"\0" L">\0" L"\0" + L"=\0" L"\0" L"\\\0" L"\0" L"$\0" L"\0" L"%\0" L"\0" L"@\0" L"\0" L"\0" + L"\0" L"!\0" L"\0" L"\"\0" L"\0" L"#\0" L"\0" L"$\0" L"\0" L"%\0" L"\0" + L"&\0" L"\0" L"'\0" L"\0" L"(\0" L"\0" L")\0" L"\0" L"*\0" L"\0" L"+\0" + L"\0" L",\0" L"\0" L"-\0" L"\0" L".\0" L"\0" L"/\0" L"\0" L"0\0" L"\0" + L"1\0" L"\0" L"2\0" L"\0" L"3\0" L"\0" L"4\0" L"\0" L"5\0" L"\0" L"6\0" + L"\0" L"7\0" L"\0" L"8\0" L"\0" L"9\0" L"\0" L":\0" L"\0" L";\0" L"\0" + L"<\0" L"\0" L"=\0" L"\0" L">\0" L"\0" L"?\0" L"\0" L"@\0" L"\0" L"A\0" + L"\0" L"B\0" L"\0" L"C\0" L"\0" L"D\0" L"\0" L"E\0" L"\0" L"F\0" L"\0" + L"G\0" L"\0" L"H\0" L"\0" L"I\0" L"\0" L"J\0" L"\0" L"K\0" L"\0" L"L\0" + L"\0" L"M\0" L"\0" L"N\0" L"\0" L"O\0" L"\0" L"P\0" L"\0" L"Q\0" L"\0" + L"R\0" L"\0" L"S\0" L"\0" L"T\0" L"\0" L"U\0" L"\0" L"V\0" L"\0" L"W\0" + L"\0" L"X\0" L"\0" L"Y\0" L"\0" L"Z\0" L"\0" L"[\0" L"\0" L"\\\0" L"\0" + L"]\0" L"\0" L"^\0" L"\0" L"_\0" L"\0" L"`\0" L"\0" L"a\0" L"\0" L"b\0" + L"\0" L"c\0" L"\0" L"d\0" L"\0" L"e\0" L"\0" L"f\0" L"\0" L"g\0" L"\0" + L"h\0" L"\0" L"i\0" L"\0" L"j\0" L"\0" L"k\0" L"\0" L"l\0" L"\0" L"m\0" + L"\0" L"n\0" L"\0" L"o\0" L"\0" L"p\0" L"\0" L"q\0" L"\0" L"r\0" L"\0" + L"s\0" L"\0" L"t\0" L"\0" L"u\0" L"\0" L"v\0" L"\0" L"w\0" L"\0" L"x\0" + L"\0" L"y\0" L"\0" L"z\0" L"\0" L"{\0" L"\0" L"|\0" L"\0" L"}\0" L"\0" + L"~\0" L"\0" L"A\0" L"\0" L"B\0" L"\0" L"C\0" L"\0" L"D\0" L"\0" L"E\0" + L"\0" L"F\0" L"\0" L"G\0" L"\0" L"H\0" L"\0" L"I\0" L"\0" L"J\0" L"\0" + L"K\0" L"\0" L"L\0" L"\0" L"M\0" L"\0" L"N\0" L"\0" L"O\0" L"\0" L"P\0" + L"\0" L"Q\0" L"\0" L"R\0" L"\0" L"S\0" L"\0" L"T\0" L"\0" L"U\0" L"\0" + L"V\0" 
L"\0" L"W\0" L"\0" L"X\0" L"\0" L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" + L"\0" L"b\0" L"\0" L"c\0" L"\0" L"d\0" L"\0" L"e\0" L"\0" L"f\0" L"\0" + L"g\0" L"\0" L"h\0" L"\0" L"i\0" L"\0" L"j\0" L"\0" L"k\0" L"\0" L"l\0" + L"\0" L"m\0" L"\0" L"n\0" L"\0" L"o\0" L"\0" L"p\0" L"\0" L"q\0" L"\0" + L"r\0" L"\0" L"s\0" L"\0" L"t\0" L"\0" L"u\0" L"\0" L"v\0" L"\0" L"w\0" + L"\0" L"x\0" L"\0" L"y\0" L"\0" L"z\0" L"\0" L"A\0" L"\0" L"B\0" L"\0" + L"C\0" L"\0" L"D\0" L"\0" L"E\0" L"\0" L"F\0" L"\0" L"G\0" L"\0" L"H\0" + L"\0" L"I\0" L"\0" L"J\0" L"\0" L"K\0" L"\0" L"L\0" L"\0" L"M\0" L"\0" + L"N\0" L"\0" L"O\0" L"\0" L"P\0" L"\0" L"Q\0" L"\0" L"R\0" L"\0" L"S\0" + L"\0" L"T\0" L"\0" L"U\0" L"\0" L"V\0" L"\0" L"W\0" L"\0" L"X\0" L"\0" + L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" L"\0" L"b\0" L"\0" L"c\0" L"\0" L"d\0" + L"\0" L"e\0" L"\0" L"f\0" L"\0" L"g\0" L"\0" L"i\0" L"\0" L"j\0" L"\0" + L"k\0" L"\0" L"l\0" L"\0" L"m\0" L"\0" L"n\0" L"\0" L"o\0" L"\0" L"p\0" + L"\0" L"q\0" L"\0" L"r\0" L"\0" L"s\0" L"\0" L"t\0" L"\0" L"u\0" L"\0" + L"v\0" L"\0" L"w\0" L"\0" L"x\0" L"\0" L"y\0" L"\0" L"z\0" L"\0" L"A\0" + L"\0" L"B\0" L"\0" L"C\0" L"\0" L"D\0" L"\0" L"E\0" L"\0" L"F\0" L"\0" + L"G\0" L"\0" L"H\0" L"\0" L"I\0" L"\0" L"J\0" L"\0" L"K\0" L"\0" L"L\0" + L"\0" L"M\0" L"\0" L"N\0" L"\0" L"O\0" L"\0" L"P\0" L"\0" L"Q\0" L"\0" + L"R\0" L"\0" L"S\0" L"\0" L"T\0" L"\0" L"U\0" L"\0" L"V\0" L"\0" L"W\0" + L"\0" L"X\0" L"\0" L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" L"\0" L"b\0" L"\0" + L"c\0" L"\0" L"d\0" L"\0" L"e\0" L"\0" L"f\0" L"\0" L"g\0" L"\0" L"h\0" + L"\0" L"i\0" L"\0" L"j\0" L"\0" L"k\0" L"\0" L"l\0" L"\0" L"m\0" L"\0" + L"n\0" L"\0" L"o\0" L"\0" L"p\0" L"\0" L"q\0" L"\0" L"r\0" L"\0" L"s\0" + L"\0" L"t\0" L"\0" L"u\0" L"\0" L"v\0" L"\0" L"w\0" L"\0" L"x\0" L"\0" + L"y\0" L"\0" L"z\0" L"\0" L"A\0" L"\0" L"C\0" L"\0" L"D\0" L"\0" L"G\0" + L"\0" L"J\0" L"\0" L"K\0" L"\0" L"N\0" L"\0" L"O\0" L"\0" L"P\0" L"\0" + L"Q\0" L"\0" L"S\0" L"\0" L"T\0" L"\0" L"U\0" L"\0" L"V\0" L"\0" L"W\0" + L"\0" L"X\0" L"\0" 
L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" L"\0" L"b\0" L"\0" + L"c\0" L"\0" L"d\0" L"\0" L"f\0" L"\0" L"h\0" L"\0" L"i\0" L"\0" L"j\0" + L"\0" L"k\0" L"\0" L"m\0" L"\0" L"n\0" L"\0" L"p\0" L"\0" L"q\0" L"\0" + L"r\0" L"\0" L"s\0" L"\0" L"t\0" L"\0" L"u\0" L"\0" L"v\0" L"\0" L"w\0" + L"\0" L"x\0" L"\0" L"y\0" L"\0" L"z\0" L"\0" L"A\0" L"\0" L"B\0" L"\0" + L"C\0" L"\0" L"D\0" L"\0" L"E\0" L"\0" L"F\0" L"\0" L"G\0" L"\0" L"H\0" + L"\0" L"I\0" L"\0" L"J\0" L"\0" L"K\0" L"\0" L"L\0" L"\0" L"M\0" L"\0" + L"N\0" L"\0" L"O\0" L"\0" L"P\0" L"\0" L"Q\0" L"\0" L"R\0" L"\0" L"S\0" + L"\0" L"T\0" L"\0" L"U\0" L"\0" L"V\0" L"\0" L"W\0" L"\0" L"X\0" L"\0" + L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" L"\0" L"b\0" L"\0" L"c\0" L"\0" L"d\0" + L"\0" L"e\0" L"\0" L"f\0" L"\0" L"g\0" L"\0" L"h\0" L"\0" L"i\0" L"\0" + L"j\0" L"\0" L"k\0" L"\0" L"l\0" L"\0" L"m\0" L"\0" L"n\0" L"\0" L"o\0" + L"\0" L"p\0" L"\0" L"q\0" L"\0" L"r\0" L"\0" L"s\0" L"\0" L"t\0" L"\0" + L"u\0" L"\0" L"v\0" L"\0" L"w\0" L"\0" L"x\0" L"\0" L"y\0" L"\0" L"z\0" + L"\0" L"A\0" L"\0" L"B\0" L"\0" L"D\0" L"\0" L"E\0" L"\0" L"F\0" L"\0" + L"G\0" L"\0" L"J\0" L"\0" L"K\0" L"\0" L"L\0" L"\0" L"M\0" L"\0" L"N\0" + L"\0" L"O\0" L"\0" L"P\0" L"\0" L"Q\0" L"\0" L"S\0" L"\0" L"T\0" L"\0" + L"U\0" L"\0" L"V\0" L"\0" L"W\0" L"\0" L"X\0" L"\0" L"Y\0" L"\0" L"a\0" + L"\0" L"b\0" L"\0" L"c\0" L"\0" L"d\0" L"\0" L"e\0" L"\0" L"f\0" L"\0" + L"g\0" L"\0" L"h\0" L"\0" L"i\0" L"\0" L"j\0" L"\0" L"k\0" L"\0" L"l\0" + L"\0" L"m\0" L"\0" L"n\0" L"\0" L"o\0" L"\0" L"p\0" L"\0" L"q\0" L"\0" + L"r\0" L"\0" L"s\0" L"\0" L"t\0" L"\0" L"u\0" L"\0" L"v\0" L"\0" L"w\0" + L"\0" L"x\0" L"\0" L"y\0" L"\0" L"z\0" L"\0" L"A\0" L"\0" L"B\0" L"\0" + L"D\0" L"\0" L"E\0" L"\0" L"F\0" L"\0" L"G\0" L"\0" L"I\0" L"\0" L"J\0" + L"\0" L"K\0" L"\0" L"L\0" L"\0" L"M\0" L"\0" L"O\0" L"\0" L"S\0" L"\0" + L"T\0" L"\0" L"U\0" L"\0" L"V\0" L"\0" L"W\0" L"\0" L"X\0" L"\0" L"Y\0" + L"\0" L"a\0" L"\0" L"b\0" L"\0" L"c\0" L"\0" L"d\0" L"\0" L"e\0" L"\0" + L"f\0" L"\0" L"g\0" L"\0" L"h\0" 
L"\0" L"i\0" L"\0" L"j\0" L"\0" L"k\0" + L"\0" L"l\0" L"\0" L"m\0" L"\0" L"n\0" L"\0" L"o\0" L"\0" L"p\0" L"\0" + L"q\0" L"\0" L"r\0" L"\0" L"s\0" L"\0" L"t\0" L"\0" L"u\0" L"\0" L"v\0" + L"\0" L"w\0" L"\0" L"x\0" L"\0" L"y\0" L"\0" L"z\0" L"\0" L"A\0" L"\0" + L"B\0" L"\0" L"C\0" L"\0" L"D\0" L"\0" L"E\0" L"\0" L"F\0" L"\0" L"G\0" + L"\0" L"H\0" L"\0" L"I\0" L"\0" L"J\0" L"\0" L"K\0" L"\0" L"L\0" L"\0" + L"M\0" L"\0" L"N\0" L"\0" L"O\0" L"\0" L"P\0" L"\0" L"Q\0" L"\0" L"R\0" + L"\0" L"S\0" L"\0" L"T\0" L"\0" L"U\0" L"\0" L"V\0" L"\0" L"W\0" L"\0" + L"X\0" L"\0" L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" L"\0" L"b\0" L"\0" L"c\0" + L"\0" L"d\0" L"\0" L"e\0" L"\0" L"f\0" L"\0" L"g\0" L"\0" L"h\0" L"\0" + L"i\0" L"\0" L"j\0" L"\0" L"k\0" L"\0" L"l\0" L"\0" L"m\0" L"\0" L"n\0" + L"\0" L"o\0" L"\0" L"p\0" L"\0" L"q\0" L"\0" L"r\0" L"\0" L"s\0" L"\0" + L"t\0" L"\0" L"u\0" L"\0" L"v\0" L"\0" L"w\0" L"\0" L"x\0" L"\0" L"y\0" + L"\0" L"z\0" L"\0" L"A\0" L"\0" L"B\0" L"\0" L"C\0" L"\0" L"D\0" L"\0" + L"E\0" L"\0" L"F\0" L"\0" L"G\0" L"\0" L"H\0" L"\0" L"I\0" L"\0" L"J\0" + L"\0" L"K\0" L"\0" L"L\0" L"\0" L"M\0" L"\0" L"N\0" L"\0" L"O\0" L"\0" + L"P\0" L"\0" L"Q\0" L"\0" L"R\0" L"\0" L"S\0" L"\0" L"T\0" L"\0" L"U\0" + L"\0" L"V\0" L"\0" L"W\0" L"\0" L"X\0" L"\0" L"Y\0" L"\0" L"Z\0" L"\0" + L"a\0" L"\0" L"b\0" L"\0" L"c\0" L"\0" L"d\0" L"\0" L"e\0" L"\0" L"f\0" + L"\0" L"g\0" L"\0" L"h\0" L"\0" L"i\0" L"\0" L"j\0" L"\0" L"k\0" L"\0" + L"l\0" L"\0" L"m\0" L"\0" L"n\0" L"\0" L"o\0" L"\0" L"p\0" L"\0" L"q\0" + L"\0" L"r\0" L"\0" L"s\0" L"\0" L"t\0" L"\0" L"u\0" L"\0" L"v\0" L"\0" + L"w\0" L"\0" L"x\0" L"\0" L"y\0" L"\0" L"z\0" L"\0" L"A\0" L"\0" L"B\0" + L"\0" L"C\0" L"\0" L"D\0" L"\0" L"E\0" L"\0" L"F\0" L"\0" L"G\0" L"\0" + L"H\0" L"\0" L"I\0" L"\0" L"J\0" L"\0" L"K\0" L"\0" L"L\0" L"\0" L"M\0" + L"\0" L"N\0" L"\0" L"O\0" L"\0" L"P\0" L"\0" L"Q\0" L"\0" L"R\0" L"\0" + L"S\0" L"\0" L"T\0" L"\0" L"U\0" L"\0" L"V\0" L"\0" L"W\0" L"\0" L"X\0" + L"\0" L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" L"\0" 
L"b\0" L"\0" L"c\0" L"\0" + L"d\0" L"\0" L"e\0" L"\0" L"f\0" L"\0" L"g\0" L"\0" L"h\0" L"\0" L"i\0" + L"\0" L"j\0" L"\0" L"k\0" L"\0" L"l\0" L"\0" L"m\0" L"\0" L"n\0" L"\0" + L"o\0" L"\0" L"p\0" L"\0" L"q\0" L"\0" L"r\0" L"\0" L"s\0" L"\0" L"t\0" + L"\0" L"u\0" L"\0" L"v\0" L"\0" L"w\0" L"\0" L"x\0" L"\0" L"y\0" L"\0" + L"z\0" L"\0" L"A\0" L"\0" L"B\0" L"\0" L"C\0" L"\0" L"D\0" L"\0" L"E\0" + L"\0" L"F\0" L"\0" L"G\0" L"\0" L"H\0" L"\0" L"I\0" L"\0" L"J\0" L"\0" + L"K\0" L"\0" L"L\0" L"\0" L"M\0" L"\0" L"N\0" L"\0" L"O\0" L"\0" L"P\0" + L"\0" L"Q\0" L"\0" L"R\0" L"\0" L"S\0" L"\0" L"T\0" L"\0" L"U\0" L"\0" + L"V\0" L"\0" L"W\0" L"\0" L"X\0" L"\0" L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" + L"\0" L"b\0" L"\0" L"c\0" L"\0" L"d\0" L"\0" L"e\0" L"\0" L"f\0" L"\0" + L"g\0" L"\0" L"h\0" L"\0" L"i\0" L"\0" L"j\0" L"\0" L"k\0" L"\0" L"l\0" + L"\0" L"m\0" L"\0" L"n\0" L"\0" L"o\0" L"\0" L"p\0" L"\0" L"q\0" L"\0" + L"r\0" L"\0" L"s\0" L"\0" L"t\0" L"\0" L"u\0" L"\0" L"v\0" L"\0" L"w\0" + L"\0" L"x\0" L"\0" L"y\0" L"\0" L"z\0" L"\0" L"A\0" L"\0" L"B\0" L"\0" + L"C\0" L"\0" L"D\0" L"\0" L"E\0" L"\0" L"F\0" L"\0" L"G\0" L"\0" L"H\0" + L"\0" L"I\0" L"\0" L"J\0" L"\0" L"K\0" L"\0" L"L\0" L"\0" L"M\0" L"\0" + L"N\0" L"\0" L"O\0" L"\0" L"P\0" L"\0" L"Q\0" L"\0" L"R\0" L"\0" L"S\0" + L"\0" L"T\0" L"\0" L"U\0" L"\0" L"V\0" L"\0" L"W\0" L"\0" L"X\0" L"\0" + L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" L"\0" L"b\0" L"\0" L"c\0" L"\0" L"d\0" + L"\0" L"e\0" L"\0" L"f\0" L"\0" L"g\0" L"\0" L"h\0" L"\0" L"i\0" L"\0" + L"j\0" L"\0" L"k\0" L"\0" L"l\0" L"\0" L"m\0" L"\0" L"n\0" L"\0" L"o\0" + L"\0" L"p\0" L"\0" L"q\0" L"\0" L"r\0" L"\0" L"s\0" L"\0" L"t\0" L"\0" + L"u\0" L"\0" L"v\0" L"\0" L"w\0" L"\0" L"x\0" L"\0" L"y\0" L"\0" L"z\0" + L"\0" L"A\0" L"\0" L"B\0" L"\0" L"C\0" L"\0" L"D\0" L"\0" L"E\0" L"\0" + L"F\0" L"\0" L"G\0" L"\0" L"H\0" L"\0" L"I\0" L"\0" L"J\0" L"\0" L"K\0" + L"\0" L"L\0" L"\0" L"M\0" L"\0" L"N\0" L"\0" L"O\0" L"\0" L"P\0" L"\0" + L"Q\0" L"\0" L"R\0" L"\0" L"S\0" L"\0" L"T\0" L"\0" L"U\0" 
L"\0" L"V\0" + L"\0" L"W\0" L"\0" L"X\0" L"\0" L"Y\0" L"\0" L"Z\0" L"\0" L"a\0" L"\0" + L"b\0" L"\0" L"c\0" L"\0" L"d\0" L"\0" L"e\0" L"\0" L"f\0" L"\0" L"g\0" + L"\0" L"h\0" L"\0" L"i\0" L"\0" L"j\0" L"\0" L"k\0" L"\0" L"l\0" L"\0" + L"m\0" L"\0" L"n\0" L"\0" L"o\0" L"\0" L"p\0" L"\0" L"q\0" L"\0" L"r\0" + L"\0" L"s\0" L"\0" L"t\0" L"\0" L"u\0" L"\0" L"v\0" L"\0" L"w\0" L"\0" + L"x\0" L"\0" L"y\0" L"\0" L"z\0" L"\0" L"0\0" L"\0" L"1\0" L"\0" L"2\0" + L"\0" L"3\0" L"\0" L"4\0" L"\0" L"5\0" L"\0" L"6\0" L"\0" L"7\0" L"\0" + L"8\0" L"\0" L"9\0" L"\0" L"0\0" L"\0" L"1\0" L"\0" L"2\0" L"\0" L"3\0" + L"\0" L"4\0" L"\0" L"5\0" L"\0" L"6\0" L"\0" L"7\0" L"\0" L"8\0" L"\0" + L"9\0" L"\0" L"0\0" L"\0" L"1\0" L"\0" L"2\0" L"\0" L"3\0" L"\0" L"4\0" + L"\0" L"5\0" L"\0" L"6\0" L"\0" L"7\0" L"\0" L"8\0" L"\0" L"9\0" L"\0" + L"0\0" L"\0" L"1\0" L"\0" L"2\0" L"\0" L"3\0" L"\0" L"4\0" L"\0" L"5\0" + L"\0" L"6\0" L"\0" L"7\0" L"\0" L"8\0" L"\0" L"9\0" L"\0" L"0\0" L"\0" + L"1\0" L"\0" L"2\0" L"\0" L"3\0" L"\0" L"4\0" L"\0" L"5\0" L"\0" L"6\0" + L"\0" L"7\0" L"\0" L"8\0" L"\0" L"9\0"; diff --git a/REORG.TODO/locale/C-translit.h.in b/REORG.TODO/locale/C-translit.h.in new file mode 100644 index 0000000000..591e81de86 --- /dev/null +++ b/REORG.TODO/locale/C-translit.h.in @@ -0,0 +1,1374 @@ +/* Transliteration for the C locale. -*-C-*- + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The entries here have to be sorted relative to the input string. */ + +"\x00a0" " " /* <U00A0> NO-BREAK SPACE */ +"\x00a9" "(C)" /* <U00A9> COPYRIGHT SIGN */ +"\x00ab" "<<" /* <U00AB> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ +"\x00ad" "-" /* <U00AD> SOFT HYPHEN */ +"\x00ae" "(R)" /* <U00AE> REGISTERED SIGN */ +"\x00b5" "u" /* <U00B5> MICRO SIGN */ +"\x00b8" "," /* <U00B8> CEDILLA */ +"\x00bb" ">>" /* <U00BB> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ +"\x00bc" " 1/4 " /* <U00BC> VULGAR FRACTION ONE QUARTER */ +"\x00bd" " 1/2 " /* <U00BD> VULGAR FRACTION ONE HALF */ +"\x00be" " 3/4 " /* <U00BE> VULGAR FRACTION THREE QUARTERS */ +"\x00c6" "AE" /* <U00C6> LATIN CAPITAL LETTER AE */ +"\x00d7" "x" /* <U00D7> MULTIPLICATION SIGN */ +"\x00df" "ss" /* <U00DF> LATIN SMALL LETTER SHARP S */ +"\x00e6" "ae" /* <U00E6> LATIN SMALL LETTER AE */ +"\x0132" "IJ" /* <U0132> LATIN CAPITAL LIGATURE IJ */ +"\x0133" "ij" /* <U0133> LATIN SMALL LIGATURE IJ */ +"\x0149" "'n" /* <U0149> LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ +"\x0152" "OE" /* <U0152> LATIN CAPITAL LIGATURE OE */ +"\x0153" "oe" /* <U0153> LATIN SMALL LIGATURE OE */ +"\x017f" "s" /* <U017F> LATIN SMALL LETTER LONG S */ +"\x01c7" "LJ" /* <U01C7> LATIN CAPITAL LETTER LJ */ +"\x01c8" "Lj" /* <U01C8> LATIN CAPITAL LETTER L WITH SMALL LETTER J */ +"\x01c9" "lj" /* <U01C9> LATIN SMALL LETTER LJ */ +"\x01ca" "NJ" /* <U01CA> LATIN CAPITAL LETTER NJ */ +"\x01cb" "Nj" /* <U01CB> LATIN CAPITAL LETTER N WITH SMALL LETTER J */ +"\x01cc" "nj" /* <U01CC> LATIN SMALL LETTER NJ */ +"\x01f1" "DZ" /* <U01F1> LATIN CAPITAL LETTER DZ */ +"\x01f2" "Dz" /* <U01F2> LATIN CAPITAL LETTER D WITH SMALL LETTER Z */ +"\x01f3" "dz" /* <U01F3> LATIN SMALL LETTER DZ */ +"\x02bc" "'" /* <U02BC> MODIFIER LETTER APOSTROPHE 
*/ +"\x02c6" "^" /* <U02C6> MODIFIER LETTER CIRCUMFLEX ACCENT */ +"\x02c8" "'" /* <U02C8> MODIFIER LETTER VERTICAL LINE */ +"\x02cb" "`" /* <U02CB> MODIFIER LETTER GRAVE ACCENT */ +"\x02cd" "_" /* <U02CD> MODIFIER LETTER LOW MACRON */ +"\x02d0" ":" /* <U02D0> MODIFIER LETTER TRIANGULAR COLON */ +"\x02dc" "~" /* <U02DC> SMALL TILDE */ +"\x2002" " " /* <U2002> EN SPACE */ +"\x2003" " " /* <U2003> EM SPACE */ +"\x2004" " " /* <U2004> THREE-PER-EM SPACE */ +"\x2005" " " /* <U2005> FOUR-PER-EM SPACE */ +"\x2006" " " /* <U2006> SIX-PER-EM SPACE */ +"\x2008" " " /* <U2008> PUNCTUATION SPACE */ +"\x2009" " " /* <U2009> THIN SPACE */ +"\x200a" " " /* <U200A> HAIR SPACE */ +"\x200b" "" /* <U200B> ZERO WIDTH SPACE */ +"\x2010" "-" /* <U2010> HYPHEN */ +"\x2011" "-" /* <U2011> NON-BREAKING HYPHEN */ +"\x2012" "-" /* <U2012> FIGURE DASH */ +"\x2013" "-" /* <U2013> EN DASH */ +"\x2014" "--" /* <U2014> EM DASH */ +"\x2015" "-" /* <U2015> HORIZONTAL BAR */ +"\x2018" "'" /* <U2018> LEFT SINGLE QUOTATION MARK */ +"\x2019" "'" /* <U2019> RIGHT SINGLE QUOTATION MARK */ +"\x201a" "," /* <U201A> SINGLE LOW-9 QUOTATION MARK */ +"\x201b" "'" /* <U201B> SINGLE HIGH-REVERSED-9 QUOTATION MARK */ +"\x201c" "\"" /* <U201C> LEFT DOUBLE QUOTATION MARK */ +"\x201d" "\"" /* <U201D> RIGHT DOUBLE QUOTATION MARK */ +"\x201e" ",," /* <U201E> DOUBLE LOW-9 QUOTATION MARK */ +"\x201f" "\"" /* <U201F> DOUBLE HIGH-REVERSED-9 QUOTATION MARK */ +"\x2020" "+" /* <U2020> DAGGER */ +"\x2022" "o" /* <U2022> BULLET */ +"\x2024" "." /* <U2024> ONE DOT LEADER */ +"\x2025" ".." /* <U2025> TWO DOT LEADER */ +"\x2026" "..." /* <U2026> HORIZONTAL ELLIPSIS */ +"\x202f" " " /* <U202F> NARROW NO-BREAK SPACE */ +"\x2035" "`" /* <U2035> REVERSED PRIME */ +"\x2036" "``" /* <U2036> REVERSED DOUBLE PRIME */ +"\x2037" "```" /* <U2037> REVERSED TRIPLE PRIME */ +"\x2039" "<" /* <U2039> SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ +"\x203a" ">" /* <U203A> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ +"\x203c" "!!" 
/* <U203C> DOUBLE EXCLAMATION MARK */ +"\x2044" "/" /* <U2044> FRACTION SLASH */ +"\x2047" "??" /* <U2047> DOUBLE QUESTION MARK */ +"\x2048" "?!" /* <U2048> QUESTION EXCLAMATION MARK */ +"\x2049" "!?" /* <U2049> EXCLAMATION QUESTION MARK */ +"\x205f" " " /* <U205F> MEDIUM MATHEMATICAL SPACE */ +"\x2060" "" /* <U2060> WORD JOINER */ +"\x2061" "" /* <U2061> FUNCTION APPLICATION */ +"\x2062" "" /* <U2062> INVISIBLE TIMES */ +"\x2063" "" /* <U2063> INVISIBLE SEPARATOR */ +"\x20a1" "C=" /* <U20A1> COLON SIGN */ +"\x20a8" "Rs" /* <U20A8> RUPEE SIGN */ +"\x20ac" "EUR" /* <U20AC> EURO SIGN */ +"\x20b9" "INR" /* <U20B9> INDIAN RUPEE SIGN */ +"\x2100" "a/c" /* <U2100> ACCOUNT OF */ +"\x2101" "a/s" /* <U2101> ADDRESSED TO THE SUBJECT */ +"\x2102" "C" /* <U2102> DOUBLE-STRUCK CAPITAL C */ +"\x2105" "c/o" /* <U2105> CARE OF */ +"\x2106" "c/u" /* <U2106> CADA UNA */ +"\x210a" "g" /* <U210A> SCRIPT SMALL G */ +"\x210b" "H" /* <U210B> SCRIPT CAPITAL H */ +"\x210c" "H" /* <U210C> BLACK-LETTER CAPITAL H */ +"\x210d" "H" /* <U210D> DOUBLE-STRUCK CAPITAL H */ +"\x210e" "h" /* <U210E> PLANCK CONSTANT */ +"\x2110" "I" /* <U2110> SCRIPT CAPITAL I */ +"\x2111" "I" /* <U2111> BLACK-LETTER CAPITAL I */ +"\x2112" "L" /* <U2112> SCRIPT CAPITAL L */ +"\x2113" "l" /* <U2113> SCRIPT SMALL L */ +"\x2115" "N" /* <U2115> DOUBLE-STRUCK CAPITAL N */ +"\x2116" "No" /* <U2116> NUMERO SIGN */ +"\x2119" "P" /* <U2119> DOUBLE-STRUCK CAPITAL P */ +"\x211a" "Q" /* <U211A> DOUBLE-STRUCK CAPITAL Q */ +"\x211b" "R" /* <U211B> SCRIPT CAPITAL R */ +"\x211c" "R" /* <U211C> BLACK-LETTER CAPITAL R */ +"\x211d" "R" /* <U211D> DOUBLE-STRUCK CAPITAL R */ +"\x2121" "TEL" /* <U2121> TELEPHONE SIGN */ +"\x2122" "(TM)" /* <U2122> TRADE MARK SIGN */ +"\x2124" "Z" /* <U2124> DOUBLE-STRUCK CAPITAL Z */ +"\x2126" "Ohm" /* <U2126> OHM SIGN */ +"\x2128" "Z" /* <U2128> BLACK-LETTER CAPITAL Z */ +"\x212c" "B" /* <U212C> SCRIPT CAPITAL B */ +"\x212d" "C" /* <U212D> BLACK-LETTER CAPITAL C */ +"\x212e" "e" /* <U212E> ESTIMATED 
SYMBOL */ +"\x212f" "e" /* <U212F> SCRIPT SMALL E */ +"\x2130" "E" /* <U2130> SCRIPT CAPITAL E */ +"\x2131" "F" /* <U2131> SCRIPT CAPITAL F */ +"\x2133" "M" /* <U2133> SCRIPT CAPITAL M */ +"\x2134" "o" /* <U2134> SCRIPT SMALL O */ +"\x2139" "i" /* <U2139> INFORMATION SOURCE */ +"\x2145" "D" /* <U2145> DOUBLE-STRUCK ITALIC CAPITAL D */ +"\x2146" "d" /* <U2146> DOUBLE-STRUCK ITALIC SMALL D */ +"\x2147" "e" /* <U2147> DOUBLE-STRUCK ITALIC SMALL E */ +"\x2148" "i" /* <U2148> DOUBLE-STRUCK ITALIC SMALL I */ +"\x2149" "j" /* <U2149> DOUBLE-STRUCK ITALIC SMALL J */ +"\x2153" " 1/3 " /* <U2153> VULGAR FRACTION ONE THIRD */ +"\x2154" " 2/3 " /* <U2154> VULGAR FRACTION TWO THIRDS */ +"\x2155" " 1/5 " /* <U2155> VULGAR FRACTION ONE FIFTH */ +"\x2156" " 2/5 " /* <U2156> VULGAR FRACTION TWO FIFTHS */ +"\x2157" " 3/5 " /* <U2157> VULGAR FRACTION THREE FIFTHS */ +"\x2158" " 4/5 " /* <U2158> VULGAR FRACTION FOUR FIFTHS */ +"\x2159" " 1/6 " /* <U2159> VULGAR FRACTION ONE SIXTH */ +"\x215a" " 5/6 " /* <U215A> VULGAR FRACTION FIVE SIXTHS */ +"\x215b" " 1/8 " /* <U215B> VULGAR FRACTION ONE EIGHTH */ +"\x215c" " 3/8 " /* <U215C> VULGAR FRACTION THREE EIGHTHS */ +"\x215d" " 5/8 " /* <U215D> VULGAR FRACTION FIVE EIGHTHS */ +"\x215e" " 7/8 " /* <U215E> VULGAR FRACTION SEVEN EIGHTHS */ +"\x215f" " 1/" /* <U215F> FRACTION NUMERATOR ONE */ +"\x2160" "I" /* <U2160> ROMAN NUMERAL ONE */ +"\x2161" "II" /* <U2161> ROMAN NUMERAL TWO */ +"\x2162" "III" /* <U2162> ROMAN NUMERAL THREE */ +"\x2163" "IV" /* <U2163> ROMAN NUMERAL FOUR */ +"\x2164" "V" /* <U2164> ROMAN NUMERAL FIVE */ +"\x2165" "VI" /* <U2165> ROMAN NUMERAL SIX */ +"\x2166" "VII" /* <U2166> ROMAN NUMERAL SEVEN */ +"\x2167" "VIII" /* <U2167> ROMAN NUMERAL EIGHT */ +"\x2168" "IX" /* <U2168> ROMAN NUMERAL NINE */ +"\x2169" "X" /* <U2169> ROMAN NUMERAL TEN */ +"\x216a" "XI" /* <U216A> ROMAN NUMERAL ELEVEN */ +"\x216b" "XII" /* <U216B> ROMAN NUMERAL TWELVE */ +"\x216c" "L" /* <U216C> ROMAN NUMERAL FIFTY */ +"\x216d" "C" /* <U216D> ROMAN 
NUMERAL ONE HUNDRED */ +"\x216e" "D" /* <U216E> ROMAN NUMERAL FIVE HUNDRED */ +"\x216f" "M" /* <U216F> ROMAN NUMERAL ONE THOUSAND */ +"\x2170" "i" /* <U2170> SMALL ROMAN NUMERAL ONE */ +"\x2171" "ii" /* <U2171> SMALL ROMAN NUMERAL TWO */ +"\x2172" "iii" /* <U2172> SMALL ROMAN NUMERAL THREE */ +"\x2173" "iv" /* <U2173> SMALL ROMAN NUMERAL FOUR */ +"\x2174" "v" /* <U2174> SMALL ROMAN NUMERAL FIVE */ +"\x2175" "vi" /* <U2175> SMALL ROMAN NUMERAL SIX */ +"\x2176" "vii" /* <U2176> SMALL ROMAN NUMERAL SEVEN */ +"\x2177" "viii" /* <U2177> SMALL ROMAN NUMERAL EIGHT */ +"\x2178" "ix" /* <U2178> SMALL ROMAN NUMERAL NINE */ +"\x2179" "x" /* <U2179> SMALL ROMAN NUMERAL TEN */ +"\x217a" "xi" /* <U217A> SMALL ROMAN NUMERAL ELEVEN */ +"\x217b" "xii" /* <U217B> SMALL ROMAN NUMERAL TWELVE */ +"\x217c" "l" /* <U217C> SMALL ROMAN NUMERAL FIFTY */ +"\x217d" "c" /* <U217D> SMALL ROMAN NUMERAL ONE HUNDRED */ +"\x217e" "d" /* <U217E> SMALL ROMAN NUMERAL FIVE HUNDRED */ +"\x217f" "m" /* <U217F> SMALL ROMAN NUMERAL ONE THOUSAND */ +"\x2190" "<-" /* <U2190> LEFTWARDS ARROW */ +"\x2192" "->" /* <U2192> RIGHTWARDS ARROW */ +"\x2194" "<->" /* <U2194> LEFT RIGHT ARROW */ +"\x21d0" "<=" /* <U21D0> LEFTWARDS DOUBLE ARROW */ +"\x21d2" "=>" /* <U21D2> RIGHTWARDS DOUBLE ARROW */ +"\x21d4" "<=>" /* <U21D4> LEFT RIGHT DOUBLE ARROW */ +"\x2212" "-" /* <U2212> MINUS SIGN */ +"\x2215" "/" /* <U2215> DIVISION SLASH */ +"\x2216" "\\" /* <U2216> SET MINUS */ +"\x2217" "*" /* <U2217> ASTERISK OPERATOR */ +"\x2223" "|" /* <U2223> DIVIDES */ +"\x2236" ":" /* <U2236> RATIO */ +"\x223c" "~" /* <U223C> TILDE OPERATOR */ +"\x2264" "<=" /* <U2264> LESS-THAN OR EQUAL TO */ +"\x2265" ">=" /* <U2265> GREATER-THAN OR EQUAL TO */ +"\x226a" "<<" /* <U226A> MUCH LESS-THAN */ +"\x226b" ">>" /* <U226B> MUCH GREATER-THAN */ +"\x22d8" "<<<" /* <U22D8> VERY MUCH LESS-THAN */ +"\x22d9" ">>>" /* <U22D9> VERY MUCH GREATER-THAN */ +"\x2400" "NUL" /* <U2400> SYMBOL FOR NULL */ +"\x2401" "SOH" /* <U2401> SYMBOL FOR START OF HEADING 
*/ +"\x2402" "STX" /* <U2402> SYMBOL FOR START OF TEXT */ +"\x2403" "ETX" /* <U2403> SYMBOL FOR END OF TEXT */ +"\x2404" "EOT" /* <U2404> SYMBOL FOR END OF TRANSMISSION */ +"\x2405" "ENQ" /* <U2405> SYMBOL FOR ENQUIRY */ +"\x2406" "ACK" /* <U2406> SYMBOL FOR ACKNOWLEDGE */ +"\x2407" "BEL" /* <U2407> SYMBOL FOR BELL */ +"\x2408" "BS" /* <U2408> SYMBOL FOR BACKSPACE */ +"\x2409" "HT" /* <U2409> SYMBOL FOR HORIZONTAL TABULATION */ +"\x240a" "LF" /* <U240A> SYMBOL FOR LINE FEED */ +"\x240b" "VT" /* <U240B> SYMBOL FOR VERTICAL TABULATION */ +"\x240c" "FF" /* <U240C> SYMBOL FOR FORM FEED */ +"\x240d" "CR" /* <U240D> SYMBOL FOR CARRIAGE RETURN */ +"\x240e" "SO" /* <U240E> SYMBOL FOR SHIFT OUT */ +"\x240f" "SI" /* <U240F> SYMBOL FOR SHIFT IN */ +"\x2410" "DLE" /* <U2410> SYMBOL FOR DATA LINK ESCAPE */ +"\x2411" "DC1" /* <U2411> SYMBOL FOR DEVICE CONTROL ONE */ +"\x2412" "DC2" /* <U2412> SYMBOL FOR DEVICE CONTROL TWO */ +"\x2413" "DC3" /* <U2413> SYMBOL FOR DEVICE CONTROL THREE */ +"\x2414" "DC4" /* <U2414> SYMBOL FOR DEVICE CONTROL FOUR */ +"\x2415" "NAK" /* <U2415> SYMBOL FOR NEGATIVE ACKNOWLEDGE */ +"\x2416" "SYN" /* <U2416> SYMBOL FOR SYNCHRONOUS IDLE */ +"\x2417" "ETB" /* <U2417> SYMBOL FOR END OF TRANSMISSION BLOCK */ +"\x2418" "CAN" /* <U2418> SYMBOL FOR CANCEL */ +"\x2419" "EM" /* <U2419> SYMBOL FOR END OF MEDIUM */ +"\x241a" "SUB" /* <U241A> SYMBOL FOR SUBSTITUTE */ +"\x241b" "ESC" /* <U241B> SYMBOL FOR ESCAPE */ +"\x241c" "FS" /* <U241C> SYMBOL FOR FILE SEPARATOR */ +"\x241d" "GS" /* <U241D> SYMBOL FOR GROUP SEPARATOR */ +"\x241e" "RS" /* <U241E> SYMBOL FOR RECORD SEPARATOR */ +"\x241f" "US" /* <U241F> SYMBOL FOR UNIT SEPARATOR */ +"\x2420" "SP" /* <U2420> SYMBOL FOR SPACE */ +"\x2421" "DEL" /* <U2421> SYMBOL FOR DELETE */ +"\x2423" "_" /* <U2423> OPEN BOX */ +"\x2424" "NL" /* <U2424> SYMBOL FOR NEWLINE */ +"\x2460" "(1)" /* <U2460> CIRCLED DIGIT ONE */ +"\x2461" "(2)" /* <U2461> CIRCLED DIGIT TWO */ +"\x2462" "(3)" /* <U2462> CIRCLED DIGIT THREE */ +"\x2463" 
"(4)" /* <U2463> CIRCLED DIGIT FOUR */ +"\x2464" "(5)" /* <U2464> CIRCLED DIGIT FIVE */ +"\x2465" "(6)" /* <U2465> CIRCLED DIGIT SIX */ +"\x2466" "(7)" /* <U2466> CIRCLED DIGIT SEVEN */ +"\x2467" "(8)" /* <U2467> CIRCLED DIGIT EIGHT */ +"\x2468" "(9)" /* <U2468> CIRCLED DIGIT NINE */ +"\x2469" "(10)" /* <U2469> CIRCLED NUMBER TEN */ +"\x246a" "(11)" /* <U246A> CIRCLED NUMBER ELEVEN */ +"\x246b" "(12)" /* <U246B> CIRCLED NUMBER TWELVE */ +"\x246c" "(13)" /* <U246C> CIRCLED NUMBER THIRTEEN */ +"\x246d" "(14)" /* <U246D> CIRCLED NUMBER FOURTEEN */ +"\x246e" "(15)" /* <U246E> CIRCLED NUMBER FIFTEEN */ +"\x246f" "(16)" /* <U246F> CIRCLED NUMBER SIXTEEN */ +"\x2470" "(17)" /* <U2470> CIRCLED NUMBER SEVENTEEN */ +"\x2471" "(18)" /* <U2471> CIRCLED NUMBER EIGHTEEN */ +"\x2472" "(19)" /* <U2472> CIRCLED NUMBER NINETEEN */ +"\x2473" "(20)" /* <U2473> CIRCLED NUMBER TWENTY */ +"\x2474" "(1)" /* <U2474> PARENTHESIZED DIGIT ONE */ +"\x2475" "(2)" /* <U2475> PARENTHESIZED DIGIT TWO */ +"\x2476" "(3)" /* <U2476> PARENTHESIZED DIGIT THREE */ +"\x2477" "(4)" /* <U2477> PARENTHESIZED DIGIT FOUR */ +"\x2478" "(5)" /* <U2478> PARENTHESIZED DIGIT FIVE */ +"\x2479" "(6)" /* <U2479> PARENTHESIZED DIGIT SIX */ +"\x247a" "(7)" /* <U247A> PARENTHESIZED DIGIT SEVEN */ +"\x247b" "(8)" /* <U247B> PARENTHESIZED DIGIT EIGHT */ +"\x247c" "(9)" /* <U247C> PARENTHESIZED DIGIT NINE */ +"\x247d" "(10)" /* <U247D> PARENTHESIZED NUMBER TEN */ +"\x247e" "(11)" /* <U247E> PARENTHESIZED NUMBER ELEVEN */ +"\x247f" "(12)" /* <U247F> PARENTHESIZED NUMBER TWELVE */ +"\x2480" "(13)" /* <U2480> PARENTHESIZED NUMBER THIRTEEN */ +"\x2481" "(14)" /* <U2481> PARENTHESIZED NUMBER FOURTEEN */ +"\x2482" "(15)" /* <U2482> PARENTHESIZED NUMBER FIFTEEN */ +"\x2483" "(16)" /* <U2483> PARENTHESIZED NUMBER SIXTEEN */ +"\x2484" "(17)" /* <U2484> PARENTHESIZED NUMBER SEVENTEEN */ +"\x2485" "(18)" /* <U2485> PARENTHESIZED NUMBER EIGHTEEN */ +"\x2486" "(19)" /* <U2486> PARENTHESIZED NUMBER NINETEEN */ +"\x2487" "(20)" /* 
<U2487> PARENTHESIZED NUMBER TWENTY */ +"\x2488" "1." /* <U2488> DIGIT ONE FULL STOP */ +"\x2489" "2." /* <U2489> DIGIT TWO FULL STOP */ +"\x248a" "3." /* <U248A> DIGIT THREE FULL STOP */ +"\x248b" "4." /* <U248B> DIGIT FOUR FULL STOP */ +"\x248c" "5." /* <U248C> DIGIT FIVE FULL STOP */ +"\x248d" "6." /* <U248D> DIGIT SIX FULL STOP */ +"\x248e" "7." /* <U248E> DIGIT SEVEN FULL STOP */ +"\x248f" "8." /* <U248F> DIGIT EIGHT FULL STOP */ +"\x2490" "9." /* <U2490> DIGIT NINE FULL STOP */ +"\x2491" "10." /* <U2491> NUMBER TEN FULL STOP */ +"\x2492" "11." /* <U2492> NUMBER ELEVEN FULL STOP */ +"\x2493" "12." /* <U2493> NUMBER TWELVE FULL STOP */ +"\x2494" "13." /* <U2494> NUMBER THIRTEEN FULL STOP */ +"\x2495" "14." /* <U2495> NUMBER FOURTEEN FULL STOP */ +"\x2496" "15." /* <U2496> NUMBER FIFTEEN FULL STOP */ +"\x2497" "16." /* <U2497> NUMBER SIXTEEN FULL STOP */ +"\x2498" "17." /* <U2498> NUMBER SEVENTEEN FULL STOP */ +"\x2499" "18." /* <U2499> NUMBER EIGHTEEN FULL STOP */ +"\x249a" "19." /* <U249A> NUMBER NINETEEN FULL STOP */ +"\x249b" "20." 
/* <U249B> NUMBER TWENTY FULL STOP */ +"\x249c" "(a)" /* <U249C> PARENTHESIZED LATIN SMALL LETTER A */ +"\x249d" "(b)" /* <U249D> PARENTHESIZED LATIN SMALL LETTER B */ +"\x249e" "(c)" /* <U249E> PARENTHESIZED LATIN SMALL LETTER C */ +"\x249f" "(d)" /* <U249F> PARENTHESIZED LATIN SMALL LETTER D */ +"\x24a0" "(e)" /* <U24A0> PARENTHESIZED LATIN SMALL LETTER E */ +"\x24a1" "(f)" /* <U24A1> PARENTHESIZED LATIN SMALL LETTER F */ +"\x24a2" "(g)" /* <U24A2> PARENTHESIZED LATIN SMALL LETTER G */ +"\x24a3" "(h)" /* <U24A3> PARENTHESIZED LATIN SMALL LETTER H */ +"\x24a4" "(i)" /* <U24A4> PARENTHESIZED LATIN SMALL LETTER I */ +"\x24a5" "(j)" /* <U24A5> PARENTHESIZED LATIN SMALL LETTER J */ +"\x24a6" "(k)" /* <U24A6> PARENTHESIZED LATIN SMALL LETTER K */ +"\x24a7" "(l)" /* <U24A7> PARENTHESIZED LATIN SMALL LETTER L */ +"\x24a8" "(m)" /* <U24A8> PARENTHESIZED LATIN SMALL LETTER M */ +"\x24a9" "(n)" /* <U24A9> PARENTHESIZED LATIN SMALL LETTER N */ +"\x24aa" "(o)" /* <U24AA> PARENTHESIZED LATIN SMALL LETTER O */ +"\x24ab" "(p)" /* <U24AB> PARENTHESIZED LATIN SMALL LETTER P */ +"\x24ac" "(q)" /* <U24AC> PARENTHESIZED LATIN SMALL LETTER Q */ +"\x24ad" "(r)" /* <U24AD> PARENTHESIZED LATIN SMALL LETTER R */ +"\x24ae" "(s)" /* <U24AE> PARENTHESIZED LATIN SMALL LETTER S */ +"\x24af" "(t)" /* <U24AF> PARENTHESIZED LATIN SMALL LETTER T */ +"\x24b0" "(u)" /* <U24B0> PARENTHESIZED LATIN SMALL LETTER U */ +"\x24b1" "(v)" /* <U24B1> PARENTHESIZED LATIN SMALL LETTER V */ +"\x24b2" "(w)" /* <U24B2> PARENTHESIZED LATIN SMALL LETTER W */ +"\x24b3" "(x)" /* <U24B3> PARENTHESIZED LATIN SMALL LETTER X */ +"\x24b4" "(y)" /* <U24B4> PARENTHESIZED LATIN SMALL LETTER Y */ +"\x24b5" "(z)" /* <U24B5> PARENTHESIZED LATIN SMALL LETTER Z */ +"\x24b6" "(A)" /* <U24B6> CIRCLED LATIN CAPITAL LETTER A */ +"\x24b7" "(B)" /* <U24B7> CIRCLED LATIN CAPITAL LETTER B */ +"\x24b8" "(C)" /* <U24B8> CIRCLED LATIN CAPITAL LETTER C */ +"\x24b9" "(D)" /* <U24B9> CIRCLED LATIN CAPITAL LETTER D */ +"\x24ba" "(E)" /* <U24BA> 
CIRCLED LATIN CAPITAL LETTER E */ +"\x24bb" "(F)" /* <U24BB> CIRCLED LATIN CAPITAL LETTER F */ +"\x24bc" "(G)" /* <U24BC> CIRCLED LATIN CAPITAL LETTER G */ +"\x24bd" "(H)" /* <U24BD> CIRCLED LATIN CAPITAL LETTER H */ +"\x24be" "(I)" /* <U24BE> CIRCLED LATIN CAPITAL LETTER I */ +"\x24bf" "(J)" /* <U24BF> CIRCLED LATIN CAPITAL LETTER J */ +"\x24c0" "(K)" /* <U24C0> CIRCLED LATIN CAPITAL LETTER K */ +"\x24c1" "(L)" /* <U24C1> CIRCLED LATIN CAPITAL LETTER L */ +"\x24c2" "(M)" /* <U24C2> CIRCLED LATIN CAPITAL LETTER M */ +"\x24c3" "(N)" /* <U24C3> CIRCLED LATIN CAPITAL LETTER N */ +"\x24c4" "(O)" /* <U24C4> CIRCLED LATIN CAPITAL LETTER O */ +"\x24c5" "(P)" /* <U24C5> CIRCLED LATIN CAPITAL LETTER P */ +"\x24c6" "(Q)" /* <U24C6> CIRCLED LATIN CAPITAL LETTER Q */ +"\x24c7" "(R)" /* <U24C7> CIRCLED LATIN CAPITAL LETTER R */ +"\x24c8" "(S)" /* <U24C8> CIRCLED LATIN CAPITAL LETTER S */ +"\x24c9" "(T)" /* <U24C9> CIRCLED LATIN CAPITAL LETTER T */ +"\x24ca" "(U)" /* <U24CA> CIRCLED LATIN CAPITAL LETTER U */ +"\x24cb" "(V)" /* <U24CB> CIRCLED LATIN CAPITAL LETTER V */ +"\x24cc" "(W)" /* <U24CC> CIRCLED LATIN CAPITAL LETTER W */ +"\x24cd" "(X)" /* <U24CD> CIRCLED LATIN CAPITAL LETTER X */ +"\x24ce" "(Y)" /* <U24CE> CIRCLED LATIN CAPITAL LETTER Y */ +"\x24cf" "(Z)" /* <U24CF> CIRCLED LATIN CAPITAL LETTER Z */ +"\x24d0" "(a)" /* <U24D0> CIRCLED LATIN SMALL LETTER A */ +"\x24d1" "(b)" /* <U24D1> CIRCLED LATIN SMALL LETTER B */ +"\x24d2" "(c)" /* <U24D2> CIRCLED LATIN SMALL LETTER C */ +"\x24d3" "(d)" /* <U24D3> CIRCLED LATIN SMALL LETTER D */ +"\x24d4" "(e)" /* <U24D4> CIRCLED LATIN SMALL LETTER E */ +"\x24d5" "(f)" /* <U24D5> CIRCLED LATIN SMALL LETTER F */ +"\x24d6" "(g)" /* <U24D6> CIRCLED LATIN SMALL LETTER G */ +"\x24d7" "(h)" /* <U24D7> CIRCLED LATIN SMALL LETTER H */ +"\x24d8" "(i)" /* <U24D8> CIRCLED LATIN SMALL LETTER I */ +"\x24d9" "(j)" /* <U24D9> CIRCLED LATIN SMALL LETTER J */ +"\x24da" "(k)" /* <U24DA> CIRCLED LATIN SMALL LETTER K */ +"\x24db" "(l)" /* <U24DB> CIRCLED 
LATIN SMALL LETTER L */ +"\x24dc" "(m)" /* <U24DC> CIRCLED LATIN SMALL LETTER M */ +"\x24dd" "(n)" /* <U24DD> CIRCLED LATIN SMALL LETTER N */ +"\x24de" "(o)" /* <U24DE> CIRCLED LATIN SMALL LETTER O */ +"\x24df" "(p)" /* <U24DF> CIRCLED LATIN SMALL LETTER P */ +"\x24e0" "(q)" /* <U24E0> CIRCLED LATIN SMALL LETTER Q */ +"\x24e1" "(r)" /* <U24E1> CIRCLED LATIN SMALL LETTER R */ +"\x24e2" "(s)" /* <U24E2> CIRCLED LATIN SMALL LETTER S */ +"\x24e3" "(t)" /* <U24E3> CIRCLED LATIN SMALL LETTER T */ +"\x24e4" "(u)" /* <U24E4> CIRCLED LATIN SMALL LETTER U */ +"\x24e5" "(v)" /* <U24E5> CIRCLED LATIN SMALL LETTER V */ +"\x24e6" "(w)" /* <U24E6> CIRCLED LATIN SMALL LETTER W */ +"\x24e7" "(x)" /* <U24E7> CIRCLED LATIN SMALL LETTER X */ +"\x24e8" "(y)" /* <U24E8> CIRCLED LATIN SMALL LETTER Y */ +"\x24e9" "(z)" /* <U24E9> CIRCLED LATIN SMALL LETTER Z */ +"\x24ea" "(0)" /* <U24EA> CIRCLED DIGIT ZERO */ +"\x2500" "-" /* <U2500> BOX DRAWINGS LIGHT HORIZONTAL */ +"\x2502" "|" /* <U2502> BOX DRAWINGS LIGHT VERTICAL */ +"\x250c" "+" /* <U250C> BOX DRAWINGS LIGHT DOWN AND RIGHT */ +"\x2510" "+" /* <U2510> BOX DRAWINGS LIGHT DOWN AND LEFT */ +"\x2514" "+" /* <U2514> BOX DRAWINGS LIGHT UP AND RIGHT */ +"\x2518" "+" /* <U2518> BOX DRAWINGS LIGHT UP AND LEFT */ +"\x251c" "+" /* <U251C> BOX DRAWINGS LIGHT VERTICAL AND RIGHT */ +"\x2524" "+" /* <U2524> BOX DRAWINGS LIGHT VERTICAL AND LEFT */ +"\x252c" "+" /* <U252C> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */ +"\x2534" "+" /* <U2534> BOX DRAWINGS LIGHT UP AND HORIZONTAL */ +"\x253c" "+" /* <U253C> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */ +"\x25e6" "o" /* <U25E6> WHITE BULLET */ +"\x2a74" "::=" /* <U2A74> DOUBLE COLON EQUAL */ +"\x2a75" "==" /* <U2A75> TWO CONSECUTIVE EQUALS SIGNS */ +"\x2a76" "===" /* <U2A76> THREE CONSECUTIVE EQUALS SIGNS */ +"\x3000" " " /* <U3000> IDEOGRAPHIC SPACE */ +"\x30a0" "=" /* <U30A0> KATAKANA-HIRAGANA DOUBLE HYPHEN */ +"\x3251" "(21)" /* <U3251> CIRCLED NUMBER TWENTY ONE */ +"\x3252" "(22)" /* <U3252> CIRCLED 
NUMBER TWENTY TWO */ +"\x3253" "(23)" /* <U3253> CIRCLED NUMBER TWENTY THREE */ +"\x3254" "(24)" /* <U3254> CIRCLED NUMBER TWENTY FOUR */ +"\x3255" "(25)" /* <U3255> CIRCLED NUMBER TWENTY FIVE */ +"\x3256" "(26)" /* <U3256> CIRCLED NUMBER TWENTY SIX */ +"\x3257" "(27)" /* <U3257> CIRCLED NUMBER TWENTY SEVEN */ +"\x3258" "(28)" /* <U3258> CIRCLED NUMBER TWENTY EIGHT */ +"\x3259" "(29)" /* <U3259> CIRCLED NUMBER TWENTY NINE */ +"\x325a" "(30)" /* <U325A> CIRCLED NUMBER THIRTY */ +"\x325b" "(31)" /* <U325B> CIRCLED NUMBER THIRTY ONE */ +"\x325c" "(32)" /* <U325C> CIRCLED NUMBER THIRTY TWO */ +"\x325d" "(33)" /* <U325D> CIRCLED NUMBER THIRTY THREE */ +"\x325e" "(34)" /* <U325E> CIRCLED NUMBER THIRTY FOUR */ +"\x325f" "(35)" /* <U325F> CIRCLED NUMBER THIRTY FIVE */ +"\x32b1" "(36)" /* <U32B1> CIRCLED NUMBER THIRTY SIX */ +"\x32b2" "(37)" /* <U32B2> CIRCLED NUMBER THIRTY SEVEN */ +"\x32b3" "(38)" /* <U32B3> CIRCLED NUMBER THIRTY EIGHT */ +"\x32b4" "(39)" /* <U32B4> CIRCLED NUMBER THIRTY NINE */ +"\x32b5" "(40)" /* <U32B5> CIRCLED NUMBER FORTY */ +"\x32b6" "(41)" /* <U32B6> CIRCLED NUMBER FORTY ONE */ +"\x32b7" "(42)" /* <U32B7> CIRCLED NUMBER FORTY TWO */ +"\x32b8" "(43)" /* <U32B8> CIRCLED NUMBER FORTY THREE */ +"\x32b9" "(44)" /* <U32B9> CIRCLED NUMBER FORTY FOUR */ +"\x32ba" "(45)" /* <U32BA> CIRCLED NUMBER FORTY FIVE */ +"\x32bb" "(46)" /* <U32BB> CIRCLED NUMBER FORTY SIX */ +"\x32bc" "(47)" /* <U32BC> CIRCLED NUMBER FORTY SEVEN */ +"\x32bd" "(48)" /* <U32BD> CIRCLED NUMBER FORTY EIGHT */ +"\x32be" "(49)" /* <U32BE> CIRCLED NUMBER FORTY NINE */ +"\x32bf" "(50)" /* <U32BF> CIRCLED NUMBER FIFTY */ +"\x3371" "hPa" /* <U3371> SQUARE HPA */ +"\x3372" "da" /* <U3372> SQUARE DA */ +"\x3373" "AU" /* <U3373> SQUARE AU */ +"\x3374" "bar" /* <U3374> SQUARE BAR */ +"\x3375" "oV" /* <U3375> SQUARE OV */ +"\x3376" "pc" /* <U3376> SQUARE PC */ +"\x3380" "pA" /* <U3380> SQUARE PA AMPS */ +"\x3381" "nA" /* <U3381> SQUARE NA */ +"\x3382" "uA" /* <U3382> SQUARE MU A */ +"\x3383" "mA" 
/* <U3383> SQUARE MA */ +"\x3384" "kA" /* <U3384> SQUARE KA */ +"\x3385" "KB" /* <U3385> SQUARE KB */ +"\x3386" "MB" /* <U3386> SQUARE MB */ +"\x3387" "GB" /* <U3387> SQUARE GB */ +"\x3388" "cal" /* <U3388> SQUARE CAL */ +"\x3389" "kcal" /* <U3389> SQUARE KCAL */ +"\x338a" "pF" /* <U338A> SQUARE PF */ +"\x338b" "nF" /* <U338B> SQUARE NF */ +"\x338c" "uF" /* <U338C> SQUARE MU F */ +"\x338d" "ug" /* <U338D> SQUARE MU G */ +"\x338e" "mg" /* <U338E> SQUARE MG */ +"\x338f" "kg" /* <U338F> SQUARE KG */ +"\x3390" "Hz" /* <U3390> SQUARE HZ */ +"\x3391" "kHz" /* <U3391> SQUARE KHZ */ +"\x3392" "MHz" /* <U3392> SQUARE MHZ */ +"\x3393" "GHz" /* <U3393> SQUARE GHZ */ +"\x3394" "THz" /* <U3394> SQUARE THZ */ +"\x3395" "ul" /* <U3395> SQUARE MU L */ +"\x3396" "ml" /* <U3396> SQUARE ML */ +"\x3397" "dl" /* <U3397> SQUARE DL */ +"\x3398" "kl" /* <U3398> SQUARE KL */ +"\x3399" "fm" /* <U3399> SQUARE FM */ +"\x339a" "nm" /* <U339A> SQUARE NM */ +"\x339b" "um" /* <U339B> SQUARE MU M */ +"\x339c" "mm" /* <U339C> SQUARE MM */ +"\x339d" "cm" /* <U339D> SQUARE CM */ +"\x339e" "km" /* <U339E> SQUARE KM */ +"\x339f" "mm^2" /* <U339F> SQUARE MM SQUARED */ +"\x33a0" "cm^2" /* <U33A0> SQUARE CM SQUARED */ +"\x33a1" "m^2" /* <U33A1> SQUARE M SQUARED */ +"\x33a2" "km^2" /* <U33A2> SQUARE KM SQUARED */ +"\x33a3" "mm^3" /* <U33A3> SQUARE MM CUBED */ +"\x33a4" "cm^3" /* <U33A4> SQUARE CM CUBED */ +"\x33a5" "m^3" /* <U33A5> SQUARE M CUBED */ +"\x33a6" "km^3" /* <U33A6> SQUARE KM CUBED */ +"\x33a7" "m/s" /* <U33A7> SQUARE M OVER S */ +"\x33a8" "m/s^2" /* <U33A8> SQUARE M OVER S SQUARED */ +"\x33a9" "Pa" /* <U33A9> SQUARE PA */ +"\x33aa" "kPa" /* <U33AA> SQUARE KPA */ +"\x33ab" "MPa" /* <U33AB> SQUARE MPA */ +"\x33ac" "GPa" /* <U33AC> SQUARE GPA */ +"\x33ad" "rad" /* <U33AD> SQUARE RAD */ +"\x33ae" "rad/s" /* <U33AE> SQUARE RAD OVER S */ +"\x33af" "rad/s^2" /* <U33AF> SQUARE RAD OVER S SQUARED */ +"\x33b0" "ps" /* <U33B0> SQUARE PS */ +"\x33b1" "ns" /* <U33B1> SQUARE NS */ +"\x33b2" "us" /* <U33B2> 
SQUARE MU S */ +"\x33b3" "ms" /* <U33B3> SQUARE MS */ +"\x33b4" "pV" /* <U33B4> SQUARE PV */ +"\x33b5" "nV" /* <U33B5> SQUARE NV */ +"\x33b6" "uV" /* <U33B6> SQUARE MU V */ +"\x33b7" "mV" /* <U33B7> SQUARE MV */ +"\x33b8" "kV" /* <U33B8> SQUARE KV */ +"\x33b9" "MV" /* <U33B9> SQUARE MV MEGA */ +"\x33ba" "pW" /* <U33BA> SQUARE PW */ +"\x33bb" "nW" /* <U33BB> SQUARE NW */ +"\x33bc" "uW" /* <U33BC> SQUARE MU W */ +"\x33bd" "mW" /* <U33BD> SQUARE MW */ +"\x33be" "kW" /* <U33BE> SQUARE KW */ +"\x33bf" "MW" /* <U33BF> SQUARE MW MEGA */ +"\x33c2" "a.m." /* <U33C2> SQUARE AM */ +"\x33c3" "Bq" /* <U33C3> SQUARE BQ */ +"\x33c4" "cc" /* <U33C4> SQUARE CC */ +"\x33c5" "cd" /* <U33C5> SQUARE CD */ +"\x33c6" "C/kg" /* <U33C6> SQUARE C OVER KG */ +"\x33c7" "Co." /* <U33C7> SQUARE CO */ +"\x33c8" "dB" /* <U33C8> SQUARE DB */ +"\x33c9" "Gy" /* <U33C9> SQUARE GY */ +"\x33ca" "ha" /* <U33CA> SQUARE HA */ +"\x33cb" "HP" /* <U33CB> SQUARE HP */ +"\x33cc" "in" /* <U33CC> SQUARE IN */ +"\x33cd" "KK" /* <U33CD> SQUARE KK */ +"\x33ce" "KM" /* <U33CE> SQUARE KM CAPITAL */ +"\x33cf" "kt" /* <U33CF> SQUARE KT */ +"\x33d0" "lm" /* <U33D0> SQUARE LM */ +"\x33d1" "ln" /* <U33D1> SQUARE LN */ +"\x33d2" "log" /* <U33D2> SQUARE LOG */ +"\x33d3" "lx" /* <U33D3> SQUARE LX */ +"\x33d4" "mb" /* <U33D4> SQUARE MB SMALL */ +"\x33d5" "mil" /* <U33D5> SQUARE MIL */ +"\x33d6" "mol" /* <U33D6> SQUARE MOL */ +"\x33d7" "PH" /* <U33D7> SQUARE PH */ +"\x33d8" "p.m." 
/* <U33D8> SQUARE PM */ +"\x33d9" "PPM" /* <U33D9> SQUARE PPM */ +"\x33da" "PR" /* <U33DA> SQUARE PR */ +"\x33db" "sr" /* <U33DB> SQUARE SR */ +"\x33dc" "Sv" /* <U33DC> SQUARE SV */ +"\x33dd" "Wb" /* <U33DD> SQUARE WB */ +"\xfb00" "ff" /* <UFB00> LATIN SMALL LIGATURE FF */ +"\xfb01" "fi" /* <UFB01> LATIN SMALL LIGATURE FI */ +"\xfb02" "fl" /* <UFB02> LATIN SMALL LIGATURE FL */ +"\xfb03" "ffi" /* <UFB03> LATIN SMALL LIGATURE FFI */ +"\xfb04" "ffl" /* <UFB04> LATIN SMALL LIGATURE FFL */ +"\xfb06" "st" /* <UFB06> LATIN SMALL LIGATURE ST */ +"\xfb29" "+" /* <UFB29> HEBREW LETTER ALTERNATIVE PLUS SIGN */ +"\xfe00" "" /* VARIATION SELECTOR-1 */ +"\xfe01" "" /* VARIATION SELECTOR-2 */ +"\xfe02" "" /* VARIATION SELECTOR-3 */ +"\xfe03" "" /* VARIATION SELECTOR-4 */ +"\xfe04" "" /* VARIATION SELECTOR-5 */ +"\xfe05" "" /* VARIATION SELECTOR-6 */ +"\xfe06" "" /* VARIATION SELECTOR-7 */ +"\xfe07" "" /* VARIATION SELECTOR-8 */ +"\xfe08" "" /* VARIATION SELECTOR-9 */ +"\xfe09" "" /* VARIATION SELECTOR-10 */ +"\xfe0a" "" /* VARIATION SELECTOR-11 */ +"\xfe0b" "" /* VARIATION SELECTOR-12 */ +"\xfe0c" "" /* VARIATION SELECTOR-13 */ +"\xfe0d" "" /* VARIATION SELECTOR-14 */ +"\xfe0e" "" /* VARIATION SELECTOR-15 */ +"\xfe0f" "" /* VARIATION SELECTOR-16 */ +"\xfe4d" "_" /* <UFE4D> DASHED LOW LINE */ +"\xfe4e" "_" /* <UFE4E> CENTRELINE LOW LINE */ +"\xfe4f" "_" /* <UFE4F> WAVY LOW LINE */ +"\xfe50" "," /* <UFE50> SMALL COMMA */ +"\xfe52" "." /* <UFE52> SMALL FULL STOP */ +"\xfe54" ";" /* <UFE54> SMALL SEMICOLON */ +"\xfe55" ":" /* <UFE55> SMALL COLON */ +"\xfe56" "?" /* <UFE56> SMALL QUESTION MARK */ +"\xfe57" "!" 
/* <UFE57> SMALL EXCLAMATION MARK */ +"\xfe59" "(" /* <UFE59> SMALL LEFT PARENTHESIS */ +"\xfe5a" ")" /* <UFE5A> SMALL RIGHT PARENTHESIS */ +"\xfe5b" "{" /* <UFE5B> SMALL LEFT CURLY BRACKET */ +"\xfe5c" "}" /* <UFE5C> SMALL RIGHT CURLY BRACKET */ +"\xfe5f" "#" /* <UFE5F> SMALL NUMBER SIGN */ +"\xfe60" "&" /* <UFE60> SMALL AMPERSAND */ +"\xfe61" "*" /* <UFE61> SMALL ASTERISK */ +"\xfe62" "+" /* <UFE62> SMALL PLUS SIGN */ +"\xfe63" "-" /* <UFE63> SMALL HYPHEN-MINUS */ +"\xfe64" "<" /* <UFE64> SMALL LESS-THAN SIGN */ +"\xfe65" ">" /* <UFE65> SMALL GREATER-THAN SIGN */ +"\xfe66" "=" /* <UFE66> SMALL EQUALS SIGN */ +"\xfe68" "\\" /* <UFE68> SMALL REVERSE SOLIDUS */ +"\xfe69" "$" /* <UFE69> SMALL DOLLAR SIGN */ +"\xfe6a" "%" /* <UFE6A> SMALL PERCENT SIGN */ +"\xfe6b" "@" /* <UFE6B> SMALL COMMERCIAL AT */ +"\xfeff" "" /* <UFEFF> ZERO WIDTH NO-BREAK SPACE */ +"\xff01" "!" /* <UFF01> FULLWIDTH EXCLAMATION MARK */ +"\xff02" "\"" /* <UFF02> FULLWIDTH QUOTATION MARK */ +"\xff03" "#" /* <UFF03> FULLWIDTH NUMBER SIGN */ +"\xff04" "$" /* <UFF04> FULLWIDTH DOLLAR SIGN */ +"\xff05" "%" /* <UFF05> FULLWIDTH PERCENT SIGN */ +"\xff06" "&" /* <UFF06> FULLWIDTH AMPERSAND */ +"\xff07" "'" /* <UFF07> FULLWIDTH APOSTROPHE */ +"\xff08" "(" /* <UFF08> FULLWIDTH LEFT PARENTHESIS */ +"\xff09" ")" /* <UFF09> FULLWIDTH RIGHT PARENTHESIS */ +"\xff0a" "*" /* <UFF0A> FULLWIDTH ASTERISK */ +"\xff0b" "+" /* <UFF0B> FULLWIDTH PLUS SIGN */ +"\xff0c" "," /* <UFF0C> FULLWIDTH COMMA */ +"\xff0d" "-" /* <UFF0D> FULLWIDTH HYPHEN-MINUS */ +"\xff0e" "." 
/* <UFF0E> FULLWIDTH FULL STOP */ +"\xff0f" "/" /* <UFF0F> FULLWIDTH SOLIDUS */ +"\xff10" "0" /* <UFF10> FULLWIDTH DIGIT ZERO */ +"\xff11" "1" /* <UFF11> FULLWIDTH DIGIT ONE */ +"\xff12" "2" /* <UFF12> FULLWIDTH DIGIT TWO */ +"\xff13" "3" /* <UFF13> FULLWIDTH DIGIT THREE */ +"\xff14" "4" /* <UFF14> FULLWIDTH DIGIT FOUR */ +"\xff15" "5" /* <UFF15> FULLWIDTH DIGIT FIVE */ +"\xff16" "6" /* <UFF16> FULLWIDTH DIGIT SIX */ +"\xff17" "7" /* <UFF17> FULLWIDTH DIGIT SEVEN */ +"\xff18" "8" /* <UFF18> FULLWIDTH DIGIT EIGHT */ +"\xff19" "9" /* <UFF19> FULLWIDTH DIGIT NINE */ +"\xff1a" ":" /* <UFF1A> FULLWIDTH COLON */ +"\xff1b" ";" /* <UFF1B> FULLWIDTH SEMICOLON */ +"\xff1c" "<" /* <UFF1C> FULLWIDTH LESS-THAN SIGN */ +"\xff1d" "=" /* <UFF1D> FULLWIDTH EQUALS SIGN */ +"\xff1e" ">" /* <UFF1E> FULLWIDTH GREATER-THAN SIGN */ +"\xff1f" "?" /* <UFF1F> FULLWIDTH QUESTION MARK */ +"\xff20" "@" /* <UFF20> FULLWIDTH COMMERCIAL AT */ +"\xff21" "A" /* <UFF21> FULLWIDTH LATIN CAPITAL LETTER A */ +"\xff22" "B" /* <UFF22> FULLWIDTH LATIN CAPITAL LETTER B */ +"\xff23" "C" /* <UFF23> FULLWIDTH LATIN CAPITAL LETTER C */ +"\xff24" "D" /* <UFF24> FULLWIDTH LATIN CAPITAL LETTER D */ +"\xff25" "E" /* <UFF25> FULLWIDTH LATIN CAPITAL LETTER E */ +"\xff26" "F" /* <UFF26> FULLWIDTH LATIN CAPITAL LETTER F */ +"\xff27" "G" /* <UFF27> FULLWIDTH LATIN CAPITAL LETTER G */ +"\xff28" "H" /* <UFF28> FULLWIDTH LATIN CAPITAL LETTER H */ +"\xff29" "I" /* <UFF29> FULLWIDTH LATIN CAPITAL LETTER I */ +"\xff2a" "J" /* <UFF2A> FULLWIDTH LATIN CAPITAL LETTER J */ +"\xff2b" "K" /* <UFF2B> FULLWIDTH LATIN CAPITAL LETTER K */ +"\xff2c" "L" /* <UFF2C> FULLWIDTH LATIN CAPITAL LETTER L */ +"\xff2d" "M" /* <UFF2D> FULLWIDTH LATIN CAPITAL LETTER M */ +"\xff2e" "N" /* <UFF2E> FULLWIDTH LATIN CAPITAL LETTER N */ +"\xff2f" "O" /* <UFF2F> FULLWIDTH LATIN CAPITAL LETTER O */ +"\xff30" "P" /* <UFF30> FULLWIDTH LATIN CAPITAL LETTER P */ +"\xff31" "Q" /* <UFF31> FULLWIDTH LATIN CAPITAL LETTER Q */ +"\xff32" "R" /* <UFF32> FULLWIDTH 
LATIN CAPITAL LETTER R */ +"\xff33" "S" /* <UFF33> FULLWIDTH LATIN CAPITAL LETTER S */ +"\xff34" "T" /* <UFF34> FULLWIDTH LATIN CAPITAL LETTER T */ +"\xff35" "U" /* <UFF35> FULLWIDTH LATIN CAPITAL LETTER U */ +"\xff36" "V" /* <UFF36> FULLWIDTH LATIN CAPITAL LETTER V */ +"\xff37" "W" /* <UFF37> FULLWIDTH LATIN CAPITAL LETTER W */ +"\xff38" "X" /* <UFF38> FULLWIDTH LATIN CAPITAL LETTER X */ +"\xff39" "Y" /* <UFF39> FULLWIDTH LATIN CAPITAL LETTER Y */ +"\xff3a" "Z" /* <UFF3A> FULLWIDTH LATIN CAPITAL LETTER Z */ +"\xff3b" "[" /* <UFF3B> FULLWIDTH LEFT SQUARE BRACKET */ +"\xff3c" "\\" /* <UFF3C> FULLWIDTH REVERSE SOLIDUS */ +"\xff3d" "]" /* <UFF3D> FULLWIDTH RIGHT SQUARE BRACKET */ +"\xff3e" "^" /* <UFF3E> FULLWIDTH CIRCUMFLEX ACCENT */ +"\xff3f" "_" /* <UFF3F> FULLWIDTH LOW LINE */ +"\xff40" "`" /* <UFF40> FULLWIDTH GRAVE ACCENT */ +"\xff41" "a" /* <UFF41> FULLWIDTH LATIN SMALL LETTER A */ +"\xff42" "b" /* <UFF42> FULLWIDTH LATIN SMALL LETTER B */ +"\xff43" "c" /* <UFF43> FULLWIDTH LATIN SMALL LETTER C */ +"\xff44" "d" /* <UFF44> FULLWIDTH LATIN SMALL LETTER D */ +"\xff45" "e" /* <UFF45> FULLWIDTH LATIN SMALL LETTER E */ +"\xff46" "f" /* <UFF46> FULLWIDTH LATIN SMALL LETTER F */ +"\xff47" "g" /* <UFF47> FULLWIDTH LATIN SMALL LETTER G */ +"\xff48" "h" /* <UFF48> FULLWIDTH LATIN SMALL LETTER H */ +"\xff49" "i" /* <UFF49> FULLWIDTH LATIN SMALL LETTER I */ +"\xff4a" "j" /* <UFF4A> FULLWIDTH LATIN SMALL LETTER J */ +"\xff4b" "k" /* <UFF4B> FULLWIDTH LATIN SMALL LETTER K */ +"\xff4c" "l" /* <UFF4C> FULLWIDTH LATIN SMALL LETTER L */ +"\xff4d" "m" /* <UFF4D> FULLWIDTH LATIN SMALL LETTER M */ +"\xff4e" "n" /* <UFF4E> FULLWIDTH LATIN SMALL LETTER N */ +"\xff4f" "o" /* <UFF4F> FULLWIDTH LATIN SMALL LETTER O */ +"\xff50" "p" /* <UFF50> FULLWIDTH LATIN SMALL LETTER P */ +"\xff51" "q" /* <UFF51> FULLWIDTH LATIN SMALL LETTER Q */ +"\xff52" "r" /* <UFF52> FULLWIDTH LATIN SMALL LETTER R */ +"\xff53" "s" /* <UFF53> FULLWIDTH LATIN SMALL LETTER S */ +"\xff54" "t" /* <UFF54> FULLWIDTH 
LATIN SMALL LETTER T */ +"\xff55" "u" /* <UFF55> FULLWIDTH LATIN SMALL LETTER U */ +"\xff56" "v" /* <UFF56> FULLWIDTH LATIN SMALL LETTER V */ +"\xff57" "w" /* <UFF57> FULLWIDTH LATIN SMALL LETTER W */ +"\xff58" "x" /* <UFF58> FULLWIDTH LATIN SMALL LETTER X */ +"\xff59" "y" /* <UFF59> FULLWIDTH LATIN SMALL LETTER Y */ +"\xff5a" "z" /* <UFF5A> FULLWIDTH LATIN SMALL LETTER Z */ +"\xff5b" "{" /* <UFF5B> FULLWIDTH LEFT CURLY BRACKET */ +"\xff5c" "|" /* <UFF5C> FULLWIDTH VERTICAL LINE */ +"\xff5d" "}" /* <UFF5D> FULLWIDTH RIGHT CURLY BRACKET */ +"\xff5e" "~" /* <UFF5E> FULLWIDTH TILDE */ +"\x0001d400" "A" /* <U0001D400> MATHEMATICAL BOLD CAPITAL A */ +"\x0001d401" "B" /* <U0001D401> MATHEMATICAL BOLD CAPITAL B */ +"\x0001d402" "C" /* <U0001D402> MATHEMATICAL BOLD CAPITAL C */ +"\x0001d403" "D" /* <U0001D403> MATHEMATICAL BOLD CAPITAL D */ +"\x0001d404" "E" /* <U0001D404> MATHEMATICAL BOLD CAPITAL E */ +"\x0001d405" "F" /* <U0001D405> MATHEMATICAL BOLD CAPITAL F */ +"\x0001d406" "G" /* <U0001D406> MATHEMATICAL BOLD CAPITAL G */ +"\x0001d407" "H" /* <U0001D407> MATHEMATICAL BOLD CAPITAL H */ +"\x0001d408" "I" /* <U0001D408> MATHEMATICAL BOLD CAPITAL I */ +"\x0001d409" "J" /* <U0001D409> MATHEMATICAL BOLD CAPITAL J */ +"\x0001d40a" "K" /* <U0001D40A> MATHEMATICAL BOLD CAPITAL K */ +"\x0001d40b" "L" /* <U0001D40B> MATHEMATICAL BOLD CAPITAL L */ +"\x0001d40c" "M" /* <U0001D40C> MATHEMATICAL BOLD CAPITAL M */ +"\x0001d40d" "N" /* <U0001D40D> MATHEMATICAL BOLD CAPITAL N */ +"\x0001d40e" "O" /* <U0001D40E> MATHEMATICAL BOLD CAPITAL O */ +"\x0001d40f" "P" /* <U0001D40F> MATHEMATICAL BOLD CAPITAL P */ +"\x0001d410" "Q" /* <U0001D410> MATHEMATICAL BOLD CAPITAL Q */ +"\x0001d411" "R" /* <U0001D411> MATHEMATICAL BOLD CAPITAL R */ +"\x0001d412" "S" /* <U0001D412> MATHEMATICAL BOLD CAPITAL S */ +"\x0001d413" "T" /* <U0001D413> MATHEMATICAL BOLD CAPITAL T */ +"\x0001d414" "U" /* <U0001D414> MATHEMATICAL BOLD CAPITAL U */ +"\x0001d415" "V" /* <U0001D415> MATHEMATICAL BOLD CAPITAL V */ 
+"\x0001d416" "W" /* <U0001D416> MATHEMATICAL BOLD CAPITAL W */ +"\x0001d417" "X" /* <U0001D417> MATHEMATICAL BOLD CAPITAL X */ +"\x0001d418" "Y" /* <U0001D418> MATHEMATICAL BOLD CAPITAL Y */ +"\x0001d419" "Z" /* <U0001D419> MATHEMATICAL BOLD CAPITAL Z */ +"\x0001d41a" "a" /* <U0001D41A> MATHEMATICAL BOLD SMALL A */ +"\x0001d41b" "b" /* <U0001D41B> MATHEMATICAL BOLD SMALL B */ +"\x0001d41c" "c" /* <U0001D41C> MATHEMATICAL BOLD SMALL C */ +"\x0001d41d" "d" /* <U0001D41D> MATHEMATICAL BOLD SMALL D */ +"\x0001d41e" "e" /* <U0001D41E> MATHEMATICAL BOLD SMALL E */ +"\x0001d41f" "f" /* <U0001D41F> MATHEMATICAL BOLD SMALL F */ +"\x0001d420" "g" /* <U0001D420> MATHEMATICAL BOLD SMALL G */ +"\x0001d421" "h" /* <U0001D421> MATHEMATICAL BOLD SMALL H */ +"\x0001d422" "i" /* <U0001D422> MATHEMATICAL BOLD SMALL I */ +"\x0001d423" "j" /* <U0001D423> MATHEMATICAL BOLD SMALL J */ +"\x0001d424" "k" /* <U0001D424> MATHEMATICAL BOLD SMALL K */ +"\x0001d425" "l" /* <U0001D425> MATHEMATICAL BOLD SMALL L */ +"\x0001d426" "m" /* <U0001D426> MATHEMATICAL BOLD SMALL M */ +"\x0001d427" "n" /* <U0001D427> MATHEMATICAL BOLD SMALL N */ +"\x0001d428" "o" /* <U0001D428> MATHEMATICAL BOLD SMALL O */ +"\x0001d429" "p" /* <U0001D429> MATHEMATICAL BOLD SMALL P */ +"\x0001d42a" "q" /* <U0001D42A> MATHEMATICAL BOLD SMALL Q */ +"\x0001d42b" "r" /* <U0001D42B> MATHEMATICAL BOLD SMALL R */ +"\x0001d42c" "s" /* <U0001D42C> MATHEMATICAL BOLD SMALL S */ +"\x0001d42d" "t" /* <U0001D42D> MATHEMATICAL BOLD SMALL T */ +"\x0001d42e" "u" /* <U0001D42E> MATHEMATICAL BOLD SMALL U */ +"\x0001d42f" "v" /* <U0001D42F> MATHEMATICAL BOLD SMALL V */ +"\x0001d430" "w" /* <U0001D430> MATHEMATICAL BOLD SMALL W */ +"\x0001d431" "x" /* <U0001D431> MATHEMATICAL BOLD SMALL X */ +"\x0001d432" "y" /* <U0001D432> MATHEMATICAL BOLD SMALL Y */ +"\x0001d433" "z" /* <U0001D433> MATHEMATICAL BOLD SMALL Z */ +"\x0001d434" "A" /* <U0001D434> MATHEMATICAL ITALIC CAPITAL A */ +"\x0001d435" "B" /* <U0001D435> MATHEMATICAL ITALIC CAPITAL B */ 
+"\x0001d436" "C" /* <U0001D436> MATHEMATICAL ITALIC CAPITAL C */ +"\x0001d437" "D" /* <U0001D437> MATHEMATICAL ITALIC CAPITAL D */ +"\x0001d438" "E" /* <U0001D438> MATHEMATICAL ITALIC CAPITAL E */ +"\x0001d439" "F" /* <U0001D439> MATHEMATICAL ITALIC CAPITAL F */ +"\x0001d43a" "G" /* <U0001D43A> MATHEMATICAL ITALIC CAPITAL G */ +"\x0001d43b" "H" /* <U0001D43B> MATHEMATICAL ITALIC CAPITAL H */ +"\x0001d43c" "I" /* <U0001D43C> MATHEMATICAL ITALIC CAPITAL I */ +"\x0001d43d" "J" /* <U0001D43D> MATHEMATICAL ITALIC CAPITAL J */ +"\x0001d43e" "K" /* <U0001D43E> MATHEMATICAL ITALIC CAPITAL K */ +"\x0001d43f" "L" /* <U0001D43F> MATHEMATICAL ITALIC CAPITAL L */ +"\x0001d440" "M" /* <U0001D440> MATHEMATICAL ITALIC CAPITAL M */ +"\x0001d441" "N" /* <U0001D441> MATHEMATICAL ITALIC CAPITAL N */ +"\x0001d442" "O" /* <U0001D442> MATHEMATICAL ITALIC CAPITAL O */ +"\x0001d443" "P" /* <U0001D443> MATHEMATICAL ITALIC CAPITAL P */ +"\x0001d444" "Q" /* <U0001D444> MATHEMATICAL ITALIC CAPITAL Q */ +"\x0001d445" "R" /* <U0001D445> MATHEMATICAL ITALIC CAPITAL R */ +"\x0001d446" "S" /* <U0001D446> MATHEMATICAL ITALIC CAPITAL S */ +"\x0001d447" "T" /* <U0001D447> MATHEMATICAL ITALIC CAPITAL T */ +"\x0001d448" "U" /* <U0001D448> MATHEMATICAL ITALIC CAPITAL U */ +"\x0001d449" "V" /* <U0001D449> MATHEMATICAL ITALIC CAPITAL V */ +"\x0001d44a" "W" /* <U0001D44A> MATHEMATICAL ITALIC CAPITAL W */ +"\x0001d44b" "X" /* <U0001D44B> MATHEMATICAL ITALIC CAPITAL X */ +"\x0001d44c" "Y" /* <U0001D44C> MATHEMATICAL ITALIC CAPITAL Y */ +"\x0001d44d" "Z" /* <U0001D44D> MATHEMATICAL ITALIC CAPITAL Z */ +"\x0001d44e" "a" /* <U0001D44E> MATHEMATICAL ITALIC SMALL A */ +"\x0001d44f" "b" /* <U0001D44F> MATHEMATICAL ITALIC SMALL B */ +"\x0001d450" "c" /* <U0001D450> MATHEMATICAL ITALIC SMALL C */ +"\x0001d451" "d" /* <U0001D451> MATHEMATICAL ITALIC SMALL D */ +"\x0001d452" "e" /* <U0001D452> MATHEMATICAL ITALIC SMALL E */ +"\x0001d453" "f" /* <U0001D453> MATHEMATICAL ITALIC SMALL F */ +"\x0001d454" "g" /* 
<U0001D454> MATHEMATICAL ITALIC SMALL G */ +"\x0001d456" "i" /* <U0001D456> MATHEMATICAL ITALIC SMALL I */ +"\x0001d457" "j" /* <U0001D457> MATHEMATICAL ITALIC SMALL J */ +"\x0001d458" "k" /* <U0001D458> MATHEMATICAL ITALIC SMALL K */ +"\x0001d459" "l" /* <U0001D459> MATHEMATICAL ITALIC SMALL L */ +"\x0001d45a" "m" /* <U0001D45A> MATHEMATICAL ITALIC SMALL M */ +"\x0001d45b" "n" /* <U0001D45B> MATHEMATICAL ITALIC SMALL N */ +"\x0001d45c" "o" /* <U0001D45C> MATHEMATICAL ITALIC SMALL O */ +"\x0001d45d" "p" /* <U0001D45D> MATHEMATICAL ITALIC SMALL P */ +"\x0001d45e" "q" /* <U0001D45E> MATHEMATICAL ITALIC SMALL Q */ +"\x0001d45f" "r" /* <U0001D45F> MATHEMATICAL ITALIC SMALL R */ +"\x0001d460" "s" /* <U0001D460> MATHEMATICAL ITALIC SMALL S */ +"\x0001d461" "t" /* <U0001D461> MATHEMATICAL ITALIC SMALL T */ +"\x0001d462" "u" /* <U0001D462> MATHEMATICAL ITALIC SMALL U */ +"\x0001d463" "v" /* <U0001D463> MATHEMATICAL ITALIC SMALL V */ +"\x0001d464" "w" /* <U0001D464> MATHEMATICAL ITALIC SMALL W */ +"\x0001d465" "x" /* <U0001D465> MATHEMATICAL ITALIC SMALL X */ +"\x0001d466" "y" /* <U0001D466> MATHEMATICAL ITALIC SMALL Y */ +"\x0001d467" "z" /* <U0001D467> MATHEMATICAL ITALIC SMALL Z */ +"\x0001d468" "A" /* <U0001D468> MATHEMATICAL BOLD ITALIC CAPITAL A */ +"\x0001d469" "B" /* <U0001D469> MATHEMATICAL BOLD ITALIC CAPITAL B */ +"\x0001d46a" "C" /* <U0001D46A> MATHEMATICAL BOLD ITALIC CAPITAL C */ +"\x0001d46b" "D" /* <U0001D46B> MATHEMATICAL BOLD ITALIC CAPITAL D */ +"\x0001d46c" "E" /* <U0001D46C> MATHEMATICAL BOLD ITALIC CAPITAL E */ +"\x0001d46d" "F" /* <U0001D46D> MATHEMATICAL BOLD ITALIC CAPITAL F */ +"\x0001d46e" "G" /* <U0001D46E> MATHEMATICAL BOLD ITALIC CAPITAL G */ +"\x0001d46f" "H" /* <U0001D46F> MATHEMATICAL BOLD ITALIC CAPITAL H */ +"\x0001d470" "I" /* <U0001D470> MATHEMATICAL BOLD ITALIC CAPITAL I */ +"\x0001d471" "J" /* <U0001D471> MATHEMATICAL BOLD ITALIC CAPITAL J */ +"\x0001d472" "K" /* <U0001D472> MATHEMATICAL BOLD ITALIC CAPITAL K */ +"\x0001d473" "L" /* 
<U0001D473> MATHEMATICAL BOLD ITALIC CAPITAL L */ +"\x0001d474" "M" /* <U0001D474> MATHEMATICAL BOLD ITALIC CAPITAL M */ +"\x0001d475" "N" /* <U0001D475> MATHEMATICAL BOLD ITALIC CAPITAL N */ +"\x0001d476" "O" /* <U0001D476> MATHEMATICAL BOLD ITALIC CAPITAL O */ +"\x0001d477" "P" /* <U0001D477> MATHEMATICAL BOLD ITALIC CAPITAL P */ +"\x0001d478" "Q" /* <U0001D478> MATHEMATICAL BOLD ITALIC CAPITAL Q */ +"\x0001d479" "R" /* <U0001D479> MATHEMATICAL BOLD ITALIC CAPITAL R */ +"\x0001d47a" "S" /* <U0001D47A> MATHEMATICAL BOLD ITALIC CAPITAL S */ +"\x0001d47b" "T" /* <U0001D47B> MATHEMATICAL BOLD ITALIC CAPITAL T */ +"\x0001d47c" "U" /* <U0001D47C> MATHEMATICAL BOLD ITALIC CAPITAL U */ +"\x0001d47d" "V" /* <U0001D47D> MATHEMATICAL BOLD ITALIC CAPITAL V */ +"\x0001d47e" "W" /* <U0001D47E> MATHEMATICAL BOLD ITALIC CAPITAL W */ +"\x0001d47f" "X" /* <U0001D47F> MATHEMATICAL BOLD ITALIC CAPITAL X */ +"\x0001d480" "Y" /* <U0001D480> MATHEMATICAL BOLD ITALIC CAPITAL Y */ +"\x0001d481" "Z" /* <U0001D481> MATHEMATICAL BOLD ITALIC CAPITAL Z */ +"\x0001d482" "a" /* <U0001D482> MATHEMATICAL BOLD ITALIC SMALL A */ +"\x0001d483" "b" /* <U0001D483> MATHEMATICAL BOLD ITALIC SMALL B */ +"\x0001d484" "c" /* <U0001D484> MATHEMATICAL BOLD ITALIC SMALL C */ +"\x0001d485" "d" /* <U0001D485> MATHEMATICAL BOLD ITALIC SMALL D */ +"\x0001d486" "e" /* <U0001D486> MATHEMATICAL BOLD ITALIC SMALL E */ +"\x0001d487" "f" /* <U0001D487> MATHEMATICAL BOLD ITALIC SMALL F */ +"\x0001d488" "g" /* <U0001D488> MATHEMATICAL BOLD ITALIC SMALL G */ +"\x0001d489" "h" /* <U0001D489> MATHEMATICAL BOLD ITALIC SMALL H */ +"\x0001d48a" "i" /* <U0001D48A> MATHEMATICAL BOLD ITALIC SMALL I */ +"\x0001d48b" "j" /* <U0001D48B> MATHEMATICAL BOLD ITALIC SMALL J */ +"\x0001d48c" "k" /* <U0001D48C> MATHEMATICAL BOLD ITALIC SMALL K */ +"\x0001d48d" "l" /* <U0001D48D> MATHEMATICAL BOLD ITALIC SMALL L */ +"\x0001d48e" "m" /* <U0001D48E> MATHEMATICAL BOLD ITALIC SMALL M */ +"\x0001d48f" "n" /* <U0001D48F> MATHEMATICAL BOLD ITALIC 
SMALL N */ +"\x0001d490" "o" /* <U0001D490> MATHEMATICAL BOLD ITALIC SMALL O */ +"\x0001d491" "p" /* <U0001D491> MATHEMATICAL BOLD ITALIC SMALL P */ +"\x0001d492" "q" /* <U0001D492> MATHEMATICAL BOLD ITALIC SMALL Q */ +"\x0001d493" "r" /* <U0001D493> MATHEMATICAL BOLD ITALIC SMALL R */ +"\x0001d494" "s" /* <U0001D494> MATHEMATICAL BOLD ITALIC SMALL S */ +"\x0001d495" "t" /* <U0001D495> MATHEMATICAL BOLD ITALIC SMALL T */ +"\x0001d496" "u" /* <U0001D496> MATHEMATICAL BOLD ITALIC SMALL U */ +"\x0001d497" "v" /* <U0001D497> MATHEMATICAL BOLD ITALIC SMALL V */ +"\x0001d498" "w" /* <U0001D498> MATHEMATICAL BOLD ITALIC SMALL W */ +"\x0001d499" "x" /* <U0001D499> MATHEMATICAL BOLD ITALIC SMALL X */ +"\x0001d49a" "y" /* <U0001D49A> MATHEMATICAL BOLD ITALIC SMALL Y */ +"\x0001d49b" "z" /* <U0001D49B> MATHEMATICAL BOLD ITALIC SMALL Z */ +"\x0001d49c" "A" /* <U0001D49C> MATHEMATICAL SCRIPT CAPITAL A */ +"\x0001d49e" "C" /* <U0001D49E> MATHEMATICAL SCRIPT CAPITAL C */ +"\x0001d49f" "D" /* <U0001D49F> MATHEMATICAL SCRIPT CAPITAL D */ +"\x0001d4a2" "G" /* <U0001D4A2> MATHEMATICAL SCRIPT CAPITAL G */ +"\x0001d4a5" "J" /* <U0001D4A5> MATHEMATICAL SCRIPT CAPITAL J */ +"\x0001d4a6" "K" /* <U0001D4A6> MATHEMATICAL SCRIPT CAPITAL K */ +"\x0001d4a9" "N" /* <U0001D4A9> MATHEMATICAL SCRIPT CAPITAL N */ +"\x0001d4aa" "O" /* <U0001D4AA> MATHEMATICAL SCRIPT CAPITAL O */ +"\x0001d4ab" "P" /* <U0001D4AB> MATHEMATICAL SCRIPT CAPITAL P */ +"\x0001d4ac" "Q" /* <U0001D4AC> MATHEMATICAL SCRIPT CAPITAL Q */ +"\x0001d4ae" "S" /* <U0001D4AE> MATHEMATICAL SCRIPT CAPITAL S */ +"\x0001d4af" "T" /* <U0001D4AF> MATHEMATICAL SCRIPT CAPITAL T */ +"\x0001d4b0" "U" /* <U0001D4B0> MATHEMATICAL SCRIPT CAPITAL U */ +"\x0001d4b1" "V" /* <U0001D4B1> MATHEMATICAL SCRIPT CAPITAL V */ +"\x0001d4b2" "W" /* <U0001D4B2> MATHEMATICAL SCRIPT CAPITAL W */ +"\x0001d4b3" "X" /* <U0001D4B3> MATHEMATICAL SCRIPT CAPITAL X */ +"\x0001d4b4" "Y" /* <U0001D4B4> MATHEMATICAL SCRIPT CAPITAL Y */ +"\x0001d4b5" "Z" /* <U0001D4B5> 
MATHEMATICAL SCRIPT CAPITAL Z */ +"\x0001d4b6" "a" /* <U0001D4B6> MATHEMATICAL SCRIPT SMALL A */ +"\x0001d4b7" "b" /* <U0001D4B7> MATHEMATICAL SCRIPT SMALL B */ +"\x0001d4b8" "c" /* <U0001D4B8> MATHEMATICAL SCRIPT SMALL C */ +"\x0001d4b9" "d" /* <U0001D4B9> MATHEMATICAL SCRIPT SMALL D */ +"\x0001d4bb" "f" /* <U0001D4BB> MATHEMATICAL SCRIPT SMALL F */ +"\x0001d4bd" "h" /* <U0001D4BD> MATHEMATICAL SCRIPT SMALL H */ +"\x0001d4be" "i" /* <U0001D4BE> MATHEMATICAL SCRIPT SMALL I */ +"\x0001d4bf" "j" /* <U0001D4BF> MATHEMATICAL SCRIPT SMALL J */ +"\x0001d4c0" "k" /* <U0001D4C0> MATHEMATICAL SCRIPT SMALL K */ +"\x0001d4c2" "m" /* <U0001D4C2> MATHEMATICAL SCRIPT SMALL M */ +"\x0001d4c3" "n" /* <U0001D4C3> MATHEMATICAL SCRIPT SMALL N */ +"\x0001d4c5" "p" /* <U0001D4C5> MATHEMATICAL SCRIPT SMALL P */ +"\x0001d4c6" "q" /* <U0001D4C6> MATHEMATICAL SCRIPT SMALL Q */ +"\x0001d4c7" "r" /* <U0001D4C7> MATHEMATICAL SCRIPT SMALL R */ +"\x0001d4c8" "s" /* <U0001D4C8> MATHEMATICAL SCRIPT SMALL S */ +"\x0001d4c9" "t" /* <U0001D4C9> MATHEMATICAL SCRIPT SMALL T */ +"\x0001d4ca" "u" /* <U0001D4CA> MATHEMATICAL SCRIPT SMALL U */ +"\x0001d4cb" "v" /* <U0001D4CB> MATHEMATICAL SCRIPT SMALL V */ +"\x0001d4cc" "w" /* <U0001D4CC> MATHEMATICAL SCRIPT SMALL W */ +"\x0001d4cd" "x" /* <U0001D4CD> MATHEMATICAL SCRIPT SMALL X */ +"\x0001d4ce" "y" /* <U0001D4CE> MATHEMATICAL SCRIPT SMALL Y */ +"\x0001d4cf" "z" /* <U0001D4CF> MATHEMATICAL SCRIPT SMALL Z */ +"\x0001d4d0" "A" /* <U0001D4D0> MATHEMATICAL BOLD SCRIPT CAPITAL A */ +"\x0001d4d1" "B" /* <U0001D4D1> MATHEMATICAL BOLD SCRIPT CAPITAL B */ +"\x0001d4d2" "C" /* <U0001D4D2> MATHEMATICAL BOLD SCRIPT CAPITAL C */ +"\x0001d4d3" "D" /* <U0001D4D3> MATHEMATICAL BOLD SCRIPT CAPITAL D */ +"\x0001d4d4" "E" /* <U0001D4D4> MATHEMATICAL BOLD SCRIPT CAPITAL E */ +"\x0001d4d5" "F" /* <U0001D4D5> MATHEMATICAL BOLD SCRIPT CAPITAL F */ +"\x0001d4d6" "G" /* <U0001D4D6> MATHEMATICAL BOLD SCRIPT CAPITAL G */ +"\x0001d4d7" "H" /* <U0001D4D7> MATHEMATICAL BOLD SCRIPT 
CAPITAL H */ +"\x0001d4d8" "I" /* <U0001D4D8> MATHEMATICAL BOLD SCRIPT CAPITAL I */ +"\x0001d4d9" "J" /* <U0001D4D9> MATHEMATICAL BOLD SCRIPT CAPITAL J */ +"\x0001d4da" "K" /* <U0001D4DA> MATHEMATICAL BOLD SCRIPT CAPITAL K */ +"\x0001d4db" "L" /* <U0001D4DB> MATHEMATICAL BOLD SCRIPT CAPITAL L */ +"\x0001d4dc" "M" /* <U0001D4DC> MATHEMATICAL BOLD SCRIPT CAPITAL M */ +"\x0001d4dd" "N" /* <U0001D4DD> MATHEMATICAL BOLD SCRIPT CAPITAL N */ +"\x0001d4de" "O" /* <U0001D4DE> MATHEMATICAL BOLD SCRIPT CAPITAL O */ +"\x0001d4df" "P" /* <U0001D4DF> MATHEMATICAL BOLD SCRIPT CAPITAL P */ +"\x0001d4e0" "Q" /* <U0001D4E0> MATHEMATICAL BOLD SCRIPT CAPITAL Q */ +"\x0001d4e1" "R" /* <U0001D4E1> MATHEMATICAL BOLD SCRIPT CAPITAL R */ +"\x0001d4e2" "S" /* <U0001D4E2> MATHEMATICAL BOLD SCRIPT CAPITAL S */ +"\x0001d4e3" "T" /* <U0001D4E3> MATHEMATICAL BOLD SCRIPT CAPITAL T */ +"\x0001d4e4" "U" /* <U0001D4E4> MATHEMATICAL BOLD SCRIPT CAPITAL U */ +"\x0001d4e5" "V" /* <U0001D4E5> MATHEMATICAL BOLD SCRIPT CAPITAL V */ +"\x0001d4e6" "W" /* <U0001D4E6> MATHEMATICAL BOLD SCRIPT CAPITAL W */ +"\x0001d4e7" "X" /* <U0001D4E7> MATHEMATICAL BOLD SCRIPT CAPITAL X */ +"\x0001d4e8" "Y" /* <U0001D4E8> MATHEMATICAL BOLD SCRIPT CAPITAL Y */ +"\x0001d4e9" "Z" /* <U0001D4E9> MATHEMATICAL BOLD SCRIPT CAPITAL Z */ +"\x0001d4ea" "a" /* <U0001D4EA> MATHEMATICAL BOLD SCRIPT SMALL A */ +"\x0001d4eb" "b" /* <U0001D4EB> MATHEMATICAL BOLD SCRIPT SMALL B */ +"\x0001d4ec" "c" /* <U0001D4EC> MATHEMATICAL BOLD SCRIPT SMALL C */ +"\x0001d4ed" "d" /* <U0001D4ED> MATHEMATICAL BOLD SCRIPT SMALL D */ +"\x0001d4ee" "e" /* <U0001D4EE> MATHEMATICAL BOLD SCRIPT SMALL E */ +"\x0001d4ef" "f" /* <U0001D4EF> MATHEMATICAL BOLD SCRIPT SMALL F */ +"\x0001d4f0" "g" /* <U0001D4F0> MATHEMATICAL BOLD SCRIPT SMALL G */ +"\x0001d4f1" "h" /* <U0001D4F1> MATHEMATICAL BOLD SCRIPT SMALL H */ +"\x0001d4f2" "i" /* <U0001D4F2> MATHEMATICAL BOLD SCRIPT SMALL I */ +"\x0001d4f3" "j" /* <U0001D4F3> MATHEMATICAL BOLD SCRIPT SMALL J */ +"\x0001d4f4" "k" 
/* <U0001D4F4> MATHEMATICAL BOLD SCRIPT SMALL K */ +"\x0001d4f5" "l" /* <U0001D4F5> MATHEMATICAL BOLD SCRIPT SMALL L */ +"\x0001d4f6" "m" /* <U0001D4F6> MATHEMATICAL BOLD SCRIPT SMALL M */ +"\x0001d4f7" "n" /* <U0001D4F7> MATHEMATICAL BOLD SCRIPT SMALL N */ +"\x0001d4f8" "o" /* <U0001D4F8> MATHEMATICAL BOLD SCRIPT SMALL O */ +"\x0001d4f9" "p" /* <U0001D4F9> MATHEMATICAL BOLD SCRIPT SMALL P */ +"\x0001d4fa" "q" /* <U0001D4FA> MATHEMATICAL BOLD SCRIPT SMALL Q */ +"\x0001d4fb" "r" /* <U0001D4FB> MATHEMATICAL BOLD SCRIPT SMALL R */ +"\x0001d4fc" "s" /* <U0001D4FC> MATHEMATICAL BOLD SCRIPT SMALL S */ +"\x0001d4fd" "t" /* <U0001D4FD> MATHEMATICAL BOLD SCRIPT SMALL T */ +"\x0001d4fe" "u" /* <U0001D4FE> MATHEMATICAL BOLD SCRIPT SMALL U */ +"\x0001d4ff" "v" /* <U0001D4FF> MATHEMATICAL BOLD SCRIPT SMALL V */ +"\x0001d500" "w" /* <U0001D500> MATHEMATICAL BOLD SCRIPT SMALL W */ +"\x0001d501" "x" /* <U0001D501> MATHEMATICAL BOLD SCRIPT SMALL X */ +"\x0001d502" "y" /* <U0001D502> MATHEMATICAL BOLD SCRIPT SMALL Y */ +"\x0001d503" "z" /* <U0001D503> MATHEMATICAL BOLD SCRIPT SMALL Z */ +"\x0001d504" "A" /* <U0001D504> MATHEMATICAL FRAKTUR CAPITAL A */ +"\x0001d505" "B" /* <U0001D505> MATHEMATICAL FRAKTUR CAPITAL B */ +"\x0001d507" "D" /* <U0001D507> MATHEMATICAL FRAKTUR CAPITAL D */ +"\x0001d508" "E" /* <U0001D508> MATHEMATICAL FRAKTUR CAPITAL E */ +"\x0001d509" "F" /* <U0001D509> MATHEMATICAL FRAKTUR CAPITAL F */ +"\x0001d50a" "G" /* <U0001D50A> MATHEMATICAL FRAKTUR CAPITAL G */ +"\x0001d50d" "J" /* <U0001D50D> MATHEMATICAL FRAKTUR CAPITAL J */ +"\x0001d50e" "K" /* <U0001D50E> MATHEMATICAL FRAKTUR CAPITAL K */ +"\x0001d50f" "L" /* <U0001D50F> MATHEMATICAL FRAKTUR CAPITAL L */ +"\x0001d510" "M" /* <U0001D510> MATHEMATICAL FRAKTUR CAPITAL M */ +"\x0001d511" "N" /* <U0001D511> MATHEMATICAL FRAKTUR CAPITAL N */ +"\x0001d512" "O" /* <U0001D512> MATHEMATICAL FRAKTUR CAPITAL O */ +"\x0001d513" "P" /* <U0001D513> MATHEMATICAL FRAKTUR CAPITAL P */ +"\x0001d514" "Q" /* <U0001D514> 
MATHEMATICAL FRAKTUR CAPITAL Q */ +"\x0001d516" "S" /* <U0001D516> MATHEMATICAL FRAKTUR CAPITAL S */ +"\x0001d517" "T" /* <U0001D517> MATHEMATICAL FRAKTUR CAPITAL T */ +"\x0001d518" "U" /* <U0001D518> MATHEMATICAL FRAKTUR CAPITAL U */ +"\x0001d519" "V" /* <U0001D519> MATHEMATICAL FRAKTUR CAPITAL V */ +"\x0001d51a" "W" /* <U0001D51A> MATHEMATICAL FRAKTUR CAPITAL W */ +"\x0001d51b" "X" /* <U0001D51B> MATHEMATICAL FRAKTUR CAPITAL X */ +"\x0001d51c" "Y" /* <U0001D51C> MATHEMATICAL FRAKTUR CAPITAL Y */ +"\x0001d51e" "a" /* <U0001D51E> MATHEMATICAL FRAKTUR SMALL A */ +"\x0001d51f" "b" /* <U0001D51F> MATHEMATICAL FRAKTUR SMALL B */ +"\x0001d520" "c" /* <U0001D520> MATHEMATICAL FRAKTUR SMALL C */ +"\x0001d521" "d" /* <U0001D521> MATHEMATICAL FRAKTUR SMALL D */ +"\x0001d522" "e" /* <U0001D522> MATHEMATICAL FRAKTUR SMALL E */ +"\x0001d523" "f" /* <U0001D523> MATHEMATICAL FRAKTUR SMALL F */ +"\x0001d524" "g" /* <U0001D524> MATHEMATICAL FRAKTUR SMALL G */ +"\x0001d525" "h" /* <U0001D525> MATHEMATICAL FRAKTUR SMALL H */ +"\x0001d526" "i" /* <U0001D526> MATHEMATICAL FRAKTUR SMALL I */ +"\x0001d527" "j" /* <U0001D527> MATHEMATICAL FRAKTUR SMALL J */ +"\x0001d528" "k" /* <U0001D528> MATHEMATICAL FRAKTUR SMALL K */ +"\x0001d529" "l" /* <U0001D529> MATHEMATICAL FRAKTUR SMALL L */ +"\x0001d52a" "m" /* <U0001D52A> MATHEMATICAL FRAKTUR SMALL M */ +"\x0001d52b" "n" /* <U0001D52B> MATHEMATICAL FRAKTUR SMALL N */ +"\x0001d52c" "o" /* <U0001D52C> MATHEMATICAL FRAKTUR SMALL O */ +"\x0001d52d" "p" /* <U0001D52D> MATHEMATICAL FRAKTUR SMALL P */ +"\x0001d52e" "q" /* <U0001D52E> MATHEMATICAL FRAKTUR SMALL Q */ +"\x0001d52f" "r" /* <U0001D52F> MATHEMATICAL FRAKTUR SMALL R */ +"\x0001d530" "s" /* <U0001D530> MATHEMATICAL FRAKTUR SMALL S */ +"\x0001d531" "t" /* <U0001D531> MATHEMATICAL FRAKTUR SMALL T */ +"\x0001d532" "u" /* <U0001D532> MATHEMATICAL FRAKTUR SMALL U */ +"\x0001d533" "v" /* <U0001D533> MATHEMATICAL FRAKTUR SMALL V */ +"\x0001d534" "w" /* <U0001D534> MATHEMATICAL FRAKTUR SMALL W */ 
+"\x0001d535" "x" /* <U0001D535> MATHEMATICAL FRAKTUR SMALL X */ +"\x0001d536" "y" /* <U0001D536> MATHEMATICAL FRAKTUR SMALL Y */ +"\x0001d537" "z" /* <U0001D537> MATHEMATICAL FRAKTUR SMALL Z */ +"\x0001d538" "A" /* <U0001D538> MATHEMATICAL DOUBLE-STRUCK CAPITAL A */ +"\x0001d539" "B" /* <U0001D539> MATHEMATICAL DOUBLE-STRUCK CAPITAL B */ +"\x0001d53b" "D" /* <U0001D53B> MATHEMATICAL DOUBLE-STRUCK CAPITAL D */ +"\x0001d53c" "E" /* <U0001D53C> MATHEMATICAL DOUBLE-STRUCK CAPITAL E */ +"\x0001d53d" "F" /* <U0001D53D> MATHEMATICAL DOUBLE-STRUCK CAPITAL F */ +"\x0001d53e" "G" /* <U0001D53E> MATHEMATICAL DOUBLE-STRUCK CAPITAL G */ +"\x0001d540" "I" /* <U0001D540> MATHEMATICAL DOUBLE-STRUCK CAPITAL I */ +"\x0001d541" "J" /* <U0001D541> MATHEMATICAL DOUBLE-STRUCK CAPITAL J */ +"\x0001d542" "K" /* <U0001D542> MATHEMATICAL DOUBLE-STRUCK CAPITAL K */ +"\x0001d543" "L" /* <U0001D543> MATHEMATICAL DOUBLE-STRUCK CAPITAL L */ +"\x0001d544" "M" /* <U0001D544> MATHEMATICAL DOUBLE-STRUCK CAPITAL M */ +"\x0001d546" "O" /* <U0001D546> MATHEMATICAL DOUBLE-STRUCK CAPITAL O */ +"\x0001d54a" "S" /* <U0001D54A> MATHEMATICAL DOUBLE-STRUCK CAPITAL S */ +"\x0001d54b" "T" /* <U0001D54B> MATHEMATICAL DOUBLE-STRUCK CAPITAL T */ +"\x0001d54c" "U" /* <U0001D54C> MATHEMATICAL DOUBLE-STRUCK CAPITAL U */ +"\x0001d54d" "V" /* <U0001D54D> MATHEMATICAL DOUBLE-STRUCK CAPITAL V */ +"\x0001d54e" "W" /* <U0001D54E> MATHEMATICAL DOUBLE-STRUCK CAPITAL W */ +"\x0001d54f" "X" /* <U0001D54F> MATHEMATICAL DOUBLE-STRUCK CAPITAL X */ +"\x0001d550" "Y" /* <U0001D550> MATHEMATICAL DOUBLE-STRUCK CAPITAL Y */ +"\x0001d552" "a" /* <U0001D552> MATHEMATICAL DOUBLE-STRUCK SMALL A */ +"\x0001d553" "b" /* <U0001D553> MATHEMATICAL DOUBLE-STRUCK SMALL B */ +"\x0001d554" "c" /* <U0001D554> MATHEMATICAL DOUBLE-STRUCK SMALL C */ +"\x0001d555" "d" /* <U0001D555> MATHEMATICAL DOUBLE-STRUCK SMALL D */ +"\x0001d556" "e" /* <U0001D556> MATHEMATICAL DOUBLE-STRUCK SMALL E */ +"\x0001d557" "f" /* <U0001D557> MATHEMATICAL DOUBLE-STRUCK 
SMALL F */ +"\x0001d558" "g" /* <U0001D558> MATHEMATICAL DOUBLE-STRUCK SMALL G */ +"\x0001d559" "h" /* <U0001D559> MATHEMATICAL DOUBLE-STRUCK SMALL H */ +"\x0001d55a" "i" /* <U0001D55A> MATHEMATICAL DOUBLE-STRUCK SMALL I */ +"\x0001d55b" "j" /* <U0001D55B> MATHEMATICAL DOUBLE-STRUCK SMALL J */ +"\x0001d55c" "k" /* <U0001D55C> MATHEMATICAL DOUBLE-STRUCK SMALL K */ +"\x0001d55d" "l" /* <U0001D55D> MATHEMATICAL DOUBLE-STRUCK SMALL L */ +"\x0001d55e" "m" /* <U0001D55E> MATHEMATICAL DOUBLE-STRUCK SMALL M */ +"\x0001d55f" "n" /* <U0001D55F> MATHEMATICAL DOUBLE-STRUCK SMALL N */ +"\x0001d560" "o" /* <U0001D560> MATHEMATICAL DOUBLE-STRUCK SMALL O */ +"\x0001d561" "p" /* <U0001D561> MATHEMATICAL DOUBLE-STRUCK SMALL P */ +"\x0001d562" "q" /* <U0001D562> MATHEMATICAL DOUBLE-STRUCK SMALL Q */ +"\x0001d563" "r" /* <U0001D563> MATHEMATICAL DOUBLE-STRUCK SMALL R */ +"\x0001d564" "s" /* <U0001D564> MATHEMATICAL DOUBLE-STRUCK SMALL S */ +"\x0001d565" "t" /* <U0001D565> MATHEMATICAL DOUBLE-STRUCK SMALL T */ +"\x0001d566" "u" /* <U0001D566> MATHEMATICAL DOUBLE-STRUCK SMALL U */ +"\x0001d567" "v" /* <U0001D567> MATHEMATICAL DOUBLE-STRUCK SMALL V */ +"\x0001d568" "w" /* <U0001D568> MATHEMATICAL DOUBLE-STRUCK SMALL W */ +"\x0001d569" "x" /* <U0001D569> MATHEMATICAL DOUBLE-STRUCK SMALL X */ +"\x0001d56a" "y" /* <U0001D56A> MATHEMATICAL DOUBLE-STRUCK SMALL Y */ +"\x0001d56b" "z" /* <U0001D56B> MATHEMATICAL DOUBLE-STRUCK SMALL Z */ +"\x0001d56c" "A" /* <U0001D56C> MATHEMATICAL BOLD FRAKTUR CAPITAL A */ +"\x0001d56d" "B" /* <U0001D56D> MATHEMATICAL BOLD FRAKTUR CAPITAL B */ +"\x0001d56e" "C" /* <U0001D56E> MATHEMATICAL BOLD FRAKTUR CAPITAL C */ +"\x0001d56f" "D" /* <U0001D56F> MATHEMATICAL BOLD FRAKTUR CAPITAL D */ +"\x0001d570" "E" /* <U0001D570> MATHEMATICAL BOLD FRAKTUR CAPITAL E */ +"\x0001d571" "F" /* <U0001D571> MATHEMATICAL BOLD FRAKTUR CAPITAL F */ +"\x0001d572" "G" /* <U0001D572> MATHEMATICAL BOLD FRAKTUR CAPITAL G */ +"\x0001d573" "H" /* <U0001D573> MATHEMATICAL BOLD FRAKTUR 
CAPITAL H */ +"\x0001d574" "I" /* <U0001D574> MATHEMATICAL BOLD FRAKTUR CAPITAL I */ +"\x0001d575" "J" /* <U0001D575> MATHEMATICAL BOLD FRAKTUR CAPITAL J */ +"\x0001d576" "K" /* <U0001D576> MATHEMATICAL BOLD FRAKTUR CAPITAL K */ +"\x0001d577" "L" /* <U0001D577> MATHEMATICAL BOLD FRAKTUR CAPITAL L */ +"\x0001d578" "M" /* <U0001D578> MATHEMATICAL BOLD FRAKTUR CAPITAL M */ +"\x0001d579" "N" /* <U0001D579> MATHEMATICAL BOLD FRAKTUR CAPITAL N */ +"\x0001d57a" "O" /* <U0001D57A> MATHEMATICAL BOLD FRAKTUR CAPITAL O */ +"\x0001d57b" "P" /* <U0001D57B> MATHEMATICAL BOLD FRAKTUR CAPITAL P */ +"\x0001d57c" "Q" /* <U0001D57C> MATHEMATICAL BOLD FRAKTUR CAPITAL Q */ +"\x0001d57d" "R" /* <U0001D57D> MATHEMATICAL BOLD FRAKTUR CAPITAL R */ +"\x0001d57e" "S" /* <U0001D57E> MATHEMATICAL BOLD FRAKTUR CAPITAL S */ +"\x0001d57f" "T" /* <U0001D57F> MATHEMATICAL BOLD FRAKTUR CAPITAL T */ +"\x0001d580" "U" /* <U0001D580> MATHEMATICAL BOLD FRAKTUR CAPITAL U */ +"\x0001d581" "V" /* <U0001D581> MATHEMATICAL BOLD FRAKTUR CAPITAL V */ +"\x0001d582" "W" /* <U0001D582> MATHEMATICAL BOLD FRAKTUR CAPITAL W */ +"\x0001d583" "X" /* <U0001D583> MATHEMATICAL BOLD FRAKTUR CAPITAL X */ +"\x0001d584" "Y" /* <U0001D584> MATHEMATICAL BOLD FRAKTUR CAPITAL Y */ +"\x0001d585" "Z" /* <U0001D585> MATHEMATICAL BOLD FRAKTUR CAPITAL Z */ +"\x0001d586" "a" /* <U0001D586> MATHEMATICAL BOLD FRAKTUR SMALL A */ +"\x0001d587" "b" /* <U0001D587> MATHEMATICAL BOLD FRAKTUR SMALL B */ +"\x0001d588" "c" /* <U0001D588> MATHEMATICAL BOLD FRAKTUR SMALL C */ +"\x0001d589" "d" /* <U0001D589> MATHEMATICAL BOLD FRAKTUR SMALL D */ +"\x0001d58a" "e" /* <U0001D58A> MATHEMATICAL BOLD FRAKTUR SMALL E */ +"\x0001d58b" "f" /* <U0001D58B> MATHEMATICAL BOLD FRAKTUR SMALL F */ +"\x0001d58c" "g" /* <U0001D58C> MATHEMATICAL BOLD FRAKTUR SMALL G */ +"\x0001d58d" "h" /* <U0001D58D> MATHEMATICAL BOLD FRAKTUR SMALL H */ +"\x0001d58e" "i" /* <U0001D58E> MATHEMATICAL BOLD FRAKTUR SMALL I */ +"\x0001d58f" "j" /* <U0001D58F> MATHEMATICAL BOLD FRAKTUR 
SMALL J */ +"\x0001d590" "k" /* <U0001D590> MATHEMATICAL BOLD FRAKTUR SMALL K */ +"\x0001d591" "l" /* <U0001D591> MATHEMATICAL BOLD FRAKTUR SMALL L */ +"\x0001d592" "m" /* <U0001D592> MATHEMATICAL BOLD FRAKTUR SMALL M */ +"\x0001d593" "n" /* <U0001D593> MATHEMATICAL BOLD FRAKTUR SMALL N */ +"\x0001d594" "o" /* <U0001D594> MATHEMATICAL BOLD FRAKTUR SMALL O */ +"\x0001d595" "p" /* <U0001D595> MATHEMATICAL BOLD FRAKTUR SMALL P */ +"\x0001d596" "q" /* <U0001D596> MATHEMATICAL BOLD FRAKTUR SMALL Q */ +"\x0001d597" "r" /* <U0001D597> MATHEMATICAL BOLD FRAKTUR SMALL R */ +"\x0001d598" "s" /* <U0001D598> MATHEMATICAL BOLD FRAKTUR SMALL S */ +"\x0001d599" "t" /* <U0001D599> MATHEMATICAL BOLD FRAKTUR SMALL T */ +"\x0001d59a" "u" /* <U0001D59A> MATHEMATICAL BOLD FRAKTUR SMALL U */ +"\x0001d59b" "v" /* <U0001D59B> MATHEMATICAL BOLD FRAKTUR SMALL V */ +"\x0001d59c" "w" /* <U0001D59C> MATHEMATICAL BOLD FRAKTUR SMALL W */ +"\x0001d59d" "x" /* <U0001D59D> MATHEMATICAL BOLD FRAKTUR SMALL X */ +"\x0001d59e" "y" /* <U0001D59E> MATHEMATICAL BOLD FRAKTUR SMALL Y */ +"\x0001d59f" "z" /* <U0001D59F> MATHEMATICAL BOLD FRAKTUR SMALL Z */ +"\x0001d5a0" "A" /* <U0001D5A0> MATHEMATICAL SANS-SERIF CAPITAL A */ +"\x0001d5a1" "B" /* <U0001D5A1> MATHEMATICAL SANS-SERIF CAPITAL B */ +"\x0001d5a2" "C" /* <U0001D5A2> MATHEMATICAL SANS-SERIF CAPITAL C */ +"\x0001d5a3" "D" /* <U0001D5A3> MATHEMATICAL SANS-SERIF CAPITAL D */ +"\x0001d5a4" "E" /* <U0001D5A4> MATHEMATICAL SANS-SERIF CAPITAL E */ +"\x0001d5a5" "F" /* <U0001D5A5> MATHEMATICAL SANS-SERIF CAPITAL F */ +"\x0001d5a6" "G" /* <U0001D5A6> MATHEMATICAL SANS-SERIF CAPITAL G */ +"\x0001d5a7" "H" /* <U0001D5A7> MATHEMATICAL SANS-SERIF CAPITAL H */ +"\x0001d5a8" "I" /* <U0001D5A8> MATHEMATICAL SANS-SERIF CAPITAL I */ +"\x0001d5a9" "J" /* <U0001D5A9> MATHEMATICAL SANS-SERIF CAPITAL J */ +"\x0001d5aa" "K" /* <U0001D5AA> MATHEMATICAL SANS-SERIF CAPITAL K */ +"\x0001d5ab" "L" /* <U0001D5AB> MATHEMATICAL SANS-SERIF CAPITAL L */ +"\x0001d5ac" "M" /* 
<U0001D5AC> MATHEMATICAL SANS-SERIF CAPITAL M */ +"\x0001d5ad" "N" /* <U0001D5AD> MATHEMATICAL SANS-SERIF CAPITAL N */ +"\x0001d5ae" "O" /* <U0001D5AE> MATHEMATICAL SANS-SERIF CAPITAL O */ +"\x0001d5af" "P" /* <U0001D5AF> MATHEMATICAL SANS-SERIF CAPITAL P */ +"\x0001d5b0" "Q" /* <U0001D5B0> MATHEMATICAL SANS-SERIF CAPITAL Q */ +"\x0001d5b1" "R" /* <U0001D5B1> MATHEMATICAL SANS-SERIF CAPITAL R */ +"\x0001d5b2" "S" /* <U0001D5B2> MATHEMATICAL SANS-SERIF CAPITAL S */ +"\x0001d5b3" "T" /* <U0001D5B3> MATHEMATICAL SANS-SERIF CAPITAL T */ +"\x0001d5b4" "U" /* <U0001D5B4> MATHEMATICAL SANS-SERIF CAPITAL U */ +"\x0001d5b5" "V" /* <U0001D5B5> MATHEMATICAL SANS-SERIF CAPITAL V */ +"\x0001d5b6" "W" /* <U0001D5B6> MATHEMATICAL SANS-SERIF CAPITAL W */ +"\x0001d5b7" "X" /* <U0001D5B7> MATHEMATICAL SANS-SERIF CAPITAL X */ +"\x0001d5b8" "Y" /* <U0001D5B8> MATHEMATICAL SANS-SERIF CAPITAL Y */ +"\x0001d5b9" "Z" /* <U0001D5B9> MATHEMATICAL SANS-SERIF CAPITAL Z */ +"\x0001d5ba" "a" /* <U0001D5BA> MATHEMATICAL SANS-SERIF SMALL A */ +"\x0001d5bb" "b" /* <U0001D5BB> MATHEMATICAL SANS-SERIF SMALL B */ +"\x0001d5bc" "c" /* <U0001D5BC> MATHEMATICAL SANS-SERIF SMALL C */ +"\x0001d5bd" "d" /* <U0001D5BD> MATHEMATICAL SANS-SERIF SMALL D */ +"\x0001d5be" "e" /* <U0001D5BE> MATHEMATICAL SANS-SERIF SMALL E */ +"\x0001d5bf" "f" /* <U0001D5BF> MATHEMATICAL SANS-SERIF SMALL F */ +"\x0001d5c0" "g" /* <U0001D5C0> MATHEMATICAL SANS-SERIF SMALL G */ +"\x0001d5c1" "h" /* <U0001D5C1> MATHEMATICAL SANS-SERIF SMALL H */ +"\x0001d5c2" "i" /* <U0001D5C2> MATHEMATICAL SANS-SERIF SMALL I */ +"\x0001d5c3" "j" /* <U0001D5C3> MATHEMATICAL SANS-SERIF SMALL J */ +"\x0001d5c4" "k" /* <U0001D5C4> MATHEMATICAL SANS-SERIF SMALL K */ +"\x0001d5c5" "l" /* <U0001D5C5> MATHEMATICAL SANS-SERIF SMALL L */ +"\x0001d5c6" "m" /* <U0001D5C6> MATHEMATICAL SANS-SERIF SMALL M */ +"\x0001d5c7" "n" /* <U0001D5C7> MATHEMATICAL SANS-SERIF SMALL N */ +"\x0001d5c8" "o" /* <U0001D5C8> MATHEMATICAL SANS-SERIF SMALL O */ +"\x0001d5c9" "p" /* 
<U0001D5C9> MATHEMATICAL SANS-SERIF SMALL P */ +"\x0001d5ca" "q" /* <U0001D5CA> MATHEMATICAL SANS-SERIF SMALL Q */ +"\x0001d5cb" "r" /* <U0001D5CB> MATHEMATICAL SANS-SERIF SMALL R */ +"\x0001d5cc" "s" /* <U0001D5CC> MATHEMATICAL SANS-SERIF SMALL S */ +"\x0001d5cd" "t" /* <U0001D5CD> MATHEMATICAL SANS-SERIF SMALL T */ +"\x0001d5ce" "u" /* <U0001D5CE> MATHEMATICAL SANS-SERIF SMALL U */ +"\x0001d5cf" "v" /* <U0001D5CF> MATHEMATICAL SANS-SERIF SMALL V */ +"\x0001d5d0" "w" /* <U0001D5D0> MATHEMATICAL SANS-SERIF SMALL W */ +"\x0001d5d1" "x" /* <U0001D5D1> MATHEMATICAL SANS-SERIF SMALL X */ +"\x0001d5d2" "y" /* <U0001D5D2> MATHEMATICAL SANS-SERIF SMALL Y */ +"\x0001d5d3" "z" /* <U0001D5D3> MATHEMATICAL SANS-SERIF SMALL Z */ +"\x0001d5d4" "A" /* <U0001D5D4> MATHEMATICAL SANS-SERIF BOLD CAPITAL A */ +"\x0001d5d5" "B" /* <U0001D5D5> MATHEMATICAL SANS-SERIF BOLD CAPITAL B */ +"\x0001d5d6" "C" /* <U0001D5D6> MATHEMATICAL SANS-SERIF BOLD CAPITAL C */ +"\x0001d5d7" "D" /* <U0001D5D7> MATHEMATICAL SANS-SERIF BOLD CAPITAL D */ +"\x0001d5d8" "E" /* <U0001D5D8> MATHEMATICAL SANS-SERIF BOLD CAPITAL E */ +"\x0001d5d9" "F" /* <U0001D5D9> MATHEMATICAL SANS-SERIF BOLD CAPITAL F */ +"\x0001d5da" "G" /* <U0001D5DA> MATHEMATICAL SANS-SERIF BOLD CAPITAL G */ +"\x0001d5db" "H" /* <U0001D5DB> MATHEMATICAL SANS-SERIF BOLD CAPITAL H */ +"\x0001d5dc" "I" /* <U0001D5DC> MATHEMATICAL SANS-SERIF BOLD CAPITAL I */ +"\x0001d5dd" "J" /* <U0001D5DD> MATHEMATICAL SANS-SERIF BOLD CAPITAL J */ +"\x0001d5de" "K" /* <U0001D5DE> MATHEMATICAL SANS-SERIF BOLD CAPITAL K */ +"\x0001d5df" "L" /* <U0001D5DF> MATHEMATICAL SANS-SERIF BOLD CAPITAL L */ +"\x0001d5e0" "M" /* <U0001D5E0> MATHEMATICAL SANS-SERIF BOLD CAPITAL M */ +"\x0001d5e1" "N" /* <U0001D5E1> MATHEMATICAL SANS-SERIF BOLD CAPITAL N */ +"\x0001d5e2" "O" /* <U0001D5E2> MATHEMATICAL SANS-SERIF BOLD CAPITAL O */ +"\x0001d5e3" "P" /* <U0001D5E3> MATHEMATICAL SANS-SERIF BOLD CAPITAL P */ +"\x0001d5e4" "Q" /* <U0001D5E4> MATHEMATICAL SANS-SERIF BOLD CAPITAL Q 
*/ +"\x0001d5e5" "R" /* <U0001D5E5> MATHEMATICAL SANS-SERIF BOLD CAPITAL R */ +"\x0001d5e6" "S" /* <U0001D5E6> MATHEMATICAL SANS-SERIF BOLD CAPITAL S */ +"\x0001d5e7" "T" /* <U0001D5E7> MATHEMATICAL SANS-SERIF BOLD CAPITAL T */ +"\x0001d5e8" "U" /* <U0001D5E8> MATHEMATICAL SANS-SERIF BOLD CAPITAL U */ +"\x0001d5e9" "V" /* <U0001D5E9> MATHEMATICAL SANS-SERIF BOLD CAPITAL V */ +"\x0001d5ea" "W" /* <U0001D5EA> MATHEMATICAL SANS-SERIF BOLD CAPITAL W */ +"\x0001d5eb" "X" /* <U0001D5EB> MATHEMATICAL SANS-SERIF BOLD CAPITAL X */ +"\x0001d5ec" "Y" /* <U0001D5EC> MATHEMATICAL SANS-SERIF BOLD CAPITAL Y */ +"\x0001d5ed" "Z" /* <U0001D5ED> MATHEMATICAL SANS-SERIF BOLD CAPITAL Z */ +"\x0001d5ee" "a" /* <U0001D5EE> MATHEMATICAL SANS-SERIF BOLD SMALL A */ +"\x0001d5ef" "b" /* <U0001D5EF> MATHEMATICAL SANS-SERIF BOLD SMALL B */ +"\x0001d5f0" "c" /* <U0001D5F0> MATHEMATICAL SANS-SERIF BOLD SMALL C */ +"\x0001d5f1" "d" /* <U0001D5F1> MATHEMATICAL SANS-SERIF BOLD SMALL D */ +"\x0001d5f2" "e" /* <U0001D5F2> MATHEMATICAL SANS-SERIF BOLD SMALL E */ +"\x0001d5f3" "f" /* <U0001D5F3> MATHEMATICAL SANS-SERIF BOLD SMALL F */ +"\x0001d5f4" "g" /* <U0001D5F4> MATHEMATICAL SANS-SERIF BOLD SMALL G */ +"\x0001d5f5" "h" /* <U0001D5F5> MATHEMATICAL SANS-SERIF BOLD SMALL H */ +"\x0001d5f6" "i" /* <U0001D5F6> MATHEMATICAL SANS-SERIF BOLD SMALL I */ +"\x0001d5f7" "j" /* <U0001D5F7> MATHEMATICAL SANS-SERIF BOLD SMALL J */ +"\x0001d5f8" "k" /* <U0001D5F8> MATHEMATICAL SANS-SERIF BOLD SMALL K */ +"\x0001d5f9" "l" /* <U0001D5F9> MATHEMATICAL SANS-SERIF BOLD SMALL L */ +"\x0001d5fa" "m" /* <U0001D5FA> MATHEMATICAL SANS-SERIF BOLD SMALL M */ +"\x0001d5fb" "n" /* <U0001D5FB> MATHEMATICAL SANS-SERIF BOLD SMALL N */ +"\x0001d5fc" "o" /* <U0001D5FC> MATHEMATICAL SANS-SERIF BOLD SMALL O */ +"\x0001d5fd" "p" /* <U0001D5FD> MATHEMATICAL SANS-SERIF BOLD SMALL P */ +"\x0001d5fe" "q" /* <U0001D5FE> MATHEMATICAL SANS-SERIF BOLD SMALL Q */ +"\x0001d5ff" "r" /* <U0001D5FF> MATHEMATICAL SANS-SERIF BOLD SMALL R */ 
+"\x0001d600" "s" /* <U0001D600> MATHEMATICAL SANS-SERIF BOLD SMALL S */ +"\x0001d601" "t" /* <U0001D601> MATHEMATICAL SANS-SERIF BOLD SMALL T */ +"\x0001d602" "u" /* <U0001D602> MATHEMATICAL SANS-SERIF BOLD SMALL U */ +"\x0001d603" "v" /* <U0001D603> MATHEMATICAL SANS-SERIF BOLD SMALL V */ +"\x0001d604" "w" /* <U0001D604> MATHEMATICAL SANS-SERIF BOLD SMALL W */ +"\x0001d605" "x" /* <U0001D605> MATHEMATICAL SANS-SERIF BOLD SMALL X */ +"\x0001d606" "y" /* <U0001D606> MATHEMATICAL SANS-SERIF BOLD SMALL Y */ +"\x0001d607" "z" /* <U0001D607> MATHEMATICAL SANS-SERIF BOLD SMALL Z */ +"\x0001d608" "A" /* <U0001D608> MATHEMATICAL SANS-SERIF ITALIC CAPITAL A */ +"\x0001d609" "B" /* <U0001D609> MATHEMATICAL SANS-SERIF ITALIC CAPITAL B */ +"\x0001d60a" "C" /* <U0001D60A> MATHEMATICAL SANS-SERIF ITALIC CAPITAL C */ +"\x0001d60b" "D" /* <U0001D60B> MATHEMATICAL SANS-SERIF ITALIC CAPITAL D */ +"\x0001d60c" "E" /* <U0001D60C> MATHEMATICAL SANS-SERIF ITALIC CAPITAL E */ +"\x0001d60d" "F" /* <U0001D60D> MATHEMATICAL SANS-SERIF ITALIC CAPITAL F */ +"\x0001d60e" "G" /* <U0001D60E> MATHEMATICAL SANS-SERIF ITALIC CAPITAL G */ +"\x0001d60f" "H" /* <U0001D60F> MATHEMATICAL SANS-SERIF ITALIC CAPITAL H */ +"\x0001d610" "I" /* <U0001D610> MATHEMATICAL SANS-SERIF ITALIC CAPITAL I */ +"\x0001d611" "J" /* <U0001D611> MATHEMATICAL SANS-SERIF ITALIC CAPITAL J */ +"\x0001d612" "K" /* <U0001D612> MATHEMATICAL SANS-SERIF ITALIC CAPITAL K */ +"\x0001d613" "L" /* <U0001D613> MATHEMATICAL SANS-SERIF ITALIC CAPITAL L */ +"\x0001d614" "M" /* <U0001D614> MATHEMATICAL SANS-SERIF ITALIC CAPITAL M */ +"\x0001d615" "N" /* <U0001D615> MATHEMATICAL SANS-SERIF ITALIC CAPITAL N */ +"\x0001d616" "O" /* <U0001D616> MATHEMATICAL SANS-SERIF ITALIC CAPITAL O */ +"\x0001d617" "P" /* <U0001D617> MATHEMATICAL SANS-SERIF ITALIC CAPITAL P */ +"\x0001d618" "Q" /* <U0001D618> MATHEMATICAL SANS-SERIF ITALIC CAPITAL Q */ +"\x0001d619" "R" /* <U0001D619> MATHEMATICAL SANS-SERIF ITALIC CAPITAL R */ +"\x0001d61a" "S" /* 
<U0001D61A> MATHEMATICAL SANS-SERIF ITALIC CAPITAL S */ +"\x0001d61b" "T" /* <U0001D61B> MATHEMATICAL SANS-SERIF ITALIC CAPITAL T */ +"\x0001d61c" "U" /* <U0001D61C> MATHEMATICAL SANS-SERIF ITALIC CAPITAL U */ +"\x0001d61d" "V" /* <U0001D61D> MATHEMATICAL SANS-SERIF ITALIC CAPITAL V */ +"\x0001d61e" "W" /* <U0001D61E> MATHEMATICAL SANS-SERIF ITALIC CAPITAL W */ +"\x0001d61f" "X" /* <U0001D61F> MATHEMATICAL SANS-SERIF ITALIC CAPITAL X */ +"\x0001d620" "Y" /* <U0001D620> MATHEMATICAL SANS-SERIF ITALIC CAPITAL Y */ +"\x0001d621" "Z" /* <U0001D621> MATHEMATICAL SANS-SERIF ITALIC CAPITAL Z */ +"\x0001d622" "a" /* <U0001D622> MATHEMATICAL SANS-SERIF ITALIC SMALL A */ +"\x0001d623" "b" /* <U0001D623> MATHEMATICAL SANS-SERIF ITALIC SMALL B */ +"\x0001d624" "c" /* <U0001D624> MATHEMATICAL SANS-SERIF ITALIC SMALL C */ +"\x0001d625" "d" /* <U0001D625> MATHEMATICAL SANS-SERIF ITALIC SMALL D */ +"\x0001d626" "e" /* <U0001D626> MATHEMATICAL SANS-SERIF ITALIC SMALL E */ +"\x0001d627" "f" /* <U0001D627> MATHEMATICAL SANS-SERIF ITALIC SMALL F */ +"\x0001d628" "g" /* <U0001D628> MATHEMATICAL SANS-SERIF ITALIC SMALL G */ +"\x0001d629" "h" /* <U0001D629> MATHEMATICAL SANS-SERIF ITALIC SMALL H */ +"\x0001d62a" "i" /* <U0001D62A> MATHEMATICAL SANS-SERIF ITALIC SMALL I */ +"\x0001d62b" "j" /* <U0001D62B> MATHEMATICAL SANS-SERIF ITALIC SMALL J */ +"\x0001d62c" "k" /* <U0001D62C> MATHEMATICAL SANS-SERIF ITALIC SMALL K */ +"\x0001d62d" "l" /* <U0001D62D> MATHEMATICAL SANS-SERIF ITALIC SMALL L */ +"\x0001d62e" "m" /* <U0001D62E> MATHEMATICAL SANS-SERIF ITALIC SMALL M */ +"\x0001d62f" "n" /* <U0001D62F> MATHEMATICAL SANS-SERIF ITALIC SMALL N */ +"\x0001d630" "o" /* <U0001D630> MATHEMATICAL SANS-SERIF ITALIC SMALL O */ +"\x0001d631" "p" /* <U0001D631> MATHEMATICAL SANS-SERIF ITALIC SMALL P */ +"\x0001d632" "q" /* <U0001D632> MATHEMATICAL SANS-SERIF ITALIC SMALL Q */ +"\x0001d633" "r" /* <U0001D633> MATHEMATICAL SANS-SERIF ITALIC SMALL R */ +"\x0001d634" "s" /* <U0001D634> MATHEMATICAL 
SANS-SERIF ITALIC SMALL S */ +"\x0001d635" "t" /* <U0001D635> MATHEMATICAL SANS-SERIF ITALIC SMALL T */ +"\x0001d636" "u" /* <U0001D636> MATHEMATICAL SANS-SERIF ITALIC SMALL U */ +"\x0001d637" "v" /* <U0001D637> MATHEMATICAL SANS-SERIF ITALIC SMALL V */ +"\x0001d638" "w" /* <U0001D638> MATHEMATICAL SANS-SERIF ITALIC SMALL W */ +"\x0001d639" "x" /* <U0001D639> MATHEMATICAL SANS-SERIF ITALIC SMALL X */ +"\x0001d63a" "y" /* <U0001D63A> MATHEMATICAL SANS-SERIF ITALIC SMALL Y */ +"\x0001d63b" "z" /* <U0001D63B> MATHEMATICAL SANS-SERIF ITALIC SMALL Z */ +"\x0001d63c" "A" /* <U0001D63C> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL A */ +"\x0001d63d" "B" /* <U0001D63D> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL B */ +"\x0001d63e" "C" /* <U0001D63E> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL C */ +"\x0001d63f" "D" /* <U0001D63F> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL D */ +"\x0001d640" "E" /* <U0001D640> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL E */ +"\x0001d641" "F" /* <U0001D641> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL F */ +"\x0001d642" "G" /* <U0001D642> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL G */ +"\x0001d643" "H" /* <U0001D643> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL H */ +"\x0001d644" "I" /* <U0001D644> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL I */ +"\x0001d645" "J" /* <U0001D645> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL J */ +"\x0001d646" "K" /* <U0001D646> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL K */ +"\x0001d647" "L" /* <U0001D647> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL L */ +"\x0001d648" "M" /* <U0001D648> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL M */ +"\x0001d649" "N" /* <U0001D649> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL N */ +"\x0001d64a" "O" /* <U0001D64A> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL O */ +"\x0001d64b" "P" /* <U0001D64B> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL P */ +"\x0001d64c" "Q" /* <U0001D64C> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Q */ +"\x0001d64d" "R" /* <U0001D64D> MATHEMATICAL 
SANS-SERIF BOLD ITALIC CAPITAL R */ +"\x0001d64e" "S" /* <U0001D64E> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL S */ +"\x0001d64f" "T" /* <U0001D64F> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL T */ +"\x0001d650" "U" /* <U0001D650> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL U */ +"\x0001d651" "V" /* <U0001D651> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL V */ +"\x0001d652" "W" /* <U0001D652> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL W */ +"\x0001d653" "X" /* <U0001D653> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL X */ +"\x0001d654" "Y" /* <U0001D654> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Y */ +"\x0001d655" "Z" /* <U0001D655> MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Z */ +"\x0001d656" "a" /* <U0001D656> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL A */ +"\x0001d657" "b" /* <U0001D657> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL B */ +"\x0001d658" "c" /* <U0001D658> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL C */ +"\x0001d659" "d" /* <U0001D659> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL D */ +"\x0001d65a" "e" /* <U0001D65A> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL E */ +"\x0001d65b" "f" /* <U0001D65B> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL F */ +"\x0001d65c" "g" /* <U0001D65C> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL G */ +"\x0001d65d" "h" /* <U0001D65D> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL H */ +"\x0001d65e" "i" /* <U0001D65E> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I */ +"\x0001d65f" "j" /* <U0001D65F> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J */ +"\x0001d660" "k" /* <U0001D660> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL K */ +"\x0001d661" "l" /* <U0001D661> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL L */ +"\x0001d662" "m" /* <U0001D662> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL M */ +"\x0001d663" "n" /* <U0001D663> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL N */ +"\x0001d664" "o" /* <U0001D664> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL O */ +"\x0001d665" "p" /* <U0001D665> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL P */ +"\x0001d666" "q" /* 
<U0001D666> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL Q */ +"\x0001d667" "r" /* <U0001D667> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL R */ +"\x0001d668" "s" /* <U0001D668> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL S */ +"\x0001d669" "t" /* <U0001D669> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL T */ +"\x0001d66a" "u" /* <U0001D66A> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL U */ +"\x0001d66b" "v" /* <U0001D66B> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL V */ +"\x0001d66c" "w" /* <U0001D66C> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL W */ +"\x0001d66d" "x" /* <U0001D66D> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL X */ +"\x0001d66e" "y" /* <U0001D66E> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL Y */ +"\x0001d66f" "z" /* <U0001D66F> MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL Z */ +"\x0001d670" "A" /* <U0001D670> MATHEMATICAL MONOSPACE CAPITAL A */ +"\x0001d671" "B" /* <U0001D671> MATHEMATICAL MONOSPACE CAPITAL B */ +"\x0001d672" "C" /* <U0001D672> MATHEMATICAL MONOSPACE CAPITAL C */ +"\x0001d673" "D" /* <U0001D673> MATHEMATICAL MONOSPACE CAPITAL D */ +"\x0001d674" "E" /* <U0001D674> MATHEMATICAL MONOSPACE CAPITAL E */ +"\x0001d675" "F" /* <U0001D675> MATHEMATICAL MONOSPACE CAPITAL F */ +"\x0001d676" "G" /* <U0001D676> MATHEMATICAL MONOSPACE CAPITAL G */ +"\x0001d677" "H" /* <U0001D677> MATHEMATICAL MONOSPACE CAPITAL H */ +"\x0001d678" "I" /* <U0001D678> MATHEMATICAL MONOSPACE CAPITAL I */ +"\x0001d679" "J" /* <U0001D679> MATHEMATICAL MONOSPACE CAPITAL J */ +"\x0001d67a" "K" /* <U0001D67A> MATHEMATICAL MONOSPACE CAPITAL K */ +"\x0001d67b" "L" /* <U0001D67B> MATHEMATICAL MONOSPACE CAPITAL L */ +"\x0001d67c" "M" /* <U0001D67C> MATHEMATICAL MONOSPACE CAPITAL M */ +"\x0001d67d" "N" /* <U0001D67D> MATHEMATICAL MONOSPACE CAPITAL N */ +"\x0001d67e" "O" /* <U0001D67E> MATHEMATICAL MONOSPACE CAPITAL O */ +"\x0001d67f" "P" /* <U0001D67F> MATHEMATICAL MONOSPACE CAPITAL P */ +"\x0001d680" "Q" /* <U0001D680> MATHEMATICAL MONOSPACE CAPITAL Q */ +"\x0001d681" "R" /* <U0001D681> MATHEMATICAL 
MONOSPACE CAPITAL R */ +"\x0001d682" "S" /* <U0001D682> MATHEMATICAL MONOSPACE CAPITAL S */ +"\x0001d683" "T" /* <U0001D683> MATHEMATICAL MONOSPACE CAPITAL T */ +"\x0001d684" "U" /* <U0001D684> MATHEMATICAL MONOSPACE CAPITAL U */ +"\x0001d685" "V" /* <U0001D685> MATHEMATICAL MONOSPACE CAPITAL V */ +"\x0001d686" "W" /* <U0001D686> MATHEMATICAL MONOSPACE CAPITAL W */ +"\x0001d687" "X" /* <U0001D687> MATHEMATICAL MONOSPACE CAPITAL X */ +"\x0001d688" "Y" /* <U0001D688> MATHEMATICAL MONOSPACE CAPITAL Y */ +"\x0001d689" "Z" /* <U0001D689> MATHEMATICAL MONOSPACE CAPITAL Z */ +"\x0001d68a" "a" /* <U0001D68A> MATHEMATICAL MONOSPACE SMALL A */ +"\x0001d68b" "b" /* <U0001D68B> MATHEMATICAL MONOSPACE SMALL B */ +"\x0001d68c" "c" /* <U0001D68C> MATHEMATICAL MONOSPACE SMALL C */ +"\x0001d68d" "d" /* <U0001D68D> MATHEMATICAL MONOSPACE SMALL D */ +"\x0001d68e" "e" /* <U0001D68E> MATHEMATICAL MONOSPACE SMALL E */ +"\x0001d68f" "f" /* <U0001D68F> MATHEMATICAL MONOSPACE SMALL F */ +"\x0001d690" "g" /* <U0001D690> MATHEMATICAL MONOSPACE SMALL G */ +"\x0001d691" "h" /* <U0001D691> MATHEMATICAL MONOSPACE SMALL H */ +"\x0001d692" "i" /* <U0001D692> MATHEMATICAL MONOSPACE SMALL I */ +"\x0001d693" "j" /* <U0001D693> MATHEMATICAL MONOSPACE SMALL J */ +"\x0001d694" "k" /* <U0001D694> MATHEMATICAL MONOSPACE SMALL K */ +"\x0001d695" "l" /* <U0001D695> MATHEMATICAL MONOSPACE SMALL L */ +"\x0001d696" "m" /* <U0001D696> MATHEMATICAL MONOSPACE SMALL M */ +"\x0001d697" "n" /* <U0001D697> MATHEMATICAL MONOSPACE SMALL N */ +"\x0001d698" "o" /* <U0001D698> MATHEMATICAL MONOSPACE SMALL O */ +"\x0001d699" "p" /* <U0001D699> MATHEMATICAL MONOSPACE SMALL P */ +"\x0001d69a" "q" /* <U0001D69A> MATHEMATICAL MONOSPACE SMALL Q */ +"\x0001d69b" "r" /* <U0001D69B> MATHEMATICAL MONOSPACE SMALL R */ +"\x0001d69c" "s" /* <U0001D69C> MATHEMATICAL MONOSPACE SMALL S */ +"\x0001d69d" "t" /* <U0001D69D> MATHEMATICAL MONOSPACE SMALL T */ +"\x0001d69e" "u" /* <U0001D69E> MATHEMATICAL MONOSPACE SMALL U */ +"\x0001d69f" "v" 
/* <U0001D69F> MATHEMATICAL MONOSPACE SMALL V */ +"\x0001d6a0" "w" /* <U0001D6A0> MATHEMATICAL MONOSPACE SMALL W */ +"\x0001d6a1" "x" /* <U0001D6A1> MATHEMATICAL MONOSPACE SMALL X */ +"\x0001d6a2" "y" /* <U0001D6A2> MATHEMATICAL MONOSPACE SMALL Y */ +"\x0001d6a3" "z" /* <U0001D6A3> MATHEMATICAL MONOSPACE SMALL Z */ +"\x0001d7ce" "0" /* <U0001D7CE> MATHEMATICAL BOLD DIGIT ZERO */ +"\x0001d7cf" "1" /* <U0001D7CF> MATHEMATICAL BOLD DIGIT ONE */ +"\x0001d7d0" "2" /* <U0001D7D0> MATHEMATICAL BOLD DIGIT TWO */ +"\x0001d7d1" "3" /* <U0001D7D1> MATHEMATICAL BOLD DIGIT THREE */ +"\x0001d7d2" "4" /* <U0001D7D2> MATHEMATICAL BOLD DIGIT FOUR */ +"\x0001d7d3" "5" /* <U0001D7D3> MATHEMATICAL BOLD DIGIT FIVE */ +"\x0001d7d4" "6" /* <U0001D7D4> MATHEMATICAL BOLD DIGIT SIX */ +"\x0001d7d5" "7" /* <U0001D7D5> MATHEMATICAL BOLD DIGIT SEVEN */ +"\x0001d7d6" "8" /* <U0001D7D6> MATHEMATICAL BOLD DIGIT EIGHT */ +"\x0001d7d7" "9" /* <U0001D7D7> MATHEMATICAL BOLD DIGIT NINE */ +"\x0001d7d8" "0" /* <U0001D7D8> MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO */ +"\x0001d7d9" "1" /* <U0001D7D9> MATHEMATICAL DOUBLE-STRUCK DIGIT ONE */ +"\x0001d7da" "2" /* <U0001D7DA> MATHEMATICAL DOUBLE-STRUCK DIGIT TWO */ +"\x0001d7db" "3" /* <U0001D7DB> MATHEMATICAL DOUBLE-STRUCK DIGIT THREE */ +"\x0001d7dc" "4" /* <U0001D7DC> MATHEMATICAL DOUBLE-STRUCK DIGIT FOUR */ +"\x0001d7dd" "5" /* <U0001D7DD> MATHEMATICAL DOUBLE-STRUCK DIGIT FIVE */ +"\x0001d7de" "6" /* <U0001D7DE> MATHEMATICAL DOUBLE-STRUCK DIGIT SIX */ +"\x0001d7df" "7" /* <U0001D7DF> MATHEMATICAL DOUBLE-STRUCK DIGIT SEVEN */ +"\x0001d7e0" "8" /* <U0001D7E0> MATHEMATICAL DOUBLE-STRUCK DIGIT EIGHT */ +"\x0001d7e1" "9" /* <U0001D7E1> MATHEMATICAL DOUBLE-STRUCK DIGIT NINE */ +"\x0001d7e2" "0" /* <U0001D7E2> MATHEMATICAL SANS-SERIF DIGIT ZERO */ +"\x0001d7e3" "1" /* <U0001D7E3> MATHEMATICAL SANS-SERIF DIGIT ONE */ +"\x0001d7e4" "2" /* <U0001D7E4> MATHEMATICAL SANS-SERIF DIGIT TWO */ +"\x0001d7e5" "3" /* <U0001D7E5> MATHEMATICAL SANS-SERIF DIGIT THREE */ 
+"\x0001d7e6" "4" /* <U0001D7E6> MATHEMATICAL SANS-SERIF DIGIT FOUR */ +"\x0001d7e7" "5" /* <U0001D7E7> MATHEMATICAL SANS-SERIF DIGIT FIVE */ +"\x0001d7e8" "6" /* <U0001D7E8> MATHEMATICAL SANS-SERIF DIGIT SIX */ +"\x0001d7e9" "7" /* <U0001D7E9> MATHEMATICAL SANS-SERIF DIGIT SEVEN */ +"\x0001d7ea" "8" /* <U0001D7EA> MATHEMATICAL SANS-SERIF DIGIT EIGHT */ +"\x0001d7eb" "9" /* <U0001D7EB> MATHEMATICAL SANS-SERIF DIGIT NINE */ +"\x0001d7ec" "0" /* <U0001D7EC> MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO */ +"\x0001d7ed" "1" /* <U0001D7ED> MATHEMATICAL SANS-SERIF BOLD DIGIT ONE */ +"\x0001d7ee" "2" /* <U0001D7EE> MATHEMATICAL SANS-SERIF BOLD DIGIT TWO */ +"\x0001d7ef" "3" /* <U0001D7EF> MATHEMATICAL SANS-SERIF BOLD DIGIT THREE */ +"\x0001d7f0" "4" /* <U0001D7F0> MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR */ +"\x0001d7f1" "5" /* <U0001D7F1> MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE */ +"\x0001d7f2" "6" /* <U0001D7F2> MATHEMATICAL SANS-SERIF BOLD DIGIT SIX */ +"\x0001d7f3" "7" /* <U0001D7F3> MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN */ +"\x0001d7f4" "8" /* <U0001D7F4> MATHEMATICAL SANS-SERIF BOLD DIGIT EIGHT */ +"\x0001d7f5" "9" /* <U0001D7F5> MATHEMATICAL SANS-SERIF BOLD DIGIT NINE */ +"\x0001d7f6" "0" /* <U0001D7F6> MATHEMATICAL MONOSPACE DIGIT ZERO */ +"\x0001d7f7" "1" /* <U0001D7F7> MATHEMATICAL MONOSPACE DIGIT ONE */ +"\x0001d7f8" "2" /* <U0001D7F8> MATHEMATICAL MONOSPACE DIGIT TWO */ +"\x0001d7f9" "3" /* <U0001D7F9> MATHEMATICAL MONOSPACE DIGIT THREE */ +"\x0001d7fa" "4" /* <U0001D7FA> MATHEMATICAL MONOSPACE DIGIT FOUR */ +"\x0001d7fb" "5" /* <U0001D7FB> MATHEMATICAL MONOSPACE DIGIT FIVE */ +"\x0001d7fc" "6" /* <U0001D7FC> MATHEMATICAL MONOSPACE DIGIT SIX */ +"\x0001d7fd" "7" /* <U0001D7FD> MATHEMATICAL MONOSPACE DIGIT SEVEN */ +"\x0001d7fe" "8" /* <U0001D7FE> MATHEMATICAL MONOSPACE DIGIT EIGHT */ +"\x0001d7ff" "9" /* <U0001D7FF> MATHEMATICAL MONOSPACE DIGIT NINE */ diff --git a/REORG.TODO/locale/C_name.c b/REORG.TODO/locale/C_name.c new file mode 100644 index 
0000000000..7612544f2f --- /dev/null +++ b/REORG.TODO/locale/C_name.c @@ -0,0 +1,11 @@ +/* Define a constant for the name of the standard C locale, so the string + constant is not repeated in dozens of object files. */ + +#include "localeinfo.h" + +/* Name of our standard locale. */ +const char _nl_C_name[] = "C"; +const char _nl_POSIX_name[] = "POSIX"; + +/* The standard codeset. */ +const char _nl_C_codeset[] = "ANSI_X3.4-1968"; diff --git a/REORG.TODO/locale/Makefile b/REORG.TODO/locale/Makefile new file mode 100644 index 0000000000..d9ef48fe93 --- /dev/null +++ b/REORG.TODO/locale/Makefile @@ -0,0 +1,104 @@ +# Copyright (C) 1991-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# +# Makefile for locales. 
+# +subdir := locale + +include ../Makeconfig + +headers = locale.h bits/locale.h langinfo.h xlocale.h +routines = setlocale findlocale loadlocale loadarchive \ + localeconv nl_langinfo nl_langinfo_l mb_cur_max \ + newlocale duplocale freelocale uselocale +tests = tst-C-locale tst-locname tst-duplocale +categories = ctype messages monetary numeric time paper name \ + address telephone measurement identification collate +aux = $(categories:%=lc-%) $(categories:%=C-%) SYS_libc C_name \ + xlocale localename global-locale coll-lookup +others = localedef locale +#others-static = localedef locale +install-bin = localedef locale +extra-objs = $(localedef-modules:=.o) $(localedef-aux:=.o) \ + $(locale-modules:=.o) $(lib-modules:=.o) + +extra-libs = libBrokenLocale +extra-libs-others = $(extra-libs) + +libBrokenLocale-routines = broken_cur_max + +subdir-dirs = programs +vpath %.c programs ../crypt +vpath %.h programs +vpath %.gperf programs + +localedef-modules := localedef $(categories:%=ld-%) \ + charmap linereader locfile \ + repertoire locarchive +localedef-aux := md5 +locale-modules := locale locale-spec +lib-modules := charmap-dir simple-hash xmalloc xstrdup + + +GPERF = gperf +GPERFFLAGS = -acCgopt -k1,2,5,9,$$ -L ANSI-C + +include ../Rules + +CFLAGS-md5.c = -I../crypt + +programs/%-kw.h: programs/%-kw.gperf + cd programs \ + && $(GPERF) $(GPERFFLAGS) -N $(@F:-kw.h=_hash) $(<F) > $(@F).new + mv -f $@.new $@ + +$(objpfx)localedef: $(localedef-modules:%=$(objpfx)%.o) +$(objpfx)localedef: $(localedef-aux:%=$(objpfx)%.o) +$(objpfx)locale: $(locale-modules:%=$(objpfx)%.o) +$(objpfx)localedef $(objpfx)locale: $(lib-modules:%=$(objpfx)%.o) + +C-translit.h: C-translit.h.in gen-translit.pl + $(PERL) gen-translit.pl < $< > $@.tmp + mv -f $@.tmp $@ + +# The path to the compiled binary locale archive or compiled locales, +# along with the parent path to the source locales and source +# charmaps. 
+localepath = "$(complocaledir):$(i18ndir)" + +# -Iprograms doesn't really belong here, but this gets it at the head +# of the list instead of the tail, where CPPFLAGS-$(lib) gets added. +# We need it before the standard -I's to see programs/config.h first. +locale-CPPFLAGS = -DCOMPLOCALEDIR='"$(complocaledir)"' \ + -DLOCALE_ALIAS_PATH='"$(localedir)"' \ + -Iprograms + +CPPFLAGS-locale-programs = -DLOCALE_PATH='$(localepath)' \ + -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \ + -DREPERTOIREMAP_PATH='"$(i18ndir)/repertoiremaps"' \ + -DLOCSRCDIR='"$(i18ndir)/locales"' + +CFLAGS-charmap.c = -Wno-write-strings -Wno-char-subscripts +CFLAGS-locfile.c = -Wno-write-strings -Wno-char-subscripts +CFLAGS-charmap-dir.c = -Wno-write-strings + +# Set libof-* for each routine. +cpp-srcs-left := $(localedef-modules) $(localedef-aux) $(locale-modules) \ + $(lib-modules) +lib := locale-programs +include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) diff --git a/REORG.TODO/locale/SYS_libc.c b/REORG.TODO/locale/SYS_libc.c new file mode 100644 index 0000000000..a879b68a25 --- /dev/null +++ b/REORG.TODO/locale/SYS_libc.c @@ -0,0 +1,7 @@ +/* Define a constant for the dgettext domainname for libc internal messages, + so the string constant is not repeated in dozens of object files. 
*/ + +#include <libintl.h> + +const char _libc_intl_domainname[] = "libc"; +libc_hidden_data_def (_libc_intl_domainname) diff --git a/REORG.TODO/locale/Versions b/REORG.TODO/locale/Versions new file mode 100644 index 0000000000..72119349c1 --- /dev/null +++ b/REORG.TODO/locale/Versions @@ -0,0 +1,74 @@ +libBrokenLocale { + GLIBC_2.0 { + __ctype_get_mb_cur_max; + } +} + +libc { + GLIBC_2.0 { + # global variables + _libc_intl_domainname; + + # functions used in inline functions or macros + __ctype_get_mb_cur_max; + + # l* + localeconv; + + # n* + nl_langinfo; + + # s* + setlocale; + } + GLIBC_2.1 { + # functions from the experimental locale implementation + __newlocale; __duplocale; __freelocale; + __isalnum_l; __isalpha_l; __isascii_l; __isblank_l; __iscntrl_l; + __isdigit_l; __isgraph_l; __islower_l; __isprint_l; __ispunct_l; + __isspace_l; __isupper_l; __iswalnum_l; __iswalpha_l; __iswblank_l; + __iswcntrl_l; __iswctype_l; __iswdigit_l; __iswgraph_l; __iswlower_l; + __iswprint_l; __iswpunct_l; __iswspace_l; __iswupper_l; __iswxdigit_l; + __isxdigit_l; __strcasecmp_l; __strcoll_l; __strfmon_l; __strncasecmp_l; + __strtod_l; __strtof_l; __strtol_l; __strtold_l; __strtoll_l; __strtoul_l; + __strtoull_l; __strxfrm_l; __toascii_l; __tolower_l; __toupper_l; + __towctrans_l; __towlower_l; __towupper_l; __wcscasecmp_l; __wcscoll_l; + __wcsncasecmp_l; __wcstod_l; __wcstof_l; __wcstol_l; __wcstold_l; + __wcstoll_l; __wcstoul_l; __wcstoull_l; __wcsxfrm_l; __wctype_l; + } + GLIBC_2.2 { + # The data structure changed. 
+ localeconv; + + # more functions from the experimental locale implementation + __wctrans_l; + + # missing function from the experimental locale implementation + __nl_langinfo_l; + } + GLIBC_2.3 { + # the new "experimental" interface is now public + newlocale; duplocale; freelocale; uselocale; + + # this name is used by libstdc++ as well as libpthread + __uselocale; + + isalnum_l; isalpha_l; isascii_l; isblank_l; iscntrl_l; + isdigit_l; isgraph_l; islower_l; isprint_l; ispunct_l; + isspace_l; isupper_l; iswalnum_l; iswalpha_l; iswblank_l; + iswcntrl_l; iswctype_l; iswdigit_l; iswgraph_l; iswlower_l; + iswprint_l; iswpunct_l; iswspace_l; iswupper_l; iswxdigit_l; + isxdigit_l; strcasecmp_l; strcoll_l; strfmon_l; strncasecmp_l; + strtod_l; strtof_l; strtol_l; strtold_l; strtoul_l; + strxfrm_l; toascii_l; tolower_l; toupper_l; + towctrans_l; towlower_l; towupper_l; wcscasecmp_l; wcscoll_l; + wcsncasecmp_l; wcstod_l; wcstof_l; wcstol_l; wcstold_l; + wcstoll_l; wcstoul_l; wcstoull_l; wcsxfrm_l; wctype_l; + wctrans_l; nl_langinfo_l; + } + GLIBC_PRIVATE { + # global variables + __collate_element_hash; __collate_element_strings; + __collate_symbol_classes; __collate_symbol_hash; __collate_symbol_strings; + } +} diff --git a/REORG.TODO/locale/bits/locale.h b/REORG.TODO/locale/bits/locale.h new file mode 100644 index 0000000000..c8eee6fab7 --- /dev/null +++ b/REORG.TODO/locale/bits/locale.h @@ -0,0 +1,40 @@ +/* Definition of locale category symbol values. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _LOCALE_H && !defined _LANGINFO_H +# error "Never use <bits/locale.h> directly; include <locale.h> instead." +#endif + +#ifndef _BITS_LOCALE_H +#define _BITS_LOCALE_H 1 + +#define __LC_CTYPE 0 +#define __LC_NUMERIC 1 +#define __LC_TIME 2 +#define __LC_COLLATE 3 +#define __LC_MONETARY 4 +#define __LC_MESSAGES 5 +#define __LC_ALL 6 +#define __LC_PAPER 7 +#define __LC_NAME 8 +#define __LC_ADDRESS 9 +#define __LC_TELEPHONE 10 +#define __LC_MEASUREMENT 11 +#define __LC_IDENTIFICATION 12 + +#endif /* bits/locale.h */ diff --git a/REORG.TODO/locale/broken_cur_max.c b/REORG.TODO/locale/broken_cur_max.c new file mode 100644 index 0000000000..1f849c3e9a --- /dev/null +++ b/REORG.TODO/locale/broken_cur_max.c @@ -0,0 +1,50 @@ +/* Return number of characters in multibyte representation for current + character set. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <langinfo.h> +#include <locale.h> +#include <stdlib.h> +#include "localeinfo.h" + + +/* This is a gross hack to get broken programs running. + + ISO C provides no means to find out how many bytes the wide + character representation really uses. But it defines MB_CUR_MAX to + return the information for the multi-byte character representation. + Many programmers don't know the difference between the two and + think this means the same. But assuming all characters have a size + of MB_CUR_MAX after they have been processed by `mbrtowc' is wrong. + Instead the maximum number of characters used for the conversion is + MB_CUR_MAX. + + It is known that some Motif applications have this problem. To + cure this one has to make sure the glibc uses the function in this + file instead of the one in locale/mb_cur_max.c. This can either be + done by linking with this file or by using the LD_PRELOAD feature + of the dynamic linker. */ +size_t +__ctype_get_mb_cur_max (void) +{ + union locale_data_value u; + + u.string = nl_langinfo (_NL_CTYPE_MB_CUR_MAX); + return ((size_t []) { 1, 1, 1, 2, 2, 3, 4 })[u.word]; +} diff --git a/REORG.TODO/locale/categories.def b/REORG.TODO/locale/categories.def new file mode 100644 index 0000000000..27a6129ddc --- /dev/null +++ b/REORG.TODO/locale/categories.def @@ -0,0 +1,346 @@ +/* Definition of all available locale categories and their items. -*- C -*- + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* These definitions are used by the locale-related files in the C library + and the programs `localedef' and `locale'. + + The general format of the descriptions is like this: + + DEFINE_CATEGORY (ID, name, ( items ), setlocale-postload) + + where items itself is a sequence of entries of the form + + DEFINE_ELEMENT (ID, name, standard, value-type, min, max) + + The usage of the load, check, output functions depends on the individual + program code which loads this file. + + The value types used for the items are `string', `stringarray', + `stringlist', `byte', `bytearray', `word', `wordarray', `wstring', + `wstringarray', and `wstringlist'. These cover all possible values in the + current locale definitions. `min' and `max' can be individually used again.
*/ + +#ifndef NO_POSTLOAD +#define NO_POSTLOAD NULL +#endif + +DEFINE_CATEGORY +( + LC_COLLATE, "LC_COLLATE", + ( + DEFINE_ELEMENT (_NL_COLLATE_NRULES, "collate-nrules", std, word) + DEFINE_ELEMENT (_NL_COLLATE_RULESETS, "collate-rulesets", std, string) + DEFINE_ELEMENT (_NL_COLLATE_TABLEMB, "collate-tablemb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB, "collate-weightmb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB, "collate-extramb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB, "collate-indirectmb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_TABLEWC, "collate-tablewc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_WEIGHTWC, "collate-weightwc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_EXTRAWC, "collate-extrawc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_INDIRECTWC, "collate-indirectwc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZEMB, "collate-symb-hash-sizemb", std, word) + DEFINE_ELEMENT (_NL_COLLATE_SYMB_TABLEMB, "collate-symb-tablemb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_SYMB_EXTRAMB, "collate-symb-extramb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_CODESET, "collate-codeset", std, string) + ), NO_POSTLOAD) + + +/* The actual definition of ctype is meaningless here. It is hard coded in + the code because it has to be handled very specially. Only the names of + the functions and the value types are important. 
*/ +DEFINE_CATEGORY +( + LC_CTYPE, "LC_CTYPE", + ( + DEFINE_ELEMENT (_NL_CTYPE_CLASS, "ctype-class", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TOUPPER, "ctype-toupper", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TOLOWER, "ctype-tolower", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_CLASS32, "ctype-class32", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_CLASS_NAMES, "ctype-class-names", std, stringlist, 10, 32) + DEFINE_ELEMENT (_NL_CTYPE_MAP_NAMES, "ctype-map-names", std, stringlist, 2, 32) + DEFINE_ELEMENT (_NL_CTYPE_WIDTH, "ctype-width", std, bytearray) + DEFINE_ELEMENT (_NL_CTYPE_MB_CUR_MAX, "ctype-mb-cur-max", std, word) + DEFINE_ELEMENT (_NL_CTYPE_CODESET_NAME, "charmap", std, string) + DEFINE_ELEMENT (_NL_CTYPE_TOUPPER32, "ctype-toupper32", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TOLOWER32, "ctype-tolower32", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_CLASS_OFFSET, "ctype-class-offset", std, word) + DEFINE_ELEMENT (_NL_CTYPE_MAP_OFFSET, "ctype-map-offset", std, word) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS_MB_LEN, "ctype-indigits_mb-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS0_MB, "ctype-indigits0_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS1_MB, "ctype-indigits1_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS2_MB, "ctype-indigits2_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS3_MB, "ctype-indigits3_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS4_MB, "ctype-indigits4_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS5_MB, "ctype-indigits5_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS6_MB, "ctype-indigits6_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS7_MB, "ctype-indigits7_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS8_MB, "ctype-indigits8_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS9_MB, "ctype-indigits9_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS_WC_LEN, "ctype-indigits_wc-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS0_WC, "ctype-indigits0_wc", std, wstring) + 
DEFINE_ELEMENT (_NL_CTYPE_INDIGITS1_WC, "ctype-indigits1_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS2_WC, "ctype-indigits2_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS3_WC, "ctype-indigits3_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS4_WC, "ctype-indigits4_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS5_WC, "ctype-indigits5_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS6_WC, "ctype-indigits6_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS7_WC, "ctype-indigits7_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS8_WC, "ctype-indigits8_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_INDIGITS9_WC, "ctype-indigits9_wc", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT0_MB, "ctype-outdigit0_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT1_MB, "ctype-outdigit1_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT2_MB, "ctype-outdigit2_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT3_MB, "ctype-outdigit3_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT4_MB, "ctype-outdigit4_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT5_MB, "ctype-outdigit5_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT6_MB, "ctype-outdigit6_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT7_MB, "ctype-outdigit7_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT8_MB, "ctype-outdigit8_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT9_MB, "ctype-outdigit9_mb", std, string) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT0_WC, "ctype-outdigit0_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT1_WC, "ctype-outdigit1_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT2_WC, "ctype-outdigit2_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT3_WC, "ctype-outdigit3_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT4_WC, "ctype-outdigit4_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT5_WC, "ctype-outdigit5_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT6_WC, "ctype-outdigit6_wc", std, word) + 
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT7_WC, "ctype-outdigit7_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT8_WC, "ctype-outdigit8_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT9_WC, "ctype-outdigit9_wc", std, word) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TAB_SIZE, "ctype-translit-tab-size", std, word) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_IDX, "ctype-translit-from-idx", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_TBL, "ctype-translit-from-tbl", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_IDX, "ctype-translit-to-idx", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_TBL, "ctype-translit-to-tbl", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN, "ctype-translit-default-missing-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING, "ctype-translit-default-missing", std, wstring) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE_LEN, "ctype-translit-ignore-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE, "ctype-translit-ignore", std, string) + DEFINE_ELEMENT (_NL_CTYPE_MAP_TO_NONASCII, "map-to-nonascii", std, word) + DEFINE_ELEMENT (_NL_CTYPE_NONASCII_CASE, "nonascii-case", std, word) + ), _nl_postload_ctype) + + +DEFINE_CATEGORY +( + LC_MONETARY, "LC_MONETARY", + ( + DEFINE_ELEMENT (INT_CURR_SYMBOL, "int_curr_symbol", std, string) + DEFINE_ELEMENT (CURRENCY_SYMBOL, "currency_symbol", std, string) + DEFINE_ELEMENT (MON_DECIMAL_POINT, "mon_decimal_point", std, string) + DEFINE_ELEMENT (MON_THOUSANDS_SEP, "mon_thousands_sep", std, string) + DEFINE_ELEMENT (MON_GROUPING, "mon_grouping", std, bytearray) + DEFINE_ELEMENT (POSITIVE_SIGN, "positive_sign", std, string) + DEFINE_ELEMENT (NEGATIVE_SIGN, "negative_sign", std, string) + DEFINE_ELEMENT (INT_FRAC_DIGITS, "int_frac_digits", std, byte) + DEFINE_ELEMENT (FRAC_DIGITS, "frac_digits", std, byte) + DEFINE_ELEMENT (P_CS_PRECEDES, "p_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (P_SEP_BY_SPACE, "p_sep_by_space", std, byte, 0, 2) + 
DEFINE_ELEMENT (N_CS_PRECEDES, "n_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (N_SEP_BY_SPACE, "n_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (P_SIGN_POSN, "p_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (N_SIGN_POSN, "n_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (_NL_MONETARY_CRNCYSTR, "crncystr", std, string) + DEFINE_ELEMENT (__INT_P_CS_PRECEDES, "int_p_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (__INT_P_SEP_BY_SPACE, "int_p_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (__INT_N_CS_PRECEDES, "int_n_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (__INT_N_SEP_BY_SPACE, "int_n_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (__INT_P_SIGN_POSN, "int_p_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (__INT_N_SIGN_POSN, "int_n_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_CURR_SYMBOL, "duo_int_curr_symbol", std, string) + DEFINE_ELEMENT (_NL_MONETARY_DUO_CURRENCY_SYMBOL, "duo_currency_symbol", std, string) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_FRAC_DIGITS, "duo_int_frac_digits", std, byte) + DEFINE_ELEMENT (_NL_MONETARY_DUO_FRAC_DIGITS, "duo_frac_digits", std, byte) + DEFINE_ELEMENT (_NL_MONETARY_DUO_P_CS_PRECEDES, "duo_p_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (_NL_MONETARY_DUO_P_SEP_BY_SPACE, "duo_p_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (_NL_MONETARY_DUO_N_CS_PRECEDES, "duo_n_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (_NL_MONETARY_DUO_N_SEP_BY_SPACE, "duo_n_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_P_CS_PRECEDES, "duo_int_p_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_P_SEP_BY_SPACE, "duo_int_p_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_N_CS_PRECEDES, "duo_int_n_cs_precedes", std, byte, 0, 1) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_N_SEP_BY_SPACE, "duo_int_n_sep_by_space", std, byte, 0, 2) + DEFINE_ELEMENT (_NL_MONETARY_DUO_P_SIGN_POSN, "duo_p_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (_NL_MONETARY_DUO_N_SIGN_POSN, 
"duo_n_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_P_SIGN_POSN, "duo_int_p_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (_NL_MONETARY_DUO_INT_N_SIGN_POSN, "duo_int_n_sign_posn", std, byte, 0, 4) + DEFINE_ELEMENT (_NL_MONETARY_UNO_VALID_FROM, "uno_valid_from", std, word) + DEFINE_ELEMENT (_NL_MONETARY_UNO_VALID_TO, "uno_valid_to", std, word) + DEFINE_ELEMENT (_NL_MONETARY_DUO_VALID_FROM, "duo_valid_from", std, word) + DEFINE_ELEMENT (_NL_MONETARY_DUO_VALID_TO, "duo_valid_to", std, word) + DEFINE_ELEMENT (_NL_MONETARY_CONVERSION_RATE, "conversion_rate", std, wordarray, 2, 2) + DEFINE_ELEMENT (_NL_MONETARY_DECIMAL_POINT_WC, "monetary-decimal-point-wc", std, word) + DEFINE_ELEMENT (_NL_MONETARY_THOUSANDS_SEP_WC, "monetary-thousands-sep-wc", std, word) + DEFINE_ELEMENT (_NL_MONETARY_CODESET, "monetary-codeset", std, string) + ), NO_POSTLOAD) + + +DEFINE_CATEGORY +( + LC_NUMERIC, "LC_NUMERIC", + ( + DEFINE_ELEMENT (DECIMAL_POINT, "decimal_point", std, string) + DEFINE_ELEMENT (THOUSANDS_SEP, "thousands_sep", std, string) + DEFINE_ELEMENT (GROUPING, "grouping", std, bytearray) + DEFINE_ELEMENT (_NL_NUMERIC_DECIMAL_POINT_WC, "numeric-decimal-point-wc", std, word) + DEFINE_ELEMENT (_NL_NUMERIC_THOUSANDS_SEP_WC, "numeric-thousands-sep-wc", std, word) + DEFINE_ELEMENT (_NL_NUMERIC_CODESET, "numeric-codeset", std, string) + + ), NO_POSTLOAD) + + +DEFINE_CATEGORY +( + LC_TIME, "LC_TIME", + ( + DEFINE_ELEMENT (ABDAY_1, "abday", std, stringarray, 7, 7) + DEFINE_ELEMENT (DAY_1, "day", std, stringarray, 7, 7) + DEFINE_ELEMENT (ABMON_1, "abmon", std, stringarray, 12, 12) + DEFINE_ELEMENT (MON_1, "mon", std, stringarray, 12, 12) + DEFINE_ELEMENT (AM_STR, "am_pm", std, stringarray, 2, 2) + DEFINE_ELEMENT (D_T_FMT, "d_t_fmt", std, string) + DEFINE_ELEMENT (D_FMT, "d_fmt", std, string) + DEFINE_ELEMENT (T_FMT, "t_fmt", std, string) + DEFINE_ELEMENT (T_FMT_AMPM, "t_fmt_ampm", std, string) + DEFINE_ELEMENT (ERA, "era", opt, stringlist, 0, 100) + DEFINE_ELEMENT 
(ERA_YEAR, "era_year", opt, string) + DEFINE_ELEMENT (ERA_D_FMT, "era_d_fmt", opt, string) + DEFINE_ELEMENT (ALT_DIGITS, "alt_digits", opt, stringlist, 100, 100) + DEFINE_ELEMENT (ERA_D_T_FMT, "era_d_t_fmt", opt, string) + DEFINE_ELEMENT (ERA_T_FMT, "era_t_fmt", opt, string) + DEFINE_ELEMENT (_NL_TIME_ERA_NUM_ENTRIES, "time-era-num-entries", opt, word) + DEFINE_ELEMENT (_NL_TIME_ERA_ENTRIES, "time-era-entries", opt, string) + DEFINE_ELEMENT (_NL_WABDAY_1, "wide-abday", std, wstringarray, 7, 7) + DEFINE_ELEMENT (_NL_WDAY_1, "wide-day", std, wstringarray, 7, 7) + DEFINE_ELEMENT (_NL_WABMON_1, "wide-abmon", std, wstringarray, 12, 12) + DEFINE_ELEMENT (_NL_WMON_1, "wide-mon", std, wstringarray, 12, 12) + DEFINE_ELEMENT (_NL_WAM_STR, "wide-am_pm", std, wstringarray, 2, 2) + DEFINE_ELEMENT (_NL_WD_T_FMT, "wide-d_t_fmt", std, wstring) + DEFINE_ELEMENT (_NL_WD_FMT, "wide-d_fmt", std, wstring) + DEFINE_ELEMENT (_NL_WT_FMT, "wide-t_fmt", std, wstring) + DEFINE_ELEMENT (_NL_WT_FMT_AMPM, "wide-t_fmt_ampm", std, wstring) + DEFINE_ELEMENT (_NL_WERA_YEAR, "wide-era_year", opt, wstring) + DEFINE_ELEMENT (_NL_WERA_D_FMT, "wide-era_d_fmt", opt, wstring) + DEFINE_ELEMENT (_NL_WALT_DIGITS, "wide-alt_digits", opt, wstringlist, 1000, 100) + DEFINE_ELEMENT (_NL_WERA_D_T_FMT, "wide-era_d_t_fmt", opt, wstring) + DEFINE_ELEMENT (_NL_WERA_T_FMT, "wide-era_t_fmt", opt, wstring) + DEFINE_ELEMENT (_NL_TIME_WEEK_NDAYS, "week-ndays", std, byte) + DEFINE_ELEMENT (_NL_TIME_WEEK_1STDAY, "week-1stday", std, word) + DEFINE_ELEMENT (_NL_TIME_WEEK_1STWEEK, "week-1stweek", std, byte) + DEFINE_ELEMENT (_NL_TIME_FIRST_WEEKDAY, "first_weekday", std, byte) + DEFINE_ELEMENT (_NL_TIME_FIRST_WORKDAY, "first_workday", std, byte) + DEFINE_ELEMENT (_NL_TIME_CAL_DIRECTION, "cal_direction", std, byte) + DEFINE_ELEMENT (_NL_TIME_TIMEZONE, "timezone", std, string) + DEFINE_ELEMENT (_DATE_FMT, "date_fmt", opt, string) + DEFINE_ELEMENT (_NL_W_DATE_FMT, "wide-date_fmt", opt, wstring) + DEFINE_ELEMENT (_NL_TIME_CODESET, 
"time-codeset", std, string) + ), NO_POSTLOAD) + + +DEFINE_CATEGORY +( + LC_MESSAGES, "LC_MESSAGES", + ( + DEFINE_ELEMENT (YESEXPR, "yesexpr", std, string) + DEFINE_ELEMENT (NOEXPR, "noexpr", std, string) + DEFINE_ELEMENT (YESSTR, "yesstr", opt, string) + DEFINE_ELEMENT (NOSTR, "nostr", opt, string) + DEFINE_ELEMENT (_NL_MESSAGES_CODESET, "messages-codeset", std, string) + ), NO_POSTLOAD) + +DEFINE_CATEGORY +( + LC_PAPER, "LC_PAPER", + ( + DEFINE_ELEMENT (_NL_PAPER_HEIGHT, "height", std, word) + DEFINE_ELEMENT (_NL_PAPER_WIDTH, "width", std, word) + DEFINE_ELEMENT (_NL_PAPER_CODESET, "paper-codeset", std, string) + ), NO_POSTLOAD) + +DEFINE_CATEGORY +( + LC_NAME, "LC_NAME", + ( + DEFINE_ELEMENT (_NL_NAME_NAME_FMT, "name_fmt", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_GEN, "name_gen", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_MR, "name_mr", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_MRS, "name_mrs", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_MISS, "name_miss", std, string) + DEFINE_ELEMENT (_NL_NAME_NAME_MS, "name_ms", std, string) + DEFINE_ELEMENT (_NL_NAME_CODESET, "name-codeset", std, string) + ), NO_POSTLOAD) + +DEFINE_CATEGORY +( + LC_ADDRESS, "LC_ADDRESS", + ( + DEFINE_ELEMENT (_NL_ADDRESS_POSTAL_FMT, "postal_fmt", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_NAME, "country_name", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_POST, "country_post", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_AB2, "country_ab2", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_AB3, "country_ab3", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_CAR, "country_car", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_NUM, "country_num", std, word) + DEFINE_ELEMENT (_NL_ADDRESS_COUNTRY_ISBN, "country_isbn", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_LANG_NAME, "lang_name", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_LANG_AB, "lang_ab", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_LANG_TERM, "lang_term", std, string) + DEFINE_ELEMENT 
(_NL_ADDRESS_LANG_LIB, "lang_lib", std, string) + DEFINE_ELEMENT (_NL_ADDRESS_CODESET, "address-codeset", std, string) + ), NO_POSTLOAD) + +DEFINE_CATEGORY +( + LC_TELEPHONE, "LC_TELEPHONE", + ( + DEFINE_ELEMENT (_NL_TELEPHONE_TEL_INT_FMT, "tel_int_fmt", std, string) + DEFINE_ELEMENT (_NL_TELEPHONE_TEL_DOM_FMT, "tel_dom_fmt", std, string) + DEFINE_ELEMENT (_NL_TELEPHONE_INT_SELECT, "int_select", std, string) + DEFINE_ELEMENT (_NL_TELEPHONE_INT_PREFIX, "int_prefix", std, string) + DEFINE_ELEMENT (_NL_TELEPHONE_CODESET, "telephone-codeset", std, string) + ), NO_POSTLOAD) + +DEFINE_CATEGORY +( + LC_MEASUREMENT, "LC_MEASUREMENT", + ( + DEFINE_ELEMENT (_NL_MEASUREMENT_MEASUREMENT, "measurement", std, byte) + DEFINE_ELEMENT (_NL_MEASUREMENT_CODESET, "measurement-codeset", std, string) + ), NO_POSTLOAD) + +DEFINE_CATEGORY +( + LC_IDENTIFICATION, "LC_IDENTIFICATION", + ( + DEFINE_ELEMENT (_NL_IDENTIFICATION_TITLE, "title", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_SOURCE, "source", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_ADDRESS, "address", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_CONTACT, "contact", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_EMAIL, "email", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_TEL, "tel", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_FAX, "fax", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_LANGUAGE, "language", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_TERRITORY, "territory", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_AUDIENCE, "audience", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_APPLICATION, "application", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_ABBREVIATION, "abbreviation", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_REVISION, "revision", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_DATE, "date", std, string) + DEFINE_ELEMENT (_NL_IDENTIFICATION_CATEGORY, "category", std, stringarray, 13, 13) + DEFINE_ELEMENT (_NL_IDENTIFICATION_CODESET, 
"identification-codeset", std, string) + ), NO_POSTLOAD) diff --git a/REORG.TODO/locale/coll-lookup.c b/REORG.TODO/locale/coll-lookup.c new file mode 100644 index 0000000000..a4deb9c0da --- /dev/null +++ b/REORG.TODO/locale/coll-lookup.c @@ -0,0 +1,80 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible <haible@clisp.cons.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +/* Lookup in a table of int32_t, with default value 0. 
*/ +int32_t +internal_function +__collidx_table_lookup (const char *table, uint32_t wc) +{ + uint32_t shift1 = ((const uint32_t *) table)[0]; + uint32_t index1 = wc >> shift1; + uint32_t bound = ((const uint32_t *) table)[1]; + if (index1 < bound) + { + uint32_t lookup1 = ((const uint32_t *) table)[5 + index1]; + if (lookup1 != 0) + { + uint32_t shift2 = ((const uint32_t *) table)[2]; + uint32_t mask2 = ((const uint32_t *) table)[3]; + uint32_t index2 = (wc >> shift2) & mask2; + uint32_t lookup2 = ((const uint32_t *)(table + lookup1))[index2]; + if (lookup2 != 0) + { + uint32_t mask3 = ((const uint32_t *) table)[4]; + uint32_t index3 = wc & mask3; + int32_t lookup3 = ((const int32_t *)(table + lookup2))[index3]; + + return lookup3; + } + } + } + return 0; +} + + +/* Lookup in a table of uint32_t, with default value 0xffffffff. */ +uint32_t +internal_function +__collseq_table_lookup (const char *table, uint32_t wc) +{ + uint32_t shift1 = ((const uint32_t *) table)[0]; + uint32_t index1 = wc >> shift1; + uint32_t bound = ((const uint32_t *) table)[1]; + if (index1 < bound) + { + uint32_t lookup1 = ((const uint32_t *) table)[5 + index1]; + if (lookup1 != 0) + { + uint32_t shift2 = ((const uint32_t *) table)[2]; + uint32_t mask2 = ((const uint32_t *) table)[3]; + uint32_t index2 = (wc >> shift2) & mask2; + uint32_t lookup2 = ((const uint32_t *)(table + lookup1))[index2]; + if (lookup2 != 0) + { + uint32_t mask3 = ((const uint32_t *) table)[4]; + uint32_t index3 = wc & mask3; + uint32_t lookup3 = ((const uint32_t *)(table + lookup2))[index3]; + + return lookup3; + } + } + } + return ~((uint32_t) 0); +} diff --git a/REORG.TODO/locale/coll-lookup.h b/REORG.TODO/locale/coll-lookup.h new file mode 100644 index 0000000000..d95408ad68 --- /dev/null +++ b/REORG.TODO/locale/coll-lookup.h @@ -0,0 +1,27 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible <haible@clisp.cons.org>, 2000. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +/* Lookup in a table of int32_t, with default value 0. */ +extern int32_t __collidx_table_lookup (const char *table, uint32_t wc) + internal_function; + +/* Lookup in a table of uint32_t, with default value 0xffffffff. */ +extern uint32_t __collseq_table_lookup (const char *table, uint32_t wc) + internal_function; diff --git a/REORG.TODO/locale/duplocale.c b/REORG.TODO/locale/duplocale.c new file mode 100644 index 0000000000..07c2c58646 --- /dev/null +++ b/REORG.TODO/locale/duplocale.c @@ -0,0 +1,89 @@ +/* Duplicate handle for selection of locales. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <locale.h> +#include <libc-lock.h> +#include <stdlib.h> +#include <string.h> + +#include <localeinfo.h> + + +/* Lock for protecting global data. */ +__libc_rwlock_define (extern , __libc_setlocale_lock attribute_hidden) + + +__locale_t +__duplocale (__locale_t dataset) +{ + /* This static object is returned for newlocale (LC_ALL_MASK, "C"). */ + if (dataset == _nl_C_locobj_ptr) + return dataset; + + /* Handle a special value. */ + if (dataset == LC_GLOBAL_LOCALE) + dataset = &_nl_global_locale; + + __locale_t result; + int cnt; + size_t names_len = 0; + + /* Calculate the total space we need to store all the names. */ + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL && dataset->__names[cnt] != _nl_C_name) + names_len += strlen (dataset->__names[cnt]) + 1; + + /* Get memory. */ + result = malloc (sizeof (struct __locale_struct) + names_len); + + if (result != NULL) + { + char *namep = (char *) (result + 1); + + /* We modify global data (the usage counts). */ + __libc_rwlock_wrlock (__libc_setlocale_lock); + + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + result->__locales[cnt] = dataset->__locales[cnt]; + if (result->__locales[cnt]->usage_count < MAX_USAGE_COUNT) + ++result->__locales[cnt]->usage_count; + + if (dataset->__names[cnt] == _nl_C_name) + result->__names[cnt] = _nl_C_name; + else + { + result->__names[cnt] = namep; + namep = __stpcpy (namep, dataset->__names[cnt]) + 1; + } + } + + /* Update the special members. */ + result->__ctype_b = dataset->__ctype_b; + result->__ctype_tolower = dataset->__ctype_tolower; + result->__ctype_toupper = dataset->__ctype_toupper; + + /* It's done. 
*/ + __libc_rwlock_unlock (__libc_setlocale_lock); + } + + return result; +} +weak_alias (__duplocale, duplocale) diff --git a/REORG.TODO/locale/elem-hash.h b/REORG.TODO/locale/elem-hash.h new file mode 100644 index 0000000000..0953ebc6fc --- /dev/null +++ b/REORG.TODO/locale/elem-hash.h @@ -0,0 +1,33 @@ +/* Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Ulrich Drepper, <drepper@cygnus.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + +/* The hashing function used for the table with collation symbols. */ +static int32_t __attribute__ ((pure, unused)) +elem_hash (const char *str, int_fast32_t n) +{ + int32_t result = n; + + while (n-- > 0) + { + result <<= 3; + result += *str++; + } + + return result; +} diff --git a/REORG.TODO/locale/findlocale.c b/REORG.TODO/locale/findlocale.c new file mode 100644 index 0000000000..02a97ac654 --- /dev/null +++ b/REORG.TODO/locale/findlocale.c @@ -0,0 +1,359 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. 
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <errno.h>
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#ifdef _POSIX_MAPPED_FILES
+# include <sys/mman.h>
+#endif
+
+#include "localeinfo.h"
+#include "../iconv/gconv_charset.h"
+#include "../iconv/gconv_int.h"
+
+/* With NL_CURRENT_INDIRECT every _nl_C_<category> object is declared
+   as a weak extern and collected in the _nl_C array below; otherwise
+   _nl_C is simply the __locales member of _nl_C_locobj.
+   NOTE(review): categories.def presumably invokes DEFINE_CATEGORY once
+   per locale category -- confirm against that file.  */
+#ifdef NL_CURRENT_INDIRECT
+# define DEFINE_CATEGORY(category, category_name, items, a) \
+extern struct __locale_data _nl_C_##category; \
+weak_extern (_nl_C_##category)
+# include "categories.def"
+# undef DEFINE_CATEGORY
+
+/* Array indexed by category of pointers to _nl_C_CATEGORY slots.
+   Elements are zero for categories whose data is never used.  */
+struct __locale_data *const _nl_C[] attribute_hidden =
+  {
+# define DEFINE_CATEGORY(category, category_name, items, a) \
+    [category] = &_nl_C_##category,
+# include "categories.def"
+# undef DEFINE_CATEGORY
+  };
+#else
+# define _nl_C (_nl_C_locobj.__locales)
+#endif
+
+
+/* For each category we keep a list of records for the locale files
+   which are somehow addressed.  */
+struct loaded_l10nfile *_nl_locale_file_list[__LC_LAST];
+/* Search path _nl_find_locale falls back to when no explicit path was
+   given and the locale archive had no matching entry.  */
+const char _nl_default_locale_path[] attribute_hidden = COMPLOCALEDIR;
+
+/* Checks if the name is actually present, that is, not NULL and not
+   empty.  
*/ +static inline int +name_present (const char *name) +{ + return name != NULL && name[0] != '\0'; +} + +/* Checks that the locale name neither extremely long, nor contains a + ".." path component (to prevent directory traversal). */ +static inline int +valid_locale_name (const char *name) +{ + /* Not set. */ + size_t namelen = strlen (name); + /* Name too long. The limit is arbitrary and prevents stack overflow + issues later. */ + if (__glibc_unlikely (namelen > 255)) + return 0; + /* Directory traversal attempt. */ + static const char slashdot[4] = {'/', '.', '.', '/'}; + if (__glibc_unlikely (__memmem (name, namelen, + slashdot, sizeof (slashdot)) != NULL)) + return 0; + if (namelen == 2 && __glibc_unlikely (name[0] == '.' && name [1] == '.')) + return 0; + if (namelen >= 3 + && __glibc_unlikely (((name[0] == '.' + && name[1] == '.' + && name[2] == '/') + || (name[namelen - 3] == '/' + && name[namelen - 2] == '.' + && name[namelen - 1] == '.')))) + return 0; + /* If there is a slash in the name, it must start with one. */ + if (__glibc_unlikely (memchr (name, '/', namelen) != NULL) && name[0] != '/') + return 0; + return 1; +} + +struct __locale_data * +internal_function +_nl_find_locale (const char *locale_path, size_t locale_path_len, + int category, const char **name) +{ + int mask; + /* Name of the locale for this category. */ + const char *cloc_name = *name; + const char *language; + const char *modifier; + const char *territory; + const char *codeset; + const char *normalized_codeset; + struct loaded_l10nfile *locale_file; + + if (cloc_name[0] == '\0') + { + /* The user decides which locale to use by setting environment + variables. 
*/ + cloc_name = getenv ("LC_ALL"); + if (!name_present (cloc_name)) + cloc_name = getenv (_nl_category_names.str + + _nl_category_name_idxs[category]); + if (!name_present (cloc_name)) + cloc_name = getenv ("LANG"); + if (!name_present (cloc_name)) + cloc_name = _nl_C_name; + } + + /* We used to fall back to the C locale if the name contains a slash + character '/', but we now check for directory traversal in + valid_locale_name, so this is no longer necessary. */ + + if (__builtin_expect (strcmp (cloc_name, _nl_C_name), 1) == 0 + || __builtin_expect (strcmp (cloc_name, _nl_POSIX_name), 1) == 0) + { + /* We need not load anything. The needed data is contained in + the library itself. */ + *name = _nl_C_name; + return _nl_C[category]; + } + else if (!valid_locale_name (cloc_name)) + { + __set_errno (EINVAL); + return NULL; + } + + *name = cloc_name; + + /* We really have to load some data. First we try the archive, + but only if there was no LOCPATH environment variable specified. */ + if (__glibc_likely (locale_path == NULL)) + { + struct __locale_data *data + = _nl_load_locale_from_archive (category, name); + if (__glibc_likely (data != NULL)) + return data; + + /* Nothing in the archive with the given name. Expanding it as + an alias and retry. */ + cloc_name = _nl_expand_alias (*name); + if (cloc_name != NULL) + { + data = _nl_load_locale_from_archive (category, &cloc_name); + if (__builtin_expect (data != NULL, 1)) + return data; + } + + /* Nothing in the archive. Set the default path to search below. */ + locale_path = _nl_default_locale_path; + locale_path_len = sizeof _nl_default_locale_path; + } + else + /* We really have to load some data. First see whether the name is + an alias. Please note that this makes it impossible to have "C" + or "POSIX" as aliases. */ + cloc_name = _nl_expand_alias (*name); + + if (cloc_name == NULL) + /* It is no alias. */ + cloc_name = *name; + + /* Make a writable copy of the locale name. 
*/ + char *loc_name = strdupa (cloc_name); + + /* LOCALE can consist of up to four recognized parts for the XPG syntax: + + language[_territory[.codeset]][@modifier] + + Beside the first all of them are allowed to be missing. If the + full specified locale is not found, the less specific one are + looked for. The various part will be stripped off according to + the following order: + (1) codeset + (2) normalized codeset + (3) territory + (4) modifier + */ + mask = _nl_explode_name (loc_name, &language, &modifier, &territory, + &codeset, &normalized_codeset); + if (mask == -1) + /* Memory allocate problem. */ + return NULL; + + /* If exactly this locale was already asked for we have an entry with + the complete name. */ + locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category], + locale_path, locale_path_len, mask, + language, territory, codeset, + normalized_codeset, modifier, + _nl_category_names.str + + _nl_category_name_idxs[category], 0); + + if (locale_file == NULL) + { + /* Find status record for addressed locale file. We have to search + through all directories in the locale path. */ + locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category], + locale_path, locale_path_len, mask, + language, territory, codeset, + normalized_codeset, modifier, + _nl_category_names.str + + _nl_category_name_idxs[category], 1); + if (locale_file == NULL) + /* This means we are out of core. */ + return NULL; + } + + /* The space for normalized_codeset is dynamically allocated. Free it. 
*/ + if (mask & XPG_NORM_CODESET) + free ((void *) normalized_codeset); + + if (locale_file->decided == 0) + _nl_load_locale (locale_file, category); + + if (locale_file->data == NULL) + { + int cnt; + for (cnt = 0; locale_file->successor[cnt] != NULL; ++cnt) + { + if (locale_file->successor[cnt]->decided == 0) + _nl_load_locale (locale_file->successor[cnt], category); + if (locale_file->successor[cnt]->data != NULL) + break; + } + /* Move the entry we found (or NULL) to the first place of + successors. */ + locale_file->successor[0] = locale_file->successor[cnt]; + locale_file = locale_file->successor[cnt]; + + if (locale_file == NULL) + return NULL; + } + + /* The LC_CTYPE category allows to check whether a locale is really + usable. If the locale name contains a charset name and the + charset name used in the locale (present in the LC_CTYPE data) is + not the same (after resolving aliases etc) we reject the locale + since using it would irritate users expecting the charset named + in the locale name. */ + if (codeset != NULL) + { + /* Get the codeset information from the locale file. 
*/ + static const int codeset_idx[] = + { + [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET), + [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET), + [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET), + [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET), + [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET), + [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET), + [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET), + [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET), + [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET), + [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET), + [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET), + [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET) + }; + const struct __locale_data *data; + const char *locale_codeset; + char *clocale_codeset; + char *ccodeset; + + data = (const struct __locale_data *) locale_file->data; + locale_codeset = + (const char *) data->values[codeset_idx[category]].string; + assert (locale_codeset != NULL); + /* Note the length of the allocated memory: +3 for up to two slashes + and the NUL byte. */ + clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3); + strip (clocale_codeset, locale_codeset); + + ccodeset = (char *) alloca (strlen (codeset) + 3); + strip (ccodeset, codeset); + + if (__gconv_compare_alias (upstr (ccodeset, ccodeset), + upstr (clocale_codeset, + clocale_codeset)) != 0) + /* The codesets are not identical, don't use the locale. */ + return NULL; + } + + /* Determine the locale name for which loading succeeded. This + information comes from the file name. The form is + <path>/<locale>/LC_foo. We must extract the <locale> part. 
*/ + if (((const struct __locale_data *) locale_file->data)->name == NULL) + { + char *cp, *endp; + + endp = strrchr (locale_file->filename, '/'); + cp = endp - 1; + while (cp[-1] != '/') + --cp; + ((struct __locale_data *) locale_file->data)->name + = __strndup (cp, endp - cp); + } + + /* Determine whether the user wants transliteration or not. */ + if (modifier != NULL + && __strcasecmp_l (modifier, "TRANSLIT", _nl_C_locobj_ptr) == 0) + ((struct __locale_data *) locale_file->data)->use_translit = 1; + + /* Increment the usage count. */ + if (((const struct __locale_data *) locale_file->data)->usage_count + < MAX_USAGE_COUNT) + ++((struct __locale_data *) locale_file->data)->usage_count; + + return (struct __locale_data *) locale_file->data; +} + + +/* Calling this function assumes the lock for handling global locale data + is acquired. */ +void +internal_function +_nl_remove_locale (int locale, struct __locale_data *data) +{ + if (--data->usage_count == 0) + { + if (data->alloc != ld_archive) + { + /* First search the entry in the list of loaded files. */ + struct loaded_l10nfile *ptr = _nl_locale_file_list[locale]; + + /* Search for the entry. It must be in the list. Otherwise it + is a bug and we crash badly. */ + while ((struct __locale_data *) ptr->data != data) + ptr = ptr->next; + + /* Mark the data as not available anymore. So when the data has + to be used again it is reloaded. */ + ptr->decided = 0; + ptr->data = NULL; + } + + /* This does the real work. */ + _nl_unload_locale (data); + } +} diff --git a/REORG.TODO/locale/freelocale.c b/REORG.TODO/locale/freelocale.c new file mode 100644 index 0000000000..393fab5ea6 --- /dev/null +++ b/REORG.TODO/locale/freelocale.c @@ -0,0 +1,54 @@ +/* Free data allocated by a call to setlocale_r + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. 
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <locale.h>
+#include <stdlib.h>
+#include <libc-lock.h>
+
+#include "localeinfo.h"
+
+
+/* Lock for protecting global data.  */
+__libc_rwlock_define (extern , __libc_setlocale_lock attribute_hidden)
+
+
+/* Release the locale object DATASET.  The static "C" locale object is
+   left alone.  Otherwise a reference is dropped, under the setlocale
+   write lock, for every category whose usage count is not UNDELETABLE,
+   and the handle itself is freed.  */
+void
+__freelocale (__locale_t dataset)
+{
+  int cnt;
+
+  /* This static object is returned for newlocale (LC_ALL_MASK, "C").  */
+  if (dataset == _nl_C_locobj_ptr)
+    return;
+
+  /* We modify global data (the usage counts).  */
+  __libc_rwlock_wrlock (__libc_setlocale_lock);
+
+  for (cnt = 0; cnt < __LC_LAST; ++cnt)
+    if (cnt != LC_ALL && dataset->__locales[cnt]->usage_count != UNDELETABLE)
+      /* We can remove the data.  */
+      _nl_remove_locale (cnt, dataset->__locales[cnt]);
+
+  /* It's done.  */
+  __libc_rwlock_unlock (__libc_setlocale_lock);
+
+  /* Free the locale_t handle itself.  */
+  free (dataset);
+}
+weak_alias (__freelocale, freelocale)
diff --git a/REORG.TODO/locale/gen-translit.pl b/REORG.TODO/locale/gen-translit.pl
new file mode 100644
index 0000000000..30d3f2f195
--- /dev/null
+++ b/REORG.TODO/locale/gen-translit.pl
@@ -0,0 +1,144 @@
+#!/usr/bin/perl -w
+# Generate the C tables for the built-in transliteration from
+# C-translit.h.in.  The input is run through the C preprocessor; every
+# remaining line of the form  "FROM" "TO"  becomes one table entry.
+# The result is written to standard output.
+
+# Use the low-precedence "or": with "||" the die clause is attached to
+# the (always true) command string and could never trigger.
+open(F, "cat C-translit.h.in | gcc -E - |")
+  or die "Cannot preprocess input file";
+
+
+# Return the number of characters the C string literal body STR
+# denotes.  A "\x" escape with its hexadecimal digits counts as one
+# character, as does any other backslash escape.
+sub cstrlen {
+  my($str) = @_;
+  my($len) = length($str);
+  my($cnt);
+  my($res) = 0;
+
+  for ($cnt = 0; $cnt < $len; ++$cnt) {
+    if (substr($str, $cnt, 1) eq '\\') {
+      # Recognize the escape sequence.
+      if (substr($str, $cnt + 1, 1) eq 'x') {
+        my($inner);
+        for ($inner = $cnt + 2; $inner < $len && $inner < $cnt + 10; ++$inner) {
+          my($ch) = substr($str, $inner, 1);
+          next if (($ch ge '0' && $ch le '9')
+                   || ($ch ge 'a' && $ch le 'f')
+                   || ($ch ge 'A' && $ch le 'F'));
+          last;
+        }
+        # NOTE(review): together with the ++$cnt of the outer loop this
+        # skips the character following the hex digits without counting
+        # it -- confirm this is intended for the input format.
+        $cnt = $inner;
+        ++$res;
+      } else {
+        die "invalid input" if ($cnt + 1 >= $len);
+        ++$res;
+        ++$cnt;
+      }
+    } else {
+      ++$res;
+    }
+  }
+
+  return $res;
+}
+
+# Collect the FROM/TO pairs and their lengths.
+while (<F>) {
+  next if (/^#/);
+  next if (/^[ ]*$/);
+  # Only strip a line terminator; chop would also eat the last real
+  # character of a final line that lacks a newline.
+  chomp;
+
+  if (/"([^\"]*)"[ ]*"(.*)"/) {
+    my($from) = $1;
+    my($to) = $2;
+    my($fromlen) = cstrlen($from);
+    my($tolen) = cstrlen($to);
+
+    push(@froms, $from);
+    push(@fromlens, $fromlen);
+    push(@tos, $to);
+    push(@tolens, $tolen);
+  }
+}
+
+printf "#include <stdint.h>\n";
+
+printf "#define NTRANSLIT %d\n", $#froms + 1;
+
+# Start offset of every FROM string; each entry is followed by one
+# terminating null character.
+printf "static const uint32_t translit_from_idx[] =\n{\n ";
+$col = 2;
+$total = 0;
+for ($cnt = 0; $cnt <= $#fromlens; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 7 >= 79) {
+      printf(",\n ");
+      $col = 2;
+    } else {
+      printf(", ");
+      $col += 2;
+    }
+  }
+  printf("%4d", $total);
+  $total += $fromlens[$cnt] + 1;
+  $col += 4;
+}
+printf("\n};\n");
+
+printf "static const wchar_t translit_from_tbl[] =\n ";
+$col = 1;
+for ($cnt = 0; $cnt <= $#froms; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 6 >= 79) {
+      printf("\n ");
+      $col = 1;
+    }
+    printf(" L\"\\0\"");
+    $col += 6;
+  }
+  if ($col > 2 && $col + length($froms[$cnt]) + 4 >= 79) {
+    printf("\n ");
+    $col = 2;
+  } else {
+    printf(" ");
+    ++$col;
+  }
+  # Print through "%s" like the TO table below so that a '%' in the
+  # data is never interpreted as a conversion specification.
+  printf("%s", "L\"$froms[$cnt]\"");
+  $col += length($froms[$cnt]) + 3;
+}
+printf(";\n");
+
+# Start offset of every TO string; each entry is followed by two
+# terminating null characters.
+printf "static const uint32_t translit_to_idx[] =\n{\n ";
+$col = 2;
+$total = 0;
+for ($cnt = 0; $cnt <= $#tolens; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 7 >= 79) {
+      printf(",\n ");
+      $col = 2;
+    } else {
+      printf(", ");
+      $col += 2;
+    }
+  }
+  printf("%4d", $total);
+  $total += $tolens[$cnt] + 2;
+  $col += 4;
+}
+printf("\n};\n");
+
+printf "static const wchar_t translit_to_tbl[] =\n ";
+$col = 1;
+for ($cnt = 0; $cnt <= $#tos; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 6 >= 79) {
+      printf("\n ");
+      $col = 1;
+    }
+    printf(" L\"\\0\"");
+    $col += 6;
+  }
+  if ($col > 2 && $col + length($tos[$cnt]) + 6 >= 79) {
+    printf("\n ");
+    $col = 2;
+  } else {
+    printf(" ");
+    ++$col;
+  }
+  printf("%s", "L\"$tos[$cnt]\\0\"");
+  $col += length($tos[$cnt]) + 5;
+}
+printf(";\n");
+
+exit 0;
diff --git a/REORG.TODO/locale/global-locale.c b/REORG.TODO/locale/global-locale.c
new file mode 100644
index 0000000000..3629c72207
--- /dev/null
+++ b/REORG.TODO/locale/global-locale.c
@@ -0,0 +1,64 @@
+/* Locale object representing the global locale controlled by setlocale.
+   Copyright (C) 2002-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <locale.h>
+#include "localeinfo.h"
+/* Declare the _nl_C_<category> object of every category as a weak
+   extern.  NOTE(review): categories.def presumably invokes
+   DEFINE_CATEGORY once per locale category -- confirm.  */
+#define DEFINE_CATEGORY(category, category_name, items, a) \
+extern struct __locale_data _nl_C_##category; weak_extern (_nl_C_##category)
+#include "categories.def"
+#undef DEFINE_CATEGORY
+
+/* Defined in locale/C-ctype.c.  These are referenced weakly as well.  */
+extern const char _nl_C_LC_CTYPE_class[] attribute_hidden;
+extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden;
+extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden;
+weak_extern (_nl_C_LC_CTYPE_class)
+weak_extern (_nl_C_LC_CTYPE_toupper)
+weak_extern (_nl_C_LC_CTYPE_tolower)
+
+/* Here we define the locale object maintained by setlocale.
+   The references in the initializer are weak, so the parts of
+   the structure that are never referred to will be zero.
+   Every category initially points to the "C" locale data and every
+   name, including the LC_ALL one, is _nl_C_name.
+   NOTE(review): the three ctype pointers are set 128 elements into
+   their tables, presumably so that negative character values can be
+   used as index -- confirm against locale/C-ctype.c.  */
+
+struct __locale_struct _nl_global_locale attribute_hidden =
+  {
+    .__locales =
+    {
+#define DEFINE_CATEGORY(category, category_name, items, a) \
+      [category] = &_nl_C_##category,
+#include "categories.def"
+#undef DEFINE_CATEGORY
+    },
+    .__names =
+    {
+      [LC_ALL] = _nl_C_name,
+#define DEFINE_CATEGORY(category, category_name, items, a) \
+      [category] = _nl_C_name,
+#include "categories.def"
+#undef DEFINE_CATEGORY
+    },
+    .__ctype_b = (const unsigned short int *) _nl_C_LC_CTYPE_class + 128,
+    .__ctype_tolower = (const int *) _nl_C_LC_CTYPE_tolower + 128,
+    .__ctype_toupper = (const int *) _nl_C_LC_CTYPE_toupper + 128
+  };
+
+#include <tls.h>
+
+/* The tsd macros don't permit an initializer.  Each thread's locale
+   pointer therefore starts out referring to the global locale.  */
+__thread __locale_t __libc_tsd_LOCALE = &_nl_global_locale;
diff --git a/REORG.TODO/locale/hashval.h b/REORG.TODO/locale/hashval.h
new file mode 100644
index 0000000000..13dabcd692
--- /dev/null
+++ b/REORG.TODO/locale/hashval.h
@@ -0,0 +1,43 @@
+/* Implement simple hashing table with string based keys.
+   Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. + Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef hashval_t +# define hashval_t unsigned long int +#endif +#include <limits.h> /* For CHAR_BIT. */ + +hashval_t +compute_hashval (const void *key, size_t keylen) +{ + size_t cnt; + hashval_t hval; + + /* Compute the hash value for the given string. The algorithm + is taken from [Aho,Sethi,Ullman], modified to reduce the number of + collisions for short strings with very varied bit patterns. + See http://www.clisp.org/haible/hashfunc.html. */ + cnt = 0; + hval = keylen; + while (cnt < keylen) + { + hval = (hval << 9) | (hval >> (sizeof hval * CHAR_BIT - 9)); + hval += (hashval_t) ((const unsigned char *) key)[cnt++]; + } + return hval != 0 ? hval : ~((hashval_t) 0); +} diff --git a/REORG.TODO/locale/indigits.h b/REORG.TODO/locale/indigits.h new file mode 100644 index 0000000000..ab38c84d1e --- /dev/null +++ b/REORG.TODO/locale/indigits.h @@ -0,0 +1,97 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 2000. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <langinfo.h> +#include <string.h> + +/* Look up the value of the next multibyte character and return its numerical + value if it is one of the digits known in the locale. If *DECIDED is + -1 this means it is not yet decided which form it is and we have to + search through all available digits. Otherwise we know which script + the digits are from. */ +static inline int +indigit_value (const char **s, size_t *len, int *decided) +{ + int from_level; + int to_level; + const char *mbdigits[10]; + int i; + int n; + + if (*decided != -1) + from_level = to_level = *decided; + else + { + from_level = 0; + to_level = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_INDIGITS_MB_LEN) - 1; + assert (from_level <= to_level); + } + + /* In this round we get the pointer to the digit strings and also perform + the first round of comparisons. */ + for (n = 0; n < 10; ++n) + { + size_t dlen; + + /* Get the string for the digits with value N. */ + mbdigits[n] = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_MB + n); + dlen = strlen (mbdigits[n]); + + if (from_level == 0 && dlen <= *len + && memcmp (*s, mbdigits[n], dlen) == 0) + { + /* Found it. */ + *s += dlen; + *len -= dlen; + if (*decided == -1) + *decided = 0; + return n; + } + + /* Advance the pointer to the next string. 
*/ + mbdigits[n] += dlen + 1; + } + + /* Now perform the remaining tests. */ + for (i = 1; i <= to_level; ++i) + { + /* Search all ten digits of this level. */ + for (n = 0; n < 10; ++n) + { + size_t dlen = strlen (mbdigits[n]); + + if (i >= from_level && dlen <= *len + && memcmp (*s, mbdigits[n], dlen) == 0) + { + /* Found it. */ + *s += dlen; + *len -= dlen; + if (*decided == -1) + *decided = from_level; + return n; + } + + /* Advance the pointer to the next string. */ + mbdigits[n] += dlen + 1; + } + } + + /* If we reach this point no matching digit was found. */ + return -1; +} diff --git a/REORG.TODO/locale/indigitswc.h b/REORG.TODO/locale/indigitswc.h new file mode 100644 index 0000000000..7e689b99d5 --- /dev/null +++ b/REORG.TODO/locale/indigitswc.h @@ -0,0 +1,85 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <langinfo.h> + +/* Look up the value of the next multibyte character and return its numerical + value if it is one of the digits known in the locale. If *DECIDED is + -1 this means it is not yet decided which form it is and we have to + search through all available digits. Otherwise we know which script + the digits are from. 
*/ +static inline int +indigitwc_value (wchar_t wc, int *decided) +{ + int from_level; + int to_level; + const wchar_t *wcdigits[10]; + int n; + + if (*decided != -1) + from_level = to_level = *decided; + else + { + from_level = 0; + to_level = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_INDIGITS_WC_LEN) - 1; + assert (from_level <= to_level); + } + + /* In this round we get the pointer to the digit strings and also perform + the first round of comparisons. */ + for (n = 0; n < 10; ++n) + { + /* Get the string for the digits with value N. */ + wcdigits[n] = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n); + wcdigits[n] += from_level; + + if (wc == *wcdigits[n]) + { + /* Found it. */ + if (*decided == -1) + *decided = 0; + return n; + } + + /* Advance the pointer to the next string. */ + ++wcdigits[n]; + } + + /* Now perform the remaining tests. */ + while (++from_level <= to_level) + { + /* Search all ten digits of this level. */ + for (n = 0; n < 10; ++n) + { + if (wc == *wcdigits[n]) + { + /* Found it. */ + if (*decided == -1) + *decided = from_level; + return n; + } + + /* Advance the pointer to the next string. */ + ++wcdigits[n]; + } + } + + /* If we reach this point no matching digit was found. */ + return -1; +} diff --git a/REORG.TODO/locale/iso-3166.def b/REORG.TODO/locale/iso-3166.def new file mode 100644 index 0000000000..52997b4bf0 --- /dev/null +++ b/REORG.TODO/locale/iso-3166.def @@ -0,0 +1,258 @@ +/* + * Defines the country codes and abbreviations according to ISO 3166. + * This is used in ld-address.c (address_finish). + * + * If you find something missing or wrong, please go to the URL + * http://www.gnu.org/software/libc/bugs.html and follow + * instructions there to file a bug report. 
+ */ +DEFINE_COUNTRY_CODE ("AALAND ISLANDS", AX, ALA, 248) +DEFINE_COUNTRY_CODE ("AFGHANISTAN", AF, AFG, 4) +DEFINE_COUNTRY_CODE ("ALBANIA", AL, ALB, 8) +DEFINE_COUNTRY_CODE ("ALGERIA", DZ, DZA, 12) +DEFINE_COUNTRY_CODE ("AMERICAN SAMOA", AS, ASM, 16) +DEFINE_COUNTRY_CODE ("ANDORRA", AD, AND, 20) +DEFINE_COUNTRY_CODE ("ANGOLA", AO, AGO, 24) +DEFINE_COUNTRY_CODE ("ANGUILLA", AI, AIA, 660) +DEFINE_COUNTRY_CODE ("ANTARCTICA", AQ, ATA, 10) +DEFINE_COUNTRY_CODE ("ANTIGUA AND BARBUDA", AG, ATG, 28) +DEFINE_COUNTRY_CODE ("ARGENTINA", AR, ARG, 32) +DEFINE_COUNTRY_CODE ("ARMENIA", AM, ARM, 51) +DEFINE_COUNTRY_CODE ("ARUBA", AW, ABW, 533) +DEFINE_COUNTRY_CODE ("AUSTRALIA", AU, AUS, 36) +DEFINE_COUNTRY_CODE ("AUSTRIA", AT, AUT, 40) +DEFINE_COUNTRY_CODE ("AZERBAIJAN", AZ, AZE, 31) +DEFINE_COUNTRY_CODE ("BAHAMAS", BS, BHS, 44) +DEFINE_COUNTRY_CODE ("BAHRAIN", BH, BHR, 48) +DEFINE_COUNTRY_CODE ("BANGLADESH", BD, BGD, 50) +DEFINE_COUNTRY_CODE ("BARBADOS", BB, BRB, 52) +DEFINE_COUNTRY_CODE ("BELARUS", BY, BLR, 112) +DEFINE_COUNTRY_CODE ("BELGIUM", BE, BEL, 56) +DEFINE_COUNTRY_CODE ("BELIZE", BZ, BLZ, 84) +DEFINE_COUNTRY_CODE ("BENIN", BJ, BEN, 204) +DEFINE_COUNTRY_CODE ("BERMUDA", BM, BMU, 60) +DEFINE_COUNTRY_CODE ("BHUTAN", BT, BTN, 64) +DEFINE_COUNTRY_CODE ("BOLIVIA", BO, BOL, 68) +DEFINE_COUNTRY_CODE ("BONAIRE, SINT EUSTATIUS AND SABA", BQ, BES, 535) +DEFINE_COUNTRY_CODE ("BOSNIA AND HERZEGOVINA", BA, BIH, 70) +DEFINE_COUNTRY_CODE ("BOTSWANA", BW, BWA, 72) +DEFINE_COUNTRY_CODE ("BOUVET ISLAND", BV, BVT, 74) +DEFINE_COUNTRY_CODE ("BRAZIL", BR, BRA, 76) +DEFINE_COUNTRY_CODE ("BRITISH INDIAN OCEAN TERRITORY", IO, IOT, 86) +DEFINE_COUNTRY_CODE ("BRUNEI DARUSSALAM", BN, BRN, 96) +DEFINE_COUNTRY_CODE ("BULGARIA", BG, BGR, 100) +DEFINE_COUNTRY_CODE ("BURKINA FASO", BF, BFA, 854) +DEFINE_COUNTRY_CODE ("BURUNDI", BI, BDI, 108) +DEFINE_COUNTRY_CODE ("CAMBODIA", KH, KHM, 116) +DEFINE_COUNTRY_CODE ("CAMEROON", CM, CMR, 120) +DEFINE_COUNTRY_CODE ("CANADA", CA, CAN, 124) +DEFINE_COUNTRY_CODE 
("CAPE VERDE", CV, CPV, 132) +DEFINE_COUNTRY_CODE ("CAYMAN ISLANDS", KY, CYM, 136) +DEFINE_COUNTRY_CODE ("CENTRAL AFRICAN REPUBLIC", CF, CAF, 140) +DEFINE_COUNTRY_CODE ("CHAD", TD, TCD, 148) +DEFINE_COUNTRY_CODE ("CHILE", CL, CHL, 152) +DEFINE_COUNTRY_CODE ("CHINA", CN, CHN, 156) +DEFINE_COUNTRY_CODE ("CHRISTMAS ISLAND", CX, CXR, 162) +DEFINE_COUNTRY_CODE ("COCOS (KEELING) ISLANDS", CC, CCK, 166) +DEFINE_COUNTRY_CODE ("COLOMBIA", CO, COL, 170) +DEFINE_COUNTRY_CODE ("COMOROS", KM, COM, 174) +DEFINE_COUNTRY_CODE ("CONGO", CG, COG, 178) +DEFINE_COUNTRY_CODE ("CONGO, THE DEMOCRATIC REPUBLIC OF THE", CD, COD, 180) +DEFINE_COUNTRY_CODE ("COOK ISLANDS", CK, COK, 184) +DEFINE_COUNTRY_CODE ("COSTA RICA", CR, CRI, 188) +DEFINE_COUNTRY_CODE ("COTE D'IVOIRE", CI, CIV, 384) +DEFINE_COUNTRY_CODE ("CROATIA", HR, HRV, 191) +DEFINE_COUNTRY_CODE ("CUBA", CU, CUB, 192) +DEFINE_COUNTRY_CODE ("CURACAO", CW, CUW, 531) +DEFINE_COUNTRY_CODE ("CYPRUS", CY, CYP, 196) +DEFINE_COUNTRY_CODE ("CZECH REPUBLIC", CZ, CZE, 203) +DEFINE_COUNTRY_CODE ("DENMARK", DK, DNK, 208) +DEFINE_COUNTRY_CODE ("DJIBOUTI", DJ, DJI, 262) +DEFINE_COUNTRY_CODE ("DOMINICA", DM, DMA, 212) +DEFINE_COUNTRY_CODE ("DOMINICAN REPUBLIC", DO, DOM, 214) +DEFINE_COUNTRY_CODE ("ECUADOR", EC, ECU, 218) +DEFINE_COUNTRY_CODE ("EGYPT", EG, EGY, 818) +DEFINE_COUNTRY_CODE ("EL SALVADOR", SV, SLV, 222) +DEFINE_COUNTRY_CODE ("EQUATORIAL GUINEA", GQ, GNQ, 226) +DEFINE_COUNTRY_CODE ("ERITREA", ER, ERI, 232) +DEFINE_COUNTRY_CODE ("ESTONIA", EE, EST, 233) +DEFINE_COUNTRY_CODE ("ETHIOPIA", ET, ETH, 231) +DEFINE_COUNTRY_CODE ("FALKLAND ISLANDS (MALVINAS)", FK, FLK, 238) +DEFINE_COUNTRY_CODE ("FAROE ISLANDS", FO, FRO, 234) +DEFINE_COUNTRY_CODE ("FIJI", FJ, FJI, 242) +DEFINE_COUNTRY_CODE ("FINLAND", FI, FIN, 246) +DEFINE_COUNTRY_CODE ("FRANCE", FR, FRA, 250) +DEFINE_COUNTRY_CODE ("FRENCH GUIANA", GF, GUF, 254) +DEFINE_COUNTRY_CODE ("FRENCH POLYNESIA", PF, PYF, 258) +DEFINE_COUNTRY_CODE ("FRENCH SOUTHERN TERRITORIES", TF, ATF, 260) 
+DEFINE_COUNTRY_CODE ("GABON", GA, GAB, 266) +DEFINE_COUNTRY_CODE ("GAMBIA", GM, GMB, 270) +DEFINE_COUNTRY_CODE ("GEORGIA", GE, GEO, 268) +DEFINE_COUNTRY_CODE ("GERMANY", DE, DEU, 276) +DEFINE_COUNTRY_CODE ("GHANA", GH, GHA, 288) +DEFINE_COUNTRY_CODE ("GIBRALTAR", GI, GIB, 292) +DEFINE_COUNTRY_CODE ("GREECE", GR, GRC, 300) +DEFINE_COUNTRY_CODE ("GREENLAND", GL, GRL, 304) +DEFINE_COUNTRY_CODE ("GRENADA", GD, GRD, 308) +DEFINE_COUNTRY_CODE ("GUADELOUPE", GP, GLP, 312) +DEFINE_COUNTRY_CODE ("GUAM", GU, GUM, 316) +DEFINE_COUNTRY_CODE ("GUATEMALA", GT, GTM, 320) +DEFINE_COUNTRY_CODE ("GUERNSEY", GG, GGY, 831) +DEFINE_COUNTRY_CODE ("GUINEA", GN, GIN, 324) +DEFINE_COUNTRY_CODE ("GUINEA-BISSAU", GW, GNB, 624) +DEFINE_COUNTRY_CODE ("GUYANA", GY, GUY, 328) +DEFINE_COUNTRY_CODE ("HAITI", HT, HTI, 332) +DEFINE_COUNTRY_CODE ("HEARD ISLAND AND MCDONALD ISLANDS", HM, HMD, 334) +DEFINE_COUNTRY_CODE ("HOLY SEE (VATICAN CITY STATE)", VA, VAT, 336) +DEFINE_COUNTRY_CODE ("HONDURAS", HN, HND, 340) +DEFINE_COUNTRY_CODE ("HONG KONG", HK, HKG, 344) +DEFINE_COUNTRY_CODE ("HUNGARY", HU, HUN, 348) +DEFINE_COUNTRY_CODE ("ICELAND", IS, ISL, 352) +DEFINE_COUNTRY_CODE ("INDIA", IN, IND, 356) +DEFINE_COUNTRY_CODE ("INDONESIA", ID, IDN, 360) +DEFINE_COUNTRY_CODE ("IRAN, ISLAMIC REPUBLIC OF", IR, IRN, 364) +DEFINE_COUNTRY_CODE ("IRAQ", IQ, IRQ, 368) +DEFINE_COUNTRY_CODE ("IRELAND", IE, IRL, 372) +DEFINE_COUNTRY_CODE ("ISLE OF MAN", IM, IMN, 833) +DEFINE_COUNTRY_CODE ("ISRAEL", IL, ISR, 376) +DEFINE_COUNTRY_CODE ("ITALY", IT, ITA, 380) +DEFINE_COUNTRY_CODE ("JAMAICA", JM, JAM, 388) +DEFINE_COUNTRY_CODE ("JAPAN", JP, JPN, 392) +DEFINE_COUNTRY_CODE ("JERSEY", JE, JEY, 832) +DEFINE_COUNTRY_CODE ("JORDAN", JO, JOR, 400) +DEFINE_COUNTRY_CODE ("KAZAKSTAN", KZ, KAZ, 398) +DEFINE_COUNTRY_CODE ("KENYA", KE, KEN, 404) +DEFINE_COUNTRY_CODE ("KIRIBATI", KI, KIR, 296) +DEFINE_COUNTRY_CODE ("KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF", KP, PRK, 408) +DEFINE_COUNTRY_CODE ("KOREA, REPUBLIC OF", KR, KOR, 410) 
+DEFINE_COUNTRY_CODE ("KUWAIT", KW, KWT, 414) +DEFINE_COUNTRY_CODE ("KYRGYZSTAN", KG, KGZ, 417) +DEFINE_COUNTRY_CODE ("LAO PEOPLE'S DEMOCRATIC REPUBLIC", LA, LAO, 418) +DEFINE_COUNTRY_CODE ("LATVIA", LV, LVA, 428) +DEFINE_COUNTRY_CODE ("LEBANON", LB, LBN, 422) +DEFINE_COUNTRY_CODE ("LESOTHO", LS, LSO, 426) +DEFINE_COUNTRY_CODE ("LIBERIA", LR, LBR, 430) +DEFINE_COUNTRY_CODE ("LIBYAN ARAB JAMAHIRIYA", LY, LBY, 434) +DEFINE_COUNTRY_CODE ("LIECHTENSTEIN", LI, LIE, 438) +DEFINE_COUNTRY_CODE ("LITHUANIA", LT, LTU, 440) +DEFINE_COUNTRY_CODE ("LUXEMBOURG", LU, LUX, 442) +DEFINE_COUNTRY_CODE ("MACAU", MO, MAC, 446) +DEFINE_COUNTRY_CODE ("MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF", MK, MKD, 807) +DEFINE_COUNTRY_CODE ("MADAGASCAR", MG, MDG, 450) +DEFINE_COUNTRY_CODE ("MALAWI", MW, MWI, 454) +DEFINE_COUNTRY_CODE ("MALAYSIA", MY, MYS, 458) +DEFINE_COUNTRY_CODE ("MALDIVES", MV, MDV, 462) +DEFINE_COUNTRY_CODE ("MALI", ML, MLI, 466) +DEFINE_COUNTRY_CODE ("MALTA", MT, MLT, 470) +DEFINE_COUNTRY_CODE ("MARSHALL ISLANDS", MH, MHL, 584) +DEFINE_COUNTRY_CODE ("MARTINIQUE", MQ, MTQ, 474) +DEFINE_COUNTRY_CODE ("MAURITANIA", MR, MRT, 478) +DEFINE_COUNTRY_CODE ("MAURITIUS", MU, MUS, 480) +DEFINE_COUNTRY_CODE ("MAYOTTE", YT, MYT, 175) +DEFINE_COUNTRY_CODE ("MEXICO", MX, MEX, 484) +DEFINE_COUNTRY_CODE ("MICRONESIA, FEDERATED STATES OF", FM, FSM, 583) +DEFINE_COUNTRY_CODE ("MOLDOVA, REPUBLIC OF", MD, MDA, 498) +DEFINE_COUNTRY_CODE ("MONACO", MC, MCO, 492) +DEFINE_COUNTRY_CODE ("MONGOLIA", MN, MNG, 496) +DEFINE_COUNTRY_CODE ("MONTENEGRO", ME, MNE, 499) +DEFINE_COUNTRY_CODE ("MONTSERRAT", MS, MSR, 500) +DEFINE_COUNTRY_CODE ("MOROCCO", MA, MAR, 504) +DEFINE_COUNTRY_CODE ("MOZAMBIQUE", MZ, MOZ, 508) +DEFINE_COUNTRY_CODE ("MYANMAR", MM, MMR, 104) +DEFINE_COUNTRY_CODE ("NAMIBIA", NA, NAM, 516) +DEFINE_COUNTRY_CODE ("NAURU", NR, NRU, 520) +DEFINE_COUNTRY_CODE ("NEPAL", NP, NPL, 524) +DEFINE_COUNTRY_CODE ("NETHERLANDS", NL, NLD, 528) +DEFINE_COUNTRY_CODE ("NETHERLANDS ANTILLES", AN, ANT, 530) 
+DEFINE_COUNTRY_CODE ("NEW CALEDONIA", NC, NCL, 540) +DEFINE_COUNTRY_CODE ("NEW ZEALAND", NZ, NZL, 554) +DEFINE_COUNTRY_CODE ("NICARAGUA", NI, NIC, 558) +DEFINE_COUNTRY_CODE ("NIGER", NE, NER, 562) +DEFINE_COUNTRY_CODE ("NIGERIA", NG, NGA, 566) +DEFINE_COUNTRY_CODE ("NIUE", NU, NIU, 570) +DEFINE_COUNTRY_CODE ("NORFOLK ISLAND", NF, NFK, 574) +DEFINE_COUNTRY_CODE ("NORTHERN MARIANA ISLANDS", MP, MNP, 580) +DEFINE_COUNTRY_CODE ("NORWAY", NO, NOR, 578) +DEFINE_COUNTRY_CODE ("OMAN", OM, OMN, 512) +DEFINE_COUNTRY_CODE ("PAKISTAN", PK, PAK, 586) +DEFINE_COUNTRY_CODE ("PALAU", PW, PLW, 585) +DEFINE_COUNTRY_CODE ("PALESTINE, STATE OF", PS, PSE, 275) +DEFINE_COUNTRY_CODE ("PANAMA", PA, PAN, 591) +DEFINE_COUNTRY_CODE ("PAPUA NEW GUINEA", PG, PNG, 598) +DEFINE_COUNTRY_CODE ("PARAGUAY", PY, PRY, 600) +DEFINE_COUNTRY_CODE ("PERU", PE, PER, 604) +DEFINE_COUNTRY_CODE ("PHILIPPINES", PH, PHL, 608) +DEFINE_COUNTRY_CODE ("PITCAIRN", PN, PCN, 612) +DEFINE_COUNTRY_CODE ("POLAND", PL, POL, 616) +DEFINE_COUNTRY_CODE ("PORTUGAL", PT, PRT, 620) +DEFINE_COUNTRY_CODE ("PUERTO RICO", PR, PRI, 630) +DEFINE_COUNTRY_CODE ("QATAR", QA, QAT, 634) +DEFINE_COUNTRY_CODE ("REUNION", RE, REU, 638) +DEFINE_COUNTRY_CODE ("ROMANIA", RO, ROU, 642) +DEFINE_COUNTRY_CODE ("RUSSIAN FEDERATION", RU, RUS, 643) +DEFINE_COUNTRY_CODE ("RWANDA", RW, RWA, 646) +DEFINE_COUNTRY_CODE ("SAINT BARTHELEMY", BL, BLM, 652) +DEFINE_COUNTRY_CODE ("SAINT HELENA", SH, SHN, 654) +DEFINE_COUNTRY_CODE ("SAINT KITTS AND NEVIS", KN, KNA, 659) +DEFINE_COUNTRY_CODE ("SAINT LUCIA", LC, LCA, 662) +DEFINE_COUNTRY_CODE ("SAINT MARTIN (FRENCH PART)", MF, MAF, 663) +DEFINE_COUNTRY_CODE ("SAINT PIERRE AND MIQUELON", PM, SPM, 666) +DEFINE_COUNTRY_CODE ("SAINT VINCENT AND THE GRENADINES", VC, VCT, 670) +DEFINE_COUNTRY_CODE ("SAMOA", WS, WSM, 882) +DEFINE_COUNTRY_CODE ("SAN MARINO", SM, SMR, 674) +DEFINE_COUNTRY_CODE ("SAO TOME AND PRINCIPE", ST, STP, 678) +DEFINE_COUNTRY_CODE ("SAUDI ARABIA", SA, SAU, 682) +DEFINE_COUNTRY_CODE ("SENEGAL", SN, 
SEN, 686) +DEFINE_COUNTRY_CODE ("SERBIA", RS, SRB, 688) +DEFINE_COUNTRY_CODE ("SEYCHELLES", SC, SYC, 690) +DEFINE_COUNTRY_CODE ("SIERRA LEONE", SL, SLE, 694) +DEFINE_COUNTRY_CODE ("SINGAPORE", SG, SGP, 702) +DEFINE_COUNTRY_CODE ("SINT MAARTEN (DUTCH PART)", SX, SXM, 534) +DEFINE_COUNTRY_CODE ("SLOVAKIA", SK, SVK, 703) +DEFINE_COUNTRY_CODE ("SLOVENIA", SI, SVN, 705) +DEFINE_COUNTRY_CODE ("SOLOMON ISLANDS", SB, SLB, 90) +DEFINE_COUNTRY_CODE ("SOMALIA", SO, SOM, 706) +DEFINE_COUNTRY_CODE ("SOUTH AFRICA", ZA, ZAF, 710) +DEFINE_COUNTRY_CODE ("SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS", GS, SGS, 239) +DEFINE_COUNTRY_CODE ("SOUTH SUDAN", SS, SSD, 728) +DEFINE_COUNTRY_CODE ("SPAIN", ES, ESP, 724) +DEFINE_COUNTRY_CODE ("SRI LANKA", LK, LKA, 144) +DEFINE_COUNTRY_CODE ("SUDAN", SD, SDN, 729) +DEFINE_COUNTRY_CODE ("SURINAME", SR, SUR, 740) +DEFINE_COUNTRY_CODE ("SVALBARD AND JAN MAYEN", SJ, SJM, 744) +DEFINE_COUNTRY_CODE ("SWAZILAND", SZ, SWZ, 748) +DEFINE_COUNTRY_CODE ("SWEDEN", SE, SWE, 752) +DEFINE_COUNTRY_CODE ("SWITZERLAND", CH, CHE, 756) +DEFINE_COUNTRY_CODE ("SYRIAN ARAB REPUBLIC", SY, SYR, 760) +DEFINE_COUNTRY_CODE ("TAIWAN, PROVINCE OF CHINA", TW, TWN, 158) +DEFINE_COUNTRY_CODE ("TAJIKISTAN", TJ, TJK, 762) +DEFINE_COUNTRY_CODE ("TANZANIA, UNITED REPUBLIC OF", TZ, TZA, 834) +DEFINE_COUNTRY_CODE ("THAILAND", TH, THA, 764) +DEFINE_COUNTRY_CODE ("TIMOR-LESTE", TL, TLS, 626) +DEFINE_COUNTRY_CODE ("TOGO", TG, TGO, 768) +DEFINE_COUNTRY_CODE ("TOKELAU", TK, TKL, 772) +DEFINE_COUNTRY_CODE ("TONGA", TO, TON, 776) +DEFINE_COUNTRY_CODE ("TRINIDAD AND TOBAGO", TT, TTO, 780) +DEFINE_COUNTRY_CODE ("TUNISIA", TN, TUN, 788) +DEFINE_COUNTRY_CODE ("TURKEY", TR, TUR, 792) +DEFINE_COUNTRY_CODE ("TURKMENISTAN", TM, TKM, 795) +DEFINE_COUNTRY_CODE ("TURKS AND CAICOS ISLANDS", TC, TCA, 796) +DEFINE_COUNTRY_CODE ("TUVALU", TV, TUV, 798) +DEFINE_COUNTRY_CODE ("UGANDA", UG, UGA, 800) +DEFINE_COUNTRY_CODE ("UKRAINE", UA, UKR, 804) +DEFINE_COUNTRY_CODE ("UNITED ARAB EMIRATES", AE, ARE, 784) 
+DEFINE_COUNTRY_CODE ("UNITED KINGDOM", GB, GBR, 826) +DEFINE_COUNTRY_CODE ("UNITED STATES", US, USA, 840) +DEFINE_COUNTRY_CODE ("UNITED STATES MINOR OUTLYING ISLANDS", UM, UMI, 581) +DEFINE_COUNTRY_CODE ("URUGUAY", UY, URY, 858) +DEFINE_COUNTRY_CODE ("UZBEKISTAN", UZ, UZB, 860) +DEFINE_COUNTRY_CODE ("VANUATU", VU, VUT, 548) +DEFINE_COUNTRY_CODE ("VENEZUELA", VE, VEN, 862) +DEFINE_COUNTRY_CODE ("VIET NAM", VN, VNM, 704) +DEFINE_COUNTRY_CODE ("VIRGIN ISLANDS, BRITISH", VG, VGB, 92) +DEFINE_COUNTRY_CODE ("VIRGIN ISLANDS, U.S.", VI, VIR, 850) +DEFINE_COUNTRY_CODE ("WALLIS AND FUTUNA", WF, WLF, 876) +DEFINE_COUNTRY_CODE ("WESTERN SAHARA", EH, ESH, 732) +DEFINE_COUNTRY_CODE ("YEMEN", YE, YEM, 887) +DEFINE_COUNTRY_CODE ("ZAMBIA", ZM, ZMB, 894) +DEFINE_COUNTRY_CODE ("ZIMBABWE", ZW, ZWE, 716) diff --git a/REORG.TODO/locale/iso-4217.def b/REORG.TODO/locale/iso-4217.def new file mode 100644 index 0000000000..84cebb97da --- /dev/null +++ b/REORG.TODO/locale/iso-4217.def @@ -0,0 +1,172 @@ +/* + * Defines the valid international currency symbols according to ISO 4217. + * This is used in monetary.c(monetary_check). + * + * If you find something missing or wrong, please go to the URL + * http://www.gnu.org/software/libc/bugs.html and follow + * instructions there to file a bug report. + * + * !!! The list has to be sorted !!! 
+ */ +DEFINE_INT_CURR("AED") /* United Arab Emirates Dirham */ +DEFINE_INT_CURR("AFN") /* Afghanistan Afghani */ +DEFINE_INT_CURR("ALL") /* Albanian Lek */ +DEFINE_INT_CURR("AMD") /* Armenia Dram */ +DEFINE_INT_CURR("ANG") /* Netherlands Antilles */ +DEFINE_INT_CURR("AOA") /* Angolan Kwanza */ +DEFINE_INT_CURR("ARS") /* Argentine Peso */ +DEFINE_INT_CURR("AUD") /* Australian Dollar */ +DEFINE_INT_CURR("AWG") /* Aruba Guilder */ +DEFINE_INT_CURR("AZN") /* Azerbaijan Manat */ +DEFINE_INT_CURR("BAM") /* Bosnia and Herzegovina Convertible Mark */ +DEFINE_INT_CURR("BBD") /* Barbados Dollar */ +DEFINE_INT_CURR("BDT") /* Bangladesh Taka */ +DEFINE_INT_CURR("BGN") /* Bulgarian Lev */ +DEFINE_INT_CURR("BHD") /* Bahraini Dinar */ +DEFINE_INT_CURR("BIF") /* Burundi Franc */ +DEFINE_INT_CURR("BMD") /* Bermudian Dollar */ +DEFINE_INT_CURR("BND") /* Brunei Dollar */ +DEFINE_INT_CURR("BOB") /* Bolivian Boliviano */ +DEFINE_INT_CURR("BRL") /* Brazil Real */ +DEFINE_INT_CURR("BSD") /* Bahamas Dollar */ +DEFINE_INT_CURR("BTN") /* Bhutan Ngultrum */ +DEFINE_INT_CURR("BWP") /* Botswana Pula */ +DEFINE_INT_CURR("BYR") /* Belarus Ruble */ +DEFINE_INT_CURR("BZD") /* Belize Dollar */ +DEFINE_INT_CURR("CAD") /* Canadian Dollar */ +DEFINE_INT_CURR("CDF") /* Congo Dem.Rep. 
Franc */ +DEFINE_INT_CURR("CHF") /* Swiss Franc (Liechtenstein) */ +DEFINE_INT_CURR("CLP") /* Chilean Peso */ +DEFINE_INT_CURR("CNY") /* China Yuan Renminbi */ +DEFINE_INT_CURR("COP") /* Colombian Peso */ +DEFINE_INT_CURR("CRC") /* Costa Rican Colon */ +DEFINE_INT_CURR("CUP") /* Cuban Peso */ +DEFINE_INT_CURR("CVE") /* Cape Verde Escudo */ +DEFINE_INT_CURR("CYP") /* Cypriot Pound */ +DEFINE_INT_CURR("CZK") /* Czech Koruna */ +DEFINE_INT_CURR("DJF") /* Djibouti Franc */ +DEFINE_INT_CURR("DKK") /* Danish Krone (Faroe Islands, Greenland) */ +DEFINE_INT_CURR("DOP") /* Dominican Republic */ +DEFINE_INT_CURR("DZD") /* Algerian Dinar */ +DEFINE_INT_CURR("EEK") /* Estonian Kroon */ +DEFINE_INT_CURR("EGP") /* Egyptian Pound */ +DEFINE_INT_CURR("ERN") /* Eritrean Nakfa */ +DEFINE_INT_CURR("ETB") /* Ethiopian Birr */ +DEFINE_INT_CURR("EUR") /* European Union Euro */ +DEFINE_INT_CURR("FJD") /* Fiji Dollar */ +DEFINE_INT_CURR("FKP") /* Falkland Islands Pound (Malvinas) */ +DEFINE_INT_CURR("GBP") /* British Pound */ +DEFINE_INT_CURR("GEL") /* Georgia Lari */ +DEFINE_INT_CURR("GHS") /* Ghana Cedi */ +DEFINE_INT_CURR("GIP") /* Gibraltar Pound */ +DEFINE_INT_CURR("GMD") /* Gambian Dalasi */ +DEFINE_INT_CURR("GNF") /* Guinea Franc */ +DEFINE_INT_CURR("GTQ") /* Guatemala Quetzal */ +DEFINE_INT_CURR("GYD") /* Guyana Dollar */ +DEFINE_INT_CURR("HKD") /* Hong Kong Dollar */ +DEFINE_INT_CURR("HNL") /* Honduras Lempira */ +DEFINE_INT_CURR("HRK") /* Croatia Kuna */ +DEFINE_INT_CURR("HTG") /* Haiti Gourde */ +DEFINE_INT_CURR("HUF") /* Hungarian Forint */ +DEFINE_INT_CURR("IDR") /* Indonesia Rupiah */ +DEFINE_INT_CURR("ILS") /* Israeli Shekel */ +DEFINE_INT_CURR("INR") /* Indian Rupee (Bhutan) */ +DEFINE_INT_CURR("IQD") /* Iraqi Dinar */ +DEFINE_INT_CURR("IRR") /* Iranian Rial */ +DEFINE_INT_CURR("ISK") /* Iceland Krona */ +DEFINE_INT_CURR("JMD") /* Jamaican Dollar */ +DEFINE_INT_CURR("JOD") /* Jordanian Dinar */ +DEFINE_INT_CURR("JPY") /* Japanese Yen */ +DEFINE_INT_CURR("KES") /* Kenyan 
Shilling */ +DEFINE_INT_CURR("KGS") /* Kyrgyzstan Som */ +DEFINE_INT_CURR("KHR") /* Democratic Kampuchea Riel */ +DEFINE_INT_CURR("KMF") /* Comoros Franc */ +DEFINE_INT_CURR("KPW") /* Democratic People's Republic of Korea Won */ +DEFINE_INT_CURR("KRW") /* Republic of Korea Won */ +DEFINE_INT_CURR("KWD") /* Kuwaiti Dinar */ +DEFINE_INT_CURR("KYD") /* Cayman Islands */ +DEFINE_INT_CURR("KZT") /* Kazakhstan Tenge */ +DEFINE_INT_CURR("LAK") /* Lao People's Democratic Republic New Kip */ +DEFINE_INT_CURR("LBP") /* Lebanese Pound */ +DEFINE_INT_CURR("LKR") /* Sri Lankan Rupee */ +DEFINE_INT_CURR("LRD") /* Liberian Dollar */ +DEFINE_INT_CURR("LSL") /* Lesotho Maloti */ +DEFINE_INT_CURR("LTL") /* Lithuanian Litas */ +DEFINE_INT_CURR("LVL") /* Latvia Lat */ +DEFINE_INT_CURR("LYD") /* Libyan Arab Jamahiriya Dinar */ +DEFINE_INT_CURR("MAD") /* Moroccan Dirham */ +DEFINE_INT_CURR("MDL") /* Moldova Lei */ +DEFINE_INT_CURR("MGA") /* Malagasy Ariary */ +DEFINE_INT_CURR("MKD") /* Macedonia Denar */ +DEFINE_INT_CURR("MMK") /* Myanmar Kyat */ +DEFINE_INT_CURR("MNT") /* Mongolia Tugrik */ +DEFINE_INT_CURR("MOP") /* Macau Pataca */ +DEFINE_INT_CURR("MRO") /* Mauritania Ouguiya */ +DEFINE_INT_CURR("MTL") /* Maltese Lira */ +DEFINE_INT_CURR("MUR") /* Mauritius Rupee */ +DEFINE_INT_CURR("MVR") /* Maldives Rufiyaa */ +DEFINE_INT_CURR("MWK") /* Malawi Kwacha */ +DEFINE_INT_CURR("MXN") /* Mexican Peso */ +DEFINE_INT_CURR("MYR") /* Malaysian Ringgit */ +DEFINE_INT_CURR("MZN") /* Mozambique Metical */ +DEFINE_INT_CURR("NAD") /* Namibia Dollar */ +DEFINE_INT_CURR("NGN") /* Nigeria Naira */ +DEFINE_INT_CURR("NIO") /* Nicaragua Cordoba Oro */ +DEFINE_INT_CURR("NOK") /* Norwegian Krone */ +DEFINE_INT_CURR("NPR") /* Nepalese Rupee */ +DEFINE_INT_CURR("NZD") /* New Zealand Dollar */ +DEFINE_INT_CURR("OMR") /* Omani Rial */ +DEFINE_INT_CURR("PAB") /* Panamanian Balboa */ +DEFINE_INT_CURR("PEN") /* Peruvian New Sol */ +DEFINE_INT_CURR("PGK") /* Papua New Guinea Kina */ +DEFINE_INT_CURR("PHP") /* Philippines 
Peso */ +DEFINE_INT_CURR("PKR") /* Pakistan Rupee */ +DEFINE_INT_CURR("PLN") /* Polish Zloty */ +DEFINE_INT_CURR("PYG") /* Paraguay Guarani */ +DEFINE_INT_CURR("QAR") /* Qatar Rial */ +DEFINE_INT_CURR("RON") /* Romanian New Leu */ +DEFINE_INT_CURR("RSD") /* Serbian Dinars */ +DEFINE_INT_CURR("RUB") /* Russian Ruble */ +DEFINE_INT_CURR("RWF") /* Rwanda Franc */ +DEFINE_INT_CURR("SAR") /* Saudi Arabia Riyal */ +DEFINE_INT_CURR("SBD") /* Solomon Islands Dollar */ +DEFINE_INT_CURR("SCR") /* Seychelles Rupee */ +DEFINE_INT_CURR("SDG") /* Sudanese Pound */ +DEFINE_INT_CURR("SEK") /* Swedish Krona */ +DEFINE_INT_CURR("SGD") /* Singapore Dollar */ +DEFINE_INT_CURR("SHP") /* St. Helena Pound */ +DEFINE_INT_CURR("SLL") /* Sierra Leone Leone */ +DEFINE_INT_CURR("SOS") /* Somalia Shilling */ +DEFINE_INT_CURR("SRD") /* Suriname Dollar */ +DEFINE_INT_CURR("SSP") /* South Sudanese Pound */ +DEFINE_INT_CURR("STD") /* Sao Tome and Principe Dobra */ +DEFINE_INT_CURR("SVC") /* El Salvador Colon */ +DEFINE_INT_CURR("SYP") /* Syrian Arab Republic Pound */ +DEFINE_INT_CURR("SZL") /* Swaziland Lilangeni */ +DEFINE_INT_CURR("THB") /* Thai Baht */ +DEFINE_INT_CURR("TJS") /* Tajikistani Somoni */ +DEFINE_INT_CURR("TMM") /* Turkmenistan Manat */ +DEFINE_INT_CURR("TND") /* Tunisian Dinar */ +DEFINE_INT_CURR("TOP") /* Tonga Pa'Anga */ +DEFINE_INT_CURR("TRY") /* New Turkish Lira */ +DEFINE_INT_CURR("TTD") /* Trinidad and Tobago */ +DEFINE_INT_CURR("TWD") /* Taiwan, Province of China Dollar */ +DEFINE_INT_CURR("TZS") /* United Republic of Tanzania Shilling */ +DEFINE_INT_CURR("UAH") /* Ukraine Hryvna */ +DEFINE_INT_CURR("UGX") /* Ugandan Shilling */ +DEFINE_INT_CURR("USD") /* United States Dollar */ +DEFINE_INT_CURR("UYU") /* Uruguay Peso Uruguayo */ +DEFINE_INT_CURR("UZS") /* Uzbekistan Sum */ +DEFINE_INT_CURR("VEF") /* Venezuelan Bolivar Fuerte */ +DEFINE_INT_CURR("VND") /* Viet Nam Dong */ +DEFINE_INT_CURR("VUV") /* Vanuatu Vatu */ +DEFINE_INT_CURR("WST") /* Samoa Tala */ 
+DEFINE_INT_CURR("XAF") /* Central African Franc (United Republic of Cameroon, Central African Republic, Chad, Congo, Gabon) */ +DEFINE_INT_CURR("XCD") /* East Caribbean Dollar (Antigua, Dominica, Grenada, Montserrat, St. Kitts-Nevis-Anguilla, Saint Lucia, Saint Vincent and the Grenadines) */ +DEFINE_INT_CURR("XDR") /* International Monetary Fund */ +DEFINE_INT_CURR("XOF") /* West African Franc (Benin, Ivory Coast, Niger, Senegal, Togo, Upper Volta) */ +DEFINE_INT_CURR("XPF") /* French Polynesia, New Caledonia, Wallis and Futuna Islands */ +DEFINE_INT_CURR("YER") /* Yemeni Rial */ +DEFINE_INT_CURR("ZAR") /* South Africa Rand (Lesotho, Namibia) */ +DEFINE_INT_CURR("ZMW") /* Zambian Kwacha */ +DEFINE_INT_CURR("ZWD") /* Zimbabwe Dollar */ diff --git a/REORG.TODO/locale/iso-639.def b/REORG.TODO/locale/iso-639.def new file mode 100644 index 0000000000..5e8c048b2a --- /dev/null +++ b/REORG.TODO/locale/iso-639.def @@ -0,0 +1,528 @@ +/* + * Defines the language codes and abbreviations according to ISO 639-[123]. + * This is used in ld-address.c (address_finish). + * + * Format is: ("English name", 639-1-code, 639-2/T-code, 639-2/B-code) + * For some languages which have no 639-2 code the 639-3 code is used. + * If you find something missing or wrong, please go to the URL + * http://www.gnu.org/software/libc/bugs.html and follow + * instructions there to file a bug report. 
+ */ + +DEFINE_LANGUAGE_CODE ("Abkhazian", ab, abk, abk) +DEFINE_LANGUAGE_CODE3 ("Achinese", ace, ace) +DEFINE_LANGUAGE_CODE3 ("Acoli", ach, ach) +DEFINE_LANGUAGE_CODE3 ("Adangme", ada, ada) +DEFINE_LANGUAGE_CODE3 ("Adyghe; Adygei", ady, ady) +DEFINE_LANGUAGE_CODE ("Afar", aa, aar, aar) +DEFINE_LANGUAGE_CODE3 ("Afrihili", afh, afh) +DEFINE_LANGUAGE_CODE ("Afrikaans", af, afr, afr) +DEFINE_LANGUAGE_CODE3 ("Afro-Asiatic (Other)", afa, afa) +DEFINE_LANGUAGE_CODE3 ("Ainu", ain, ain) +DEFINE_LANGUAGE_CODE ("Akan", ak, aka, aka) +DEFINE_LANGUAGE_CODE3 ("Akkadian", akk, akk) +DEFINE_LANGUAGE_CODE ("Albanian", sq, sqi, alb) +DEFINE_LANGUAGE_CODE3 ("Aleut", ale, ale) +DEFINE_LANGUAGE_CODE3 ("Algonquian languages", alg, alg) +DEFINE_LANGUAGE_CODE3 ("Southern Altai", alt, alt) +DEFINE_LANGUAGE_CODE3 ("Altaic (Other)", tut, tut) +DEFINE_LANGUAGE_CODE ("Amharic", am, amh, amh) +DEFINE_LANGUAGE_CODE3 ("Angika", anp, anp) +DEFINE_LANGUAGE_CODE3 ("Apache languages", apa, apa) +DEFINE_LANGUAGE_CODE ("Arabic", ar, ara, ara) +DEFINE_LANGUAGE_CODE ("Aragonese", an, arg, arg) +DEFINE_LANGUAGE_CODE3 ("Aramaic", arc, arc) +DEFINE_LANGUAGE_CODE3 ("Arapaho", arp, arp) +DEFINE_LANGUAGE_CODE3 ("Araucanian", arn, arn) +DEFINE_LANGUAGE_CODE3 ("Arawak", arw, arw) +DEFINE_LANGUAGE_CODE ("Armenian", hy, hye, arm) +DEFINE_LANGUAGE_CODE3 ("Aromanian; Arumanian; Macedo-Romanian", rup, rup) +DEFINE_LANGUAGE_CODE3 ("Artificial (Other)", art, art) +DEFINE_LANGUAGE_CODE ("Assamese", as, asm, asm) +DEFINE_LANGUAGE_CODE3 ("Asturian; Bable", ast, ast) +DEFINE_LANGUAGE_CODE3 ("Athapascan languages", ath, ath) +DEFINE_LANGUAGE_CODE3 ("Australian languages", aus, aus) +DEFINE_LANGUAGE_CODE3 ("Austronesian (Other)", map, map) +DEFINE_LANGUAGE_CODE ("Avaric", av, ava, ava) +DEFINE_LANGUAGE_CODE ("Avestan", ae, ave, ave) +DEFINE_LANGUAGE_CODE3 ("Awadhi", awa, awa) +DEFINE_LANGUAGE_CODE ("Aymara, Southern", ay, ayc, ayc) +DEFINE_LANGUAGE_CODE ("Aymara", ay, aym, aym) +DEFINE_LANGUAGE_CODE ("Azerbaijani", az, aze, 
aze) +DEFINE_LANGUAGE_CODE3 ("Balinese", ban, ban) +DEFINE_LANGUAGE_CODE3 ("Baltic (Other)", bat, bat) +DEFINE_LANGUAGE_CODE3 ("Baluchi", bal, bal) +DEFINE_LANGUAGE_CODE ("Bambara", bm, bam, bam) +DEFINE_LANGUAGE_CODE3 ("Bamileke languages", bai, bai) +DEFINE_LANGUAGE_CODE3 ("Banda", bad, bad) +DEFINE_LANGUAGE_CODE3 ("Bantu (Other)", bnt, bnt) +DEFINE_LANGUAGE_CODE3 ("Basa", bas, bas) +DEFINE_LANGUAGE_CODE ("Bashkir", ba, bak, bak) +DEFINE_LANGUAGE_CODE ("Basque", eu, eus, baq) +DEFINE_LANGUAGE_CODE3 ("Batak (Indonesia)", btk, btk) +DEFINE_LANGUAGE_CODE3 ("Beja", bej, bej) +DEFINE_LANGUAGE_CODE ("Belarusian", be, bel, bel) +DEFINE_LANGUAGE_CODE3 ("Bemba", bem, bem) +DEFINE_LANGUAGE_CODE ("Bengali", bn, ben, ben) +DEFINE_LANGUAGE_CODE3 ("Berber (Other)", ber, ber) +DEFINE_LANGUAGE_CODE3 ("Bhili", bhb, bhb) +DEFINE_LANGUAGE_CODE3 ("Bhojpuri", bho, bho) +DEFINE_LANGUAGE_CODE ("Bihari", bh, bih, bih) +DEFINE_LANGUAGE_CODE3 ("Bikol", bik, bik) +DEFINE_LANGUAGE_CODE3 ("Bini", bin, bin) +DEFINE_LANGUAGE_CODE ("Bislama", bi, bis, bis) +DEFINE_LANGUAGE_CODE3 ("Blin; Bilin", byn, byn) +DEFINE_LANGUAGE_CODE3 ("Bodo", brx, brx) +DEFINE_LANGUAGE_CODE ("Bosnian", bs, bos, bos) +DEFINE_LANGUAGE_CODE3 ("Braj", bra, bra) +DEFINE_LANGUAGE_CODE ("Breton", br, bre, bre) +DEFINE_LANGUAGE_CODE3 ("Buginese", bug, bug) +DEFINE_LANGUAGE_CODE ("Bulgarian", bg, bul, bul) +DEFINE_LANGUAGE_CODE3 ("Buriat", bua, bua) +DEFINE_LANGUAGE_CODE ("Burmese", my, mya, bur) +DEFINE_LANGUAGE_CODE3 ("Caddo", cad, cad) +DEFINE_LANGUAGE_CODE3 ("Carib", car, car) +DEFINE_LANGUAGE_CODE ("Catalan; Valencian", ca, cat, cat) +DEFINE_LANGUAGE_CODE3 ("Caucasian (Other)", cau, cau) +DEFINE_LANGUAGE_CODE3 ("Cebuano", ceb, ceb) +DEFINE_LANGUAGE_CODE3 ("Celtic (Other)", cel, cel) +DEFINE_LANGUAGE_CODE3 ("Central American Indian (Other)", cai, cai) +DEFINE_LANGUAGE_CODE3 ("Central Nahuatl", nhn, nhn) +DEFINE_LANGUAGE_CODE3 ("Central Sama", sml, sml) +DEFINE_LANGUAGE_CODE3 ("Chagatai", chg, chg) +DEFINE_LANGUAGE_CODE3 
("Chamic languages", cmc, cmc) +DEFINE_LANGUAGE_CODE ("Chamorro", ch, cha, cha) +DEFINE_LANGUAGE_CODE ("Chechen", ce, che, che) +DEFINE_LANGUAGE_CODE3 ("Cherokee", chr, chr) +DEFINE_LANGUAGE_CODE3 ("Cheyenne", chy, chy) +DEFINE_LANGUAGE_CODE2 ("Chhattisgarhi", hne) /* ISO 639-3 */ +DEFINE_LANGUAGE_CODE3 ("Chibcha", chb, chb) +DEFINE_LANGUAGE_CODE ("Chichewa; Chewa; Nyanja", ny, nya, nya) +DEFINE_LANGUAGE_CODE3 ("Chiga", cgg, cgg) +DEFINE_LANGUAGE_CODE ("Chinese", zh, zho, chi) +DEFINE_LANGUAGE_CODE3 ("Chinook jargon", chn, chn) +DEFINE_LANGUAGE_CODE3 ("Chipewyan", chp, chp) +DEFINE_LANGUAGE_CODE3 ("Choctaw", cho, cho) +DEFINE_LANGUAGE_CODE ("Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic", cu, chu, chu) +DEFINE_LANGUAGE_CODE3 ("Chuukese", chk, chk) +DEFINE_LANGUAGE_CODE ("Chuvash", cv, chv, chv) +DEFINE_LANGUAGE_CODE3 ("Classical Newari; Old Newari; Classical Nepal Bhasa", nwc, nwc) +DEFINE_LANGUAGE_CODE3 ("Coptic", cop, cop) +DEFINE_LANGUAGE_CODE ("Cornish", kw, cor, cor) +DEFINE_LANGUAGE_CODE ("Corsican", co, cos, cos) +DEFINE_LANGUAGE_CODE3 ("Creek", mus, mus) +DEFINE_LANGUAGE_CODE ("Cree", cr, cre, cre) +DEFINE_LANGUAGE_CODE3 ("Creoles and pidgins (Other)", crp, crp) +DEFINE_LANGUAGE_CODE3 ("Creoles and pidgins, English based (Other)", cpe, cpe) +DEFINE_LANGUAGE_CODE3 ("Creoles and pidgins, French-based (Other)", cpf, cpf) +DEFINE_LANGUAGE_CODE3 ("Creoles and pidgins, Portuguese-based (Other)", cpp, cpp) +DEFINE_LANGUAGE_CODE3 ("Crimean Tatar; Crimean Turkish", crh, crh) +DEFINE_LANGUAGE_CODE ("Croatian", hr, hrv, scr) +DEFINE_LANGUAGE_CODE3 ("Cushitic (Other)", cus, cus) +DEFINE_LANGUAGE_CODE ("Czech", cs, ces, cze) +DEFINE_LANGUAGE_CODE3 ("Dakota", dak, dak) +DEFINE_LANGUAGE_CODE ("Danish", da, dan, dan) +DEFINE_LANGUAGE_CODE3 ("Dargwa", dar, dar) +DEFINE_LANGUAGE_CODE3 ("Dayak", day, day) +DEFINE_LANGUAGE_CODE3 ("Delaware", del, del) +DEFINE_LANGUAGE_CODE3 ("Dinka", din, din) +DEFINE_LANGUAGE_CODE ("Divehi", dv, div, div) 
+DEFINE_LANGUAGE_CODE3 ("Dogrib", dgr, dgr) +DEFINE_LANGUAGE_CODE3 ("Dogri", doi, doi) +DEFINE_LANGUAGE_CODE3 ("Dravidian (Other)", dra, dra) +DEFINE_LANGUAGE_CODE3 ("Duala", dua, dua) +DEFINE_LANGUAGE_CODE3 ("Dutch, Middle (ca.1050-1350)", dum, dum) +DEFINE_LANGUAGE_CODE ("Dutch; Flemish", nl, nld, dut) +DEFINE_LANGUAGE_CODE3 ("Dyula", dyu, dyu) +DEFINE_LANGUAGE_CODE ("Dzongkha", dz, dzo, dzo) +DEFINE_LANGUAGE_CODE3 ("Efik", efi, efi) +DEFINE_LANGUAGE_CODE3 ("Egyptian (Ancient)", egy, egy) +DEFINE_LANGUAGE_CODE3 ("Ekajuk", eka, eka) +DEFINE_LANGUAGE_CODE3 ("Elamite", elx, elx) +DEFINE_LANGUAGE_CODE3 ("English, Middle (1100-1500)", enm, enm) +DEFINE_LANGUAGE_CODE3 ("English, Old (ca.450-1100)", ang, ang) +DEFINE_LANGUAGE_CODE ("English", en, eng, eng) +DEFINE_LANGUAGE_CODE3 ("Erzya", myv, myv) +DEFINE_LANGUAGE_CODE ("Esperanto", eo, epo, epo) +DEFINE_LANGUAGE_CODE ("Estonian", et, est, est) +DEFINE_LANGUAGE_CODE ("Ewe", ee, ewe, ewe) +DEFINE_LANGUAGE_CODE3 ("Ewondo", ewo, ewo) +DEFINE_LANGUAGE_CODE3 ("Fang", fan, fan) +DEFINE_LANGUAGE_CODE3 ("Fanti", fat, fat) +DEFINE_LANGUAGE_CODE ("Faroese", fo, fao, fao) +DEFINE_LANGUAGE_CODE ("Fijian", fj, fij, fij) +DEFINE_LANGUAGE_CODE3 ("Filipino; Pilipino", fil, fil) +DEFINE_LANGUAGE_CODE ("Finnish", fi, fin, fin) +DEFINE_LANGUAGE_CODE3 ("Finno-Ugrian (Other)", fiu, fiu) +DEFINE_LANGUAGE_CODE3 ("Fon", fon, fon) +DEFINE_LANGUAGE_CODE3 ("French, Middle (ca.1400-1800)", frm, frm) +DEFINE_LANGUAGE_CODE3 ("French, Old (842-ca.1400)", fro, fro) +DEFINE_LANGUAGE_CODE ("French", fr, fra, fre) +DEFINE_LANGUAGE_CODE3 ("Northern Frisian", frr, frr) +DEFINE_LANGUAGE_CODE3 ("Eastern Frisian", frs, frs) +DEFINE_LANGUAGE_CODE ("Western Frisian", fy, fry, fry) +DEFINE_LANGUAGE_CODE3 ("Friulian", fur, fur) +DEFINE_LANGUAGE_CODE ("Fulah", ff, ful, ful) +DEFINE_LANGUAGE_CODE ("Gaelic; Scottish Gaelic", gd, gla, gla) +DEFINE_LANGUAGE_CODE ("Galician", gl, glg, glg) +DEFINE_LANGUAGE_CODE3 ("Gan Chinese", gan, gan) +DEFINE_LANGUAGE_CODE 
("Ganda", lg, lug, lug) +DEFINE_LANGUAGE_CODE3 ("Gayo", gay, gay) +DEFINE_LANGUAGE_CODE3 ("Ga", gaa, gaa) +DEFINE_LANGUAGE_CODE3 ("Gbaya", gba, gba) +DEFINE_LANGUAGE_CODE3 ("Geez", gez, gez) +DEFINE_LANGUAGE_CODE ("Georgian", ka, kat, geo) +DEFINE_LANGUAGE_CODE3 ("German, Middle High (ca.1050-1500)", gmh, gmh) +DEFINE_LANGUAGE_CODE3 ("German, Old High (ca.750-1050)", goh, goh) +DEFINE_LANGUAGE_CODE3 ("Germanic (Other)", gem, gem) +DEFINE_LANGUAGE_CODE ("German", de, deu, ger) +DEFINE_LANGUAGE_CODE3 ("Swiss German; Alemannic", gsw, gsw) +DEFINE_LANGUAGE_CODE3 ("Gilbertese", gil, gil) +DEFINE_LANGUAGE_CODE3 ("Gondi", gon, gon) +DEFINE_LANGUAGE_CODE3 ("Gorontalo", gor, gor) +DEFINE_LANGUAGE_CODE3 ("Gothic", got, got) +DEFINE_LANGUAGE_CODE3 ("Grebo", grb, grb) +DEFINE_LANGUAGE_CODE3 ("Greek, Ancient (to 1453)", grc, grc) +DEFINE_LANGUAGE_CODE ("Greek, Modern (1453-)", el, ell, gre) +DEFINE_LANGUAGE_CODE ("Guarani", gn, grn, grn) +DEFINE_LANGUAGE_CODE ("Gujarati", gu, guj, guj) +DEFINE_LANGUAGE_CODE3 ("Gwich´in", gwi, gwi) +DEFINE_LANGUAGE_CODE3 ("Haida", hai, hai) +DEFINE_LANGUAGE_CODE ("Haitian; Haitian Creole", ht, hat, hat) +DEFINE_LANGUAGE_CODE3 ("Hakka Chinese", hak, hak) +DEFINE_LANGUAGE_CODE ("Hausa", ha, hau, hau) +DEFINE_LANGUAGE_CODE3 ("Hawaiian", haw, haw) +DEFINE_LANGUAGE_CODE ("Hebrew", he, heb, heb) +DEFINE_LANGUAGE_CODE ("Herero", hz, her, her) +DEFINE_LANGUAGE_CODE3 ("Hiligaynon", hil, hil) +DEFINE_LANGUAGE_CODE3 ("Himachali", him, him) +DEFINE_LANGUAGE_CODE ("Hindi", hi, hin, hin) +DEFINE_LANGUAGE_CODE ("Hiri Motu", ho, hmo, hmo) +DEFINE_LANGUAGE_CODE3 ("Hittite", hit, hit) +DEFINE_LANGUAGE_CODE3 ("Hmong", hmn, hmn) +DEFINE_LANGUAGE_CODE3 ("Huizhou Chinese", czh, czh) +DEFINE_LANGUAGE_CODE ("Hungarian", hu, hun, hun) +DEFINE_LANGUAGE_CODE3 ("Hupa", hup, hup) +DEFINE_LANGUAGE_CODE3 ("Iban", iba, iba) +DEFINE_LANGUAGE_CODE ("Icelandic", is, isl, ice) +DEFINE_LANGUAGE_CODE ("Ido", io, ido, ido) +DEFINE_LANGUAGE_CODE ("Igbo", ig, ibo, ibo) 
+DEFINE_LANGUAGE_CODE3 ("Ijo", ijo, ijo) +DEFINE_LANGUAGE_CODE3 ("Iloko", ilo, ilo) +DEFINE_LANGUAGE_CODE3 ("Inari Sami", smn, smn) +DEFINE_LANGUAGE_CODE3 ("Indic (Other)", inc, inc) +DEFINE_LANGUAGE_CODE3 ("Indo-European (Other)", ine, ine) +DEFINE_LANGUAGE_CODE ("Indonesian", id, ind, ind) +DEFINE_LANGUAGE_CODE3 ("Ingush", inh, inh) +DEFINE_LANGUAGE_CODE ("Interlingua (International Auxiliary Language Association)", ia, ina, ina) +DEFINE_LANGUAGE_CODE ("Interlingue", ie, ile, ile) +DEFINE_LANGUAGE_CODE ("Inuktitut", iu, iku, iku) +DEFINE_LANGUAGE_CODE ("Inupiaq", ik, ipk, ipk) +DEFINE_LANGUAGE_CODE3 ("Iranian (Other)", ira, ira) +DEFINE_LANGUAGE_CODE3 ("Irish, Middle (900-1200)", mga, mga) +DEFINE_LANGUAGE_CODE3 ("Irish, Old (to 900)", sga, sga) +DEFINE_LANGUAGE_CODE ("Irish", ga, gle, gle) +DEFINE_LANGUAGE_CODE3 ("Iroquoian languages", iro, iro) +DEFINE_LANGUAGE_CODE ("Italian", it, ita, ita) +DEFINE_LANGUAGE_CODE ("Japanese", ja, jpn, jpn) +DEFINE_LANGUAGE_CODE ("Javanese", jv, jav, jav) +DEFINE_LANGUAGE_CODE3 ("Jinyu Chinese", cjy, cjy) +DEFINE_LANGUAGE_CODE3 ("Judeo-Arabic", jrb, jrb) +DEFINE_LANGUAGE_CODE3 ("Judeo-Persian", jpr, jpr) +DEFINE_LANGUAGE_CODE3 ("Kabardian", kbd, kbd) +DEFINE_LANGUAGE_CODE3 ("Kabyle", kab, kab) +DEFINE_LANGUAGE_CODE3 ("Kachin", kac, kac) +DEFINE_LANGUAGE_CODE ("Kalaallisut; Greenlandic", kl, kal, kal) +DEFINE_LANGUAGE_CODE3 ("Kalmyk", xal, xal) +DEFINE_LANGUAGE_CODE3 ("Kamba", kam, kam) +DEFINE_LANGUAGE_CODE ("Kannada", kn, kan, kan) +DEFINE_LANGUAGE_CODE ("Kanuri", kr, kau, kau) +DEFINE_LANGUAGE_CODE3 ("Kara-Kalpak", kaa, kaa) +DEFINE_LANGUAGE_CODE3 ("Karachay-Balkar", krc, krc) +DEFINE_LANGUAGE_CODE3 ("Karelian", krl, krl) +DEFINE_LANGUAGE_CODE3 ("Karen", kar, kar) +DEFINE_LANGUAGE_CODE ("Kashmiri", ks, kas, kas) +DEFINE_LANGUAGE_CODE3 ("Kashubian", csb, csb) +DEFINE_LANGUAGE_CODE3 ("Kawi", kaw, kaw) +DEFINE_LANGUAGE_CODE ("Kazakh", kk, kaz, kaz) +DEFINE_LANGUAGE_CODE3 ("Khasi", kha, kha) +DEFINE_LANGUAGE_CODE ("Khmer", km, 
khm, khm) +DEFINE_LANGUAGE_CODE3 ("Khoisan (Other)", khi, khi) +DEFINE_LANGUAGE_CODE3 ("Khotanese", kho, kho) +DEFINE_LANGUAGE_CODE ("Kikuyu; Gikuyu", ki, kik, kik) +DEFINE_LANGUAGE_CODE3 ("Kimbundu", kmb, kmb) +DEFINE_LANGUAGE_CODE ("Kinyarwanda", rw, kin, kin) +DEFINE_LANGUAGE_CODE ("Kirghiz", ky, kir, kir) +DEFINE_LANGUAGE_CODE3 ("Klingon; tlhIngan-Hol", tlh, tlh) +DEFINE_LANGUAGE_CODE ("Komi", kv, kom, kom) +DEFINE_LANGUAGE_CODE ("Kongo", kg, kon, kon) +DEFINE_LANGUAGE_CODE3 ("Konkani", kok, kok) +DEFINE_LANGUAGE_CODE ("Korean", ko, kor, kor) +DEFINE_LANGUAGE_CODE3 ("Kosraean", kos, kos) +DEFINE_LANGUAGE_CODE3 ("Kpelle", kpe, kpe) +DEFINE_LANGUAGE_CODE3 ("Kru", kro, kro) +DEFINE_LANGUAGE_CODE ("Kuanyama; Kwanyama", kj, kua, kua) +DEFINE_LANGUAGE_CODE3 ("Kumyk", kum, kum) +DEFINE_LANGUAGE_CODE ("Kurdish", ku, kur, kur) +DEFINE_LANGUAGE_CODE3 ("Kurukh", kru, kru) +DEFINE_LANGUAGE_CODE3 ("Kutenai", kut, kut) +DEFINE_LANGUAGE_CODE3 ("Ladino", lad, lad) +DEFINE_LANGUAGE_CODE3 ("Lahnda", lah, lah) +DEFINE_LANGUAGE_CODE3 ("Lamba", lam, lam) +DEFINE_LANGUAGE_CODE ("Lao", lo, lao, lao) +DEFINE_LANGUAGE_CODE ("Latin", la, lat, lat) +DEFINE_LANGUAGE_CODE ("Latvian", lv, lav, lav) +DEFINE_LANGUAGE_CODE3 ("Lezghian", lez, lez) +DEFINE_LANGUAGE_CODE3 ("Ligurian", lij, lij) +DEFINE_LANGUAGE_CODE ("Limburgan; Limburger; Limburgish", li, lim, lim) +DEFINE_LANGUAGE_CODE ("Lingala", ln, lin, lin) +DEFINE_LANGUAGE_CODE3 ("Literary Chinese", lzh, lzh) +DEFINE_LANGUAGE_CODE ("Lithuanian", lt, lit, lit) +DEFINE_LANGUAGE_CODE3 ("Lojban", jbo, jbo) +DEFINE_LANGUAGE_CODE3 ("Low German; Low Saxon; German, Low; Saxon, Low", nds, nds) +DEFINE_LANGUAGE_CODE3 ("Lower Sorbian", dsb, dsb) +DEFINE_LANGUAGE_CODE3 ("Lozi", loz, loz) +DEFINE_LANGUAGE_CODE ("Luba-Katanga", lu, lub, lub) +DEFINE_LANGUAGE_CODE3 ("Luba-Lulua", lua, lua) +DEFINE_LANGUAGE_CODE3 ("Luiseno", lui, lui) +DEFINE_LANGUAGE_CODE3 ("Lule Sami", smj, smj) +DEFINE_LANGUAGE_CODE3 ("Lunda", lun, lun) +DEFINE_LANGUAGE_CODE3 ("Luo 
(Kenya and Tanzania)", luo, luo) +DEFINE_LANGUAGE_CODE3 ("Lushai", lus, lus) +DEFINE_LANGUAGE_CODE ("Luxembourgish; Letzeburgesch", lb, ltz, ltz) +DEFINE_LANGUAGE_CODE ("Macedonian", mk, mkd, mac) +DEFINE_LANGUAGE_CODE3 ("Madurese", mad, mad) +DEFINE_LANGUAGE_CODE3 ("Magahi", mag, mag) +DEFINE_LANGUAGE_CODE3 ("Maithili", mai, mai) +DEFINE_LANGUAGE_CODE3 ("Makasar", mak, mak) +DEFINE_LANGUAGE_CODE ("Malagasy", mg, mlg, mlg) +DEFINE_LANGUAGE_CODE ("Malayalam", ml, mal, mal) +DEFINE_LANGUAGE_CODE ("Malay", ms, msa, may) +DEFINE_LANGUAGE_CODE ("Maltese", mt, mlt, mlt) +DEFINE_LANGUAGE_CODE3 ("Manchu", mnc, mnc) +DEFINE_LANGUAGE_CODE3 ("Mandarin Chinese", cmn, cmn) +DEFINE_LANGUAGE_CODE3 ("Mandar", mdr, mdr) +DEFINE_LANGUAGE_CODE3 ("Mandingo", man, man) +DEFINE_LANGUAGE_CODE3 ("Manipuri", mni, mni) +DEFINE_LANGUAGE_CODE3 ("Manobo languages", mno, mno) +DEFINE_LANGUAGE_CODE ("Manx", gv, glv, glv) +DEFINE_LANGUAGE_CODE ("Maori", mi, mri, mao) +DEFINE_LANGUAGE_CODE ("Marathi", mr, mar, mar) +DEFINE_LANGUAGE_CODE3 ("Mari", chm, chm) +DEFINE_LANGUAGE_CODE ("Marshallese", mh, mah, mah) +DEFINE_LANGUAGE_CODE3 ("Marwari", mwr, mwr) +DEFINE_LANGUAGE_CODE3 ("Masai", mas, mas) +DEFINE_LANGUAGE_CODE3 ("Mayan languages", myn, myn) +DEFINE_LANGUAGE_CODE3 ("Meadow Mari", mhr, mhr) +DEFINE_LANGUAGE_CODE3 ("Mende", men, men) +DEFINE_LANGUAGE_CODE3 ("Mi'kmaq; Micmac", mic, mic) +DEFINE_LANGUAGE_CODE3 ("Minangkabau", min, min) +DEFINE_LANGUAGE_CODE3 ("Min Bei Chinese", mnp, mnp) +DEFINE_LANGUAGE_CODE3 ("Min Dong Chinese", cdo, cdo) +DEFINE_LANGUAGE_CODE3 ("Min Nan Chinese", nan, nan) +DEFINE_LANGUAGE_CODE3 ("Min Zhong Chinese", czo, czo) +DEFINE_LANGUAGE_CODE3 ("Mirandese", mwl, mwl) +DEFINE_LANGUAGE_CODE3 ("Miscellaneous languages", mis, mis) +DEFINE_LANGUAGE_CODE3 ("Mohawk", moh, moh) +DEFINE_LANGUAGE_CODE3 ("Moksha", mdf, mdf) +DEFINE_LANGUAGE_CODE ("Moldavian", mo, mol, mol) +DEFINE_LANGUAGE_CODE3 ("Mon-Khmer (Other)", mkh, mkh) +DEFINE_LANGUAGE_CODE ("Mongolian", mn, mon, mon) 
+DEFINE_LANGUAGE_CODE3 ("Mongo", lol, lol) +DEFINE_LANGUAGE_CODE3 ("Moroccan Arabic", ary, ary) +DEFINE_LANGUAGE_CODE3 ("Mossi", mos, mos) +DEFINE_LANGUAGE_CODE3 ("Multiple languages", mul, mul) +DEFINE_LANGUAGE_CODE3 ("Munda languages", mun, mun) +DEFINE_LANGUAGE_CODE3 ("Nahuatl", nah, nah) +DEFINE_LANGUAGE_CODE ("Nauru", na, nau, nau) +DEFINE_LANGUAGE_CODE ("Navajo; Navaho", nv, nav, nav) +DEFINE_LANGUAGE_CODE ("Ndebele, North; North Ndebele", nd, nde, nde) +DEFINE_LANGUAGE_CODE ("Ndebele, South; South Ndebele", nr, nbl, nbl) +DEFINE_LANGUAGE_CODE ("Ndonga", ng, ndo, ndo) +DEFINE_LANGUAGE_CODE3 ("Neapolitan", nap, nap) +DEFINE_LANGUAGE_CODE3 ("Nepal Bhasa; Newari", new, new) +DEFINE_LANGUAGE_CODE ("Nepali", ne, nep, nep) +DEFINE_LANGUAGE_CODE3 ("Nias", nia, nia) +DEFINE_LANGUAGE_CODE3 ("Niger-Kordofanian (Other)", nic, nic) +DEFINE_LANGUAGE_CODE3 ("Nilo-Saharan (Other)", ssa, ssa) +DEFINE_LANGUAGE_CODE3 ("Niuean", niu, niu) +DEFINE_LANGUAGE_CODE3 ("N'Ko", nqo, nqo) +DEFINE_LANGUAGE_CODE3 ("Nogai", nog, nog) +DEFINE_LANGUAGE_CODE3 ("Norse, Old", non, non) +DEFINE_LANGUAGE_CODE3 ("North American Indian", nai, nai) +DEFINE_LANGUAGE_CODE ("Northern Sami", se, sme, sme) +DEFINE_LANGUAGE_CODE3 ("Northern Sotho; Pedi; Sepedi", nso, nso) +DEFINE_LANGUAGE_CODE ("Norwegian Bokmål", nb, nob, nob) +DEFINE_LANGUAGE_CODE ("Norwegian Nynorsk", nn, nno, nno) +DEFINE_LANGUAGE_CODE ("Norwegian", no, nor, nor) +DEFINE_LANGUAGE_CODE3 ("Nubian languages", nub, nub) +DEFINE_LANGUAGE_CODE3 ("Nyamwezi", nym, nym) +DEFINE_LANGUAGE_CODE3 ("Nyankole", nyn, nyn) +DEFINE_LANGUAGE_CODE3 ("Nyoro", nyo, nyo) +DEFINE_LANGUAGE_CODE3 ("Nzima", nzi, nzi) +DEFINE_LANGUAGE_CODE ("Occitan (post 1500); Provençal", oc, oci, oci) +DEFINE_LANGUAGE_CODE ("Ojibwa", oj, oji, oji) +DEFINE_LANGUAGE_CODE ("Odia", or, ori, ori) +DEFINE_LANGUAGE_CODE ("Oromo", om, orm, orm) +DEFINE_LANGUAGE_CODE3 ("Osage", osa, osa) +DEFINE_LANGUAGE_CODE ("Ossetian; Ossetic", os, oss, oss) +DEFINE_LANGUAGE_CODE3 ("Otomian 
languages", oto, oto) +DEFINE_LANGUAGE_CODE3 ("Pahlavi", pal, pal) +DEFINE_LANGUAGE_CODE3 ("Palauan", pau, pau) +DEFINE_LANGUAGE_CODE ("Pali", pi, pli, pli) +DEFINE_LANGUAGE_CODE3 ("Pampanga", pam, pam) +DEFINE_LANGUAGE_CODE3 ("Pangasinan", pag, pag) +DEFINE_LANGUAGE_CODE ("Panjabi; Punjabi", pa, pan, pan) +DEFINE_LANGUAGE_CODE3 ("Papiamento", pap, pap) +DEFINE_LANGUAGE_CODE3 ("Papuan (Other)", paa, paa) +DEFINE_LANGUAGE_CODE3 ("Persian, Old (ca.600-400 B.C.)", peo, peo) +DEFINE_LANGUAGE_CODE ("Persian", fa, fas, per) +DEFINE_LANGUAGE_CODE3 ("Philippine (Other)", phi, phi) +DEFINE_LANGUAGE_CODE3 ("Phoenician", phn, phn) +DEFINE_LANGUAGE_CODE3 ("Pohnpeian", pon, pon) +DEFINE_LANGUAGE_CODE ("Polish", pl, pol, pol) +DEFINE_LANGUAGE_CODE ("Portuguese", pt, por, por) +DEFINE_LANGUAGE_CODE3 ("Prakrit languages", pra, pra) +DEFINE_LANGUAGE_CODE3 ("Provençal, Old (to 1500)", pro, pro) +DEFINE_LANGUAGE_CODE3 ("Pu-Xian Chinese", cpx, cpx) +DEFINE_LANGUAGE_CODE ("Pushto", ps, pus, pus) +DEFINE_LANGUAGE_CODE ("Quechua", qu, que, que) +DEFINE_LANGUAGE_CODE3 ("Quechua, Southern", quz, quz) +DEFINE_LANGUAGE_CODE ("Raeto-Romance", rm, roh, roh) +DEFINE_LANGUAGE_CODE3 ("Rajasthani", raj, raj) +DEFINE_LANGUAGE_CODE3 ("Rapanui", rap, rap) +DEFINE_LANGUAGE_CODE3 ("Rarotongan", rar, rar) +DEFINE_LANGUAGE_CODE3 ("Romance (Other)", roa, roa) +DEFINE_LANGUAGE_CODE ("Romanian", ro, ron, rum) +DEFINE_LANGUAGE_CODE3 ("Romany", rom, rom) +DEFINE_LANGUAGE_CODE ("Rundi", rn, run, run) +DEFINE_LANGUAGE_CODE ("Russian", ru, rus, rus) +DEFINE_LANGUAGE_CODE3 ("Samogitian", sgs, sgs) +DEFINE_LANGUAGE_CODE3 ("Salishan languages", sal, sal) +DEFINE_LANGUAGE_CODE3 ("Samaritan Aramaic", sam, sam) +DEFINE_LANGUAGE_CODE3 ("Sami languages (Other)", smi, smi) +DEFINE_LANGUAGE_CODE ("Samoan", sm, smo, smo) +DEFINE_LANGUAGE_CODE3 ("Sandawe", sad, sad) +DEFINE_LANGUAGE_CODE ("Sango", sg, sag, sag) +DEFINE_LANGUAGE_CODE ("Sanskrit", sa, san, san) +DEFINE_LANGUAGE_CODE3 ("Santali", sat, sat) 
+DEFINE_LANGUAGE_CODE ("Sardinian", sc, srd, srd) +DEFINE_LANGUAGE_CODE3 ("Sasak", sas, sas) +DEFINE_LANGUAGE_CODE3 ("Scots", sco, sco) +DEFINE_LANGUAGE_CODE3 ("Selkup", sel, sel) +DEFINE_LANGUAGE_CODE3 ("Semitic (Other)", sem, sem) +DEFINE_LANGUAGE_CODE ("Serbian", sr, srp, scc) +DEFINE_LANGUAGE_CODE3 ("Serer", srr, srr) +DEFINE_LANGUAGE_CODE3 ("Shan", shn, shn) +DEFINE_LANGUAGE_CODE ("Shona", sn, sna, sna) +DEFINE_LANGUAGE_CODE3 ("Shuswap", shs, shs) +DEFINE_LANGUAGE_CODE ("Sichuan Yi", ii, iii, iii) +DEFINE_LANGUAGE_CODE3 ("Sicilian", scn, scn) +DEFINE_LANGUAGE_CODE3 ("Sidamo", sid, sid) +DEFINE_LANGUAGE_CODE3 ("Sign Languages", sgn, sgn) +DEFINE_LANGUAGE_CODE3 ("Siksika", bla, bla) +DEFINE_LANGUAGE_CODE3 ("Silesian", szl, szl) +DEFINE_LANGUAGE_CODE ("Sindhi", sd, snd, snd) +DEFINE_LANGUAGE_CODE ("Sinhala; Sinhalese", si, sin, sin) +DEFINE_LANGUAGE_CODE3 ("Sino-Tibetan (Other)", sit, sit) +DEFINE_LANGUAGE_CODE3 ("Siouan languages", sio, sio) +DEFINE_LANGUAGE_CODE3 ("Skolt Sami", sms, sms) +DEFINE_LANGUAGE_CODE3 ("Slave (Athapascan)", den, den) +DEFINE_LANGUAGE_CODE3 ("Slavic (Other)", sla, sla) +DEFINE_LANGUAGE_CODE ("Slovak", sk, slk, slo) +DEFINE_LANGUAGE_CODE ("Slovenian", sl, slv, slv) +DEFINE_LANGUAGE_CODE3 ("Sogdian", sog, sog) +DEFINE_LANGUAGE_CODE ("Somali", so, som, som) +DEFINE_LANGUAGE_CODE3 ("Songhai", son, son) +DEFINE_LANGUAGE_CODE3 ("Soninke", snk, snk) +DEFINE_LANGUAGE_CODE3 ("Sorani", ckb, ckb) +DEFINE_LANGUAGE_CODE3 ("Sorbian languages", wen, wen) +DEFINE_LANGUAGE_CODE ("Sotho, Southern", st, sot, sot) +DEFINE_LANGUAGE_CODE3 ("South American Indian (Other)", sai, sai) +DEFINE_LANGUAGE_CODE3 ("Southern Sami", sma, sma) +DEFINE_LANGUAGE_CODE ("Spanish; Castilian", es, spa, spa) +DEFINE_LANGUAGE_CODE3 ("Sranan Tongo", srn, srn) +DEFINE_LANGUAGE_CODE3 ("Sukuma", suk, suk) +DEFINE_LANGUAGE_CODE3 ("Sumerian", sux, sux) +DEFINE_LANGUAGE_CODE ("Sundanese", su, sun, sun) +DEFINE_LANGUAGE_CODE3 ("Susu", sus, sus) +DEFINE_LANGUAGE_CODE ("Swahili", sw, 
swa, swa) +DEFINE_LANGUAGE_CODE ("Swati", ss, ssw, ssw) +DEFINE_LANGUAGE_CODE ("Swedish", sv, swe, swe) +DEFINE_LANGUAGE_CODE3 ("Classical Syriac", syc, syc) +DEFINE_LANGUAGE_CODE3 ("Syriac", syr, syr) +DEFINE_LANGUAGE_CODE ("Tagalog", tl, tgl, tgl) +DEFINE_LANGUAGE_CODE ("Tahitian", ty, tah, tah) +DEFINE_LANGUAGE_CODE3 ("Tai (Other)", tai, tai) +DEFINE_LANGUAGE_CODE ("Tajik", tg, tgk, tgk) +DEFINE_LANGUAGE_CODE3 ("Talossan", tzl, tzl) +DEFINE_LANGUAGE_CODE3 ("Tamashek", tmh, tmh) +DEFINE_LANGUAGE_CODE ("Tamil", ta, tam, tam) +DEFINE_LANGUAGE_CODE ("Tatar", tt, tat, tat) +DEFINE_LANGUAGE_CODE ("Telugu", te, tel, tel) +DEFINE_LANGUAGE_CODE3 ("Tereno", ter, ter) +DEFINE_LANGUAGE_CODE3 ("Tetum", tet, tet) +DEFINE_LANGUAGE_CODE ("Thai", th, tha, tha) +DEFINE_LANGUAGE_CODE3 ("Tharu, Chitwani", the, the) +DEFINE_LANGUAGE_CODE ("Tibetan", bo, bod, tib) +DEFINE_LANGUAGE_CODE3 ("Tigre", tig, tig) +DEFINE_LANGUAGE_CODE ("Tigrinya", ti, tir, tir) +DEFINE_LANGUAGE_CODE3 ("Timne", tem, tem) +DEFINE_LANGUAGE_CODE3 ("Tiv", tiv, tiv) +DEFINE_LANGUAGE_CODE3 ("Tlingit", tli, tli) +DEFINE_LANGUAGE_CODE3 ("Tok Pisin", tpi, tpi) +DEFINE_LANGUAGE_CODE3 ("Tokelau", tkl, tkl) +DEFINE_LANGUAGE_CODE3 ("Tonga (Nyasa)", tog, tog) +DEFINE_LANGUAGE_CODE ("Tonga (Tonga Islands)", to, ton, ton) +DEFINE_LANGUAGE_CODE3 ("Tsimshian", tsi, tsi) +DEFINE_LANGUAGE_CODE ("Tsonga", ts, tso, tso) +DEFINE_LANGUAGE_CODE ("Tswana", tn, tsn, tsn) +DEFINE_LANGUAGE_CODE3 ("Tulu", tcy, tcy) +DEFINE_LANGUAGE_CODE3 ("Tumbuka", tum, tum) +DEFINE_LANGUAGE_CODE3 ("Tupi languages", tup, tup) +DEFINE_LANGUAGE_CODE3 ("Turkish, Ottoman (1500-1928)", ota, ota) +DEFINE_LANGUAGE_CODE ("Turkish", tr, tur, tur) +DEFINE_LANGUAGE_CODE ("Turkmen", tk, tuk, tuk) +DEFINE_LANGUAGE_CODE3 ("Tuvalu", tvl, tvl) +DEFINE_LANGUAGE_CODE3 ("Tuvinian", tyv, tyv) +DEFINE_LANGUAGE_CODE ("Twi", tw, twi, twi) +DEFINE_LANGUAGE_CODE3 ("Udmurt", udm, udm) +DEFINE_LANGUAGE_CODE3 ("Ugaritic", uga, uga) +DEFINE_LANGUAGE_CODE ("Uighur; Uyghur", ug, uig, 
uig) +DEFINE_LANGUAGE_CODE ("Ukrainian", uk, ukr, ukr) +DEFINE_LANGUAGE_CODE3 ("Umbundu", umb, umb) +DEFINE_LANGUAGE_CODE3 ("Unami Delaware", unm, unm) +DEFINE_LANGUAGE_CODE3 ("Undetermined", und, und) +DEFINE_LANGUAGE_CODE3 ("Upper Sorbian", hsb, hsb) +DEFINE_LANGUAGE_CODE ("Urdu", ur, urd, urd) +DEFINE_LANGUAGE_CODE ("Uzbek", uz, uzb, uzb) +DEFINE_LANGUAGE_CODE3 ("Vai", vai, vai) +DEFINE_LANGUAGE_CODE ("Venda", ve, ven, ven) +DEFINE_LANGUAGE_CODE ("Vietnamese", vi, vie, vie) +DEFINE_LANGUAGE_CODE ("Volapük", vo, vol, vol) +DEFINE_LANGUAGE_CODE3 ("Votic", vot, vot) +DEFINE_LANGUAGE_CODE3 ("Wakashan languages", wak, wak) +DEFINE_LANGUAGE_CODE3 ("Walser", wae, wae) +DEFINE_LANGUAGE_CODE3 ("Walaita", wal, wal) +DEFINE_LANGUAGE_CODE ("Walloon", wa, wln, wln) +DEFINE_LANGUAGE_CODE3 ("Waray", war, war) +DEFINE_LANGUAGE_CODE3 ("Washo", was, was) +DEFINE_LANGUAGE_CODE ("Welsh", cy, cym, wel) +DEFINE_LANGUAGE_CODE ("Wolof", wo, wol, wol) +DEFINE_LANGUAGE_CODE3 ("Wu Chinese", wuu, wuu) +DEFINE_LANGUAGE_CODE ("Xhosa", xh, xho, xho) +DEFINE_LANGUAGE_CODE3 ("Xiang Chinese", hsn, hsn) +DEFINE_LANGUAGE_CODE3 ("Yakut", sah, sah) +DEFINE_LANGUAGE_CODE3 ("Yao", yao, yao) +DEFINE_LANGUAGE_CODE3 ("Yapese", yap, yap) +DEFINE_LANGUAGE_CODE ("Yiddish", yi, yid, yid) +DEFINE_LANGUAGE_CODE ("Yoruba", yo, yor, yor) +DEFINE_LANGUAGE_CODE3 ("Yue Chinese", yue, yue) +DEFINE_LANGUAGE_CODE3 ("Yupik languages", ypk, ypk) +DEFINE_LANGUAGE_CODE3 ("Zande", znd, znd) +DEFINE_LANGUAGE_CODE3 ("Zapotec", zap, zap) +DEFINE_LANGUAGE_CODE3 ("Zenaga", zen, zen) +DEFINE_LANGUAGE_CODE ("Zhuang; Chuang", za, zha, zha) +DEFINE_LANGUAGE_CODE ("Zulu", zu, zul, zul) +DEFINE_LANGUAGE_CODE3 ("Zuni", zun, zun) +DEFINE_LANGUAGE_CODE3 ("Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki", zza, zza) diff --git a/REORG.TODO/locale/langinfo.h b/REORG.TODO/locale/langinfo.h new file mode 100644 index 0000000000..759adfbd1f --- /dev/null +++ b/REORG.TODO/locale/langinfo.h @@ -0,0 +1,599 @@ +/* Access to locale-dependent 
parameters. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LANGINFO_H +#define _LANGINFO_H 1 + +/* Get the type definition. */ +#include <nl_types.h> + +#include <bits/locale.h> /* Define the __LC_* category names. */ + + +__BEGIN_DECLS + +/* Construct an `nl_item' value for `nl_langinfo' from a locale category + (LC_*) and an item index within the category. Some code may depend on + the item values within a category increasing monotonically with the + indices. */ +#define _NL_ITEM(category, index) (((category) << 16) | (index)) + +/* Extract the category and item index from a constructed `nl_item' value: the category is the part above the low 16 bits, the index is the low 16 bits. */ +#define _NL_ITEM_CATEGORY(item) ((int) (item) >> 16) +#define _NL_ITEM_INDEX(item) ((int) (item) & 0xffff) + +/* Enumeration of locale items that can be queried with `nl_langinfo'. */ +enum +{ + /* LC_TIME category: date and time formatting. */ + + /* Abbreviated days of the week. */ + ABDAY_1 = _NL_ITEM (__LC_TIME, 0), /* Sun */ +#define ABDAY_1 ABDAY_1 + ABDAY_2, +#define ABDAY_2 ABDAY_2 + ABDAY_3, +#define ABDAY_3 ABDAY_3 + ABDAY_4, +#define ABDAY_4 ABDAY_4 + ABDAY_5, +#define ABDAY_5 ABDAY_5 + ABDAY_6, +#define ABDAY_6 ABDAY_6 + ABDAY_7, +#define ABDAY_7 ABDAY_7 + + /* Long-named days of the week. 
*/ + DAY_1, /* Sunday */ +#define DAY_1 DAY_1 + DAY_2, /* Monday */ +#define DAY_2 DAY_2 + DAY_3, /* Tuesday */ +#define DAY_3 DAY_3 + DAY_4, /* Wednesday */ +#define DAY_4 DAY_4 + DAY_5, /* Thursday */ +#define DAY_5 DAY_5 + DAY_6, /* Friday */ +#define DAY_6 DAY_6 + DAY_7, /* Saturday */ +#define DAY_7 DAY_7 + + /* Abbreviated month names. */ + ABMON_1, /* Jan */ +#define ABMON_1 ABMON_1 + ABMON_2, +#define ABMON_2 ABMON_2 + ABMON_3, +#define ABMON_3 ABMON_3 + ABMON_4, +#define ABMON_4 ABMON_4 + ABMON_5, +#define ABMON_5 ABMON_5 + ABMON_6, +#define ABMON_6 ABMON_6 + ABMON_7, +#define ABMON_7 ABMON_7 + ABMON_8, +#define ABMON_8 ABMON_8 + ABMON_9, +#define ABMON_9 ABMON_9 + ABMON_10, +#define ABMON_10 ABMON_10 + ABMON_11, +#define ABMON_11 ABMON_11 + ABMON_12, +#define ABMON_12 ABMON_12 + + /* Long month names. */ + MON_1, /* January */ +#define MON_1 MON_1 + MON_2, +#define MON_2 MON_2 + MON_3, +#define MON_3 MON_3 + MON_4, +#define MON_4 MON_4 + MON_5, +#define MON_5 MON_5 + MON_6, +#define MON_6 MON_6 + MON_7, +#define MON_7 MON_7 + MON_8, +#define MON_8 MON_8 + MON_9, +#define MON_9 MON_9 + MON_10, +#define MON_10 MON_10 + MON_11, +#define MON_11 MON_11 + MON_12, +#define MON_12 MON_12 + + AM_STR, /* Ante meridiem string. */ +#define AM_STR AM_STR + PM_STR, /* Post meridiem string. */ +#define PM_STR PM_STR + + D_T_FMT, /* Date and time format for strftime. */ +#define D_T_FMT D_T_FMT + D_FMT, /* Date format for strftime. */ +#define D_FMT D_FMT + T_FMT, /* Time format for strftime. */ +#define T_FMT T_FMT + T_FMT_AMPM, /* 12-hour time format for strftime. */ +#define T_FMT_AMPM T_FMT_AMPM + + ERA, /* Alternate era. */ +#define ERA ERA + __ERA_YEAR, /* Year in alternate era format. */ +#ifdef __USE_GNU +# define ERA_YEAR __ERA_YEAR +#endif + ERA_D_FMT, /* Date in alternate era format. */ +#define ERA_D_FMT ERA_D_FMT + ALT_DIGITS, /* Alternate symbols for digits. */ +#define ALT_DIGITS ALT_DIGITS + ERA_D_T_FMT, /* Date and time in alternate era format. 
*/ +#define ERA_D_T_FMT ERA_D_T_FMT + ERA_T_FMT, /* Time in alternate era format. */ +#define ERA_T_FMT ERA_T_FMT + + _NL_TIME_ERA_NUM_ENTRIES, /* Number of entries in the era arrays. */ + _NL_TIME_ERA_ENTRIES, /* Structure with era entries in usable form.*/ + + _NL_WABDAY_1, /* Sun */ + _NL_WABDAY_2, + _NL_WABDAY_3, + _NL_WABDAY_4, + _NL_WABDAY_5, + _NL_WABDAY_6, + _NL_WABDAY_7, + + /* Long-named days of the week. */ + _NL_WDAY_1, /* Sunday */ + _NL_WDAY_2, /* Monday */ + _NL_WDAY_3, /* Tuesday */ + _NL_WDAY_4, /* Wednesday */ + _NL_WDAY_5, /* Thursday */ + _NL_WDAY_6, /* Friday */ + _NL_WDAY_7, /* Saturday */ + + /* Abbreviated month names. */ + _NL_WABMON_1, /* Jan */ + _NL_WABMON_2, + _NL_WABMON_3, + _NL_WABMON_4, + _NL_WABMON_5, + _NL_WABMON_6, + _NL_WABMON_7, + _NL_WABMON_8, + _NL_WABMON_9, + _NL_WABMON_10, + _NL_WABMON_11, + _NL_WABMON_12, + + /* Long month names. */ + _NL_WMON_1, /* January */ + _NL_WMON_2, + _NL_WMON_3, + _NL_WMON_4, + _NL_WMON_5, + _NL_WMON_6, + _NL_WMON_7, + _NL_WMON_8, + _NL_WMON_9, + _NL_WMON_10, + _NL_WMON_11, + _NL_WMON_12, + + _NL_WAM_STR, /* Ante meridiem string. */ + _NL_WPM_STR, /* Post meridiem string. */ + + _NL_WD_T_FMT, /* Date and time format for strftime. */ + _NL_WD_FMT, /* Date format for strftime. */ + _NL_WT_FMT, /* Time format for strftime. */ + _NL_WT_FMT_AMPM, /* 12-hour time format for strftime. */ + + _NL_WERA_YEAR, /* Year in alternate era format. */ + _NL_WERA_D_FMT, /* Date in alternate era format. */ + _NL_WALT_DIGITS, /* Alternate symbols for digits. */ + _NL_WERA_D_T_FMT, /* Date and time in alternate era format. */ + _NL_WERA_T_FMT, /* Time in alternate era format. */ + + _NL_TIME_WEEK_NDAYS, + _NL_TIME_WEEK_1STDAY, + _NL_TIME_WEEK_1STWEEK, + _NL_TIME_FIRST_WEEKDAY, + _NL_TIME_FIRST_WORKDAY, + _NL_TIME_CAL_DIRECTION, + _NL_TIME_TIMEZONE, + + _DATE_FMT, /* strftime format for date. 
*/ +#define _DATE_FMT _DATE_FMT + _NL_W_DATE_FMT, + + _NL_TIME_CODESET, + + _NL_NUM_LC_TIME, /* Number of indices in LC_TIME category. */ + + /* LC_COLLATE category: text sorting. + This information is accessed by the strcoll and strxfrm functions. + These `nl_langinfo' names are used only internally. */ + _NL_COLLATE_NRULES = _NL_ITEM (__LC_COLLATE, 0), + _NL_COLLATE_RULESETS, + _NL_COLLATE_TABLEMB, + _NL_COLLATE_WEIGHTMB, + _NL_COLLATE_EXTRAMB, + _NL_COLLATE_INDIRECTMB, + _NL_COLLATE_GAP1, + _NL_COLLATE_GAP2, + _NL_COLLATE_GAP3, + _NL_COLLATE_TABLEWC, + _NL_COLLATE_WEIGHTWC, + _NL_COLLATE_EXTRAWC, + _NL_COLLATE_INDIRECTWC, + _NL_COLLATE_SYMB_HASH_SIZEMB, + _NL_COLLATE_SYMB_TABLEMB, + _NL_COLLATE_SYMB_EXTRAMB, + _NL_COLLATE_COLLSEQMB, + _NL_COLLATE_COLLSEQWC, + _NL_COLLATE_CODESET, + _NL_NUM_LC_COLLATE, + + /* LC_CTYPE category: character classification. + This information is accessed by the functions in <ctype.h>. + These `nl_langinfo' names are used only internally. */ + _NL_CTYPE_CLASS = _NL_ITEM (__LC_CTYPE, 0), + _NL_CTYPE_TOUPPER, + _NL_CTYPE_GAP1, + _NL_CTYPE_TOLOWER, + _NL_CTYPE_GAP2, + _NL_CTYPE_CLASS32, + _NL_CTYPE_GAP3, + _NL_CTYPE_GAP4, + _NL_CTYPE_GAP5, + _NL_CTYPE_GAP6, + _NL_CTYPE_CLASS_NAMES, + _NL_CTYPE_MAP_NAMES, + _NL_CTYPE_WIDTH, + _NL_CTYPE_MB_CUR_MAX, + _NL_CTYPE_CODESET_NAME, + CODESET = _NL_CTYPE_CODESET_NAME, +#define CODESET CODESET + _NL_CTYPE_TOUPPER32, + _NL_CTYPE_TOLOWER32, + _NL_CTYPE_CLASS_OFFSET, + _NL_CTYPE_MAP_OFFSET, + _NL_CTYPE_INDIGITS_MB_LEN, + _NL_CTYPE_INDIGITS0_MB, + _NL_CTYPE_INDIGITS1_MB, + _NL_CTYPE_INDIGITS2_MB, + _NL_CTYPE_INDIGITS3_MB, + _NL_CTYPE_INDIGITS4_MB, + _NL_CTYPE_INDIGITS5_MB, + _NL_CTYPE_INDIGITS6_MB, + _NL_CTYPE_INDIGITS7_MB, + _NL_CTYPE_INDIGITS8_MB, + _NL_CTYPE_INDIGITS9_MB, + _NL_CTYPE_INDIGITS_WC_LEN, + _NL_CTYPE_INDIGITS0_WC, + _NL_CTYPE_INDIGITS1_WC, + _NL_CTYPE_INDIGITS2_WC, + _NL_CTYPE_INDIGITS3_WC, + _NL_CTYPE_INDIGITS4_WC, + _NL_CTYPE_INDIGITS5_WC, + _NL_CTYPE_INDIGITS6_WC, + 
_NL_CTYPE_INDIGITS7_WC, + _NL_CTYPE_INDIGITS8_WC, + _NL_CTYPE_INDIGITS9_WC, + _NL_CTYPE_OUTDIGIT0_MB, + _NL_CTYPE_OUTDIGIT1_MB, + _NL_CTYPE_OUTDIGIT2_MB, + _NL_CTYPE_OUTDIGIT3_MB, + _NL_CTYPE_OUTDIGIT4_MB, + _NL_CTYPE_OUTDIGIT5_MB, + _NL_CTYPE_OUTDIGIT6_MB, + _NL_CTYPE_OUTDIGIT7_MB, + _NL_CTYPE_OUTDIGIT8_MB, + _NL_CTYPE_OUTDIGIT9_MB, + _NL_CTYPE_OUTDIGIT0_WC, + _NL_CTYPE_OUTDIGIT1_WC, + _NL_CTYPE_OUTDIGIT2_WC, + _NL_CTYPE_OUTDIGIT3_WC, + _NL_CTYPE_OUTDIGIT4_WC, + _NL_CTYPE_OUTDIGIT5_WC, + _NL_CTYPE_OUTDIGIT6_WC, + _NL_CTYPE_OUTDIGIT7_WC, + _NL_CTYPE_OUTDIGIT8_WC, + _NL_CTYPE_OUTDIGIT9_WC, + _NL_CTYPE_TRANSLIT_TAB_SIZE, + _NL_CTYPE_TRANSLIT_FROM_IDX, + _NL_CTYPE_TRANSLIT_FROM_TBL, + _NL_CTYPE_TRANSLIT_TO_IDX, + _NL_CTYPE_TRANSLIT_TO_TBL, + _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN, + _NL_CTYPE_TRANSLIT_DEFAULT_MISSING, + _NL_CTYPE_TRANSLIT_IGNORE_LEN, + _NL_CTYPE_TRANSLIT_IGNORE, + _NL_CTYPE_MAP_TO_NONASCII, + _NL_CTYPE_NONASCII_CASE, + _NL_CTYPE_EXTRA_MAP_1, + _NL_CTYPE_EXTRA_MAP_2, + _NL_CTYPE_EXTRA_MAP_3, + _NL_CTYPE_EXTRA_MAP_4, + _NL_CTYPE_EXTRA_MAP_5, + _NL_CTYPE_EXTRA_MAP_6, + _NL_CTYPE_EXTRA_MAP_7, + _NL_CTYPE_EXTRA_MAP_8, + _NL_CTYPE_EXTRA_MAP_9, + _NL_CTYPE_EXTRA_MAP_10, + _NL_CTYPE_EXTRA_MAP_11, + _NL_CTYPE_EXTRA_MAP_12, + _NL_CTYPE_EXTRA_MAP_13, + _NL_CTYPE_EXTRA_MAP_14, + _NL_NUM_LC_CTYPE, + + /* LC_MONETARY category: formatting of monetary quantities. + These items each correspond to a member of `struct lconv', + defined in <locale.h>. 
*/ + __INT_CURR_SYMBOL = _NL_ITEM (__LC_MONETARY, 0), +#ifdef __USE_GNU +# define INT_CURR_SYMBOL __INT_CURR_SYMBOL +#endif + __CURRENCY_SYMBOL, +#ifdef __USE_GNU +# define CURRENCY_SYMBOL __CURRENCY_SYMBOL +#endif + __MON_DECIMAL_POINT, +#ifdef __USE_GNU +# define MON_DECIMAL_POINT __MON_DECIMAL_POINT +#endif + __MON_THOUSANDS_SEP, +#ifdef __USE_GNU +# define MON_THOUSANDS_SEP __MON_THOUSANDS_SEP +#endif + __MON_GROUPING, +#ifdef __USE_GNU +# define MON_GROUPING __MON_GROUPING +#endif + __POSITIVE_SIGN, +#ifdef __USE_GNU +# define POSITIVE_SIGN __POSITIVE_SIGN +#endif + __NEGATIVE_SIGN, +#ifdef __USE_GNU +# define NEGATIVE_SIGN __NEGATIVE_SIGN +#endif + __INT_FRAC_DIGITS, +#ifdef __USE_GNU +# define INT_FRAC_DIGITS __INT_FRAC_DIGITS +#endif + __FRAC_DIGITS, +#ifdef __USE_GNU +# define FRAC_DIGITS __FRAC_DIGITS +#endif + __P_CS_PRECEDES, +#ifdef __USE_GNU +# define P_CS_PRECEDES __P_CS_PRECEDES +#endif + __P_SEP_BY_SPACE, +#ifdef __USE_GNU +# define P_SEP_BY_SPACE __P_SEP_BY_SPACE +#endif + __N_CS_PRECEDES, +#ifdef __USE_GNU +# define N_CS_PRECEDES __N_CS_PRECEDES +#endif + __N_SEP_BY_SPACE, +#ifdef __USE_GNU +# define N_SEP_BY_SPACE __N_SEP_BY_SPACE +#endif + __P_SIGN_POSN, +#ifdef __USE_GNU +# define P_SIGN_POSN __P_SIGN_POSN +#endif + __N_SIGN_POSN, +#ifdef __USE_GNU +# define N_SIGN_POSN __N_SIGN_POSN +#endif + _NL_MONETARY_CRNCYSTR, +#define CRNCYSTR _NL_MONETARY_CRNCYSTR + __INT_P_CS_PRECEDES, +#ifdef __USE_GNU +# define INT_P_CS_PRECEDES __INT_P_CS_PRECEDES +#endif + __INT_P_SEP_BY_SPACE, +#ifdef __USE_GNU +# define INT_P_SEP_BY_SPACE __INT_P_SEP_BY_SPACE +#endif + __INT_N_CS_PRECEDES, +#ifdef __USE_GNU +# define INT_N_CS_PRECEDES __INT_N_CS_PRECEDES +#endif + __INT_N_SEP_BY_SPACE, +#ifdef __USE_GNU +# define INT_N_SEP_BY_SPACE __INT_N_SEP_BY_SPACE +#endif + __INT_P_SIGN_POSN, +#ifdef __USE_GNU +# define INT_P_SIGN_POSN __INT_P_SIGN_POSN +#endif + __INT_N_SIGN_POSN, +#ifdef __USE_GNU +# define INT_N_SIGN_POSN __INT_N_SIGN_POSN +#endif + 
_NL_MONETARY_DUO_INT_CURR_SYMBOL, + _NL_MONETARY_DUO_CURRENCY_SYMBOL, + _NL_MONETARY_DUO_INT_FRAC_DIGITS, + _NL_MONETARY_DUO_FRAC_DIGITS, + _NL_MONETARY_DUO_P_CS_PRECEDES, + _NL_MONETARY_DUO_P_SEP_BY_SPACE, + _NL_MONETARY_DUO_N_CS_PRECEDES, + _NL_MONETARY_DUO_N_SEP_BY_SPACE, + _NL_MONETARY_DUO_INT_P_CS_PRECEDES, + _NL_MONETARY_DUO_INT_P_SEP_BY_SPACE, + _NL_MONETARY_DUO_INT_N_CS_PRECEDES, + _NL_MONETARY_DUO_INT_N_SEP_BY_SPACE, + _NL_MONETARY_DUO_P_SIGN_POSN, + _NL_MONETARY_DUO_N_SIGN_POSN, + _NL_MONETARY_DUO_INT_P_SIGN_POSN, + _NL_MONETARY_DUO_INT_N_SIGN_POSN, + _NL_MONETARY_UNO_VALID_FROM, + _NL_MONETARY_UNO_VALID_TO, + _NL_MONETARY_DUO_VALID_FROM, + _NL_MONETARY_DUO_VALID_TO, + _NL_MONETARY_CONVERSION_RATE, + _NL_MONETARY_DECIMAL_POINT_WC, + _NL_MONETARY_THOUSANDS_SEP_WC, + _NL_MONETARY_CODESET, + _NL_NUM_LC_MONETARY, + + /* LC_NUMERIC category: formatting of numbers. + These also correspond to members of `struct lconv'; see <locale.h>. */ + __DECIMAL_POINT = _NL_ITEM (__LC_NUMERIC, 0), +#ifdef __USE_GNU +# define DECIMAL_POINT __DECIMAL_POINT +#endif + RADIXCHAR = __DECIMAL_POINT, +#define RADIXCHAR RADIXCHAR + __THOUSANDS_SEP, +#ifdef __USE_GNU +# define THOUSANDS_SEP __THOUSANDS_SEP +#endif + THOUSEP = __THOUSANDS_SEP, +#define THOUSEP THOUSEP + __GROUPING, +#ifdef __USE_GNU +# define GROUPING __GROUPING +#endif + _NL_NUMERIC_DECIMAL_POINT_WC, + _NL_NUMERIC_THOUSANDS_SEP_WC, + _NL_NUMERIC_CODESET, + _NL_NUM_LC_NUMERIC, + + __YESEXPR = _NL_ITEM (__LC_MESSAGES, 0), /* Regex matching ``yes'' input. */ +#define YESEXPR __YESEXPR + __NOEXPR, /* Regex matching ``no'' input. */ +#define NOEXPR __NOEXPR + __YESSTR, /* Output string for ``yes''. */ +#if defined __USE_GNU || (defined __USE_XOPEN && !defined __USE_XOPEN2K) +# define YESSTR __YESSTR +#endif + __NOSTR, /* Output string for ``no''. 
*/ +#if defined __USE_GNU || (defined __USE_XOPEN && !defined __USE_XOPEN2K) +# define NOSTR __NOSTR +#endif + _NL_MESSAGES_CODESET, + _NL_NUM_LC_MESSAGES, + + _NL_PAPER_HEIGHT = _NL_ITEM (__LC_PAPER, 0), /* First item of the LC_PAPER category. */ + _NL_PAPER_WIDTH, + _NL_PAPER_CODESET, + _NL_NUM_LC_PAPER, + + _NL_NAME_NAME_FMT = _NL_ITEM (__LC_NAME, 0), /* First item of the LC_NAME category. */ + _NL_NAME_NAME_GEN, + _NL_NAME_NAME_MR, + _NL_NAME_NAME_MRS, + _NL_NAME_NAME_MISS, + _NL_NAME_NAME_MS, + _NL_NAME_CODESET, + _NL_NUM_LC_NAME, + + _NL_ADDRESS_POSTAL_FMT = _NL_ITEM (__LC_ADDRESS, 0), /* First item of the LC_ADDRESS category. */ + _NL_ADDRESS_COUNTRY_NAME, + _NL_ADDRESS_COUNTRY_POST, + _NL_ADDRESS_COUNTRY_AB2, + _NL_ADDRESS_COUNTRY_AB3, + _NL_ADDRESS_COUNTRY_CAR, + _NL_ADDRESS_COUNTRY_NUM, + _NL_ADDRESS_COUNTRY_ISBN, + _NL_ADDRESS_LANG_NAME, + _NL_ADDRESS_LANG_AB, + _NL_ADDRESS_LANG_TERM, + _NL_ADDRESS_LANG_LIB, + _NL_ADDRESS_CODESET, + _NL_NUM_LC_ADDRESS, + + _NL_TELEPHONE_TEL_INT_FMT = _NL_ITEM (__LC_TELEPHONE, 0), /* First item of the LC_TELEPHONE category. */ + _NL_TELEPHONE_TEL_DOM_FMT, + _NL_TELEPHONE_INT_SELECT, + _NL_TELEPHONE_INT_PREFIX, + _NL_TELEPHONE_CODESET, + _NL_NUM_LC_TELEPHONE, + + _NL_MEASUREMENT_MEASUREMENT = _NL_ITEM (__LC_MEASUREMENT, 0), /* First item of the LC_MEASUREMENT category. */ + _NL_MEASUREMENT_CODESET, + _NL_NUM_LC_MEASUREMENT, + + _NL_IDENTIFICATION_TITLE = _NL_ITEM (__LC_IDENTIFICATION, 0), /* First item of the LC_IDENTIFICATION category. */ + _NL_IDENTIFICATION_SOURCE, + _NL_IDENTIFICATION_ADDRESS, + _NL_IDENTIFICATION_CONTACT, + _NL_IDENTIFICATION_EMAIL, + _NL_IDENTIFICATION_TEL, + _NL_IDENTIFICATION_FAX, + _NL_IDENTIFICATION_LANGUAGE, + _NL_IDENTIFICATION_TERRITORY, + _NL_IDENTIFICATION_AUDIENCE, + _NL_IDENTIFICATION_APPLICATION, + _NL_IDENTIFICATION_ABBREVIATION, + _NL_IDENTIFICATION_REVISION, + _NL_IDENTIFICATION_DATE, + _NL_IDENTIFICATION_CATEGORY, + _NL_IDENTIFICATION_CODESET, + _NL_NUM_LC_IDENTIFICATION, + + /* This marks the highest value used. */ + _NL_NUM +}; + +/* This macro produces an item you can pass to `nl_langinfo' or + `nl_langinfo_l' to get the name of the locale in use for CATEGORY. 
*/ +#define _NL_LOCALE_NAME(category) _NL_ITEM ((category), \ + _NL_ITEM_INDEX (-1)) /* _NL_ITEM_INDEX masks with 0xffff, so the index part is 0xffff. */ +#ifdef __USE_GNU +# define NL_LOCALE_NAME(category) _NL_LOCALE_NAME (category) /* Same macro without the leading underscore; defined only under __USE_GNU. */ +#endif + + +/* Return the current locale's value for ITEM. + If ITEM is invalid, an empty string is returned. + + The string returned will not change until `setlocale' is called; + it is usually in read-only memory and cannot be modified. */ + +extern char *nl_langinfo (nl_item __item) __THROW; + + +#ifdef __USE_XOPEN2K8 +/* This interface is for the extended locale model. See <locale.h> for + more information. */ + +/* Get locale datatype definition. */ +# include <xlocale.h> + +/* Just like nl_langinfo but get the information from the locale object L. */ +extern char *nl_langinfo_l (nl_item __item, __locale_t __l); +#endif + +__END_DECLS + +#endif /* langinfo.h */ diff --git a/REORG.TODO/locale/lc-address.c b/REORG.TODO/locale/lc-address.c new file mode 100644 index 0000000000..ef50efbae9 --- /dev/null +++ b/REORG.TODO/locale/lc-address.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_ADDRESS category. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_ADDRESS); diff --git a/REORG.TODO/locale/lc-collate.c b/REORG.TODO/locale/lc-collate.c new file mode 100644 index 0000000000..387a365c2c --- /dev/null +++ b/REORG.TODO/locale/lc-collate.c @@ -0,0 +1,23 @@ +/* Define current locale data for LC_COLLATE category. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" +#include <endian.h> + + +_NL_CURRENT_DEFINE (LC_COLLATE); diff --git a/REORG.TODO/locale/lc-ctype.c b/REORG.TODO/locale/lc-ctype.c new file mode 100644 index 0000000000..752577d92e --- /dev/null +++ b/REORG.TODO/locale/lc-ctype.c @@ -0,0 +1,110 @@ +/* Define current locale data for LC_CTYPE category. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" +#include <ctype.h> +#include <endian.h> +#include <stdint.h> + +_NL_CURRENT_DEFINE (LC_CTYPE); + +/* Called after LC_CTYPE data has been loaded; publishes the class and + case-conversion tables to the variables used by the ctype.h macros. */ + + + +void +_nl_postload_ctype (void) +{ +#define current(type,x,offset) \ + ((const type *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_##x) + offset) /* LC_CTYPE item X of the current locale as a `const TYPE *', advanced by OFFSET elements. */ + + const union locale_data_value *const ctypes + = _nl_global_locale.__locales[LC_CTYPE]->values; /* Item values of the global locale's LC_CTYPE data. */ + +/* These thread-local variables are defined in ctype-info.c. + The declarations here must match those in localeinfo.h. + + These point into arrays of 384, so they can be indexed by any `unsigned + char' value [0,255]; by EOF (-1); or by any `signed char' value + [-128,-1). ISO C requires that the ctype functions work for `unsigned + char' values and for EOF; we also support negative `signed char' values + for broken old programs. The case conversion arrays are of `int's + rather than `unsigned char's because tolower (EOF) must be EOF, which + doesn't fit into an `unsigned char'. More importantly today, + the arrays are also used for multi-byte character sets. + + First we update the special members of _nl_global_locale as newlocale + would. This is necessary for uselocale (LC_GLOBAL_LOCALE) to find these + values properly. */ + + _nl_global_locale.__ctype_b = (const unsigned short int *) + ctypes[_NL_ITEM_INDEX (_NL_CTYPE_CLASS)].string + 128; /* Bias by 128 elements so that indices down to -128 stay inside the table. */ + _nl_global_locale.__ctype_tolower = (const int *) + ctypes[_NL_ITEM_INDEX (_NL_CTYPE_TOLOWER)].string + 128; + _nl_global_locale.__ctype_toupper = (const int *) + ctypes[_NL_ITEM_INDEX (_NL_CTYPE_TOUPPER)].string + 128; + + /* Next we must set the thread-local caches if and only if this thread is + in fact using the global locale. 
*/ + if (_NL_CURRENT_LOCALE == &_nl_global_locale) + { + __libc_tsd_set (const uint16_t *, CTYPE_B, + (void *) _nl_global_locale.__ctype_b); + __libc_tsd_set (const int32_t *, CTYPE_TOUPPER, + (void *) _nl_global_locale.__ctype_toupper); + __libc_tsd_set (const int32_t *, CTYPE_TOLOWER, + (void *) _nl_global_locale.__ctype_tolower); + } + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) + /* We must use the exported names to access these so we are sure to + be accessing the main executable's copy if it has COPY relocs. */ + + extern const unsigned short int *__ctype_b; /* Characteristics. */ + extern const __int32_t *__ctype_tolower; /* Case conversions. */ + extern const __int32_t *__ctype_toupper; /* Case conversions. */ + + extern const uint32_t *__ctype32_b; + extern const uint32_t *__ctype32_toupper; + extern const uint32_t *__ctype32_tolower; + + /* We need the .symver declarations these macros generate so that + our references are explicitly bound to the versioned symbol names + rather than the unadorned names that are not exported. When the + linker sees these bound to local symbols (as the unexported names are) + then it doesn't generate a proper relocation to the global symbols. + We need those relocations so that a versioned definition with a COPY + reloc in an executable will override the libc.so definition. 
*/ + +compat_symbol (libc, __ctype_b, __ctype_b, GLIBC_2_0); +compat_symbol (libc, __ctype_tolower, __ctype_tolower, GLIBC_2_0); +compat_symbol (libc, __ctype_toupper, __ctype_toupper, GLIBC_2_0); +compat_symbol (libc, __ctype32_b, __ctype32_b, GLIBC_2_0); +compat_symbol (libc, __ctype32_tolower, __ctype32_tolower, GLIBC_2_2); +compat_symbol (libc, __ctype32_toupper, __ctype32_toupper, GLIBC_2_2); + + __ctype_b = current (uint16_t, CLASS, 128); /* Same 128-element bias as the _nl_global_locale members set earlier in this function. */ + __ctype_toupper = current (int32_t, TOUPPER, 128); + __ctype_tolower = current (int32_t, TOLOWER, 128); + __ctype32_b = current (uint32_t, CLASS32, 0); /* The 32-bit tables are taken at offset 0, with no bias. */ + __ctype32_toupper = current (uint32_t, TOUPPER32, 0); + __ctype32_tolower = current (uint32_t, TOLOWER32, 0); +#endif +} diff --git a/REORG.TODO/locale/lc-identification.c b/REORG.TODO/locale/lc-identification.c new file mode 100644 index 0000000000..abaf50adf2 --- /dev/null +++ b/REORG.TODO/locale/lc-identification.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_IDENTIFICATION category. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_IDENTIFICATION); diff --git a/REORG.TODO/locale/lc-measurement.c b/REORG.TODO/locale/lc-measurement.c new file mode 100644 index 0000000000..8eb1e0cde1 --- /dev/null +++ b/REORG.TODO/locale/lc-measurement.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_MEASUREMENT category. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_MEASUREMENT); diff --git a/REORG.TODO/locale/lc-messages.c b/REORG.TODO/locale/lc-messages.c new file mode 100644 index 0000000000..df4e44dcae --- /dev/null +++ b/REORG.TODO/locale/lc-messages.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_MESSAGES category. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_MESSAGES); diff --git a/REORG.TODO/locale/lc-monetary.c b/REORG.TODO/locale/lc-monetary.c new file mode 100644 index 0000000000..8b49eeb831 --- /dev/null +++ b/REORG.TODO/locale/lc-monetary.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_MONETARY category. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_MONETARY); diff --git a/REORG.TODO/locale/lc-name.c b/REORG.TODO/locale/lc-name.c new file mode 100644 index 0000000000..46df7280ba --- /dev/null +++ b/REORG.TODO/locale/lc-name.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_NAME category. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_NAME); diff --git a/REORG.TODO/locale/lc-numeric.c b/REORG.TODO/locale/lc-numeric.c new file mode 100644 index 0000000000..58d7fb4cc3 --- /dev/null +++ b/REORG.TODO/locale/lc-numeric.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_NUMERIC category. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_NUMERIC); diff --git a/REORG.TODO/locale/lc-paper.c b/REORG.TODO/locale/lc-paper.c new file mode 100644 index 0000000000..3b8677e540 --- /dev/null +++ b/REORG.TODO/locale/lc-paper.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_PAPER category. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_PAPER); diff --git a/REORG.TODO/locale/lc-telephone.c b/REORG.TODO/locale/lc-telephone.c new file mode 100644 index 0000000000..7514599ba4 --- /dev/null +++ b/REORG.TODO/locale/lc-telephone.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_TELEPHONE category. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_TELEPHONE); diff --git a/REORG.TODO/locale/lc-time.c b/REORG.TODO/locale/lc-time.c new file mode 100644 index 0000000000..590dd006b6 --- /dev/null +++ b/REORG.TODO/locale/lc-time.c @@ -0,0 +1,21 @@ +/* Define current locale data for LC_TIME category. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + +_NL_CURRENT_DEFINE (LC_TIME); diff --git a/REORG.TODO/locale/loadarchive.c b/REORG.TODO/locale/loadarchive.c new file mode 100644 index 0000000000..e6e1a05d2e --- /dev/null +++ b/REORG.TODO/locale/loadarchive.c @@ -0,0 +1,547 @@ +/* Code to load locale data from the locale archive file. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <locale.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <assert.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/param.h> + +#include "localeinfo.h" +#include "locarchive.h" +#include <not-cancel.h> + +/* Define the hash function. We define the function as static inline. */ +#define compute_hashval static inline compute_hashval +#define hashval_t uint32_t +#include "hashval.h" +#undef compute_hashval + + +/* Name of the locale archive file. */ +static const char archfname[] = COMPLOCALEDIR "/locale-archive"; + +/* Size of initial mapping window, optimal if large enough to + cover the header plus the initial locale. */ +#define ARCHIVE_MAPPING_WINDOW (2 * 1024 * 1024) + +#ifndef MAP_COPY +/* This is not quite as good as MAP_COPY since unexamined pages + can change out from under us and give us inconsistent data. + But we rely on the user not to diddle the system's live archive. + Even though we only ever use PROT_READ, using MAP_SHARED would + not give the system sufficient freedom to e.g. let the on disk + file go away because it doesn't know we won't call mprotect later. */ +# define MAP_COPY MAP_PRIVATE +#endif +#ifndef MAP_FILE + /* Some systems do not have this flag; it is superfluous. */ +# define MAP_FILE 0 +#endif + +/* Record of contiguous pages already mapped from the locale archive. */ +struct archmapped +{ + void *ptr; + uint32_t from; + uint32_t len; + struct archmapped *next; +}; +static struct archmapped *archmapped; + +/* This describes the mapping at the beginning of the file that contains + the header data. 
There could be data in the following partial page, + so this is searched like any other. Once the archive has been used, + ARCHMAPPED points to this; if mapping the archive header failed, + then headmap.ptr is null. */ +static struct archmapped headmap; +static struct stat64 archive_stat; /* stat of archive when header mapped. */ + +/* Record of locales that we have already loaded from the archive. */ +struct locale_in_archive +{ + struct locale_in_archive *next; + char *name; + struct __locale_data *data[__LC_LAST]; +}; +static struct locale_in_archive *archloaded; + + +/* Local structure and subroutine of _nl_load_archive, see below. */ +struct range +{ + uint32_t from; + uint32_t len; + int category; + void *result; +}; + +static int +rangecmp (const void *p1, const void *p2) +{ + return ((struct range *) p1)->from - ((struct range *) p2)->from; +} + + +/* Calculate the amount of space needed for all the tables described + by the given header. Note we do not include the empty table space + that has been preallocated in the file, so our mapping may not be + large enough if localedef adds data to the file in place. However, + doing that would permute the header fields while we are accessing + them and thus not be safe anyway, so we don't allow for that. */ +static inline off_t +calculate_head_size (const struct locarhead *h) +{ + off_t namehash_end = (h->namehash_offset + + h->namehash_size * sizeof (struct namehashent)); + off_t string_end = h->string_offset + h->string_used; + off_t locrectab_end = (h->locrectab_offset + + h->locrectab_used * sizeof (struct locrecent)); + return MAX (namehash_end, MAX (string_end, locrectab_end)); +} + + +/* Find the locale *NAMEP in the locale archive, and return the + internalized data structure for its CATEGORY data. If this locale has + already been loaded from the archive, just returns the existing data + structure. 
If successful, sets *NAMEP to point directly into the mapped + archive string table; that way, the next call can short-circuit strcmp. */ +struct __locale_data * +internal_function +_nl_load_locale_from_archive (int category, const char **namep) +{ + const char *name = *namep; + struct + { + void *addr; + size_t len; + } results[__LC_LAST]; + struct locale_in_archive *lia; + struct locarhead *head; + struct namehashent *namehashtab; + struct locrecent *locrec; + struct archmapped *mapped; + struct archmapped *last; + unsigned long int hval; + size_t idx; + size_t incr; + struct range ranges[__LC_LAST - 1]; + int nranges; + int cnt; + size_t ps = __sysconf (_SC_PAGE_SIZE); + int fd = -1; + + /* Check if we have already loaded this locale from the archive. + If we previously loaded the locale but found bogons in the data, + then we will have stored a null pointer to return here. */ + for (lia = archloaded; lia != NULL; lia = lia->next) + if (name == lia->name || !strcmp (name, lia->name)) + { + *namep = lia->name; + return lia->data[category]; + } + + { + /* If the name contains a codeset, then we normalize the name before + doing the lookup. */ + const char *p = strchr (name, '.'); + if (p != NULL && p[1] != '@' && p[1] != '\0') + { + const char *rest = __strchrnul (++p, '@'); + const char *normalized_codeset = _nl_normalize_codeset (p, rest - p); + if (normalized_codeset == NULL) /* malloc failure */ + return NULL; + if (strncmp (normalized_codeset, p, rest - p) != 0 + || normalized_codeset[rest - p] != '\0') + { + /* There is a normalized codeset name that is different from + what was specified; reconstruct a new locale name using it. 
*/ + size_t normlen = strlen (normalized_codeset); + size_t restlen = strlen (rest) + 1; + char *newname = alloca (p - name + normlen + restlen); + memcpy (__mempcpy (__mempcpy (newname, name, p - name), + normalized_codeset, normlen), + rest, restlen); + name = newname; + } + free ((char *) normalized_codeset); + } + } + + /* Make sure the archive is loaded. */ + if (archmapped == NULL) + { + void *result; + size_t headsize, mapsize; + + /* We do this early as a sign that we have tried to open the archive. + If headmap.ptr remains null, that's an indication that we tried + and failed, so we won't try again. */ + archmapped = &headmap; + + /* The archive has never been opened. */ + fd = open_not_cancel_2 (archfname, O_RDONLY|O_LARGEFILE|O_CLOEXEC); + if (fd < 0) + /* Cannot open the archive, for whatever reason. */ + return NULL; + + if (__fxstat64 (_STAT_VER, fd, &archive_stat) == -1) + { + /* stat failed, very strange. */ + close_and_out: + if (fd >= 0) + close_not_cancel_no_status (fd); + return NULL; + } + + + /* Map an initial window probably large enough to cover the header + and the first locale's data. With a large address space, we can + just map the whole file and be sure everything is covered. */ + + mapsize = (sizeof (void *) > 4 ? archive_stat.st_size + : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW)); + + result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0); + if (result == MAP_FAILED) + goto close_and_out; + + /* Check whether the file is large enough for the sizes given in + the header. Theoretically an archive could be so large that + just the header fails to fit in our initial mapping window. */ + headsize = calculate_head_size ((const struct locarhead *) result); + if (headsize > mapsize) + { + (void) __munmap (result, mapsize); + if (sizeof (void *) > 4 || headsize > archive_stat.st_size) + /* The file is not big enough for the header. Bogus. */ + goto close_and_out; + + /* Freakishly long header. 
*/ + /* XXX could use mremap when available */ + mapsize = (headsize + ps - 1) & ~(ps - 1); + result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, + fd, 0); + if (result == MAP_FAILED) + goto close_and_out; + } + + if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size) + { + /* We've mapped the whole file already, so we can be + sure we won't need this file descriptor later. */ + close_not_cancel_no_status (fd); + fd = -1; + } + + headmap.ptr = result; + /* headmap.from already initialized to zero. */ + headmap.len = mapsize; + } + + /* If there is no archive or it cannot be loaded for some reason fail. */ + if (__glibc_unlikely (headmap.ptr == NULL)) + goto close_and_out; + + /* We have the archive available. To find the name we first have to + determine its hash value. */ + hval = compute_hashval (name, strlen (name)); + + head = headmap.ptr; + namehashtab = (struct namehashent *) ((char *) head + + head->namehash_offset); + + /* Avoid division by 0 if the file is corrupted. */ + if (__glibc_unlikely (head->namehash_size == 0)) + goto close_and_out; + + idx = hval % head->namehash_size; + incr = 1 + hval % (head->namehash_size - 2); + + /* If the name_offset field is zero this means this is a + deleted entry and therefore no entry can be found. */ + while (1) + { + if (namehashtab[idx].name_offset == 0) + /* Not found. */ + goto close_and_out; + + if (namehashtab[idx].hashval == hval + && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0) + /* Found the entry. */ + break; + + idx += incr; + if (idx >= head->namehash_size) + idx -= head->namehash_size; + } + + /* We found an entry. It might be a placeholder for a removed one. */ + if (namehashtab[idx].locrec_offset == 0) + goto close_and_out; + + locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset); + + if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */) + { + /* We already have the whole locale archive mapped in. 
*/ + assert (headmap.len == archive_stat.st_size); + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + if (locrec->record[cnt].offset + locrec->record[cnt].len + > headmap.len) + /* The archive locrectab contains bogus offsets. */ + goto close_and_out; + results[cnt].addr = headmap.ptr + locrec->record[cnt].offset; + results[cnt].len = locrec->record[cnt].len; + } + } + else + { + /* Get the offsets of the data files and sort them. */ + for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + ranges[nranges].from = locrec->record[cnt].offset; + ranges[nranges].len = locrec->record[cnt].len; + ranges[nranges].category = cnt; + ranges[nranges].result = NULL; + + ++nranges; + } + + qsort (ranges, nranges, sizeof (ranges[0]), rangecmp); + + /* The information about mmap'd blocks is kept in a list. + Skip over the blocks which are before the data we need. */ + last = mapped = archmapped; + for (cnt = 0; cnt < nranges; ++cnt) + { + int upper; + size_t from; + size_t to; + void *addr; + struct archmapped *newp; + + /* Determine whether the appropriate page is already mapped. */ + while (mapped != NULL + && (mapped->from + mapped->len + <= ranges[cnt].from + ranges[cnt].len)) + { + last = mapped; + mapped = mapped->next; + } + + /* Do we have a match? */ + if (mapped != NULL + && mapped->from <= ranges[cnt].from + && (ranges[cnt].from + ranges[cnt].len + <= mapped->from + mapped->len)) + { + /* Yep, already loaded. */ + results[ranges[cnt].category].addr = ((char *) mapped->ptr + + ranges[cnt].from + - mapped->from); + results[ranges[cnt].category].len = ranges[cnt].len; + continue; + } + + /* Map the range with the locale data from the file. We will + try to cover as much of the locale as possible. I.e., if the + next category (next as in "next offset") is on the current or + immediately following page we use it as well. 
*/ + assert (powerof2 (ps)); + from = ranges[cnt].from & ~(ps - 1); + upper = cnt; + do + { + to = ranges[upper].from + ranges[upper].len; + if (to > (size_t) archive_stat.st_size) + /* The archive locrectab contains bogus offsets. */ + goto close_and_out; + to = (to + ps - 1) & ~(ps - 1); + + /* If a range is already mmaped in, stop. */ + if (mapped != NULL && ranges[upper].from >= mapped->from) + break; + + ++upper; + } + /* Loop while still in contiguous pages. */ + while (upper < nranges && ranges[upper].from < to + ps); + + /* Open the file if it hasn't happened yet. */ + if (fd == -1) + { + struct stat64 st; + fd = open_not_cancel_2 (archfname, + O_RDONLY|O_LARGEFILE|O_CLOEXEC); + if (fd == -1) + /* Cannot open the archive, for whatever reason. */ + return NULL; + /* Now verify we think this is really the same archive file + we opened before. If it has been changed we cannot trust + the header we read previously. */ + if (__fxstat64 (_STAT_VER, fd, &st) < 0 + || st.st_size != archive_stat.st_size + || st.st_mtime != archive_stat.st_mtime + || st.st_dev != archive_stat.st_dev + || st.st_ino != archive_stat.st_ino) + goto close_and_out; + } + + /* Map the range from the archive. */ + addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY, + fd, from); + if (addr == MAP_FAILED) + goto close_and_out; + + /* Allocate a record for this mapping. */ + newp = (struct archmapped *) malloc (sizeof (struct archmapped)); + if (newp == NULL) + { + (void) __munmap (addr, to - from); + goto close_and_out; + } + + /* And queue it. */ + newp->ptr = addr; + newp->from = from; + newp->len = to - from; + assert (last->next == mapped); + newp->next = mapped; + last->next = newp; + last = newp; + + /* Determine the load addresses for the category data. 
*/ + do + { + assert (ranges[cnt].from >= from); + results[ranges[cnt].category].addr = ((char *) addr + + ranges[cnt].from - from); + results[ranges[cnt].category].len = ranges[cnt].len; + } + while (++cnt < upper); + --cnt; /* The 'for' will increase 'cnt' again. */ + } + } + + /* We don't need the file descriptor any longer. */ + if (fd >= 0) + close_not_cancel_no_status (fd); + fd = -1; + + /* We succeeded in mapping all the necessary regions of the archive. + Now we need the expected data structures to point into the data. */ + + lia = malloc (sizeof *lia); + if (__glibc_unlikely (lia == NULL)) + return NULL; + + lia->name = __strdup (*namep); + if (__glibc_unlikely (lia->name == NULL)) + { + free (lia); + return NULL; + } + + lia->next = archloaded; + archloaded = lia; + + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + lia->data[cnt] = _nl_intern_locale_data (cnt, + results[cnt].addr, + results[cnt].len); + if (__glibc_likely (lia->data[cnt] != NULL)) + { + /* _nl_intern_locale_data leaves us these fields to initialize. */ + lia->data[cnt]->alloc = ld_archive; + lia->data[cnt]->name = lia->name; + + /* We do this instead of bumping the count each time we return + this data because the mappings stay around forever anyway + and we might as well hold on to a little more memory and not + have to rebuild it on the next lookup of the same thing. + If we were to maintain the usage_count normally and let the + structures be freed, we would have to remove the elements + from archloaded too. */ + lia->data[cnt]->usage_count = UNDELETABLE; + } + } + + *namep = lia->name; + return lia->data[category]; +} + +void __libc_freeres_fn_section +_nl_archive_subfreeres (void) +{ + struct locale_in_archive *lia; + struct archmapped *am; + + /* Toss out our cached locales. 
*/ + lia = archloaded; + while (lia != NULL) + { + int category; + struct locale_in_archive *dead = lia; + lia = lia->next; + + free (dead->name); + for (category = 0; category < __LC_LAST; ++category) + if (category != LC_ALL && dead->data[category] != NULL) + { + /* _nl_unload_locale just does this free for the archive case. */ + if (dead->data[category]->private.cleanup) + (*dead->data[category]->private.cleanup) (dead->data[category]); + + free (dead->data[category]); + } + free (dead); + } + archloaded = NULL; + + if (archmapped != NULL) + { + /* Now toss all the mapping windows, which we know nothing is using any + more because we just tossed all the locales that point into them. */ + + assert (archmapped == &headmap); + archmapped = NULL; + (void) __munmap (headmap.ptr, headmap.len); + am = headmap.next; + while (am != NULL) + { + struct archmapped *dead = am; + am = am->next; + (void) __munmap (dead->ptr, dead->len); + free (dead); + } + } +} diff --git a/REORG.TODO/locale/loadlocale.c b/REORG.TODO/locale/loadlocale.c new file mode 100644 index 0000000000..9bca30463e --- /dev/null +++ b/REORG.TODO/locale/loadlocale.c @@ -0,0 +1,309 @@ +/* Functions to read locale data files. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#ifdef _POSIX_MAPPED_FILES +# include <sys/mman.h> +#endif +#include <sys/stat.h> + +#include <not-cancel.h> +#include "localeinfo.h" + + +static const size_t _nl_category_num_items[] = +{ +#define DEFINE_CATEGORY(category, category_name, items, a) \ + [category] = _NL_ITEM_INDEX (_NL_NUM_##category), +#include "categories.def" +#undef DEFINE_CATEGORY +}; + + +#define NO_PAREN(arg, rest...) arg, ##rest + +#define DEFINE_CATEGORY(category, category_name, items, a) \ +static const enum value_type _nl_value_type_##category[] = { NO_PAREN items }; +#define DEFINE_ELEMENT(element, element_name, optstd, type, rest...) \ + [_NL_ITEM_INDEX (element)] = type, +#include "categories.def" +#undef DEFINE_CATEGORY + +static const enum value_type *const _nl_value_types[] = +{ +#define DEFINE_CATEGORY(category, category_name, items, a) \ + [category] = _nl_value_type_##category, +#include "categories.def" +#undef DEFINE_CATEGORY +}; + + +struct __locale_data * +internal_function +_nl_intern_locale_data (int category, const void *data, size_t datasize) +{ + const struct + { + unsigned int magic; + unsigned int nstrings; + unsigned int strindex[0]; + } *const filedata = data; + struct __locale_data *newdata; + size_t cnt; + + if (__builtin_expect (datasize < sizeof *filedata, 0) + || __builtin_expect (filedata->magic != LIMAGIC (category), 0)) + { + /* Bad data file. */ + __set_errno (EINVAL); + return NULL; + } + + if (__builtin_expect (filedata->nstrings < _nl_category_num_items[category], + 0) + || (__builtin_expect (sizeof *filedata + + filedata->nstrings * sizeof (unsigned int) + >= datasize, 0))) + { + /* Insufficient data. 
*/ + __set_errno (EINVAL); + return NULL; + } + + newdata = malloc (sizeof *newdata + + filedata->nstrings * sizeof (union locale_data_value)); + if (newdata == NULL) + return NULL; + + newdata->filedata = (void *) filedata; + newdata->filesize = datasize; + newdata->private.data = NULL; + newdata->private.cleanup = NULL; + newdata->usage_count = 0; + newdata->use_translit = 0; + newdata->nstrings = filedata->nstrings; + for (cnt = 0; cnt < newdata->nstrings; ++cnt) + { + size_t idx = filedata->strindex[cnt]; + if (__glibc_unlikely (idx > (size_t) newdata->filesize)) + { + puntdata: + free (newdata); + __set_errno (EINVAL); + return NULL; + } + + /* Determine the type. There is one special case: the LC_CTYPE + category can have more elements than there are in the + _nl_value_type_LC_XYZ array. There are all pointers. */ + switch (category) + { +#define CATTEST(cat) \ + case LC_##cat: \ + if (cnt >= (sizeof (_nl_value_type_LC_##cat) \ + / sizeof (_nl_value_type_LC_##cat[0]))) \ + goto puntdata; \ + break + CATTEST (NUMERIC); + CATTEST (TIME); + CATTEST (COLLATE); + CATTEST (MONETARY); + CATTEST (MESSAGES); + CATTEST (PAPER); + CATTEST (NAME); + CATTEST (ADDRESS); + CATTEST (TELEPHONE); + CATTEST (MEASUREMENT); + CATTEST (IDENTIFICATION); + default: + assert (category == LC_CTYPE); + break; + } + + if ((category == LC_CTYPE + && cnt >= (sizeof (_nl_value_type_LC_CTYPE) + / sizeof (_nl_value_type_LC_CTYPE[0]))) + || __builtin_expect (_nl_value_types[category][cnt] != word, 1)) + newdata->values[cnt].string = newdata->filedata + idx; + else + { + if (!LOCFILE_ALIGNED_P (idx)) + goto puntdata; + newdata->values[cnt].word = + *((const u_int32_t *) (newdata->filedata + idx)); + } + } + + return newdata; +} + +void +internal_function +_nl_load_locale (struct loaded_l10nfile *file, int category) +{ + int fd; + void *filedata; + struct stat64 st; + struct __locale_data *newdata; + int save_err; + int alloc = ld_mapped; + + file->decided = 1; + file->data = NULL; + + fd = 
open_not_cancel_2 (file->filename, O_RDONLY | O_CLOEXEC); + if (__builtin_expect (fd, 0) < 0) + /* Cannot open the file. */ + return; + + if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) < 0) + { + puntfd: + close_not_cancel_no_status (fd); + return; + } + if (__glibc_unlikely (S_ISDIR (st.st_mode))) + { + /* LOCALE/LC_foo is a directory; open LOCALE/LC_foo/SYS_LC_foo + instead. */ + char *newp; + size_t filenamelen; + + close_not_cancel_no_status (fd); + + filenamelen = strlen (file->filename); + newp = (char *) alloca (filenamelen + + 5 + _nl_category_name_sizes[category] + 1); + __mempcpy (__mempcpy (__mempcpy (newp, file->filename, filenamelen), + "/SYS_", 5), + _nl_category_names.str + _nl_category_name_idxs[category], + _nl_category_name_sizes[category] + 1); + + fd = open_not_cancel_2 (newp, O_RDONLY | O_CLOEXEC); + if (__builtin_expect (fd, 0) < 0) + return; + + if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) < 0) + goto puntfd; + } + + /* Map in the file's data. */ + save_err = errno; +#ifdef _POSIX_MAPPED_FILES +# ifndef MAP_COPY + /* Linux seems to lack read-only copy-on-write. */ +# define MAP_COPY MAP_PRIVATE +# endif +# ifndef MAP_FILE + /* Some systems do not have this flag; it is superfluous. */ +# define MAP_FILE 0 +# endif + filedata = __mmap ((caddr_t) 0, st.st_size, + PROT_READ, MAP_FILE|MAP_COPY, fd, 0); + if (__glibc_unlikely (filedata == MAP_FAILED)) + { + filedata = NULL; + if (__builtin_expect (errno, ENOSYS) == ENOSYS) + { +#endif /* _POSIX_MAPPED_FILES */ + /* No mmap; allocate a buffer and read from the file. */ + alloc = ld_malloced; + filedata = malloc (st.st_size); + if (filedata != NULL) + { + off_t to_read = st.st_size; + ssize_t nread; + char *p = (char *) filedata; + while (to_read > 0) + { + nread = read_not_cancel (fd, p, to_read); + if (__builtin_expect (nread, 1) <= 0) + { + free (filedata); + if (nread == 0) + __set_errno (EINVAL); /* Bizarreness going on. 
*/ + goto puntfd; + } + p += nread; + to_read -= nread; + } + __set_errno (save_err); + } +#ifdef _POSIX_MAPPED_FILES + } + } +#endif /* _POSIX_MAPPED_FILES */ + + /* We have mapped the data, so we no longer need the descriptor. */ + close_not_cancel_no_status (fd); + + if (__glibc_unlikely (filedata == NULL)) + /* We failed to map or read the data. */ + return; + + newdata = _nl_intern_locale_data (category, filedata, st.st_size); + if (__glibc_unlikely (newdata == NULL)) + /* Bad data. */ + { +#ifdef _POSIX_MAPPED_FILES + if (alloc == ld_mapped) + __munmap ((caddr_t) filedata, st.st_size); +#endif + return; + } + + /* _nl_intern_locale_data leaves us these fields to initialize. */ + newdata->name = NULL; /* This will be filled if necessary in findlocale.c. */ + newdata->alloc = alloc; + + file->data = newdata; +} + +void +internal_function +_nl_unload_locale (struct __locale_data *locale) +{ + if (locale->private.cleanup) + (*locale->private.cleanup) (locale); + + switch (__builtin_expect (locale->alloc, ld_mapped)) + { + case ld_malloced: + free ((void *) locale->filedata); + break; + case ld_mapped: +#ifdef _POSIX_MAPPED_FILES + __munmap ((caddr_t) locale->filedata, locale->filesize); + break; +#endif + case ld_archive: /* Nothing to do. */ + break; + } + + if (__builtin_expect (locale->alloc, ld_mapped) != ld_archive) + free ((char *) locale->name); + + free (locale); +} diff --git a/REORG.TODO/locale/locale.h b/REORG.TODO/locale/locale.h new file mode 100644 index 0000000000..9a5fce9d47 --- /dev/null +++ b/REORG.TODO/locale/locale.h @@ -0,0 +1,203 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + * ISO C99 Standard: 7.11 Localization <locale.h> + */ + +#ifndef _LOCALE_H +#define _LOCALE_H 1 + +#include <features.h> + +#define __need_NULL +#include <stddef.h> +#include <bits/locale.h> + +__BEGIN_DECLS + +/* These are the possibilities for the first argument to setlocale. + The code assumes that the lowest LC_* symbol has the value zero. */ +#define LC_CTYPE __LC_CTYPE +#define LC_NUMERIC __LC_NUMERIC +#define LC_TIME __LC_TIME +#define LC_COLLATE __LC_COLLATE +#define LC_MONETARY __LC_MONETARY +#define LC_MESSAGES __LC_MESSAGES +#define LC_ALL __LC_ALL +#define LC_PAPER __LC_PAPER +#define LC_NAME __LC_NAME +#define LC_ADDRESS __LC_ADDRESS +#define LC_TELEPHONE __LC_TELEPHONE +#define LC_MEASUREMENT __LC_MEASUREMENT +#define LC_IDENTIFICATION __LC_IDENTIFICATION + + +/* Structure giving information about numeric and monetary notation. + localeconv (declared below) returns a pointer to an object of this type. */ +struct lconv +{ + /* Numeric (non-monetary) information. */ + + char *decimal_point; /* Decimal point character. */ + char *thousands_sep; /* Thousands separator. */ + /* Each element is the number of digits in each group; + elements with higher indices are farther left. + An element with value CHAR_MAX means that no further grouping is done. + An element with value 0 means that the previous element is used + for all groups farther left. */ + char *grouping; + + /* Monetary information. */ + + /* First three chars are a currency symbol from ISO 4217. + Fourth char is the separator. Fifth char is '\0'. */ + char *int_curr_symbol; + char *currency_symbol; /* Local currency symbol. 
*/ + char *mon_decimal_point; /* Decimal point character. */ + char *mon_thousands_sep; /* Thousands separator. */ + char *mon_grouping; /* Like `grouping' element (above). */ + char *positive_sign; /* Sign for positive values. */ + char *negative_sign; /* Sign for negative values. */ + char int_frac_digits; /* Int'l fractional digits. */ + char frac_digits; /* Local fractional digits. */ + /* 1 if currency_symbol precedes a positive value, 0 if it follows. */ + char p_cs_precedes; + /* 1 iff a space separates currency_symbol from a positive value. */ + char p_sep_by_space; + /* 1 if currency_symbol precedes a negative value, 0 if it follows. */ + char n_cs_precedes; + /* 1 iff a space separates currency_symbol from a negative value. */ + char n_sep_by_space; + /* Positive and negative sign positions: + 0 Parentheses surround the quantity and currency_symbol. + 1 The sign string precedes the quantity and currency_symbol. + 2 The sign string follows the quantity and currency_symbol. + 3 The sign string immediately precedes the currency_symbol. + 4 The sign string immediately follows the currency_symbol. */ + char p_sign_posn; + char n_sign_posn; +#ifdef __USE_ISOC99 + /* 1 if int_curr_symbol precedes a positive value, 0 if it follows. */ + char int_p_cs_precedes; + /* 1 iff a space separates int_curr_symbol from a positive value. */ + char int_p_sep_by_space; + /* 1 if int_curr_symbol precedes a negative value, 0 if it follows. */ + char int_n_cs_precedes; + /* 1 iff a space separates int_curr_symbol from a negative value. */ + char int_n_sep_by_space; + /* Positive and negative sign positions: + 0 Parentheses surround the quantity and int_curr_symbol. + 1 The sign string precedes the quantity and int_curr_symbol. + 2 The sign string follows the quantity and int_curr_symbol. + 3 The sign string immediately precedes the int_curr_symbol. + 4 The sign string immediately follows the int_curr_symbol. 
*/ + char int_p_sign_posn; + char int_n_sign_posn; +#else /* Without __USE_ISOC99 the same six members are declared under reserved (double-underscore) names. */ + char __int_p_cs_precedes; + char __int_p_sep_by_space; + char __int_n_cs_precedes; + char __int_n_sep_by_space; + char __int_p_sign_posn; + char __int_n_sign_posn; +#endif /* __USE_ISOC99 */ +}; + + +/* Set and/or return the current locale. */ +extern char *setlocale (int __category, const char *__locale) __THROW; + +/* Return the numeric/monetary information for the current locale. */ +extern struct lconv *localeconv (void) __THROW; + + +#ifdef __USE_XOPEN2K8 +/* The concept of one static locale per category is not very well + thought out. Many applications will need to process their data using + information from several different locales. Another application is + the implementation of the internationalization handling in the + upcoming ISO C++ standard library. To support this another set of + the functions using locale data exist which have an additional + argument. + + Attention: all these functions are *not* standardized in any form. + This is a proof-of-concept implementation. */ + +/* Get locale datatype definition. */ +# include <xlocale.h> + +/* Return a reference to a data structure representing a set of locale + datasets. Unlike for the CATEGORY parameter for `setlocale' the + CATEGORY_MASK parameter here uses a single bit for each category, + made by OR'ing together the LC_*_MASK bits defined below. */ +extern __locale_t newlocale (int __category_mask, const char *__locale, + __locale_t __base) __THROW; + +/* These are the bits that can be set in the CATEGORY_MASK argument to + `newlocale'. In the GNU implementation, LC_FOO_MASK has the value + of (1 << LC_FOO), but this is not a part of the interface that + callers can assume will be true. 
*/ +# define LC_CTYPE_MASK (1 << __LC_CTYPE) +# define LC_NUMERIC_MASK (1 << __LC_NUMERIC) +# define LC_TIME_MASK (1 << __LC_TIME) +# define LC_COLLATE_MASK (1 << __LC_COLLATE) +# define LC_MONETARY_MASK (1 << __LC_MONETARY) +# define LC_MESSAGES_MASK (1 << __LC_MESSAGES) +# define LC_PAPER_MASK (1 << __LC_PAPER) +# define LC_NAME_MASK (1 << __LC_NAME) +# define LC_ADDRESS_MASK (1 << __LC_ADDRESS) +# define LC_TELEPHONE_MASK (1 << __LC_TELEPHONE) +# define LC_MEASUREMENT_MASK (1 << __LC_MEASUREMENT) +# define LC_IDENTIFICATION_MASK (1 << __LC_IDENTIFICATION) /* LC_ALL_MASK below is the OR of all twelve masks above. */ +# define LC_ALL_MASK (LC_CTYPE_MASK \ + | LC_NUMERIC_MASK \ + | LC_TIME_MASK \ + | LC_COLLATE_MASK \ + | LC_MONETARY_MASK \ + | LC_MESSAGES_MASK \ + | LC_PAPER_MASK \ + | LC_NAME_MASK \ + | LC_ADDRESS_MASK \ + | LC_TELEPHONE_MASK \ + | LC_MEASUREMENT_MASK \ + | LC_IDENTIFICATION_MASK \ + ) + +/* Return a duplicate of the set of locales in DATASET. All usage + counters are increased if necessary. */ +extern __locale_t duplocale (__locale_t __dataset) __THROW; + +/* Free the data associated with a locale dataset previously returned + by a call to `newlocale' or `duplocale'. */ +extern void freelocale (__locale_t __dataset) __THROW; + +/* Switch the current thread's locale to DATASET. + If DATASET is null, instead just return the current setting. + The special value LC_GLOBAL_LOCALE is the initial setting + for all threads and can also be installed any time, meaning + the thread uses the global settings controlled by `setlocale'. */ +extern __locale_t uselocale (__locale_t __dataset) __THROW; + +/* This value can be passed to `uselocale' and may be returned by it. + Passing this value to any other function has undefined behavior. 
*/ +# define LC_GLOBAL_LOCALE ((__locale_t) -1L) + +#endif /* __USE_XOPEN2K8 */ + +__END_DECLS + +#endif /* locale.h */ diff --git a/REORG.TODO/locale/localeconv.c b/REORG.TODO/locale/localeconv.c new file mode 100644 index 0000000000..b7800c9863 --- /dev/null +++ b/REORG.TODO/locale/localeconv.c @@ -0,0 +1,72 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <locale.h> +#include "localeinfo.h" +#include <shlib-compat.h> + +/* Return monetary and numeric information about the current locale. 
*/ +struct lconv * +__localeconv (void) +{ + static struct lconv result; /* Static storage: every call overwrites and returns this same object. */ + + result.decimal_point = (char *) _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT); + result.thousands_sep = (char *) _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP); + result.grouping = (char *) _NL_CURRENT (LC_NUMERIC, GROUPING); + /* A grouping string whose first byte is '\177' or '\377' is reported to the caller as the empty string (presumably the locale data's "no grouping" markers -- confirm). */ if (*result.grouping == '\177' || *result.grouping == '\377') + result.grouping = (char *) ""; + + result.int_curr_symbol = (char *) _NL_CURRENT (LC_MONETARY, INT_CURR_SYMBOL); + result.currency_symbol = (char *) _NL_CURRENT (LC_MONETARY, CURRENCY_SYMBOL); + result.mon_decimal_point = (char *) _NL_CURRENT (LC_MONETARY, + MON_DECIMAL_POINT); + result.mon_thousands_sep = (char *) _NL_CURRENT (LC_MONETARY, + MON_THOUSANDS_SEP); + result.mon_grouping = (char *) _NL_CURRENT (LC_MONETARY, MON_GROUPING); + /* Same first-byte check as for `grouping' above. */ if (*result.mon_grouping == '\177' || *result.mon_grouping == '\377') + result.mon_grouping = (char *) ""; + result.positive_sign = (char *) _NL_CURRENT (LC_MONETARY, POSITIVE_SIGN); + result.negative_sign = (char *) _NL_CURRENT (LC_MONETARY, NEGATIVE_SIGN); + /* INT_ELEM copies the first byte of the LC_MONETARY item ELEMENT into result.MEMBER and maps '\377' to CHAR_MAX. It expands to two statements, so it must only be used as a full statement, as it is below. 
*/ +#define INT_ELEM(member, element) \ + result.member = *(char *) _NL_CURRENT (LC_MONETARY, element); \ + if (result.member == '\377') result.member = CHAR_MAX + + INT_ELEM (int_frac_digits, INT_FRAC_DIGITS); + INT_ELEM (frac_digits, FRAC_DIGITS); + INT_ELEM (p_cs_precedes, P_CS_PRECEDES); + INT_ELEM (p_sep_by_space, P_SEP_BY_SPACE); + INT_ELEM (n_cs_precedes, N_CS_PRECEDES); + INT_ELEM (n_sep_by_space, N_SEP_BY_SPACE); + INT_ELEM (p_sign_posn, P_SIGN_POSN); + INT_ELEM (n_sign_posn, N_SIGN_POSN); + INT_ELEM (int_p_cs_precedes, INT_P_CS_PRECEDES); + INT_ELEM (int_p_sep_by_space, INT_P_SEP_BY_SPACE); + INT_ELEM (int_n_cs_precedes, INT_N_CS_PRECEDES); + INT_ELEM (int_n_sep_by_space, INT_N_SEP_BY_SPACE); + INT_ELEM (int_p_sign_posn, INT_P_SIGN_POSN); + INT_ELEM (int_n_sign_posn, INT_N_SIGN_POSN); + + return &result; +} + /* Export as localeconv at version GLIBC_2_2; a GLIBC_2_0 compat alias follows when SHLIB_COMPAT asks for it. */ +versioned_symbol (libc, __localeconv, localeconv, GLIBC_2_2); +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_2) 
+strong_alias (__localeconv, __localeconv20) +compat_symbol (libc, __localeconv20, localeconv, GLIBC_2_0); +#endif diff --git a/REORG.TODO/locale/localeinfo.h b/REORG.TODO/locale/localeinfo.h new file mode 100644 index 0000000000..f0694dc84e --- /dev/null +++ b/REORG.TODO/locale/localeinfo.h @@ -0,0 +1,412 @@ +/* Declarations for internal libc locale interfaces + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LOCALEINFO_H +#define _LOCALEINFO_H 1 + +#include <stddef.h> +#include <langinfo.h> +#include <limits.h> +#include <locale.h> +#include <time.h> +#include <stdint.h> +#include <sys/types.h> + +#include <intl/loadinfo.h> /* For loaded_l10nfile definition. */ + +/* Magic number at the beginning of a locale data file for CATEGORY. + The constant XORed with CATEGORY is different for LC_COLLATE, for + LC_CTYPE, and for all remaining categories. CATEGORY is evaluated + more than once and is not parenthesized in the comparisons. */ +#define LIMAGIC(category) \ + (category == LC_COLLATE \ + ? ((unsigned int) (0x20051014 ^ (category))) \ + : category == LC_CTYPE \ + ? ((unsigned int) (0x20090720 ^ (category))) \ + : ((unsigned int) (0x20031115 ^ (category)))) + +/* Special weight constant for the collation data. */ +#define IGNORE_CHAR 2 + +/* We use a special value for the usage counter in `__locale_data' to + signal that this data must never be removed anymore. 
*/ +#define MAX_USAGE_COUNT (UINT_MAX - 1) +#define UNDELETABLE UINT_MAX + +/* Structure describing locale data in core for a category. */ +struct __locale_data +{ + const char *name; /* Locale name; `alloc' below says how it is stored. */ + const char *filedata; /* Region mapping the file data. */ + off_t filesize; /* Size of the file (and the region). */ + enum /* Flavor of storage used for those. */ + { + ld_malloced, /* Both are malloc'd. */ + ld_mapped, /* name is malloc'd, filedata mmap'd */ + ld_archive /* Both point into mmap'd archive regions. */ + } alloc; + + /* This provides a slot for category-specific code to cache data computed + about this locale. That code can set a cleanup function to deallocate + the data. */ + struct + { + void (*cleanup) (struct __locale_data *) internal_function; + union + { + void *data; + struct lc_time_data *time; + const struct gconv_fcts *ctype; + }; + } private; + + unsigned int usage_count; /* Counter for users; the value UNDELETABLE marks data that must never be removed. */ + + int use_translit; /* Nonzero if the mb*towc*() and wc*tomb() + functions should use transliteration. */ + + unsigned int nstrings; /* Number of strings below. */ + union locale_data_value + { + const uint32_t *wstr; + const char *string; + unsigned int word; /* Note endian issues vs 64-bit pointers. */ + } + values __flexarr; /* Items, usually pointers into `filedata'. */ +}; + +/* This alignment is used for 32-bit integers in locale files, both + those that are explicitly int32_t or uint32_t and those that are + wchar_t, regardless of the (possibly smaller) alignment required + for such integers on a particular host. */ +#define LOCFILE_ALIGN sizeof (int32_t) +#define LOCFILE_ALIGN_MASK (LOCFILE_ALIGN - 1) +#define LOCFILE_ALIGN_UP(x) (((x) + LOCFILE_ALIGN - 1) \ + & ~LOCFILE_ALIGN_MASK) +#define LOCFILE_ALIGNED_P(x) (((x) & LOCFILE_ALIGN_MASK) == 0) + +/* We know three kinds of collation sorting rules. 
*/ +enum coll_sort_rule +{ + illegal_0__, + sort_forward, + sort_backward, + illegal_3__, + sort_position, + sort_forward_position, + sort_backward_position, + sort_mask +}; + +/* We can map the types of the entries into a few categories. */ +enum value_type +{ + none, + string, + stringarray, + byte, + bytearray, + word, + stringlist, + wordarray, + wstring, + wstringarray, + wstringlist +}; + + +/* Definitions for `era' information from LC_TIME. + ERA_NAME_FORMAT_MEMBERS equals the number of ERA_M_ and ERA_W_ indices + defined right after it (0 through 3). */ +#define ERA_NAME_FORMAT_MEMBERS 4 +#define ERA_M_NAME 0 +#define ERA_M_FORMAT 1 +#define ERA_W_NAME 2 +#define ERA_W_FORMAT 3 + + +/* Structure to access `era' information from LC_TIME. */ +struct era_entry +{ + uint32_t direction; /* Contains '+' or '-'. */ + int32_t offset; + int32_t start_date[3]; + int32_t stop_date[3]; + const char *era_name; + const char *era_format; + const wchar_t *era_wname; + const wchar_t *era_wformat; + int absolute_direction; + /* absolute direction: + +1 indicates that year number is higher in the future. (like A.D.) + -1 indicates that year number is higher in the past. (like B.C.) */ +}; + +/* Structure caching computed data about information from LC_TIME. + The `private.time' member of `struct __locale_data' points to this. */ +struct lc_time_data +{ + struct era_entry *eras; + size_t num_eras; + int era_initialized; + + const char **alt_digits; + const wchar_t **walt_digits; + int alt_digits_initialized; + int walt_digits_initialized; +}; + + +/* LC_CTYPE specific: + Hardwired indices for standard wide character translation mappings. */ +enum +{ + __TOW_toupper = 0, + __TOW_tolower = 1 +}; + + +/* LC_CTYPE specific: + Access a wide character class with a single character index. + _ISCTYPE (c, desc) = iswctype (btowc (c), desc). + c must be an `unsigned char'. desc must be a nonzero wctype_t. */ +#define _ISCTYPE(c, desc) \ + (((((const uint32_t *) (desc)) - 8)[(c) >> 5] >> ((c) & 0x1f)) & 1) + +/* Category name handling variables. 
*/ +#define CATNAMEMF(line) CATNAMEMF1 (line) +#define CATNAMEMF1(line) str##line /* Two-level expansion so that __LINE__ is replaced by its number before being pasted onto `str'. */ +extern const union catnamestr_t +{ + struct + { +#define DEFINE_CATEGORY(category, category_name, items, a) \ + char CATNAMEMF (__LINE__)[sizeof (category_name)]; +#include "categories.def" +#undef DEFINE_CATEGORY + }; + char str[0]; +} _nl_category_names attribute_hidden; +extern const uint8_t _nl_category_name_idxs[__LC_LAST] attribute_hidden; +extern const uint8_t _nl_category_name_sizes[__LC_LAST] attribute_hidden; + +/* Name of the standard locales. */ +extern const char _nl_C_name[] attribute_hidden; +extern const char _nl_POSIX_name[] attribute_hidden; + +/* The standard codeset. */ +extern const char _nl_C_codeset[] attribute_hidden; + +/* This is the internal locale_t object that holds the global locale + controlled by calls to setlocale. A thread's TSD locale pointer + points to this when `uselocale (LC_GLOBAL_LOCALE)' is in effect. */ +extern struct __locale_struct _nl_global_locale attribute_hidden; + +/* This fetches the thread-local locale_t pointer, either one set with + uselocale or &_nl_global_locale. */ +#define _NL_CURRENT_LOCALE (__libc_tsd_get (__locale_t, LOCALE)) +#include <libc-tsd.h> +__libc_tsd_define (extern, __locale_t, LOCALE) + + +/* For static linking it is desirable to avoid always linking in the code + and data for every category when we can tell at link time that they are + unused. We can manage this playing some tricks with weak references. + But with thread-local locale settings, it becomes quite ungainly unless + we can use __thread variables. So only in that case do we attempt this. */ +#ifndef SHARED +# include <tls.h> +# define NL_CURRENT_INDIRECT 1 +#endif + +#ifdef NL_CURRENT_INDIRECT + +/* For each category declare the thread-local variable for the current + locale data. 
This has an extra indirection so it points at the + __locales[CATEGORY] element in either _nl_global_locale or the current + locale object set by uselocale, which points at the actual data. The + reason for having these variables is so that references to particular + categories will link in the lc-CATEGORY.c module to define this symbol, + and we arrange that linking that module is what brings in all the code + associated with this category. */ +#define DEFINE_CATEGORY(category, category_name, items, a) \ +extern __thread struct __locale_data *const *_nl_current_##category \ + attribute_hidden attribute_tls_model_ie; +#include "categories.def" +#undef DEFINE_CATEGORY + +/* Return a pointer to the current `struct __locale_data' for CATEGORY. */ +#define _NL_CURRENT_DATA(category) (*_nl_current_##category) + +/* Extract the current CATEGORY locale's string for ITEM. */ +#define _NL_CURRENT(category, item) \ + ((*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].string) + +/* Extract the current CATEGORY locale's wide string for ITEM. */ +#define _NL_CURRENT_WSTR(category, item) \ + ((wchar_t *) (*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].wstr) + +/* Extract the current CATEGORY locale's word for ITEM. */ +#define _NL_CURRENT_WORD(category, item) \ + ((uint32_t) (*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].word) + +/* This is used in lc-CATEGORY.c to define _nl_current_CATEGORY. The symbol + _nl_current_CATEGORY_used is set to a value unequal to zero to mark this + category as used. On S390 the used relocation to load the symbol address + can only handle even addresses. 
*/ +#define _NL_CURRENT_DEFINE(category) \ + __thread struct __locale_data *const *_nl_current_##category \ + attribute_hidden = &_nl_global_locale.__locales[category]; \ + asm (".globl " __SYMBOL_PREFIX "_nl_current_" #category "_used\n" \ + _NL_CURRENT_DEFINE_ABS (_nl_current_##category##_used, 2)); +#ifdef HAVE_ASM_SET_DIRECTIVE +# define _NL_CURRENT_DEFINE_ABS(sym, val) ".set " #sym ", " #val +#else +# define _NL_CURRENT_DEFINE_ABS(sym, val) #sym " = " #val +#endif + +#else /* !NL_CURRENT_INDIRECT */ + +/* All categories are always loaded in the shared library, so there is no + point in having lots of separate symbols for linking. */ + +/* Return a pointer to the current `struct __locale_data' for CATEGORY. */ +# define _NL_CURRENT_DATA(category) \ + (_NL_CURRENT_LOCALE->__locales[category]) + +/* Extract the current CATEGORY locale's string for ITEM. */ +# define _NL_CURRENT(category, item) \ + (_NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].string) + +/* Extract the current CATEGORY locale's wide string for ITEM. */ +# define _NL_CURRENT_WSTR(category, item) \ + ((wchar_t *) _NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].wstr) + +/* Extract the current CATEGORY locale's word for ITEM. */ +# define _NL_CURRENT_WORD(category, item) \ + ((uint32_t) _NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].word) + +/* This is used in lc-CATEGORY.c to define _nl_current_CATEGORY. */ +# define _NL_CURRENT_DEFINE(category) \ + /* No per-category variable here. */ + +#endif /* NL_CURRENT_INDIRECT */ + +/* Extract CATEGORY locale's string for ITEM. Unlike the _NL_CURRENT + macros this reads from the explicitly passed locale object L; neither + L nor CATEGORY is checked. */ +static inline const char * +_nl_lookup (locale_t l, int category, int item) +{ + return l->__locales[category]->values[_NL_ITEM_INDEX (item)].string; +} + +/* Extract CATEGORY locale's wide string for ITEM. */ +static inline const wchar_t * +_nl_lookup_wstr (locale_t l, int category, int item) +{ + return (wchar_t *) l->__locales[category] + ->values[_NL_ITEM_INDEX (item)].wstr; +} + +/* Extract the CATEGORY locale's word for ITEM. 
*/ +static inline uint32_t +_nl_lookup_word (locale_t l, int category, int item) +{ + return l->__locales[category]->values[_NL_ITEM_INDEX (item)].word; +} + +/* Default search path if no LOCPATH environment variable. */ +extern const char _nl_default_locale_path[] attribute_hidden; + +/* Load the locale data for CATEGORY from the file specified by *NAME. + If *NAME is "", use environment variables as specified by POSIX, and + fill in *NAME with the actual name used. If LOCALE_PATH is not null, + those directories are searched for the locale files. If it's null, + the locale archive is checked first and then _nl_default_locale_path + is searched for locale files. */ +extern struct __locale_data *_nl_find_locale (const char *locale_path, + size_t locale_path_len, + int category, const char **name) + internal_function attribute_hidden; + +/* Try to load the file described by FILE. */ +extern void _nl_load_locale (struct loaded_l10nfile *file, int category) + internal_function attribute_hidden; + +/* Free all resources held by LOCALE, including LOCALE itself. */ +extern void _nl_unload_locale (struct __locale_data *locale) + internal_function attribute_hidden; + +/* Free the locale and give back all memory if the usage count is one. + NOTE(review): the first parameter is named `locale' here, but the call + in newlocale.c passes a category index -- confirm and consider + renaming it. */ +extern void _nl_remove_locale (int locale, struct __locale_data *data) + internal_function attribute_hidden; + +/* Find the locale *NAMEP in the locale archive, and return the + internalized data structure for its CATEGORY data. If this locale has + already been loaded from the archive, just returns the existing data + structure. If successful, sets *NAMEP to point directly into the mapped + archive string table; that way, the next call can short-circuit strcmp. */ +extern struct __locale_data *_nl_load_locale_from_archive (int category, + const char **namep) + internal_function attribute_hidden; + +/* Subroutine of setlocale's __libc_subfreeres hook. */ +extern void _nl_archive_subfreeres (void) attribute_hidden; + +/* Subroutine of gconv-db's __libc_subfreeres hook. 
*/ +extern void _nl_locale_subfreeres (void) attribute_hidden; + +/* Validate the contents of a locale file and set up the in-core + data structure to point into the data. This leaves the `alloc' + and `name' fields uninitialized, for the caller to fill in. + If any bogons are detected in the data, this will refuse to + intern it, and return a null pointer instead. */ +extern struct __locale_data *_nl_intern_locale_data (int category, + const void *data, + size_t datasize) + internal_function attribute_hidden; + + +/* Return `era' entry which corresponds to TP. Used in strftime. */ +extern struct era_entry *_nl_get_era_entry (const struct tm *tp, + struct __locale_data *lc_time) + internal_function attribute_hidden; + +/* Return the CNT'th `era' entry. Used in strptime. */ +extern struct era_entry *_nl_select_era_entry (int cnt, + struct __locale_data *lc_time) + internal_function attribute_hidden; + +/* Return `alt_digit' which corresponds to NUMBER. Used in strftime. */ +extern const char *_nl_get_alt_digit (unsigned int number, + struct __locale_data *lc_time) + internal_function attribute_hidden; + +/* Similar, but now for wide characters. */ +extern const wchar_t *_nl_get_walt_digit (unsigned int number, + struct __locale_data *lc_time) + internal_function attribute_hidden; + +/* Parse string as alternative digit and return numeric value. */ +extern int _nl_parse_alt_digit (const char **strp, + struct __locale_data *lc_time) + internal_function attribute_hidden; + +/* Postload processing. */ +extern void _nl_postload_ctype (void); + +/* Functions used for the `private.cleanup' hook. 
*/ +extern void _nl_cleanup_time (struct __locale_data *) + internal_function attribute_hidden; + + +#endif /* localeinfo.h */ diff --git a/REORG.TODO/locale/localename.c b/REORG.TODO/locale/localename.c new file mode 100644 index 0000000000..f7cc53552c --- /dev/null +++ b/REORG.TODO/locale/localename.c @@ -0,0 +1,26 @@ +/* current locale setting names + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "localeinfo.h" + /* Return the __names[CATEGORY] entry of the locale object that _NL_CURRENT_LOCALE yields. CATEGORY is used as an index without any range check. */ +attribute_hidden +const char * +__current_locale_name (int category) +{ + return _NL_CURRENT_LOCALE->__names[category]; +} diff --git a/REORG.TODO/locale/locarchive.h b/REORG.TODO/locale/locarchive.h new file mode 100644 index 0000000000..7c9fc7f067 --- /dev/null +++ b/REORG.TODO/locale/locarchive.h @@ -0,0 +1,107 @@ +/* Definitions for locale archive handling. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LOCARCHIVE_H +#define _LOCARCHIVE_H 1 + +#include <stdint.h> + + +#define AR_MAGIC 0xde020109 + +struct locarhead +{ + uint32_t magic; /* Presumably AR_MAGIC for a valid archive -- confirm against the archive loader. */ + /* Serial number. */ + uint32_t serial; + /* Name hash table. */ + uint32_t namehash_offset; + uint32_t namehash_used; + uint32_t namehash_size; + /* String table. */ + uint32_t string_offset; + uint32_t string_used; + uint32_t string_size; + /* Table with locale records. */ + uint32_t locrectab_offset; + uint32_t locrectab_used; + uint32_t locrectab_size; + /* MD5 sum hash table. */ + uint32_t sumhash_offset; + uint32_t sumhash_used; + uint32_t sumhash_size; +}; + + +struct namehashent +{ + /* Hash value of the name. */ + uint32_t hashval; + /* Offset of the name in the string table. */ + uint32_t name_offset; + /* Offset of the locale record. */ + uint32_t locrec_offset; +}; + + +struct sumhashent +{ + /* MD5 sum. */ + char sum[16]; + /* Offset of the file in the archive. */ + uint32_t file_offset; +}; + +struct locrecent +{ + uint32_t refs; /* # of namehashent records that point here */ + struct + { + uint32_t offset; + uint32_t len; + } record[__LC_LAST]; /* One offset/length pair for each index below __LC_LAST (presumably the LC_* category numbers). */ +}; + + +struct locarhandle +{ + /* Full path to the locale archive file. */ + const char *fname; + int fd; + void *addr; + size_t mmaped; + size_t reserved; + /* If this mmap required adjustment (such as re-aligning), then this is the + real address that was returned from mmap and thus should be passed to the + munmap call. The addr field above is the first usable address. 
*/ + void *mmap_base; + /* Same as above for mmap_base vs addr, but this is the real length of the + map rather than the usable (which is what reserved represents). */ + size_t mmap_len; +}; + + +/* In memory data for the locales with their checksums. */ +typedef struct locale_category_data +{ + off64_t size; + void *addr; + char sum[16]; +} locale_data_t[__LC_LAST]; + +#endif /* locarchive.h */ diff --git a/REORG.TODO/locale/mb_cur_max.c b/REORG.TODO/locale/mb_cur_max.c new file mode 100644 index 0000000000..4f074eb4b8 --- /dev/null +++ b/REORG.TODO/locale/mb_cur_max.c @@ -0,0 +1,32 @@ +/* Return number of characters in multibyte representation for current + character set. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <langinfo.h> +#include <locale.h> +#include <stdlib.h> +#include "localeinfo.h" + + /* Return the _NL_CTYPE_MB_CUR_MAX word of the current LC_CTYPE locale data, converted to size_t. */ +size_t +weak_function +__ctype_get_mb_cur_max (void) +{ + return _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MB_CUR_MAX); +} diff --git a/REORG.TODO/locale/newlocale.c b/REORG.TODO/locale/newlocale.c new file mode 100644 index 0000000000..2190b7351b --- /dev/null +++ b/REORG.TODO/locale/newlocale.c @@ -0,0 +1,280 @@ +/* Return a reference to locale information record. 
   Copyright (C) 1996-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <argz.h>
#include <libc-lock.h>
#include <errno.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>

#include "localeinfo.h"


/* Lock for protecting global data.  */
__libc_rwlock_define (extern , __libc_setlocale_lock attribute_hidden)


/* Use this when we come along an error.  It sets errno to EINVAL and
   returns NULL from the enclosing function, so it may only be used in a
   function returning a pointer.  It releases nothing.  */
#define ERROR_RETURN                                                          \
  do {                                                                        \
    __set_errno (EINVAL);                                                     \
    return NULL;                                                              \
  } while (0)


/* Build a locale object in which the categories selected by CATEGORY_MASK
   come from the locale named LOCALE and all other categories come from
   BASE, or from the C locale when BASE is NULL or is the C locale object.
   LOCALE is either a single name or a composite of the form
   "CATEGORY=name;CATEGORY=name;...".

   Returns the resulting object, or NULL on failure.  The call fails with
   errno set to EINVAL when CATEGORY_MASK has bits outside the known
   categories, when LOCALE is NULL, or when a composite LOCALE names an
   unknown category or omits a requested one.  NULL is also returned when
   an allocation or _nl_find_locale fails.

   When data is loaded successfully and BASE is non-null, the replaced
   category data of BASE is dropped and BASE itself is freed, so only the
   returned pointer may be used afterwards.  Requests that resolve to the
   unmodified C locale return _nl_C_locobj_ptr without allocating.  */
__locale_t
__newlocale (int category_mask, const char *locale, __locale_t base)
{
  /* Intermediate memory for result.  */
  const char *newnames[__LC_LAST];
  struct __locale_struct result;
  __locale_t result_ptr;
  char *locale_path;
  size_t locale_path_len;
  const char *locpath_var;
  int cnt;
  size_t names_len;

  /* We treat LC_ALL in the same way as if all bits were set.  */
  if (category_mask == 1 << LC_ALL)
    category_mask = (1 << __LC_LAST) - 1 - (1 << LC_ALL);

  /* Sanity check for CATEGORY argument.  */
  if ((category_mask & ~((1 << __LC_LAST) - 1 - (1 << LC_ALL))) != 0)
    ERROR_RETURN;

  /* `newlocale' does not support asking for the locale name.  */
  if (locale == NULL)
    ERROR_RETURN;

  if (base == _nl_C_locobj_ptr)
    /* We're to modify BASE, returned for a previous call with "C".
       We can't really modify the read-only structure, so instead
       start over by copying it.  */
    base = NULL;

  if ((base == NULL || category_mask == (1 << __LC_LAST) - 1 - (1 << LC_ALL))
      && (category_mask == 0 || !strcmp (locale, "C")))
    /* Asking for the "C" locale needn't allocate a new object.  */
    return _nl_C_locobj_ptr;

  /* Allocate memory for the result.  */
  if (base != NULL)
    result = *base;
  else
    /* Fill with pointers to C locale data.  */
    result = _nl_C_locobj;

  /* If no category is to be set we return BASE if available or a
     dataset using the C locale data.  */
  if (category_mask == 0)
    {
      /* NOTE(review): what is returned is a fresh copy of RESULT; BASE is
         not freed on this path and the copy's __names still point at
         whatever RESULT pointed at.  Confirm this is intended.  */
      result_ptr = (__locale_t) malloc (sizeof (struct __locale_struct));
      if (result_ptr == NULL)
        return NULL;
      *result_ptr = result;

      goto update;
    }

  /* We perhaps really have to load some data.  So we determine the
     path in which to look for the data now.  The environment variable
     `LOCPATH' must only be used when the binary has no SUID or SGID
     bit set.  If using the default path, we tell _nl_find_locale
     by passing null and it can check the canonical locale archive.  */
  locale_path = NULL;
  locale_path_len = 0;

  locpath_var = getenv ("LOCPATH");
  if (locpath_var != NULL && locpath_var[0] != '\0')
    {
      /* NOTE(review): LOCALE_PATH is allocated by the argz calls below, but
         no free (locale_path) appears anywhere in this function, neither
         on the early returns nor on the success path.  This looks like a
         leak whenever LOCPATH is set; confirm that _nl_find_locale does
         not keep the pointer before adding a free.  */
      if (__argz_create_sep (locpath_var, ':',
                             &locale_path, &locale_path_len) != 0)
        return NULL;

      if (__argz_add_sep (&locale_path, &locale_path_len,
                          _nl_default_locale_path, ':') != 0)
        return NULL;
    }

  /* Get the names for the locales we are interested in.  We either
     allow a composite name or a single name.  */
  for (cnt = 0; cnt < __LC_LAST; ++cnt)
    if (cnt != LC_ALL)
      newnames[cnt] = locale;
  if (strchr (locale, ';') != NULL)
    {
      /* This is a composite name.  Make a copy and split it up.  */
      char *np = strdupa (locale);
      char *cp;
      int specified_mask = 0;

      while ((cp = strchr (np, '=')) != NULL)
        {
          /* Match the text in front of the '=' against the category
             names, comparing the length first.  */
          for (cnt = 0; cnt < __LC_LAST; ++cnt)
            if (cnt != LC_ALL
                && (size_t) (cp - np) == _nl_category_name_sizes[cnt]
                && memcmp (np, (_nl_category_names.str
                                + _nl_category_name_idxs[cnt]), cp - np) == 0)
              break;

          if (cnt == __LC_LAST)
            /* Bogus category name.  */
            ERROR_RETURN;

          /* Found the category this clause sets.  */
          specified_mask |= 1 << cnt;
          newnames[cnt] = ++cp;
          cp = strchr (cp, ';');
          if (cp != NULL)
            {
              /* Examine the next clause.  */
              *cp = '\0';
              np = cp + 1;
            }
          else
            /* This was the last clause.  We are done.  */
            break;
        }

      if (category_mask &~ specified_mask)
        /* The composite name did not specify all categories we need.  */
        ERROR_RETURN;
    }

  /* Protect global data.  */
  __libc_rwlock_wrlock (__libc_setlocale_lock);

  /* Now process all categories we are interested in.  */
  names_len = 0;
  for (cnt = 0; cnt < __LC_LAST; ++cnt)
    {
      if ((category_mask & 1 << cnt) != 0)
        {
          result.__locales[cnt] = _nl_find_locale (locale_path,
                                                   locale_path_len,
                                                   cnt, &newnames[cnt]);
          if (result.__locales[cnt] == NULL)
            {
              /* Failure exit.  It is also entered by the goto below with
                 CNT set to __LC_LAST.  It walks back over the categories
                 below CNT and drops those loaded by this call.  */
            free_cnt_data_and_exit:
              while (cnt-- > 0)
                if (((category_mask & 1 << cnt) != 0)
                    && result.__locales[cnt]->usage_count != UNDELETABLE)
                  /* We can remove the data.  */
                  _nl_remove_locale (cnt, result.__locales[cnt]);

              /* Critical section left.  */
              __libc_rwlock_unlock (__libc_setlocale_lock);
              return NULL;
            }

          if (newnames[cnt] != _nl_C_name)
            names_len += strlen (newnames[cnt]) + 1;
        }
      else if (cnt != LC_ALL && result.__names[cnt] != _nl_C_name)
        /* Tally up the unchanged names from BASE as well.  */
        names_len += strlen (result.__names[cnt]) + 1;
    }

  /* We successfully loaded all required data.  Allocate a new structure.
     We can't just reuse the BASE pointer, because the name strings are
     changing and we need the old name string area intact so we can copy
     out of it into the new one without overlap problems should some
     category's name be getting longer.  */
  result_ptr = malloc (sizeof (struct __locale_struct) + names_len);
  if (result_ptr == NULL)
    {
      cnt = __LC_LAST;
      goto free_cnt_data_and_exit;
    }

  if (base == NULL)
    {
      /* Fill in this new structure from scratch.  */

      /* The name strings live directly behind the structure.  */
      char *namep = (char *) (result_ptr + 1);

      /* Install copied new names in the new structure's __names array.
         If resolved to "C", that is already in RESULT.__names to start.  */
      for (cnt = 0; cnt < __LC_LAST; ++cnt)
        if ((category_mask & 1 << cnt) != 0 && newnames[cnt] != _nl_C_name)
          {
            result.__names[cnt] = namep;
            namep = __stpcpy (namep, newnames[cnt]) + 1;
          }

      *result_ptr = result;
    }
  else
    {
      /* We modify the base structure.  */

      /* The name strings live directly behind the structure.  */
      char *namep = (char *) (result_ptr + 1);

      for (cnt = 0; cnt < __LC_LAST; ++cnt)
        if ((category_mask & 1 << cnt) != 0)
          {
            if (base->__locales[cnt]->usage_count != UNDELETABLE)
              /* We can remove the old data.  */
              _nl_remove_locale (cnt, base->__locales[cnt]);
            result_ptr->__locales[cnt] = result.__locales[cnt];

            if (newnames[cnt] == _nl_C_name)
              result_ptr->__names[cnt] = _nl_C_name;
            else
              {
                result_ptr->__names[cnt] = namep;
                namep = __stpcpy (namep, newnames[cnt]) + 1;
              }
          }
        else if (cnt != LC_ALL)
          {
            /* The RESULT members point into the old BASE structure.  */
            result_ptr->__locales[cnt] = result.__locales[cnt];
            if (result.__names[cnt] == _nl_C_name)
              result_ptr->__names[cnt] = _nl_C_name;
            else
              {
                result_ptr->__names[cnt] = namep;
                namep = __stpcpy (namep, result.__names[cnt]) + 1;
              }
          }

      /* Every name has been copied out of BASE, so it can go now.  */
      free (base);
    }

  /* Critical section left.  */
  __libc_rwlock_unlock (__libc_setlocale_lock);

  /* Update the special members.  */
 update:
  {
    /* Each cached pointer is the LC_CTYPE table's start plus 128 elements
       of its element type; the cast binds before the addition.  */
    union locale_data_value *ctypes = result_ptr->__locales[LC_CTYPE]->values;
    result_ptr->__ctype_b = (const unsigned short int *)
      ctypes[_NL_ITEM_INDEX (_NL_CTYPE_CLASS)].string + 128;
    result_ptr->__ctype_tolower = (const int *)
      ctypes[_NL_ITEM_INDEX (_NL_CTYPE_TOLOWER)].string + 128;
    result_ptr->__ctype_toupper = (const int *)
      ctypes[_NL_ITEM_INDEX (_NL_CTYPE_TOUPPER)].string + 128;
  }

  return result_ptr;
}
weak_alias (__newlocale, newlocale)
diff --git a/REORG.TODO/locale/nl_langinfo.c b/REORG.TODO/locale/nl_langinfo.c new file mode 100644 index 0000000000..3a21fba231 --- /dev/null +++ b/REORG.TODO/locale/nl_langinfo.c @@ -0,0 +1,33 @@
/* User interface for extracting locale-dependent parameters.
   Copyright (C) 1995-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <langinfo.h>
#include <locale.h>
#include <errno.h>
#include <stddef.h>
#include "localeinfo.h"


/* Return a string with the data for locale-dependent parameter ITEM.
*/ + +char * +nl_langinfo (nl_item item) +{ + return __nl_langinfo_l (item, _NL_CURRENT_LOCALE); +} +libc_hidden_def (nl_langinfo) diff --git a/REORG.TODO/locale/nl_langinfo_l.c b/REORG.TODO/locale/nl_langinfo_l.c new file mode 100644 index 0000000000..89acdbde01 --- /dev/null +++ b/REORG.TODO/locale/nl_langinfo_l.c @@ -0,0 +1,69 @@ +/* User interface for extracting locale-dependent parameters. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <langinfo.h> +#include <locale.h> +#include <errno.h> +#include <stddef.h> +#include <stdlib.h> +#include "localeinfo.h" + + +/* Return a string with the data for locale-dependent parameter ITEM. */ + +char * +__nl_langinfo_l (nl_item item, __locale_t l) +{ + int category = _NL_ITEM_CATEGORY (item); + unsigned int index = _NL_ITEM_INDEX (item); + const struct __locale_data *data; + + if (category < 0 || category == LC_ALL || category >= __LC_LAST) + /* Bogus category: bogus item. */ + return (char *) ""; + + /* Special case value for NL_LOCALE_NAME (category). + This is not a real item index in the string table. 
*/ + if (index == _NL_ITEM_INDEX (_NL_LOCALE_NAME (category))) + return (char *) l->__names[category]; + +#if defined NL_CURRENT_INDIRECT + /* Make direct reference to every _nl_current_CATEGORY symbol, + since we know only at runtime which categories are used. */ + switch (category) + { +# define DEFINE_CATEGORY(category, category_name, items, a) \ + case category: data = *_nl_current_##category; break; +# include "categories.def" +# undef DEFINE_CATEGORY + default: /* Should be impossible. */ + abort(); + } +#else + data = l->__locales[category]; +#endif + + if (index >= data->nstrings) + /* Bogus index for this category: bogus item. */ + return (char *) ""; + + /* Return the string for the specified item. */ + return (char *) data->values[index].string; +} +libc_hidden_def (__nl_langinfo_l) +weak_alias (__nl_langinfo_l, nl_langinfo_l) diff --git a/REORG.TODO/locale/outdigits.h b/REORG.TODO/locale/outdigits.h new file mode 100644 index 0000000000..f64bb85326 --- /dev/null +++ b/REORG.TODO/locale/outdigits.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <assert.h> +#include <langinfo.h> +#include <string.h> +#include "../locale/localeinfo.h" + +/* Look up the value of the next multibyte character and return its numerical + value if it is one of the digits known in the locale. If *DECIDED is + -1 this means it is not yet decided which form it is and we have to + search through all available digits. Otherwise we know which script + the digits are from. */ +static inline char * +outdigit_value (char *s, int n) +{ + const char *outdigit; + size_t dlen; + + assert (0 <= n && n <= 9); + outdigit = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_OUTDIGIT0_MB + n); + dlen = strlen (outdigit); + + s -= dlen; + while (dlen-- > 0) + s[dlen] = outdigit[dlen]; + + return s; +} diff --git a/REORG.TODO/locale/outdigitswc.h b/REORG.TODO/locale/outdigitswc.h new file mode 100644 index 0000000000..56dc25547e --- /dev/null +++ b/REORG.TODO/locale/outdigitswc.h @@ -0,0 +1,34 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <langinfo.h> +#include "../locale/localeinfo.h" + +/* Look up the value of the next multibyte character and return its numerical + value if it is one of the digits known in the locale. 
If *DECIDED is + -1 this means it is not yet decided which form it is and we have to + search through all available digits. Otherwise we know which script + the digits are from. */ +static inline wchar_t +outdigitwc_value (int n) +{ + assert (0 <= n && n <= 9); + + return _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_OUTDIGIT0_WC + n); +} diff --git a/REORG.TODO/locale/programs/3level.h b/REORG.TODO/locale/programs/3level.h new file mode 100644 index 0000000000..15e192dc49 --- /dev/null +++ b/REORG.TODO/locale/programs/3level.h @@ -0,0 +1,328 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible <haible@clisp.cons.org>, 2000. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +/* Construction of sparse 3-level tables. + See wchar-lookup.h or coll-lookup.h for their structure and the + meaning of p and q. 
+ + Before including this file, set + TABLE to the name of the structure to be defined + ELEMENT to the type of every entry + DEFAULT to the default value for empty entries + ITERATE if you want the TABLE_iterate function to be defined + NO_ADD_LOCALE if you don't want the add_locale_TABLE function + to be defined + + This will define + + struct TABLE; + void TABLE_init (struct TABLE *t); + ELEMENT TABLE_get (struct TABLE *t, uint32_t wc); + void TABLE_add (struct TABLE *t, uint32_t wc, ELEMENT value); + void TABLE_iterate (struct TABLE *t, + void (*fn) (uint32_t wc, ELEMENT value)); + void add_locale_TABLE (struct locale_file *file, struct TABLE *t); +*/ + +#define CONCAT(a,b) CONCAT1(a,b) +#define CONCAT1(a,b) a##b + +struct TABLE +{ + /* Parameters. */ + unsigned int p; + unsigned int q; + /* Working representation. */ + size_t level1_alloc; + size_t level1_size; + uint32_t *level1; + size_t level2_alloc; + size_t level2_size; + uint32_t *level2; + size_t level3_alloc; + size_t level3_size; + ELEMENT *level3; + /* Size of compressed representation. */ + size_t result_size; +}; + +/* Initialize. Assumes t->p and t->q have already been set. */ +static inline void +CONCAT(TABLE,_init) (struct TABLE *t) +{ + t->level1 = NULL; + t->level1_alloc = t->level1_size = 0; + t->level2 = NULL; + t->level2_alloc = t->level2_size = 0; + t->level3 = NULL; + t->level3_alloc = t->level3_size = 0; +} + +/* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless + whether 'int' is 16 bit, 32 bit, or 64 bit. */ +#define EMPTY ((uint32_t) ~0) + +/* Retrieve an entry. 
 */
/* Returns the value stored for WC in T.  DEFAULT is returned when the
   first-level index lies beyond level1_size or when the first- or
   second-level slot on the way is EMPTY.  */
static inline ELEMENT
__attribute ((always_inline))
CONCAT(TABLE,_get) (struct TABLE *t, uint32_t wc)
{
  /* WC is split into three bit fields: the low P bits index level 3, the
     next Q bits index level 2, and the remaining high bits level 1.  */
  uint32_t index1 = wc >> (t->q + t->p);
  if (index1 < t->level1_size)
    {
      uint32_t lookup1 = t->level1[index1];
      if (lookup1 != EMPTY)
        {
          uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1))
                            + (lookup1 << t->q);
          uint32_t lookup2 = t->level2[index2];
          if (lookup2 != EMPTY)
            {
              uint32_t index3 = (wc & ((1 << t->p) - 1))
                                + (lookup2 << t->p);
              ELEMENT lookup3 = t->level3[index3];

              return lookup3;
            }
        }
    }
  return DEFAULT;
}

/* Add one entry.  Nothing happens when VALUE already is what TABLE_get
   returns for WC.  Otherwise the level-1 array and the level-2 and
   level-3 blocks on the path to WC are created as needed (storage grows
   through xrealloc) and VALUE is stored.  */
static void
CONCAT(TABLE,_add) (struct TABLE *t, uint32_t wc, ELEMENT value)
{
  uint32_t index1 = wc >> (t->q + t->p);
  uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
  uint32_t index3 = wc & ((1 << t->p) - 1);
  size_t i, i1, i2;

  if (value == CONCAT(TABLE,_get) (t, wc))
    return;

  if (index1 >= t->level1_size)
    {
      if (index1 >= t->level1_alloc)
        {
          /* Double the allocation, or grow straight to INDEX1 + 1 when
             doubling would not reach it.  */
          size_t alloc = 2 * t->level1_alloc;
          if (alloc <= index1)
            alloc = index1 + 1;
          t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
                                             alloc * sizeof (uint32_t));
          t->level1_alloc = alloc;
        }
      /* Newly exposed level-1 slots start out empty.  */
      while (index1 >= t->level1_size)
        t->level1[t->level1_size++] = EMPTY;
    }

  if (t->level1[index1] == EMPTY)
    {
      /* Append a fresh level-2 block of 1 << q slots, all EMPTY.  */
      if (t->level2_size == t->level2_alloc)
        {
          size_t alloc = 2 * t->level2_alloc + 1;
          t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
                                             (alloc << t->q) * sizeof (uint32_t));
          t->level2_alloc = alloc;
        }
      i1 = t->level2_size << t->q;
      i2 = (t->level2_size + 1) << t->q;
      for (i = i1; i < i2; i++)
        t->level2[i] = EMPTY;
      t->level1[index1] = t->level2_size++;
    }

  index2 += t->level1[index1] << t->q;

  if (t->level2[index2] == EMPTY)
    {
      /* Append a fresh level-3 block of 1 << p entries, all DEFAULT.  */
      if (t->level3_size == t->level3_alloc)
        {
          size_t alloc = 2 * t->level3_alloc + 1;
          t->level3 = (ELEMENT *) xrealloc ((char *) t->level3,
                                            (alloc << t->p) * sizeof (ELEMENT));
          t->level3_alloc = alloc;
        }
      i1 = t->level3_size << t->p;
      i2 = (t->level3_size + 1) << t->p;
      for (i = i1; i < i2; i++)
        t->level3[i] = DEFAULT;
      t->level2[index2] = t->level3_size++;
    }

  index3 += t->level2[index2] << t->p;

  t->level3[index3] = value;
}

#ifdef ITERATE
/* Apply a function to all entries in the table.  FN is called with the
   reconstructed character code and the stored value for every level-3
   entry that differs from DEFAULT; EMPTY level-1 and level-2 slots are
   skipped.  */
static void
CONCAT(TABLE,_iterate) (struct TABLE *t,
                        void (*fn) (uint32_t wc, ELEMENT value))
{
  uint32_t index1;
  for (index1 = 0; index1 < t->level1_size; index1++)
    {
      uint32_t lookup1 = t->level1[index1];
      if (lookup1 != EMPTY)
        {
          uint32_t lookup1_shifted = lookup1 << t->q;
          uint32_t index2;
          for (index2 = 0; index2 < (1 << t->q); index2++)
            {
              uint32_t lookup2 = t->level2[index2 + lookup1_shifted];
              if (lookup2 != EMPTY)
                {
                  uint32_t lookup2_shifted = lookup2 << t->p;
                  uint32_t index3;
                  for (index3 = 0; index3 < (1 << t->p); index3++)
                    {
                      ELEMENT lookup3 = t->level3[index3 + lookup2_shifted];
                      if (lookup3 != DEFAULT)
                        fn ((((index1 << t->q) + index2) << t->p) + index3,
                            lookup3);
                    }
                }
            }
        }
    }
}
#endif

#ifndef NO_ADD_LOCALE
/* Finalize and shrink.  Identical level-3 blocks and then identical
   level-2 blocks are merged, the table is written to FILE as one locale
   structure, and the three level arrays of T are freed.  T's arrays must
   not be used afterwards.  The function aborts when ELEMENT is neither
   one byte wide nor the size of uint32_t.  */
static void
CONCAT(add_locale_,TABLE) (struct locale_file *file, struct TABLE *t)
{
  size_t i, j, k;
  /* NOTE(review): both arrays are variable-length and sized by the block
     counts, so they have zero length for a table nothing was added to and
     use stack space proportional to the table size.  Confirm that callers
     keep both within safe bounds.  */
  uint32_t reorder3[t->level3_size];
  uint32_t reorder2[t->level2_size];
  uint32_t level2_offset, level3_offset, last_offset;

  /* Uniquify level3 blocks.  */
  k = 0;
  for (j = 0; j < t->level3_size; j++)
    {
      /* Look for an already kept block with the same contents.  */
      for (i = 0; i < k; i++)
        if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
                    (1 << t->p) * sizeof (ELEMENT)) == 0)
          break;
      /* Relocate block j to block i.  */
      reorder3[j] = i;
      if (i == k)
        {
          /* No duplicate found: keep block J as the next unique block.  */
          if (i != j)
            memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
                    (1 << t->p) * sizeof (ELEMENT));
          k++;
        }
    }
  t->level3_size = k;

  /* Redirect the level-2 slots to the renumbered level-3 blocks.  */
  for (i = 0; i < (t->level2_size << t->q); i++)
    if (t->level2[i] != EMPTY)
      t->level2[i] = reorder3[t->level2[i]];

  /* Uniquify level2 blocks.  */
  k = 0;
  for (j = 0; j < t->level2_size; j++)
    {
      for (i = 0; i < k; i++)
        if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
                    (1 << t->q) * sizeof (uint32_t)) == 0)
          break;
      /* Relocate block j to block i.  */
      reorder2[j] = i;
      if (i == k)
        {
          if (i != j)
            memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
                    (1 << t->q) * sizeof (uint32_t));
          k++;
        }
    }
  t->level2_size = k;

  /* Redirect the level-1 slots to the renumbered level-2 blocks.  */
  for (i = 0; i < t->level1_size; i++)
    if (t->level1[i] != EMPTY)
      t->level1[i] = reorder2[t->level1[i]];

  /* Create and fill the resulting compressed representation.  */
  last_offset =
    5 * sizeof (uint32_t)
    + t->level1_size * sizeof (uint32_t)
    + (t->level2_size << t->q) * sizeof (uint32_t)
    + (t->level3_size << t->p) * sizeof (ELEMENT);
  t->result_size = LOCFILE_ALIGN_UP (last_offset);

  /* Byte offsets of the level-2 and level-3 areas from the start of the
     structure; the five-word header and level 1 come first.  */
  level2_offset =
    5 * sizeof (uint32_t)
    + t->level1_size * sizeof (uint32_t);
  level3_offset =
    5 * sizeof (uint32_t)
    + t->level1_size * sizeof (uint32_t)
    + (t->level2_size << t->q) * sizeof (uint32_t);

  /* Header: level-1 shift, level-1 size, level-2 shift, level-2 mask,
     level-3 mask.  */
  start_locale_structure (file);
  add_locale_uint32 (file, t->q + t->p);
  add_locale_uint32 (file, t->level1_size);
  add_locale_uint32 (file, t->p);
  add_locale_uint32 (file, (1 << t->q) - 1);
  add_locale_uint32 (file, (1 << t->p) - 1);

  /* Level-1 and level-2 entries are written as byte offsets of the block
     they refer to; an EMPTY slot is written as 0.  */
  for (i = 0; i < t->level1_size; i++)
    add_locale_uint32
      (file,
       t->level1[i] == EMPTY
       ? 0
       : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);

  for (i = 0; i < (t->level2_size << t->q); i++)
    add_locale_uint32
      (file,
       t->level2[i] == EMPTY
       ? 0
       : (t->level2[i] << t->p) * sizeof (ELEMENT) + level3_offset);

  if (sizeof (ELEMENT) == 1)
    add_locale_raw_data (file, t->level3, t->level3_size << t->p);
  else if (sizeof (ELEMENT) == sizeof (uint32_t))
    add_locale_uint32_array (file, (uint32_t *) t->level3,
                             t->level3_size << t->p);
  else
    abort ();
  align_locale_data (file, LOCFILE_ALIGN);
  end_locale_structure (file);

  if (t->level1_alloc > 0)
    free (t->level1);
  if (t->level2_alloc > 0)
    free (t->level2);
  if (t->level3_alloc > 0)
    free (t->level3);
}
#endif

#undef EMPTY
#undef TABLE
#undef ELEMENT
#undef DEFAULT
#undef ITERATE
#undef NO_ADD_LOCALE
diff --git a/REORG.TODO/locale/programs/charmap-dir.c b/REORG.TODO/locale/programs/charmap-dir.c new file mode 100644 index 0000000000..e55ab86e28 --- /dev/null +++ b/REORG.TODO/locale/programs/charmap-dir.c @@ -0,0 +1,309 @@
/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#include <dirent.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <libintl.h>
#include <spawn.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>

#include "localedef.h"
#include "charmap-dir.h"

/* The data type of a charmap directory being traversed.
*/ +struct charmap_dir +{ + DIR *dir; + /* The directory pathname, ending in a slash. */ + char *directory; + size_t directory_len; + /* Scratch area used for returning pathnames. */ + char *pathname; + size_t pathname_size; +}; + +/* Starts a charmap directory traversal. + Returns a CHARMAP_DIR, or NULL if the directory doesn't exist. */ +CHARMAP_DIR * +charmap_opendir (const char *directory) +{ + struct charmap_dir *cdir; + DIR *dir; + size_t len; + int add_slash; + + dir = opendir (directory); + if (dir == NULL) + { + WITH_CUR_LOCALE (error (1, errno, gettext ("\ +cannot read character map directory `%s'"), directory)); + return NULL; + } + + cdir = (struct charmap_dir *) xmalloc (sizeof (struct charmap_dir)); + cdir->dir = dir; + + len = strlen (directory); + add_slash = (len == 0 || directory[len - 1] != '/'); + cdir->directory = (char *) xmalloc (len + add_slash + 1); + memcpy (cdir->directory, directory, len); + if (add_slash) + cdir->directory[len] = '/'; + cdir->directory[len + add_slash] = '\0'; + cdir->directory_len = len + add_slash; + + cdir->pathname = NULL; + cdir->pathname_size = 0; + + return cdir; +} + +/* Reads the next directory entry. + Returns its charmap name, or NULL if past the last entry or upon error. + The storage returned may be overwritten by a later charmap_readdir + call on the same CHARMAP_DIR. 
*/ +const char * +charmap_readdir (CHARMAP_DIR *cdir) +{ + for (;;) + { + struct dirent64 *dirent; + size_t len; + size_t size; + char *filename; + mode_t mode; + + dirent = readdir64 (cdir->dir); + if (dirent == NULL) + return NULL; + if (strcmp (dirent->d_name, ".") == 0) + continue; + if (strcmp (dirent->d_name, "..") == 0) + continue; + + len = strlen (dirent->d_name); + + size = cdir->directory_len + len + 1; + if (size > cdir->pathname_size) + { + free (cdir->pathname); + if (size < 2 * cdir->pathname_size) + size = 2 * cdir->pathname_size; + cdir->pathname = (char *) xmalloc (size); + cdir->pathname_size = size; + } + + stpcpy (stpcpy (cdir->pathname, cdir->directory), dirent->d_name); + filename = cdir->pathname + cdir->directory_len; + +#ifdef _DIRENT_HAVE_D_TYPE + if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_LNK) + mode = DTTOIF (dirent->d_type); + else +#endif + { + struct stat64 statbuf; + + if (stat64 (cdir->pathname, &statbuf) < 0) + continue; + + mode = statbuf.st_mode; + } + + if (!S_ISREG (mode)) + continue; + + /* For compressed charmaps, the canonical charmap name does not + include the extension. */ + if (len > 3 && memcmp (&filename[len - 3], ".gz", 3) == 0) + filename[len - 3] = '\0'; + else if (len > 4 && memcmp (&filename[len - 4], ".bz2", 4) == 0) + filename[len - 4] = '\0'; + + return filename; + } +} + +/* Finishes a charmap directory traversal, and frees the resources + attached to the CHARMAP_DIR. */ +int +charmap_closedir (CHARMAP_DIR *cdir) +{ + DIR *dir = cdir->dir; + + free (cdir->directory); + free (cdir->pathname); + free (cdir); + return closedir (dir); +} + +/* Creates a subprocess decompressing the given pathname, and returns + a stream reading its output (the decompressed data). 
*/ +static +FILE * +fopen_uncompressed (const char *pathname, const char *compressor) +{ + int pfd; + + pfd = open (pathname, O_RDONLY); + if (pfd >= 0) + { + struct stat64 statbuf; + int fd[2]; + + if (fstat64 (pfd, &statbuf) >= 0 + && S_ISREG (statbuf.st_mode) + && pipe (fd) >= 0) + { + char *argv[4] + = { (char *) compressor, (char *) "-d", (char *) "-c", NULL }; + posix_spawn_file_actions_t actions; + + if (posix_spawn_file_actions_init (&actions) == 0) + { + if (posix_spawn_file_actions_adddup2 (&actions, + fd[1], STDOUT_FILENO) == 0 + && posix_spawn_file_actions_addclose (&actions, fd[1]) == 0 + && posix_spawn_file_actions_addclose (&actions, fd[0]) == 0 + && posix_spawn_file_actions_adddup2 (&actions, + pfd, STDIN_FILENO) == 0 + && posix_spawn_file_actions_addclose (&actions, pfd) == 0 + && posix_spawnp (NULL, compressor, &actions, NULL, + argv, environ) == 0) + { + posix_spawn_file_actions_destroy (&actions); + close (fd[1]); + close (pfd); + return fdopen (fd[0], "r"); + } + posix_spawn_file_actions_destroy (&actions); + } + close (fd[1]); + close (fd[0]); + } + close (pfd); + } + return NULL; +} + +/* Opens a charmap for reading, given its name (not an alias name). */ +FILE * +charmap_open (const char *directory, const char *name) +{ + size_t dlen = strlen (directory); + int add_slash = (dlen == 0 || directory[dlen - 1] != '/'); + size_t nlen = strlen (name); + char *pathname; + char *p; + FILE *stream; + + pathname = alloca (dlen + add_slash + nlen + 5); + p = stpcpy (pathname, directory); + if (add_slash) + *p++ = '/'; + p = stpcpy (p, name); + + stream = fopen (pathname, "rm"); + if (stream != NULL) + return stream; + + memcpy (p, ".gz", 4); + stream = fopen_uncompressed (pathname, "gzip"); + if (stream != NULL) + return stream; + + memcpy (p, ".bz2", 5); + stream = fopen_uncompressed (pathname, "bzip2"); + if (stream != NULL) + return stream; + + return NULL; +} + +/* An empty alias list. Avoids the need to return NULL from + charmap_aliases. 
*/ +static char *empty[1]; + +/* Returns a NULL terminated list of alias names of a charmap. */ +char ** +charmap_aliases (const char *directory, const char *name) +{ + FILE *stream; + char **aliases; + size_t naliases; + + stream = charmap_open (directory, name); + if (stream == NULL) + return empty; + + aliases = NULL; + naliases = 0; + + while (!feof (stream)) + { + char *alias = NULL; + char junk[BUFSIZ]; + + if (fscanf (stream, " <code_set_name> %ms", &alias) == 1 + || fscanf (stream, "%% alias %ms", &alias) == 1) + { + aliases = (char **) xrealloc (aliases, + (naliases + 2) * sizeof (char *)); + aliases[naliases++] = alias; + } + + /* Read the rest of the line. */ + if (fgets (junk, sizeof junk, stream) != NULL) + { + if (strstr (junk, "CHARMAP") != NULL) + /* We cannot expect more aliases from now on. */ + break; + + while (strchr (junk, '\n') == NULL + && fgets (junk, sizeof junk, stream) != NULL) + continue; + } + } + + fclose (stream); + + if (naliases == 0) + return empty; + + aliases[naliases] = NULL; + return aliases; +} + +/* Frees an alias list returned by charmap_aliases. */ +void +charmap_free_aliases (char **aliases) +{ + if (aliases != empty) + { + char **p; + + for (p = aliases; *p; p++) + free (*p); + + free (aliases); + } +} diff --git a/REORG.TODO/locale/programs/charmap-dir.h b/REORG.TODO/locale/programs/charmap-dir.h new file mode 100644 index 0000000000..c27d7fe614 --- /dev/null +++ b/REORG.TODO/locale/programs/charmap-dir.h @@ -0,0 +1,46 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _CHARMAP_DIR_H +#define _CHARMAP_DIR_H 1 + +/* The data type of a charmap directory being traversed. */ +typedef struct charmap_dir CHARMAP_DIR; + +/* Starts a charmap directory traversal. + Returns a CHARMAP_DIR, or NULL if the directory doesn't exist. */ +extern CHARMAP_DIR *charmap_opendir (const char *directory); + +/* Reads the next directory entry. + Returns its charmap name, or NULL if past the last entry or upon error. + The storage returned may be overwritten by a later charmap_readdir + call on the same CHARMAP_DIR. */ +extern const char *charmap_readdir (CHARMAP_DIR *dir); + +/* Finishes a charmap directory traversal, and frees the resources + attached to the CHARMAP_DIR. */ +extern int charmap_closedir (CHARMAP_DIR *dir); + +/* Returns a NULL terminated list of alias names of a charmap. */ +extern char **charmap_aliases (const char *directory, const char *name); + +/* Frees an alias list returned by charmap_aliases. */ +extern void charmap_free_aliases (char **aliases); + +/* Opens a charmap for reading, given its name (not an alias name). */ +extern FILE *charmap_open (const char *directory, const char *name); + +#endif /* _CHARMAP_DIR_H */ diff --git a/REORG.TODO/locale/programs/charmap-kw.gperf b/REORG.TODO/locale/programs/charmap-kw.gperf new file mode 100644 index 0000000000..0ebdfeb26e --- /dev/null +++ b/REORG.TODO/locale/programs/charmap-kw.gperf @@ -0,0 +1,42 @@ +%{ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.org>. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#include "locfile-token.h" +%} +struct keyword_t ; +%% +code_set_name, tok_code_set_name, 1 +mb_cur_max, tok_mb_cur_max, 1 +mb_cur_min, tok_mb_cur_min, 1 +escape_char, tok_escape_char, 1 +comment_char, tok_comment_char, 1 +g0esc, tok_g0esc, 1 +g1esc, tok_g1esc, 1 +g2esc, tok_g2esc, 1 +g3esc, tok_g3esc, 1 +escseq, tok_escseq, 1 +addset, tok_addset, 1 +include, tok_include, 1 +CHARMAP, tok_charmap, 0 +END, tok_end, 0 +WIDTH, tok_width, 0 +WIDTH_VARIABLE, tok_width_variable, 0 +WIDTH_DEFAULT, tok_width_default, 0 diff --git a/REORG.TODO/locale/programs/charmap-kw.h b/REORG.TODO/locale/programs/charmap-kw.h new file mode 100644 index 0000000000..9e2969c4a1 --- /dev/null +++ b/REORG.TODO/locale/programs/charmap-kw.h @@ -0,0 +1,195 @@ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -acCgopt -k'1,2,5,9,$' -L ANSI-C -N charmap_hash charmap-kw.gperf */ + +#ifa' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) 
&& ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +#line 1 "charmap-kw.gperf" + +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.org>. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#include "locfile-token.h" +#line 24 "charmap-kw.gperf" +struct keyword_t ; + +#define TOTAL_KEYWORDS 17 +#define MIN_WORD_LENGTH 3 +#define MAX_WORD_LENGTH 14 +#define MIN_HASH_VALUE 3 +#define MAX_HASH_VALUE 35 +/* maximum key range = 33, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (register const char *str, register unsigned int len) +{ + static const unsigned char asso_values[] = + { + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 25, 20, + 15, 10, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 5, 0, 0, + 5, 36, 0, 0, 36, 36, 36, 5, 0, 36, + 0, 36, 0, 36, 0, 36, 36, 0, 36, 36, + 36, 36, 36, 36, 36, 0, 36, 5, 0, 0, + 5, 0, 36, 5, 0, 0, 36, 36, 36, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 36, 36, + 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36 + }; + register int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[8]]; + /*FALLTHROUGH*/ + case 8: + case 7: + case 6: + case 5: + hval += asso_values[(unsigned char)str[4]]; + /*FALLTHROUGH*/ + case 4: + case 3: + case 2: + hval += asso_values[(unsigned char)str[1]]; + /*FALLTHROUGH*/ + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; 
+} + +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct keyword_t * +charmap_hash (register const char *str, register unsigned int len) +{ + static const struct keyword_t wordlist[] = + { + {""}, {""}, {""}, +#line 39 "charmap-kw.gperf" + {"END", tok_end, 0}, + {""}, +#line 40 "charmap-kw.gperf" + {"WIDTH", tok_width, 0}, +#line 35 "charmap-kw.gperf" + {"escseq", tok_escseq, 1}, +#line 37 "charmap-kw.gperf" + {"include", tok_include, 1}, + {""}, {""}, +#line 28 "charmap-kw.gperf" + {"mb_cur_min", tok_mb_cur_min, 1}, +#line 29 "charmap-kw.gperf" + {"escape_char", tok_escape_char, 1}, +#line 30 "charmap-kw.gperf" + {"comment_char", tok_comment_char, 1}, +#line 26 "charmap-kw.gperf" + {"code_set_name", tok_code_set_name, 1}, +#line 41 "charmap-kw.gperf" + {"WIDTH_VARIABLE", tok_width_variable, 0}, +#line 27 "charmap-kw.gperf" + {"mb_cur_max", tok_mb_cur_max, 1}, +#line 36 "charmap-kw.gperf" + {"addset", tok_addset, 1}, +#line 38 "charmap-kw.gperf" + {"CHARMAP", tok_charmap, 0}, +#line 42 "charmap-kw.gperf" + {"WIDTH_DEFAULT", tok_width_default, 0}, + {""}, +#line 34 "charmap-kw.gperf" + {"g3esc", tok_g3esc, 1}, + {""}, {""}, {""}, {""}, +#line 33 "charmap-kw.gperf" + {"g2esc", tok_g2esc, 1}, + {""}, {""}, {""}, {""}, +#line 32 "charmap-kw.gperf" + {"g1esc", tok_g1esc, 1}, + {""}, {""}, {""}, {""}, +#line 31 "charmap-kw.gperf" + {"g0esc", tok_g0esc, 1} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0') + return &wordlist[key]; + } + } + return 0; +} diff --git a/REORG.TODO/locale/programs/charmap.c b/REORG.TODO/locale/programs/charmap.c new file mode 100644 index 0000000000..129aefffc1 --- /dev/null +++ b/REORG.TODO/locale/programs/charmap.c @@ 
-0,0 +1,1104 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <ctype.h> +#include <errno.h> +#include <libintl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <error.h> +#include <stdint.h> + +#include "localedef.h" +#include "linereader.h" +#include "charmap.h" +#include "charmap-dir.h" + +#include <assert.h> + + +/* Define the lookup function. */ +#include "charmap-kw.h" + + +/* Prototypes for local functions. 
*/ +static struct charmap_t *parse_charmap (struct linereader *cmfile, + int verbose, int be_quiet); +static void new_width (struct linereader *cmfile, struct charmap_t *result, + const char *from, const char *to, + unsigned long int width); +static void charmap_new_char (struct linereader *lr, struct charmap_t *cm, + size_t nbytes, unsigned char *bytes, + const char *from, const char *to, + int decimal_ellipsis, int step); + + +bool enc_not_ascii_compatible; + + +#ifdef NEED_NULL_POINTER +static const char *null_pointer; +#endif + +static struct linereader * +cmlr_open (const char *directory, const char *name, kw_hash_fct_t hf) +{ + FILE *fp; + + fp = charmap_open (directory, name); + if (fp == NULL) + return NULL; + else + { + size_t dlen = strlen (directory); + int add_slash = (dlen == 0 || directory[dlen - 1] != '/'); + size_t nlen = strlen (name); + char *pathname; + char *p; + + pathname = alloca (dlen + add_slash + nlen + 1); + p = stpcpy (pathname, directory); + if (add_slash) + *p++ = '/'; + stpcpy (p, name); + + return lr_create (fp, pathname, hf); + } +} + +struct charmap_t * +charmap_read (const char *filename, int verbose, int error_not_found, + int be_quiet, int use_default) +{ + struct charmap_t *result = NULL; + + if (filename != NULL) + { + struct linereader *cmfile; + + /* First try the name as found in the parameter. */ + cmfile = lr_open (filename, charmap_hash); + if (cmfile == NULL) + { + /* No successful. So start looking through the directories + in the I18NPATH if this is a simple name. 
*/ + if (strchr (filename, '/') == NULL) + { + char *i18npath = getenv ("I18NPATH"); + if (i18npath != NULL && *i18npath != '\0') + { + const size_t pathlen = strlen (i18npath); + char i18npathbuf[pathlen + 1]; + char path[pathlen + sizeof ("/charmaps")]; + char *next; + i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1); + + while (cmfile == NULL + && (next = strsep (&i18npath, ":")) != NULL) + { + stpcpy (stpcpy (path, next), "/charmaps"); + cmfile = cmlr_open (path, filename, charmap_hash); + + if (cmfile == NULL) + /* Try without the "/charmaps" part. */ + cmfile = cmlr_open (next, filename, charmap_hash); + } + } + + if (cmfile == NULL) + /* Try the default directory. */ + cmfile = cmlr_open (CHARMAP_PATH, filename, charmap_hash); + } + } + + if (cmfile != NULL) + result = parse_charmap (cmfile, verbose, be_quiet); + + if (result == NULL && error_not_found) + WITH_CUR_LOCALE (error (0, errno, _("\ +character map file `%s' not found"), filename)); + } + + if (result == NULL && filename != NULL && strchr (filename, '/') == NULL) + { + /* OK, one more try. We also accept the names given to the + character sets in the files. Sometimes they differ from the + file name. 
*/ + CHARMAP_DIR *dir; + + dir = charmap_opendir (CHARMAP_PATH); + if (dir != NULL) + { + const char *dirent; + + while ((dirent = charmap_readdir (dir)) != NULL) + { + char **aliases; + char **p; + int found; + + aliases = charmap_aliases (CHARMAP_PATH, dirent); + found = 0; + for (p = aliases; *p; p++) + if (strcasecmp (*p, filename) == 0) + { + found = 1; + break; + } + charmap_free_aliases (aliases); + + if (found) + { + struct linereader *cmfile; + + cmfile = cmlr_open (CHARMAP_PATH, dirent, charmap_hash); + if (cmfile != NULL) + result = parse_charmap (cmfile, verbose, be_quiet); + + break; + } + } + + charmap_closedir (dir); + } + } + + if (result == NULL && DEFAULT_CHARMAP != NULL) + { + struct linereader *cmfile; + + cmfile = cmlr_open (CHARMAP_PATH, DEFAULT_CHARMAP, charmap_hash); + if (cmfile != NULL) + result = parse_charmap (cmfile, verbose, be_quiet); + + if (result == NULL) + WITH_CUR_LOCALE (error (4, errno, _("\ +default character map file `%s' not found"), DEFAULT_CHARMAP)); + } + + if (result != NULL && result->code_set_name == NULL) + /* The input file does not specify a code set name. This + shouldn't happen but we should cope with it. */ + result->code_set_name = basename (filename); + + /* Test of ASCII compatibility of locale encoding. + + Verify that the encoding to be used in a locale is ASCII compatible, + at least for the graphic characters, excluding the control characters, + '$' and '@'. This constraint comes from an ISO C 99 restriction. + + ISO C 99 section 7.17.(2) (about wchar_t): + the null character shall have the code value zero and each member of + the basic character set shall have a code value equal to its value + when used as the lone character in an integer character constant. 
+ ISO C 99 section 5.2.1.(3): + Both the basic source and basic execution character sets shall have + the following members: the 26 uppercase letters of the Latin alphabet + A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + the 26 lowercase letters of the Latin alphabet + a b c d e f g h i j k l m n o p q r s t u v w x y z + the 10 decimal digits + 0 1 2 3 4 5 6 7 8 9 + the following 29 graphic characters + ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~ + the space character, and control characters representing horizontal + tab, vertical tab, and form feed. + + Therefore, for all members of the "basic character set", the 'char' code + must have the same value as the 'wchar_t' code, which in glibc is the + same as the Unicode code, which for all of the enumerated characters + is identical to the ASCII code. */ + if (result != NULL && use_default) + { + static const char basic_charset[] = + { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-', + '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^', + '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0' + }; + int failed = 0; + const char *p = basic_charset; + + do + { + struct charseq *seq = charmap_find_symbol (result, p, 1); + + if (seq == NULL || seq->ucs4 != (uint32_t) *p) + failed = 1; + } + while (*p++ != '\0'); + + if (failed) + { + WITH_CUR_LOCALE (fprintf (stderr, _("\ +character map `%s' is not ASCII compatible, locale not ISO C compliant\n"), + result->code_set_name)); + enc_not_ascii_compatible = true; + } + } + + return result; +} + + +static struct charmap_t * +parse_charmap (struct linereader *cmfile, int verbose, int be_quiet) +{ + struct charmap_t 
*result; + int state; + enum token_t expected_tok = tok_error; + const char *expected_str = NULL; + char *from_name = NULL; + char *to_name = NULL; + enum token_t ellipsis = 0; + int step = 1; + + /* We don't want symbolic names in string to be translated. */ + cmfile->translate_strings = 0; + + /* Allocate room for result. */ + result = (struct charmap_t *) xmalloc (sizeof (struct charmap_t)); + memset (result, '\0', sizeof (struct charmap_t)); + /* The default DEFAULT_WIDTH is 1. */ + result->width_default = 1; + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + obstack_init (&result->mem_pool); + + if (init_hash (&result->char_table, 256) + || init_hash (&result->byte_table, 256)) + { + free (result); + return NULL; + } + + /* We use a state machine to describe the charmap description file + format. */ + state = 1; + while (1) + { + /* What's on? */ + struct token *now = lr_token (cmfile, NULL, NULL, NULL, verbose); + enum token_t nowtok = now->tok; + struct token *arg; + + if (nowtok == tok_eof) + break; + + switch (state) + { + case 1: + /* The beginning. We expect the special declarations, EOL or + `CHARMAP'. */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_charmap) + { + from_name = NULL; + to_name = NULL; + + /* We have to set up the real work. Fill in some + default values. 
*/ + if (result->mb_cur_max == 0) + result->mb_cur_max = 1; + if (result->mb_cur_min == 0) + result->mb_cur_min = result->mb_cur_max; + if (result->mb_cur_min > result->mb_cur_max) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: <mb_cur_max> must be greater than <mb_cur_min>\n"), + cmfile->fname)); + + result->mb_cur_min = result->mb_cur_max; + } + + lr_ignore_rest (cmfile, 1); + + state = 2; + continue; + } + + if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max + && nowtok != tok_mb_cur_min && nowtok != tok_escape_char + && nowtok != tok_comment_char && nowtok != tok_g0esc + && nowtok != tok_g1esc && nowtok != tok_g2esc + && nowtok != tok_g3esc && nowtok != tok_repertoiremap + && nowtok != tok_include) + { + lr_error (cmfile, _("syntax error in prolog: %s"), + _("invalid definition")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + /* We know that we need an argument. */ + arg = lr_token (cmfile, NULL, NULL, NULL, verbose); + + switch (nowtok) + { + case tok_code_set_name: + case tok_repertoiremap: + if (arg->tok != tok_ident && arg->tok != tok_string) + { + badarg: + lr_error (cmfile, _("syntax error in prolog: %s"), + _("bad argument")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_code_set_name) + result->code_set_name = obstack_copy0 (&result->mem_pool, + arg->val.str.startmb, + arg->val.str.lenmb); + else + result->repertoiremap = obstack_copy0 (&result->mem_pool, + arg->val.str.startmb, + arg->val.str.lenmb); + + lr_ignore_rest (cmfile, 1); + continue; + + case tok_mb_cur_max: + case tok_mb_cur_min: + if (arg->tok != tok_number) + goto badarg; + + if (verbose + && ((nowtok == tok_mb_cur_max + && result->mb_cur_max != 0) + || (nowtok == tok_mb_cur_max + && result->mb_cur_max != 0))) + lr_error (cmfile, _("duplicate definition of <%s>"), + nowtok == tok_mb_cur_min + ? 
"mb_cur_min" : "mb_cur_max"); + + if (arg->val.num < 1) + { + lr_error (cmfile, + _("value for <%s> must be 1 or greater"), + nowtok == tok_mb_cur_min + ? "mb_cur_min" : "mb_cur_max"); + + lr_ignore_rest (cmfile, 0); + continue; + } + if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0 + && (int) arg->val.num < result->mb_cur_min) + || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0 + && (int) arg->val.num > result->mb_cur_max)) + { + lr_error (cmfile, _("\ +value of <%s> must be greater or equal than the value of <%s>"), + "mb_cur_max", "mb_cur_min"); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_mb_cur_max) + result->mb_cur_max = arg->val.num; + else + result->mb_cur_min = arg->val.num; + + lr_ignore_rest (cmfile, 1); + continue; + + case tok_escape_char: + case tok_comment_char: + if (arg->tok != tok_ident) + goto badarg; + + if (arg->val.str.lenmb != 1) + { + lr_error (cmfile, _("\ +argument to <%s> must be a single character"), + nowtok == tok_escape_char ? "escape_char" + : "comment_char"); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_escape_char) + cmfile->escape_char = *arg->val.str.startmb; + else + cmfile->comment_char = *arg->val.str.startmb; + + lr_ignore_rest (cmfile, 1); + continue; + + case tok_g0esc: + case tok_g1esc: + case tok_g2esc: + case tok_g3esc: + case tok_escseq: + lr_ignore_rest (cmfile, 0); /* XXX */ + continue; + + case tok_include: + lr_error (cmfile, _("\ +character sets with locking states are not supported")); + exit (4); + + default: + /* Cannot happen. */ + assert (! "Should not happen"); + } + break; + + case 2: + /* We have seen `CHARMAP' and now are in the body. Each line + must have the format "%s %s %s\n" or "%s...%s %s %s\n". */ + if (nowtok == tok_eol) + /* Ignore empty lines. 
*/ + continue; + + if (nowtok == tok_end) + { + expected_tok = tok_charmap; + expected_str = "CHARMAP"; + state = 90; + continue; + } + + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "CHARMAP", _("no symbolic name given")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + /* If the previous line was not completely correct free the + used memory. */ + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + if (nowtok == tok_bsymbol) + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + from_name = (char *) obstack_finish (&result->mem_pool); + } + to_name = NULL; + + state = 3; + continue; + + case 3: + /* We have two possibilities: We can see an ellipsis or an + encoding value. */ + if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4 + || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2 + || nowtok == tok_ellipsis2_2) + { + ellipsis = nowtok; + if (nowtok == tok_ellipsis4_2) + { + step = 2; + nowtok = tok_ellipsis4; + } + else if (nowtok == tok_ellipsis2_2) + { + step = 2; + nowtok = tok_ellipsis2; + } + state = 4; + continue; + } + /* FALLTHROUGH */ + + case 5: + if (nowtok != tok_charcode) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "CHARMAP", _("invalid encoding given")); + + lr_ignore_rest (cmfile, 0); + + state = 2; + continue; + } + + if (now->val.charcode.nbytes < result->mb_cur_min) + lr_error (cmfile, _("too few bytes in character encoding")); + else if (now->val.charcode.nbytes > result->mb_cur_max) + lr_error (cmfile, _("too many bytes in character encoding")); + else + charmap_new_char (cmfile, result, now->val.charcode.nbytes, + now->val.charcode.bytes, from_name, to_name, + ellipsis != tok_ellipsis2, step); + + /* Ignore trailing comment silently. 
*/ + lr_ignore_rest (cmfile, 0); + + from_name = NULL; + to_name = NULL; + ellipsis = tok_none; + step = 1; + + state = 2; + continue; + + case 4: + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "CHARMAP", + _("no symbolic name given for end of range")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + /* Copy the to-name in a safe place. */ + if (nowtok == tok_bsymbol) + to_name = (char *) obstack_copy0 (&result->mem_pool, + cmfile->token.val.str.startmb, + cmfile->token.val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + to_name = (char *) obstack_finish (&result->mem_pool); + } + + state = 5; + continue; + + case 90: + if (nowtok != expected_tok) + lr_error (cmfile, _("\ +%1$s: definition does not end with `END %1$s'"), expected_str); + + lr_ignore_rest (cmfile, nowtok == expected_tok); + state = 91; + continue; + + case 91: + /* Waiting for WIDTH... */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_width_default) + { + state = 92; + continue; + } + + if (nowtok == tok_width) + { + lr_ignore_rest (cmfile, 1); + state = 93; + continue; + } + + if (nowtok == tok_width_variable) + { + lr_ignore_rest (cmfile, 1); + state = 98; + continue; + } + + lr_error (cmfile, _("\ +only WIDTH definitions are allowed to follow the CHARMAP definition")); + + lr_ignore_rest (cmfile, 0); + continue; + + case 92: + if (nowtok != tok_number) + lr_error (cmfile, _("value for %s must be an integer"), + "WIDTH_DEFAULT"); + else + result->width_default = now->val.num; + + lr_ignore_rest (cmfile, nowtok == tok_number); + + state = 91; + continue; + + case 93: + /* We now expect `END WIDTH' or lines of the format "%s %d\n" or + "%s...%s %d\n". */ + if (nowtok == tok_eol) + /* ignore empty lines. 
*/ + continue; + + if (nowtok == tok_end) + { + expected_tok = tok_width; + expected_str = "WIDTH"; + state = 90; + continue; + } + + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH", _("no symbolic name given")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + if (nowtok == tok_bsymbol) + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + from_name = (char *) obstack_finish (&result->mem_pool); + } + + to_name = NULL; + + state = 94; + continue; + + case 94: + if (nowtok == tok_ellipsis3) + { + state = 95; + continue; + } + + case 96: + if (nowtok != tok_number) + lr_error (cmfile, _("value for %s must be an integer"), + "WIDTH"); + else + { + /* Store width for chars. */ + new_width (cmfile, result, from_name, to_name, now->val.num); + + from_name = NULL; + to_name = NULL; + } + + lr_ignore_rest (cmfile, nowtok == tok_number); + + state = 93; + continue; + + case 95: + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH", _("no symbolic name given for end of range")); + + lr_ignore_rest (cmfile, 0); + + state = 93; + continue; + } + + if (nowtok == tok_bsymbol) + to_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + to_name = (char *) obstack_finish (&result->mem_pool); + } + + state = 96; + continue; + + case 98: + /* We now expect `END WIDTH_VARIABLE' or lines of the format + "%s\n" or "%s...%s\n". */ + if (nowtok == tok_eol) + /* ignore empty lines. 
*/ + continue; + + if (nowtok == tok_end) + { + expected_tok = tok_width_variable; + expected_str = "WIDTH_VARIABLE"; + state = 90; + continue; + } + + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH_VARIABLE", _("no symbolic name given")); + + lr_ignore_rest (cmfile, 0); + + continue; + } + + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + if (nowtok == tok_bsymbol) + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + from_name = (char *) obstack_finish (&result->mem_pool); + } + to_name = NULL; + + state = 99; + continue; + + case 99: + if (nowtok == tok_ellipsis3) + state = 100; + + /* Store info. */ + from_name = NULL; + + /* Warn */ + state = 98; + continue; + + case 100: + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH_VARIABLE", + _("no symbolic name given for end of range")); + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_bsymbol) + to_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + to_name = (char *) obstack_finish (&result->mem_pool); + } + + /* XXX Enter value into table. 
*/ + + lr_ignore_rest (cmfile, 1); + + state = 98; + continue; + + default: + WITH_CUR_LOCALE (error (5, 0, _("%s: error in state machine"), + __FILE__)); + /* NOTREACHED */ + } + break; + } + + if (state != 91 && !be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"), + cmfile->fname)); + + lr_close (cmfile); + + return result; +} + + +static void +new_width (struct linereader *cmfile, struct charmap_t *result, + const char *from, const char *to, unsigned long int width) +{ + struct charseq *from_val; + struct charseq *to_val; + + from_val = charmap_find_value (result, from, strlen (from)); + if (from_val == NULL) + { + lr_error (cmfile, _("unknown character `%s'"), from); + return; + } + + if (to == NULL) + to_val = from_val; + else + { + to_val = charmap_find_value (result, to, strlen (to)); + if (to_val == NULL) + { + lr_error (cmfile, _("unknown character `%s'"), to); + return; + } + + /* Make sure the number of bytes for the end points of the range + is correct. 
*/ + if (from_val->nbytes != to_val->nbytes) + { + lr_error (cmfile, _("\ +number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"), + from_val->nbytes, to_val->nbytes); + return; + } + } + + if (result->nwidth_rules >= result->nwidth_rules_max) + { + size_t new_size = result->nwidth_rules + 32; + struct width_rule *new_rules = + (struct width_rule *) obstack_alloc (&result->mem_pool, + (new_size + * sizeof (struct width_rule))); + + memcpy (new_rules, result->width_rules, + result->nwidth_rules_max * sizeof (struct width_rule)); + + result->width_rules = new_rules; + result->nwidth_rules_max = new_size; + } + + result->width_rules[result->nwidth_rules].from = from_val; + result->width_rules[result->nwidth_rules].to = to_val; + result->width_rules[result->nwidth_rules].width = (unsigned int) width; + ++result->nwidth_rules; +} + + +struct charseq * +charmap_find_value (const struct charmap_t *cm, const char *name, size_t len) +{ + void *result; + + return (find_entry ((hash_table *) &cm->char_table, name, len, &result) + < 0 ? NULL : (struct charseq *) result); +} + + +static void +charmap_new_char (struct linereader *lr, struct charmap_t *cm, + size_t nbytes, unsigned char *bytes, + const char *from, const char *to, + int decimal_ellipsis, int step) +{ + hash_table *ht = &cm->char_table; + hash_table *bt = &cm->byte_table; + struct obstack *ob = &cm->mem_pool; + char *from_end; + char *to_end; + const char *cp; + int prefix_len, len1, len2; + unsigned int from_nr, to_nr, cnt; + struct charseq *newp; + + len1 = strlen (from); + + if (to == NULL) + { + newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); + newp->nbytes = nbytes; + memcpy (newp->bytes, bytes, nbytes); + newp->name = from; + + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9)) + { + /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where + xxxx and xxxxxxxx are hexadecimal numbers. 
In this case + we use the value of xxxx or xxxxxxxx as the UCS4 value of + this character and we don't have to consult the repertoire + map. + + If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx + and xxxxxxxx also give the code point in UCS4 but this must + be in the private, i.e., unassigned, area. This should be + used for characters which do not (yet) have an equivalent + in ISO 10646 and Unicode. */ + char *endp; + + errno = 0; + newp->ucs4 = strtoul (from + 1, &endp, 16); + if (endp - from != len1 + || (newp->ucs4 == ~((uint32_t) 0) && errno == ERANGE) + || newp->ucs4 >= 0x80000000) + /* This wasn't successful. Signal this name cannot be a + correct UCS value. */ + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + } + + insert_entry (ht, from, len1, newp); + insert_entry (bt, newp->bytes, nbytes, newp); + /* Please note that it isn't a bug if a symbol is defined more + than once. All later definitions are simply discarded. */ + return; + } + + /* We have a range: the names must have names with equal prefixes + and an equal number of digits, where the second number is greater + or equal than the first. */ + len2 = strlen (to); + + if (len1 != len2) + { + illegal_range: + lr_error (lr, _("invalid names for character range")); + return; + } + + cp = &from[len1 - 1]; + if (decimal_ellipsis) + while (isdigit (*cp) && cp >= from) + --cp; + else + while (isxdigit (*cp) && cp >= from) + { + if (!isdigit (*cp) && !isupper (*cp)) + lr_error (lr, _("\ +hexadecimal range format should use only capital characters")); + --cp; + } + + prefix_len = (cp - from) + 1; + + if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) + goto illegal_range; + + errno = 0; + from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16); + if (*from_end != '\0' || (from_nr == UINT_MAX && errno == ERANGE) + || ((to_nr = strtoul (&to[prefix_len], &to_end, + decimal_ellipsis ? 
10 : 16)) == UINT_MAX + && errno == ERANGE) + || *to_end != '\0') + { + lr_error (lr, _("<%s> and <%s> are invalid names for range"), from, to); + return; + } + + if (from_nr > to_nr) + { + lr_error (lr, _("upper limit in range is smaller than lower limit")); + return; + } + + for (cnt = from_nr; cnt <= to_nr; cnt += step) + { + char *name_end; + obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X", + prefix_len, from, len1 - prefix_len, cnt); + obstack_1grow (ob, '\0'); + name_end = obstack_finish (ob); + + newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); + newp->nbytes = nbytes; + memcpy (newp->bytes, bytes, nbytes); + newp->name = name_end; + + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + if ((name_end[0] == 'U' || name_end[0] == 'P') + && (len1 == 5 || len1 == 9)) + { + /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where + xxxx and xxxxxxxx are hexadecimal numbers. In this case + we use the value of xxxx or xxxxxxxx as the UCS4 value of + this character and we don't have to consult the repertoire + map. + + If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx + and xxxxxxxx also give the code point in UCS4 but this must + be in the private, i.e., unassigned, area. This should be + used for characters which do not (yet) have an equivalent + in ISO 10646 and Unicode. */ + char *endp; + + errno = 0; + newp->ucs4 = strtoul (name_end + 1, &endp, 16); + if (endp - name_end != len1 + || (newp->ucs4 == ~((uint32_t) 0) && errno == ERANGE) + || newp->ucs4 >= 0x80000000) + /* This wasn't successful. Signal this name cannot be a + correct UCS value. */ + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + } + + insert_entry (ht, name_end, len1, newp); + insert_entry (bt, newp->bytes, nbytes, newp); + /* Please note we don't examine the return value since it is no error + if we have two definitions for a symbol. */ + + /* Increment the value in the byte sequence. 
*/ + if (++bytes[nbytes - 1] == '\0') + { + int b = nbytes - 2; + + do + if (b < 0) + { + lr_error (lr, + _("resulting bytes for range not representable.")); + return; + } + while (++bytes[b--] == 0); + } + } +} + + +struct charseq * +charmap_find_symbol (const struct charmap_t *cm, const char *bytes, + size_t nbytes) +{ + void *result; + + return (find_entry ((hash_table *) &cm->byte_table, bytes, nbytes, &result) + < 0 ? NULL : (struct charseq *) result); +} diff --git a/REORG.TODO/locale/programs/charmap.h b/REORG.TODO/locale/programs/charmap.h new file mode 100644 index 0000000000..5d6b48f59c --- /dev/null +++ b/REORG.TODO/locale/programs/charmap.h @@ -0,0 +1,84 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
/* Data structures and prototypes for the character map handling.  */

#ifndef _CHARMAP_H
#define _CHARMAP_H

#include <obstack.h>
#include <stdbool.h>
#include <stdint.h>

#include "repertoire.h"
#include "simple-hash.h"


/* One width rule: the characters in the range FROM .. TO have the
   given WIDTH.  */
struct width_rule
{
  struct charseq *from;
  struct charseq *to;
  unsigned int width;
};


/* Representation of one character map.  */
struct charmap_t
{
  /* Name of the code set.  */
  const char *code_set_name;
  const char *repertoiremap;
  /* NOTE(review): presumably the minimum and maximum number of bytes
     used for one character -- confirm against the charmap reader.  */
  int mb_cur_min;
  int mb_cur_max;

  /* Array of width rules, the number of entries in use and the number
     of entries allocated, plus the width used when no rule applies
     (the latter presumably -- confirm).  */
  struct width_rule *width_rules;
  size_t nwidth_rules;
  size_t nwidth_rules_max;
  unsigned int width_default;

  /* Obstack from which the character sequences and generated names
     are allocated.  */
  struct obstack mem_pool;
  /* Maps symbolic names to `struct charseq' entries.  */
  hash_table char_table;
  /* Maps byte sequences to `struct charseq' entries.  */
  hash_table byte_table;
  hash_table ucs4_table;
};


/* This is the structure used for entries in the hash table.  It represents
   the sequence of bytes used for the coded character.  */
struct charseq
{
  /* Symbolic name of the character.  */
  const char *name;
  /* UCS4 value, or UNINITIALIZED_CHAR_VALUE if it is not (yet) known.  */
  uint32_t ucs4;
  /* Number of bytes in BYTES.  */
  int nbytes;
  /* The byte sequence itself, allocated directly behind the structure.  */
  unsigned char bytes[0];
};


/* True if the encoding is not ASCII compatible.  */
extern bool enc_not_ascii_compatible;


/* Prototypes for charmap handling functions.  */
extern struct charmap_t *charmap_read (const char *filename, int verbose,
				       int error_not_found, int be_quiet,
				       int use_default);

/* Return the value stored under the given key in the hashing table.
   The result is NULL if there is no entry for NAME.  */
extern struct charseq *charmap_find_value (const struct charmap_t *charmap,
					   const char *name, size_t len);

/* Return symbol for given multibyte sequence.  Here NAME points to the
   byte sequence and LEN is its length in bytes; the result is NULL if
   the sequence is unknown.  */
extern struct charseq *charmap_find_symbol (const struct charmap_t *charmap,
					    const char *name, size_t len);

#endif /* charmap.h */
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _LD_CONFIG_H +#define _LD_CONFIG_H 1 + +/* Use the internal textdomain used for libc messages. */ +#define PACKAGE _libc_intl_domainname +#ifndef VERSION +/* Get libc version number. */ +#include "../../version.h" +#endif + +#define DEFAULT_CHARMAP "ANSI_X3.4-1968" /* ASCII */ + +/* This must be one higher than the last used LC_xxx category value. */ +#define __LC_LAST 13 + +#include_next <config.h> +#endif diff --git a/REORG.TODO/locale/programs/ld-address.c b/REORG.TODO/locale/programs/ld-address.c new file mode 100644 index 0000000000..2488a5ce5c --- /dev/null +++ b/REORG.TODO/locale/programs/ld-address.c @@ -0,0 +1,545 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <byteswap.h> +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +static struct +{ + const char ab2[3]; + const char ab3[4]; + uint32_t num; +} iso3166[] = +{ +#define DEFINE_COUNTRY_CODE(Name, Ab2, Ab3, Num) \ + { #Ab2, #Ab3, Num }, +#include "iso-3166.def" +}; + + +static struct +{ + const char ab[3]; + const char term[4]; + const char lib[4]; +} iso639[] = +{ +#define DEFINE_LANGUAGE_CODE(Name, Ab, Term, Lib) \ + { #Ab, #Term, #Lib }, +#define DEFINE_LANGUAGE_CODE3(Name, Term, Lib) \ + { "", #Term, #Lib }, +#define DEFINE_LANGUAGE_CODE2(Name, Term) \ + { "", #Term, "" }, +#include "iso-639.def" +}; + + +/* The real definition of the struct for the LC_ADDRESS locale. 
*/ +struct locale_address_t +{ + const char *postal_fmt; + const char *country_name; + const char *country_post; + const char *country_ab2; + const char *country_ab3; + uint32_t country_num; + const char *country_car; + const char *country_isbn; + const char *lang_name; + const char *lang_ab; + const char *lang_term; + const char *lang_lib; +}; + + +static void +address_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_ADDRESS].address = + (struct locale_address_t *) xcalloc (1, + sizeof (struct locale_address_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +address_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_address_t *address = locale->categories[LC_ADDRESS].address; + size_t cnt; + int helper; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (address == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_ADDRESS] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_ADDRESS, from->copy_name[LC_ADDRESS], + from->repertoire_name, charmap); + while (from->categories[LC_ADDRESS].address == NULL + && from->copy_name[LC_ADDRESS] != NULL); + + address = locale->categories[LC_ADDRESS].address + = from->categories[LC_ADDRESS].address; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (address == NULL) + { + if (! 
be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_ADDRESS")); + address_startup (NULL, locale, 0); + address = locale->categories[LC_ADDRESS].address; + nothing = 1; + } + } + + if (address->postal_fmt == NULL) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "postal_fmt")); + /* Use as the default value the value of the i18n locale. */ + address->postal_fmt = "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"; + } + else + { + /* We must check whether the format string contains only the allowed + escape sequences. Last checked against ISO 30112 WD10 [2014]. */ + const char *cp = address->postal_fmt; + + if (*cp == '\0') + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "postal_fmt")); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (*++cp == 'R') + /* Romanize-flag. */ + ++cp; + if (strchr ("nafdbshNtreClzTSc%", *cp) == NULL) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid escape `%%%c' sequence in field `%s'"), + "LC_ADDRESS", *cp, "postal_fmt")); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (address->cat == NULL) \ + { \ + if (verbose && ! nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_ADDRESS", #cat)); \ + address->cat = ""; \ + } + + TEST_ELEM (country_name); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_post); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_car); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_isbn); + TEST_ELEM (lang_name); + + helper = 1; + if (address->lang_term == NULL) + { + if (verbose && ! 
nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "lang_term")); + address->lang_term = ""; + cnt = sizeof (iso639) / sizeof (iso639[0]); + } + else if (address->lang_term[0] == '\0') + { + if (verbose) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_term")); + cnt = sizeof (iso639) / sizeof (iso639[0]); + } + else + { + /* Look for this language in the table. */ + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_term, iso639[cnt].term) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: terminology language code `%s' not defined"), + "LC_ADDRESS", address->lang_term)); + } + + if (address->lang_ab == NULL) + { + if ((cnt == sizeof (iso639) / sizeof (iso639[0]) + || iso639[cnt].ab[0] != '\0') + && verbose && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "lang_ab")); + address->lang_ab = ""; + } + else if (address->lang_ab[0] == '\0') + { + if ((cnt == sizeof (iso639) / sizeof (iso639[0]) + || iso639[cnt].ab[0] != '\0') + && verbose) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_ab")); + } + else if (cnt < sizeof (iso639) / sizeof (iso639[0]) + && iso639[cnt].ab[0] == '\0') + { + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be defined"), + "LC_ADDRESS", "lang_ab")); + + address->lang_ab = ""; + } + else + { + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + { + helper = 2; + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_ab, iso639[cnt].ab) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: language abbreviation `%s' not defined"), + "LC_ADDRESS", address->lang_ab)); + } + else + if (strcmp (iso639[cnt].ab, address->lang_ab) != 0 + && iso639[cnt].ab[0] != '\0') + WITH_CUR_LOCALE 
(error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "lang_ab", "lang_term")); + } + + if (address->lang_lib == NULL) + /* This is no error. */ + address->lang_lib = address->lang_term; + else if (address->lang_lib[0] == '\0') + { + if (verbose) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_lib")); + } + else + { + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + { + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_lib, iso639[cnt].lib) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: language abbreviation `%s' not defined"), + "LC_ADDRESS", address->lang_lib)); + } + else + if (strcmp (iso639[cnt].ab, address->lang_ab) != 0) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), "LC_ADDRESS", "lang_lib", + helper == 1 ? "lang_term" : "lang_ab")); + } + + if (address->country_num == 0) + { + if (verbose && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_num")); + cnt = sizeof (iso3166) / sizeof (iso3166[0]); + } + else + { + for (cnt = 0; cnt < sizeof (iso3166) / sizeof (iso3166[0]); ++cnt) + if (address->country_num == iso3166[cnt].num) + break; + + if (cnt == sizeof (iso3166) / sizeof (iso3166[0])) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: numeric country code `%d' not valid"), + "LC_ADDRESS", address->country_num)); + } + + if (address->country_ab2 == NULL) + { + if (verbose && ! 
nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_ab2")); + address->country_ab2 = " "; + } + else if (cnt != sizeof (iso3166) / sizeof (iso3166[0]) + && strcmp (address->country_ab2, iso3166[cnt].ab2) != 0) + WITH_CUR_LOCALE (error (0, 0, + _("%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "country_ab2", "country_num")); + + if (address->country_ab3 == NULL) + { + if (verbose && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_ab3")); + address->country_ab3 = " "; + } + else if (cnt != sizeof (iso3166) / sizeof (iso3166[0]) + && strcmp (address->country_ab3, iso3166[cnt].ab3) != 0) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "country_ab3", "country_num")); +} + + +void +address_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_address_t *address = locale->categories[LC_ADDRESS].address; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS)); + add_locale_string (&file, address->postal_fmt); + add_locale_string (&file, address->country_name); + add_locale_string (&file, address->country_post); + add_locale_string (&file, address->country_ab2); + add_locale_string (&file, address->country_ab3); + add_locale_string (&file, address->country_car); + add_locale_uint32 (&file, address->country_num); + add_locale_string (&file, address->country_isbn); + add_locale_string (&file, address->lang_name); + add_locale_string (&file, address->lang_ab); + add_locale_string (&file, address->lang_term); + add_locale_string (&file, address->lang_lib); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_ADDRESS, "LC_ADDRESS", &file); +} + + +/* The parser for the LC_ADDRESS section of the locale definition. 
/* The parser for the LC_ADDRESS section of the locale definition.

   LDFILE is the input, positioned behind the `LC_ADDRESS' keyword, and
   RESULT the locale whose LC_ADDRESS data is filled in.  CHARMAP is
   passed to the tokenizer; REPERTOIRE_NAME is only needed for handling a
   `copy' statement.  If IGNORE_CONTENT is nonzero the section is parsed
   but the values are discarded.  The function returns after the matching
   `END' line, after handling `copy', or, with an error message, when the
   end of the file is reached.  */
void
address_read (struct linereader *ldfile, struct localedef_t *result,
	      const struct charmap_t *charmap, const char *repertoire_name,
	      int ignore_content)
{
  struct locale_address_t *address;
  struct token *now;
  struct token *arg;
  enum token_t nowtok;

  /* The rest of the line containing `LC_ADDRESS' must be free.  */
  lr_ignore_rest (ldfile, 1);


  /* Skip empty lines in front of the first keyword.  */
  do
    {
      now = lr_token (ldfile, charmap, result, NULL, verbose);
      nowtok = now->tok;
    }
  while (nowtok == tok_eol);

  /* If we see `copy' now we are almost done.  */
  if (nowtok == tok_copy)
    {
      handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_address,
		   LC_ADDRESS, "LC_ADDRESS", ignore_content);
      return;
    }

  /* Prepare the data structures.  Note that no structure is allocated
     if IGNORE_CONTENT is set, so ADDRESS must not be dereferenced in
     that case; every handler below tests IGNORE_CONTENT first.  */
  address_startup (ldfile, result, ignore_content);
  address = result->categories[LC_ADDRESS].address;

  while (1)
    {
      /* Of course we don't proceed beyond the end of file.  */
      if (nowtok == tok_eof)
	break;

      /* Ignore empty lines.  */
      if (nowtok == tok_eol)
	{
	  now = lr_token (ldfile, charmap, result, NULL, verbose);
	  nowtok = now->tok;
	  continue;
	}

      switch (nowtok)
	{
	  /* Handler for a keyword whose argument must be a string.  */
#define STR_ELEM(cat) \
	case tok_##cat:							      \
	  /* Ignore the rest of the line if we don't need the input of	      \
	     this line.  */						      \
	  if (ignore_content)						      \
	    {								      \
	      lr_ignore_rest (ldfile, 0);				      \
	      break;							      \
	    }								      \
									      \
	  arg = lr_token (ldfile, charmap, result, NULL, verbose);	      \
	  if (arg->tok != tok_string)					      \
	    goto err_label;						      \
	  if (address->cat != NULL)					      \
	    lr_error (ldfile, _("\
%s: field `%s' declared more than once"), "LC_ADDRESS", #cat);		      \
	  else if (!ignore_content && arg->val.str.startmb == NULL)	      \
	    {								      \
	      lr_error (ldfile, _("\
%s: unknown character in field `%s'"), "LC_ADDRESS", #cat);		      \
	      address->cat = "";					      \
	    }								      \
	  else if (!ignore_content)					      \
	    address->cat = arg->val.str.startmb;			      \
	  break

	  STR_ELEM (postal_fmt);
	  STR_ELEM (country_name);
	  STR_ELEM (country_post);
	  STR_ELEM (country_ab2);
	  STR_ELEM (country_ab3);
	  STR_ELEM (country_car);
	  STR_ELEM (lang_name);
	  STR_ELEM (lang_ab);
	  STR_ELEM (lang_term);
	  STR_ELEM (lang_lib);

	  /* Handler for a keyword whose argument may be a string or a
	     number; a number is converted to a newly allocated string.  */
#define INT_STR_ELEM(cat) \
	case tok_##cat:							      \
	  /* Ignore the rest of the line if we don't need the input of	      \
	     this line.  */						      \
	  if (ignore_content)						      \
	    {								      \
	      lr_ignore_rest (ldfile, 0);				      \
	      break;							      \
	    }								      \
									      \
	  arg = lr_token (ldfile, charmap, result, NULL, verbose);	      \
	  if (arg->tok != tok_string && arg->tok != tok_number)		      \
	    goto err_label;						      \
	  if (address->cat != NULL)					      \
	    lr_error (ldfile, _("\
%s: field `%s' declared more than once"), "LC_ADDRESS", #cat);		      \
	  else if (!ignore_content && arg->tok == tok_string		      \
		   && arg->val.str.startmb == NULL)			      \
	    {								      \
	      lr_error (ldfile, _("\
%s: unknown character in field `%s'"), "LC_ADDRESS", #cat);		      \
	      address->cat = "";					      \
	    }								      \
	  else if (!ignore_content)					      \
	    {								      \
	      if (arg->tok == tok_string)				      \
		address->cat = arg->val.str.startmb;			      \
	      else							      \
		{							      \
		  char *numbuf = (char *) xmalloc (21);			      \
		  snprintf (numbuf, 21, "%ld", arg->val.num);		      \
		  address->cat = numbuf;				      \
		}							      \
	    }								      \
	  break

	  INT_STR_ELEM (country_isbn);

	  /* Handler for a keyword whose argument must be a number.  A
	     value of zero is treated as "not yet defined".  */
#define INT_ELEM(cat) \
	case tok_##cat:							      \
	  /* Ignore the rest of the line if we don't need the input of	      \
	     this line.  */						      \
	  if (ignore_content)						      \
	    {								      \
	      lr_ignore_rest (ldfile, 0);				      \
	      break;							      \
	    }								      \
									      \
	  arg = lr_token (ldfile, charmap, result, NULL, verbose);	      \
	  if (arg->tok != tok_number)					      \
	    goto err_label;						      \
	  else if (address->cat != 0)					      \
	    lr_error (ldfile, _("\
%s: field `%s' declared more than once"), "LC_ADDRESS", #cat);		      \
	  else if (!ignore_content)					      \
	    address->cat = arg->val.num;				      \
	  break

	  INT_ELEM (country_num);

	case tok_end:
	  /* Next we assume `LC_ADDRESS'.  */
	  arg = lr_token (ldfile, charmap, result, NULL, verbose);
	  if (arg->tok == tok_eof)
	    /* Leave the switch; the end of file is diagnosed after the
	       loop.  */
	    break;
	  if (arg->tok == tok_eol)
	    lr_error (ldfile, _("%s: incomplete `END' line"),
		      "LC_ADDRESS");
	  else if (arg->tok != tok_lc_address)
	    lr_error (ldfile, _("\
%1$s: definition does not end with `END %1$s'"), "LC_ADDRESS");
	  lr_ignore_rest (ldfile, arg->tok == tok_lc_address);
	  return;

	default:
	err_label:
	  SYNTAX_ERROR (_("%s: syntax error"), "LC_ADDRESS");
	}

      /* Prepare for the next round.  */
      now = lr_token (ldfile, charmap, result, NULL, verbose);
      nowtok = now->tok;
    }

  /* When we come here we reached the end of the file.  */
  lr_error (ldfile, _("%s: premature end of file"), "LC_ADDRESS");
}
/* Append the 32-bit value DATA to the object currently being built in
   OBSTACK.  The object must be suitably aligned (LOCFILE_ALIGNED_P) and
   the value is passed through maybe_swap_uint32 before it is stored.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  int32_t value;

  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  value = maybe_swap_uint32 (data);
  /* Only if `int' is exactly 32 bits wide can the int primitive of the
     obstack be used; otherwise the bytes are added explicitly.  */
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &value, sizeof (int32_t));
  else
    obstack_int_grow (obstack, value);
}

/* Like obstack_int32_grow but uses obstack_int_grow_fast if `int' is
   32 bits wide.  If it is not, the ordinary obstack_grow is used.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  int32_t value;

  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  value = maybe_swap_uint32 (data);
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &value, sizeof (int32_t));
  else
    obstack_int_grow_fast (obstack, value);
}
/* Data type for collating element.  */
struct element_t
{
  /* Name of the element, or NULL if it has none.  */
  const char *name;

  /* Multibyte representation and its length; NULL and 0 if there is
     none.  */
  const char *mbs;
  size_t nmbs;
  /* Wide character representation (zero-terminated) and its length;
     NULL and 0 if there is none.  */
  const uint32_t *wcs;
  size_t nwcs;
  /* Both are cleared when the element is created and assigned later.  */
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* The weights of the element; NULL until they are allocated.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
/* The real definition of the struct for the LC_COLLATE locale.  */
struct locale_collate_t
{
  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts.  */
  struct section_list *known_sections;
  /* List of used sections.  */
  struct section_list *sections;
  /* Current section using definition.  New elements are assigned to
     this section when they are created.  */
  struct section_list *current_section;
  /* There always can be an unnamed section.  */
  struct section_list unnamed_section;
  /* Flag whether the unnamed section has been defined.  */
  bool unnamed_section_defined;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;

  /* Start of the order list.  */
  struct element_t *start;

  /* The undefined element.  */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  */
  struct element_t *cursor;

  /* This value is used when handling ellipsis.  */
  struct element_t ellipsis_weight;

  /* Known collating elements.  */
  hash_table elem_table;

  /* Known collating symbols.  */
  hash_table sym_table;

  /* Known collation sequences.  */
  hash_table seq_table;

  /* Obstack from which the sections, elements and symbols of this
     category are allocated.  */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contains information.
     Therefore we keep all relevant input in a list.  */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences.  */
  struct element_t *mbheads[256];

  /* Sparse table with the heads of the lists for the wide character
     sequences.  NOTE(review): the comment here used to be a verbatim
     copy of the one for `mbheads'; presumably the table is indexed by
     the first wide character -- confirm.  */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order.  */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;

  /* State of the preprocessor.  */
  enum
    {
      else_none = 0,
      else_ignore,
      else_seen
    }
    else_action;
};
/* We need UTF-8 encoding of numbers.

   Store the encoding of VAL in BUF and return the number of bytes
   written.  Values below 0x80 are stored as a single byte; larger values
   use the shortest of the two to six byte forms, i.e., the original
   31-bit UTF-8 scheme.  BUF must have room for six bytes; no terminating
   NUL byte is written.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes has room for 5 * STEP + 1 bits.  */
      for (step = 2; step < 6; ++step)
	if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
	  break;
      retval = step;

      /* The lead byte starts with STEP one bits followed by a zero bit.
	 Shifting the positive constant 0xff00 yields this pattern in the
	 low byte without relying on the implementation-defined result of
	 right-shifting a negative value.  */
      *buf = (char) (unsigned char) (0xff00 >> step);
      --step;
      /* Fill in the continuation bytes from the last one backwards, six
	 bits at a time.  */
      do
	{
	  buf[step] = (char) (0x80 | (val & 0x3f));
	  val >>= 6;
	}
      while (--step > 0);
      /* The remaining high bits belong into the lead byte.  */
      *buf |= val;
    }

  return retval;
}
XXX */ + newp->mbseqorder = 0; + newp->wcseqorder = 0; + + /* Will be allocated later. */ + newp->weights = NULL; + + newp->file = NULL; + newp->line = 0; + + newp->section = collate->current_section; + + newp->last = NULL; + newp->next = NULL; + + newp->mbnext = NULL; + newp->mblast = NULL; + + newp->wcnext = NULL; + newp->wclast = NULL; + + return newp; +} + + +static struct symbol_t * +new_symbol (struct locale_collate_t *collate, const char *name, size_t len) +{ + struct symbol_t *newp; + + newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp)); + + newp->name = obstack_copy0 (&collate->mempool, name, len); + newp->order = NULL; + + newp->file = NULL; + newp->line = 0; + + return newp; +} + + +/* Test whether this name is already defined somewhere. */ +static int +check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, const char *symbol, + size_t symbol_len) +{ + void *ignore = NULL; + + if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%.*s' already defined in charmap"), + (int) symbol_len, symbol); + return 1; + } + + if (repertoire != NULL + && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) + == 0)) + { + lr_error (ldfile, _("`%.*s' already defined in repertoire"), + (int) symbol_len, symbol); + return 1; + } + + if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%.*s' already defined as collating symbol"), + (int) symbol_len, symbol); + return 1; + } + + if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%.*s' already defined as collating element"), + (int) symbol_len, symbol); + return 1; + } + + return 0; +} + + +/* Read the direction specification. 
*/ +static void +read_directions (struct linereader *ldfile, struct token *arg, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, struct localedef_t *result) +{ + int cnt = 0; + int max = nrules ?: 10; + enum coll_sort_rule *rules = calloc (max, sizeof (*rules)); + int warned = 0; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + while (1) + { + int valid = 0; + + if (arg->tok == tok_forward) + { + if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_forward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned more than once in definition of weight %d"), + "LC_COLLATE", "forward", cnt + 1); + } + } + else + rules[cnt] |= sort_forward; + + valid = 1; + } + else if (arg->tok == tok_backward) + { + if (rules[cnt] & sort_forward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned more than once in definition of weight %d"), + "LC_COLLATE", "backward", cnt + 1); + } + } + else + rules[cnt] |= sort_backward; + + valid = 1; + } + else if (arg->tok == tok_position) + { + if (rules[cnt] & sort_position) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned more than once in definition of weight %d"), + "LC_COLLATE", "position", cnt + 1); + } + } + else + rules[cnt] |= sort_position; + + valid = 1; + } + + if (valid) + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + + if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma + || arg->tok == tok_semicolon) + { + if (! valid && ! 
warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + + /* See whether we have to increment the counter. */ + if (arg->tok != tok_comma && rules[cnt] != 0) + { + /* Add the default `forward' if we have seen only `position'. */ + if (rules[cnt] == sort_position) + rules[cnt] = sort_position | sort_forward; + + ++cnt; + } + + if (arg->tok == tok_eof || arg->tok == tok_eol) + /* End of line or file, so we exit the loop. */ + break; + + if (nrules == 0) + { + /* See whether we have enough room in the array. */ + if (cnt == max) + { + max += 10; + rules = (enum coll_sort_rule *) xrealloc (rules, + max + * sizeof (*rules)); + memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules)); + } + } + else + { + if (cnt == nrules) + { + /* There must not be any more rule. */ + if (! warned) + { + lr_error (ldfile, _("\ +%s: too many rules; first entry only had %d"), + "LC_COLLATE", nrules); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + break; + } + } + } + else + { + if (! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + } + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + } + + if (nrules == 0) + { + /* Now we know how many rules we have. */ + nrules = cnt; + rules = (enum coll_sort_rule *) xrealloc (rules, + nrules * sizeof (*rules)); + } + else + { + if (cnt < nrules) + { + /* Not enough rules in this specification. */ + if (! warned) + lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE"); + + do + rules[cnt] = sort_forward; + while (++cnt < nrules); + } + } + + collate->current_section->rules = rules; +} + + +static struct element_t * +find_element (struct linereader *ldfile, struct locale_collate_t *collate, + const char *str, size_t len) +{ + void *result = NULL; + + /* Search for the entries among the collation sequences already define. */ + if (find_entry (&collate->seq_table, str, len, &result) != 0) + { + /* Nope, not define yet. 
So we see whether it is a + collation symbol. */ + void *ptr; + + if (find_entry (&collate->sym_table, str, len, &ptr) == 0) + { + /* It's a collation symbol. */ + struct symbol_t *sym = (struct symbol_t *) ptr; + result = sym->order; + + if (result == NULL) + result = sym->order = new_element (collate, NULL, 0, NULL, + NULL, 0, 0); + } + else if (find_entry (&collate->elem_table, str, len, &result) != 0) + { + /* It's also no collation element. So it is a character + element defined later. */ + result = new_element (collate, NULL, 0, NULL, str, len, 1); + /* Insert it into the sequence table. */ + insert_entry (&collate->seq_table, str, len, result); + } + } + + return (struct element_t *) result; +} + + +static void +unlink_element (struct locale_collate_t *collate) +{ + if (collate->cursor == collate->start) + { + assert (collate->cursor->next == NULL); + assert (collate->cursor->last == NULL); + collate->cursor = NULL; + } + else + { + if (collate->cursor->next != NULL) + collate->cursor->next->last = collate->cursor->last; + if (collate->cursor->last != NULL) + collate->cursor->last->next = collate->cursor->next; + collate->cursor = collate->cursor->last; + } +} + + +static void +insert_weights (struct linereader *ldfile, struct element_t *elem, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, struct localedef_t *result, + enum token_t ellipsis) +{ + int weight_cnt; + struct token *arg; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + /* Initialize all the fields. */ + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + + elem->last = collate->cursor; + elem->next = collate->cursor ? 
collate->cursor->next : NULL; + if (collate->cursor != NULL && collate->cursor->next != NULL) + collate->cursor->next->last = elem; + if (collate->cursor != NULL) + collate->cursor->next = elem; + if (collate->start == NULL) + { + assert (collate->cursor == NULL); + collate->start = elem; + } + + elem->section = collate->current_section; + + if (collate->current_section->first == NULL) + collate->current_section->first = elem; + if (collate->current_section->last == collate->cursor) + collate->current_section->last = elem; + + collate->cursor = elem; + + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t)); + memset (elem->weights, '\0', nrules * sizeof (struct element_list_t)); + + weight_cnt = 0; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + do + { + if (arg->tok == tok_eof || arg->tok == tok_eol) + break; + + if (arg->tok == tok_ignore) + { + /* The weight for this level has to be ignored. We use the + null pointer to indicate this. 
*/ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = NULL; + elem->weights[weight_cnt].cnt = 1; + } + else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4) + { + char ucs4str[10]; + struct element_t *val; + char *symstr; + size_t symlen; + + if (arg->tok == tok_bsymbol) + { + symstr = arg->val.str.startmb; + symlen = arg->val.str.lenmb; + } + else + { + snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4); + symstr = ucs4str; + symlen = 9; + } + + val = find_element (ldfile, collate, symstr, symlen); + if (val == NULL) + break; + + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = val; + elem->weights[weight_cnt].cnt = 1; + } + else if (arg->tok == tok_string) + { + /* Split the string up in the individual characters and put + the element definitions in the list. */ + const char *cp = arg->val.str.startmb; + int cnt = 0; + struct element_t *charelem; + struct element_t **weights = NULL; + int max = 0; + + if (*cp == '\0') + { + lr_error (ldfile, _("%s: empty weight string not allowed"), + "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + break; + } + + do + { + if (*cp == '<') + { + /* Ahh, it's a bsymbol or an UCS4 value. If it's + the latter we have to unify the name. */ + const char *startp = ++cp; + size_t len; + + while (*cp != '>') + { + if (*cp == ldfile->escape_char) + ++cp; + if (*cp == '\0') + /* It's a syntax error. 
*/ + goto syntax; + + ++cp; + } + + if (cp - startp == 5 && startp[0] == 'U' + && isxdigit (startp[1]) && isxdigit (startp[2]) + && isxdigit (startp[3]) && isxdigit (startp[4])) + { + unsigned int ucs4 = strtoul (startp + 1, NULL, 16); + char *newstr; + + newstr = (char *) xmalloc (10); + snprintf (newstr, 10, "U%08X", ucs4); + startp = newstr; + + len = 9; + } + else + len = cp - startp; + + charelem = find_element (ldfile, collate, startp, len); + ++cp; + } + else + { + /* People really shouldn't use characters directly in + the string. Especially since it's not really clear + what this means. We interpret all characters in the + string as if that would be bsymbols. Otherwise we + would have to match back to bsymbols somehow and this + is normally not what people normally expect. */ + charelem = find_element (ldfile, collate, cp++, 1); + } + + if (charelem == NULL) + { + /* We ignore the rest of the line. */ + lr_ignore_rest (ldfile, 0); + break; + } + + /* Add the pointer. */ + if (cnt >= max) + { + struct element_t **newp; + max += 10; + newp = (struct element_t **) + alloca (max * sizeof (struct element_t *)); + memcpy (newp, weights, cnt * sizeof (struct element_t *)); + weights = newp; + } + weights[cnt++] = charelem; + } + while (*cp != '\0'); + + /* Now store the information. */ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + cnt * sizeof (struct element_t *)); + memcpy (elem->weights[weight_cnt].w, weights, + cnt * sizeof (struct element_t *)); + elem->weights[weight_cnt].cnt = cnt; + + /* We don't need the string anymore. */ + free (arg->val.str.startmb); + } + else if (ellipsis != tok_none + && (arg->tok == tok_ellipsis2 + || arg->tok == tok_ellipsis3 + || arg->tok == tok_ellipsis4)) + { + /* It must be the same ellipsis as used in the initial column. 
*/ + if (arg->tok != ellipsis) + lr_error (ldfile, _("\ +%s: weights must use the same ellipsis symbol as the name"), + "LC_COLLATE"); + + /* The weight for this level will depend on the element + iterating over the range. Put a placeholder. */ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2; + elem->weights[weight_cnt].cnt = 1; + } + else + { + syntax: + /* It's a syntax error. */ + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + break; + } + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + /* This better should be the end of the line or a semicolon. */ + if (arg->tok == tok_semicolon) + /* OK, ignore this and read the next token. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + else if (arg->tok != tok_eof && arg->tok != tok_eol) + { + /* It's a syntax error. */ + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + break; + } + } + while (++weight_cnt < nrules); + + if (weight_cnt < nrules) + { + /* This means the rest of the line uses the current element as + the weight. */ + do + { + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + if (ellipsis == tok_none) + elem->weights[weight_cnt].w[0] = elem; + else + elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2; + elem->weights[weight_cnt].cnt = 1; + } + while (++weight_cnt < nrules); + } + else + { + if (arg->tok == tok_ignore || arg->tok == tok_bsymbol) + { + /* Too many rule values. 
*/ + lr_error (ldfile, _("%s: too many values"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + } + else + lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof); + } +} + + +static int +insert_value (struct linereader *ldfile, const char *symstr, size_t symlen, + const struct charmap_t *charmap, struct repertoire_t *repertoire, + struct localedef_t *result) +{ + /* First find out what kind of symbol this is. */ + struct charseq *seq; + uint32_t wc; + struct element_t *elem = NULL; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + /* Try to find the character in the charmap. */ + seq = charmap_find_value (charmap, symstr, symlen); + + /* Determine the wide character. */ + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, symstr, symlen); + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; + + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) + { + /* It's no character, so look through the collation elements and + symbol list. */ + void *ptr = elem; + if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0) + { + void *result; + struct symbol_t *sym = NULL; + + /* It's also collation element. Therefore it's either a + collating symbol or it's a character which is not + supported by the character set. In the later case we + simply create a dummy entry. */ + if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0) + { + /* It's a collation symbol. */ + sym = (struct symbol_t *) result; + + elem = sym->order; + } + + if (elem == NULL) + { + elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0); + + if (sym != NULL) + sym->order = elem; + else + /* Enter a fake element in the sequence table. This + won't cause anything in the output since there is + no multibyte or wide character associated with + it. */ + insert_entry (&collate->seq_table, symstr, symlen, elem); + } + } + else + /* Copy the result back. 
*/ + elem = ptr; + } + else + { + /* Otherwise the symbols stands for a character. */ + void *ptr = elem; + if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0) + { + uint32_t wcs[2] = { wc, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, + seq != NULL ? (char *) seq->bytes : NULL, + seq != NULL ? seq->nbytes : 0, + wc == ILLEGAL_CHAR_VALUE ? NULL : wcs, + symstr, symlen, 1); + + /* And add it to the table. */ + if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0) + /* This cannot happen. */ + assert (! "Internal error"); + } + else + { + /* Copy the result back. */ + elem = ptr; + + /* Maybe the character was used before the definition. In this case + we have to insert the byte sequences now. */ + if (elem->mbs == NULL && seq != NULL) + { + elem->mbs = obstack_copy0 (&collate->mempool, + seq->bytes, seq->nbytes); + elem->nmbs = seq->nbytes; + } + + if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE) + { + uint32_t wcs[2] = { wc, 0 }; + + elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs)); + elem->nwcs = 1; + } + } + } + + /* Test whether this element is not already in the list. */ + if (elem->next != NULL || elem == collate->cursor) + { + lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"), + (int) symlen, symstr, elem->file, elem->line); + lr_ignore_rest (ldfile, 0); + return 1; + } + + insert_weights (ldfile, elem, charmap, repertoire, result, tok_none); + + return 0; +} + + +static void +handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen, + enum token_t ellipsis, const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct localedef_t *result) +{ + struct element_t *startp; + struct element_t *endp; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + /* Unlink the entry added for the ellipsis. */ + unlink_element (collate); + startp = collate->cursor; + + /* Process and add the end-entry. 
*/ + if (symstr != NULL + && insert_value (ldfile, symstr, symlen, charmap, repertoire, result)) + /* Something went wrong with inserting the to-value. This means + we cannot process the ellipsis. */ + return; + + /* Reset the cursor. */ + collate->cursor = startp; + + /* Now we have to handle many different situations: + - we have to distinguish between the three different ellipsis forms + - the is the ellipsis at the beginning, in the middle, or at the end. + */ + endp = collate->cursor->next; + assert (symstr == NULL || endp != NULL); + + /* XXX The following is probably very wrong since also collating symbols + can appear in ranges. But do we want/can refine the test for that? */ +#if 0 + /* Both, the start and the end symbol, must stand for characters. */ + if ((startp != NULL && (startp->name == NULL || ! startp->is_character)) + || (endp != NULL && (endp->name == NULL|| ! endp->is_character))) + { + lr_error (ldfile, _("\ +%s: the start and the end symbol of a range must stand for characters"), + "LC_COLLATE"); + return; + } +#endif + + if (ellipsis == tok_ellipsis3) + { + /* One requirement we make here: the length of the byte + sequences for the first and end character must be the same. + This is mainly to prevent unwanted effects and this is often + not what is wanted. */ + size_t len = (startp->mbs != NULL ? startp->nmbs + : (endp->mbs != NULL ? endp->nmbs : 0)); + char mbcnt[len + 1]; + char mbend[len + 1]; + + /* Well, this should be caught somewhere else already. Just to + make sure. */ + assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0); + assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0); + + if (startp != NULL && endp != NULL + && startp->mbs != NULL && endp->mbs != NULL + && startp->nmbs != endp->nmbs) + { + lr_error (ldfile, _("\ +%s: byte sequences of first and last character must have the same length"), + "LC_COLLATE"); + return; + } + + /* Determine whether we have to generate multibyte sequences. 
*/ + if ((startp == NULL || startp->mbs != NULL) + && (endp == NULL || endp->mbs != NULL)) + { + int cnt; + int ret; + + /* Prepare the beginning byte sequence. This is either from the + beginning byte sequence or it is all nulls if it was an + initial ellipsis. */ + if (startp == NULL || startp->mbs == NULL) + memset (mbcnt, '\0', len); + else + { + memcpy (mbcnt, startp->mbs, len); + + /* And increment it so that the value is the first one we will + try to insert. */ + for (cnt = len - 1; cnt >= 0; --cnt) + if (++mbcnt[cnt] != '\0') + break; + } + mbcnt[len] = '\0'; + + /* And the end sequence. */ + if (endp == NULL || endp->mbs == NULL) + memset (mbend, '\0', len); + else + memcpy (mbend, endp->mbs, len); + mbend[len] = '\0'; + + /* Test whether we have a correct range. */ + ret = memcmp (mbcnt, mbend, len); + if (ret >= 0) + { + if (ret > 0) + lr_error (ldfile, _("%s: byte sequence of first character of \ +range is not lower than that of the last character"), "LC_COLLATE"); + return; + } + + /* Generate the byte sequences data. */ + while (1) + { + struct charseq *seq; + + /* Quite a bit of work ahead. We have to find the character + definition for the byte sequence and then determine the + wide character belonging to it. */ + seq = charmap_find_symbol (charmap, mbcnt, len); + if (seq != NULL) + { + struct element_t *elem; + size_t namelen; + + /* I don't think this can ever happen. */ + assert (seq->name != NULL); + namelen = strlen (seq->name); + + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + namelen); + + /* Now we are ready to insert the new value in the + sequence. Find out whether the element is + already known. */ + void *ptr; + if (find_entry (&collate->seq_table, seq->name, namelen, + &ptr) != 0) + { + uint32_t wcs[2] = { seq->ucs4, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, mbcnt, len, + seq->ucs4 == ILLEGAL_CHAR_VALUE + ? 
NULL : wcs, seq->name, + namelen, 1); + + /* And add it to the table. */ + if (insert_entry (&collate->seq_table, seq->name, + namelen, elem) != 0) + /* This cannot happen. */ + assert (! "Internal error"); + } + else + /* Copy the result. */ + elem = ptr; + + /* Test whether this element is not already in the list. */ + if (elem->next != NULL || (collate->cursor != NULL + && elem->next == collate->cursor)) + { + lr_error (ldfile, _("\ +order for `%.*s' already defined at %s:%Zu"), + (int) namelen, seq->name, + elem->file, elem->line); + goto increment; + } + + /* Enqueue the new element. */ + elem->last = collate->cursor; + if (collate->cursor == NULL) + elem->next = NULL; + else + { + elem->next = collate->cursor->next; + elem->last->next = elem; + if (elem->next != NULL) + elem->next->last = elem; + } + if (collate->start == NULL) + { + assert (collate->cursor == NULL); + collate->start = elem; + } + collate->cursor = elem; + + /* Add the weight value. We take them from the + `ellipsis_weights' member of `collate'. */ + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, + nrules * sizeof (struct element_list_t)); + for (cnt = 0; cnt < nrules; ++cnt) + if (collate->ellipsis_weight.weights[cnt].cnt == 1 + && (collate->ellipsis_weight.weights[cnt].w[0] + == ELEMENT_ELLIPSIS2)) + { + elem->weights[cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + sizeof (struct element_t *)); + elem->weights[cnt].w[0] = elem; + elem->weights[cnt].cnt = 1; + } + else + { + /* Simply use the weight from `ellipsis_weight'. */ + elem->weights[cnt].w = + collate->ellipsis_weight.weights[cnt].w; + elem->weights[cnt].cnt = + collate->ellipsis_weight.weights[cnt].cnt; + } + } + + /* Increment for the next round. */ + increment: + for (cnt = len - 1; cnt >= 0; --cnt) + if (++mbcnt[cnt] != '\0') + break; + + /* Find out whether this was all. */ + if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0) + /* Yep, that's all. 
*/ + break; + } + } + } + else + { + /* For symbolic range we naturally must have a beginning and an + end specified by the user. */ + if (startp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not directly follow `order_start'"), + "LC_COLLATE"); + else if (endp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not be directly followed by `order_end'"), + "LC_COLLATE"); + else + { + /* Determine the range. To do so we have to determine the + common prefix of the both names and then the numeric + values of both ends. */ + size_t lenfrom = strlen (startp->name); + size_t lento = strlen (endp->name); + char buf[lento + 1]; + int preflen = 0; + long int from; + long int to; + char *cp; + int base = ellipsis == tok_ellipsis2 ? 16 : 10; + + if (lenfrom != lento) + { + invalid_range: + lr_error (ldfile, _("\ +`%s' and `%.*s' are not valid names for symbolic range"), + startp->name, (int) lento, endp->name); + return; + } + + while (startp->name[preflen] == endp->name[preflen]) + if (startp->name[preflen] == '\0') + /* Nothing to be done. The start and end point are identical + and while inserting the end point we have already given + the user an error message. */ + return; + else + ++preflen; + + errno = 0; + from = strtol (startp->name + preflen, &cp, base); + if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; + + errno = 0; + to = strtol (endp->name + preflen, &cp, base); + if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; + + /* Copy the prefix. */ + memcpy (buf, startp->name, preflen); + + /* Loop over all values. */ + for (++from; from < to; ++from) + { + struct element_t *elem = NULL; + struct charseq *seq; + uint32_t wc; + int cnt; + + /* Generate the name. */ + sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX", + (int) (lenfrom - preflen), from); + + /* Look whether this name is already defined. 
*/ + void *ptr; + if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0) + { + /* Copy back the result. */ + elem = ptr; + + if (elem->next != NULL || (collate->cursor != NULL + && elem->next == collate->cursor)) + { + lr_error (ldfile, _("\ +%s: order for `%.*s' already defined at %s:%Zu"), + "LC_COLLATE", (int) lenfrom, buf, + elem->file, elem->line); + continue; + } + + if (elem->name == NULL) + { + lr_error (ldfile, _("%s: `%s' must be a character"), + "LC_COLLATE", buf); + continue; + } + } + + if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL)) + { + /* Search for a character of this name. */ + seq = charmap_find_value (charmap, buf, lenfrom); + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, buf, lenfrom); + + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; + + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) + /* We don't know anything about a character with this + name. XXX Should we warn? */ + continue; + + if (elem == NULL) + { + uint32_t wcs[2] = { wc, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, + seq != NULL + ? (char *) seq->bytes : NULL, + seq != NULL ? seq->nbytes : 0, + wc == ILLEGAL_CHAR_VALUE + ? NULL : wcs, buf, lenfrom, 1); + } + else + { + /* Update the element. */ + if (seq != NULL) + { + elem->mbs = obstack_copy0 (&collate->mempool, + seq->bytes, seq->nbytes); + elem->nmbs = seq->nbytes; + } + + if (wc != ILLEGAL_CHAR_VALUE) + { + uint32_t zero = 0; + + obstack_grow (&collate->mempool, + &wc, sizeof (uint32_t)); + obstack_grow (&collate->mempool, + &zero, sizeof (uint32_t)); + elem->wcs = obstack_finish (&collate->mempool); + elem->nwcs = 1; + } + } + + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + elem->section = collate->current_section; + } + + /* Enqueue the new element. 
*/ + elem->last = collate->cursor; + elem->next = collate->cursor->next; + elem->last->next = elem; + if (elem->next != NULL) + elem->next->last = elem; + collate->cursor = elem; + + /* Now add the weights. They come from the `ellipsis_weights' + member of `collate'. */ + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, + nrules * sizeof (struct element_list_t)); + for (cnt = 0; cnt < nrules; ++cnt) + if (collate->ellipsis_weight.weights[cnt].cnt == 1 + && (collate->ellipsis_weight.weights[cnt].w[0] + == ELEMENT_ELLIPSIS2)) + { + elem->weights[cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + sizeof (struct element_t *)); + elem->weights[cnt].w[0] = elem; + elem->weights[cnt].cnt = 1; + } + else + { + /* Simly use the weight from `ellipsis_weight'. */ + elem->weights[cnt].w = + collate->ellipsis_weight.weights[cnt].w; + elem->weights[cnt].cnt = + collate->ellipsis_weight.weights[cnt].cnt; + } + } + } + } +} + + +static void +collate_startup (struct linereader *ldfile, struct localedef_t *locale, + struct localedef_t *copy_locale, int ignore_content) +{ + if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL) + { + struct locale_collate_t *collate; + + if (copy_locale == NULL) + { + collate = locale->categories[LC_COLLATE].collate = + (struct locale_collate_t *) + xcalloc (1, sizeof (struct locale_collate_t)); + + /* Init the various data structures. */ + init_hash (&collate->elem_table, 100); + init_hash (&collate->sym_table, 100); + init_hash (&collate->seq_table, 500); + obstack_init (&collate->mempool); + + collate->col_weight_max = -1; + } + else + /* Reuse the copy_locale's data structures. 
*/ + collate = locale->categories[LC_COLLATE].collate = + copy_locale->categories[LC_COLLATE].collate; + } + + ldfile->translate_strings = 0; + ldfile->return_widestr = 0; +} + + +void +collate_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + /* Now is the time when we can assign the individual collation + values for all the symbols. We have possibly different values + for the wide- and the multibyte-character symbols. This is done + since it might make a difference in the encoding if there is in + some cases no multibyte-character but there are wide-characters. + (The other way around it is not important since theencoded + collation value in the wide-character case is 32 bits wide and + therefore requires no encoding). + + The lowest collation value assigned is 2. Zero is reserved for + the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm' + functions and 1 is used to separate the individual passes for the + different rules. + + We also have to construct is list with all the bytes/words which + can come first in a sequence, followed by all the elements which + also start with this byte/word. The order is reverse which has + among others the important effect that longer strings are located + first in the list. This is required for the output data since + the algorithm used in `strcoll' etc depends on this. + + The multibyte case is easy. We simply sort into an array with + 256 elements. */ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + int mbact[nrules]; + int wcact; + int mbseqact; + int wcseqact; + struct element_t *runp; + int i; + int need_undefined = 0; + struct section_list *sect; + int ruleidx; + int nr_wide_elems = 0; + + if (collate == NULL) + { + /* No data, no check. */ + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"), + "LC_COLLATE")); + return; + } + + /* If this assertion is hit change the type in `element_t'. 
*/ + assert (nrules <= sizeof (runp->used_in_level) * 8); + + /* Make sure that the `position' rule is used either in all sections + or in none. */ + for (i = 0; i < nrules; ++i) + for (sect = collate->sections; sect != NULL; sect = sect->next) + if (sect != collate->current_section + && sect->rules != NULL + && ((sect->rules[i] & sort_position) + != (collate->current_section->rules[i] & sort_position))) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: `position' must be used for a specific level in all sections or none"), + "LC_COLLATE")); + break; + } + + /* Find out which elements are used at which level. At the same + time we find out whether we have any undefined symbols. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL) + { + for (i = 0; i < nrules; ++i) + { + int j; + + for (j = 0; j < runp->weights[i].cnt; ++j) + /* A NULL pointer as the weight means IGNORE. */ + if (runp->weights[i].w[j] != NULL) + { + if (runp->weights[i].w[j]->weights == NULL) + { + WITH_CUR_LOCALE (error_at_line (0, 0, runp->file, + runp->line, + _("symbol `%s' not defined"), + runp->weights[i].w[j]->name)); + + need_undefined = 1; + runp->weights[i].w[j] = &collate->undefined; + } + else + /* Set the bit for the level. */ + runp->weights[i].w[j]->used_in_level |= 1 << i; + } + } + } + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Walk through the list of defined sequences and assign weights. Also + create the data structure which will allow generating the single byte + character based tables. + + Since at each time only the weights for each of the rules are + only compared to other weights for this rule it is possible to + assign more compact weight values than simply counting all + weights in sequence. We can assign weights from 3, one for each + rule individually and only for those elements, which are actually + used for this rule. + + Why is this important? It is not for the wide char table. 
But + it is for the singlebyte output since here larger numbers have to + be encoded to make it possible to emit the value as a byte + string. */ + for (i = 0; i < nrules; ++i) + mbact[i] = 2; + wcact = 2; + mbseqact = 0; + wcseqact = 0; + runp = collate->start; + while (runp != NULL) + { + /* Determine the order. */ + if (runp->used_in_level != 0) + { + runp->mborder = (int *) obstack_alloc (&collate->mempool, + nrules * sizeof (int)); + + for (i = 0; i < nrules; ++i) + if ((runp->used_in_level & (1 << i)) != 0) + runp->mborder[i] = mbact[i]++; + else + runp->mborder[i] = 0; + } + + if (runp->mbs != NULL) + { + struct element_t **eptr; + struct element_t *lastp = NULL; + + /* Find the point where to insert in the list. */ + eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]]; + while (*eptr != NULL) + { + if ((*eptr)->nmbs < runp->nmbs) + break; + + if ((*eptr)->nmbs == runp->nmbs) + { + int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs); + + if (c == 0) + { + /* This should not happen. It means that we have + to symbols with the same byte sequence. It is + of course an error. */ + WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file, + (*eptr)->line, + _("\ +symbol `%s' has the same encoding as"), (*eptr)->name); + error_at_line (0, 0, runp->file, + runp->line, + _("symbol `%s'"), + runp->name)); + goto dont_insert; + } + else if (c < 0) + /* Insert it here. */ + break; + } + + /* To the next entry. */ + lastp = *eptr; + eptr = &(*eptr)->mbnext; + } + + /* Set the pointers. */ + runp->mbnext = *eptr; + runp->mblast = lastp; + if (*eptr != NULL) + (*eptr)->mblast = runp; + *eptr = runp; + dont_insert: + ; + } + + if (runp->used_in_level) + { + runp->wcorder = wcact++; + + /* We take the opportunity to count the elements which have + wide characters. 
*/ + ++nr_wide_elems; + } + + if (runp->is_character) + { + if (runp->nmbs == 1) + collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++; + + runp->wcseqorder = wcseqact++; + } + else if (runp->mbs != NULL && runp->weights != NULL) + /* This is for collation elements. */ + runp->wcseqorder = wcseqact++; + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Find out whether any of the `mbheads' entries is unset. In this + case we use the UNDEFINED entry. */ + for (i = 1; i < 256; ++i) + if (collate->mbheads[i] == NULL) + { + need_undefined = 1; + collate->mbheads[i] = &collate->undefined; + } + + /* Now to the wide character case. */ + collate->wcheads.p = 6; + collate->wcheads.q = 10; + wchead_table_init (&collate->wcheads); + + collate->wcseqorder.p = 6; + collate->wcseqorder.q = 10; + collseq_table_init (&collate->wcseqorder); + + /* Start adding. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->wcs != NULL) + { + struct element_t *e; + struct element_t **eptr; + struct element_t *lastp; + + /* Insert the collation sequence value. */ + if (runp->is_character) + collseq_table_add (&collate->wcseqorder, runp->wcs[0], + runp->wcseqorder); + + /* Find the point where to insert in the list. */ + e = wchead_table_get (&collate->wcheads, runp->wcs[0]); + eptr = &e; + lastp = NULL; + while (*eptr != NULL) + { + if ((*eptr)->nwcs < runp->nwcs) + break; + + if ((*eptr)->nwcs == runp->nwcs) + { + int c = wmemcmp ((wchar_t *) (*eptr)->wcs, + (wchar_t *) runp->wcs, runp->nwcs); + + if (c == 0) + { + /* This should not happen. It means that we have + two symbols with the same byte sequence. It is + of course an error. */ + WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file, + (*eptr)->line, + _("\ +symbol `%s' has the same encoding as"), (*eptr)->name); + error_at_line (0, 0, runp->file, + runp->line, + _("symbol `%s'"), + runp->name)); + goto dont_insertwc; + } + else if (c < 0) + /* Insert it here. 
*/ + break; + } + + /* To the next entry. */ + lastp = *eptr; + eptr = &(*eptr)->wcnext; + } + + /* Set the pointers. */ + runp->wcnext = *eptr; + runp->wclast = lastp; + if (*eptr != NULL) + (*eptr)->wclast = runp; + *eptr = runp; + if (eptr == &e) + wchead_table_add (&collate->wcheads, runp->wcs[0], e); + dont_insertwc: + ; + } + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Now determine whether the UNDEFINED entry is needed and if yes, + whether it was defined. */ + collate->undefined.used_in_level = need_undefined ? ~0ul : 0; + if (collate->undefined.file == NULL) + { + if (need_undefined) + { + /* This seems not to be enforced by recent standards. Don't + emit an error, simply append UNDEFINED at the end. */ + if (0) + WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'"))); + + /* Add UNDEFINED at the end. */ + collate->undefined.mborder = + (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int)); + + for (i = 0; i < nrules; ++i) + collate->undefined.mborder[i] = mbact[i]++; + } + + /* In any case we will need the definition for the wide character + case. But we will not complain that it is missing since the + specification strangely enough does not seem to account for + this. */ + collate->undefined.wcorder = wcact++; + } + + /* Finally, try to unify the rules for the sections. Whenever the rules + for a section are the same as those for another section give the + ruleset the same index. Since there are never many section we can + use an O(n^2) algorithm here. */ + sect = collate->sections; + while (sect != NULL && sect->rules == NULL) + sect = sect->next; + + /* Bail out if we have no sections because of earlier errors. 
*/ + if (sect == NULL) + { + WITH_CUR_LOCALE (error (EXIT_FAILURE, 0, + _("too many errors; giving up"))); + return; + } + + ruleidx = 0; + do + { + struct section_list *osect = collate->sections; + + while (osect != sect) + if (osect->rules != NULL + && memcmp (osect->rules, sect->rules, + nrules * sizeof (osect->rules[0])) == 0) + break; + else + osect = osect->next; + + if (osect == sect) + sect->ruleidx = ruleidx++; + else + sect->ruleidx = osect->ruleidx; + + /* Next section. */ + do + sect = sect->next; + while (sect != NULL && sect->rules == NULL); + } + while (sect != NULL); + /* We are currently not prepared for more than 128 rulesets. But this + should never really be a problem. */ + assert (ruleidx <= 128); +} + + +static int32_t +output_weight (struct obstack *pool, struct locale_collate_t *collate, + struct element_t *elem) +{ + size_t cnt; + int32_t retval; + + /* Optimize the use of UNDEFINED. */ + if (elem == &collate->undefined) + /* The weights are already inserted. */ + return 0; + + /* This byte can start exactly one collation element and this is + a single byte. We can directly give the index to the weights. */ + retval = obstack_object_size (pool); + + /* Construct the weight. */ + for (cnt = 0; cnt < nrules; ++cnt) + { + char buf[elem->weights[cnt].cnt * 7]; + int len = 0; + int i; + + for (i = 0; i < elem->weights[cnt].cnt; ++i) + /* Encode the weight value. We do nothing for IGNORE entries. */ + if (elem->weights[cnt].w[i] != NULL) + len += utf8_encode (&buf[len], + elem->weights[cnt].w[i]->mborder[cnt]); + + /* And add the buffer content. */ + obstack_1grow (pool, len); + obstack_grow (pool, buf, len); + } + + return retval | ((elem->section->ruleidx & 0x7f) << 24); +} + + +static int32_t +output_weightwc (struct obstack *pool, struct locale_collate_t *collate, + struct element_t *elem) +{ + size_t cnt; + int32_t retval; + + /* Optimize the use of UNDEFINED. */ + if (elem == &collate->undefined) + /* The weights are already inserted. 
*/ + return 0; + + /* This byte can start exactly one collation element and this is + a single byte. We can directly give the index to the weights. */ + retval = obstack_object_size (pool) / sizeof (int32_t); + + /* Construct the weight. */ + for (cnt = 0; cnt < nrules; ++cnt) + { + int32_t buf[elem->weights[cnt].cnt]; + int i; + int32_t j; + + for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i) + if (elem->weights[cnt].w[i] != NULL) + buf[j++] = elem->weights[cnt].w[i]->wcorder; + + /* And add the buffer content. */ + obstack_int32_grow (pool, j); + + obstack_grow (pool, buf, j * sizeof (int32_t)); + maybe_swap_uint32_obstack (pool, j); + } + + return retval | ((elem->section->ruleidx & 0x7f) << 24); +} + +/* If localedef is every threaded, this would need to be __thread var. */ +static struct +{ + struct obstack *weightpool; + struct obstack *extrapool; + struct obstack *indpool; + struct locale_collate_t *collate; + struct collidx_table *tablewc; +} atwc; + +static void add_to_tablewc (uint32_t ch, struct element_t *runp); + +static void +add_to_tablewc (uint32_t ch, struct element_t *runp) +{ + if (runp->wcnext == NULL && runp->nwcs == 1) + { + int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate, + runp); + collidx_table_add (atwc.tablewc, ch, weigthidx); + } + else + { + /* As for the singlebyte table, we recognize sequences and + compress them. */ + + collidx_table_add (atwc.tablewc, ch, + -(obstack_object_size (atwc.extrapool) + / sizeof (uint32_t))); + + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Find out wether this is a single entry or we have more than + one consecutive entry. 
*/ + if (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)) + { + int i; + struct element_t *series_startp = runp; + struct element_t *curp; + + /* Now add first the initial byte sequence. */ + added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t); + if (sizeof (int32_t) == sizeof (int)) + obstack_make_room (atwc.extrapool, added); + + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. */ + obstack_int32_grow_fast (atwc.extrapool, + -(obstack_object_size (atwc.indpool) + / sizeof (int32_t))); + obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1); + + do + runp = runp->wcnext; + while (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)); + + /* Now walk backward from here to the beginning. */ + curp = runp; + + for (i = 1; i < runp->nwcs; ++i) + obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]); + + /* Now find the end of the consecutive sequence and + add all the indeces in the indirect pool. */ + do + { + weightidx = output_weightwc (atwc.weightpool, atwc.collate, + curp); + obstack_int32_grow (atwc.indpool, weightidx); + + curp = curp->wclast; + } + while (curp != series_startp); + + /* Add the final weight. */ + weightidx = output_weightwc (atwc.weightpool, atwc.collate, + curp); + obstack_int32_grow (atwc.indpool, weightidx); + + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < curp->nwcs; ++i) + obstack_int32_grow (atwc.extrapool, curp->wcs[i]); + } + else + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). 
*/ + int i; + + /* Output the weight info. */ + weightidx = output_weightwc (atwc.weightpool, atwc.collate, + runp); + + assert (runp->nwcs > 0); + added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t); + if (sizeof (int) == sizeof (int32_t)) + obstack_make_room (atwc.extrapool, added); + + obstack_int32_grow_fast (atwc.extrapool, weightidx); + obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1); + for (i = 1; i < runp->nwcs; ++i) + obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]); + } + + /* Next entry. */ + runp = runp->wcnext; + } + while (runp != NULL); + } +} + +void +collate_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE); + struct locale_file file; + size_t ch; + int32_t tablemb[256]; + struct obstack weightpool; + struct obstack extrapool; + struct obstack indirectpool; + struct section_list *sect; + struct collidx_table tablewc; + uint32_t elem_size; + uint32_t *elem_table; + int i; + struct element_t *runp; + + init_locale_data (&file, nelems); + add_locale_uint32 (&file, nrules); + + /* If we have no LC_COLLATE data emit only the number of rules as zero. */ + if (collate == NULL) + { + size_t idx; + for (idx = 1; idx < nelems; idx++) + { + /* The words have to be handled specially. */ + if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB)) + add_locale_uint32 (&file, 0); + else + add_locale_empty (&file); + } + write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file); + return; + } + + obstack_init (&weightpool); + obstack_init (&extrapool); + obstack_init (&indirectpool); + + /* Since we are using the sign of an integer to mark indirection the + offsets in the arrays we are indirectly referring to must not be + zero since -0 == 0. Therefore we add a bit of dummy content. 
*/ + obstack_int32_grow (&extrapool, 0); + obstack_int32_grow (&indirectpool, 0); + + /* Prepare the ruleset table. */ + for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next) + if (sect->rules != NULL && sect->ruleidx == i) + { + int j; + + obstack_make_room (&weightpool, nrules); + + for (j = 0; j < nrules; ++j) + obstack_1grow_fast (&weightpool, sect->rules[j]); + ++i; + } + /* And align the output. */ + i = (nrules * i) % LOCFILE_ALIGN; + if (i > 0) + do + obstack_1grow (&weightpool, '\0'); + while (++i < LOCFILE_ALIGN); + + add_locale_raw_obstack (&file, &weightpool); + + /* Generate the 8-bit table. Walk through the lists of sequences + starting with the same byte and add them one after the other to + the table. In case we have more than one sequence starting with + the same byte we have to use extra indirection. + + First add a record for the NUL byte. This entry will never be used + so it does not matter. */ + tablemb[0] = 0; + + /* Now insert the `UNDEFINED' value if it is used. Since this value + will probably be used more than once it is good to store the + weights only once. */ + if (collate->undefined.used_in_level != 0) + output_weight (&weightpool, collate, &collate->undefined); + + for (ch = 1; ch < 256; ++ch) + if (collate->mbheads[ch]->mbnext == NULL + && collate->mbheads[ch]->nmbs <= 1) + { + tablemb[ch] = output_weight (&weightpool, collate, + collate->mbheads[ch]); + } + else + { + /* The entries in the list are sorted by length and then + alphabetically. This is the order in which we will add the + elements to the collation table. This allows simply walking + the table in sequence and stopping at the first matching + entry. Since the longer sequences are coming first in the + list they have the possibility to match first, just as it + has to be. In the worst case we are walking to the end of + the list where we put, if no singlebyte sequence is defined + in the locale definition, the weights for UNDEFINED. 
+ + To reduce the length of the search list we compress them a bit. + This happens by collecting sequences of consecutive byte + sequences in one entry (having and begin and end byte sequence) + and add only one index into the weight table. We can find the + consecutive entries since they are also consecutive in the list. */ + struct element_t *runp = collate->mbheads[ch]; + struct element_t *lastp; + + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + + tablemb[ch] = -obstack_object_size (&extrapool); + + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Find out wether this is a single entry or we have more than + one consecutive entry. */ + if (runp->mbnext != NULL + && runp->nmbs == runp->mbnext->nmbs + && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0 + && (runp->mbs[runp->nmbs - 1] + == runp->mbnext->mbs[runp->nmbs - 1] + 1)) + { + int i; + struct element_t *series_startp = runp; + struct element_t *curp; + + /* Compute how much space we will need. */ + added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + + 2 * (runp->nmbs - 1)); + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + obstack_make_room (&extrapool, added); + + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. */ + obstack_int32_grow_fast (&extrapool, + -(obstack_object_size (&indirectpool) + / sizeof (int32_t))); + + /* Now search first the end of the series. */ + do + runp = runp->mbnext; + while (runp->mbnext != NULL + && runp->nmbs == runp->mbnext->nmbs + && memcmp (runp->mbs, runp->mbnext->mbs, + runp->nmbs - 1) == 0 + && (runp->mbs[runp->nmbs - 1] + == runp->mbnext->mbs[runp->nmbs - 1] + 1)); + + /* Now walk backward from here to the beginning. 
*/ + curp = runp; + + assert (runp->nmbs <= 256); + obstack_1grow_fast (&extrapool, curp->nmbs - 1); + for (i = 1; i < curp->nmbs; ++i) + obstack_1grow_fast (&extrapool, curp->mbs[i]); + + /* Now find the end of the consecutive sequence and + add all the indeces in the indirect pool. */ + do + { + weightidx = output_weight (&weightpool, collate, curp); + obstack_int32_grow (&indirectpool, weightidx); + + curp = curp->mblast; + } + while (curp != series_startp); + + /* Add the final weight. */ + weightidx = output_weight (&weightpool, collate, curp); + obstack_int32_grow (&indirectpool, weightidx); + + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < curp->nmbs; ++i) + obstack_1grow_fast (&extrapool, curp->mbs[i]); + } + else + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). */ + int i; + + /* Output the weight info. */ + weightidx = output_weight (&weightpool, collate, runp); + + added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + + runp->nmbs - 1); + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + obstack_make_room (&extrapool, added); + + obstack_int32_grow_fast (&extrapool, weightidx); + assert (runp->nmbs <= 256); + obstack_1grow_fast (&extrapool, runp->nmbs - 1); + + for (i = 1; i < runp->nmbs; ++i) + obstack_1grow_fast (&extrapool, runp->mbs[i]); + } + + /* Add alignment bytes if necessary. */ + while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))) + obstack_1grow_fast (&extrapool, '\0'); + + /* Next entry. */ + lastp = runp; + runp = runp->mbnext; + } + while (runp != NULL); + + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + + /* If the final entry in the list is not a single character we + add an UNDEFINED entry here. 
*/ + if (lastp->nmbs != 1) + { + int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1); + obstack_make_room (&extrapool, added); + + obstack_int32_grow_fast (&extrapool, 0); + /* XXX What rule? We just pick the first. */ + obstack_1grow_fast (&extrapool, 0); + /* Length is zero. */ + obstack_1grow_fast (&extrapool, 0); + + /* Add alignment bytes if necessary. */ + while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))) + obstack_1grow_fast (&extrapool, '\0'); + } + } + + /* Add padding to the tables if necessary. */ + while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool))) + obstack_1grow (&weightpool, 0); + + /* Now add the four tables. */ + add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256); + add_locale_raw_obstack (&file, &weightpool); + add_locale_raw_obstack (&file, &extrapool); + add_locale_raw_obstack (&file, &indirectpool); + + /* Now the same for the wide character table. We need to store some + more information here. */ + add_locale_empty (&file); + add_locale_empty (&file); + add_locale_empty (&file); + + /* Since we are using the sign of an integer to mark indirection the + offsets in the arrays we are indirectly referring to must not be + zero since -0 == 0. Therefore we add a bit of dummy content. */ + obstack_int32_grow (&extrapool, 0); + obstack_int32_grow (&indirectpool, 0); + + /* Now insert the `UNDEFINED' value if it is used. Since this value + will probably be used more than once it is good to store the + weights only once. */ + if (output_weightwc (&weightpool, collate, &collate->undefined) != 0) + abort (); + + /* Generate the table. Walk through the lists of sequences starting + with the same wide character and add them one after the other to + the table. In case we have more than one sequence starting with + the same byte we have to use extra indirection. 
*/ + tablewc.p = 6; + tablewc.q = 10; + collidx_table_init (&tablewc); + + atwc.weightpool = &weightpool; + atwc.extrapool = &extrapool; + atwc.indpool = &indirectpool; + atwc.collate = collate; + atwc.tablewc = &tablewc; + + wchead_table_iterate (&collate->wcheads, add_to_tablewc); + + memset (&atwc, 0, sizeof (atwc)); + + /* Now add the four tables. */ + add_locale_collidx_table (&file, &tablewc); + add_locale_raw_obstack (&file, &weightpool); + add_locale_raw_obstack (&file, &extrapool); + add_locale_raw_obstack (&file, &indirectpool); + + /* Finally write the table with collation element names out. It is + a hash table with a simple function which gets the name of the + character as the input. One character might have many names. The + value associated with the name is an index into the weight table + where we are then interested in the first-level weight value. + + To determine how large the table should be we are counting the + elements have to put in. Since we are using internal chaining + using a secondary hash function we have to make the table a bit + larger to avoid extremely long search times. We can achieve + good results with a 40% larger table than there are entries. */ + elem_size = 0; + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character) + /* Yep, the element really counts. */ + ++elem_size; + + runp = runp->next; + } + /* Add 40% and find the next prime number. */ + elem_size = next_prime (elem_size * 1.4); + + /* Allocate the table. Each entry consists of two words: the hash + value and an index in a secondary table which provides the index + into the weight table and the string itself (so that a match can + be determined). */ + elem_table = (uint32_t *) obstack_alloc (&extrapool, + elem_size * 2 * sizeof (uint32_t)); + memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t)); + + /* Now add the elements. 
*/ + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character) + { + /* Compute the hash value of the name. */ + uint32_t namelen = strlen (runp->name); + uint32_t hash = elem_hash (runp->name, namelen); + size_t idx = hash % elem_size; +#ifndef NDEBUG + size_t start_idx = idx; +#endif + + if (elem_table[idx * 2] != 0) + { + /* The spot is already taken. Try iterating using the value + from the secondary hashing function. */ + size_t iter = hash % (elem_size - 2) + 1; + + do + { + idx += iter; + if (idx >= elem_size) + idx -= elem_size; + assert (idx != start_idx); + } + while (elem_table[idx * 2] != 0); + } + /* This is the spot where we will insert the value. */ + elem_table[idx * 2] = hash; + elem_table[idx * 2 + 1] = obstack_object_size (&extrapool); + + /* The string itself including length. */ + obstack_1grow (&extrapool, namelen); + obstack_grow (&extrapool, runp->name, namelen); + + /* And the multibyte representation. */ + obstack_1grow (&extrapool, runp->nmbs); + obstack_grow (&extrapool, runp->mbs, runp->nmbs); + + /* And align again to 32 bits. */ + if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0) + obstack_grow (&extrapool, "\0\0", + (sizeof (int32_t) + - ((1 + namelen + 1 + runp->nmbs) + % sizeof (int32_t)))); + + /* Now some 32-bit values: multibyte collation sequence, + wide char string (including length), and wide char + collation sequence. */ + obstack_int32_grow (&extrapool, runp->mbseqorder); + + obstack_int32_grow (&extrapool, runp->nwcs); + obstack_grow (&extrapool, runp->wcs, + runp->nwcs * sizeof (uint32_t)); + maybe_swap_uint32_obstack (&extrapool, runp->nwcs); + + obstack_int32_grow (&extrapool, runp->wcseqorder); + } + + runp = runp->next; + } + + /* Prepare to write out this data. 
*/ + add_locale_uint32 (&file, elem_size); + add_locale_uint32_array (&file, elem_table, 2 * elem_size); + add_locale_raw_obstack (&file, &extrapool); + add_locale_raw_data (&file, collate->mbseqorder, 256); + add_locale_collseq_table (&file, &collate->wcseqorder); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file); + + obstack_free (&weightpool, NULL); + obstack_free (&extrapool, NULL); + obstack_free (&indirectpool, NULL); +} + + +static enum token_t +skip_to (struct linereader *ldfile, struct locale_collate_t *collate, + const struct charmap_t *charmap, int to_endif) +{ + while (1) + { + struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0); + enum token_t nowtok = now->tok; + + if (nowtok == tok_eof || nowtok == tok_end) + return nowtok; + + if (nowtok == tok_ifdef || nowtok == tok_ifndef) + { + lr_error (ldfile, _("%s: nested conditionals not supported"), + "LC_COLLATE"); + nowtok = skip_to (ldfile, collate, charmap, tok_endif); + if (nowtok == tok_eof || nowtok == tok_end) + return nowtok; + } + else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else)) + { + lr_ignore_rest (ldfile, 1); + return nowtok; + } + else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef)) + { + /* Do not read the rest of the line. 
*/ + return nowtok; + } + else if (nowtok == tok_else) + { + lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE"); + } + + lr_ignore_rest (ldfile, 0); + } +} + + +void +collate_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_collate_t *collate; + struct token *now; + struct token *arg = NULL; + enum token_t nowtok; + enum token_t was_ellipsis = tok_none; + struct localedef_t *copy_locale = NULL; + /* Parsing state: + 0 - start + 1 - between `order-start' and `order-end' + 2 - after `order-end' + 3 - after `reorder-after', waiting for `reorder-end' + 4 - after `reorder-end' + 5 - after `reorder-sections-after', waiting for `reorder-sections-end' + 6 - after `reorder-sections-end' + */ + int state = 0; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_COLLATE' must be free. */ + lr_ignore_rest (ldfile, 1); + + while (1) + { + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + if (nowtok != tok_define) + break; + + if (ignore_content) + lr_ignore_rest (ldfile, 0); + else + { + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + else + { + /* Simply add the new symbol. 
*/ + struct name_list *newsym = xmalloc (sizeof (*newsym) + + arg->val.str.lenmb + 1); + memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb); + newsym->str[arg->val.str.lenmb] = '\0'; + newsym->next = defined; + defined = newsym; + + lr_ignore_rest (ldfile, 1); + } + } + } + + if (nowtok == tok_copy) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_string) + { + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + + skip_category: + do + now = lr_token (ldfile, charmap, result, NULL, verbose); + while (now->tok != tok_eof && now->tok != tok_end); + + if (now->tok != tok_eof + || (now = lr_token (ldfile, charmap, result, NULL, verbose), + now->tok == tok_eof)) + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); + else if (now->tok != tok_lc_collate) + { + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + } + else + lr_ignore_rest (ldfile, 1); + + return; + } + + if (! ignore_content) + { + /* Get the locale definition. */ + copy_locale = load_locale (LC_COLLATE, now->val.str.startmb, + repertoire_name, charmap, NULL); + if ((copy_locale->avail & COLLATE_LOCALE) == 0) + { + /* Not yet loaded. So do it now. */ + if (locfile_read (copy_locale, charmap) != 0) + goto skip_category; + } + + if (copy_locale->categories[LC_COLLATE].collate == NULL) + return; + } + + lr_ignore_rest (ldfile, 1); + + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* Prepare the data structures. */ + collate_startup (ldfile, result, copy_locale, ignore_content); + collate = result->categories[LC_COLLATE].collate; + + while (1) + { + char ucs4buf[10]; + char *symstr; + size_t symlen; + + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { + case tok_copy: + /* Allow copying other locales. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_string) + goto err_label; + + if (! ignore_content) + load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name, + charmap, result); + + lr_ignore_rest (ldfile, 1); + break; + + case tok_coll_weight_max: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_number) + goto err_label; + if (collate->col_weight_max != -1) + lr_error (ldfile, _("%s: duplicate definition of `%s'"), + "LC_COLLATE", "col_weight_max"); + else + collate->col_weight_max = arg->val.num; + lr_ignore_rest (ldfile, 1); + break; + + case tok_section_symbol: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else if (!ignore_content) + { + /* Check whether this section is already known. 
*/ + struct section_list *known = collate->sections; + while (known != NULL) + { + if (strcmp (known->name, arg->val.str.startmb) == 0) + break; + known = known->next; + } + + if (known != NULL) + { + lr_error (ldfile, + _("%s: duplicate declaration of section `%s'"), + "LC_COLLATE", arg->val.str.startmb); + free (arg->val.str.startmb); + } + else + collate->sections = make_seclist_elem (collate, + arg->val.str.startmb, + collate->sections); + + lr_ignore_rest (ldfile, known == NULL); + } + else + { + free (arg->val.str.startmb); + lr_ignore_rest (ldfile, 0); + } + break; + + case tok_collating_element: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 2) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + + /* Next the `from' keyword. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_from) + { + free ((char *) symbol); + goto err_label; + } + + ldfile->return_widestr = 1; + ldfile->translate_strings = 1; + + /* Finally the string with the replacement. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + + ldfile->return_widestr = 0; + ldfile->translate_strings = 0; + + if (arg->tok != tok_string) + goto err_label; + + if (!ignore_content && symbol != NULL) + { + /* The name is already defined. 
*/ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + goto col_elem_free; + + if (arg->val.str.startmb != NULL) + insert_entry (&collate->elem_table, symbol, symbol_len, + new_element (collate, + arg->val.str.startmb, + arg->val.str.lenmb - 1, + arg->val.str.startwc, + symbol, symbol_len, 0)); + } + else + { + col_elem_free: + free ((char *) symbol); + free (arg->val.str.startmb); + free (arg->val.str.startwc); + } + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_collating_symbol: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 2) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + char *endsymbol = NULL; + size_t endsymbol_len = 0; + enum token_t ellipsis = tok_none; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4) + { + ellipsis = arg->tok; + + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + if (arg->tok != tok_bsymbol) + { + free (symbol); + goto err_label; + } + + endsymbol = arg->val.str.startmb; + endsymbol_len = arg->val.str.lenmb; + + lr_ignore_rest (ldfile, 1); + } + else if (arg->tok != tok_eol) + { + free (symbol); + goto err_label; + } + + if (!ignore_content) + { + if (symbol == NULL + || (ellipsis != tok_none && endsymbol == NULL)) + { + lr_error (ldfile, _("\ +%s: unknown character in collating symbol name"), + "LC_COLLATE"); + goto col_sym_free; + } + else if (ellipsis == tok_none) + { + /* A single symbol, no ellipsis. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + /* The name is already defined. 
*/ + goto col_sym_free; + + insert_entry (&collate->sym_table, symbol, symbol_len, + new_symbol (collate, symbol, symbol_len)); + } + else if (symbol_len != endsymbol_len) + { + col_sym_inv_range: + lr_error (ldfile, + _("invalid names for character range")); + goto col_sym_free; + } + else + { + /* Oh my, we have to handle an ellipsis. First, as + usual, determine the common prefix and then + convert the rest into a range. */ + size_t prefixlen; + unsigned long int from; + unsigned long int to; + char *endp; + + for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen) + if (symbol[prefixlen] != endsymbol[prefixlen]) + break; + + /* Convert the rest into numbers. */ + symbol[symbol_len] = '\0'; + from = strtoul (&symbol[prefixlen], &endp, + ellipsis == tok_ellipsis2 ? 16 : 10); + if (*endp != '\0') + goto col_sym_inv_range; + + endsymbol[symbol_len] = '\0'; + to = strtoul (&endsymbol[prefixlen], &endp, + ellipsis == tok_ellipsis2 ? 16 : 10); + if (*endp != '\0') + goto col_sym_inv_range; + + if (from > to) + goto col_sym_inv_range; + + /* Now loop over all entries. */ + while (from <= to) + { + char *symbuf; + + symbuf = (char *) obstack_alloc (&collate->mempool, + symbol_len + 1); + + /* Create the name. */ + sprintf (symbuf, + ellipsis == tok_ellipsis2 + ? "%.*s%.*lX" : "%.*s%.*lu", + (int) prefixlen, symbol, + (int) (symbol_len - prefixlen), from); + + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbuf, symbol_len)) + /* The name is already defined. */ + goto col_sym_free; + + insert_entry (&collate->sym_table, symbuf, + symbol_len, + new_symbol (collate, symbuf, + symbol_len)); + + /* Increment the counter. */ + ++from; + } + + goto col_sym_free; + } + } + else + { + col_sym_free: + free (symbol); + free (endsymbol); + } + } + break; + + case tok_symbol_equivalence: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *newname = arg->val.str.startmb; + size_t newname_len = arg->val.str.lenmb; + const char *symname; + size_t symname_len; + void *symval; /* Actually struct symbol_t* */ + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + { + free ((char *) newname); + goto err_label; + } + + symname = arg->val.str.startmb; + symname_len = arg->val.str.lenmb; + + if (newname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition name"), + "LC_COLLATE"); + + sym_equiv_free: + free ((char *) newname); + free ((char *) symname); + break; + } + if (symname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition value"), + "LC_COLLATE"); + goto sym_equiv_free; + } + + /* See whether the symbol name is already defined. */ + if (find_entry (&collate->sym_table, symname, symname_len, + &symval) != 0) + { + lr_error (ldfile, _("\ +%s: unknown symbol `%s' in equivalent definition"), + "LC_COLLATE", symname); + goto sym_equiv_free; + } + + if (insert_entry (&collate->sym_table, + newname, newname_len, symval) < 0) + { + lr_error (ldfile, _("\ +error while adding equivalent collating symbol")); + goto sym_equiv_free; + } + + free ((char *) symname); + } + lr_ignore_rest (ldfile, 1); + break; + + case tok_script: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* We get told about the scripts we know. 
*/ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + struct section_list *runp = collate->known_sections; + char *name; + + while (runp != NULL) + if (strncmp (runp->name, arg->val.str.startmb, + arg->val.str.lenmb) == 0 + && runp->name[arg->val.str.lenmb] == '\0') + break; + else + runp = runp->def_next; + + if (runp != NULL) + { + lr_error (ldfile, _("duplicate definition of script `%s'"), + runp->name); + lr_ignore_rest (ldfile, 0); + break; + } + + runp = (struct section_list *) xcalloc (1, sizeof (*runp)); + name = (char *) xmalloc (arg->val.str.lenmb + 1); + memcpy (name, arg->val.str.startmb, arg->val.str.lenmb); + name[arg->val.str.lenmb] = '\0'; + runp->name = name; + + runp->def_next = collate->known_sections; + collate->known_sections = runp; + } + lr_ignore_rest (ldfile, 1); + break; + + case tok_order_start: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 1 && state != 2) + goto err_label; + state = 1; + + /* The 14652 draft does not specify whether all `order_start' lines + must contain the same number of sort-rules, but 14651 does. So + we require this here as well. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_bsymbol) + { + /* This better should be a section name. */ + struct section_list *sp = collate->known_sections; + while (sp != NULL + && (sp->name == NULL + || strncmp (sp->name, arg->val.str.startmb, + arg->val.str.lenmb) != 0 + || sp->name[arg->val.str.lenmb] != '\0')) + sp = sp->def_next; + + if (sp == NULL) + { + lr_error (ldfile, _("\ +%s: unknown section name `%.*s'"), + "LC_COLLATE", (int) arg->val.str.lenmb, + arg->val.str.startmb); + /* We use the error section. 
*/ + collate->current_section = &collate->error_section; + + if (collate->error_section.first == NULL) + { + /* Insert &collate->error_section at the end of + the collate->sections list. */ + if (collate->sections == NULL) + collate->sections = &collate->error_section; + else + { + sp = collate->sections; + while (sp->next != NULL) + sp = sp->next; + + sp->next = &collate->error_section; + } + collate->error_section.next = NULL; + } + } + else + { + /* One should not be allowed to open the same + section twice. */ + if (sp->first != NULL) + lr_error (ldfile, _("\ +%s: multiple order definitions for section `%s'"), + "LC_COLLATE", sp->name); + else + { + /* Insert sp in the collate->sections list, + right after collate->current_section. */ + if (collate->current_section != NULL) + { + sp->next = collate->current_section->next; + collate->current_section->next = sp; + } + else if (collate->sections == NULL) + /* This is the first section to be defined. */ + collate->sections = sp; + + collate->current_section = sp; + } + + /* Next should come the end of the line or a semicolon. */ + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + if (arg->tok == tok_eol) + { + uint32_t cnt; + + /* This means we have exactly one rule: `forward'. */ + if (nrules > 1) + lr_error (ldfile, _("\ +%s: invalid number of sorting rules"), + "LC_COLLATE"); + else + nrules = 1; + sp->rules = obstack_alloc (&collate->mempool, + (sizeof (enum coll_sort_rule) + * nrules)); + for (cnt = 0; cnt < nrules; ++cnt) + sp->rules[cnt] = sort_forward; + + /* Next line. */ + break; + } + + /* Get the next token. */ + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + } + } + else + { + /* There is no section symbol. Therefore we use the unnamed + section. 
*/ + collate->current_section = &collate->unnamed_section; + + if (collate->unnamed_section_defined) + lr_error (ldfile, _("\ +%s: multiple order definitions for unnamed section"), + "LC_COLLATE"); + else + { + /* Insert &collate->unnamed_section at the beginning of + the collate->sections list. */ + collate->unnamed_section.next = collate->sections; + collate->sections = &collate->unnamed_section; + collate->unnamed_section_defined = true; + } + } + + /* Now read the direction names. */ + read_directions (ldfile, arg, charmap, repertoire, result); + + /* From now we need the strings untranslated. */ + ldfile->translate_strings = 0; + break; + + case tok_order_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 1) + goto err_label; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + + state = 2; + lr_ignore_rest (ldfile, 1); + break; + + case tok_reorder_after: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, arg->val.str.startmb, + arg->val.str.lenmb, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + } + else if (state == 0 && copy_locale == NULL) + goto err_label; + else if (state != 0 && state != 2 && state != 3) + goto err_label; + state = 3; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4) + { + /* Find this symbol in the sequence table. 
*/ + char ucsbuf[10]; + char *startmb; + size_t lenmb; + struct element_t *insp; + int no_error = 1; + void *ptr; + + if (arg->tok == tok_bsymbol) + { + startmb = arg->val.str.startmb; + lenmb = arg->val.str.lenmb; + } + else + { + sprintf (ucsbuf, "U%08X", arg->val.ucs4); + startmb = ucsbuf; + lenmb = 9; + } + + if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0) + /* Yes, the symbol exists. Simply point the cursor + to it. */ + collate->cursor = (struct element_t *) ptr; + else + { + struct symbol_t *symbp; + void *ptr; + + if (find_entry (&collate->sym_table, startmb, lenmb, + &ptr) == 0) + { + symbp = ptr; + + if (symbp->order->last != NULL + || symbp->order->next != NULL) + collate->cursor = symbp->order; + else + { + /* This is a collating symbol but its position + is not yet defined. */ + lr_error (ldfile, _("\ +%s: order for collating symbol %.*s not yet defined"), + "LC_COLLATE", (int) lenmb, startmb); + collate->cursor = NULL; + no_error = 0; + } + } + else if (find_entry (&collate->elem_table, startmb, lenmb, + &ptr) == 0) + { + insp = (struct element_t *) ptr; + + if (insp->last != NULL || insp->next != NULL) + collate->cursor = insp; + else + { + /* This is a collating element but its position + is not yet defined. */ + lr_error (ldfile, _("\ +%s: order for collating element %.*s not yet defined"), + "LC_COLLATE", (int) lenmb, startmb); + collate->cursor = NULL; + no_error = 0; + } + } + else + { + /* This is bad. The symbol after which we have to + insert does not exist. */ + lr_error (ldfile, _("\ +%s: cannot reorder after %.*s: symbol not known"), + "LC_COLLATE", (int) lenmb, startmb); + collate->cursor = NULL; + no_error = 0; + } + } + + lr_ignore_rest (ldfile, no_error); + } + else + /* This must not happen. */ + goto err_label; + break; + + case tok_reorder_end: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + break; + + if (state != 3) + goto err_label; + state = 4; + lr_ignore_rest (ldfile, 1); + break; + + case tok_reorder_sections_after: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + } + else if (state == 3) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing `reorder-end' keyword"), "LC_COLLATE")); + state = 4; + } + else if (state != 2 && state != 4) + goto err_label; + state = 5; + + /* Get the name of the sections we are adding after. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_bsymbol) + { + /* Now find a section with this name. */ + struct section_list *runp = collate->sections; + + while (runp != NULL) + { + if (runp->name != NULL + && strlen (runp->name) == arg->val.str.lenmb + && memcmp (runp->name, arg->val.str.startmb, + arg->val.str.lenmb) == 0) + break; + + runp = runp->next; + } + + if (runp != NULL) + collate->current_section = runp; + else + { + /* This is bad. The section after which we have to + reorder does not exist. Therefore we cannot + process the whole rest of this reorder + specification. */ + lr_error (ldfile, _("%s: section `%.*s' not known"), + "LC_COLLATE", (int) arg->val.str.lenmb, + arg->val.str.startmb); + + do + { + lr_ignore_rest (ldfile, 0); + + now = lr_token (ldfile, charmap, result, NULL, verbose); + } + while (now->tok == tok_reorder_sections_after + || now->tok == tok_reorder_sections_end + || now->tok == tok_end); + + /* Process the token we just saw. */ + nowtok = now->tok; + continue; + } + } + else + /* This must not happen. 
*/ + goto err_label; + break; + + case tok_reorder_sections_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + break; + + if (state != 5) + goto err_label; + state = 6; + lr_ignore_rest (ldfile, 1); + break; + + case tok_bsymbol: + case tok_ucs4: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 1 && state != 3 && state != 5) + goto err_label; + + if ((state == 0 || state == 5) && nowtok == tok_ucs4) + goto err_label; + + if (nowtok == tok_ucs4) + { + snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4); + symstr = ucs4buf; + symlen = 9; + } + else if (arg != NULL) + { + symstr = arg->val.str.startmb; + symlen = arg->val.str.lenmb; + } + else + { + lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE", + (int) ldfile->token.val.str.lenmb, + ldfile->token.val.str.startmb); + break; + } + + struct element_t *seqp; + if (state == 0) + { + /* We are outside an `order_start' region. This means + we must only accept definitions of values for + collation symbols since these are purely abstract + values and don't need directions associated. */ + void *ptr; + + if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0) + { + seqp = ptr; + + /* It's already defined. First check whether this + is really a collating symbol. */ + if (seqp->is_character) + goto err_label; + + goto move_entry; + } + else + { + void *result; + + if (find_entry (&collate->sym_table, symstr, symlen, + &result) != 0) + /* No collating symbol, it's an error. */ + goto err_label; + + /* Maybe this is the first time we define a symbol + value and it is before the first actual section. 
*/ + if (collate->sections == NULL) + collate->sections = collate->current_section = + &collate->symbol_section; + } + + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, + charmap, repertoire, result); + + /* Remember that we processed the ellipsis. */ + was_ellipsis = tok_none; + + /* And don't add the value a second time. */ + break; + } + } + else if (state == 3) + { + /* It is possible that we already have this collation sequence. + In this case we move the entry. */ + void *sym; + void *ptr; + + /* If the symbol after which we have to insert was not found + ignore all entries. */ + if (collate->cursor == NULL) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0) + { + seqp = (struct element_t *) ptr; + goto move_entry; + } + + if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0 + && (seqp = ((struct symbol_t *) sym)->order) != NULL) + goto move_entry; + + if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0 + && (seqp = (struct element_t *) ptr, + seqp->last != NULL || seqp->next != NULL + || (collate->start != NULL && seqp == collate->start))) + { + move_entry: + /* Remove the entry from the old position. */ + if (seqp->last == NULL) + collate->start = seqp->next; + else + seqp->last->next = seqp->next; + if (seqp->next != NULL) + seqp->next->last = seqp->last; + + /* We also have to check whether this entry is the + first or last of a section. */ + if (seqp->section->first == seqp) + { + if (seqp->section->first == seqp->section->last) + /* This section has no content anymore. */ + seqp->section->first = seqp->section->last = NULL; + else + seqp->section->first = seqp->next; + } + else if (seqp->section->last == seqp) + seqp->section->last = seqp->last; + + /* Now insert it in the new place. */ + insert_weights (ldfile, seqp, charmap, repertoire, result, + tok_none); + break; + } + + /* Otherwise we just add a new entry. 
*/ + } + else if (state == 5) + { + /* We are reordering sections. Find the named section. */ + struct section_list *runp = collate->sections; + struct section_list *prevp = NULL; + + while (runp != NULL) + { + if (runp->name != NULL + && strlen (runp->name) == symlen + && memcmp (runp->name, symstr, symlen) == 0) + break; + + prevp = runp; + runp = runp->next; + } + + if (runp == NULL) + { + lr_error (ldfile, _("%s: section `%.*s' not known"), + "LC_COLLATE", (int) symlen, symstr); + lr_ignore_rest (ldfile, 0); + } + else + { + if (runp != collate->current_section) + { + /* Remove the named section from the old place and + insert it in the new one. */ + prevp->next = runp->next; + + runp->next = collate->current_section->next; + collate->current_section->next = runp; + collate->current_section = runp; + } + + /* Process the rest of the line which might change + the collation rules. */ + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + if (arg->tok != tok_eof && arg->tok != tok_eol) + read_directions (ldfile, arg, charmap, repertoire, + result); + } + break; + } + else if (was_ellipsis != tok_none) + { + /* Using the information in the `ellipsis_weight' + element and this and the last value we have to handle + the ellipsis now. */ + assert (state == 1); + + handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap, + repertoire, result); + + /* Remember that we processed the ellipsis. */ + was_ellipsis = tok_none; + + /* And don't add the value a second time. */ + break; + } + + /* Now insert in the new place. */ + insert_value (ldfile, symstr, symlen, charmap, repertoire, result); + break; + + case tok_undefined: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 1) + goto err_label; + + if (was_ellipsis != tok_none) + { + lr_error (ldfile, + _("%s: cannot have `%s' as end of ellipsis range"), + "LC_COLLATE", "UNDEFINED"); + + unlink_element (collate); + was_ellipsis = tok_none; + } + + /* See whether UNDEFINED already appeared somewhere. */ + if (collate->undefined.next != NULL + || &collate->undefined == collate->cursor) + { + lr_error (ldfile, + _("%s: order for `%.*s' already defined at %s:%Zu"), + "LC_COLLATE", 9, "UNDEFINED", + collate->undefined.file, + collate->undefined.line); + lr_ignore_rest (ldfile, 0); + } + else + /* Parse the weights. */ + insert_weights (ldfile, &collate->undefined, charmap, + repertoire, result, tok_none); + break; + + case tok_ellipsis2: /* symbolic hexadecimal ellipsis */ + case tok_ellipsis3: /* absolute ellipsis */ + case tok_ellipsis4: /* symbolic decimal ellipsis */ + /* This is the symbolic (decimal or hexadecimal) or absolute + ellipsis. */ + if (was_ellipsis != tok_none) + goto err_label; + + if (state != 0 && state != 1 && state != 3) + goto err_label; + + was_ellipsis = nowtok; + + insert_weights (ldfile, &collate->ellipsis_weight, charmap, + repertoire, result, nowtok); + break; + + case tok_end: + seen_end: + /* Next we assume `LC_COLLATE'. */ + if (!ignore_content) + { + if (state == 0 && copy_locale == NULL) + /* We must either see a copy statement or have + ordering values. */ + lr_error (ldfile, + _("%s: empty category description not allowed"), + "LC_COLLATE"); + else if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + + /* Handle ellipsis at end of list. 
*/ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + } + else if (state == 3) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing `reorder-end' keyword"), "LC_COLLATE")); + else if (state == 5) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing `reorder-sections-end' keyword"), "LC_COLLATE")); + } + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE"); + else if (arg->tok != tok_lc_collate) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_collate); + return; + + case tok_define: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + goto err_label; + + /* Simply add the new symbol. */ + struct name_list *newsym = xmalloc (sizeof (*newsym) + + arg->val.str.lenmb + 1); + memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb); + newsym->str[arg->val.str.lenmb] = '\0'; + newsym->next = defined; + defined = newsym; + + lr_ignore_rest (ldfile, 1); + break; + + case tok_undef: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + goto err_label; + + /* Remove _all_ occurrences of the symbol from the list. 
*/ + struct name_list *prevdef = NULL; + struct name_list *curdef = defined; + while (curdef != NULL) + if (strncmp (arg->val.str.startmb, curdef->str, + arg->val.str.lenmb) == 0 + && curdef->str[arg->val.str.lenmb] == '\0') + { + if (prevdef == NULL) + defined = curdef->next; + else + prevdef->next = curdef->next; + + struct name_list *olddef = curdef; + curdef = curdef->next; + + free (olddef); + } + else + { + prevdef = curdef; + curdef = curdef->next; + } + + lr_ignore_rest (ldfile, 1); + break; + + case tok_ifdef: + case tok_ifndef: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + found_ifdef: + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + goto err_label; + lr_ignore_rest (ldfile, 1); + + if (collate->else_action == else_none) + { + curdef = defined; + while (curdef != NULL) + if (strncmp (arg->val.str.startmb, curdef->str, + arg->val.str.lenmb) == 0 + && curdef->str[arg->val.str.lenmb] == '\0') + break; + else + curdef = curdef->next; + + if ((nowtok == tok_ifdef && curdef != NULL) + || (nowtok == tok_ifndef && curdef == NULL)) + { + /* We have to use the if-branch. */ + collate->else_action = else_ignore; + } + else + { + /* We have to use the else-branch, if there is one. */ + nowtok = skip_to (ldfile, collate, charmap, 0); + if (nowtok == tok_else) + collate->else_action = else_seen; + else if (nowtok == tok_elifdef) + { + nowtok = tok_ifdef; + goto found_ifdef; + } + else if (nowtok == tok_elifndef) + { + nowtok = tok_ifndef; + goto found_ifdef; + } + else if (nowtok == tok_eof) + goto seen_eof; + else if (nowtok == tok_end) + goto seen_end; + } + } + else + { + /* XXX Should it really become necessary to support nested + preprocessor handling we will push the state here. 
*/ + lr_error (ldfile, _("%s: nested conditionals not supported"), + "LC_COLLATE"); + nowtok = skip_to (ldfile, collate, charmap, 1); + if (nowtok == tok_eof) + goto seen_eof; + else if (nowtok == tok_end) + goto seen_end; + } + break; + + case tok_elifdef: + case tok_elifndef: + case tok_else: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + lr_ignore_rest (ldfile, 1); + + if (collate->else_action == else_ignore) + { + /* Ignore everything until the endif. */ + nowtok = skip_to (ldfile, collate, charmap, 1); + if (nowtok == tok_eof) + goto seen_eof; + else if (nowtok == tok_end) + goto seen_end; + } + else + { + assert (collate->else_action == else_none); + lr_error (ldfile, _("\ +%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE", + nowtok == tok_else ? "else" + : nowtok == tok_elifdef ? "elifdef" : "elifndef"); + } + break; + + case tok_endif: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + lr_ignore_rest (ldfile, 1); + + if (collate->else_action != else_ignore + && collate->else_action != else_seen) + lr_error (ldfile, _("\ +%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE"); + + /* XXX If we support nested preprocessor directives we pop + the state here. */ + collate->else_action = else_none; + break; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + seen_eof: + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); +} diff --git a/REORG.TODO/locale/programs/ld-ctype.c b/REORG.TODO/locale/programs/ld-ctype.c new file mode 100644 index 0000000000..df266c20d6 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-ctype.c @@ -0,0 +1,4030 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <alloca.h> +#include <byteswap.h> +#include <endian.h> +#include <errno.h> +#include <limits.h> +#include <obstack.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> +#include <stdint.h> +#include <sys/uio.h> + +#include "localedef.h" +#include "charmap.h" +#include "localeinfo.h" +#include "langinfo.h" +#include "linereader.h" +#include "locfile-token.h" +#include "locfile.h" + +#include <assert.h> + + +/* The bit used for representing a special class. */ +#define BITPOS(class) ((class) - tok_upper) +#define BIT(class) (_ISbit (BITPOS (class))) +#define BITw(class) (_ISwbit (BITPOS (class))) + +#define ELEM(ctype, collection, idx, value) \ + *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \ + &ctype->collection##_act idx, value) + + +/* To be compatible with former implementations we for now restrict + the number of bits for character classes to 16. When compatibility + is not necessary anymore increase the number to 32. */ +#define char_class_t uint16_t +#define char_class32_t uint32_t + + +/* Type to describe a transliteration action. We have a possibly + multiple character from-string and a set of multiple character + to-strings. 
All are 32bit values since this is what is used in
+   the gconv functions.  */
+struct translit_to_t            /* One node in a list of to-strings.  */
+{
+  uint32_t *str;                /* The to-string held by this node.  */
+
+  struct translit_to_t *next;   /* Following node, linking the list.  */
+};
+
+struct translit_t               /* One transliteration rule.  */
+{
+  uint32_t *from;               /* The from-string.  */
+
+  const char *fname;            /* NOTE(review): FNAME and LINENO presumably
+                                   record where the rule was read -- confirm
+                                   against the code that fills them in.  */
+  size_t lineno;
+
+  struct translit_to_t *to;     /* Head of the list of to-strings.  */
+
+  struct translit_t *next;      /* Following rule.  */
+};
+
+struct translit_ignore_t        /* NOTE(review): looks like a stepped range
+                                   FROM..TO of 32-bit values to ignore --
+                                   confirm against the code that fills it.  */
+{
+  uint32_t from;
+  uint32_t to;
+  uint32_t step;
+
+  const char *fname;            /* Presumably the defining file and line, as
+                                   in struct translit_t -- confirm.  */
+  size_t lineno;
+
+  struct translit_ignore_t *next;       /* Following entry.  */
+};
+
+
+/* Type to describe a transliteration include statement.  */
+struct translit_include_t
+{
+  const char *copy_locale;      /* NOTE(review): presumably the locale and  */
+  const char *copy_repertoire;  /* repertoire names of the include -- confirm.  */
+
+  struct translit_include_t *next;      /* Following include entry.  */
+};
+
+/* Provide some dummy pointer for empty string.  */
+static uint32_t no_str[] = { 0 };
+
+
+/* Sparse table of uint32_t.  */
+#define TABLE idx_table
+#define ELEMENT uint32_t
+#define DEFAULT ((uint32_t) ~0)
+#define NO_ADD_LOCALE
+#include "3level.h"     /* Included once per TABLE/ELEMENT/DEFAULT set;
+                           presumably yields struct idx_table and its
+                           idx_table_* helpers -- see that header.  */
+
+#define TABLE wcwidth_table     /* Same header again: uint8_t elements,
+                                   default value 0xff.  */
+#define ELEMENT uint8_t
+#define DEFAULT 0xff
+#include "3level.h"
+
+#define TABLE wctrans_table     /* And once more: int32_t elements,
+                                   default value 0.  */
+#define ELEMENT int32_t
+#define DEFAULT 0
+#define wctrans_table_add wctrans_table_add_internal    /* While the header is
+                                   included, its wctrans_table_add is
+                                   (presumably) emitted under the _internal
+                                   name, which frees the plain name for the
+                                   wrapper below.  */
+#include "3level.h"
+#undef wctrans_table_add
+/* The wctrans_table must actually store the difference between the
+   desired result and the argument.  Hence this wrapper: for character WC
+   it hands MAPPED_WC - WC (computed in uint32_t) to the internal add
+   function for table T.  */
+static inline void
+wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
+{
+  wctrans_table_add_internal (t, wc, mapped_wc - wc);
+}
+
+/* Construction of sparse 3-level tables.
+   See wchar-lookup.h for their structure and the meaning of p and q.  */
+
+struct wctype_table
+{
+  /* Parameters.  */
+  unsigned int p;
+  unsigned int q;
+  /* Working representation.  */
+  size_t level1_alloc;          /* NOTE(review): for each level, *_alloc is
+                                   presumably the allocated and *_size the
+                                   used extent of the array -- confirm.  */
+  size_t level1_size;
+  uint32_t *level1;
+  size_t level2_alloc;
+  size_t level2_size;
+  uint32_t *level2;
+  size_t level3_alloc;
+  size_t level3_size;
+  uint32_t *level3;
+  size_t result_size;
+};
+
+static void add_locale_wctype_table (struct locale_file *file,
+                                     struct wctype_table *t);
+
+/* The real definition of the struct for the LC_CTYPE locale.  */
+struct locale_ctype_t
+{
+  uint32_t *charnames;          /* ctype_startup allocates CHARNAMES_MAX
+                                   entries and sets the first 256 to their
+                                   own index.  */
+  size_t charnames_max;         /* Capacity of CHARNAMES, in entries.  */
+  size_t charnames_act;         /* 256 after ctype_startup; presumably the
+                                   number of entries in use.  */
+  /* An index lookup table, to speedup find_idx.  */
+  struct idx_table charnames_idx;
+
+  struct repertoire_t *repertoire;
+
+  /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
+#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
+  size_t nr_charclass;
+  const char *classnames[MAX_NR_CHARCLASS];
+  uint32_t last_class_char;     /* Starts out as ILLEGAL_CHAR_VALUE.  */
+  uint32_t class256_collection[256];
+  uint32_t *class_collection;   /* Zero-filled array set up by ctype_startup.  */
+  size_t class_collection_max;  /* Its capacity: 256 or 512 entries at first.  */
+  size_t class_collection_act;  /* 256 after ctype_startup.  */
+  uint32_t class_done;
+  uint32_t class_offset;
+
+  struct charseq **mbdigits;
+  size_t mbdigits_act;
+  size_t mbdigits_max;
+  uint32_t *wcdigits;
+  size_t wcdigits_act;
+  size_t wcdigits_max;
+
+  struct charseq *mboutdigits[10];
+  uint32_t wcoutdigits[10];
+  size_t outdigits_act;
+
+  /* If the following number ever turns out to be too small simply
+     increase it.  But I doubt it will.  --drepper@gnu */
+#define MAX_NR_CHARMAP 16
+  const char *mapnames[MAX_NR_CHARMAP];
+  uint32_t *map_collection[MAX_NR_CHARMAP];     /* ctype_startup makes the
+                                   first 256 entries of maps 0 and 1 the
+                                   identity mapping.  */
+  uint32_t map256_collection[2][256];           /* Likewise initialised to
+                                   the identity by ctype_startup.  */
+  size_t map_collection_max[MAX_NR_CHARMAP];
+  size_t map_collection_act[MAX_NR_CHARMAP];
+  size_t map_collection_nr;
+  size_t last_map_idx;          /* Starts out as MAX_NR_CHARMAP.  */
+  int tomap_done[MAX_NR_CHARMAP];
+  uint32_t map_offset;
+
+  /* Transliteration information.  */
+  struct translit_include_t *translit_include;
+  struct translit_t *translit;
+  struct translit_ignore_t *translit_ignore;
+  uint32_t ntranslit_ignore;
+
+  uint32_t *default_missing;
+  const char *default_missing_file;
+  size_t default_missing_lineno;
+
+  uint32_t to_nonascii;         /* Set to 1 by ctype_startup when
+                                   enc_not_ascii_compatible is nonzero.  */
+  uint32_t nonascii_case;
+
+  /* The arrays for the binary representation.  */
+  char_class_t *ctype_b;
+  char_class32_t *ctype32_b;
+  uint32_t **map_b;
+  uint32_t **map32_b;
+  uint32_t **class_b;
+  struct wctype_table *class_3level;
+  struct wctrans_table *map_3level;
+  uint32_t *class_name_ptr;
+  uint32_t *map_name_ptr;
+  struct wcwidth_table width;
+  uint32_t mb_cur_max;
+  const char *codeset_name;
+  uint32_t *translit_from_idx;
+  uint32_t *translit_from_tbl;
+  uint32_t *translit_to_idx;
+  uint32_t *translit_to_tbl;
+  uint32_t translit_idx_size;
+  size_t translit_from_tbl_size;
+  size_t translit_to_tbl_size;
+
+  struct obstack mempool;       /* Initialised by ctype_startup.  */
+};
+
+
+/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
+   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
+#define EMPTY ((uint32_t) ~0)
+
+
+#define obstack_chunk_alloc xmalloc     /* Chunk allocation hooks that the  */
+#define obstack_chunk_free free         /* obstack macros expand to.  */
+
+
+/* Prototypes for local functions.
*/ +static void ctype_startup (struct linereader *lr, struct localedef_t *locale, + const struct charmap_t *charmap, + struct localedef_t *copy_locale, + int ignore_content); +static void ctype_class_new (struct linereader *lr, + struct locale_ctype_t *ctype, const char *name); +static void ctype_map_new (struct linereader *lr, + struct locale_ctype_t *ctype, + const char *name, const struct charmap_t *charmap); +static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table, + size_t *max, size_t *act, uint32_t idx); +static void set_class_defaults (struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire); +static void allocate_arrays (struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire); + + +static const char *longnames[] = +{ + "zero", "one", "two", "three", "four", + "five", "six", "seven", "eight", "nine" +}; +static const char *uninames[] = +{ + "U00000030", "U00000031", "U00000032", "U00000033", "U00000034", + "U00000035", "U00000036", "U00000037", "U00000038", "U00000039" +}; +static const unsigned char digits[] = "0123456789"; + + +static void +ctype_startup (struct linereader *lr, struct localedef_t *locale, + const struct charmap_t *charmap, + struct localedef_t *copy_locale, int ignore_content) +{ + unsigned int cnt; + struct locale_ctype_t *ctype; + + if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL) + { + if (copy_locale == NULL) + { + /* Allocate the needed room. */ + locale->categories[LC_CTYPE].ctype = ctype = + (struct locale_ctype_t *) xcalloc (1, + sizeof (struct locale_ctype_t)); + + /* We have seen no names yet. */ + ctype->charnames_max = charmap->mb_cur_max == 1 ? 
256 : 512; + ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max + * sizeof (uint32_t)); + for (cnt = 0; cnt < 256; ++cnt) + ctype->charnames[cnt] = cnt; + ctype->charnames_act = 256; + idx_table_init (&ctype->charnames_idx); + + /* Fill character class information. */ + ctype->last_class_char = ILLEGAL_CHAR_VALUE; + /* The order of the following instructions determines the bit + positions! */ + ctype_class_new (lr, ctype, "upper"); + ctype_class_new (lr, ctype, "lower"); + ctype_class_new (lr, ctype, "alpha"); + ctype_class_new (lr, ctype, "digit"); + ctype_class_new (lr, ctype, "xdigit"); + ctype_class_new (lr, ctype, "space"); + ctype_class_new (lr, ctype, "print"); + ctype_class_new (lr, ctype, "graph"); + ctype_class_new (lr, ctype, "blank"); + ctype_class_new (lr, ctype, "cntrl"); + ctype_class_new (lr, ctype, "punct"); + ctype_class_new (lr, ctype, "alnum"); + + ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512; + ctype->class_collection + = (uint32_t *) xcalloc (sizeof (unsigned long int), + ctype->class_collection_max); + ctype->class_collection_act = 256; + + /* Fill character map information. */ + ctype->last_map_idx = MAX_NR_CHARMAP; + ctype_map_new (lr, ctype, "toupper", charmap); + ctype_map_new (lr, ctype, "tolower", charmap); + + /* Fill first 256 entries in `toXXX' arrays. */ + for (cnt = 0; cnt < 256; ++cnt) + { + ctype->map_collection[0][cnt] = cnt; + ctype->map_collection[1][cnt] = cnt; + + ctype->map256_collection[0][cnt] = cnt; + ctype->map256_collection[1][cnt] = cnt; + } + + if (enc_not_ascii_compatible) + ctype->to_nonascii = 1; + + obstack_init (&ctype->mempool); + } + else + ctype = locale->categories[LC_CTYPE].ctype = + copy_locale->categories[LC_CTYPE].ctype; + } +} + + +void +ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + /* See POSIX.2, table 2-6 for the meaning of the following table. 
*/ +#define NCLASS 12 + static const struct + { + const char *name; + const char allow[NCLASS]; + } + valid_table[NCLASS] = + { + /* The order is important. See token.h for more information. + M = Always, D = Default, - = Permitted, X = Mutually exclusive */ + { "upper", "--MX-XDDXXX-" }, + { "lower", "--MX-XDDXXX-" }, + { "alpha", "---X-XDDXXX-" }, + { "digit", "XXX--XDDXXX-" }, + { "xdigit", "-----XDDXXX-" }, + { "space", "XXXXX------X" }, + { "print", "---------X--" }, + { "graph", "---------X--" }, + { "blank", "XXXXXM-----X" }, + { "cntrl", "XXXXX-XX--XX" }, + { "punct", "XXXXX-DD-X-X" }, + { "alnum", "-----XDDXXX-" } + }; + size_t cnt; + int cls1, cls2; + uint32_t space_value; + struct charseq *space_seq; + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + int warned; + const void *key; + size_t len; + void *vdata; + void *curs; + + /* Now resolve copying and also handle completely missing definitions. */ + if (ctype == NULL) + { + const char *repertoire_name; + + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_CTYPE] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE], + from->repertoire_name, charmap); + while (from->categories[LC_CTYPE].ctype == NULL + && from->copy_name[LC_CTYPE] != NULL); + + ctype = locale->categories[LC_CTYPE].ctype + = from->categories[LC_CTYPE].ctype; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (ctype == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_CTYPE")); + ctype_startup (NULL, locale, charmap, NULL, 0); + ctype = locale->categories[LC_CTYPE].ctype; + } + + /* Get the repertoire we have to use. 
*/ + repertoire_name = locale->repertoire_name ?: repertoire_global; + if (repertoire_name != NULL) + ctype->repertoire = repertoire_read (repertoire_name); + } + + /* We need the name of the currently used 8-bit character set to + make correct conversion between this 8-bit representation and the + ISO 10646 character set used internally for wide characters. */ + ctype->codeset_name = charmap->code_set_name; + if (ctype->codeset_name == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No character set name specified in charmap"))); + ctype->codeset_name = "//UNKNOWN//"; + } + + /* Set default value for classes not specified. */ + set_class_defaults (ctype, charmap, ctype->repertoire); + + /* Check according to table. */ + for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) + { + uint32_t tmp = ctype->class_collection[cnt]; + + if (tmp != 0) + { + for (cls1 = 0; cls1 < NCLASS; ++cls1) + if ((tmp & _ISwbit (cls1)) != 0) + for (cls2 = 0; cls2 < NCLASS; ++cls2) + if (valid_table[cls1].allow[cls2] != '-') + { + int eq = (tmp & _ISwbit (cls2)) != 0; + switch (valid_table[cls1].allow[cls2]) + { + case 'M': + if (!eq) + { + uint32_t value = ctype->charnames[cnt]; + + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character L'\\u%0*x' in class `%s' must be in class `%s'"), + value > 0xffff ? 8 : 4, + value, + valid_table[cls1].name, + valid_table[cls2].name)); + } + break; + + case 'X': + if (eq) + { + uint32_t value = ctype->charnames[cnt]; + + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character L'\\u%0*x' in class `%s' must not be in class `%s'"), + value > 0xffff ? 
8 : 4, + value, + valid_table[cls1].name, + valid_table[cls2].name)); + } + break; + + case 'D': + ctype->class_collection[cnt] |= _ISwbit (cls2); + break; + + default: + WITH_CUR_LOCALE (error (5, 0, _("\ +internal error in %s, line %u"), __FUNCTION__, __LINE__)); + } + } + } + } + + for (cnt = 0; cnt < 256; ++cnt) + { + uint32_t tmp = ctype->class256_collection[cnt]; + + if (tmp != 0) + { + for (cls1 = 0; cls1 < NCLASS; ++cls1) + if ((tmp & _ISbit (cls1)) != 0) + for (cls2 = 0; cls2 < NCLASS; ++cls2) + if (valid_table[cls1].allow[cls2] != '-') + { + int eq = (tmp & _ISbit (cls2)) != 0; + switch (valid_table[cls1].allow[cls2]) + { + case 'M': + if (!eq) + { + char buf[17]; + + snprintf (buf, sizeof buf, "\\%Zo", cnt); + + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character '%s' in class `%s' must be in class `%s'"), + buf, + valid_table[cls1].name, + valid_table[cls2].name)); + } + break; + + case 'X': + if (eq) + { + char buf[17]; + + snprintf (buf, sizeof buf, "\\%Zo", cnt); + + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character '%s' in class `%s' must not be in class `%s'"), + buf, + valid_table[cls1].name, + valid_table[cls2].name)); + } + break; + + case 'D': + ctype->class256_collection[cnt] |= _ISbit (cls2); + break; + + default: + WITH_CUR_LOCALE (error (5, 0, _("\ +internal error in %s, line %u"), __FUNCTION__, __LINE__)); + } + } + } + } + + /* ... and now test <SP> as a special case. 
*/ + space_value = 32; + if (((cnt = BITPOS (tok_space), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_space)) == 0) + || (cnt = BITPOS (tok_blank), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_blank)) == 0))) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"), + valid_table[cnt].name)); + } + else if (((cnt = BITPOS (tok_punct), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_punct)) != 0) + || (cnt = BITPOS (tok_graph), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_graph)) + != 0))) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +<SP> character must not be in class `%s'"), + valid_table[cnt].name)); + } + else + ELEM (ctype, class_collection, , space_value) |= BITw (tok_print); + + space_seq = charmap_find_value (charmap, "SP", 2); + if (space_seq == NULL) + space_seq = charmap_find_value (charmap, "space", 5); + if (space_seq == NULL) + space_seq = charmap_find_value (charmap, "U00000020", 9); + if (space_seq == NULL || space_seq->nbytes != 1) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character <SP> not defined in character map"))); + } + else if (((cnt = BITPOS (tok_space), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_space)) == 0) + || (cnt = BITPOS (tok_blank), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_blank)) == 0))) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"), + valid_table[cnt].name)); + } + else if (((cnt = BITPOS (tok_punct), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_punct)) != 0) + || (cnt = BITPOS (tok_graph), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_graph)) != 0))) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +<SP> character must not be in class `%s'"), + valid_table[cnt].name)); + } + else + ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print); + + /* Check 
whether all single-byte characters make to their upper/lowercase + equivalent according to the ASCII rules. */ + for (cnt = 'A'; cnt <= 'Z'; ++cnt) + { + uint32_t uppval = ctype->map256_collection[0][cnt]; + uint32_t lowval = ctype->map256_collection[1][cnt]; + uint32_t lowuppval = ctype->map256_collection[0][lowval]; + uint32_t lowlowval = ctype->map256_collection[1][lowval]; + + if (uppval != cnt + || lowval != cnt + 0x20 + || lowuppval != cnt + || lowlowval != cnt + 0x20) + ctype->nonascii_case = 1; + } + for (cnt = 0; cnt < 256; ++cnt) + if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z') + if (ctype->map256_collection[0][cnt] != cnt + || ctype->map256_collection[1][cnt] != cnt) + ctype->nonascii_case = 1; + + /* Now that the tests are done make sure the name array contains all + characters which are handled in the WIDTH section of the + character set definition file. */ + if (charmap->width_rules != NULL) + for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) + { + unsigned char bytes[charmap->mb_cur_max]; + int nbytes = charmap->width_rules[cnt].from->nbytes; + + /* We have the range of character for which the width is + specified described using byte sequences of the multibyte + charset. We have to convert this to UCS4 now. And we + cannot simply convert the beginning and the end of the + sequence, we have to iterate over the byte sequence and + convert it for every single character. */ + memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes); + + while (nbytes < charmap->width_rules[cnt].to->nbytes + || memcmp (bytes, charmap->width_rules[cnt].to->bytes, + nbytes) <= 0) + { + /* Find the UCS value for `bytes'. 
*/ + int inner; + uint32_t wch; + struct charseq *seq + = charmap_find_symbol (charmap, (char *) bytes, nbytes); + + if (seq == NULL) + wch = ILLEGAL_CHAR_VALUE; + else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE) + wch = seq->ucs4; + else + wch = repertoire_find_value (ctype->repertoire, seq->name, + strlen (seq->name)); + + if (wch != ILLEGAL_CHAR_VALUE) + /* We are only interested in the side-effects of the + `find_idx' call. It will add appropriate entries in + the name array if this is necessary. */ + (void) find_idx (ctype, NULL, NULL, NULL, wch); + + /* "Increment" the bytes sequence. */ + inner = nbytes - 1; + while (inner >= 0 && bytes[inner] == 0xff) + --inner; + + if (inner < 0) + { + /* We have to extend the byte sequence. */ + if (nbytes >= charmap->width_rules[cnt].to->nbytes) + break; + + bytes[0] = 1; + memset (&bytes[1], 0, nbytes); + ++nbytes; + } + else + { + ++bytes[inner]; + while (++inner < nbytes) + bytes[inner] = 0; + } + } + } + + /* Now set all the other characters of the character set to the + default width. */ + curs = NULL; + while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0) + { + struct charseq *data = (struct charseq *) vdata; + + if (data->ucs4 == UNINITIALIZED_CHAR_VALUE) + data->ucs4 = repertoire_find_value (ctype->repertoire, + data->name, len); + + if (data->ucs4 != ILLEGAL_CHAR_VALUE) + (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4); + } + + /* There must be a multiple of 10 digits. */ + if (ctype->mbdigits_act % 10 != 0) + { + assert (ctype->mbdigits_act == ctype->wcdigits_act); + ctype->wcdigits_act -= ctype->mbdigits_act % 10; + ctype->mbdigits_act -= ctype->mbdigits_act % 10; + WITH_CUR_LOCALE (error (0, 0, _("\ +`digit' category has not entries in groups of ten"))); + } + + /* Check the input digits. There must be a multiple of ten available. + In each group it could be that one or the other character is missing. + In this case the whole group must be removed. 
*/ + cnt = 0; + while (cnt < ctype->mbdigits_act) + { + size_t inner; + for (inner = 0; inner < 10; ++inner) + if (ctype->mbdigits[cnt + inner] == NULL) + break; + + if (inner == 10) + cnt += 10; + else + { + /* Remove the group. */ + memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10], + ((ctype->wcdigits_act - cnt - 10) + * sizeof (ctype->mbdigits[0]))); + ctype->mbdigits_act -= 10; + } + } + + /* If no input digits are given use the default. */ + if (ctype->mbdigits_act == 0) + { + if (ctype->mbdigits_max == 0) + { + ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, + 10 * sizeof (struct charseq *)); + ctype->mbdigits_max = 10; + } + + for (cnt = 0; cnt < 10; ++cnt) + { + ctype->mbdigits[cnt] = charmap_find_symbol (charmap, + (char *) digits + cnt, 1); + if (ctype->mbdigits[cnt] == NULL) + { + ctype->mbdigits[cnt] = charmap_find_symbol (charmap, + longnames[cnt], + strlen (longnames[cnt])); + if (ctype->mbdigits[cnt] == NULL) + { + /* Hum, this ain't good. */ + WITH_CUR_LOCALE (error (0, 0, _("\ +no input digits defined and none of the standard names in the charmap"))); + + ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, + sizeof (struct charseq) + 1); + + /* This is better than nothing. */ + ctype->mbdigits[cnt]->bytes[0] = digits[cnt]; + ctype->mbdigits[cnt]->nbytes = 1; + } + } + } + + ctype->mbdigits_act = 10; + } + + /* Check the wide character input digits. There must be a multiple + of ten available. In each group it could be that one or the other + character is missing. In this case the whole group must be + removed. */ + cnt = 0; + while (cnt < ctype->wcdigits_act) + { + size_t inner; + for (inner = 0; inner < 10; ++inner) + if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE) + break; + + if (inner == 10) + cnt += 10; + else + { + /* Remove the group. 
*/ + memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10], + ((ctype->wcdigits_act - cnt - 10) + * sizeof (ctype->wcdigits[0]))); + ctype->wcdigits_act -= 10; + } + } + + /* If no input digits are given use the default. */ + if (ctype->wcdigits_act == 0) + { + if (ctype->wcdigits_max == 0) + { + ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, + 10 * sizeof (uint32_t)); + ctype->wcdigits_max = 10; + } + + for (cnt = 0; cnt < 10; ++cnt) + ctype->wcdigits[cnt] = L'0' + cnt; + + ctype->mbdigits_act = 10; + } + + /* Check the outdigits. */ + warned = 0; + for (cnt = 0; cnt < 10; ++cnt) + if (ctype->mboutdigits[cnt] == NULL) + { + static struct charseq replace[2]; + + if (!warned) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +not all characters used in `outdigit' are available in the charmap"))); + warned = 1; + } + + replace[0].nbytes = 1; + replace[0].bytes[0] = '?'; + replace[0].bytes[1] = '\0'; + ctype->mboutdigits[cnt] = &replace[0]; + } + + warned = 0; + for (cnt = 0; cnt < 10; ++cnt) + if (ctype->wcoutdigits[cnt] == 0) + { + if (!warned) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +not all characters used in `outdigit' are available in the repertoire"))); + warned = 1; + } + + ctype->wcoutdigits[cnt] = L'?'; + } + + /* Sort the entries in the translit_ignore list. 
*/ + if (ctype->translit_ignore != NULL) + { + struct translit_ignore_t *firstp = ctype->translit_ignore; + struct translit_ignore_t *runp; + + ctype->ntranslit_ignore = 1; + + for (runp = firstp->next; runp != NULL; runp = runp->next) + { + struct translit_ignore_t *lastp = NULL; + struct translit_ignore_t *cmpp; + + ++ctype->ntranslit_ignore; + + for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next) + if (runp->from < cmpp->from) + break; + + runp->next = lastp; + if (lastp == NULL) + firstp = runp; + } + + ctype->translit_ignore = firstp; + } +} + + +void +ctype_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1) + + ctype->nr_charclass + ctype->map_collection_nr); + struct locale_file file; + uint32_t default_missing_len; + size_t elem, cnt; + + /* Now prepare the output: Find the sizes of the table we can use. */ + allocate_arrays (ctype, charmap, ctype->repertoire); + + default_missing_len = (ctype->default_missing + ? 
wcslen ((wchar_t *) ctype->default_missing) + : 0); + + init_locale_data (&file, nelems); + for (elem = 0; elem < nelems; ++elem) + { + if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)) + switch (elem) + { +#define CTYPE_EMPTY(name) \ + case name: \ + add_locale_empty (&file); \ + break + + CTYPE_EMPTY(_NL_CTYPE_GAP1); + CTYPE_EMPTY(_NL_CTYPE_GAP2); + CTYPE_EMPTY(_NL_CTYPE_GAP3); + CTYPE_EMPTY(_NL_CTYPE_GAP4); + CTYPE_EMPTY(_NL_CTYPE_GAP5); + CTYPE_EMPTY(_NL_CTYPE_GAP6); + +#define CTYPE_RAW_DATA(name, base, size) \ + case _NL_ITEM_INDEX (name): \ + add_locale_raw_data (&file, base, size); \ + break + + CTYPE_RAW_DATA (_NL_CTYPE_CLASS, + ctype->ctype_b, + (256 + 128) * sizeof (char_class_t)); + +#define CTYPE_UINT32_ARRAY(name, base, n_elems) \ + case _NL_ITEM_INDEX (name): \ + add_locale_uint32_array (&file, base, n_elems); \ + break + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128); + CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128); + CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256); + CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256); + CTYPE_RAW_DATA (_NL_CTYPE_CLASS32, + ctype->ctype32_b, + 256 * sizeof (char_class32_t)); + +#define CTYPE_UINT32(name, value) \ + case _NL_ITEM_INDEX (name): \ + add_locale_uint32 (&file, value); \ + break + + CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset); + CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset); + CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size); + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX, + ctype->translit_from_idx, + ctype->translit_idx_size); + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL, + ctype->translit_from_tbl, + ctype->translit_from_tbl_size + / sizeof (uint32_t)); + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX, + ctype->translit_to_idx, + ctype->translit_idx_size); + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL, + ctype->translit_to_tbl, + ctype->translit_to_tbl_size / sizeof 
(uint32_t)); + + case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES): + /* The class name array. */ + start_locale_structure (&file); + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + add_locale_string (&file, ctype->classnames[cnt]); + add_locale_char (&file, 0); + align_locale_data (&file, LOCFILE_ALIGN); + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES): + /* The class name array. */ + start_locale_structure (&file); + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + add_locale_string (&file, ctype->mapnames[cnt]); + add_locale_char (&file, 0); + align_locale_data (&file, LOCFILE_ALIGN); + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH): + add_locale_wcwidth_table (&file, &ctype->width); + break; + + CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max); + + case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME): + add_locale_string (&file, ctype->codeset_name); + break; + + CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii); + + CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case); + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN): + add_locale_uint32 (&file, ctype->mbdigits_act / 10); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN): + add_locale_uint32 (&file, ctype->wcdigits_act / 10); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB): + start_locale_structure (&file); + for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB); + cnt < ctype->mbdigits_act; cnt += 10) + { + add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes, + ctype->mbdigits[cnt]->nbytes); + add_locale_char (&file, 0); + } + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... 
_NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB): + start_locale_structure (&file); + cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB); + add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes, + ctype->mboutdigits[cnt]->nbytes); + add_locale_char (&file, 0); + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC): + start_locale_structure (&file); + for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC); + cnt < ctype->wcdigits_act; cnt += 10) + add_locale_uint32 (&file, ctype->wcdigits[cnt]); + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC): + cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC); + add_locale_uint32 (&file, ctype->wcoutdigits[cnt]); + break; + + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN): + add_locale_uint32 (&file, default_missing_len); + break; + + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING): + add_locale_uint32_array (&file, ctype->default_missing, + default_missing_len); + break; + + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN): + add_locale_uint32 (&file, ctype->ntranslit_ignore); + break; + + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE): + start_locale_structure (&file); + { + struct translit_ignore_t *runp; + for (runp = ctype->translit_ignore; runp != NULL; + runp = runp->next) + { + add_locale_uint32 (&file, runp->from); + add_locale_uint32 (&file, runp->to); + add_locale_uint32 (&file, runp->step); + } + } + end_locale_structure (&file); + break; + + default: + assert (! "unknown CTYPE element"); + } + else + { + /* Handle extra maps. 
*/ + size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1); + if (nr < ctype->nr_charclass) + { + start_locale_prelude (&file); + add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32); + end_locale_prelude (&file); + add_locale_wctype_table (&file, &ctype->class_3level[nr]); + } + else + { + nr -= ctype->nr_charclass; + assert (nr < ctype->map_collection_nr); + add_locale_wctrans_table (&file, &ctype->map_3level[nr]); + } + } + } + + write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file); +} + + +/* Local functions. */ +static void +ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name) +{ + size_t cnt; + + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (ctype->classnames[cnt], name) == 0) + break; + + if (cnt < ctype->nr_charclass) + { + lr_error (lr, _("character class `%s' already defined"), name); + return; + } + + if (ctype->nr_charclass == MAX_NR_CHARCLASS) + /* Exit code 2 is prescribed in P1003.2b. */ + WITH_CUR_LOCALE (error (2, 0, _("\ +implementation limit: no more than %Zd character classes allowed"), + MAX_NR_CHARCLASS)); + + ctype->classnames[ctype->nr_charclass++] = name; +} + + +static void +ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name, const struct charmap_t *charmap) +{ + size_t max_chars = 0; + size_t cnt; + + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + { + if (strcmp (ctype->mapnames[cnt], name) == 0) + break; + + if (max_chars < ctype->map_collection_max[cnt]) + max_chars = ctype->map_collection_max[cnt]; + } + + if (cnt < ctype->map_collection_nr) + { + lr_error (lr, _("character map `%s' already defined"), name); + return; + } + + if (ctype->map_collection_nr == MAX_NR_CHARMAP) + /* Exit code 2 is prescribed in P1003.2b. 
*/ + WITH_CUR_LOCALE (error (2, 0, _("\ +implementation limit: no more than %d character maps allowed"), + MAX_NR_CHARMAP)); + + ctype->mapnames[cnt] = name; + + if (max_chars == 0) + ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512; + else + ctype->map_collection_max[cnt] = max_chars; + + ctype->map_collection[cnt] = (uint32_t *) + xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]); + ctype->map_collection_act[cnt] = 256; + + ++ctype->map_collection_nr; +} + + +/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This + is possible if we only want to extend the name array. */ +static uint32_t * +find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, + size_t *act, uint32_t idx) +{ + size_t cnt; + + if (idx < 256) + return table == NULL ? NULL : &(*table)[idx]; + + /* Use the charnames_idx lookup table instead of the slow search loop. */ +#if 1 + cnt = idx_table_get (&ctype->charnames_idx, idx); + if (cnt == EMPTY) + /* Not found. */ + cnt = ctype->charnames_act; +#else + for (cnt = 256; cnt < ctype->charnames_act; ++cnt) + if (ctype->charnames[cnt] == idx) + break; +#endif + + /* We have to distinguish two cases: the name is found or not. */ + if (cnt == ctype->charnames_act) + { + /* Extend the name array. */ + if (ctype->charnames_act == ctype->charnames_max) + { + ctype->charnames_max *= 2; + ctype->charnames = (uint32_t *) + xrealloc (ctype->charnames, + sizeof (uint32_t) * ctype->charnames_max); + } + ctype->charnames[ctype->charnames_act++] = idx; + idx_table_add (&ctype->charnames_idx, idx, cnt); + } + + if (table == NULL) + /* We have done everything we are asked to do. */ + return NULL; + + if (max == NULL) + /* The caller does not want to extend the table. */ + return (cnt >= *act ? 
NULL : &(*table)[cnt]); + + if (cnt >= *act) + { + if (cnt >= *max) + { + size_t old_max = *max; + do + *max *= 2; + while (*max <= cnt); + + *table = + (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t)); + memset (&(*table)[old_max], '\0', + (*max - old_max) * sizeof (uint32_t)); + } + + *act = cnt + 1; + } + + return &(*table)[cnt]; +} + + +static int +get_character (struct token *now, const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct charseq **seqp, uint32_t *wchp) +{ + if (now->tok == tok_bsymbol) + { + /* This will hopefully be the normal case. */ + *wchp = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + *seqp = charmap_find_value (charmap, now->val.str.startmb, + now->val.str.lenmb); + } + else if (now->tok == tok_ucs4) + { + char utmp[10]; + + snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4); + *seqp = charmap_find_value (charmap, utmp, 9); + + if (*seqp == NULL) + *seqp = repertoire_find_seq (repertoire, now->val.ucs4); + + if (*seqp == NULL) + { + /* Compute the value in the charmap from the UCS value. */ + const char *symbol = repertoire_find_symbol (repertoire, + now->val.ucs4); + + if (symbol == NULL) + *seqp = NULL; + else + *seqp = charmap_find_value (charmap, symbol, strlen (symbol)); + + if (*seqp == NULL) + { + if (repertoire != NULL) + { + /* Insert a negative entry. */ + static const struct charseq negative + = { .ucs4 = ILLEGAL_CHAR_VALUE }; + uint32_t *newp = obstack_alloc (&repertoire->mem_pool, + sizeof (uint32_t)); + *newp = now->val.ucs4; + + insert_entry (&repertoire->seq_table, newp, + sizeof (uint32_t), (void *) &negative); + } + } + else + (*seqp)->ucs4 = now->val.ucs4; + } + else if ((*seqp)->ucs4 != now->val.ucs4) + *seqp = NULL; + + *wchp = now->val.ucs4; + } + else if (now->tok == tok_charcode) + { + /* We must map from the byte code to UCS4. 
*/ + *seqp = charmap_find_symbol (charmap, now->val.str.startmb, + now->val.str.lenmb); + + if (*seqp == NULL) + *wchp = ILLEGAL_CHAR_VALUE; + else + { + if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE) + (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name, + strlen ((*seqp)->name)); + *wchp = (*seqp)->ucs4; + } + } + else + return 1; + + return 0; +} + + +/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and + the .(2). counterparts. */ +static void +charclass_symbolic_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, + const char *last_str, + unsigned long int class256_bit, + unsigned long int class_bit, int base, + int ignore_content, int handle_digits, int step) +{ + const char *nowstr = now->val.str.startmb; + char tmp[now->val.str.lenmb + 1]; + const char *cp; + char *endp; + unsigned long int from; + unsigned long int to; + + /* We have to compute the ellipsis values using the symbolic names. */ + assert (last_str != NULL); + + if (strlen (last_str) != now->val.str.lenmb) + { + invalid_range: + lr_error (ldfile, + _("`%s' and `%.*s' are not valid names for symbolic range"), + last_str, (int) now->val.str.lenmb, nowstr); + return; + } + + if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0) + /* Nothing to do, the names are the same. */ + return; + + for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp) + ; + + errno = 0; + from = strtoul (cp, &endp, base); + if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0') + goto invalid_range; + + to = strtoul (nowstr + (cp - last_str), &endp, base); + if ((to == UINT_MAX && errno == ERANGE) + || (endp - nowstr) != now->val.str.lenmb || from >= to) + goto invalid_range; + + /* OK, we have a range FROM - TO. Now we can create the symbolic names. 
*/ + if (!ignore_content) + { + now->val.str.startmb = tmp; + while ((from += step) <= to) + { + struct charseq *seq; + uint32_t wch; + + sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"), + (int) (cp - last_str), last_str, + (int) (now->val.str.lenmb - (cp - last_str)), + from); + + get_character (now, charmap, repertoire, &seq, &wch); + + if (seq != NULL && seq->nbytes == 1) + /* Yep, we can store information about this byte sequence. */ + ctype->class256_collection[seq->bytes[0]] |= class256_bit; + + if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) + /* We have the UCS4 position. */ + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + } +} + + +/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. 
*/ +static void +charclass_ucs4_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, uint32_t last_wch, + unsigned long int class256_bit, + unsigned long int class_bit, int ignore_content, + int handle_digits, int step) +{ + if (last_wch > now->val.ucs4) + { + lr_error (ldfile, _("\ +to-value <U%0*X> of range is smaller than from-value <U%0*X>"), + (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4, + (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch); + return; + } + + if (!ignore_content) + while ((last_wch += step) <= now->val.ucs4) + { + /* We have to find out whether there is a byte sequence corresponding + to this UCS4 value. */ + struct charseq *seq; + char utmp[10]; + + snprintf (utmp, sizeof (utmp), "U%08X", last_wch); + seq = charmap_find_value (charmap, utmp, 9); + if (seq == NULL) + { + snprintf (utmp, sizeof (utmp), "U%04X", last_wch); + seq = charmap_find_value (charmap, utmp, 5); + } + + if (seq == NULL) + /* Try looking in the repertoire map. */ + seq = repertoire_find_seq (repertoire, last_wch); + + /* If this is the first time we look for this sequence create a new + entry. */ + if (seq == NULL) + { + static const struct charseq negative + = { .ucs4 = ILLEGAL_CHAR_VALUE }; + + /* Find the symbolic name for this UCS4 value. */ + if (repertoire != NULL) + { + const char *symbol = repertoire_find_symbol (repertoire, + last_wch); + uint32_t *newp = obstack_alloc (&repertoire->mem_pool, + sizeof (uint32_t)); + *newp = last_wch; + + if (symbol != NULL) + /* We have a name, now search the multibyte value. */ + seq = charmap_find_value (charmap, symbol, strlen (symbol)); + + if (seq == NULL) + /* We have to create a fake entry. */ + seq = (struct charseq *) &negative; + else + seq->ucs4 = last_wch; + + insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t), + seq); + } + else + /* We have to create a fake entry. 
*/ + seq = (struct charseq *) &negative; + } + + /* We have a name, now search the multibyte value. */ + if (seq->ucs4 == last_wch && seq->nbytes == 1) + /* Yep, we can store information about this byte sequence. */ + ctype->class256_collection[(size_t) seq->bytes[0]] + |= class256_bit; + + /* And of course we have the UCS4 position. */ + if (class_bit != 0) + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, last_wch) |= class_bit; + + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch + ? seq : NULL); + ctype->wcdigits[ctype->wcdigits_act++] = last_wch; + } + else if (handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch + ? seq : NULL); + ctype->wcoutdigits[ctype->outdigits_act] = last_wch; + ++ctype->outdigits_act; + } + } +} + + +/* Ellipsis as in `/xea/x12.../xea/x34'. */ +static void +charclass_charcode_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, char *last_charcode, + uint32_t last_charcode_len, + unsigned long int class256_bit, + unsigned long int class_bit, int ignore_content, + int handle_digits) +{ + /* First check whether the to-value is larger. 
*/ + if (now->val.charcode.nbytes != last_charcode_len) + { + lr_error (ldfile, _("\ +start and end character sequence of range must have the same length")); + return; + } + + if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0) + { + lr_error (ldfile, _("\ +to-value character sequence is smaller than from-value sequence")); + return; + } + + if (!ignore_content) + { + do + { + /* Increment the byte sequence value. */ + struct charseq *seq; + uint32_t wch; + int i; + + for (i = last_charcode_len - 1; i >= 0; --i) + if (++last_charcode[i] != 0) + break; + + if (last_charcode_len == 1) + /* Of course we have the charcode value. */ + ctype->class256_collection[(size_t) last_charcode[0]] + |= class256_bit; + + /* Find the symbolic name. */ + seq = charmap_find_symbol (charmap, last_charcode, + last_charcode_len); + if (seq != NULL) + { + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + strlen (seq->name)); + wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4; + + if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + } + else + wch = ILLEGAL_CHAR_VALUE; + + if (handle_digits == 1) + { + /* We must store the digit values. 
*/ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + seq = xmalloc (sizeof (struct charseq) + last_charcode_len); + memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); + seq->nbytes = last_charcode_len; + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (handle_digits == 2) + { + struct charseq *seq; + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + seq = xmalloc (sizeof (struct charseq) + last_charcode_len); + memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); + seq->nbytes = last_charcode_len; + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + while (memcmp (last_charcode, now->val.charcode.bytes, + last_charcode_len) != 0); + } +} + + +static uint32_t * +find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap, + uint32_t wch) +{ + struct translit_t *trunp = ctype->translit; + struct translit_ignore_t *tirunp = ctype->translit_ignore; + + while (trunp != NULL) + { + /* XXX We simplify things here. The transliterations we look + for are only allowed to have one character. */ + if (trunp->from[0] == wch && trunp->from[1] == 0) + { + /* Found it. Now look for a transliteration which can be + represented with the character set. 
*/ + struct translit_to_t *torunp = trunp->to; + + while (torunp != NULL) + { + int i; + + for (i = 0; torunp->str[i] != 0; ++i) + { + char utmp[10]; + + snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]); + if (charmap_find_value (charmap, utmp, 9) == NULL) + /* This character cannot be represented. */ + break; + } + + if (torunp->str[i] == 0) + return torunp->str; + + torunp = torunp->next; + } + + break; + } + + trunp = trunp->next; + } + + /* Check for ignored chars. */ + while (tirunp != NULL) + { + if (tirunp->from <= wch && tirunp->to >= wch) + { + uint32_t wi; + + for (wi = tirunp->from; wi <= wch; wi += tirunp->step) + if (wi == wch) + return no_str; + } + } + + /* Nothing found. */ + return NULL; +} + + +uint32_t * +find_translit (struct localedef_t *locale, const struct charmap_t *charmap, + uint32_t wch) +{ + struct locale_ctype_t *ctype; + uint32_t *result = NULL; + + assert (locale != NULL); + ctype = locale->categories[LC_CTYPE].ctype; + + if (ctype == NULL) + return NULL; + + if (ctype->translit != NULL) + result = find_translit2 (ctype, charmap, wch); + + if (result == NULL) + { + struct translit_include_t *irunp = ctype->translit_include; + + while (irunp != NULL && result == NULL) + { + result = find_translit (find_locale (CTYPE_LOCALE, + irunp->copy_locale, + irunp->copy_repertoire, + charmap), + charmap, wch); + irunp = irunp->next; + } + } + + return result; +} + + +/* Read one transliteration entry. */ +static uint32_t * +read_widestring (struct linereader *ldfile, struct token *now, + const struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ + uint32_t *wstr; + + if (now->tok == tok_default_missing) + /* The special name "" will denote this case. */ + wstr = no_str; + else if (now->tok == tok_bsymbol) + { + /* Get the value from the repertoire. 
*/ + wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); + wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + if (wstr[0] == ILLEGAL_CHAR_VALUE) + { + /* We cannot proceed, we don't know the UCS4 value. */ + free (wstr); + return NULL; + } + + wstr[1] = 0; + } + else if (now->tok == tok_ucs4) + { + wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); + wstr[0] = now->val.ucs4; + wstr[1] = 0; + } + else if (now->tok == tok_charcode) + { + /* Argh, we have to convert to the symbol name first and then to the + UCS4 value. */ + struct charseq *seq = charmap_find_symbol (charmap, + now->val.str.startmb, + now->val.str.lenmb); + if (seq == NULL) + /* Cannot find the UCS4 value. */ + return NULL; + + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + strlen (seq->name)); + if (seq->ucs4 == ILLEGAL_CHAR_VALUE) + /* We cannot proceed, we don't know the UCS4 value. */ + return NULL; + + wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); + wstr[0] = seq->ucs4; + wstr[1] = 0; + } + else if (now->tok == tok_string) + { + wstr = now->val.str.startwc; + if (wstr == NULL || wstr[0] == 0) + return NULL; + } + else + { + if (now->tok != tok_eol && now->tok != tok_eof) + lr_ignore_rest (ldfile, 0); + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); + return (uint32_t *) -1l; + } + + return wstr; +} + + +static void +read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype, + struct token *now, const struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ + uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire); + struct translit_t *result; + struct translit_to_t **top; + struct obstack *ob = &ctype->mempool; + int first; + int ignore; + + if (from_wstr == NULL) + /* There is no valid from string. 
*/ + return; + + result = (struct translit_t *) obstack_alloc (ob, + sizeof (struct translit_t)); + result->from = from_wstr; + result->fname = ldfile->fname; + result->lineno = ldfile->lineno; + result->next = NULL; + result->to = NULL; + top = &result->to; + first = 1; + ignore = 0; + + while (1) + { + uint32_t *to_wstr; + + /* Next we have one or more transliterations. They are + separated by semicolons. */ + now = lr_token (ldfile, charmap, NULL, repertoire, verbose); + + if (!first && (now->tok == tok_semicolon || now->tok == tok_eol)) + { + /* One string read. */ + const uint32_t zero = 0; + + if (!ignore) + { + obstack_grow (ob, &zero, 4); + to_wstr = obstack_finish (ob); + + *top = obstack_alloc (ob, sizeof (struct translit_to_t)); + (*top)->str = to_wstr; + (*top)->next = NULL; + } + + if (now->tok == tok_eol) + { + result->next = ctype->translit; + ctype->translit = result; + return; + } + + if (!ignore) + top = &(*top)->next; + ignore = 0; + } + else + { + to_wstr = read_widestring (ldfile, now, charmap, repertoire); + if (to_wstr == (uint32_t *) -1l) + { + /* An error occurred. */ + obstack_free (ob, result); + return; + } + + if (to_wstr == NULL) + ignore = 1; + else + /* This value is usable. */ + obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4); + + first = 0; + } + } +} + + +static void +read_translit_ignore_entry (struct linereader *ldfile, + struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ + /* We expect a semicolon-separated list of characters we ignore. We are + only interested in the wide character definitions. These must be + single characters, possibly defining a range when an ellipsis is used. 
*/ + while (1) + { + struct token *now = lr_token (ldfile, charmap, NULL, repertoire, + verbose); + struct translit_ignore_t *newp; + uint32_t from; + + if (now->tok == tok_eol || now->tok == tok_eof) + { + lr_error (ldfile, + _("premature end of `translit_ignore' definition")); + return; + } + + if (now->tok != tok_bsymbol && now->tok != tok_ucs4) + { + lr_error (ldfile, _("syntax error")); + lr_ignore_rest (ldfile, 0); + return; + } + + if (now->tok == tok_ucs4) + from = now->val.ucs4; + else + /* Try to get the value. */ + from = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + + if (from == ILLEGAL_CHAR_VALUE) + { + lr_error (ldfile, "invalid character name"); + newp = NULL; + } + else + { + newp = (struct translit_ignore_t *) + obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t)); + newp->from = from; + newp->to = from; + newp->step = 1; + + newp->next = ctype->translit_ignore; + ctype->translit_ignore = newp; + } + + /* Now we expect either a semicolon, an ellipsis, or the end of the + line. */ + now = lr_token (ldfile, charmap, NULL, repertoire, verbose); + + if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2) + { + /* XXX Should we bother implementing `....'? `...' certainly + will not be implemented. */ + uint32_t to; + int step = now->tok == tok_ellipsis2_2 ? 2 : 1; + + now = lr_token (ldfile, charmap, NULL, repertoire, verbose); + + if (now->tok == tok_eol || now->tok == tok_eof) + { + lr_error (ldfile, + _("premature end of `translit_ignore' definition")); + return; + } + + if (now->tok != tok_bsymbol && now->tok != tok_ucs4) + { + lr_error (ldfile, _("syntax error")); + lr_ignore_rest (ldfile, 0); + return; + } + + if (now->tok == tok_ucs4) + to = now->val.ucs4; + else + /* Try to get the value. 
*/ + to = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + + if (to == ILLEGAL_CHAR_VALUE) + lr_error (ldfile, "invalid character name"); + else + { + /* Make sure the `to'-value is larger. */ + if (to >= from) + { + newp->to = to; + newp->step = step; + } + else + lr_error (ldfile, _("\ +to-value <U%0*X> of range is smaller than from-value <U%0*X>"), + (to | from) < 65536 ? 4 : 8, to, + (to | from) < 65536 ? 4 : 8, from); + } + + /* And the next token. */ + now = lr_token (ldfile, charmap, NULL, repertoire, verbose); + } + + if (now->tok == tok_eol || now->tok == tok_eof) + /* We are done. */ + return; + + if (now->tok == tok_semicolon) + /* Next round. */ + continue; + + /* If we come here something is wrong. */ + lr_error (ldfile, _("syntax error")); + lr_ignore_rest (ldfile, 0); + return; + } +} + + +/* The parser for the LC_CTYPE section of the locale definition. */ +void +ctype_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_ctype_t *ctype; + struct token *now; + enum token_t nowtok; + size_t cnt; + uint32_t last_wch = 0; + enum token_t last_token; + enum token_t ellipsis_token; + int step; + char last_charcode[16]; + size_t last_charcode_len = 0; + const char *last_str = NULL; + int mapidx; + struct localedef_t *copy_locale = NULL; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_CTYPE' must be free. */ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. 
*/ + if (nowtok == tok_copy) + { + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_string) + { + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); + + skip_category: + do + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok != tok_eof && now->tok != tok_end); + + if (now->tok != tok_eof + || (now = lr_token (ldfile, charmap, NULL, NULL, verbose), + now->tok == tok_eof)) + lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE"); + else if (now->tok != tok_lc_ctype) + { + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_CTYPE"); + lr_ignore_rest (ldfile, 0); + } + else + lr_ignore_rest (ldfile, 1); + + return; + } + + if (! ignore_content) + { + /* Get the locale definition. */ + copy_locale = load_locale (LC_CTYPE, now->val.str.startmb, + repertoire_name, charmap, NULL); + if ((copy_locale->avail & CTYPE_LOCALE) == 0) + { + /* Not yet loaded. So do it now. */ + if (locfile_read (copy_locale, charmap) != 0) + goto skip_category; + } + + if (copy_locale->categories[LC_CTYPE].ctype == NULL) + return; + } + + lr_ignore_rest (ldfile, 1); + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + nowtok = now->tok; + } + + /* Prepare the data structures. */ + ctype_startup (ldfile, result, charmap, copy_locale, ignore_content); + ctype = result->categories[LC_CTYPE].ctype; + + /* Remember the repertoire we use. */ + if (!ignore_content) + ctype->repertoire = repertoire; + + while (1) + { + unsigned long int class_bit = 0; + unsigned long int class256_bit = 0; + int handle_digits = 0; + + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { + case tok_charclass: + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok == tok_ident || now->tok == tok_string) + { + ctype_class_new (ldfile, ctype, now->val.str.startmb); + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_semicolon) + break; + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + } + if (now->tok != tok_eol) + SYNTAX_ERROR (_("\ +%s: syntax error in definition of new character class"), "LC_CTYPE"); + break; + + case tok_charconv: + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok == tok_ident || now->tok == tok_string) + { + ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap); + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_semicolon) + break; + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + } + if (now->tok != tok_eol) + SYNTAX_ERROR (_("\ +%s: syntax error in definition of new character map"), "LC_CTYPE"); + break; + + case tok_class: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* We simply forget the `class' keyword and use the following + operand to determine the bit. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_ident || now->tok == tok_string) + { + /* Must can be one of the predefined class names. */ + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0) + break; + if (cnt >= ctype->nr_charclass) + { + /* OK, it's a new class. 
*/ + ctype_class_new (ldfile, ctype, now->val.str.startmb); + + class_bit = _ISwbit (ctype->nr_charclass - 1); + } + else + { + class_bit = _ISwbit (cnt); + + free (now->val.str.startmb); + } + } + else if (now->tok == tok_digit) + goto handle_tok_digit; + else if (now->tok < tok_upper || now->tok > tok_blank) + goto err_label; + else + { + class_bit = BITw (now->tok); + class256_bit = BIT (now->tok); + } + + /* The next character must be a semicolon. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_semicolon) + goto err_label; + goto read_charclass; + + case tok_upper: + case tok_lower: + case tok_alpha: + case tok_alnum: + case tok_space: + case tok_cntrl: + case tok_punct: + case tok_graph: + case tok_print: + case tok_xdigit: + case tok_blank: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + class_bit = BITw (now->tok); + class256_bit = BIT (now->tok); + handle_digits = 0; + read_charclass: + ctype->class_done |= class_bit; + last_token = tok_none; + ellipsis_token = tok_none; + step = 1; + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok != tok_eol && now->tok != tok_eof) + { + uint32_t wch; + struct charseq *seq; + + if (ellipsis_token == tok_none) + { + if (get_character (now, charmap, repertoire, &seq, &wch)) + goto err_label; + + if (!ignore_content && seq != NULL && seq->nbytes == 1) + /* Yep, we can store information about this byte + sequence. */ + ctype->class256_collection[seq->bytes[0]] |= class256_bit; + + if (!ignore_content && wch != ILLEGAL_CHAR_VALUE + && class_bit != 0) + /* We have the UCS4 position. */ + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + + last_token = now->tok; + /* Terminate the string. 
*/ + if (last_token == tok_bsymbol) + { + now->val.str.startmb[now->val.str.lenmb] = '\0'; + last_str = now->val.str.startmb; + } + else + last_str = NULL; + last_wch = wch; + memcpy (last_charcode, now->val.charcode.bytes, 16); + last_charcode_len = now->val.charcode.nbytes; + + if (!ignore_content && handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max += 10; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max += 10; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (!ignore_content && handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + lr_ignore_rest (ldfile, 0); + break; + } + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + else + { + /* Now it gets complicated. We have to resolve the + ellipsis problem. First we must distinguish between + the different kind of ellipsis and this must match the + tokens we have seen. */ + assert (last_token != tok_none); + + if (last_token != now->tok) + { + lr_error (ldfile, _("\ +ellipsis range must be marked by two operands of same type")); + lr_ignore_rest (ldfile, 0); + break; + } + + if (last_token == tok_bsymbol) + { + if (ellipsis_token == tok_ellipsis3) + lr_error (ldfile, _("with symbolic name range values \ +the absolute ellipsis `...' must not be used")); + + charclass_symbolic_ellipsis (ldfile, ctype, charmap, + repertoire, now, last_str, + class256_bit, class_bit, + (ellipsis_token + == tok_ellipsis4 + ? 
10 : 16), + ignore_content, + handle_digits, step); + } + else if (last_token == tok_ucs4) + { + if (ellipsis_token != tok_ellipsis2) + lr_error (ldfile, _("\ +with UCS range values one must use the hexadecimal symbolic ellipsis `..'")); + + charclass_ucs4_ellipsis (ldfile, ctype, charmap, + repertoire, now, last_wch, + class256_bit, class_bit, + ignore_content, handle_digits, + step); + } + else + { + assert (last_token == tok_charcode); + + if (ellipsis_token != tok_ellipsis3) + lr_error (ldfile, _("\ +with character code range values one must use the absolute ellipsis `...'")); + + charclass_charcode_ellipsis (ldfile, ctype, charmap, + repertoire, now, + last_charcode, + last_charcode_len, + class256_bit, class_bit, + ignore_content, + handle_digits); + } + + /* Now we have used the last value. */ + last_token = tok_none; + } + + /* Next we expect a semicolon or the end of the line. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_eol || now->tok == tok_eof) + break; + + if (last_token != tok_none + && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2) + { + if (now->tok == tok_ellipsis2_2) + { + now->tok = tok_ellipsis2; + step = 2; + } + else if (now->tok == tok_ellipsis4_2) + { + now->tok = tok_ellipsis4; + step = 2; + } + + ellipsis_token = now->tok; + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + continue; + } + + if (now->tok != tok_semicolon) + goto err_label; + + /* And get the next character. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + + ellipsis_token = tok_none; + step = 1; + } + break; + + case tok_digit: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + handle_tok_digit: + class_bit = _ISwdigit; + class256_bit = _ISdigit; + handle_digits = 1; + goto read_charclass; + + case tok_outdigit: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (ctype->outdigits_act != 0) + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), + "LC_CTYPE", "outdigit"); + class_bit = 0; + class256_bit = 0; + handle_digits = 2; + goto read_charclass; + + case tok_toupper: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + mapidx = 0; + goto read_mapping; + + case tok_tolower: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + mapidx = 1; + goto read_mapping; + + case tok_map: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* We simply forget the `map' keyword and use the following + operand to determine the mapping. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_ident || now->tok == tok_string) + { + size_t cnt; + + for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt) + if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) + break; + + if (cnt < ctype->map_collection_nr) + free (now->val.str.startmb); + else + /* OK, it's a new map. */ + ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap); + + mapidx = cnt; + } + else if (now->tok < tok_toupper || now->tok > tok_tolower) + goto err_label; + else + mapidx = now->tok - tok_toupper; + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + /* This better should be a semicolon. */ + if (now->tok != tok_semicolon) + goto err_label; + + read_mapping: + /* Test whether this mapping was already defined. 
*/ + if (ctype->tomap_done[mapidx]) + { + lr_error (ldfile, _("duplicated definition for mapping `%s'"), + ctype->mapnames[mapidx]); + lr_ignore_rest (ldfile, 0); + break; + } + ctype->tomap_done[mapidx] = 1; + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok != tok_eol && now->tok != tok_eof) + { + struct charseq *from_seq; + uint32_t from_wch; + struct charseq *to_seq; + uint32_t to_wch; + + /* Every pair starts with an opening brace. */ + if (now->tok != tok_open_brace) + goto err_label; + + /* Next comes the from-value. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (get_character (now, charmap, repertoire, &from_seq, + &from_wch) != 0) + goto err_label; + + /* The next is a comma. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_comma) + goto err_label; + + /* And the other value. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (get_character (now, charmap, repertoire, &to_seq, + &to_wch) != 0) + goto err_label; + + /* And the last thing is the closing brace. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_close_brace) + goto err_label; + + if (!ignore_content) + { + /* Check whether the mapping converts from an ASCII value + to a non-ASCII value. */ + if (from_seq != NULL && from_seq->nbytes == 1 + && isascii (from_seq->bytes[0]) + && to_seq != NULL && (to_seq->nbytes != 1 + || !isascii (to_seq->bytes[0]))) + ctype->to_nonascii = 1; + + if (mapidx < 2 && from_seq != NULL && to_seq != NULL + && from_seq->nbytes == 1 && to_seq->nbytes == 1) + /* We can use this value. */ + ctype->map256_collection[mapidx][from_seq->bytes[0]] + = to_seq->bytes[0]; + + if (from_wch != ILLEGAL_CHAR_VALUE + && to_wch != ILLEGAL_CHAR_VALUE) + /* Both correct values. 
*/ + *find_idx (ctype, &ctype->map_collection[mapidx], + &ctype->map_collection_max[mapidx], + &ctype->map_collection_act[mapidx], + from_wch) = to_wch; + } + + /* Now comes a semicolon or the end of the line/file. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_semicolon) + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + } + break; + + case tok_translit_start: + /* Ignore the entire translit section with its peculiar syntax + if we don't need the input. */ + if (ignore_content) + { + do + { + lr_ignore_rest (ldfile, 0); + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + } + while (now->tok != tok_translit_end && now->tok != tok_eof); + + if (now->tok == tok_eof) + lr_error (ldfile, _(\ +"%s: `translit_start' section does not end with `translit_end'"), + "LC_CTYPE"); + + break; + } + + /* The rest of the line better should be empty. */ + lr_ignore_rest (ldfile, 1); + + /* We count here the number of allocated entries in the `translit' + array. */ + cnt = 0; + + ldfile->translate_strings = 1; + ldfile->return_widestr = 1; + + /* We proceed until we see the `translit_end' token. */ + while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose), + now->tok != tok_translit_end && now->tok != tok_eof) + { + if (now->tok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (now->tok == tok_include) + { + /* We have to include locale. */ + const char *locale_name; + const char *repertoire_name; + struct translit_include_t *include_stmt, **include_ptr; + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + /* This should be a string or an identifier. In any + case something to name a locale. */ + if (now->tok != tok_string && now->tok != tok_ident) + { + translit_syntax: + lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE"); + lr_ignore_rest (ldfile, 0); + continue; + } + locale_name = now->val.str.startmb; + + /* Next should be a semicolon. 
*/ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_semicolon) + goto translit_syntax; + + /* Now the repertoire name. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if ((now->tok != tok_string && now->tok != tok_ident) + || now->val.str.startmb == NULL) + goto translit_syntax; + repertoire_name = now->val.str.startmb; + if (repertoire_name[0] == '\0') + /* Ignore the empty string. */ + repertoire_name = NULL; + + /* Save the include statement for later processing. */ + include_stmt = (struct translit_include_t *) + xmalloc (sizeof (struct translit_include_t)); + include_stmt->copy_locale = locale_name; + include_stmt->copy_repertoire = repertoire_name; + include_stmt->next = NULL; + + include_ptr = &ctype->translit_include; + while (*include_ptr != NULL) + include_ptr = &(*include_ptr)->next; + *include_ptr = include_stmt; + + /* The rest of the line must be empty. */ + lr_ignore_rest (ldfile, 1); + + /* Make sure the locale is read. */ + add_to_readlist (LC_CTYPE, locale_name, repertoire_name, + 1, NULL); + continue; + } + else if (now->tok == tok_default_missing) + { + uint32_t *wstr; + + while (1) + { + /* We expect a single character or string as the + argument. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + wstr = read_widestring (ldfile, now, charmap, + repertoire); + + if (wstr != NULL) + { + if (ctype->default_missing != NULL) + { + lr_error (ldfile, _("\ +%s: duplicate `default_missing' definition"), "LC_CTYPE"); + WITH_CUR_LOCALE (error_at_line (0, 0, + ctype->default_missing_file, + ctype->default_missing_lineno, + _("\ +previous definition was here"))); + } + else + { + ctype->default_missing = wstr; + ctype->default_missing_file = ldfile->fname; + ctype->default_missing_lineno = ldfile->lineno; + } + /* We can have more entries, ignore them. */ + lr_ignore_rest (ldfile, 0); + break; + } + else if (wstr == (uint32_t *) -1l) + /* This was an syntax error. 
*/ + break; + + /* Maybe there is another replacement we can use. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_eol || now->tok == tok_eof) + { + /* Nothing found. We tell the user. */ + lr_error (ldfile, _("\ +%s: no representable `default_missing' definition found"), "LC_CTYPE"); + break; + } + if (now->tok != tok_semicolon) + goto translit_syntax; + } + + continue; + } + else if (now->tok == tok_translit_ignore) + { + read_translit_ignore_entry (ldfile, ctype, charmap, + repertoire); + continue; + } + + read_translit_entry (ldfile, ctype, now, charmap, repertoire); + } + ldfile->return_widestr = 0; + + if (now->tok == tok_eof) + lr_error (ldfile, _(\ +"%s: `translit_start' section does not end with `translit_end'"), + "LC_CTYPE"); + + break; + + case tok_ident: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* This could mean one of several things. First test whether + it's a character class name. */ + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0) + break; + if (cnt < ctype->nr_charclass) + { + class_bit = _ISwbit (cnt); + class256_bit = cnt <= 11 ? _ISbit (cnt) : 0; + free (now->val.str.startmb); + goto read_charclass; + } + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) + break; + if (cnt < ctype->map_collection_nr) + { + mapidx = cnt; + free (now->val.str.startmb); + goto read_mapping; + } + break; + + case tok_end: + /* Next we assume `LC_CTYPE'. 
*/ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_CTYPE"); + else if (now->tok != tok_lc_ctype) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_CTYPE"); + lr_ignore_rest (ldfile, now->tok == tok_lc_ctype); + return; + + default: + err_label: + if (now->tok != tok_eof) + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE"); +} + + +/* Subroutine of set_class_defaults, below. For each character CH in the inclusive range FROM..TO, look CH up in CHARMAP, first under its one-character name and then under the name built with the U%08X format. If the charmap encodes it in exactly one byte, set the class bit BITPOS for that byte in class256_collection; a missing entry (unless be_quiet is set) or a multibyte entry is reported instead. The wide-character class bit is always set in class_collection under the value CH itself. */ +static void +set_one_default (struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + int bitpos, int from, int to) +{ + char tmp[2]; + int ch; + int bit = _ISbit (bitpos); + int bitw = _ISwbit (bitpos); + /* Define string: TMP becomes a NUL-terminated one-character name whose first byte is replaced for each CH below. */ + strcpy (tmp, "?"); + + for (ch = from; ch <= to; ++ch) + { + struct charseq *seq; + tmp[0] = ch; + + seq = charmap_find_value (charmap, tmp, 1); + if (seq == NULL) + { + char buf[10]; + sprintf (buf, "U%08X", ch); + seq = charmap_find_value (charmap, buf, 9); + } + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp)); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", tmp)); + else + ctype->class256_collection[seq->bytes[0]] |= bit; + + /* No need to search here, the ASCII value is also the Unicode + value. 
*/ + ELEM (ctype, class_collection, , ch) |= bitw; + } +} + /* Apply the POSIX defaults for the classes upper, lower, alpha, digit, space, xdigit, blank, graph and print and for the toupper and tolower maps when the locale source did not define them (per class_done and tomap_done), always derive alnum from alpha and digit, and complete the outdigit table to ten entries. */ +static void +set_class_defaults (struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ +#define set_default(bitpos, from, to) \ + set_one_default (ctype, charmap, bitpos, from, to) + + /* This function defines the default values for the classes and conversions + according to POSIX.2 2.5.2.1. + It may seem that the order of these if-blocks is arbitrary but it is NOT. + Don't move them unless you know what you do! */ + + /* Set default values if keyword was not present. */ + if ((ctype->class_done & BITw (tok_upper)) == 0) + /* "If this keyword [upper] is not specified, the uppercase letters + `A' through `Z', ..., shall automatically belong to this class, + with implementation defined character values." [P1003.2, 2.5.2.1] */ + set_default (BITPOS (tok_upper), 'A', 'Z'); + + if ((ctype->class_done & BITw (tok_lower)) == 0) + /* "If this keyword [lower] is not specified, the lowercase letters + `a' through `z', ..., shall automatically belong to this class, + with implementation defined character values." [P1003.2, 2.5.2.1] */ + set_default (BITPOS (tok_lower), 'a', 'z'); + + if ((ctype->class_done & BITw (tok_alpha)) == 0) + { + /* Table 2-6 in P1003.2 says that characters in class `upper' or + class `lower' *must* be in class `alpha'. 
*/ + unsigned long int mask = BIT (tok_upper) | BIT (tok_lower); + unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower); + + for (size_t cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_alpha); + + for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & maskw) != 0) + ctype->class_collection[cnt] |= BITw (tok_alpha); + } + + if ((ctype->class_done & BITw (tok_digit)) == 0) + /* "If this keyword [digit] is not specified, the digits `0' through + `9', ..., shall automatically belong to this class, with + implementation-defined character values." [P1003.2, 2.5.2.1] */ + set_default (BITPOS (tok_digit), '0', '9'); + + /* Class alnum, derived unconditionally: "Only characters specified for the `alpha' and `digit' keyword + shall be specified. Characters specified for the keyword `alpha' + and `digit' are automatically included in this class." */ + { + unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit); + unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit); + + for (size_t cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_alnum); + + for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & maskw) != 0) + ctype->class_collection[cnt] |= BITw (tok_alnum); + } + + if ((ctype->class_done & BITw (tok_space)) == 0) + /* "If this keyword [space] is not specified, the characters <space>, + <form-feed>, <newline>, <carriage-return>, <tab>, and + <vertical-tab>, ..., shall automatically belong to this class, + with implementation-defined character values." 
[P1003.2, 2.5.2.1] */ + { + struct charseq *seq; + + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) + seq = charmap_find_value (charmap, "SP", 2); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000020", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<space>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. */ + ELEM (ctype, class_collection, , L' ') |= BITw (tok_space); + + seq = charmap_find_value (charmap, "form-feed", 9); + if (seq == NULL) + seq = charmap_find_value (charmap, "U0000000C", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<form-feed>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<form-feed>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. */ + ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space); + + + seq = charmap_find_value (charmap, "newline", 7); + if (seq == NULL) + seq = charmap_find_value (charmap, "U0000000A", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<newline>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<newline>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. 
*/ + ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space); + + + seq = charmap_find_value (charmap, "carriage-return", 15); + if (seq == NULL) + seq = charmap_find_value (charmap, "U0000000D", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<carriage-return>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<carriage-return>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. */ + ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space); + + + seq = charmap_find_value (charmap, "tab", 3); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000009", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<tab>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<tab>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. */ + ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space); + + + seq = charmap_find_value (charmap, "vertical-tab", 12); + if (seq == NULL) + seq = charmap_find_value (charmap, "U0000000B", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<vertical-tab>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<vertical-tab>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. 
*/ + ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space); + } + + if ((ctype->class_done & BITw (tok_xdigit)) == 0) + /* "If this keyword [xdigit] is not specified, the digits `0' to `9', the + uppercase letters `A' through `F', and the lowercase letters `a' + through `f', ..., shall automatically belong to this class, with + implementation defined character values." [P1003.2, 2.5.2.1] */ + { + set_default (BITPOS (tok_xdigit), '0', '9'); + set_default (BITPOS (tok_xdigit), 'A', 'F'); + set_default (BITPOS (tok_xdigit), 'a', 'f'); + } + + if ((ctype->class_done & BITw (tok_blank)) == 0) + /* "If this keyword [blank] is unspecified, the characters <space> and + <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */ + { + struct charseq *seq; + + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) + seq = charmap_find_value (charmap, "SP", 2); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000020", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<space>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); + + /* No need to search. */ + ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank); + + + seq = charmap_find_value (charmap, "tab", 3); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000009", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<tab>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<tab>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); + + /* No need to search. 
*/ + ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank); + } + + if ((ctype->class_done & BITw (tok_graph)) == 0) + /* "If this keyword [graph] is not specified, characters specified for + the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct', + shall belong to this character class." [P1003.2, 2.5.2.1] */ + { + unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | + BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); + unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) | + BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) | + BITw (tok_punct); + + for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & maskw) != 0) + ctype->class_collection[cnt] |= BITw (tok_graph); + + for (size_t cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_graph); + } + + if ((ctype->class_done & BITw (tok_print)) == 0) + /* "If this keyword [print] is not provided, characters specified for + the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct', + and the <space> character shall belong to this character class." 
+ [P1003.2, 2.5.2.1] */ + { + unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | + BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); + unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) | + BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) | + BITw (tok_punct); + struct charseq *seq; + + for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & maskw) != 0) + ctype->class_collection[cnt] |= BITw (tok_print); + + for (size_t cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_print); + + + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) + seq = charmap_find_value (charmap, "SP", 2); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000020", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<space>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print); + + /* No need to search. */ + ELEM (ctype, class_collection, , L' ') |= BITw (tok_print); + } + + if (ctype->tomap_done[0] == 0) + /* "If this keyword [toupper] is not specified, the lowercase letters + `a' through `z', and their corresponding uppercase letters `A' to + `Z', ..., shall automatically be included, with implementation- + defined character values." 
[P1003.2, 2.5.2.1] */ + { + char tmp[4]; + int ch; + + strcpy (tmp, "<?>"); + + for (ch = 'a'; ch <= 'z'; ++ch) + { + struct charseq *seq_from, *seq_to; + + tmp[1] = (char) ch; + + seq_from = charmap_find_value (charmap, &tmp[1], 1); + if (seq_from == NULL) + { + char buf[10]; + sprintf (buf, "U%08X", ch); + seq_from = charmap_find_value (charmap, buf, 9); + } + if (seq_from == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp)); + } + else if (seq_from->nbytes != 1) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' needed as default value not representable with one byte"), + "LC_CTYPE", tmp)); + } + else + { + /* This conversion is implementation defined. */ + tmp[1] = (char) (ch + ('A' - 'a')); + seq_to = charmap_find_value (charmap, &tmp[1], 1); + if (seq_to == NULL) + { + char buf[10]; + sprintf (buf, "U%08X", ch + ('A' - 'a')); + seq_to = charmap_find_value (charmap, buf, 9); + } + if (seq_to == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp)); + } + else if (seq_to->nbytes != 1) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' needed as default value not representable with one byte"), + "LC_CTYPE", tmp)); + } + else + /* The index [0] is determined by the order of the + `ctype_map_newP' calls in `ctype_startup'. */ + ctype->map256_collection[0][seq_from->bytes[0]] + = seq_to->bytes[0]; + } + + /* No need to search. */ + ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a'); + } + } + + if (ctype->tomap_done[1] == 0) + /* "If this keyword [tolower] is not specified, the mapping shall be + the reverse mapping of the one specified to `toupper'." 
[P1003.2] */ + { + for (size_t cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt) + if (ctype->map_collection[0][cnt] != 0) + ELEM (ctype, map_collection, [1], + ctype->map_collection[0][cnt]) + = ctype->charnames[cnt]; + + for (size_t cnt = 0; cnt < 256; ++cnt) + if (ctype->map256_collection[0][cnt] != 0) + ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt; + } + + if (ctype->outdigits_act != 10) + { + if (ctype->outdigits_act != 0) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit")); + + for (size_t cnt = ctype->outdigits_act; cnt < 10; ++cnt) + { + ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, + (char *) digits + cnt, + 1); + + if (ctype->mboutdigits[cnt] == NULL) + ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, + longnames[cnt], + strlen (longnames[cnt])); + + if (ctype->mboutdigits[cnt] == NULL) + ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, + uninames[cnt], 9); + + if (ctype->mboutdigits[cnt] == NULL) + { + /* Provide a replacement. */ + WITH_CUR_LOCALE (error (0, 0, _("\ +no output digits defined and none of the standard names in the charmap"))); + + ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, + sizeof (struct charseq) + + 1); + + /* This is better than nothing: use the single byte digits[cnt] as the multibyte form of the digit. */ + ctype->mboutdigits[cnt]->bytes[0] = digits[cnt]; + ctype->mboutdigits[cnt]->nbytes = 1; + } + + ctype->wcoutdigits[cnt] = L'0' + cnt; + } + + ctype->outdigits_act = 10; + } + +#undef set_default +} + + +/* Initialize T as an empty table: all three level arrays are NULL with zero size and zero allocation. Assumes t->p and t->q have already been set. */ +static inline void +wctype_table_init (struct wctype_table *t) +{ + t->level1 = NULL; + t->level1_alloc = t->level1_size = 0; + t->level2 = NULL; + t->level2_alloc = t->level2_size = 0; + t->level3 = NULL; + t->level3_alloc = t->level3_size = 0; +} + +/* Retrieve an entry. 
*/ /* wctype_table_get returns 1 if the bit for WC is set in T and 0 otherwise, including when WC lies beyond level1_size or when the level1 or level2 slot on its path is EMPTY. The low 5 bits of WC select the bit within a level3 word, the next p bits the word within a level3 block, and the next q bits the slot within a level2 block. */ +static inline int +wctype_table_get (struct wctype_table *t, uint32_t wc) +{ + uint32_t index1 = wc >> (t->q + t->p + 5); + if (index1 < t->level1_size) + { + uint32_t lookup1 = t->level1[index1]; + if (lookup1 != EMPTY) + { + uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1)) + + (lookup1 << t->q); + uint32_t lookup2 = t->level2[index2]; + if (lookup2 != EMPTY) + { + uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1)) + + (lookup2 << t->p); + uint32_t lookup3 = t->level3[index3]; + uint32_t index4 = wc & 0x1f; + + return (lookup3 >> index4) & 1; + } + } + } + return 0; +} + +/* Add one entry: set the bit for WC in T. The level1 array is grown (and padded with EMPTY) until it covers WC, and a fresh level2 or level3 block is appended whenever the slot on the path to WC is still EMPTY. New level2 slots start out EMPTY and new level3 words start out 0. */ +static void +wctype_table_add (struct wctype_table *t, uint32_t wc) +{ + uint32_t index1 = wc >> (t->q + t->p + 5); + uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1); + uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1); + uint32_t index4 = wc & 0x1f; + size_t i, i1, i2; + + if (index1 >= t->level1_size) + { + if (index1 >= t->level1_alloc) + { + size_t alloc = 2 * t->level1_alloc; + if (alloc <= index1) + alloc = index1 + 1; + t->level1 = (uint32_t *) xrealloc ((char *) t->level1, + alloc * sizeof (uint32_t)); + t->level1_alloc = alloc; + } + while (index1 >= t->level1_size) + t->level1[t->level1_size++] = EMPTY; + } + + if (t->level1[index1] == EMPTY) + { + if (t->level2_size == t->level2_alloc) + { + size_t alloc = 2 * t->level2_alloc + 1; + t->level2 = (uint32_t *) xrealloc ((char *) t->level2, + (alloc << t->q) * sizeof (uint32_t)); + t->level2_alloc = alloc; + } + i1 = t->level2_size << t->q; + i2 = (t->level2_size + 1) << t->q; + for (i = i1; i < i2; i++) + t->level2[i] = EMPTY; + t->level1[index1] = t->level2_size++; + } + + index2 += t->level1[index1] << t->q; + + if (t->level2[index2] == EMPTY) + { + if (t->level3_size == t->level3_alloc) + { + size_t alloc = 2 * t->level3_alloc + 1; + t->level3 = (uint32_t *) xrealloc ((char *) t->level3, + (alloc << t->p) * sizeof (uint32_t)); + t->level3_alloc = alloc; + } + i1 = t->level3_size << t->p; + i2 = 
(t->level3_size + 1) << t->p; + for (i = i1; i < i2; i++) + t->level3[i] = 0; + t->level2[index2] = t->level3_size++; + } + + index3 += t->level2[index2] << t->p; + + t->level3[index3] |= (uint32_t)1 << index4; +} + +/* Finalize and shrink: merge identical level3 blocks and then identical level2 blocks, renumber the references to them, compute result_size, write the header and the three levels into FILE, and free whichever level arrays were allocated. NOTE(review): reorder3 and reorder2 are variable-length arrays sized by the block counts, so a table that never received an entry would give them length zero; confirm callers never pass such a table. */ +static void +add_locale_wctype_table (struct locale_file *file, struct wctype_table *t) +{ + size_t i, j, k; + uint32_t reorder3[t->level3_size]; + uint32_t reorder2[t->level2_size]; + uint32_t level2_offset, level3_offset; + + /* Uniquify level3 blocks: K counts the distinct blocks kept so far and reorder3 maps each old block number to its new one. */ + k = 0; + for (j = 0; j < t->level3_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (uint32_t)) == 0) + break; + /* Relocate block j to block i. */ + reorder3[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (uint32_t)); + k++; + } + } + t->level3_size = k; + + for (i = 0; i < (t->level2_size << t->q); i++) + if (t->level2[i] != EMPTY) + t->level2[i] = reorder3[t->level2[i]]; + + /* Uniquify level2 blocks in the same way, recording the renumbering in reorder2. */ + k = 0; + for (j = 0; j < t->level2_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)) == 0) + break; + /* Relocate block j to block i. 
*/ + reorder2[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)); + k++; + } + } + t->level2_size = k; + + for (i = 0; i < t->level1_size; i++) + if (t->level1[i] != EMPTY) + t->level1[i] = reorder2[t->level1[i]]; + + t->result_size = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t) + + (t->level3_size << t->p) * sizeof (uint32_t); + + level2_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t); + level3_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t); + + start_locale_structure (file); + add_locale_uint32 (file, t->q + t->p + 5); + add_locale_uint32 (file, t->level1_size); + add_locale_uint32 (file, t->p + 5); + add_locale_uint32 (file, (1 << t->q) - 1); + add_locale_uint32 (file, (1 << t->p) - 1); + + for (i = 0; i < t->level1_size; i++) + add_locale_uint32 + (file, + t->level1[i] == EMPTY + ? 0 + : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); + + for (i = 0; i < (t->level2_size << t->q); i++) + add_locale_uint32 + (file, + t->level2[i] == EMPTY + ? 0 + : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset); + + add_locale_uint32_array (file, t->level3, t->level3_size << t->p); + end_locale_structure (file); + + if (t->level1_alloc > 0) + free (t->level1); + if (t->level2_alloc > 0) + free (t->level2); + if (t->level3_alloc > 0) + free (t->level3); +} + +/* Flattens the included transliterations into a translit list. + Inserts them in the list at `cursor', and returns the new cursor. 
*/ +static struct translit_t ** +translit_flatten (struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct translit_t **cursor) +{ + while (ctype->translit_include != NULL) + { + const char *copy_locale = ctype->translit_include->copy_locale; + const char *copy_repertoire = ctype->translit_include->copy_repertoire; + struct localedef_t *other; + + /* Unchain the include statement. During the depth-first traversal + we don't want to visit any locale more than once. */ + ctype->translit_include = ctype->translit_include->next; + + other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap); + + if (other == NULL || other->categories[LC_CTYPE].ctype == NULL) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: transliteration data from locale `%s' not available"), + "LC_CTYPE", copy_locale)); + } + else + { + struct locale_ctype_t *other_ctype = + other->categories[LC_CTYPE].ctype; + + /* Recurse first so that the includes of the included locale are folded in before its own entries. */ cursor = translit_flatten (other_ctype, charmap, cursor); + assert (other_ctype->translit_include == NULL); + + if (other_ctype->translit != NULL) + { + /* Insert the other_ctype->translit list at *cursor: find its last node, link that node to the old *cursor, and make *cursor point at the head of the inserted list. */ + struct translit_t *endp = other_ctype->translit; + while (endp->next != NULL) + endp = endp->next; + + endp->next = *cursor; + *cursor = other_ctype->translit; + + /* Avoid any risk of circular lists: other_ctype no longer references the moved entries. */ + other_ctype->translit = NULL; + + /* Later insertions go after the entries just spliced in. */ cursor = &endp->next; + } + + /* CTYPE takes over default_missing from the included locale when it has none of its own. */ if (ctype->default_missing == NULL) + ctype->default_missing = other_ctype->default_missing; + } + } + + return cursor; +} + +static void +allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ + size_t idx, nr; + const void *key; + size_t len; + void *vdata; + void *curs; + + /* You wonder about this amount of memory? This is only because some + users do not manage to address the array with unsigned values or + data types with range >= 256. '\200' would result in the array + index -128. 
To help these poor people we duplicate the entries for + 128 up to 255 below the entry for \0. */ + ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t)); + ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t)); + ctype->class_b = (uint32_t **) + xmalloc (ctype->nr_charclass * sizeof (uint32_t *)); + ctype->class_3level = (struct wctype_table *) + xmalloc (ctype->nr_charclass * sizeof (struct wctype_table)); + + /* This is the array accessed using the multibyte string elements. */ + for (idx = 0; idx < 256; ++idx) + ctype->ctype_b[128 + idx] = ctype->class256_collection[idx]; + + /* Mirror first 127 entries. We must take care that entry -1 is not + mirrored because EOF == -1. */ + for (idx = 0; idx < 127; ++idx) + ctype->ctype_b[idx] = ctype->ctype_b[256 + idx]; + + /* The 32 bit array contains all characters < 0x100. */ + for (idx = 0; idx < ctype->class_collection_act; ++idx) + if (ctype->charnames[idx] < 0x100) + ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx]; + + for (nr = 0; nr < ctype->nr_charclass; nr++) + { + ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t)); + + /* We only set CLASS_B for the bits in the ISO C classes, not + the user defined classes. The number should not change but + who knows. 
*/ +#define LAST_ISO_C_BIT 11 + if (nr <= LAST_ISO_C_BIT) + for (idx = 0; idx < 256; ++idx) + if (ctype->class256_collection[idx] & _ISbit (nr)) + ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f); + } + + for (nr = 0; nr < ctype->nr_charclass; nr++) + { + struct wctype_table *t; + + t = &ctype->class_3level[nr]; + t->p = 4; /* or: 5 */ + t->q = 7; /* or: 6 */ + wctype_table_init (t); + + for (idx = 0; idx < ctype->class_collection_act; ++idx) + if (ctype->class_collection[idx] & _ISwbit (nr)) + wctype_table_add (t, ctype->charnames[idx]); + + if (verbose) + WITH_CUR_LOCALE (fprintf (stderr, _("\ +%s: table for class \"%s\": %lu bytes\n"), + "LC_CTYPE", ctype->classnames[nr], + (unsigned long int) t->result_size)); + } + + /* Room for table of mappings. */ + ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *)); + ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr + * sizeof (uint32_t *)); + ctype->map_3level = (struct wctrans_table *) + xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table)); + + /* Fill in all mappings. */ + for (idx = 0; idx < 2; ++idx) + { + unsigned int idx2; + + /* Allocate table. */ + ctype->map_b[idx] = (uint32_t *) + xmalloc ((256 + 128) * sizeof (uint32_t)); + + /* Copy values from collection. */ + for (idx2 = 0; idx2 < 256; ++idx2) + ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2]; + + /* Mirror first 127 entries. We must take care not to map entry + -1 because EOF == -1. */ + for (idx2 = 0; idx2 < 127; ++idx2) + ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2]; + + /* EOF must map to EOF. */ + ctype->map_b[idx][127] = EOF; + } + + for (idx = 0; idx < ctype->map_collection_nr; ++idx) + { + unsigned int idx2; + + /* Allocate table. */ + ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t)); + + /* Copy values from collection. Default is identity mapping. 
*/ + for (idx2 = 0; idx2 < 256; ++idx2) + ctype->map32_b[idx][idx2] = + (ctype->map_collection[idx][idx2] != 0 + ? ctype->map_collection[idx][idx2] + : idx2); + } + + for (nr = 0; nr < ctype->map_collection_nr; nr++) + { + struct wctrans_table *t; + + t = &ctype->map_3level[nr]; + t->p = 7; + t->q = 9; + wctrans_table_init (t); + + for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx) + if (ctype->map_collection[nr][idx] != 0) + wctrans_table_add (t, ctype->charnames[idx], + ctype->map_collection[nr][idx]); + + if (verbose) + WITH_CUR_LOCALE (fprintf (stderr, _("\ +%s: table for map \"%s\": %lu bytes\n"), + "LC_CTYPE", ctype->mapnames[nr], + (unsigned long int) t->result_size)); + } + + /* Extra array for class and map names. */ + ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass + * sizeof (uint32_t)); + ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr + * sizeof (uint32_t)); + + ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1); + ctype->map_offset = ctype->class_offset + ctype->nr_charclass; + + /* Array for width information. Because the expected widths are very + small (never larger than 2) we use only one single byte. This + saves space. + We put only printable characters in the table. wcwidth is specified + to return -1 for non-printable characters. Doing the check here + saves a run-time check. + But we put L'\0' in the table. This again saves a run-time check. */ + { + struct wcwidth_table *t; + + t = &ctype->width; + t->p = 7; + t->q = 9; + wcwidth_table_init (t); + + /* First set all the printable characters of the character set to + the default width. 
*/ + curs = NULL; + while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0) + { + struct charseq *data = (struct charseq *) vdata; + + if (data->ucs4 == UNINITIALIZED_CHAR_VALUE) + data->ucs4 = repertoire_find_value (ctype->repertoire, + data->name, len); + + if (data->ucs4 != ILLEGAL_CHAR_VALUE) + { + uint32_t *class_bits = + find_idx (ctype, &ctype->class_collection, NULL, + &ctype->class_collection_act, data->ucs4); + + if (class_bits != NULL && (*class_bits & BITw (tok_print))) + wcwidth_table_add (t, data->ucs4, charmap->width_default); + } + } + + /* Now add the explicitly specified widths. */ + if (charmap->width_rules != NULL) + for (size_t cnt = 0; cnt < charmap->nwidth_rules; ++cnt) + { + unsigned char bytes[charmap->mb_cur_max]; + int nbytes = charmap->width_rules[cnt].from->nbytes; + + /* We have the range of character for which the width is + specified described using byte sequences of the multibyte + charset. We have to convert this to UCS4 now. And we + cannot simply convert the beginning and the end of the + sequence, we have to iterate over the byte sequence and + convert it for every single character. */ + memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes); + + while (nbytes < charmap->width_rules[cnt].to->nbytes + || memcmp (bytes, charmap->width_rules[cnt].to->bytes, + nbytes) <= 0) + { + /* Find the UCS value for `bytes'. */ + int inner; + uint32_t wch; + struct charseq *seq = + charmap_find_symbol (charmap, (char *) bytes, nbytes); + + if (seq == NULL) + wch = ILLEGAL_CHAR_VALUE; + else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE) + wch = seq->ucs4; + else + wch = repertoire_find_value (ctype->repertoire, seq->name, + strlen (seq->name)); + + if (wch != ILLEGAL_CHAR_VALUE) + { + /* Store the value. 
*/ + uint32_t *class_bits = + find_idx (ctype, &ctype->class_collection, NULL, + &ctype->class_collection_act, wch); + + if (class_bits != NULL && (*class_bits & BITw (tok_print))) + wcwidth_table_add (t, wch, + charmap->width_rules[cnt].width); + } + + /* "Increment" the bytes sequence. */ + inner = nbytes - 1; + while (inner >= 0 && bytes[inner] == 0xff) + --inner; + + if (inner < 0) + { + /* We have to extend the byte sequence. */ + if (nbytes >= charmap->width_rules[cnt].to->nbytes) + break; + + bytes[0] = 1; + memset (&bytes[1], 0, nbytes); + ++nbytes; + } + else + { + ++bytes[inner]; + while (++inner < nbytes) + bytes[inner] = 0; + } + } + } + + /* Set the width of L'\0' to 0. */ + wcwidth_table_add (t, 0, 0); + + if (verbose) + WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"), + "LC_CTYPE", (unsigned long int) t->result_size)); + } + + /* Set MB_CUR_MAX. */ + ctype->mb_cur_max = charmap->mb_cur_max; + + /* Now determine the table for the transliteration information. + + XXX It is not yet clear to me whether it is worth implementing a + complicated algorithm which uses a hash table to locate the entries. + For now I'll use a simple array which can be searching using binary + search. */ + if (ctype->translit_include != NULL) + /* Traverse the locales mentioned in the `include' statements in a + depth-first way and fold in their transliteration information. */ + translit_flatten (ctype, charmap, &ctype->translit); + + if (ctype->translit != NULL) + { + /* First count how many entries we have. This is the upper limit + since some entries from the included files might be overwritten. */ + size_t number = 0; + struct translit_t *runp = ctype->translit; + struct translit_t **sorted; + size_t from_len, to_len; + + while (runp != NULL) + { + ++number; + runp = runp->next; + } + + /* Next we allocate an array large enough and fill in the values. 
*/ + sorted = (struct translit_t **) alloca (number + * sizeof (struct translit_t **)); + runp = ctype->translit; + number = 0; + do + { + /* Search for the place where to insert this string. + XXX Better use a real sorting algorithm later. */ + size_t idx = 0; + int replace = 0; + + while (idx < number) + { + int res = wcscmp ((const wchar_t *) sorted[idx]->from, + (const wchar_t *) runp->from); + if (res == 0) + { + replace = 1; + break; + } + if (res > 0) + break; + ++idx; + } + + if (replace) + sorted[idx] = runp; + else + { + memmove (&sorted[idx + 1], &sorted[idx], + (number - idx) * sizeof (struct translit_t *)); + sorted[idx] = runp; + ++number; + } + + runp = runp->next; + } + while (runp != NULL); + + /* The next step is putting all the possible transliteration + strings in one memory block so that we can write it out. + We need several different blocks: + - index to the from-string array + - from-string array + - index to the to-string array + - to-string array. + */ + from_len = to_len = 0; + for (size_t cnt = 0; cnt < number; ++cnt) + { + struct translit_to_t *srunp; + from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1; + srunp = sorted[cnt]->to; + while (srunp != NULL) + { + to_len += wcslen ((const wchar_t *) srunp->str) + 1; + srunp = srunp->next; + } + /* Plus one for the extra NUL character marking the end of + the list for the current entry. */ + ++to_len; + } + + /* We can allocate the arrays for the results. 
*/ + ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t)); + ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t)); + ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t)); + ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t)); + + from_len = 0; + to_len = 0; + for (size_t cnt = 0; cnt < number; ++cnt) + { + size_t len; + struct translit_to_t *srunp; + + ctype->translit_from_idx[cnt] = from_len; + ctype->translit_to_idx[cnt] = to_len; + + len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1; + wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len], + (const wchar_t *) sorted[cnt]->from, len); + from_len += len; + + ctype->translit_to_idx[cnt] = to_len; + srunp = sorted[cnt]->to; + while (srunp != NULL) + { + len = wcslen ((const wchar_t *) srunp->str) + 1; + wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len], + (const wchar_t *) srunp->str, len); + to_len += len; + srunp = srunp->next; + } + ctype->translit_to_tbl[to_len++] = L'\0'; + } + + /* Store the information about the length. */ + ctype->translit_idx_size = number; + ctype->translit_from_tbl_size = from_len * sizeof (uint32_t); + ctype->translit_to_tbl_size = to_len * sizeof (uint32_t); + } + else + { + ctype->translit_from_idx = no_str; + ctype->translit_from_tbl = no_str; + ctype->translit_to_tbl = no_str; + ctype->translit_idx_size = 0; + ctype->translit_from_tbl_size = 0; + ctype->translit_to_tbl_size = 0; + } +} diff --git a/REORG.TODO/locale/programs/ld-identification.c b/REORG.TODO/locale/programs/ld-identification.c new file mode 100644 index 0000000000..3e3ea649d7 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-identification.c @@ -0,0 +1,416 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_IDENTIFICATION locale. */ +struct locale_identification_t +{ + const char *title; + const char *source; + const char *address; + const char *contact; + const char *email; + const char *tel; + const char *fax; + const char *language; + const char *territory; + const char *audience; + const char *application; + const char *abbreviation; + const char *revision; + const char *date; + const char *category[__LC_LAST]; +}; + + +static const char *category_name[__LC_LAST] = +{ + [LC_CTYPE] = "LC_CTYPE", + [LC_NUMERIC] = "LC_NUMERIC", + [LC_TIME] = "LC_TIME", + [LC_COLLATE] = "LC_COLLATE", + [LC_MONETARY] = "LC_MONETARY", + [LC_MESSAGES] = "LC_MESSAGES", + [LC_ALL] = "LC_ALL", + [LC_PAPER] = "LC_PAPER", + [LC_NAME] = "LC_NAME", + [LC_ADDRESS] = "LC_ADDRESS", + [LC_TELEPHONE] = "LC_TELEPHONE", + [LC_MEASUREMENT] = "LC_MEASUREMENT", + [LC_IDENTIFICATION] = "LC_IDENTIFICATION" +}; + + +static void +identification_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + { + 
locale->categories[LC_IDENTIFICATION].identification = + (struct locale_identification_t *) + xcalloc (1, sizeof (struct locale_identification_t)); + + locale->categories[LC_IDENTIFICATION].identification->category[LC_ALL] = + ""; + } + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +identification_finish (struct localedef_t *locale, + const struct charmap_t *charmap) +{ + struct locale_identification_t *identification + = locale->categories[LC_IDENTIFICATION].identification; + int nothing = 0; + size_t num; + + /* Now resolve copying and also handle completely missing definitions. */ + if (identification == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_IDENTIFICATION] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_IDENTIFICATION, + from->copy_name[LC_IDENTIFICATION], + from->repertoire_name, charmap); + while (from->categories[LC_IDENTIFICATION].identification == NULL + && from->copy_name[LC_IDENTIFICATION] != NULL); + + identification = locale->categories[LC_IDENTIFICATION].identification + = from->categories[LC_IDENTIFICATION].identification; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (identification == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_IDENTIFICATION")); + identification_startup (NULL, locale, 0); + identification + = locale->categories[LC_IDENTIFICATION].identification; + nothing = 1; + } + } + +#define TEST_ELEM(cat) \ + if (identification->cat == NULL) \ + { \ + if (verbose && ! 
nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_IDENTIFICATION", #cat)); \ + identification->cat = ""; \ + } + + TEST_ELEM (title); + TEST_ELEM (source); + TEST_ELEM (address); + TEST_ELEM (contact); + TEST_ELEM (email); + TEST_ELEM (tel); + TEST_ELEM (fax); + TEST_ELEM (language); + TEST_ELEM (territory); + TEST_ELEM (audience); + TEST_ELEM (application); + TEST_ELEM (abbreviation); + TEST_ELEM (revision); + TEST_ELEM (date); + + for (num = 0; num < __LC_LAST; ++num) + { + /* We don't accept/parse this category, so skip it early. */ + if (num == LC_ALL) + continue; + + if (identification->category[num] == NULL) + { + if (verbose && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: no identification for category `%s'"), + "LC_IDENTIFICATION", category_name[num])); + identification->category[num] = ""; + } + else + { + /* Only list the standards we care about. This is based on the + ISO 30112 WD10 [2014] standard which supersedes all previous + revisions of the ISO 14652 standard. 
*/ + static const char * const standards[] = + { + "posix:1993", + "i18n:2004", + "i18n:2012", + }; + size_t i; + bool matched = false; + + for (i = 0; i < sizeof (standards) / sizeof (standards[0]); ++i) + if (strcmp (identification->category[num], standards[i]) == 0) + matched = true; + + if (matched != true) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: unknown standard `%s' for category `%s'"), + "LC_IDENTIFICATION", + identification->category[num], + category_name[num])); + } + } +} + + +void +identification_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_identification_t *identification + = locale->categories[LC_IDENTIFICATION].identification; + struct locale_file file; + size_t num; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_IDENTIFICATION)); + add_locale_string (&file, identification->title); + add_locale_string (&file, identification->source); + add_locale_string (&file, identification->address); + add_locale_string (&file, identification->contact); + add_locale_string (&file, identification->email); + add_locale_string (&file, identification->tel); + add_locale_string (&file, identification->fax); + add_locale_string (&file, identification->language); + add_locale_string (&file, identification->territory); + add_locale_string (&file, identification->audience); + add_locale_string (&file, identification->application); + add_locale_string (&file, identification->abbreviation); + add_locale_string (&file, identification->revision); + add_locale_string (&file, identification->date); + start_locale_structure (&file); + for (num = 0; num < __LC_LAST; ++num) + if (num != LC_ALL) + add_locale_string (&file, identification->category[num]); + end_locale_structure (&file); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_IDENTIFICATION, "LC_IDENTIFICATION", + &file); +} + + +/* The parser for the LC_IDENTIFICATION section of the locale definition. 
*/ +void +identification_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct locale_identification_t *identification; + struct token *now; + struct token *arg; + struct token *cattok; + int category; + enum token_t nowtok; + + /* The rest of the line containing `LC_IDENTIFICATION' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, + tok_lc_identification, LC_IDENTIFICATION, + "LC_IDENTIFICATION", ignore_content); + return; + } + + /* Prepare the data structures. */ + identification_startup (ldfile, result, ignore_content); + identification = result->categories[LC_IDENTIFICATION].identification; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. 
*/ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (identification->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_IDENTIFICATION", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_IDENTIFICATION", #cat); \ + identification->cat = ""; \ + } \ + else if (!ignore_content) \ + identification->cat = arg->val.str.startmb; \ + break + + STR_ELEM (title); + STR_ELEM (source); + STR_ELEM (address); + STR_ELEM (contact); + STR_ELEM (email); + STR_ELEM (tel); + STR_ELEM (fax); + STR_ELEM (language); + STR_ELEM (territory); + STR_ELEM (audience); + STR_ELEM (application); + STR_ELEM (abbreviation); + STR_ELEM (revision); + STR_ELEM (date); + + case tok_category: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* We expect two operands. */ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_string && arg->tok != tok_ident) + goto err_label; + /* Next is a semicolon. */ + cattok = lr_token (ldfile, charmap, result, NULL, verbose); + if (cattok->tok != tok_semicolon) + goto err_label; + /* Now a LC_xxx identifier. 
*/ + cattok = lr_token (ldfile, charmap, result, NULL, verbose); + switch (cattok->tok) + { +#define CATEGORY(lname, uname) \ + case tok_lc_##lname: \ + category = LC_##uname; \ + break + + CATEGORY (identification, IDENTIFICATION); + CATEGORY (ctype, CTYPE); + CATEGORY (collate, COLLATE); + CATEGORY (time, TIME); + CATEGORY (numeric, NUMERIC); + CATEGORY (monetary, MONETARY); + CATEGORY (messages, MESSAGES); + CATEGORY (paper, PAPER); + CATEGORY (name, NAME); + CATEGORY (address, ADDRESS); + CATEGORY (telephone, TELEPHONE); + CATEGORY (measurement, MEASUREMENT); + + default: + goto err_label; + } + if (identification->category[category] != NULL) + { + lr_error (ldfile, _("\ +%s: duplicate category version definition"), "LC_IDENTIFICATION"); + free (arg->val.str.startmb); + } + else + identification->category[category] = arg->val.str.startmb; + break; + + case tok_end: + /* Next we assume `LC_IDENTIFICATION'. */ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_IDENTIFICATION"); + else if (arg->tok != tok_lc_identification) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_IDENTIFICATION"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_identification); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_IDENTIFICATION"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_IDENTIFICATION"); +} diff --git a/REORG.TODO/locale/programs/ld-measurement.c b/REORG.TODO/locale/programs/ld-measurement.c new file mode 100644 index 0000000000..92c849ebfb --- /dev/null +++ b/REORG.TODO/locale/programs/ld-measurement.c @@ -0,0 +1,233 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_MEASUREMENT locale. */ +struct locale_measurement_t +{ + unsigned char measurement; +}; + + +static void +measurement_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_MEASUREMENT].measurement = + (struct locale_measurement_t *) + xcalloc (1, sizeof (struct locale_measurement_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +measurement_finish (struct localedef_t *locale, + const struct charmap_t *charmap) +{ + struct locale_measurement_t *measurement = + locale->categories[LC_MEASUREMENT].measurement; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (measurement == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_MEASUREMENT] != NULL) + { + /* Find the copying locale. 
This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_MEASUREMENT, + from->copy_name[LC_MEASUREMENT], + from->repertoire_name, charmap); + while (from->categories[LC_MEASUREMENT].measurement == NULL + && from->copy_name[LC_MEASUREMENT] != NULL); + + measurement = locale->categories[LC_MEASUREMENT].measurement + = from->categories[LC_MEASUREMENT].measurement; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (measurement == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_MEASUREMENT")); + measurement_startup (NULL, locale, 0); + measurement = locale->categories[LC_MEASUREMENT].measurement; + nothing = 1; + } + } + + if (measurement->measurement == 0) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_MEASUREMENT", "measurement")); + /* Use as the default value the value of the i18n locale. */ + measurement->measurement = 1; + } + else + { + if (measurement->measurement > 3) + WITH_CUR_LOCALE (error (0, 0, _("%s: invalid value for field `%s'"), + "LC_MEASUREMENT", "measurement")); + } +} + + +void +measurement_output (struct localedef_t *locale, + const struct charmap_t *charmap, const char *output_path) +{ + struct locale_measurement_t *measurement = + locale->categories[LC_MEASUREMENT].measurement; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_MEASUREMENT)); + add_locale_char (&file, measurement->measurement); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_MEASUREMENT, "LC_MEASUREMENT", &file); +} + + +/* The parser for the LC_MEASUREMENT section of the locale definition. 
*/ +void +measurement_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct locale_measurement_t *measurement; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* The rest of the line containing `LC_MEASUREMENT' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, + tok_lc_measurement, LC_MEASUREMENT, "LC_MEASUREMENT", + ignore_content); + return; + } + + /* Prepare the data structures. */ + measurement_startup (ldfile, result, ignore_content); + measurement = result->categories[LC_MEASUREMENT].measurement; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define INT_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_number) \ + goto err_label; \ + else if (measurement->cat != 0) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_MEASUREMENT", #cat); \ + else if (!ignore_content) \ + measurement->cat = arg->val.num; \ + break + + INT_ELEM (measurement); + + case tok_end: + /* Next we assume `LC_MEASUREMENT'. 
*/ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_MEASUREMENT"); + else if (arg->tok != tok_lc_measurement) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_MEASUREMENT"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_measurement); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_MEASUREMENT"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), + "LC_MEASUREMENT"); +} diff --git a/REORG.TODO/locale/programs/ld-messages.c b/REORG.TODO/locale/programs/ld-messages.c new file mode 100644 index 0000000000..bc86ec0ccf --- /dev/null +++ b/REORG.TODO/locale/programs/ld-messages.c @@ -0,0 +1,315 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <sys/types.h> +#include <regex.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "linereader.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_MESSAGES locale. */ +struct locale_messages_t +{ + const char *yesexpr; + const char *noexpr; + const char *yesstr; + const char *nostr; +}; + + +static void +messages_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_MESSAGES].messages = + (struct locale_messages_t *) xcalloc (1, + sizeof (struct locale_messages_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +messages_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_messages_t *messages + = locale->categories[LC_MESSAGES].messages; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (messages == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_MESSAGES] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_MESSAGES, from->copy_name[LC_MESSAGES], + from->repertoire_name, charmap); + while (from->categories[LC_MESSAGES].messages == NULL + && from->copy_name[LC_MESSAGES] != NULL); + + messages = locale->categories[LC_MESSAGES].messages + = from->categories[LC_MESSAGES].messages; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (messages == NULL) + { + if (! 
be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_MESSAGES")); + messages_startup (NULL, locale, 0); + messages = locale->categories[LC_MESSAGES].messages; + nothing = 1; + } + } + + /* The fields YESSTR and NOSTR are optional. */ + if (messages->yesstr == NULL) + messages->yesstr = ""; + if (messages->nostr == NULL) + messages->nostr = ""; + + if (messages->yesexpr == NULL) + { + if (! be_quiet && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' undefined"), + "LC_MESSAGES", "yesexpr")); + messages->yesexpr = "^[yY]"; + } + else if (messages->yesexpr[0] == '\0') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value for field `%s' must not be an empty string"), + "LC_MESSAGES", "yesexpr")); + } + else + { + int result; + regex_t re; + + /* Test whether it are correct regular expressions. */ + result = regcomp (&re, messages->yesexpr, REG_EXTENDED); + if (result != 0 && !be_quiet) + { + char errbuf[BUFSIZ]; + + (void) regerror (result, &re, errbuf, BUFSIZ); + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: no correct regular expression for field `%s': %s"), + "LC_MESSAGES", "yesexpr", errbuf)); + } + else if (result != 0) + regfree (&re); + } + + if (messages->noexpr == NULL) + { + if (! be_quiet && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' undefined"), + "LC_MESSAGES", "noexpr")); + messages->noexpr = "^[nN]"; + } + else if (messages->noexpr[0] == '\0') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value for field `%s' must not be an empty string"), + "LC_MESSAGES", "noexpr")); + } + else + { + int result; + regex_t re; + + /* Test whether it are correct regular expressions. 
*/ + result = regcomp (&re, messages->noexpr, REG_EXTENDED); + if (result != 0 && !be_quiet) + { + char errbuf[BUFSIZ]; + + (void) regerror (result, &re, errbuf, BUFSIZ); + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: no correct regular expression for field `%s': %s"), + "LC_MESSAGES", "noexpr", errbuf)); + } + else if (result != 0) + regfree (&re); + } +} + + +void +messages_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_messages_t *messages + = locale->categories[LC_MESSAGES].messages; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)); + add_locale_string (&file, messages->yesexpr); + add_locale_string (&file, messages->noexpr); + add_locale_string (&file, messages->yesstr); + add_locale_string (&file, messages->nostr); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_MESSAGES, "LC_MESSAGES", &file); +} + + +/* The parser for the LC_MESSAGES section of the locale definition. */ +void +messages_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_messages_t *messages; + struct token *now; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_MESSAGES' must be free. */ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_messages, + LC_MESSAGES, "LC_MESSAGES", ignore_content); + return; + } + + /* Prepare the data structures. 
*/ + messages_startup (ldfile, result, ignore_content); + messages = result->categories[LC_MESSAGES].messages; + + while (1) + { + struct token *arg; + + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + if (messages->cat != NULL) \ + { \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_MESSAGES", #cat); \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_string) \ + goto syntax_error; \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_MESSAGES", #cat); \ + messages->cat = ""; \ + } \ + else if (!ignore_content) \ + messages->cat = now->val.str.startmb; \ + break + + STR_ELEM (yesexpr); + STR_ELEM (noexpr); + STR_ELEM (yesstr); + STR_ELEM (nostr); + + case tok_end: + /* Next we assume `LC_MESSAGES'. */ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_MESSAGES"); + else if (arg->tok != tok_lc_messages) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_MESSAGES"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_messages); + return; + + default: + syntax_error: + SYNTAX_ERROR (_("%s: syntax error"), "LC_MESSAGES"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. 
*/ + lr_error (ldfile, _("%s: premature end of file"), "LC_MESSAGES"); +} diff --git a/REORG.TODO/locale/programs/ld-monetary.c b/REORG.TODO/locale/programs/ld-monetary.c new file mode 100644 index 0000000000..cd50541603 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-monetary.c @@ -0,0 +1,757 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <byteswap.h> +#include <langinfo.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "linereader.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_MONETARY locale. 
*/ +struct locale_monetary_t +{ + const char *int_curr_symbol; + const char *currency_symbol; + const char *mon_decimal_point; + const char *mon_thousands_sep; + uint32_t mon_decimal_point_wc; + uint32_t mon_thousands_sep_wc; + char *mon_grouping; + size_t mon_grouping_len; + const char *positive_sign; + const char *negative_sign; + signed char int_frac_digits; + signed char frac_digits; + signed char p_cs_precedes; + signed char p_sep_by_space; + signed char n_cs_precedes; + signed char n_sep_by_space; + signed char p_sign_posn; + signed char n_sign_posn; + signed char int_p_cs_precedes; + signed char int_p_sep_by_space; + signed char int_n_cs_precedes; + signed char int_n_sep_by_space; + signed char int_p_sign_posn; + signed char int_n_sign_posn; + const char *duo_int_curr_symbol; + const char *duo_currency_symbol; + signed char duo_int_frac_digits; + signed char duo_frac_digits; + signed char duo_p_cs_precedes; + signed char duo_p_sep_by_space; + signed char duo_n_cs_precedes; + signed char duo_n_sep_by_space; + signed char duo_p_sign_posn; + signed char duo_n_sign_posn; + signed char duo_int_p_cs_precedes; + signed char duo_int_p_sep_by_space; + signed char duo_int_n_cs_precedes; + signed char duo_int_n_sep_by_space; + signed char duo_int_p_sign_posn; + signed char duo_int_n_sign_posn; + uint32_t uno_valid_from; + uint32_t uno_valid_to; + uint32_t duo_valid_from; + uint32_t duo_valid_to; + uint32_t conversion_rate[2]; + char *crncystr; +}; + + +/* The content iof the field int_curr_symbol has to be taken from + ISO-4217. We test for correct values. */ +#define DEFINE_INT_CURR(str) str, +static const char *const valid_int_curr[] = + { +# include "../iso-4217.def" + }; +#define NR_VALID_INT_CURR ((sizeof (valid_int_curr) \ + / sizeof (valid_int_curr[0]))) +#undef DEFINE_INT_CURR + + +/* Prototypes for local functions. 
*/ +static int curr_strcmp (const char *s1, const char **s2); + + +static void +monetary_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + { + struct locale_monetary_t *monetary; + + locale->categories[LC_MONETARY].monetary = monetary = + (struct locale_monetary_t *) xmalloc (sizeof (*monetary)); + + memset (monetary, '\0', sizeof (struct locale_monetary_t)); + + monetary->mon_grouping = NULL; + monetary->mon_grouping_len = 0; + + monetary->int_frac_digits = -2; + monetary->frac_digits = -2; + monetary->p_cs_precedes = -2; + monetary->p_sep_by_space = -2; + monetary->n_cs_precedes = -2; + monetary->n_sep_by_space = -2; + monetary->p_sign_posn = -2; + monetary->n_sign_posn = -2; + monetary->int_p_cs_precedes = -2; + monetary->int_p_sep_by_space = -2; + monetary->int_n_cs_precedes = -2; + monetary->int_n_sep_by_space = -2; + monetary->int_p_sign_posn = -2; + monetary->int_n_sign_posn = -2; + monetary->duo_int_frac_digits = -2; + monetary->duo_frac_digits = -2; + monetary->duo_p_cs_precedes = -2; + monetary->duo_p_sep_by_space = -2; + monetary->duo_n_cs_precedes = -2; + monetary->duo_n_sep_by_space = -2; + monetary->duo_p_sign_posn = -2; + monetary->duo_n_sign_posn = -2; + monetary->duo_int_p_cs_precedes = -2; + monetary->duo_int_p_sep_by_space = -2; + monetary->duo_int_n_cs_precedes = -2; + monetary->duo_int_n_sep_by_space = -2; + monetary->duo_int_p_sign_posn = -2; + monetary->duo_int_n_sign_posn = -2; + } + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +monetary_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_monetary_t *monetary + = locale->categories[LC_MONETARY].monetary; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (monetary == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. 
*/ + if (locale->copy_name[LC_MONETARY] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_MONETARY, from->copy_name[LC_MONETARY], + from->repertoire_name, charmap); + while (from->categories[LC_MONETARY].monetary == NULL + && from->copy_name[LC_MONETARY] != NULL); + + monetary = locale->categories[LC_MONETARY].monetary + = from->categories[LC_MONETARY].monetary; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (monetary == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_MONETARY")); + monetary_startup (NULL, locale, 0); + monetary = locale->categories[LC_MONETARY].monetary; + nothing = 1; + } + } + +#define TEST_ELEM(cat, initval) \ + if (monetary->cat == NULL) \ + { \ + if (! be_quiet && ! nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_MONETARY", #cat)); \ + monetary->cat = initval; \ + } + + TEST_ELEM (int_curr_symbol, ""); + TEST_ELEM (currency_symbol, ""); + TEST_ELEM (mon_decimal_point, "."); + TEST_ELEM (mon_thousands_sep, ""); + TEST_ELEM (positive_sign, ""); + TEST_ELEM (negative_sign, ""); + + /* The international currency symbol must come from ISO 4217. */ + if (monetary->int_curr_symbol != NULL) + { + if (strlen (monetary->int_curr_symbol) != 4) + { + if (! be_quiet && ! 
nothing) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value of field `int_curr_symbol' has wrong length"), + "LC_MONETARY")); + } + else + { /* Check the first three characters against ISO 4217 */ + char symbol[4]; + strncpy (symbol, monetary->int_curr_symbol, 3); + symbol[3] = '\0'; + if (bsearch (symbol, valid_int_curr, NR_VALID_INT_CURR, + sizeof (const char *), + (comparison_fn_t) curr_strcmp) == NULL + && !be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value of field `int_curr_symbol' does \ +not correspond to a valid name in ISO 4217"), + "LC_MONETARY")); + } + } + + /* The decimal point must not be empty. This is not said explicitly + in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be + != "". */ + if (monetary->mon_decimal_point == NULL) + { + if (! be_quiet && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_MONETARY", "mon_decimal_point")); + monetary->mon_decimal_point = "."; + } + else if (monetary->mon_decimal_point[0] == '\0' && ! be_quiet && ! nothing) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value for field `%s' must not be an empty string"), + "LC_MONETARY", "mon_decimal_point")); + } + if (monetary->mon_decimal_point_wc == L'\0') + monetary->mon_decimal_point_wc = L'.'; + + if (monetary->mon_grouping_len == 0) + { + if (! be_quiet && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_MONETARY", "mon_grouping")); + + monetary->mon_grouping = (char *) "\177"; + monetary->mon_grouping_len = 1; + } + +#undef TEST_ELEM +#define TEST_ELEM(cat, min, max, initval) \ + if (monetary->cat == -2) \ + { \ + if (! be_quiet && ! 
nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_MONETARY", #cat)); \ + monetary->cat = initval; \ + } \ + else if ((monetary->cat < min || monetary->cat > max) \ + && min < max \ + && !be_quiet && !nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value for field `%s' must be in range %d...%d"), \ + "LC_MONETARY", #cat, min, max)) + + TEST_ELEM (int_frac_digits, 1, 0, -1); + TEST_ELEM (frac_digits, 1, 0, -1); + TEST_ELEM (p_cs_precedes, -1, 1, -1); + TEST_ELEM (p_sep_by_space, -1, 2, -1); + TEST_ELEM (n_cs_precedes, -1, 1, -1); + TEST_ELEM (n_sep_by_space, -1, 2, -1); + TEST_ELEM (p_sign_posn, -1, 4, -1); + TEST_ELEM (n_sign_posn, -1, 4, -1); + + /* The non-POSIX.2 extensions are optional. */ + if (monetary->duo_int_curr_symbol == NULL) + monetary->duo_int_curr_symbol = monetary->int_curr_symbol; + if (monetary->duo_currency_symbol == NULL) + monetary->duo_currency_symbol = monetary->currency_symbol; + + if (monetary->duo_int_frac_digits == -2) + monetary->duo_int_frac_digits = monetary->int_frac_digits; + if (monetary->duo_frac_digits == -2) + monetary->duo_frac_digits = monetary->frac_digits; + +#undef TEST_ELEM +#define TEST_ELEM(cat, alt, min, max) \ + if (monetary->cat == -2) \ + monetary->cat = monetary->alt; \ + else if ((monetary->cat < min || monetary->cat > max) && !be_quiet \ + && ! 
nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value for field `%s' must be in range %d...%d"), \ + "LC_MONETARY", #cat, min, max)) + + TEST_ELEM (int_p_cs_precedes, p_cs_precedes, -1, 1); + TEST_ELEM (int_p_sep_by_space, p_sep_by_space, -1, 2); + TEST_ELEM (int_n_cs_precedes, n_cs_precedes, -1, 1); + TEST_ELEM (int_n_sep_by_space, n_sep_by_space, -1, 2); + TEST_ELEM (int_p_sign_posn, p_sign_posn, -1, 4); + TEST_ELEM (int_n_sign_posn, n_sign_posn, -1, 4); + + TEST_ELEM (duo_p_cs_precedes, p_cs_precedes, -1, 1); + TEST_ELEM (duo_p_sep_by_space, p_sep_by_space, -1, 2); + TEST_ELEM (duo_n_cs_precedes, n_cs_precedes, -1, 1); + TEST_ELEM (duo_n_sep_by_space, n_sep_by_space, -1, 2); + TEST_ELEM (duo_int_p_cs_precedes, int_p_cs_precedes, -1, 1); + TEST_ELEM (duo_int_p_sep_by_space, int_p_sep_by_space, -1, 2); + TEST_ELEM (duo_int_n_cs_precedes, int_n_cs_precedes, -1, 1); + TEST_ELEM (duo_int_n_sep_by_space, int_n_sep_by_space, -1, 2); + TEST_ELEM (duo_p_sign_posn, p_sign_posn, -1, 4); + TEST_ELEM (duo_n_sign_posn, n_sign_posn, -1, 4); + TEST_ELEM (duo_int_p_sign_posn, int_p_sign_posn, -1, 4); + TEST_ELEM (duo_int_n_sign_posn, int_n_sign_posn, -1, 4); + + if (monetary->uno_valid_from == 0) + monetary->uno_valid_from = 10101; + if (monetary->uno_valid_to == 0) + monetary->uno_valid_to = 99991231; + if (monetary->duo_valid_from == 0) + monetary->duo_valid_from = 10101; + if (monetary->duo_valid_to == 0) + monetary->duo_valid_to = 99991231; + + if (monetary->conversion_rate[0] == 0) + { + monetary->conversion_rate[0] = 1; + monetary->conversion_rate[1] = 1; + } + + /* Create the crncystr entry. */ + monetary->crncystr = (char *) xmalloc (strlen (monetary->currency_symbol) + + 2); + monetary->crncystr[0] = monetary->p_cs_precedes ? 
'-' : '+'; + strcpy (&monetary->crncystr[1], monetary->currency_symbol); +} + + +void +monetary_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_monetary_t *monetary + = locale->categories[LC_MONETARY].monetary; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)); + add_locale_string (&file, monetary->int_curr_symbol); + add_locale_string (&file, monetary->currency_symbol); + add_locale_string (&file, monetary->mon_decimal_point); + add_locale_string (&file, monetary->mon_thousands_sep); + add_locale_raw_data (&file, monetary->mon_grouping, + monetary->mon_grouping_len); + add_locale_string (&file, monetary->positive_sign); + add_locale_string (&file, monetary->negative_sign); + add_locale_char (&file, monetary->int_frac_digits); + add_locale_char (&file, monetary->frac_digits); + add_locale_char (&file, monetary->p_cs_precedes); + add_locale_char (&file, monetary->p_sep_by_space); + add_locale_char (&file, monetary->n_cs_precedes); + add_locale_char (&file, monetary->n_sep_by_space); + add_locale_char (&file, monetary->p_sign_posn); + add_locale_char (&file, monetary->n_sign_posn); + add_locale_string (&file, monetary->crncystr); + add_locale_char (&file, monetary->int_p_cs_precedes); + add_locale_char (&file, monetary->int_p_sep_by_space); + add_locale_char (&file, monetary->int_n_cs_precedes); + add_locale_char (&file, monetary->int_n_sep_by_space); + add_locale_char (&file, monetary->int_p_sign_posn); + add_locale_char (&file, monetary->int_n_sign_posn); + add_locale_string (&file, monetary->duo_int_curr_symbol); + add_locale_string (&file, monetary->duo_currency_symbol); + add_locale_char (&file, monetary->duo_int_frac_digits); + add_locale_char (&file, monetary->duo_frac_digits); + add_locale_char (&file, monetary->duo_p_cs_precedes); + add_locale_char (&file, monetary->duo_p_sep_by_space); + add_locale_char (&file, monetary->duo_n_cs_precedes); + 
add_locale_char (&file, monetary->duo_n_sep_by_space); + add_locale_char (&file, monetary->duo_int_p_cs_precedes); + add_locale_char (&file, monetary->duo_int_p_sep_by_space); + add_locale_char (&file, monetary->duo_int_n_cs_precedes); + add_locale_char (&file, monetary->duo_int_n_sep_by_space); + add_locale_char (&file, monetary->duo_p_sign_posn); + add_locale_char (&file, monetary->duo_n_sign_posn); + add_locale_char (&file, monetary->duo_int_p_sign_posn); + add_locale_char (&file, monetary->duo_int_n_sign_posn); + add_locale_uint32 (&file, monetary->uno_valid_from); + add_locale_uint32 (&file, monetary->uno_valid_to); + add_locale_uint32 (&file, monetary->duo_valid_from); + add_locale_uint32 (&file, monetary->duo_valid_to); + add_locale_uint32_array (&file, monetary->conversion_rate, 2); + add_locale_uint32 (&file, monetary->mon_decimal_point_wc); + add_locale_uint32 (&file, monetary->mon_thousands_sep_wc); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_MONETARY, "LC_MONETARY", &file); +} + + +static int +curr_strcmp (const char *s1, const char **s2) +{ + return strcmp (s1, *s2); +} + + +/* The parser for the LC_MONETARY section of the locale definition. */ +void +monetary_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_monetary_t *monetary; + struct token *now; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_MONETARY' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. 
*/ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_monetary, + LC_MONETARY, "LC_MONETARY", ignore_content); + return; + } + + /* Prepare the data structures. */ + monetary_startup (ldfile, result, ignore_content); + monetary = result->categories[LC_MONETARY].monetary; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + now = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (now->tok != tok_string) \ + goto err_label; \ + else if (monetary->cat != NULL) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_MONETARY", #cat); \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_MONETARY", #cat); \ + monetary->cat = ""; \ + } \ + else if (!ignore_content) \ + monetary->cat = now->val.str.startmb; \ + lr_ignore_rest (ldfile, 1); \ + break + + STR_ELEM (int_curr_symbol); + STR_ELEM (currency_symbol); + STR_ELEM (positive_sign); + STR_ELEM (negative_sign); + STR_ELEM (duo_int_curr_symbol); + STR_ELEM (duo_currency_symbol); + +#define STR_ELEM_WC(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. 
*/ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + ldfile->return_widestr = 1; \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_string) \ + goto err_label; \ + if (monetary->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_MONETARY", #cat); \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_MONETARY", #cat); \ + monetary->cat = ""; \ + monetary->cat##_wc = L'\0'; \ + } \ + else if (now->val.str.startwc != NULL && now->val.str.lenwc > 2) \ + { \ + lr_error (ldfile, _("\ +%s: value for field `%s' must be a single character"), "LC_MONETARY", #cat); \ + } \ + else if (!ignore_content) \ + { \ + monetary->cat = now->val.str.startmb; \ + \ + if (now->val.str.startwc != NULL) \ + monetary->cat##_wc = *now->val.str.startwc; \ + } \ + ldfile->return_widestr = 0; \ + break + + STR_ELEM_WC (mon_decimal_point); + STR_ELEM_WC (mon_thousands_sep); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + now = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (now->tok != tok_minus1 && now->tok != tok_number) \ + goto err_label; \ + else if (monetary->cat != -2) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_MONETARY", #cat); \ + else if (!ignore_content) \ + monetary->cat = now->tok == tok_minus1 ? 
-1 : now->val.num; \ + break + + INT_ELEM (int_frac_digits); + INT_ELEM (frac_digits); + INT_ELEM (p_cs_precedes); + INT_ELEM (p_sep_by_space); + INT_ELEM (n_cs_precedes); + INT_ELEM (n_sep_by_space); + INT_ELEM (p_sign_posn); + INT_ELEM (n_sign_posn); + INT_ELEM (int_p_cs_precedes); + INT_ELEM (int_p_sep_by_space); + INT_ELEM (int_n_cs_precedes); + INT_ELEM (int_n_sep_by_space); + INT_ELEM (int_p_sign_posn); + INT_ELEM (int_n_sign_posn); + INT_ELEM (duo_int_frac_digits); + INT_ELEM (duo_frac_digits); + INT_ELEM (duo_p_cs_precedes); + INT_ELEM (duo_p_sep_by_space); + INT_ELEM (duo_n_cs_precedes); + INT_ELEM (duo_n_sep_by_space); + INT_ELEM (duo_p_sign_posn); + INT_ELEM (duo_n_sign_posn); + INT_ELEM (duo_int_p_cs_precedes); + INT_ELEM (duo_int_p_sep_by_space); + INT_ELEM (duo_int_n_cs_precedes); + INT_ELEM (duo_int_n_sep_by_space); + INT_ELEM (duo_int_p_sign_posn); + INT_ELEM (duo_int_n_sign_posn); + INT_ELEM (uno_valid_from); + INT_ELEM (uno_valid_to); + INT_ELEM (duo_valid_from); + INT_ELEM (duo_valid_to); + + case tok_mon_grouping: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_minus1 && now->tok != tok_number) + goto err_label; + else + { + size_t act = 0; + size_t max = 10; + char *grouping = ignore_content ? NULL : xmalloc (max); + + do + { + if (act + 1 >= max) + { + max *= 2; + grouping = xrealloc (grouping, max); + } + + if (act > 0 && grouping[act - 1] == '\177') + { + lr_error (ldfile, _("\ +%s: `-1' must be last entry in `%s' field"), + "LC_MONETARY", "mon_grouping"); + lr_ignore_rest (ldfile, 0); + break; + } + + if (now->tok == tok_minus1) + { + if (!ignore_content) + grouping[act++] = '\177'; + } + else if (now->val.num == 0) + { + /* A value of 0 disables grouping from here on but + we must not store a NUL character since this + terminates the string. 
Use something different + which must not be used otherwise. */ + if (!ignore_content) + grouping[act++] = '\377'; + } + else if (now->val.num > 126) + lr_error (ldfile, _("\ +%s: values for field `%s' must be smaller than 127"), + "LC_MONETARY", "mon_grouping"); + else if (!ignore_content) + grouping[act++] = now->val.num; + + /* Next must be semicolon. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_semicolon) + break; + + now = lr_token (ldfile, charmap, result, NULL, verbose); + } + while (now->tok == tok_minus1 || now->tok == tok_number); + + if (now->tok != tok_eol) + goto err_label; + + if (!ignore_content) + { + /* A single -1 means no grouping. */ + if (act == 1 && grouping[0] == '\177') + act--; + grouping[act++] = '\0'; + + monetary->mon_grouping = xrealloc (grouping, act); + monetary->mon_grouping_len = act; + } + } + break; + + case tok_conversion_rate: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_number) + goto err_label; + if (now->val.num == 0) + { + invalid_conversion_rate: + lr_error (ldfile, _("conversion rate value cannot be zero")); + if (!ignore_content) + { + monetary->conversion_rate[0] = 1; + monetary->conversion_rate[1] = 1; + } + break; + } + if (!ignore_content) + monetary->conversion_rate[0] = now->val.num; + /* Next must be a semicolon. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_semicolon) + goto err_label; + /* And another number. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_number) + goto err_label; + if (now->val.num == 0) + goto invalid_conversion_rate; + if (!ignore_content) + monetary->conversion_rate[1] = now->val.num; + /* The rest of the line must be empty. 
*/ + lr_ignore_rest (ldfile, 1); + break; + + case tok_end: + /* Next we assume `LC_MONETARY'. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_MONETARY"); + else if (now->tok != tok_lc_monetary) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_MONETARY"); + lr_ignore_rest (ldfile, now->tok == tok_lc_monetary); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_MONETARY"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_MONETARY"); +} diff --git a/REORG.TODO/locale/programs/ld-name.c b/REORG.TODO/locale/programs/ld-name.c new file mode 100644 index 0000000000..ee50ae7322 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-name.c @@ -0,0 +1,281 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_NAME locale. */ +struct locale_name_t +{ + const char *name_fmt; + const char *name_gen; + const char *name_mr; + const char *name_mrs; + const char *name_miss; + const char *name_ms; +}; + + +static void +name_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_NAME].name = + (struct locale_name_t *) xcalloc (1, sizeof (struct locale_name_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +name_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_name_t *name = locale->categories[LC_NAME].name; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (name == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_NAME] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_NAME, from->copy_name[LC_NAME], + from->repertoire_name, charmap); + while (from->categories[LC_NAME].name == NULL + && from->copy_name[LC_NAME] != NULL); + + name = locale->categories[LC_NAME].name + = from->categories[LC_NAME].name; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (name == NULL) + { + if (! 
be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_NAME")); + name_startup (NULL, locale, 0); + name = locale->categories[LC_NAME].name; + nothing = 1; + } + } + + if (name->name_fmt == NULL) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_NAME", "name_fmt")); + /* Use as the default value the value of the i18n locale. */ + name->name_fmt = "%p%t%g%t%m%t%f"; + } + else + { + /* We must check whether the format string contains only the + allowed escape sequences. */ + const char *cp = name->name_fmt; + + if (*cp == '\0') + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_NAME", "name_fmt")); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (*++cp == 'R') + /* Romanize-flag. */ + ++cp; + if (strchr ("dfFgGlomMpsSt", *cp) == NULL) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid escape sequence in field `%s'"), "LC_NAME", "name_fmt")); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (name->cat == NULL) \ + { \ + if (verbose && ! 
nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_NAME", #cat)); \ + name->cat = ""; \ + } + + TEST_ELEM (name_gen); + TEST_ELEM (name_mr); + TEST_ELEM (name_mrs); + TEST_ELEM (name_miss); + TEST_ELEM (name_ms); +} + + +void +name_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_name_t *name = locale->categories[LC_NAME].name; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_NAME)); + add_locale_string (&file, name->name_fmt); + add_locale_string (&file, name->name_gen); + add_locale_string (&file, name->name_mr); + add_locale_string (&file, name->name_mrs); + add_locale_string (&file, name->name_miss); + add_locale_string (&file, name->name_ms); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_NAME, "LC_NAME", &file); +} + + +/* The parser for the LC_NAME section of the locale definition. */ +void +name_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct locale_name_t *name; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* The rest of the line containing `LC_NAME' must be empty. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_name, + LC_NAME, "LC_NAME", ignore_content); + return; + } + + /* Prepare the data structures. */ + name_startup (ldfile, result, ignore_content); + name = result->categories[LC_NAME].name; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (name->cat != NULL) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_NAME", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_NAME", #cat); \ + name->cat = ""; \ + } \ + else if (!ignore_content) \ + name->cat = arg->val.str.startmb; \ + break + + STR_ELEM (name_fmt); + STR_ELEM (name_gen); + STR_ELEM (name_mr); + STR_ELEM (name_mrs); + STR_ELEM (name_miss); + STR_ELEM (name_ms); + + case tok_end: + /* Next we assume `LC_NAME'. */ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_NAME"); + else if (arg->tok != tok_lc_name) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_NAME"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_name); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_NAME"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. 
*/ + lr_error (ldfile, _("%s: premature end of file"), "LC_NAME"); +} diff --git a/REORG.TODO/locale/programs/ld-numeric.c b/REORG.TODO/locale/programs/ld-numeric.c new file mode 100644 index 0000000000..a81ff04f93 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-numeric.c @@ -0,0 +1,343 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "linereader.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_NUMERIC locale. 
*/ +struct locale_numeric_t +{ + const char *decimal_point; + const char *thousands_sep; + char *grouping; + size_t grouping_len; + uint32_t decimal_point_wc; + uint32_t thousands_sep_wc; +}; + + +static void +numeric_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + { + locale->categories[LC_NUMERIC].numeric = + (struct locale_numeric_t *) xcalloc (1, + sizeof (struct locale_numeric_t)); + } + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +numeric_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (numeric == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_NUMERIC] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_NUMERIC, from->copy_name[LC_NUMERIC], + from->repertoire_name, charmap); + while (from->categories[LC_NUMERIC].numeric == NULL + && from->copy_name[LC_NUMERIC] != NULL); + + numeric = locale->categories[LC_NUMERIC].numeric + = from->categories[LC_NUMERIC].numeric; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (numeric == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_NUMERIC")); + numeric_startup (NULL, locale, 0); + numeric = locale->categories[LC_NUMERIC].numeric; + nothing = 1; + } + } + + /* The decimal point must not be empty. This is not said explicitly + in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be + != "". */ + if (numeric->decimal_point == NULL) + { + if (! be_quiet && ! 
nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_NUMERIC", "decimal_point")); + numeric->decimal_point = "."; + } + else if (numeric->decimal_point[0] == '\0' && ! be_quiet && ! nothing) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value for field `%s' must not be an empty string"), + "LC_NUMERIC", "decimal_point")); + } + if (numeric->decimal_point_wc == L'\0') + numeric->decimal_point_wc = L'.'; + + if (numeric->grouping_len == 0 && ! be_quiet && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_NUMERIC", "grouping")); +} + + +void +numeric_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)); + add_locale_string (&file, numeric->decimal_point ?: ""); + add_locale_string (&file, numeric->thousands_sep ?: ""); + add_locale_raw_data (&file, numeric->grouping, numeric->grouping_len); + add_locale_uint32 (&file, numeric->decimal_point_wc); + add_locale_uint32 (&file, numeric->thousands_sep_wc); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_NUMERIC, "LC_NUMERIC", &file); +} + + +/* The parser for the LC_NUMERIC section of the locale definition. */ +void +numeric_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_numeric_t *numeric; + struct token *now; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_NUMERIC' must be free. 
*/ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_numeric, + LC_NUMERIC, "LC_NUMERIC", ignore_content); + return; + } + + /* Prepare the data structures. */ + numeric_startup (ldfile, result, ignore_content); + numeric = result->categories[LC_NUMERIC].numeric; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + ldfile->return_widestr = 1; \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_string) \ + goto err_label; \ + if (numeric->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_NUMERIC", #cat); \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_NUMERIC", #cat); \ + numeric->cat = ""; \ + numeric->cat##_wc = L'\0'; \ + } \ + else if (now->val.str.startwc != NULL && now->val.str.lenwc > 2) \ + { \ + lr_error (ldfile, _("\ +%s: value for field `%s' must be a single character"), "LC_NUMERIC", #cat); \ + } \ + else if (!ignore_content) \ + { \ + numeric->cat = now->val.str.startmb; \ + \ + if (now->val.str.startwc != NULL) \ + numeric->cat##_wc = *now->val.str.startwc; \ + } \ + ldfile->return_widestr = 0; \ + break + + STR_ELEM (decimal_point); + STR_ELEM (thousands_sep); + + case tok_grouping: + /* Ignore the 
rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_minus1 && now->tok != tok_number) + goto err_label; + else + { + size_t act = 0; + size_t max = 10; + char *grouping = xmalloc (max); + + do + { + if (act + 1 >= max) + { + max *= 2; + grouping = xrealloc (grouping, max); + } + + if (act > 0 && grouping[act - 1] == '\177') + { + lr_error (ldfile, _("\ +%s: `-1' must be last entry in `%s' field"), "LC_NUMERIC", "grouping"); + lr_ignore_rest (ldfile, 0); + break; + } + + if (now->tok == tok_minus1) + grouping[act++] = '\177'; + else if (now->val.num == 0) + { + /* A value of 0 disables grouping from here on but + we must not store a NUL character since this + terminates the string. Use something different + which must not be used otherwise. */ + grouping[act++] = '\377'; + } + else if (now->val.num > 126) + lr_error (ldfile, _("\ +%s: values for field `%s' must be smaller than 127"), + "LC_NUMERIC", "grouping"); + else + grouping[act++] = now->val.num; + + /* Next must be semicolon. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_semicolon) + break; + + now = lr_token (ldfile, charmap, result, NULL, verbose); + } + while (now->tok == tok_minus1 || now->tok == tok_number); + + if (now->tok != tok_eol) + goto err_label; + + /* A single -1 means no grouping. */ + if (act == 1 && grouping[0] == '\177') + act--; + grouping[act++] = '\0'; + + numeric->grouping = xrealloc (grouping, act); + numeric->grouping_len = act; + } + break; + + case tok_end: + /* Next we assume `LC_NUMERIC'. 
*/ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_NUMERIC"); + else if (now->tok != tok_lc_numeric) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_NUMERIC"); + lr_ignore_rest (ldfile, now->tok == tok_lc_numeric); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_NUMERIC"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_NUMERIC"); +} diff --git a/REORG.TODO/locale/programs/ld-paper.c b/REORG.TODO/locale/programs/ld-paper.c new file mode 100644 index 0000000000..df7ce12036 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-paper.c @@ -0,0 +1,231 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_PAPER locale. */ +struct locale_paper_t +{ + uint32_t height; + uint32_t width; +}; + + +static void +paper_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_PAPER].paper = + (struct locale_paper_t *) xcalloc (1, sizeof (struct locale_paper_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +paper_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_paper_t *paper = locale->categories[LC_PAPER].paper; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (paper == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_PAPER] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_PAPER, from->copy_name[LC_PAPER], + from->repertoire_name, charmap); + while (from->categories[LC_PAPER].paper == NULL + && from->copy_name[LC_PAPER] != NULL); + + paper = locale->categories[LC_PAPER].paper + = from->categories[LC_PAPER].paper; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (paper == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_PAPER")); + paper_startup (NULL, locale, 0); + paper = locale->categories[LC_PAPER].paper; + nothing = 1; + } + } + + if (paper->height == 0) + { + if (! 
nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_PAPER", "height")); + /* Use as default values the values from the i18n locale. */ + paper->height = 297; + } + + if (paper->width == 0) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_PAPER", "width")); + /* Use as default values the values from the i18n locale. */ + paper->width = 210; + } +} + + +void +paper_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_paper_t *paper = locale->categories[LC_PAPER].paper; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_PAPER)); + add_locale_uint32 (&file, paper->height); + add_locale_uint32 (&file, paper->width); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_PAPER, "LC_PAPER", &file); +} + + +/* The parser for the LC_PAPER section of the locale definition. */ +void +paper_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct locale_paper_t *paper; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* The rest of the line containing `LC_PAPER' must be empty. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_paper, + LC_PAPER, "LC_PAPER", ignore_content); + return; + } + + /* Prepare the data structures. */ + paper_startup (ldfile, result, ignore_content); + paper = result->categories[LC_PAPER].paper; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define INT_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_number) \ + goto err_label; \ + else if (paper->cat != 0) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_PAPER", #cat); \ + else if (!ignore_content) \ + paper->cat = arg->val.num; \ + break + + INT_ELEM (height); + INT_ELEM (width); + + case tok_end: + /* Next we assume `LC_PAPER'. */ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_PAPER"); + else if (arg->tok != tok_lc_paper) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_PAPER"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_paper); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_PAPER"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_PAPER"); +} diff --git a/REORG.TODO/locale/programs/ld-telephone.c b/REORG.TODO/locale/programs/ld-telephone.c new file mode 100644 index 0000000000..b62280aeec --- /dev/null +++ b/REORG.TODO/locale/programs/ld-telephone.c @@ -0,0 +1,295 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_TELEPHONE locale. */ +struct locale_telephone_t +{ + const char *tel_int_fmt; + const char *tel_dom_fmt; + const char *int_select; + const char *int_prefix; +}; + + +static void +telephone_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_TELEPHONE].telephone = (struct locale_telephone_t *) + xcalloc (1, sizeof (struct locale_telephone_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +telephone_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_telephone_t *telephone = + locale->categories[LC_TELEPHONE].telephone; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (telephone == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_TELEPHONE] != NULL) + { + /* Find the copying locale. 
This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_TELEPHONE, from->copy_name[LC_TELEPHONE], + from->repertoire_name, charmap); + while (from->categories[LC_TELEPHONE].telephone == NULL + && from->copy_name[LC_TELEPHONE] != NULL); + + telephone = locale->categories[LC_TELEPHONE].telephone + = from->categories[LC_TELEPHONE].telephone; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (telephone == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_TELEPHONE")); + telephone_startup (NULL, locale, 0); + telephone = locale->categories[LC_TELEPHONE].telephone; + nothing = 1; + } + } + + if (telephone->tel_int_fmt == NULL) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_TELEPHONE", "tel_int_fmt")); + /* Use as the default value the value of the i18n locale. */ + telephone->tel_int_fmt = "+%c %a%t%l"; + } + else + { + /* We must check whether the format string contains only the + allowed escape sequences. */ + const char *cp = telephone->tel_int_fmt; + + if (*cp == '\0') + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_TELEPHONE", "tel_int_fmt")); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (strchr ("aAcCelt", *++cp) == NULL) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid escape sequence in field `%s'"), "LC_TELEPHONE", "tel_int_fmt")); + break; + } + } + ++cp; + } + } + + if (telephone->tel_dom_fmt == NULL) + telephone->tel_dom_fmt = ""; + else if (telephone->tel_dom_fmt[0] != '\0') + { + /* We must check whether the format string contains only the + allowed escape sequences. 
*/ + const char *cp = telephone->tel_dom_fmt; + + while (*cp != '\0') + { + if (*cp == '%') + { + if (strchr ("aAcCelt", *++cp) == NULL) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid escape sequence in field `%s'"), "LC_TELEPHONE", "tel_dom_fmt")); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (telephone->cat == NULL) \ + { \ + if (verbose && ! nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_TELEPHONE", #cat)); \ + telephone->cat = ""; \ + } + + TEST_ELEM (int_select); + TEST_ELEM (int_prefix); +} + + +void +telephone_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_telephone_t *telephone = + locale->categories[LC_TELEPHONE].telephone; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_TELEPHONE)); + add_locale_string (&file, telephone->tel_int_fmt); + add_locale_string (&file, telephone->tel_dom_fmt); + add_locale_string (&file, telephone->int_select); + add_locale_string (&file, telephone->int_prefix); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_TELEPHONE, "LC_TELEPHONE", &file); +} + + +/* The parser for the LC_TELEPHONE section of the locale definition. */ +void +telephone_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct locale_telephone_t *telephone; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* The rest of the line containing `LC_TELEPHONE' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. 
*/ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_telephone, + LC_TELEPHONE, "LC_TELEPHONE", ignore_content); + return; + } + + /* Prepare the data structures. */ + telephone_startup (ldfile, result, ignore_content); + telephone = result->categories[LC_TELEPHONE].telephone; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (telephone->cat != NULL) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_TELEPHONE", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_TELEPHONE", #cat); \ + telephone->cat = ""; \ + } \ + else if (!ignore_content) \ + telephone->cat = arg->val.str.startmb; \ + break + + STR_ELEM (tel_int_fmt); + STR_ELEM (tel_dom_fmt); + STR_ELEM (int_select); + STR_ELEM (int_prefix); + + case tok_end: + /* Next we assume `LC_TELEPHONE'. 
*/ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_TELEPHONE"); + else if (arg->tok != tok_lc_telephone) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_TELEPHONE"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_telephone); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_TELEPHONE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_TELEPHONE"); +} diff --git a/REORG.TODO/locale/programs/ld-time.c b/REORG.TODO/locale/programs/ld-time.c new file mode 100644 index 0000000000..32e9c41e35 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-time.c @@ -0,0 +1,964 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <byteswap.h> +#include <langinfo.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "linereader.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* Entry describing an entry of the era specification. */ +struct era_data +{ + int32_t direction; + int32_t offset; + int32_t start_date[3]; + int32_t stop_date[3]; + const char *name; + const char *format; + uint32_t *wname; + uint32_t *wformat; +}; + + +/* The real definition of the struct for the LC_TIME locale. */ +struct locale_time_t +{ + const char *abday[7]; + const uint32_t *wabday[7]; + int abday_defined; + const char *day[7]; + const uint32_t *wday[7]; + int day_defined; + const char *abmon[12]; + const uint32_t *wabmon[12]; + int abmon_defined; + const char *mon[12]; + const uint32_t *wmon[12]; + int mon_defined; + const char *am_pm[2]; + const uint32_t *wam_pm[2]; + int am_pm_defined; + const char *d_t_fmt; + const uint32_t *wd_t_fmt; + const char *d_fmt; + const uint32_t *wd_fmt; + const char *t_fmt; + const uint32_t *wt_fmt; + const char *t_fmt_ampm; + const uint32_t *wt_fmt_ampm; + const char **era; + const uint32_t **wera; + uint32_t num_era; + const char *era_year; + const uint32_t *wera_year; + const char *era_d_t_fmt; + const uint32_t *wera_d_t_fmt; + const char *era_t_fmt; + const uint32_t *wera_t_fmt; + const char *era_d_fmt; + const uint32_t *wera_d_fmt; + const char *alt_digits[100]; + const uint32_t *walt_digits[100]; + const char *date_fmt; + const uint32_t *wdate_fmt; + int alt_digits_defined; + unsigned char week_ndays; + uint32_t week_1stday; + unsigned char week_1stweek; + unsigned char first_weekday; + unsigned char first_workday; + unsigned char cal_direction; + const char *timezone; + const uint32_t *wtimezone; + + struct era_data *era_entries; +}; + + +/* This constant is used to represent an empty 
wide character string. */ +static const uint32_t empty_wstr[1] = { 0 }; + + +static void +time_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_TIME].time = + (struct locale_time_t *) xcalloc (1, sizeof (struct locale_time_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 1; + } +} + + +void +time_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_time_t *time = locale->categories[LC_TIME].time; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (time == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_TIME] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_TIME, from->copy_name[LC_TIME], + from->repertoire_name, charmap); + while (from->categories[LC_TIME].time == NULL + && from->copy_name[LC_TIME] != NULL); + + time = locale->categories[LC_TIME].time + = from->categories[LC_TIME].time; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (time == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_TIME")); + time_startup (NULL, locale, 0); + time = locale->categories[LC_TIME].time; + nothing = 1; + } + } + +#define noparen(arg1, argn...) arg1, ##argn +#define TESTARR_ELEM(cat, val) \ + if (!time->cat##_defined) \ + { \ + const char *initval[] = { noparen val }; \ + unsigned int i; \ + \ + if (! be_quiet && ! 
nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_TIME", #cat)); \ + \ + for (i = 0; i < sizeof (initval) / sizeof (initval[0]); ++i) \ + time->cat[i] = initval[i]; \ + } + + TESTARR_ELEM (abday, ( "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" )); + TESTARR_ELEM (day, ( "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday" )); + TESTARR_ELEM (abmon, ( "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" )); + TESTARR_ELEM (mon, ( "January", "February", "March", "April", + "May", "June", "July", "August", + "September", "October", "November", "December" )); + TESTARR_ELEM (am_pm, ( "AM", "PM" )); + +#define TEST_ELEM(cat, initval) \ + if (time->cat == NULL) \ + { \ + if (! be_quiet && ! nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_TIME", #cat)); \ + \ + time->cat = initval; \ + } + + TEST_ELEM (d_t_fmt, "%a %b %e %H:%M:%S %Y"); + TEST_ELEM (d_fmt, "%m/%d/%y"); + TEST_ELEM (t_fmt, "%H:%M:%S"); + + /* According to C.Y.Alexis Cheng <alexis@vnet.ibm.com> the T_FMT_AMPM + field is optional. */ + if (time->t_fmt_ampm == NULL) + { + if (time->am_pm[0][0] == '\0' && time->am_pm[1][0] == '\0') + { + /* No AM/PM strings defined, use the 24h format as default. */ + time->t_fmt_ampm = time->t_fmt; + time->wt_fmt_ampm = time->wt_fmt; + } + else + { + time->t_fmt_ampm = "%I:%M:%S %p"; + time->wt_fmt_ampm = (const uint32_t *) L"%I:%M:%S %p"; + } + } + + /* Now process the era entries. 
*/ + if (time->num_era != 0) + { + const int days_per_month[12] = { 31, 29, 31, 30, 31, 30, + 31, 31, 30, 31 ,30, 31 }; + size_t idx; + wchar_t *wstr; + + time->era_entries = + (struct era_data *) xmalloc (time->num_era + * sizeof (struct era_data)); + + for (idx = 0; idx < time->num_era; ++idx) + { + size_t era_len = strlen (time->era[idx]); + char *str = xmalloc ((era_len + 1 + 3) & ~3); + char *endp; + + memcpy (str, time->era[idx], era_len + 1); + + /* First character must be + or - for the direction. */ + if (*str != '+' && *str != '-') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: direction flag in string %Zd in `era' field is not '+' nor '-'"), + "LC_TIME", idx + 1)); + /* Default arbitrarily to '+'. */ + time->era_entries[idx].direction = '+'; + } + else + time->era_entries[idx].direction = *str; + if (*++str != ':') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: direction flag in string %Zd in `era' field is not a single character"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else + ++str; + + /* Now the offset year. */ + time->era_entries[idx].offset = strtol (str, &endp, 10); + if (endp == str) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid number for offset in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else if (*endp != ':') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: garbage at end of offset value in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else + str = endp + 1; + + /* Next is the starting date in ISO format. 
*/ + if (strncmp (str, "-*", 2) == 0) + { + time->era_entries[idx].start_date[0] = + time->era_entries[idx].start_date[1] = + time->era_entries[idx].start_date[2] = 0x80000000; + if (str[2] != ':') + goto garbage_start_date; + str += 3; + } + else if (strncmp (str, "+*", 2) == 0) + { + time->era_entries[idx].start_date[0] = + time->era_entries[idx].start_date[1] = + time->era_entries[idx].start_date[2] = 0x7fffffff; + if (str[2] != ':') + goto garbage_start_date; + str += 3; + } + else + { + time->era_entries[idx].start_date[0] = strtol (str, &endp, 10); + if (endp == str || *endp != '/') + goto invalid_start_date; + else + str = endp + 1; + time->era_entries[idx].start_date[0] -= 1900; + /* year -1 represent 1 B.C. (not -1 A.D.) */ + if (time->era_entries[idx].start_date[0] < -1900) + ++time->era_entries[idx].start_date[0]; + + time->era_entries[idx].start_date[1] = strtol (str, &endp, 10); + if (endp == str || *endp != '/') + goto invalid_start_date; + else + str = endp + 1; + time->era_entries[idx].start_date[1] -= 1; + + time->era_entries[idx].start_date[2] = strtol (str, &endp, 10); + if (endp == str) + { + invalid_start_date: + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid starting date in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else if (*endp != ':') + { + garbage_start_date: + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: garbage at end of starting date in string %Zd in `era' field "), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else + { + str = endp + 1; + + /* Check for valid value. 
*/ + if ((time->era_entries[idx].start_date[1] < 0 + || time->era_entries[idx].start_date[1] >= 12 + || time->era_entries[idx].start_date[2] < 0 + || (time->era_entries[idx].start_date[2] + > days_per_month[time->era_entries[idx].start_date[1]]) + || (time->era_entries[idx].start_date[1] == 2 + && time->era_entries[idx].start_date[2] == 29 + && !__isleap (time->era_entries[idx].start_date[0]))) + && !be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: starting date is invalid in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + } + } + + /* Next is the stopping date in ISO format. */ + if (strncmp (str, "-*", 2) == 0) + { + time->era_entries[idx].stop_date[0] = + time->era_entries[idx].stop_date[1] = + time->era_entries[idx].stop_date[2] = 0x80000000; + if (str[2] != ':') + goto garbage_stop_date; + str += 3; + } + else if (strncmp (str, "+*", 2) == 0) + { + time->era_entries[idx].stop_date[0] = + time->era_entries[idx].stop_date[1] = + time->era_entries[idx].stop_date[2] = 0x7fffffff; + if (str[2] != ':') + goto garbage_stop_date; + str += 3; + } + else + { + time->era_entries[idx].stop_date[0] = strtol (str, &endp, 10); + if (endp == str || *endp != '/') + goto invalid_stop_date; + else + str = endp + 1; + time->era_entries[idx].stop_date[0] -= 1900; + /* year -1 represent 1 B.C. (not -1 A.D.) 
*/ + if (time->era_entries[idx].stop_date[0] < -1900) + ++time->era_entries[idx].stop_date[0]; + + time->era_entries[idx].stop_date[1] = strtol (str, &endp, 10); + if (endp == str || *endp != '/') + goto invalid_stop_date; + else + str = endp + 1; + time->era_entries[idx].stop_date[1] -= 1; + + time->era_entries[idx].stop_date[2] = strtol (str, &endp, 10); + if (endp == str) + { + invalid_stop_date: + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid stopping date in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else if (*endp != ':') + { + garbage_stop_date: + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: garbage at end of stopping date in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else + { + str = endp + 1; + + /* Check for valid value. */ + if ((time->era_entries[idx].stop_date[1] < 0 + || time->era_entries[idx].stop_date[1] >= 12 + || time->era_entries[idx].stop_date[2] < 0 + || (time->era_entries[idx].stop_date[2] + > days_per_month[time->era_entries[idx].stop_date[1]]) + || (time->era_entries[idx].stop_date[1] == 2 + && time->era_entries[idx].stop_date[2] == 29 + && !__isleap (time->era_entries[idx].stop_date[0]))) + && !be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid stopping date in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + } + } + + if (str == NULL || *str == '\0') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing era name in string %Zd in `era' field"), "LC_TIME", idx + 1)); + time->era_entries[idx].name = + time->era_entries[idx].format = ""; + } + else + { + time->era_entries[idx].name = strsep (&str, ":"); + + if (str == NULL || *str == '\0') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing era format in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + time->era_entries[idx].name = + time->era_entries[idx].format = ""; + } + else + time->era_entries[idx].format = str; 
+ } + + /* Now generate the wide character name and format. */ + wstr = wcschr ((wchar_t *) time->wera[idx], L':');/* end direction */ + wstr = wstr ? wcschr (wstr + 1, L':') : NULL; /* end offset */ + wstr = wstr ? wcschr (wstr + 1, L':') : NULL; /* end start */ + wstr = wstr ? wcschr (wstr + 1, L':') : NULL; /* end end */ + if (wstr != NULL) + { + time->era_entries[idx].wname = (uint32_t *) wstr + 1; + wstr = wcschr (wstr + 1, L':'); /* end name */ + if (wstr != NULL) + { + *wstr = L'\0'; + time->era_entries[idx].wformat = (uint32_t *) wstr + 1; + } + else + time->era_entries[idx].wname = + time->era_entries[idx].wformat = (uint32_t *) L""; + } + else + time->era_entries[idx].wname = + time->era_entries[idx].wformat = (uint32_t *) L""; + } + } + + /* Set up defaults based on ISO 30112 WD10 [2014]. */ + if (time->week_ndays == 0) + time->week_ndays = 7; + + if (time->week_1stday == 0) + time->week_1stday = 19971130; + + if (time->week_1stweek == 0) + time->week_1stweek = 7; + + if (time->week_1stweek > time->week_ndays) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: third operand for value of field `%s' must not be larger than %d"), + "LC_TIME", "week", 7)); + + if (time->first_weekday == '\0') + /* The definition does not specify this so the default is used. */ + time->first_weekday = 1; + else if (time->first_weekday > time->week_ndays) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: values for field `%s' must not be larger than %d"), + "LC_TIME", "first_weekday", 7)); + + if (time->first_workday == '\0') + /* The definition does not specify this so the default is used. */ + time->first_workday = 2; + else if (time->first_workday > time->week_ndays) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: values for field `%s' must not be larger than %d"), + "LC_TIME", "first_workday", 7)); + + if (time->cal_direction == '\0') + /* The definition does not specify this so the default is used. 
*/ + time->cal_direction = 1; + else if (time->cal_direction > 3) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: values for field `%s' must not be larger than %d"), + "LC_TIME", "cal_direction", 3)); + + /* XXX We don't perform any tests on the timezone value since this is + simply useless, stupid $&$!@... */ + if (time->timezone == NULL) + time->timezone = ""; + + if (time->date_fmt == NULL) + time->date_fmt = "%a %b %e %H:%M:%S %Z %Y"; + if (time->wdate_fmt == NULL) + time->wdate_fmt = (const uint32_t *) L"%a %b %e %H:%M:%S %Z %Y"; +} + + +void +time_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_time_t *time = locale->categories[LC_TIME].time; + struct locale_file file; + size_t num, n; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_TIME)); + + /* The ab'days. */ + for (n = 0; n < 7; ++n) + add_locale_string (&file, time->abday[n] ?: ""); + + /* The days. */ + for (n = 0; n < 7; ++n) + add_locale_string (&file, time->day[n] ?: ""); + + /* The ab'mons. */ + for (n = 0; n < 12; ++n) + add_locale_string (&file, time->abmon[n] ?: ""); + + /* The mons. */ + for (n = 0; n < 12; ++n) + add_locale_string (&file, time->mon[n] ?: ""); + + /* AM/PM. 
*/ + for (n = 0; n < 2; ++n) + add_locale_string (&file, time->am_pm[n]); + + add_locale_string (&file, time->d_t_fmt ?: ""); + add_locale_string (&file, time->d_fmt ?: ""); + add_locale_string (&file, time->t_fmt ?: ""); + add_locale_string (&file, time->t_fmt_ampm ?: ""); + + start_locale_structure (&file); + for (num = 0; num < time->num_era; ++num) + add_locale_string (&file, time->era[num]); + end_locale_structure (&file); + + add_locale_string (&file, time->era_year ?: ""); + add_locale_string (&file, time->era_d_fmt ?: ""); + + start_locale_structure (&file); + for (num = 0; num < 100; ++num) + add_locale_string (&file, time->alt_digits[num] ?: ""); + end_locale_structure (&file); + + add_locale_string (&file, time->era_d_t_fmt ?: ""); + add_locale_string (&file, time->era_t_fmt ?: ""); + add_locale_uint32 (&file, time->num_era); + + start_locale_structure (&file); + for (num = 0; num < time->num_era; ++num) + { + add_locale_uint32 (&file, time->era_entries[num].direction); + add_locale_uint32 (&file, time->era_entries[num].offset); + add_locale_uint32 (&file, time->era_entries[num].start_date[0]); + add_locale_uint32 (&file, time->era_entries[num].start_date[1]); + add_locale_uint32 (&file, time->era_entries[num].start_date[2]); + add_locale_uint32 (&file, time->era_entries[num].stop_date[0]); + add_locale_uint32 (&file, time->era_entries[num].stop_date[1]); + add_locale_uint32 (&file, time->era_entries[num].stop_date[2]); + add_locale_string (&file, time->era_entries[num].name); + add_locale_string (&file, time->era_entries[num].format); + add_locale_wstring (&file, time->era_entries[num].wname); + add_locale_wstring (&file, time->era_entries[num].wformat); + } + end_locale_structure (&file); + + /* The wide character ab'days. */ + for (n = 0; n < 7; ++n) + add_locale_wstring (&file, time->wabday[n] ?: empty_wstr); + + /* The wide character days. 
*/ + for (n = 0; n < 7; ++n) + add_locale_wstring (&file, time->wday[n] ?: empty_wstr); + + /* The wide character ab'mons. */ + for (n = 0; n < 12; ++n) + add_locale_wstring (&file, time->wabmon[n] ?: empty_wstr); + + /* The wide character mons. */ + for (n = 0; n < 12; ++n) + add_locale_wstring (&file, time->wmon[n] ?: empty_wstr); + + /* Wide character AM/PM. */ + for (n = 0; n < 2; ++n) + add_locale_wstring (&file, time->wam_pm[n] ?: empty_wstr); + + add_locale_wstring (&file, time->wd_t_fmt ?: empty_wstr); + add_locale_wstring (&file, time->wd_fmt ?: empty_wstr); + add_locale_wstring (&file, time->wt_fmt ?: empty_wstr); + add_locale_wstring (&file, time->wt_fmt_ampm ?: empty_wstr); + add_locale_wstring (&file, time->wera_year ?: empty_wstr); + add_locale_wstring (&file, time->wera_d_fmt ?: empty_wstr); + + start_locale_structure (&file); + for (num = 0; num < 100; ++num) + add_locale_wstring (&file, time->walt_digits[num] ?: empty_wstr); + end_locale_structure (&file); + + add_locale_wstring (&file, time->wera_d_t_fmt ?: empty_wstr); + add_locale_wstring (&file, time->wera_t_fmt ?: empty_wstr); + add_locale_char (&file, time->week_ndays); + add_locale_uint32 (&file, time->week_1stday); + add_locale_char (&file, time->week_1stweek); + add_locale_char (&file, time->first_weekday); + add_locale_char (&file, time->first_workday); + add_locale_char (&file, time->cal_direction); + add_locale_string (&file, time->timezone); + add_locale_string (&file, time->date_fmt); + add_locale_wstring (&file, time->wdate_fmt); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_TIME, "LC_TIME", &file); +} + + +/* The parser for the LC_TIME section of the locale definition. 
*/ +void +time_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_time_t *time; + struct token *now; + enum token_t nowtok; + size_t cnt; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_TIME' must be free. */ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, result, repertoire, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_time, + LC_TIME, "LC_TIME", ignore_content); + return; + } + + /* Prepare the data structures. */ + time_startup (ldfile, result, ignore_content); + time = result->categories[LC_TIME].time; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, repertoire, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STRARR_ELEM(cat, min, max) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. 
*/ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + for (cnt = 0; cnt < max; ++cnt) \ + { \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok == tok_eol) \ + { \ + if (cnt < min) \ + lr_error (ldfile, _("%s: too few values for field `%s'"), \ + "LC_TIME", #cat); \ + if (!ignore_content) \ + do \ + { \ + time->cat[cnt] = ""; \ + time->w##cat[cnt] = empty_wstr; \ + } \ + while (++cnt < max); \ + break; \ + } \ + else if (now->tok != tok_string) \ + goto err_label; \ + else if (!ignore_content && (now->val.str.startmb == NULL \ + || now->val.str.startwc == NULL)) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_TIME", #cat); \ + time->cat[cnt] = ""; \ + time->w##cat[cnt] = empty_wstr; \ + } \ + else if (!ignore_content) \ + { \ + time->cat[cnt] = now->val.str.startmb; \ + time->w##cat[cnt] = now->val.str.startwc; \ + } \ + \ + /* Match the semicolon. */ \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_semicolon && now->tok != tok_eol) \ + break; \ + } \ + if (now->tok != tok_eol) \ + { \ + while (!ignore_content && cnt < min) \ + { \ + time->cat[cnt] = ""; \ + time->w##cat[cnt++] = empty_wstr; \ + } \ + \ + if (now->tok == tok_semicolon) \ + { \ + now = lr_token (ldfile, charmap, result, repertoire, \ + verbose); \ + if (now->tok == tok_eol) \ + lr_error (ldfile, _("extra trailing semicolon")); \ + else if (now->tok == tok_string) \ + { \ + lr_error (ldfile, _("\ +%s: too many values for field `%s'"), \ + "LC_TIME", #cat); \ + lr_ignore_rest (ldfile, 0); \ + } \ + else \ + goto err_label; \ + } \ + else \ + goto err_label; \ + } \ + time->cat##_defined = 1; \ + break + + STRARR_ELEM (abday, 7, 7); + STRARR_ELEM (day, 7, 7); + STRARR_ELEM (abmon, 12, 12); + STRARR_ELEM (mon, 12, 12); + STRARR_ELEM (am_pm, 2, 2); + STRARR_ELEM (alt_digits, 0, 100); + + case tok_era: + /* Ignore the rest of the line if we don't need the input of + 
this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + do + { + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_string) + goto err_label; + if (!ignore_content && (now->val.str.startmb == NULL + || now->val.str.startwc == NULL)) + { + lr_error (ldfile, _("%s: unknown character in field `%s'"), + "LC_TIME", "era"); + lr_ignore_rest (ldfile, 0); + break; + } + if (!ignore_content) + { + time->era = xrealloc (time->era, + (time->num_era + 1) * sizeof (char *)); + time->era[time->num_era] = now->val.str.startmb; + + time->wera = xrealloc (time->wera, + (time->num_era + 1) + * sizeof (char *)); + time->wera[time->num_era++] = now->val.str.startwc; + } + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_eol && now->tok != tok_semicolon) + goto err_label; + } + while (now->tok == tok_semicolon); + break; + +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. 
*/ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_string) \ + goto err_label; \ + else if (time->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_TIME", #cat); \ + else if (!ignore_content && (now->val.str.startmb == NULL \ + || now->val.str.startwc == NULL)) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_TIME", #cat); \ + time->cat = ""; \ + time->w##cat = empty_wstr; \ + } \ + else if (!ignore_content) \ + { \ + time->cat = now->val.str.startmb; \ + time->w##cat = now->val.str.startwc; \ + } \ + break + + STR_ELEM (d_t_fmt); + STR_ELEM (d_fmt); + STR_ELEM (t_fmt); + STR_ELEM (t_fmt_ampm); + STR_ELEM (era_year); + STR_ELEM (era_d_t_fmt); + STR_ELEM (era_d_fmt); + STR_ELEM (era_t_fmt); + STR_ELEM (timezone); + STR_ELEM (date_fmt); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_number) \ + goto err_label; \ + else if (time->cat != 0) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_TIME", #cat); \ + else if (!ignore_content) \ + time->cat = now->val.num; \ + break + + INT_ELEM (first_weekday); + INT_ELEM (first_workday); + INT_ELEM (cal_direction); + + case tok_week: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_number) + goto err_label; + time->week_ndays = now->val.num; + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_semicolon) + goto err_label; + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_number) + goto err_label; + time->week_1stday = now->val.num; + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_semicolon) + goto err_label; + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_number) + goto err_label; + time->week_1stweek = now->val.num; + + lr_ignore_rest (ldfile, 1); + break; + + case tok_end: + /* Next we assume `LC_TIME'. */ + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_TIME"); + else if (now->tok != tok_lc_time) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_TIME"); + lr_ignore_rest (ldfile, now->tok == tok_lc_time); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_TIME"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, repertoire, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_TIME"); +} diff --git a/REORG.TODO/locale/programs/linereader.c b/REORG.TODO/locale/programs/linereader.c new file mode 100644 index 0000000000..52b340963a --- /dev/null +++ b/REORG.TODO/locale/programs/linereader.c @@ -0,0 +1,886 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <libintl.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> + +#include "localedef.h" +#include "charmap.h" +#include "error.h" +#include "linereader.h" +#include "locfile.h" + +/* Prototypes for local functions. 
*/ +static struct token *get_toplvl_escape (struct linereader *lr); +static struct token *get_symname (struct linereader *lr); +static struct token *get_ident (struct linereader *lr); +static struct token *get_string (struct linereader *lr, + const struct charmap_t *charmap, + struct localedef_t *locale, + const struct repertoire_t *repertoire, + int verbose); + + +struct linereader * +lr_open (const char *fname, kw_hash_fct_t hf) +{ + FILE *fp; + + if (fname == NULL || strcmp (fname, "-") == 0 + || strcmp (fname, "/dev/stdin") == 0) + return lr_create (stdin, "<stdin>", hf); + else + { + fp = fopen (fname, "rm"); + if (fp == NULL) + return NULL; + return lr_create (fp, fname, hf); + } +} + +struct linereader * +lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf) +{ + struct linereader *result; + int n; + + result = (struct linereader *) xmalloc (sizeof (*result)); + + result->fp = fp; + result->fname = xstrdup (fname); + result->buf = NULL; + result->bufsize = 0; + result->lineno = 1; + result->idx = 0; + result->comment_char = '#'; + result->escape_char = '\\'; + result->translate_strings = 1; + result->return_widestr = 0; + + n = getdelim (&result->buf, &result->bufsize, '\n', result->fp); + if (n < 0) + { + int save = errno; + fclose (result->fp); + free ((char *) result->fname); + free (result); + errno = save; + return NULL; + } + + if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n') + n -= 2; + + result->buf[n] = '\0'; + result->bufact = n; + result->hash_fct = hf; + + return result; +} + + +int +lr_eof (struct linereader *lr) +{ + return lr->bufact = 0; +} + + +void +lr_ignore_rest (struct linereader *lr, int verbose) +{ + if (verbose) + { + while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n' + && lr->buf[lr->idx] != lr->comment_char) + if (lr->buf[lr->idx] == '\0') + { + if (lr_next (lr) < 0) + return; + } + else + ++lr->idx; + + if (lr->buf[lr->idx] != '\n' && ! 
feof (lr->fp) + && lr->buf[lr->idx] != lr->comment_char) + lr_error (lr, _("trailing garbage at end of line")); + } + + /* Ignore continued line. */ + while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n') + if (lr_next (lr) < 0) + break; + + lr->idx = lr->bufact; +} + + +void +lr_close (struct linereader *lr) +{ + fclose (lr->fp); + free (lr->buf); + free (lr); +} + + +int +lr_next (struct linereader *lr) +{ + int n; + + n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp); + if (n < 0) + return -1; + + ++lr->lineno; + + if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n') + { +#if 0 + /* XXX Is this correct? */ + /* An escaped newline character is substituted with a single <SP>. */ + --n; + lr->buf[n - 1] = ' '; +#else + n -= 2; +#endif + } + + lr->buf[n] = '\0'; + lr->bufact = n; + lr->idx = 0; + + return 0; +} + + +/* Defined in error.c. */ +/* This variable is incremented each time `error' is called. */ +extern unsigned int error_message_count; + +/* The calling program should define program_name and set it to the + name of the executing program. */ +extern char *program_name; + + +struct token * +lr_token (struct linereader *lr, const struct charmap_t *charmap, + struct localedef_t *locale, const struct repertoire_t *repertoire, + int verbose) +{ + int ch; + + while (1) + { + do + { + ch = lr_getc (lr); + + if (ch == EOF) + { + lr->token.tok = tok_eof; + return &lr->token; + }; + + if (ch == '\n') + { + lr->token.tok = tok_eol; + return &lr->token; + } + } + while (isspace (ch)); + + if (ch != lr->comment_char) + break; + + /* Is there an newline at the end of the buffer? */ + if (lr->buf[lr->bufact - 1] != '\n') + { + /* No. Some people want this to mean that only the line in + the file not the logical, concatenated line is ignored. + Let's try this. */ + lr->idx = lr->bufact; + continue; + } + + /* Ignore rest of line. */ + lr_ignore_rest (lr, 0); + lr->token.tok = tok_eol; + return &lr->token; + } + + /* Match escape sequences. 
*/ + if (ch == lr->escape_char) + return get_toplvl_escape (lr); + + /* Match ellipsis. */ + if (ch == '.') + { + if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0) + { + int cnt; + for (cnt = 0; cnt < 10; ++cnt) + lr_getc (lr); + lr->token.tok = tok_ellipsis4_2; + return &lr->token; + } + if (strncmp (&lr->buf[lr->idx], "...", 3) == 0) + { + lr_getc (lr); + lr_getc (lr); + lr_getc (lr); + lr->token.tok = tok_ellipsis4; + return &lr->token; + } + if (strncmp (&lr->buf[lr->idx], "..", 2) == 0) + { + lr_getc (lr); + lr_getc (lr); + lr->token.tok = tok_ellipsis3; + return &lr->token; + } + if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0) + { + int cnt; + for (cnt = 0; cnt < 6; ++cnt) + lr_getc (lr); + lr->token.tok = tok_ellipsis2_2; + return &lr->token; + } + if (lr->buf[lr->idx] == '.') + { + lr_getc (lr); + lr->token.tok = tok_ellipsis2; + return &lr->token; + } + } + + switch (ch) + { + case '<': + return get_symname (lr); + + case '0' ... '9': + lr->token.tok = tok_number; + lr->token.val.num = ch - '0'; + + while (isdigit (ch = lr_getc (lr))) + { + lr->token.val.num *= 10; + lr->token.val.num += ch - '0'; + } + if (isalpha (ch)) + lr_error (lr, _("garbage at end of number")); + lr_ungetn (lr, 1); + + return &lr->token; + + case ';': + lr->token.tok = tok_semicolon; + return &lr->token; + + case ',': + lr->token.tok = tok_comma; + return &lr->token; + + case '(': + lr->token.tok = tok_open_brace; + return &lr->token; + + case ')': + lr->token.tok = tok_close_brace; + return &lr->token; + + case '"': + return get_string (lr, charmap, locale, repertoire, verbose); + + case '-': + ch = lr_getc (lr); + if (ch == '1') + { + lr->token.tok = tok_minus1; + return &lr->token; + } + lr_ungetn (lr, 2); + break; + } + + return get_ident (lr); +} + + +static struct token * +get_toplvl_escape (struct linereader *lr) +{ + /* This is supposed to be a numeric value. We return the + numerical value and the number of bytes. 
*/ + size_t start_idx = lr->idx - 1; + unsigned char *bytes = lr->token.val.charcode.bytes; + size_t nbytes = 0; + int ch; + + do + { + unsigned int byte = 0; + unsigned int base = 8; + + ch = lr_getc (lr); + + if (ch == 'd') + { + base = 10; + ch = lr_getc (lr); + } + else if (ch == 'x') + { + base = 16; + ch = lr_getc (lr); + } + + if ((base == 16 && !isxdigit (ch)) + || (base != 16 && (ch < '0' || ch >= (int) ('0' + base)))) + { + esc_error: + lr->token.val.str.startmb = &lr->buf[start_idx]; + + while (ch != EOF && !isspace (ch)) + ch = lr_getc (lr); + lr->token.val.str.lenmb = lr->idx - start_idx; + + lr->token.tok = tok_error; + return &lr->token; + } + + if (isdigit (ch)) + byte = ch - '0'; + else + byte = tolower (ch) - 'a' + 10; + + ch = lr_getc (lr); + if ((base == 16 && !isxdigit (ch)) + || (base != 16 && (ch < '0' || ch >= (int) ('0' + base)))) + goto esc_error; + + byte *= base; + if (isdigit (ch)) + byte += ch - '0'; + else + byte += tolower (ch) - 'a' + 10; + + ch = lr_getc (lr); + if (base != 16 && isdigit (ch)) + { + byte *= base; + byte += ch - '0'; + + ch = lr_getc (lr); + } + + bytes[nbytes++] = byte; + } + while (ch == lr->escape_char + && nbytes < (int) sizeof (lr->token.val.charcode.bytes)); + + if (!isspace (ch)) + lr_error (lr, _("garbage at end of character code specification")); + + lr_ungetn (lr, 1); + + lr->token.tok = tok_charcode; + lr->token.val.charcode.nbytes = nbytes; + + return &lr->token; +} + + +#define ADDC(ch) \ + do \ + { \ + if (bufact == bufmax) \ + { \ + bufmax *= 2; \ + buf = xrealloc (buf, bufmax); \ + } \ + buf[bufact++] = (ch); \ + } \ + while (0) + + +#define ADDS(s, l) \ + do \ + { \ + size_t _l = (l); \ + if (bufact + _l > bufmax) \ + { \ + if (bufact < _l) \ + bufact = _l; \ + bufmax *= 2; \ + buf = xrealloc (buf, bufmax); \ + } \ + memcpy (&buf[bufact], s, _l); \ + bufact += _l; \ + } \ + while (0) + + +#define ADDWC(ch) \ + do \ + { \ + if (buf2act == buf2max) \ + { \ + buf2max *= 2; \ + buf2 = xrealloc (buf2, 
buf2max * 4); \ + } \ + buf2[buf2act++] = (ch); \ + } \ + while (0) + + +static struct token * +get_symname (struct linereader *lr) +{ + /* Symbol in brackets. We must distinguish three kinds: + 1. reserved words + 2. ISO 10646 position values + 3. all other. */ + char *buf; + size_t bufact = 0; + size_t bufmax = 56; + const struct keyword_t *kw; + int ch; + + buf = (char *) xmalloc (bufmax); + + do + { + ch = lr_getc (lr); + if (ch == lr->escape_char) + { + int c2 = lr_getc (lr); + ADDC (c2); + + if (c2 == '\n') + ch = '\n'; + } + else + ADDC (ch); + } + while (ch != '>' && ch != '\n'); + + if (ch == '\n') + lr_error (lr, _("unterminated symbolic name")); + + /* Test for ISO 10646 position value. */ + if (buf[0] == 'U' && (bufact == 6 || bufact == 10)) + { + char *cp = buf + 1; + while (cp < &buf[bufact - 1] && isxdigit (*cp)) + ++cp; + + if (cp == &buf[bufact - 1]) + { + /* Yes, it is. */ + lr->token.tok = tok_ucs4; + lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16); + + return &lr->token; + } + } + + /* It is a symbolic name. Test for reserved words. 
*/ + kw = lr->hash_fct (buf, bufact - 1); + + if (kw != NULL && kw->symname_or_ident == 1) + { + lr->token.tok = kw->token; + free (buf); + } + else + { + lr->token.tok = tok_bsymbol; + + buf = xrealloc (buf, bufact + 1); + buf[bufact] = '\0'; + + lr->token.val.str.startmb = buf; + lr->token.val.str.lenmb = bufact - 1; + } + + return &lr->token; +} + + +static struct token * +get_ident (struct linereader *lr) +{ + char *buf; + size_t bufact; + size_t bufmax = 56; + const struct keyword_t *kw; + int ch; + + buf = xmalloc (bufmax); + bufact = 0; + + ADDC (lr->buf[lr->idx - 1]); + + while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';' + && ch != '<' && ch != ',' && ch != EOF) + { + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + { + lr_error (lr, _("invalid escape sequence")); + break; + } + } + ADDC (ch); + } + + lr_ungetc (lr, ch); + + kw = lr->hash_fct (buf, bufact); + + if (kw != NULL && kw->symname_or_ident == 0) + { + lr->token.tok = kw->token; + free (buf); + } + else + { + lr->token.tok = tok_ident; + + buf = xrealloc (buf, bufact + 1); + buf[bufact] = '\0'; + + lr->token.val.str.startmb = buf; + lr->token.val.str.lenmb = bufact; + } + + return &lr->token; +} + + +static struct token * +get_string (struct linereader *lr, const struct charmap_t *charmap, + struct localedef_t *locale, const struct repertoire_t *repertoire, + int verbose) +{ + int return_widestr = lr->return_widestr; + char *buf; + wchar_t *buf2 = NULL; + size_t bufact; + size_t bufmax = 56; + + /* We must return two different strings. */ + buf = xmalloc (bufmax); + bufact = 0; + + /* We know it'll be a string. */ + lr->token.tok = tok_string; + + /* If we need not translate the strings (i.e., expand <...> parts) + we can run a simple loop. */ + if (!lr->translate_strings) + { + int ch; + + buf2 = NULL; + while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) + ADDC (ch); + + /* Catch errors with trailing escape character. 
*/ + if (bufact > 0 && buf[bufact - 1] == lr->escape_char + && (bufact == 1 || buf[bufact - 2] != lr->escape_char)) + { + lr_error (lr, _("illegal escape sequence at end of string")); + --bufact; + } + else if (ch == '\n' || ch == EOF) + lr_error (lr, _("unterminated string")); + + ADDC ('\0'); + } + else + { + int illegal_string = 0; + size_t buf2act = 0; + size_t buf2max = 56 * sizeof (uint32_t); + int ch; + int warned = 0; + + /* We have to provide the wide character result as well. */ + if (return_widestr) + buf2 = xmalloc (buf2max); + + /* Read until the end of the string (or end of the line or file). */ + while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) + { + size_t startidx; + uint32_t wch; + struct charseq *seq; + + if (ch != '<') + { + /* The standards leave it up to the implementation to decide + what to do with character which stand for themself. We + could jump through hoops to find out the value relative to + the charmap and the repertoire map, but instead we leave + it up to the locale definition author to write a better + definition. We assume here that every character which + stands for itself is encoded using ISO 8859-1. Using the + escape character is allowed. */ + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + break; + } + + if (verbose && !warned) + { + lr_error (lr, _("\ +non-symbolic character value should not be used")); + warned = 1; + } + + ADDC (ch); + if (return_widestr) + ADDWC ((uint32_t) ch); + + continue; + } + + /* Now we have to search for the end of the symbolic name, i.e., + the closing '>'. */ + startidx = bufact; + while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF) + { + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + break; + } + ADDC (ch); + } + if (ch == '\n' || ch == EOF) + /* Not a correct string. */ + break; + if (bufact == startidx) + { + /* <> is no correct name. Ignore it and also signal an + error. 
*/ + illegal_string = 1; + continue; + } + + /* It might be a Uxxxx symbol. */ + if (buf[startidx] == 'U' + && (bufact - startidx == 5 || bufact - startidx == 9)) + { + char *cp = buf + startidx + 1; + while (cp < &buf[bufact] && isxdigit (*cp)) + ++cp; + + if (cp == &buf[bufact]) + { + char utmp[10]; + + /* Yes, it is. */ + ADDC ('\0'); + wch = strtoul (buf + startidx + 1, NULL, 16); + + /* Now forget about the name we just added. */ + bufact = startidx; + + if (return_widestr) + ADDWC (wch); + + /* See whether the charmap contains the Uxxxxxxxx names. */ + snprintf (utmp, sizeof (utmp), "U%08X", wch); + seq = charmap_find_value (charmap, utmp, 9); + + if (seq == NULL) + { + /* No, this isn't the case. Now determine from + the repertoire the name of the character and + find it in the charmap. */ + if (repertoire != NULL) + { + const char *symbol; + + symbol = repertoire_find_symbol (repertoire, wch); + + if (symbol != NULL) + seq = charmap_find_value (charmap, symbol, + strlen (symbol)); + } + + if (seq == NULL) + { +#ifndef NO_TRANSLITERATION + /* Transliterate if possible. */ + if (locale != NULL) + { + uint32_t *translit; + + if ((locale->avail & CTYPE_LOCALE) == 0) + { + /* Load the CTYPE data now. */ + int old_needed = locale->needed; + + locale->needed = 0; + locale = load_locale (LC_CTYPE, + locale->name, + locale->repertoire_name, + charmap, locale); + locale->needed = old_needed; + } + + if ((locale->avail & CTYPE_LOCALE) != 0 + && ((translit = find_translit (locale, + charmap, wch)) + != NULL)) + /* The CTYPE data contains a matching + transliteration. */ + { + int i; + + for (i = 0; translit[i] != 0; ++i) + { + char utmp[10]; + + snprintf (utmp, sizeof (utmp), "U%08X", + translit[i]); + seq = charmap_find_value (charmap, utmp, + 9); + assert (seq != NULL); + ADDS (seq->bytes, seq->nbytes); + } + + continue; + } + } +#endif /* NO_TRANSLITERATION */ + + /* Not a known name. 
*/ + illegal_string = 1; + } + } + + if (seq != NULL) + ADDS (seq->bytes, seq->nbytes); + + continue; + } + } + + /* We now have the symbolic name in buf[startidx] to + buf[bufact-1]. Now find out the value for this character + in the charmap as well as in the repertoire map (in this + order). */ + seq = charmap_find_value (charmap, &buf[startidx], + bufact - startidx); + + if (seq == NULL) + { + /* This name is not in the charmap. */ + lr_error (lr, _("symbol `%.*s' not in charmap"), + (int) (bufact - startidx), &buf[startidx]); + illegal_string = 1; + } + + if (return_widestr) + { + /* Now the same for the multibyte representation. */ + if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE) + wch = seq->ucs4; + else + { + wch = repertoire_find_value (repertoire, &buf[startidx], + bufact - startidx); + if (seq != NULL) + seq->ucs4 = wch; + } + + if (wch == ILLEGAL_CHAR_VALUE) + { + /* This name is not in the repertoire map. */ + lr_error (lr, _("symbol `%.*s' not in repertoire map"), + (int) (bufact - startidx), &buf[startidx]); + illegal_string = 1; + } + else + ADDWC (wch); + } + + /* Now forget about the name we just added. */ + bufact = startidx; + + /* And copy the bytes. 
*/ + if (seq != NULL) + ADDS (seq->bytes, seq->nbytes); + } + + if (ch == '\n' || ch == EOF) + { + lr_error (lr, _("unterminated string")); + illegal_string = 1; + } + + if (illegal_string) + { + free (buf); + free (buf2); + lr->token.val.str.startmb = NULL; + lr->token.val.str.lenmb = 0; + lr->token.val.str.startwc = NULL; + lr->token.val.str.lenwc = 0; + + return &lr->token; + } + + ADDC ('\0'); + + if (return_widestr) + { + ADDWC (0); + lr->token.val.str.startwc = xrealloc (buf2, + buf2act * sizeof (uint32_t)); + lr->token.val.str.lenwc = buf2act; + } + } + + lr->token.val.str.startmb = xrealloc (buf, bufact); + lr->token.val.str.lenmb = bufact; + + return &lr->token; +} diff --git a/REORG.TODO/locale/programs/linereader.h b/REORG.TODO/locale/programs/linereader.h new file mode 100644 index 0000000000..3965db558c --- /dev/null +++ b/REORG.TODO/locale/programs/linereader.h @@ -0,0 +1,146 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.org>. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _LINEREADER_H +#define _LINEREADER_H 1 + +#include <ctype.h> +#include <libintl.h> +#include <stdint.h> +#include <stdio.h> + +#include "charmap.h" +#include "error.h" +#include "locfile-token.h" +#include "repertoire.h" + + +typedef const struct keyword_t *(*kw_hash_fct_t) (const char *, unsigned int); +struct charset_t; +struct localedef_t; + +struct token +{ + enum token_t tok; + union + { + struct + { + char *startmb; + size_t lenmb; + uint32_t *startwc; + size_t lenwc; + } str; + unsigned long int num; + struct + { + /* This element is sized on the safe expectation that no single + character in any character set uses more than 16 bytes. */ + unsigned char bytes[16]; + int nbytes; + } charcode; + uint32_t ucs4; + } val; +}; + + +struct linereader +{ + FILE *fp; + const char *fname; + char *buf; + size_t bufsize; + size_t bufact; + size_t lineno; + + size_t idx; + + char comment_char; + char escape_char; + + struct token token; + + int translate_strings; + int return_widestr; + + kw_hash_fct_t hash_fct; +}; + + +/* Functions defined in linereader.c. */ +extern struct linereader *lr_open (const char *fname, kw_hash_fct_t hf); +extern struct linereader *lr_create (FILE *fp, const char *fname, + kw_hash_fct_t hf); +extern int lr_eof (struct linereader *lr); +extern void lr_close (struct linereader *lr); +extern int lr_next (struct linereader *lr); +extern struct token *lr_token (struct linereader *lr, + const struct charmap_t *charmap, + struct localedef_t *locale, + const struct repertoire_t *repertoire, + int verbose); +extern void lr_ignore_rest (struct linereader *lr, int verbose); + + +#define lr_error(lr, fmt, args...) 
\ + WITH_CUR_LOCALE (error_at_line (0, 0, lr->fname, lr->lineno, fmt, ## args)) + + + +static inline int +__attribute ((always_inline)) +lr_getc (struct linereader *lr) +{ + if (lr->idx == lr->bufact) + { + if (lr->bufact != 0) + if (lr_next (lr) < 0) + return EOF; + + if (lr->bufact == 0) + return EOF; + } + + return lr->buf[lr->idx] == '\32' ? EOF : lr->buf[lr->idx++]; +} + + +static inline int +__attribute ((always_inline)) +lr_ungetc (struct linereader *lr, int ch) +{ + if (lr->idx == 0) + return -1; + + if (ch != EOF) + lr->buf[--lr->idx] = ch; + return 0; +} + + +static inline int +lr_ungetn (struct linereader *lr, size_t n) +{ + if (lr->idx < n) + return -1; + + lr->idx -= n; + return 0; +} + + +#endif /* linereader.h */ diff --git a/REORG.TODO/locale/programs/locale-spec.c b/REORG.TODO/locale/programs/locale-spec.c new file mode 100644 index 0000000000..4e9bf81b78 --- /dev/null +++ b/REORG.TODO/locale/programs/locale-spec.c @@ -0,0 +1,131 @@ +/* Handle special requests. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <libintl.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +#include "localeinfo.h" + + +/* We provide support for some special names. This helps debugging + and may be useful for advanced usage of the provided information + outside C. */ +void +locale_special (const char *name, int show_category_name, + int show_keyword_name) +{ +#if 0 + /* "collate-elements": print collation elements of locale. */ + if (strcmp (name, "collate-elements") == 0) + { + size_t nelem = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_ELEM_HASH_SIZE); + + if (show_category_name) + puts ("LC_COLLATE"); + if (show_keyword_name) + fputs ("collate-elements=", stdout); + + if (nelem != 0) + { + int first = 1; + size_t cnt; + + for (cnt = 0; cnt < nelem; ++cnt) + if (__collate_element_hash[2 * cnt] != (~((u_int32_t) 0))) + { + size_t idx = __collate_element_hash[2 * cnt]; + + printf ("%s<%s>", first ? "" : ";", + &__collate_element_strings[idx]); + + /* We don't print the string. This is only confusing + because only the programs have to know the + encoding. The code is left in place because it + shows how to get the information. */ + { + const wchar_t *wp; + + idx = __collate_element_hash[2 * cnt + 1]; + wp = &__collate_element_values[idx]; + while (*wp != L'\0') + { + /********************************************\ + |* XXX The element values are really wide *| + |* chars. But we are currently not able to *| + |* print these so fake here. 
*| + \********************************************/ + int ch = wctob (*wp++); + if (ch != EOF) + putchar (ch); + else + fputs ("<??\?>", stdout); + } + + putchar ('"'); + } + first = 0; + } + } + putchar ('\n'); + return; + } + + if (strcmp (name, "collate-classes") == 0) + { + size_t nelem = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZE); + size_t cnt; + int first = 1; + + if (show_category_name) + puts ("LC_COLLATE"); + if (show_keyword_name) + fputs ("collate-classes=", stdout); + + for (cnt = 0; cnt < nelem; ++cnt) + if (__collate_symbol_hash[2 * cnt] != 0xffffffff) + { + printf ("%s<%s>", first ? "" : ",", + &__collate_symbol_strings[__collate_symbol_hash[2 * cnt]]); +#if 0 + { + size_t idx = __collate_symbol_hash[2 * cnt + 1]; + size_t cls; + + putchar ('='); + for (cls = 0; cls < __collate_symbol_classes[idx]; ++cls) + printf ("%s%d", cls == 0 ? "" : ":", + __collate_symbol_classes[idx + 1 + cls]); + } +#endif + first = 0; + } + putchar ('\n'); + return; + } +#endif + + /* If nothing matches, fail. */ + error (1, 0, gettext ("unknown name \"%s\""), name); +} diff --git a/REORG.TODO/locale/programs/locale.c b/REORG.TODO/locale/programs/locale.c new file mode 100644 index 0000000000..941290089b --- /dev/null +++ b/REORG.TODO/locale/programs/locale.c @@ -0,0 +1,989 @@ +/* Implementation of the locale program according to POSIX 9945-2. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <argp.h> +#include <argz.h> +#include <dirent.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <langinfo.h> +#include <libintl.h> +#include <limits.h> +#include <locale.h> +#include <search.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "localeinfo.h" +#include "charmap-dir.h" +#include "../locarchive.h" +#include <programs/xmalloc.h> + +#define ARCHIVE_NAME COMPLOCALEDIR "/locale-archive" + +/* If set print the name of the category. */ +static int show_category_name; + +/* If set print the name of the item. */ +static int show_keyword_name; + +/* Print names of all available locales. */ +static int do_all; + +/* Print names of all available character maps. */ +static int do_charmaps = 0; + +/* Nonzero if verbose output is wanted. */ +static int verbose; + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +/* Definitions of arguments for argp functions. 
*/ +static const struct argp_option options[] = +{ + { NULL, 0, NULL, 0, N_("System information:") }, + { "all-locales", 'a', NULL, OPTION_NO_USAGE, + N_("Write names of available locales") }, + { "charmaps", 'm', NULL, OPTION_NO_USAGE, + N_("Write names of available charmaps") }, + { NULL, 0, NULL, 0, N_("Modify output format:") }, + { "category-name", 'c', NULL, 0, N_("Write names of selected categories") }, + { "keyword-name", 'k', NULL, 0, N_("Write names of selected keywords") }, + { "verbose", 'v', NULL, 0, N_("Print more information") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Short description of program. */ +static const char doc[] = N_("Get locale-specific information."); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("NAME\n[-a|-m]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. */ +static char *more_help (int key, const char *text, void *input); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + + +/* We don't have these constants defined because we don't use them. Give + default values. */ +#define CTYPE_MB_CUR_MIN 0 +#define CTYPE_MB_CUR_MAX 0 +#define CTYPE_HASH_SIZE 0 +#define CTYPE_HASH_LAYERS 0 +#define CTYPE_CLASS 0 +#define CTYPE_TOUPPER_EB 0 +#define CTYPE_TOLOWER_EB 0 +#define CTYPE_TOUPPER_EL 0 +#define CTYPE_TOLOWER_EL 0 + +/* Definition of the data structure which represents a category and its + items. */ +struct category +{ + int cat_id; + const char *name; + size_t number; + struct cat_item + { + int item_id; + const char *name; + enum { std, opt } status; + enum value_type value_type; + int min; + int max; + } *item_desc; +}; + +/* Simple helper macro. */ +#define NELEMS(arr) ((sizeof (arr)) / (sizeof (arr[0]))) + +/* For some tricky stuff. */ +#define NO_PAREN(Item, More...) 
Item, ## More + +/* We have all categories defined in `categories.def'. Now construct + the description and data structure used for all categories. */ +#define DEFINE_ELEMENT(Item, More...) { Item, ## More }, +#define DEFINE_CATEGORY(category, name, items, postload) \ + static struct cat_item category##_desc[] = \ + { \ + NO_PAREN items \ + }; + +#include "categories.def" +#undef DEFINE_CATEGORY + +static struct category category[] = + { +#define DEFINE_CATEGORY(category, name, items, postload) \ + [category] = { _NL_NUM_##category, name, NELEMS (category##_desc), \ + category##_desc }, +#include "categories.def" +#undef DEFINE_CATEGORY + }; +#define NCATEGORIES NELEMS (category) + + +/* Automatically set variable. */ +extern const char *__progname; + +/* helper function for extended name handling. */ +extern void locale_special (const char *name, int show_category_name, + int show_keyword_name); + +/* Prototypes for local functions. */ +static void print_LC_IDENTIFICATION (void *mapped, size_t size); +static void print_LC_CTYPE (void *mapped, size_t size); +static void write_locales (void); +static int nameentcmp (const void *a, const void *b); +static int write_archive_locales (void **all_datap, char *linebuf); +static void write_charmaps (void); +static void show_locale_vars (void); +static void show_info (const char *name); + + +int +main (int argc, char *argv[]) +{ + int remaining; + + /* Set initial values for global variables. */ + show_category_name = 0; + show_keyword_name = 0; + + /* Set locale. Do not set LC_ALL because the other categories must + not be affected (according to POSIX.2). */ + if (setlocale (LC_CTYPE, "") == NULL) + error (0, errno, gettext ("Cannot set LC_CTYPE to default locale")); + if (setlocale (LC_MESSAGES, "") == NULL) + error (0, errno, gettext ("Cannot set LC_MESSAGES to default locale")); + + /* Initialize the message catalog. */ + textdomain (PACKAGE); + + /* Parse and process arguments. 
*/ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + /* `-a' requests the names of all available locales. */ + if (do_all != 0) + { + if (setlocale (LC_COLLATE, "") == NULL) + error (0, errno, + gettext ("Cannot set LC_COLLATE to default locale")); + write_locales (); + exit (EXIT_SUCCESS); + } + + /* `m' requests the names of all available charmaps. The names can be + used for the -f argument to localedef(1). */ + if (do_charmaps != 0) + { + write_charmaps (); + exit (EXIT_SUCCESS); + } + + /* Specific information about the current locale are requested. + Change to this locale now. */ + if (setlocale (LC_ALL, "") == NULL) + error (0, errno, gettext ("Cannot set LC_ALL to default locale")); + + /* If no real argument is given we have to print the contents of the + current locale definition variables. These are LANG and the LC_*. */ + if (remaining == argc && show_keyword_name == 0 && show_category_name == 0) + { + show_locale_vars (); + exit (EXIT_SUCCESS); + } + + /* Process all given names. */ + while (remaining < argc) + show_info (argv[remaining++]); + + exit (EXIT_SUCCESS); +} + + +/* Handle program arguments. */ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case 'a': + do_all = 1; + break; + case 'c': + show_category_name = 1; + break; + case 'm': + do_charmaps = 1; + break; + case 'k': + show_keyword_name = 1; + break; + case 'v': + verbose = 1; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + char *tp = NULL; + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + if (asprintf (&tp, gettext ("\ +For bug reporting instructions, please see:\n\ +%s.\n"), REPORT_BUGS_TO) < 0) + return NULL; + return tp; + default: + break; + } + return (char *) text; +} + + +/* Print the version information. 
*/ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "locale %s%s\n", PKGVERSION, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), "2017"); + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); +} + + +/* Simple action function which prints arguments as strings. */ +static void +print_names (const void *nodep, VISIT value, int level) +{ + if (value == postorder || value == leaf) + puts (*(char **) nodep); +} + + +static int +select_dirs (const struct dirent *dirent) +{ + int result = 0; + + if (strcmp (dirent->d_name, ".") != 0 && strcmp (dirent->d_name, "..") != 0) + { + mode_t mode = 0; + +#ifdef _DIRENT_HAVE_D_TYPE + if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_LNK) + mode = DTTOIF (dirent->d_type); + else +#endif + { + struct stat64 st; + char buf[sizeof (COMPLOCALEDIR) + + strlen (dirent->d_name) + 1]; + + stpcpy (stpcpy (stpcpy (buf, COMPLOCALEDIR), "/"), + dirent->d_name); + + if (stat64 (buf, &st) == 0) + mode = st.st_mode; + } + + result = S_ISDIR (mode); + } + + return result; +} + + +static void +print_LC_IDENTIFICATION (void *mapped, size_t size) +{ + /* Read the information from the file. 
*/ + struct + { + unsigned int magic; + unsigned int nstrings; + unsigned int strindex[0]; + } *filedata = mapped; + + if (filedata->magic == LIMAGIC (LC_IDENTIFICATION) + && (sizeof *filedata + + (filedata->nstrings + * sizeof (unsigned int)) + <= size)) + { + const char *str; + +#define HANDLE(idx, name) \ + str = ((char *) mapped \ + + filedata->strindex[_NL_ITEM_INDEX (_NL_IDENTIFICATION_##idx)]); \ + if (*str != '\0') \ + printf ("%9s | %s\n", name, str) + HANDLE (TITLE, "title"); + HANDLE (SOURCE, "source"); + HANDLE (ADDRESS, "address"); + HANDLE (CONTACT, "contact"); + HANDLE (EMAIL, "email"); + HANDLE (TEL, "telephone"); + HANDLE (FAX, "fax"); + HANDLE (LANGUAGE, "language"); + HANDLE (TERRITORY, "territory"); + HANDLE (AUDIENCE, "audience"); + HANDLE (APPLICATION, "application"); + HANDLE (ABBREVIATION, "abbreviation"); + HANDLE (REVISION, "revision"); + HANDLE (DATE, "date"); + } +} + + +static void +print_LC_CTYPE (void *mapped, size_t size) +{ + struct + { + unsigned int magic; + unsigned int nstrings; + unsigned int strindex[0]; + } *filedata = mapped; + + if (filedata->magic == LIMAGIC (LC_CTYPE) + && (sizeof *filedata + + (filedata->nstrings + * sizeof (unsigned int)) + <= size)) + { + const char *str; + + str = ((char *) mapped + + filedata->strindex[_NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME)]); + if (*str != '\0') + printf (" codeset | %s\n", str); + } +} + + +/* Write the names of all available locales to stdout. We have some + sources of the information: the contents of the locale directory + and the locale.alias file. To avoid duplicates and print the + result is a reasonable order we put all entries is a search tree + and print them afterwards. 
*/ +static void +write_locales (void) +{ + char linebuf[80]; + void *all_data = NULL; + struct dirent **dirents; + int ndirents; + int cnt; + char *alias_path; + size_t alias_path_len; + char *entry; + int first_locale = 1; + +#define PUT(name) tsearch (name, &all_data, \ + (int (*) (const void *, const void *)) strcoll) +#define GET(name) tfind (name, &all_data, \ + (int (*) (const void *, const void *)) strcoll) + + /* `POSIX' locale is always available (POSIX.2 4.34.3). */ + PUT ("POSIX"); + /* And so is the "C" locale. */ + PUT ("C"); + + memset (linebuf, '-', sizeof (linebuf) - 1); + linebuf[sizeof (linebuf) - 1] = '\0'; + + /* First scan the locale archive. */ + if (write_archive_locales (&all_data, linebuf)) + first_locale = 0; + + /* Now we can look for all files in the directory. */ + ndirents = scandir (COMPLOCALEDIR, &dirents, select_dirs, + alphasort); + for (cnt = 0; cnt < ndirents; ++cnt) + { + /* Test whether at least the LC_CTYPE data is there. Some + directories only contain translations. */ + char buf[sizeof (COMPLOCALEDIR) + + strlen (dirents[cnt]->d_name) + + sizeof "/LC_IDENTIFICATION"]; + char *enddir; + struct stat64 st; + + stpcpy (enddir = stpcpy (stpcpy (stpcpy (buf, + COMPLOCALEDIR), + "/"), + dirents[cnt]->d_name), + "/LC_IDENTIFICATION"); + + if (stat64 (buf, &st) == 0 && S_ISREG (st.st_mode)) + { + if (verbose && GET (dirents[cnt]->d_name) == NULL) + { + /* Provide some nice output of all kinds of + information. */ + int fd; + + if (! first_locale) + putchar_unlocked ('\n'); + first_locale = 0; + + printf ("locale: %-15.15s directory: %.*s\n%s\n", + dirents[cnt]->d_name, (int) (enddir - buf), buf, + linebuf); + + fd = open64 (buf, O_RDONLY); + if (fd != -1) + { + void *mapped = mmap64 (NULL, st.st_size, PROT_READ, + MAP_SHARED, fd, 0); + if (mapped != MAP_FAILED) + { + print_LC_IDENTIFICATION (mapped, st.st_size); + + munmap (mapped, st.st_size); + } + + close (fd); + + /* Now try to get the charset information. 
*/ + strcpy (enddir, "/LC_CTYPE"); + fd = open64 (buf, O_RDONLY); + if (fd != -1 && fstat64 (fd, &st) >= 0 + && ((mapped = mmap64 (NULL, st.st_size, PROT_READ, + MAP_SHARED, fd, 0)) + != MAP_FAILED)) + { + print_LC_CTYPE (mapped, st.st_size); + + munmap (mapped, st.st_size); + } + + if (fd != -1) + close (fd); + } + } + + /* If the verbose format is not selected we simply + collect the names. */ + PUT (xstrdup (dirents[cnt]->d_name)); + } + } + if (ndirents > 0) + free (dirents); + + /* Now read the locale.alias files. */ + if (argz_create_sep (LOCALE_ALIAS_PATH, ':', &alias_path, &alias_path_len)) + error (1, errno, gettext ("while preparing output")); + + entry = NULL; + while ((entry = argz_next (alias_path, alias_path_len, entry))) + { + static const char aliasfile[] = "/locale.alias"; + FILE *fp; + char full_name[strlen (entry) + sizeof aliasfile]; + + stpcpy (stpcpy (full_name, entry), aliasfile); + fp = fopen (full_name, "rm"); + if (fp == NULL) + /* Ignore non-existing files. */ + continue; + + /* No threads present. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + while (! feof_unlocked (fp)) + { + /* It is a reasonable approach to use a fix buffer here + because + a) we are only interested in the first two fields + b) these fields must be usable as file names and so must + not be that long */ + char buf[BUFSIZ]; + char *alias; + char *value; + char *cp; + + if (fgets_unlocked (buf, BUFSIZ, fp) == NULL) + /* EOF reached. */ + break; + + cp = buf; + /* Ignore leading white space. */ + while (isspace (cp[0]) && cp[0] != '\n') + ++cp; + + /* A leading '#' signals a comment line. */ + if (cp[0] != '\0' && cp[0] != '#' && cp[0] != '\n') + { + alias = cp++; + while (cp[0] != '\0' && !isspace (cp[0])) + ++cp; + /* Terminate alias name. */ + if (cp[0] != '\0') + *cp++ = '\0'; + + /* Now look for the beginning of the value. 
*/ + while (isspace (cp[0])) + ++cp; + + if (cp[0] != '\0') + { + value = cp++; + while (cp[0] != '\0' && !isspace (cp[0])) + ++cp; + /* Terminate value. */ + if (cp[0] == '\n') + { + /* This has to be done to make the following + test for the end of line possible. We are + looking for the terminating '\n' which do not + overwrite here. */ + *cp++ = '\0'; + *cp = '\n'; + } + else if (cp[0] != '\0') + *cp++ = '\0'; + + /* Add the alias. */ + if (! verbose && GET (value) != NULL) + PUT (xstrdup (alias)); + } + } + + /* Possibly not the whole line fits into the buffer. + Ignore the rest of the line. */ + while (strchr (cp, '\n') == NULL) + { + cp = buf; + if (fgets_unlocked (buf, BUFSIZ, fp) == NULL) + /* Make sure the inner loop will be left. The outer + loop will exit at the `feof' test. */ + *cp = '\n'; + } + } + + fclose (fp); + } + + if (! verbose) + { + twalk (all_data, print_names); + } +} + + +struct nameent +{ + char *name; + uint32_t locrec_offset; +}; + + +static int +nameentcmp (const void *a, const void *b) +{ + return strcoll (((const struct nameent *) a)->name, + ((const struct nameent *) b)->name); +} + + +static int +write_archive_locales (void **all_datap, char *linebuf) +{ + struct stat64 st; + void *all_data = *all_datap; + size_t len = 0; + struct locarhead *head; + struct namehashent *namehashtab; + char *addr = MAP_FAILED; + int fd, ret = 0; + uint32_t cnt; + + fd = open64 (ARCHIVE_NAME, O_RDONLY); + if (fd < 0) + return 0; + + if (fstat64 (fd, &st) < 0 || st.st_size < sizeof (*head)) + goto error_out; + + len = st.st_size; + addr = mmap64 (NULL, len, PROT_READ, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) + goto error_out; + + head = (struct locarhead *) addr; + if (head->namehash_offset + head->namehash_size > len + || head->string_offset + head->string_size > len + || head->locrectab_offset + head->locrectab_size > len + || head->sumhash_offset + head->sumhash_size > len) + goto error_out; + + namehashtab = (struct namehashent *) (addr + 
head->namehash_offset); + if (! verbose) + { + for (cnt = 0; cnt < head->namehash_size; ++cnt) + if (namehashtab[cnt].locrec_offset != 0) + { + PUT (xstrdup (addr + namehashtab[cnt].name_offset)); + ++ret; + } + } + else + { + struct nameent *names; + uint32_t used; + + names = (struct nameent *) xmalloc (head->namehash_used + * sizeof (struct nameent)); + for (cnt = used = 0; cnt < head->namehash_size; ++cnt) + if (namehashtab[cnt].locrec_offset != 0) + { + names[used].name = addr + namehashtab[cnt].name_offset; + names[used++].locrec_offset = namehashtab[cnt].locrec_offset; + } + + /* Sort the names. */ + qsort (names, used, sizeof (struct nameent), nameentcmp); + + for (cnt = 0; cnt < used; ++cnt) + { + struct locrecent *locrec; + + PUT (xstrdup (names[cnt].name)); + + if (cnt) + putchar_unlocked ('\n'); + + printf ("locale: %-15.15s archive: " ARCHIVE_NAME "\n%s\n", + names[cnt].name, linebuf); + + locrec = (struct locrecent *) (addr + names[cnt].locrec_offset); + + print_LC_IDENTIFICATION (addr + + locrec->record[LC_IDENTIFICATION].offset, + locrec->record[LC_IDENTIFICATION].len); + + print_LC_CTYPE (addr + locrec->record[LC_CTYPE].offset, + locrec->record[LC_CTYPE].len); + } + + ret = used; + } + +error_out: + if (addr != MAP_FAILED) + munmap (addr, len); + close (fd); + *all_datap = all_data; + return ret; +} + + +/* Write the names of all available character maps to stdout. */ +static void +write_charmaps (void) +{ + void *all_data = NULL; + CHARMAP_DIR *dir; + const char *dirent; + + /* Look for all files in the charmap directory. */ + dir = charmap_opendir (CHARMAP_PATH); + if (dir == NULL) + return; + + while ((dirent = charmap_readdir (dir)) != NULL) + { + char **aliases; + char **p; + + PUT (xstrdup (dirent)); + + aliases = charmap_aliases (CHARMAP_PATH, dirent); + +#if 0 + /* Add the code_set_name and the aliases. */ + for (p = aliases; *p; p++) + PUT (xstrdup (*p)); +#else + /* Add the code_set_name only. Most aliases are obsolete. 
*/ + p = aliases; + if (*p) + PUT (xstrdup (*p)); +#endif + + charmap_free_aliases (aliases); + } + + charmap_closedir (dir); + + twalk (all_data, print_names); +} + +/* Print a properly quoted assignment of NAME with VAL, using double + quotes iff DQUOTE is true. */ +static void +print_assignment (const char *name, const char *val, bool dquote) +{ + printf ("%s=", name); + if (dquote) + putchar ('"'); + while (*val != '\0') + { + size_t segment + = strcspn (val, dquote ? "$`\"\\" : "~|&;<>()$`\\\"' \t\n"); + printf ("%.*s", (int) segment, val); + val += segment; + if (*val == '\0') + break; + putchar ('\\'); + putchar (*val++); + } + if (dquote) + putchar ('"'); + putchar ('\n'); +} + +/* We have to show the contents of the environments determining the + locale. */ +static void +show_locale_vars (void) +{ + const char *lcall = getenv ("LC_ALL") ?: ""; + const char *lang = getenv ("LANG") ?: ""; + + /* LANG has to be the first value. */ + print_assignment ("LANG", lang, false); + + /* Now all categories in an unspecified order. */ + for (size_t cat_no = 0; cat_no < NCATEGORIES; ++cat_no) + if (cat_no != LC_ALL) + { + const char *name = category[cat_no].name; + const char *val = getenv (name); + + if (lcall[0] != '\0' || val == NULL) + print_assignment (name, + lcall[0] != '\0' ? lcall + : lang[0] != '\0' ? lang + : "POSIX", + true); + else + print_assignment (name, val, false); + } + + /* The last is the LC_ALL value. */ + print_assignment ("LC_ALL", lcall, false); +} + + +/* Subroutine of show_info, below. */ +static void +print_item (struct cat_item *item) +{ + switch (item->value_type) + { + case string: + if (show_keyword_name) + printf ("%s=\"", item->name); + fputs (nl_langinfo (item->item_id) ? 
: "", stdout); + if (show_keyword_name) + putchar ('"'); + putchar ('\n'); + break; + case stringarray: + { + const char *val; + int cnt; + + if (show_keyword_name) + printf ("%s=\"", item->name); + + for (cnt = 0; cnt < item->max - 1; ++cnt) + { + val = nl_langinfo (item->item_id + cnt); + if (val != NULL) + fputs (val, stdout); + putchar (';'); + } + + val = nl_langinfo (item->item_id + cnt); + if (val != NULL) + fputs (val, stdout); + + if (show_keyword_name) + putchar ('"'); + putchar ('\n'); + } + break; + case stringlist: + { + int first = 1; + const char *val = nl_langinfo (item->item_id) ? : ""; + + if (show_keyword_name) + printf ("%s=", item->name); + + for (int cnt = 0; cnt < item->max && *val != '\0'; ++cnt) + { + printf ("%s%s%s%s", first ? "" : ";", + show_keyword_name ? "\"" : "", val, + show_keyword_name ? "\"" : ""); + val = strchr (val, '\0') + 1; + first = 0; + } + putchar ('\n'); + } + break; + case byte: + { + const char *val = nl_langinfo (item->item_id); + + if (show_keyword_name) + printf ("%s=", item->name); + + if (val != NULL) + printf ("%d", *val == '\377' ? -1 : *val); + putchar ('\n'); + } + break; + case bytearray: + { + const char *val = nl_langinfo (item->item_id); + int cnt = val ? strlen (val) : 0; + + if (show_keyword_name) + printf ("%s=", item->name); + + while (cnt > 1) + { + printf ("%d;", *val == '\177' ? -1 : *val); + --cnt; + ++val; + } + + printf ("%d\n", cnt == 0 || *val == '\177' ? -1 : *val); + } + break; + case word: + { + union { unsigned int word; char *string; } val; + val.string = nl_langinfo (item->item_id); + if (show_keyword_name) + printf ("%s=", item->name); + + printf ("%d\n", val.word); + } + break; + case wordarray: + { + int first = 1; + union { unsigned int *wordarray; char *string; } val; + + val.string = nl_langinfo (item->item_id); + if (show_keyword_name) + printf ("%s=", item->name); + + for (int cnt = 0; cnt < item->max; ++cnt) + { + printf ("%s%d", first ? 
"" : ";", val.wordarray[cnt]); + first = 0; + } + putchar ('\n'); + } + break; + case wstring: + case wstringarray: + case wstringlist: + /* We don't print wide character information since the same + information is available in a multibyte string. */ + default: + break; + } +} + +/* Show the information request for NAME. */ +static void +show_info (const char *name) +{ + for (size_t cat_no = 0; cat_no < NCATEGORIES; ++cat_no) + if (cat_no != LC_ALL) + { + if (strcmp (name, category[cat_no].name) == 0) + /* Print the whole category. */ + { + if (show_category_name != 0) + puts (category[cat_no].name); + + for (size_t item_no = 0; + item_no < category[cat_no].number; + ++item_no) + print_item (&category[cat_no].item_desc[item_no]); + + return; + } + + for (size_t item_no = 0; item_no < category[cat_no].number; ++item_no) + if (strcmp (name, category[cat_no].item_desc[item_no].name) == 0) + { + if (show_category_name != 0) + puts (category[cat_no].name); + + print_item (&category[cat_no].item_desc[item_no]); + return; + } + } + + /* The name is not a standard one. + For testing and perhaps advanced use allow some more symbols. */ + locale_special (name, show_category_name, show_keyword_name); +} diff --git a/REORG.TODO/locale/programs/localedef.c b/REORG.TODO/locale/programs/localedef.c new file mode 100644 index 0000000000..6acc1342c7 --- /dev/null +++ b/REORG.TODO/locale/programs/localedef.c @@ -0,0 +1,626 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <argp.h> +#include <errno.h> +#include <fcntl.h> +#include <libintl.h> +#include <locale.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <error.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "localedef.h" +#include "charmap.h" +#include "locfile.h" + +/* Undefine the following line in the production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + + +/* List of copied locales. */ +struct copy_def_list_t *copy_list; + +/* If this is defined be POSIX conform. */ +int posix_conformance; + +/* If not zero give a lot more messages. */ +int verbose; + +/* If not zero suppress warnings and information messages. */ +int be_quiet; + +/* If not zero force output even if warning were issued. */ +static int force_output; + +/* Prefix for output files. */ +const char *output_prefix; + +/* Name of the character map file. */ +static const char *charmap_file; + +/* Name of the locale definition file. */ +static const char *input_file; + +/* Name of the repertoire map file. */ +const char *repertoire_global; + +/* Name of the locale.alias file. */ +const char *alias_file; + +/* List of all locales. */ +static struct localedef_t *locales; + +/* If true don't add locale data to archive. */ +bool no_archive; + +/* If true add named locales to archive. */ +static bool add_to_archive; + +/* If true delete named locales from archive. */ +static bool delete_from_archive; + +/* If true replace archive content when adding. 
*/ +static bool replace_archive; + +/* If true list archive content. */ +static bool list_archive; + +/* Maximum number of retries when opening the locale archive. */ +int max_locarchive_open_retry = 10; + + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +#define OPT_POSIX 301 +#define OPT_QUIET 302 +#define OPT_PREFIX 304 +#define OPT_NO_ARCHIVE 305 +#define OPT_ADD_TO_ARCHIVE 306 +#define OPT_REPLACE 307 +#define OPT_DELETE_FROM_ARCHIVE 308 +#define OPT_LIST_ARCHIVE 309 +#define OPT_LITTLE_ENDIAN 400 +#define OPT_BIG_ENDIAN 401 + +/* Definitions of arguments for argp functions. */ +static const struct argp_option options[] = +{ + { NULL, 0, NULL, 0, N_("Input Files:") }, + { "charmap", 'f', N_("FILE"), 0, + N_("Symbolic character names defined in FILE") }, + { "inputfile", 'i', N_("FILE"), 0, + N_("Source definitions are found in FILE") }, + { "repertoire-map", 'u', N_("FILE"), 0, + N_("FILE contains mapping from symbolic names to UCS4 values") }, + + { NULL, 0, NULL, 0, N_("Output control:") }, + { "force", 'c', NULL, 0, + N_("Create output even if warning messages were issued") }, + { "prefix", OPT_PREFIX, N_("PATH"), 0, N_("Optional output file prefix") }, + { "posix", OPT_POSIX, NULL, 0, N_("Strictly conform to POSIX") }, + { "quiet", OPT_QUIET, NULL, 0, + N_("Suppress warnings and information messages") }, + { "verbose", 'v', NULL, 0, N_("Print more messages") }, + { NULL, 0, NULL, 0, N_("Archive control:") }, + { "no-archive", OPT_NO_ARCHIVE, NULL, 0, + N_("Don't add new data to archive") }, + { "add-to-archive", OPT_ADD_TO_ARCHIVE, NULL, 0, + N_("Add locales named by parameters to archive") }, + { "replace", OPT_REPLACE, NULL, 0, N_("Replace existing archive content") }, + { "delete-from-archive", OPT_DELETE_FROM_ARCHIVE, NULL, 0, + N_("Remove locales named by parameters from archive") }, + { "list-archive", 
OPT_LIST_ARCHIVE, NULL, 0, N_("List content of archive") }, + { "alias-file", 'A', N_("FILE"), 0, + N_("locale.alias file to consult when making archive")}, + { "little-endian", OPT_LITTLE_ENDIAN, NULL, 0, + N_("Generate little-endian output") }, + { "big-endian", OPT_BIG_ENDIAN, NULL, 0, + N_("Generate big-endian output") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Short description of program. */ +static const char doc[] = N_("Compile locale specification"); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("\ +NAME\n\ +[--add-to-archive|--delete-from-archive] FILE...\n\ +--list-archive [FILE]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. */ +static char *more_help (int key, const char *text, void *input); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + + +/* Prototypes for local functions. */ +static void error_print (void); +static const char *construct_output_path (char *path); +static const char *normalize_codeset (const char *codeset, size_t name_len); + + +int +main (int argc, char *argv[]) +{ + const char *output_path; + int cannot_write_why; + struct charmap_t *charmap; + struct localedef_t global; + int remaining; + + /* Set initial values for global variables. */ + copy_list = NULL; + posix_conformance = getenv ("POSIXLY_CORRECT") != NULL; + error_print_progname = error_print; + + /* Set locale. Do not set LC_ALL because the other categories must + not be affected (according to POSIX.2). */ + setlocale (LC_MESSAGES, ""); + setlocale (LC_CTYPE, ""); + + /* Initialize the message catalog. */ + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ + argp_err_exit_status = 4; + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + /* Handle a few special cases. 
*/ + if (list_archive) + show_archive_content (remaining > 1 ? argv[remaining] : NULL, verbose); + if (add_to_archive) + return add_locales_to_archive (argc - remaining, &argv[remaining], + replace_archive); + if (delete_from_archive) + return delete_locales_from_archive (argc - remaining, &argv[remaining]); + + /* POSIX.2 requires to be verbose about missing characters in the + character map. */ + verbose |= posix_conformance; + + if (argc - remaining != 1) + { + /* We need exactly one non-option parameter. */ + argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR, + program_invocation_short_name); + exit (4); + } + + /* The parameter describes the output path of the constructed files. + If the described files cannot be written return a NULL pointer. */ + output_path = construct_output_path (argv[remaining]); + if (output_path == NULL && ! no_archive) + error (4, errno, _("cannot create directory for output files")); + cannot_write_why = errno; + + /* Now that the parameters are processed we have to reset the local + ctype locale. (P1003.2 4.35.5.2) */ + setlocale (LC_CTYPE, "POSIX"); + + /* Look whether the system really allows locale definitions. POSIX + defines error code 3 for this situation so I think it must be + a fatal error (see P1003.2 4.35.8). */ + if (sysconf (_SC_2_LOCALEDEF) < 0) + WITH_CUR_LOCALE (error (3, 0, _("\ +FATAL: system does not define `_POSIX2_LOCALEDEF'"))); + + /* Process charmap file. */ + charmap = charmap_read (charmap_file, verbose, 1, be_quiet, 1); + + /* Add the first entry in the locale list. */ + memset (&global, '\0', sizeof (struct localedef_t)); + global.name = input_file ?: "/dev/stdin"; + global.needed = ALL_LOCALES; + locales = &global; + + /* Now read the locale file. */ + if (locfile_read (&global, charmap) != 0) + WITH_CUR_LOCALE (error (4, errno, _("\ +cannot open locale definition file `%s'"), input_file)); + + /* Perhaps we saw some `copy' instructions. 
*/ + while (1) + { + struct localedef_t *runp = locales; + + while (runp != NULL && (runp->needed & runp->avail) == runp->needed) + runp = runp->next; + + if (runp == NULL) + /* Everything read. */ + break; + + if (locfile_read (runp, charmap) != 0) + WITH_CUR_LOCALE (error (4, errno, _("\ +cannot open locale definition file `%s'"), runp->name)); + } + + /* Check the categories we processed in source form. */ + check_all_categories (locales, charmap); + + /* We are now able to write the data files. If warning were given we + do it only if it is explicitly requested (--force). */ + if (error_message_count == 0 || force_output != 0) + { + if (cannot_write_why != 0) + WITH_CUR_LOCALE (error (4, cannot_write_why, _("\ +cannot write output files to `%s'"), output_path ? : argv[remaining])); + else + write_all_categories (locales, charmap, argv[remaining], output_path); + } + else + WITH_CUR_LOCALE (error (4, 0, _("\ +no output file produced because warnings were issued"))); + + /* This exit status is prescribed by POSIX.2 4.35.7. */ + exit (error_message_count != 0); +} + + +/* Handle program arguments. 
*/ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case OPT_QUIET: + be_quiet = 1; + break; + case OPT_POSIX: + posix_conformance = 1; + break; + case OPT_PREFIX: + output_prefix = arg; + break; + case OPT_NO_ARCHIVE: + no_archive = true; + break; + case OPT_ADD_TO_ARCHIVE: + add_to_archive = true; + break; + case OPT_REPLACE: + replace_archive = true; + break; + case OPT_DELETE_FROM_ARCHIVE: + delete_from_archive = true; + break; + case OPT_LIST_ARCHIVE: + list_archive = true; + break; + case OPT_LITTLE_ENDIAN: + set_big_endian (false); + break; + case OPT_BIG_ENDIAN: + set_big_endian (true); + break; + case 'c': + force_output = 1; + break; + case 'f': + charmap_file = arg; + break; + case 'A': + alias_file = arg; + break; + case 'i': + input_file = arg; + break; + case 'u': + repertoire_global = arg; + break; + case 'v': + verbose = 1; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + char *cp; + char *tp; + + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + if (asprintf (&tp, gettext ("\ +For bug reporting instructions, please see:\n\ +%s.\n"), REPORT_BUGS_TO) < 0) + return NULL; + if (asprintf (&cp, gettext ("\ +System's directory for character maps : %s\n\ + repertoire maps: %s\n\ + locale path : %s\n\ +%s"), + CHARMAP_PATH, REPERTOIREMAP_PATH, LOCALE_PATH, tp) < 0) + { + free (tp); + return NULL; + } + return cp; + default: + break; + } + return (char *) text; +} + +/* Print the version information. */ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "localedef %s%s\n", PKGVERSION, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. 
There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), "2017"); + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); +} + + +/* The address of this function will be assigned to the hook in the error + functions. */ +static void +error_print (void) +{ +} + + +/* The parameter to localedef describes the output path. If it does + contain a '/' character it is a relative path. Otherwise it names the + locale this definition is for. */ +static const char * +construct_output_path (char *path) +{ + const char *normal = NULL; + char *result; + char *endp; + + if (strchr (path, '/') == NULL) + { + /* This is a system path. First examine whether the locale name + contains a reference to the codeset. This should be + normalized. */ + char *startp; + + startp = path; + /* We must be prepared for finding a CEN name or a location of + the introducing `.' where it is not possible anymore. */ + while (*startp != '\0' && *startp != '@' && *startp != '.') + ++startp; + if (*startp == '.') + { + /* We found a codeset specification. Now find the end. */ + endp = ++startp; + while (*endp != '\0' && *endp != '@') + ++endp; + + if (endp > startp) + normal = normalize_codeset (startp, endp - startp); + } + else + /* This is to keep gcc quiet. */ + endp = NULL; + + /* We put an additional '\0' at the end of the string because at + the end of the function we need another byte for the trailing + '/'. */ + ssize_t n; + if (normal == NULL) + n = asprintf (&result, "%s%s/%s%c", output_prefix ?: "", + COMPLOCALEDIR, path, '\0'); + else + n = asprintf (&result, "%s%s/%.*s%s%s%c", + output_prefix ?: "", COMPLOCALEDIR, + (int) (startp - path), path, normal, endp, '\0'); + + if (n < 0) + return NULL; + + endp = result + n - 1; + } + else + { + /* This is a user path. Please note the additional byte in the + memory allocation. 
*/ + size_t len = strlen (path) + 1; + result = xmalloc (len + 1); + endp = mempcpy (result, path, len) - 1; + + /* If the user specified an output path we cannot add the output + to the archive. */ + no_archive = true; + } + + errno = 0; + + if (no_archive && euidaccess (result, W_OK) == -1) + /* Perhaps the directory does not exist now. Try to create it. */ + if (errno == ENOENT) + { + errno = 0; + if (mkdir (result, 0777) < 0) + return NULL; + } + + *endp++ = '/'; + *endp = '\0'; + + return result; +} + + +/* Normalize codeset name. There is no standard for the codeset + names. Normalization allows the user to use any of the common + names. */ +static const char * +normalize_codeset (const char *codeset, size_t name_len) +{ + int len = 0; + int only_digit = 1; + char *retval; + char *wp; + size_t cnt; + + for (cnt = 0; cnt < name_len; ++cnt) + if (isalnum (codeset[cnt])) + { + ++len; + + if (isalpha (codeset[cnt])) + only_digit = 0; + } + + retval = (char *) malloc ((only_digit ? 3 : 0) + len + 1); + + if (retval != NULL) + { + if (only_digit) + wp = stpcpy (retval, "iso"); + else + wp = retval; + + for (cnt = 0; cnt < name_len; ++cnt) + if (isalpha (codeset[cnt])) + *wp++ = tolower (codeset[cnt]); + else if (isdigit (codeset[cnt])) + *wp++ = codeset[cnt]; + + *wp = '\0'; + } + + return (const char *) retval; +} + + +struct localedef_t * +add_to_readlist (int category, const char *name, const char *repertoire_name, + int generate, struct localedef_t *copy_locale) +{ + struct localedef_t *runp = locales; + + while (runp != NULL && strcmp (name, runp->name) != 0) + runp = runp->next; + + if (runp == NULL) + { + /* Add a new entry at the end. 
*/ + struct localedef_t *newp; + + assert (generate == 1); + + newp = xcalloc (1, sizeof (struct localedef_t)); + newp->name = name; + newp->repertoire_name = repertoire_name; + + if (locales == NULL) + runp = locales = newp; + else + { + runp = locales; + while (runp->next != NULL) + runp = runp->next; + runp = runp->next = newp; + } + } + + if (generate + && (runp->needed & (1 << category)) != 0 + && (runp->avail & (1 << category)) == 0) + WITH_CUR_LOCALE (error (5, 0, _("\ +circular dependencies between locale definitions"))); + + if (copy_locale != NULL) + { + if (runp->categories[category].generic != NULL) + WITH_CUR_LOCALE (error (5, 0, _("\ +cannot add already read locale `%s' a second time"), name)); + else + runp->categories[category].generic = + copy_locale->categories[category].generic; + } + + runp->needed |= 1 << category; + + return runp; +} + + +struct localedef_t * +find_locale (int category, const char *name, const char *repertoire_name, + const struct charmap_t *charmap) +{ + struct localedef_t *result; + + /* Find the locale, but do not generate it since this would be a bug. */ + result = add_to_readlist (category, name, repertoire_name, 0, NULL); + + assert (result != NULL); + + if ((result->avail & (1 << category)) == 0 + && locfile_read (result, charmap) != 0) + WITH_CUR_LOCALE (error (4, errno, _("\ +cannot open locale definition file `%s'"), result->name)); + + return result; +} + + +struct localedef_t * +load_locale (int category, const char *name, const char *repertoire_name, + const struct charmap_t *charmap, struct localedef_t *copy_locale) +{ + struct localedef_t *result; + + /* Generate the locale if it does not exist. 
*/ + result = add_to_readlist (category, name, repertoire_name, 1, copy_locale); + + assert (result != NULL); + + if ((result->avail & (1 << category)) == 0 + && locfile_read (result, charmap) != 0) + WITH_CUR_LOCALE (error (4, errno, _("\ +cannot open locale definition file `%s'"), result->name)); + + return result; +} diff --git a/REORG.TODO/locale/programs/localedef.h b/REORG.TODO/locale/programs/localedef.h new file mode 100644 index 0000000000..74a2eba74a --- /dev/null +++ b/REORG.TODO/locale/programs/localedef.h @@ -0,0 +1,177 @@ +/* General definitions for localedef(1). + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _LOCALEDEF_H +#define _LOCALEDEF_H 1 + +/* Get the basic locale definitions. */ +#include <errno.h> +#include <locale.h> +#include <stdbool.h> +#include <stddef.h> + +#include "repertoire.h" +#include "../locarchive.h" + + +/* We need a bitmask for the locales. 
*/ +enum +{ + CTYPE_LOCALE = 1 << LC_CTYPE, + NUMERIC_LOCALE = 1 << LC_NUMERIC, + TIME_LOCALE = 1 << LC_TIME, + COLLATE_LOCALE = 1 << LC_COLLATE, + MONETARY_LOCALE = 1 << LC_MONETARY, + MESSAGES_LOCALE = 1 << LC_MESSAGES, + PAPER_LOCALE = 1 << LC_PAPER, + NAME_LOCALE = 1 << LC_NAME, + ADDRESS_LOCALE = 1 << LC_ADDRESS, + TELEPHONE_LOCALE = 1 << LC_TELEPHONE, + MEASUREMENT_LOCALE = 1 << LC_MEASUREMENT, + IDENTIFICATION_LOCALE = 1 << LC_IDENTIFICATION, + ALL_LOCALES = (1 << LC_CTYPE + | 1 << LC_NUMERIC + | 1 << LC_TIME + | 1 << LC_COLLATE + | 1 << LC_MONETARY + | 1 << LC_MESSAGES + | 1 << LC_PAPER + | 1 << LC_NAME + | 1 << LC_ADDRESS + | 1 << LC_TELEPHONE + | 1 << LC_MEASUREMENT + | 1 << LC_IDENTIFICATION) +}; + + +/* Opaque types for the different locales. */ +struct locale_ctype_t; +struct locale_collate_t; +struct locale_monetary_t; +struct locale_numeric_t; +struct locale_time_t; +struct locale_messages_t; +struct locale_paper_t; +struct locale_name_t; +struct locale_address_t; +struct locale_telephone_t; +struct locale_measurement_t; +struct locale_identification_t; + + +/* Definitions for the locale. */ +struct localedef_t +{ + struct localedef_t *next; + + const char *name; + + int needed; + int avail; + + union + { + void *generic; + struct locale_ctype_t *ctype; + struct locale_collate_t *collate; + struct locale_monetary_t *monetary; + struct locale_numeric_t *numeric; + struct locale_time_t *time; + struct locale_messages_t *messages; + struct locale_paper_t *paper; + struct locale_name_t *name; + struct locale_address_t *address; + struct locale_telephone_t *telephone; + struct locale_measurement_t *measurement; + struct locale_identification_t *identification; + } categories[__LC_LAST]; + + size_t len[__LC_LAST]; + + const char *copy_name[__LC_LAST]; + + const char *repertoire_name; +}; + + +/* Global variables of the localedef program. 
*/ +extern int verbose; +extern int be_quiet; +extern const char *repertoire_global; +extern int max_locarchive_open_retry; +extern bool no_archive; +extern const char *alias_file; + + +/* Prototypes for a few program-wide used functions. */ +#include <programs/xmalloc.h> + + +/* Wrapper to switch LC_CTYPE back to the locale specified in the + environment for output. */ +#define WITH_CUR_LOCALE(stmt) \ + do { \ + int saved_errno = errno; \ + const char *cur_locale_ = setlocale (LC_CTYPE, NULL); \ + setlocale (LC_CTYPE, ""); \ + errno = saved_errno; \ + stmt; \ + setlocale (LC_CTYPE, cur_locale_); \ + } while (0) + + +/* Mark given locale as to be read. */ +extern struct localedef_t *add_to_readlist (int locale, const char *name, + const char *repertoire_name, + int generate, + struct localedef_t *copy_locale); + +/* Find the information for the locale NAME. */ +extern struct localedef_t *find_locale (int locale, const char *name, + const char *repertoire_name, + const struct charmap_t *charmap); + +/* Load (if necessary) the information for the locale NAME. */ +extern struct localedef_t *load_locale (int locale, const char *name, + const char *repertoire_name, + const struct charmap_t *charmap, + struct localedef_t *copy_locale); + + +/* Open the locale archive. */ +extern void open_archive (struct locarhandle *ah, bool readonly); + +/* Close the locale archive. */ +extern void close_archive (struct locarhandle *ah); + +/* Add given locale data to the archive. */ +extern int add_locale_to_archive (struct locarhandle *ah, const char *name, + locale_data_t data, bool replace); + +/* Add content of named directories to locale archive. */ +extern int add_locales_to_archive (size_t nlist, char *list[], bool replace); + +/* Removed named locales from archive. */ +extern int delete_locales_from_archive (size_t nlist, char *list[]); + +/* List content of locale archive. If FNAME is non-null use that as + the locale archive to list, otherwise the default. 
*/ +extern void show_archive_content (const char *fname, + int verbose) __attribute__ ((noreturn)); + +#endif /* localedef.h */ diff --git a/REORG.TODO/locale/programs/locarchive.c b/REORG.TODO/locale/programs/locarchive.c new file mode 100644 index 0000000000..f67b7b8d99 --- /dev/null +++ b/REORG.TODO/locale/programs/locarchive.c @@ -0,0 +1,1757 @@ +/* Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <assert.h> +#include <dirent.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <inttypes.h> +#include <libintl.h> +#include <locale.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/shm.h> +#include <sys/stat.h> + +#include <libc-mmap.h> +#include <libc-pointer-arith.h> +#include "../../crypt/md5.h" +#include "../localeinfo.h" +#include "../locarchive.h" +#include "localedef.h" +#include "locfile.h" + +/* Define the hash function. We define the function as static inline. + We must change the name so as not to conflict with simple-hash.h. 
*/ +#define compute_hashval static archive_hashval +#define hashval_t uint32_t +#include "hashval.h" +#undef compute_hashval + +extern const char *output_prefix; + +#define ARCHIVE_NAME COMPLOCALEDIR "/locale-archive" + +static const char *locnames[] = + { +#define DEFINE_CATEGORY(category, category_name, items, a) \ + [category] = category_name, +#include "categories.def" +#undef DEFINE_CATEGORY + }; + + +/* Size of the initial archive header. */ +#define INITIAL_NUM_NAMES 900 +#define INITIAL_SIZE_STRINGS 7500 +#define INITIAL_NUM_LOCREC 420 +#define INITIAL_NUM_SUMS 2000 + + +/* Get and set values (possibly endian-swapped) in structures mapped + from or written directly to locale archives. */ +#define GET(FIELD) maybe_swap_uint32 (FIELD) +#define SET(FIELD, VALUE) ((FIELD) = maybe_swap_uint32 (VALUE)) +#define INC(FIELD, INCREMENT) SET (FIELD, GET (FIELD) + (INCREMENT)) + + +/* Size of the reserved address space area. */ +#define RESERVE_MMAP_SIZE 512 * 1024 * 1024 + +/* To prepare for enlargements of the mmaped area reserve some address + space. On some machines, being a file mapping rather than an anonymous + mapping affects the address selection. So do this mapping from the + actual file, even though it's only a dummy to reserve address space. 
*/ +static void * +prepare_address_space (int fd, size_t total, size_t *reserved, int *xflags, + void **mmap_base, size_t *mmap_len) +{ + if (total < RESERVE_MMAP_SIZE) + { + void *p = mmap64 (NULL, RESERVE_MMAP_SIZE, PROT_NONE, MAP_SHARED, fd, 0); + if (p != MAP_FAILED) + { + void *aligned_p = PTR_ALIGN_UP (p, MAP_FIXED_ALIGNMENT); + size_t align_adjust = aligned_p - p; + *mmap_base = p; + *mmap_len = RESERVE_MMAP_SIZE; + assert (align_adjust < RESERVE_MMAP_SIZE); + *reserved = RESERVE_MMAP_SIZE - align_adjust; + *xflags = MAP_FIXED; + return aligned_p; + } + } + + *reserved = total; + *xflags = 0; + *mmap_base = NULL; + *mmap_len = 0; + return NULL; +} + + +static void +create_archive (const char *archivefname, struct locarhandle *ah) +{ + int fd; + char fname[strlen (archivefname) + sizeof (".XXXXXX")]; + struct locarhead head; + size_t total; + + strcpy (stpcpy (fname, archivefname), ".XXXXXX"); + + /* Create a temporary file in the correct directory. */ + fd = mkstemp (fname); + if (fd == -1) + error (EXIT_FAILURE, errno, _("cannot create temporary file: %s"), fname); + + /* Create the initial content of the archive. 
*/ + SET (head.magic, AR_MAGIC); + SET (head.serial, 0); + SET (head.namehash_offset, sizeof (struct locarhead)); + SET (head.namehash_used, 0); + SET (head.namehash_size, next_prime (INITIAL_NUM_NAMES)); + + SET (head.string_offset, + (GET (head.namehash_offset) + + GET (head.namehash_size) * sizeof (struct namehashent))); + SET (head.string_used, 0); + SET (head.string_size, INITIAL_SIZE_STRINGS); + + SET (head.locrectab_offset, + GET (head.string_offset) + GET (head.string_size)); + SET (head.locrectab_used, 0); + SET (head.locrectab_size, INITIAL_NUM_LOCREC); + + SET (head.sumhash_offset, + (GET (head.locrectab_offset) + + GET (head.locrectab_size) * sizeof (struct locrecent))); + SET (head.sumhash_used, 0); + SET (head.sumhash_size, next_prime (INITIAL_NUM_SUMS)); + + total = (GET (head.sumhash_offset) + + GET (head.sumhash_size) * sizeof (struct sumhashent)); + + /* Write out the header and create room for the other data structures. */ + if (TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head))) != sizeof (head)) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot initialize archive file")); + } + + if (ftruncate64 (fd, total) != 0) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot resize archive file")); + } + + size_t reserved, mmap_len; + int xflags; + void *mmap_base; + void *p = prepare_address_space (fd, total, &reserved, &xflags, &mmap_base, + &mmap_len); + + /* Map the header and all the administration data structures. */ + p = mmap64 (p, total, PROT_READ | PROT_WRITE, MAP_SHARED | xflags, fd, 0); + if (p == MAP_FAILED) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot map archive header")); + } + + /* Now try to rename it. We don't use the rename function since + this would overwrite a file which has been created in + parallel. */ + if (link (fname, archivefname) == -1) + { + int errval = errno; + + /* We cannot use the just created file. 
*/ + close (fd); + unlink (fname); + + if (errval == EEXIST) + { + /* There is already an archive. Must have been a localedef run + which happened in parallel. Simply open this file then. */ + open_archive (ah, false); + return; + } + + error (EXIT_FAILURE, errval, _("failed to create new locale archive")); + } + + /* Remove the temporary name. */ + unlink (fname); + + /* Make the file globally readable. */ + if (fchmod (fd, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH) == -1) + { + int errval = errno; + unlink (archivefname); + error (EXIT_FAILURE, errval, + _("cannot change mode of new locale archive")); + } + + ah->fname = NULL; + ah->fd = fd; + ah->mmap_base = mmap_base; + ah->mmap_len = mmap_len; + ah->addr = p; + ah->mmaped = total; + ah->reserved = reserved; +} + + +/* This structure and qsort comparator function are used below to sort an + old archive's locrec table in order of data position in the file. */ +struct oldlocrecent +{ + unsigned int cnt; + struct locrecent *locrec; +}; + +static int +oldlocrecentcmp (const void *a, const void *b) +{ + struct locrecent *la = ((const struct oldlocrecent *) a)->locrec; + struct locrecent *lb = ((const struct oldlocrecent *) b)->locrec; + uint32_t start_a = -1, end_a = 0; + uint32_t start_b = -1, end_b = 0; + int cnt; + + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + if (GET (la->record[cnt].offset) < start_a) + start_a = GET (la->record[cnt].offset); + if (GET (la->record[cnt].offset) + GET (la->record[cnt].len) > end_a) + end_a = GET (la->record[cnt].offset) + GET (la->record[cnt].len); + } + assert (start_a != (uint32_t)-1); + assert (end_a != 0); + + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + if (GET (lb->record[cnt].offset) < start_b) + start_b = GET (lb->record[cnt].offset); + if (GET (lb->record[cnt].offset) + GET (lb->record[cnt].len) > end_b) + end_b = GET (lb->record[cnt].offset) + GET (lb->record[cnt].len); + } + assert (start_b != (uint32_t)-1); + assert (end_b != 0); + + if 
(start_a != start_b) + return (int)start_a - (int)start_b; + return (int)end_a - (int)end_b; +} + + +/* forward decls for below */ +static uint32_t add_locale (struct locarhandle *ah, const char *name, + locale_data_t data, bool replace); +static void add_alias (struct locarhandle *ah, const char *alias, + bool replace, const char *oldname, + uint32_t *locrec_offset_p); + + +static bool +file_data_available_p (struct locarhandle *ah, uint32_t offset, uint32_t size) +{ + if (offset < ah->mmaped && offset + size <= ah->mmaped) + return true; + + struct stat64 st; + if (fstat64 (ah->fd, &st) != 0) + return false; + + if (st.st_size > ah->reserved) + return false; + + size_t start = ALIGN_DOWN (ah->mmaped, MAP_FIXED_ALIGNMENT); + void *p = mmap64 (ah->addr + start, st.st_size - start, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, + ah->fd, start); + if (p == MAP_FAILED) + { + ah->mmaped = start; + return false; + } + + ah->mmaped = st.st_size; + return true; +} + + +static int +compare_from_file (struct locarhandle *ah, void *p1, uint32_t offset2, + uint32_t size) +{ + void *p2 = xmalloc (size); + if (pread (ah->fd, p2, size, offset2) != size) + WITH_CUR_LOCALE (error (4, errno, + _("cannot read data from locale archive"))); + + int res = memcmp (p1, p2, size); + free (p2); + return res; +} + + +static void +enlarge_archive (struct locarhandle *ah, const struct locarhead *head) +{ + struct stat64 st; + int fd; + struct locarhead newhead; + size_t total; + unsigned int cnt, loccnt; + struct namehashent *oldnamehashtab; + struct locarhandle new_ah; + size_t prefix_len = output_prefix ? strlen (output_prefix) : 0; + char archivefname[prefix_len + sizeof (ARCHIVE_NAME)]; + char fname[prefix_len + sizeof (ARCHIVE_NAME) + sizeof (".XXXXXX") - 1]; + + if (output_prefix) + memcpy (archivefname, output_prefix, prefix_len); + strcpy (archivefname + prefix_len, ARCHIVE_NAME); + strcpy (stpcpy (fname, archivefname), ".XXXXXX"); + + /* Not all of the old file has to be mapped. 
Change this now this + we will have to access the whole content. */ + if (fstat64 (ah->fd, &st) != 0) + enomap: + error (EXIT_FAILURE, errno, _("cannot map locale archive file")); + + if (st.st_size < ah->reserved) + ah->addr = mmap64 (ah->addr, st.st_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, ah->fd, 0); + else + { + if (ah->mmap_base) + munmap (ah->mmap_base, ah->mmap_len); + else + munmap (ah->addr, ah->reserved); + ah->addr = mmap64 (NULL, st.st_size, PROT_READ | PROT_WRITE, + MAP_SHARED, ah->fd, 0); + ah->reserved = st.st_size; + ah->mmap_base = NULL; + ah->mmap_len = 0; + head = ah->addr; + } + if (ah->addr == MAP_FAILED) + goto enomap; + ah->mmaped = st.st_size; + + /* Create a temporary file in the correct directory. */ + fd = mkstemp (fname); + if (fd == -1) + error (EXIT_FAILURE, errno, _("cannot create temporary file: %s"), fname); + + /* Copy the existing head information. */ + newhead = *head; + + /* Create the new archive header. The sizes of the various tables + should be double from what is currently used. */ + SET (newhead.namehash_size, + MAX (next_prime (2 * GET (newhead.namehash_used)), + GET (newhead.namehash_size))); + if (verbose) + printf ("name: size: %u, used: %d, new: size: %u\n", + GET (head->namehash_size), + GET (head->namehash_used), GET (newhead.namehash_size)); + + SET (newhead.string_offset, (GET (newhead.namehash_offset) + + (GET (newhead.namehash_size) + * sizeof (struct namehashent)))); + /* Keep the string table size aligned to 4 bytes, so that + all the struct { uint32_t } types following are happy. 
*/ + SET (newhead.string_size, MAX ((2 * GET (newhead.string_used) + 3) & -4, + GET (newhead.string_size))); + + SET (newhead.locrectab_offset, + GET (newhead.string_offset) + GET (newhead.string_size)); + SET (newhead.locrectab_size, MAX (2 * GET (newhead.locrectab_used), + GET (newhead.locrectab_size))); + + SET (newhead.sumhash_offset, (GET (newhead.locrectab_offset) + + (GET (newhead.locrectab_size) + * sizeof (struct locrecent)))); + SET (newhead.sumhash_size, + MAX (next_prime (2 * GET (newhead.sumhash_used)), + GET (newhead.sumhash_size))); + + total = (GET (newhead.sumhash_offset) + + GET (newhead.sumhash_size) * sizeof (struct sumhashent)); + + /* The new file is empty now. */ + SET (newhead.namehash_used, 0); + SET (newhead.string_used, 0); + SET (newhead.locrectab_used, 0); + SET (newhead.sumhash_used, 0); + + /* Write out the header and create room for the other data structures. */ + if (TEMP_FAILURE_RETRY (write (fd, &newhead, sizeof (newhead))) + != sizeof (newhead)) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot initialize archive file")); + } + + if (ftruncate64 (fd, total) != 0) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot resize archive file")); + } + + size_t reserved, mmap_len; + int xflags; + void *mmap_base; + void *p = prepare_address_space (fd, total, &reserved, &xflags, &mmap_base, + &mmap_len); + + /* Map the header and all the administration data structures. */ + p = mmap64 (p, total, PROT_READ | PROT_WRITE, MAP_SHARED | xflags, fd, 0); + if (p == MAP_FAILED) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot map archive header")); + } + + /* Lock the new file. 
*/ + if (lockf64 (fd, F_LOCK, total) != 0) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot lock new archive")); + } + + new_ah.mmaped = total; + new_ah.mmap_base = mmap_base; + new_ah.mmap_len = mmap_len; + new_ah.addr = p; + new_ah.fd = fd; + new_ah.reserved = reserved; + + /* Walk through the hash name hash table to find out what data is + still referenced and transfer it into the new file. */ + oldnamehashtab = (struct namehashent *) ((char *) ah->addr + + GET (head->namehash_offset)); + + /* Sort the old locrec table in order of data position. */ + struct oldlocrecent oldlocrecarray[GET (head->namehash_size)]; + for (cnt = 0, loccnt = 0; cnt < GET (head->namehash_size); ++cnt) + if (GET (oldnamehashtab[cnt].locrec_offset) != 0) + { + oldlocrecarray[loccnt].cnt = cnt; + oldlocrecarray[loccnt++].locrec + = (struct locrecent *) ((char *) ah->addr + + GET (oldnamehashtab[cnt].locrec_offset)); + } + qsort (oldlocrecarray, loccnt, sizeof (struct oldlocrecent), + oldlocrecentcmp); + + uint32_t last_locrec_offset = 0; + for (cnt = 0; cnt < loccnt; ++cnt) + { + /* Insert this entry in the new hash table. 
*/ + locale_data_t old_data; + unsigned int idx; + struct locrecent *oldlocrec = oldlocrecarray[cnt].locrec; + + for (idx = 0; idx < __LC_LAST; ++idx) + if (idx != LC_ALL) + { + old_data[idx].size = GET (oldlocrec->record[idx].len); + old_data[idx].addr + = ((char *) ah->addr + GET (oldlocrec->record[idx].offset)); + + __md5_buffer (old_data[idx].addr, old_data[idx].size, + old_data[idx].sum); + } + + if (cnt > 0 && oldlocrecarray[cnt - 1].locrec == oldlocrec) + { + const char *oldname + = ((char *) ah->addr + + GET (oldnamehashtab[oldlocrecarray[cnt + - 1].cnt].name_offset)); + + add_alias + (&new_ah, + ((char *) ah->addr + + GET (oldnamehashtab[oldlocrecarray[cnt].cnt].name_offset)), + 0, oldname, &last_locrec_offset); + continue; + } + + last_locrec_offset = + add_locale + (&new_ah, + ((char *) ah->addr + + GET (oldnamehashtab[oldlocrecarray[cnt].cnt].name_offset)), + old_data, 0); + if (last_locrec_offset == 0) + error (EXIT_FAILURE, 0, _("cannot extend locale archive file")); + } + + /* Make the file globally readable. */ + if (fchmod (fd, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH) == -1) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, + _("cannot change mode of resized locale archive")); + } + + /* Rename the new file. */ + if (rename (fname, archivefname) != 0) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot rename new archive")); + } + + /* Close the old file. */ + close_archive (ah); + + /* Add the information for the new one. */ + *ah = new_ah; +} + + +void +open_archive (struct locarhandle *ah, bool readonly) +{ + struct stat64 st; + struct stat64 st2; + int fd; + struct locarhead head; + int retry = 0; + size_t prefix_len = output_prefix ? strlen (output_prefix) : 0; + char default_fname[prefix_len + sizeof (ARCHIVE_NAME)]; + const char *archivefname = ah->fname; + + /* If ah has a non-NULL fname open that otherwise open the default. 
*/ + if (archivefname == NULL) + { + archivefname = default_fname; + if (output_prefix) + memcpy (default_fname, output_prefix, prefix_len); + strcpy (default_fname + prefix_len, ARCHIVE_NAME); + } + + while (1) + { + /* Open the archive. We must have exclusive write access. */ + fd = open64 (archivefname, readonly ? O_RDONLY : O_RDWR); + if (fd == -1) + { + /* Maybe the file does not yet exist? If we are opening + the default locale archive we ignore the failure and + list an empty archive, otherwise we print an error + and exit. */ + if (errno == ENOENT && archivefname == default_fname) + { + if (readonly) + { + static const struct locarhead nullhead = + { + .namehash_used = 0, + .namehash_offset = 0, + .namehash_size = 0 + }; + + ah->addr = (void *) &nullhead; + ah->fd = -1; + } + else + create_archive (archivefname, ah); + + return; + } + else + error (EXIT_FAILURE, errno, _("cannot open locale archive \"%s\""), + archivefname); + } + + if (fstat64 (fd, &st) < 0) + error (EXIT_FAILURE, errno, _("cannot stat locale archive \"%s\""), + archivefname); + + if (!readonly && lockf64 (fd, F_LOCK, sizeof (struct locarhead)) == -1) + { + close (fd); + + if (retry++ < max_locarchive_open_retry) + { + struct timespec req; + + /* Wait for a bit. */ + req.tv_sec = 0; + req.tv_nsec = 1000000 * (random () % 500 + 1); + (void) nanosleep (&req, NULL); + + continue; + } + + error (EXIT_FAILURE, errno, _("cannot lock locale archive \"%s\""), + archivefname); + } + + /* One more check. Maybe another process replaced the archive file + with a new, larger one since we opened the file. */ + if (stat64 (archivefname, &st2) == -1 + || st.st_dev != st2.st_dev + || st.st_ino != st2.st_ino) + { + (void) lockf64 (fd, F_ULOCK, sizeof (struct locarhead)); + close (fd); + continue; + } + + /* Leave the loop. */ + break; + } + + /* Read the header. 
*/ + if (TEMP_FAILURE_RETRY (read (fd, &head, sizeof (head))) != sizeof (head)) + { + (void) lockf64 (fd, F_ULOCK, sizeof (struct locarhead)); + error (EXIT_FAILURE, errno, _("cannot read archive header")); + } + + ah->fd = fd; + ah->mmaped = st.st_size; + + size_t reserved, mmap_len; + int xflags; + void *mmap_base; + void *p = prepare_address_space (fd, st.st_size, &reserved, &xflags, + &mmap_base, &mmap_len); + + /* Map the entire file. We might need to compare the category data + in the file with the newly added data. */ + ah->addr = mmap64 (p, st.st_size, PROT_READ | (readonly ? 0 : PROT_WRITE), + MAP_SHARED | xflags, fd, 0); + if (ah->addr == MAP_FAILED) + { + (void) lockf64 (fd, F_ULOCK, sizeof (struct locarhead)); + error (EXIT_FAILURE, errno, _("cannot map archive header")); + } + ah->reserved = reserved; + ah->mmap_base = mmap_base; + ah->mmap_len = mmap_len; +} + + +void +close_archive (struct locarhandle *ah) +{ + if (ah->fd != -1) + { + if (ah->mmap_base) + munmap (ah->mmap_base, ah->mmap_len); + else + munmap (ah->addr, ah->reserved); + close (ah->fd); + } +} + +#include "../../intl/explodename.c" +#include "../../intl/l10nflist.c" + +static struct namehashent * +insert_name (struct locarhandle *ah, + const char *name, size_t name_len, bool replace) +{ + const struct locarhead *const head = ah->addr; + struct namehashent *namehashtab + = (struct namehashent *) ((char *) ah->addr + + GET (head->namehash_offset)); + unsigned int insert_idx, idx, incr; + + /* Hash value of the locale name. */ + uint32_t hval = archive_hashval (name, name_len); + + insert_idx = -1; + idx = hval % GET (head->namehash_size); + incr = 1 + hval % (GET (head->namehash_size) - 2); + + /* If the name_offset field is zero this means this is a + deleted entry and therefore no entry can be found. 
*/ + while (GET (namehashtab[idx].name_offset) != 0) + { + if (GET (namehashtab[idx].hashval) == hval + && (strcmp (name, + (char *) ah->addr + GET (namehashtab[idx].name_offset)) + == 0)) + { + /* Found the entry. */ + if (GET (namehashtab[idx].locrec_offset) != 0 && ! replace) + { + if (! be_quiet) + error (0, 0, _("locale '%s' already exists"), name); + return NULL; + } + + break; + } + + if (GET (namehashtab[idx].hashval) == hval && ! be_quiet) + { + error (0, 0, "hash collision (%u) %s, %s", + hval, name, + (char *) ah->addr + GET (namehashtab[idx].name_offset)); + } + + /* Remember the first place we can insert the new entry. */ + if (GET (namehashtab[idx].locrec_offset) == 0 && insert_idx == -1) + insert_idx = idx; + + idx += incr; + if (idx >= GET (head->namehash_size)) + idx -= GET (head->namehash_size); + } + + /* Add as early as possible. */ + if (insert_idx != -1) + idx = insert_idx; + + SET (namehashtab[idx].hashval, hval); /* no-op if replacing an old entry. */ + return &namehashtab[idx]; +} + +static void +add_alias (struct locarhandle *ah, const char *alias, bool replace, + const char *oldname, uint32_t *locrec_offset_p) +{ + uint32_t locrec_offset = *locrec_offset_p; + struct locarhead *head = ah->addr; + const size_t name_len = strlen (alias); + struct namehashent *namehashent = insert_name (ah, alias, strlen (alias), + replace); + if (namehashent == NULL && ! replace) + return; + + if (GET (namehashent->name_offset) == 0) + { + /* We are adding a new hash entry for this alias. + Determine whether we have to resize the file. */ + if (GET (head->string_used) + name_len + 1 > GET (head->string_size) + || (100 * GET (head->namehash_used) + > 75 * GET (head->namehash_size))) + { + /* The current archive is not large enough. */ + enlarge_archive (ah, head); + + /* The locrecent might have moved, so we have to look up + the old name afresh. 
*/ + namehashent = insert_name (ah, oldname, strlen (oldname), true); + assert (GET (namehashent->name_offset) != 0); + assert (GET (namehashent->locrec_offset) != 0); + *locrec_offset_p = GET (namehashent->locrec_offset); + + /* Tail call to try the whole thing again. */ + add_alias (ah, alias, replace, oldname, locrec_offset_p); + return; + } + + /* Add the name string. */ + memcpy (ah->addr + GET (head->string_offset) + GET (head->string_used), + alias, name_len + 1); + SET (namehashent->name_offset, + GET (head->string_offset) + GET (head->string_used)); + INC (head->string_used, name_len + 1); + + INC (head->namehash_used, 1); + } + + if (GET (namehashent->locrec_offset) != 0) + { + /* Replacing an existing entry. + Mark that we are no longer using the old locrecent. */ + struct locrecent *locrecent + = (struct locrecent *) ((char *) ah->addr + + GET (namehashent->locrec_offset)); + INC (locrecent->refs, -1); + } + + /* Point this entry at the locrecent installed for the main name. */ + SET (namehashent->locrec_offset, locrec_offset); +} + +static int /* qsort comparator used below */ +cmpcategorysize (const void *a, const void *b) +{ + if (*(const void **) a == NULL) + return 1; + if (*(const void **) b == NULL) + return -1; + return ((*(const struct locale_category_data **) a)->size + - (*(const struct locale_category_data **) b)->size); +} + +/* Check the content of the archive for duplicates. Add the content + of the files if necessary. Returns the locrec_offset. */ +static uint32_t +add_locale (struct locarhandle *ah, + const char *name, locale_data_t data, bool replace) +{ + /* First look for the name. If it already exists and we are not + supposed to replace it don't do anything. If it does not exist + we have to allocate a new locale record. 
*/ + size_t name_len = strlen (name); + uint32_t file_offsets[__LC_LAST]; + unsigned int num_new_offsets = 0; + struct sumhashent *sumhashtab; + uint32_t hval; + unsigned int cnt, idx; + struct locarhead *head; + struct namehashent *namehashent; + unsigned int incr; + struct locrecent *locrecent; + off64_t lastoffset; + char *ptr; + struct locale_category_data *size_order[__LC_LAST]; + /* Page size alignment is a minor optimization for locality; use a + common value here rather than making the localedef output depend + on the page size of the system on which localedef is run. See + <https://sourceware.org/glibc/wiki/Development_Todo/Master#Locale_archive_alignment> + for more discussion. */ + const size_t pagesz = 4096; + int small_mask; + + head = ah->addr; + sumhashtab = (struct sumhashent *) ((char *) ah->addr + + GET (head->sumhash_offset)); + + memset (file_offsets, 0, sizeof (file_offsets)); + + size_order[LC_ALL] = NULL; + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + size_order[cnt] = &data[cnt]; + + /* Sort the array in ascending order of data size. */ + qsort (size_order, __LC_LAST, sizeof size_order[0], cmpcategorysize); + + small_mask = 0; + data[LC_ALL].size = 0; + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (size_order[cnt] != NULL) + { + const size_t rounded_size = (size_order[cnt]->size + 15) & -16; + if (data[LC_ALL].size + rounded_size > 2 * pagesz) + { + /* This category makes the small-categories block + stop being small, so this is the end of the road. */ + do + size_order[cnt++] = NULL; + while (cnt < __LC_LAST); + break; + } + data[LC_ALL].size += rounded_size; + small_mask |= 1 << (size_order[cnt] - data); + } + + /* Copy the data for all the small categories into the LC_ALL + pseudo-category. 
*/ + + data[LC_ALL].addr = alloca (data[LC_ALL].size); + memset (data[LC_ALL].addr, 0, data[LC_ALL].size); + + ptr = data[LC_ALL].addr; + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (small_mask & (1 << cnt)) + { + memcpy (ptr, data[cnt].addr, data[cnt].size); + ptr += (data[cnt].size + 15) & -16; + } + __md5_buffer (data[LC_ALL].addr, data[LC_ALL].size, data[LC_ALL].sum); + + /* For each locale category data set determine whether the same data + is already somewhere in the archive. */ + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (small_mask == 0 ? cnt != LC_ALL : !(small_mask & (1 << cnt))) + { + ++num_new_offsets; + + /* Compute the hash value of the checksum to determine a + starting point for the search in the MD5 hash value + table. */ + hval = archive_hashval (data[cnt].sum, 16); + + idx = hval % GET (head->sumhash_size); + incr = 1 + hval % (GET (head->sumhash_size) - 2); + + while (GET (sumhashtab[idx].file_offset) != 0) + { + if (memcmp (data[cnt].sum, sumhashtab[idx].sum, 16) == 0) + { + /* Check the content, there could be a collision of + the hash sum. + + Unfortunately the sumhashent record does not include + the size of the stored data. So we have to search for + it. */ + locrecent + = (struct locrecent *) ((char *) ah->addr + + GET (head->locrectab_offset)); + size_t iloc; + for (iloc = 0; iloc < GET (head->locrectab_used); ++iloc) + if (GET (locrecent[iloc].refs) != 0 + && (GET (locrecent[iloc].record[cnt].offset) + == GET (sumhashtab[idx].file_offset))) + break; + + if (iloc != GET (head->locrectab_used) + && data[cnt].size == GET (locrecent[iloc].record[cnt].len) + /* We have to compare the content. Either we can + have the data mmaped or we have to read from + the file. */ + && (file_data_available_p + (ah, GET (sumhashtab[idx].file_offset), + data[cnt].size) + ? 
memcmp (data[cnt].addr, + (char *) ah->addr + + GET (sumhashtab[idx].file_offset), + data[cnt].size) == 0 + : compare_from_file (ah, data[cnt].addr, + GET (sumhashtab[idx].file_offset), + data[cnt].size) == 0)) + { + /* Found it. */ + file_offsets[cnt] = GET (sumhashtab[idx].file_offset); + --num_new_offsets; + break; + } + } + + idx += incr; + if (idx >= GET (head->sumhash_size)) + idx -= GET (head->sumhash_size); + } + } + + /* Find a slot for the locale name in the hash table. */ + namehashent = insert_name (ah, name, name_len, replace); + if (namehashent == NULL) /* Already exists and !REPLACE. */ + return 0; + + /* Determine whether we have to resize the file. */ + if ((100 * (GET (head->sumhash_used) + num_new_offsets) + > 75 * GET (head->sumhash_size)) + || (GET (namehashent->locrec_offset) == 0 + && (GET (head->locrectab_used) == GET (head->locrectab_size) + || (GET (head->string_used) + name_len + 1 + > GET (head->string_size)) + || (100 * GET (head->namehash_used) + > 75 * GET (head->namehash_size))))) + { + /* The current archive is not large enough. */ + enlarge_archive (ah, head); + return add_locale (ah, name, data, replace); + } + + /* Add the locale data which is not yet in the archive. */ + for (cnt = 0, lastoffset = 0; cnt < __LC_LAST; ++cnt) + if ((small_mask == 0 ? cnt != LC_ALL : !(small_mask & (1 << cnt))) + && file_offsets[cnt] == 0) + { + /* The data for this section is not yet available in the + archive. Append it. */ + off64_t lastpos; + uint32_t md5hval; + + lastpos = lseek64 (ah->fd, 0, SEEK_END); + if (lastpos == (off64_t) -1) + error (EXIT_FAILURE, errno, _("cannot add to locale archive")); + + /* If block of small categories would cross page boundary, + align it unless it immediately follows a large category. 
*/ + if (cnt == LC_ALL && lastoffset != lastpos + && ((((lastpos & (pagesz - 1)) + data[cnt].size + pagesz - 1) + & -pagesz) + > ((data[cnt].size + pagesz - 1) & -pagesz))) + { + size_t sz = pagesz - (lastpos & (pagesz - 1)); + char *zeros = alloca (sz); + + memset (zeros, 0, sz); + if (TEMP_FAILURE_RETRY (write (ah->fd, zeros, sz) != sz)) + error (EXIT_FAILURE, errno, + _("cannot add to locale archive")); + + lastpos += sz; + } + + /* Align all data to a 16 byte boundary. */ + if ((lastpos & 15) != 0) + { + static const char zeros[15] = { 0, }; + + if (TEMP_FAILURE_RETRY (write (ah->fd, zeros, 16 - (lastpos & 15))) + != 16 - (lastpos & 15)) + error (EXIT_FAILURE, errno, _("cannot add to locale archive")); + + lastpos += 16 - (lastpos & 15); + } + + /* Remember the position. */ + file_offsets[cnt] = lastpos; + lastoffset = lastpos + data[cnt].size; + + /* Write the data. */ + if (TEMP_FAILURE_RETRY (write (ah->fd, data[cnt].addr, data[cnt].size)) + != data[cnt].size) + error (EXIT_FAILURE, errno, _("cannot add to locale archive")); + + /* Add the hash value to the hash table. */ + md5hval = archive_hashval (data[cnt].sum, 16); + + idx = md5hval % GET (head->sumhash_size); + incr = 1 + md5hval % (GET (head->sumhash_size) - 2); + + while (GET (sumhashtab[idx].file_offset) != 0) + { + idx += incr; + if (idx >= GET (head->sumhash_size)) + idx -= GET (head->sumhash_size); + } + + memcpy (sumhashtab[idx].sum, data[cnt].sum, 16); + SET (sumhashtab[idx].file_offset, file_offsets[cnt]); + + INC (head->sumhash_used, 1); + } + + lastoffset = file_offsets[LC_ALL]; + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (small_mask & (1 << cnt)) + { + file_offsets[cnt] = lastoffset; + lastoffset += (data[cnt].size + 15) & -16; + } + + if (GET (namehashent->name_offset) == 0) + { + /* Add the name string. 
*/ + memcpy ((char *) ah->addr + GET (head->string_offset) + + GET (head->string_used), + name, name_len + 1); + SET (namehashent->name_offset, + GET (head->string_offset) + GET (head->string_used)); + INC (head->string_used, name_len + 1); + INC (head->namehash_used, 1); + } + + if (GET (namehashent->locrec_offset == 0)) + { + /* Allocate a name location record. */ + SET (namehashent->locrec_offset, (GET (head->locrectab_offset) + + (GET (head->locrectab_used) + * sizeof (struct locrecent)))); + INC (head->locrectab_used, 1); + locrecent = (struct locrecent *) ((char *) ah->addr + + GET (namehashent->locrec_offset)); + SET (locrecent->refs, 1); + } + else + { + /* If there are other aliases pointing to this locrecent, + we still need a new one. If not, reuse the old one. */ + + locrecent = (struct locrecent *) ((char *) ah->addr + + GET (namehashent->locrec_offset)); + if (GET (locrecent->refs) > 1) + { + INC (locrecent->refs, -1); + SET (namehashent->locrec_offset, (GET (head->locrectab_offset) + + (GET (head->locrectab_used) + * sizeof (struct locrecent)))); + INC (head->locrectab_used, 1); + locrecent + = (struct locrecent *) ((char *) ah->addr + + GET (namehashent->locrec_offset)); + SET (locrecent->refs, 1); + } + } + + /* Fill in the table with the locations of the locale data. */ + for (cnt = 0; cnt < __LC_LAST; ++cnt) + { + SET (locrecent->record[cnt].offset, file_offsets[cnt]); + SET (locrecent->record[cnt].len, data[cnt].size); + } + + return GET (namehashent->locrec_offset); +} + + +/* Check the content of the archive for duplicates. Add the content + of the files if necessary. Add all the names, possibly overwriting + old files. */ +int +add_locale_to_archive (struct locarhandle *ah, const char *name, + locale_data_t data, bool replace) +{ + char *normalized_name = NULL; + uint32_t locrec_offset; + + /* First analyze the name to decide how to archive it. 
*/ + const char *language; + const char *modifier; + const char *territory; + const char *codeset; + const char *normalized_codeset; + int mask = _nl_explode_name (strdupa (name), + &language, &modifier, &territory, + &codeset, &normalized_codeset); + if (mask == -1) + return -1; + + if (mask & XPG_NORM_CODESET) + /* This name contains a codeset in unnormalized form. + We will store it in the archive with a normalized name. */ + asprintf (&normalized_name, "%s%s%s.%s%s%s", + language, territory == NULL ? "" : "_", territory ?: "", + (mask & XPG_NORM_CODESET) ? normalized_codeset : codeset, + modifier == NULL ? "" : "@", modifier ?: ""); + + /* This call does the main work. */ + locrec_offset = add_locale (ah, normalized_name ?: name, data, replace); + if (locrec_offset == 0) + { + free (normalized_name); + if (mask & XPG_NORM_CODESET) + free ((char *) normalized_codeset); + return -1; + } + + if ((mask & XPG_CODESET) == 0) + { + /* This name lacks a codeset, so determine the locale's codeset and + add an alias for its name with normalized codeset appended. */ + + const struct + { + unsigned int magic; + unsigned int nstrings; + unsigned int strindex[0]; + } *filedata = data[LC_CTYPE].addr; + codeset = (char *) filedata + + maybe_swap_uint32 (filedata->strindex[_NL_ITEM_INDEX + (_NL_CTYPE_CODESET_NAME)]); + char *normalized_codeset_name = NULL; + + normalized_codeset = _nl_normalize_codeset (codeset, strlen (codeset)); + mask |= XPG_NORM_CODESET; + + asprintf (&normalized_codeset_name, "%s%s%s.%s%s%s", + language, territory == NULL ? "" : "_", territory ?: "", + normalized_codeset, + modifier == NULL ? "" : "@", modifier ?: ""); + + add_alias (ah, normalized_codeset_name, replace, + normalized_name ?: name, &locrec_offset); + free (normalized_codeset_name); + } + + /* Now read the locale.alias files looking for lines whose + right hand side matches our name after normalization. 
*/ + int result = 0; + if (alias_file != NULL) + { + FILE *fp; + fp = fopen (alias_file, "rm"); + if (fp == NULL) + error (1, errno, _("locale alias file `%s' not found"), + alias_file); + + /* No threads present. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + while (! feof_unlocked (fp)) + { + /* It is a reasonable approach to use a fix buffer here + because + a) we are only interested in the first two fields + b) these fields must be usable as file names and so must + not be that long */ + char buf[BUFSIZ]; + char *alias; + char *value; + char *cp; + + if (fgets_unlocked (buf, BUFSIZ, fp) == NULL) + /* EOF reached. */ + break; + + cp = buf; + /* Ignore leading white space. */ + while (isspace (cp[0]) && cp[0] != '\n') + ++cp; + + /* A leading '#' signals a comment line. */ + if (cp[0] != '\0' && cp[0] != '#' && cp[0] != '\n') + { + alias = cp++; + while (cp[0] != '\0' && !isspace (cp[0])) + ++cp; + /* Terminate alias name. */ + if (cp[0] != '\0') + *cp++ = '\0'; + + /* Now look for the beginning of the value. */ + while (isspace (cp[0])) + ++cp; + + if (cp[0] != '\0') + { + value = cp++; + while (cp[0] != '\0' && !isspace (cp[0])) + ++cp; + /* Terminate value. */ + if (cp[0] == '\n') + { + /* This has to be done to make the following + test for the end of line possible. We are + looking for the terminating '\n' which do not + overwrite here. */ + *cp++ = '\0'; + *cp = '\n'; + } + else if (cp[0] != '\0') + *cp++ = '\0'; + + /* Does this alias refer to our locale? We will + normalize the right hand side and compare the + elements of the normalized form. 
*/ + { + const char *rhs_language; + const char *rhs_modifier; + const char *rhs_territory; + const char *rhs_codeset; + const char *rhs_normalized_codeset; + int rhs_mask = _nl_explode_name (value, + &rhs_language, + &rhs_modifier, + &rhs_territory, + &rhs_codeset, + &rhs_normalized_codeset); + if (rhs_mask == -1) + { + result = -1; + goto out; + } + if (!strcmp (language, rhs_language) + && ((rhs_mask & XPG_CODESET) + /* He has a codeset, it must match normalized. */ + ? !strcmp ((mask & XPG_NORM_CODESET) + ? normalized_codeset : codeset, + (rhs_mask & XPG_NORM_CODESET) + ? rhs_normalized_codeset : rhs_codeset) + /* He has no codeset, we must also have none. */ + : (mask & XPG_CODESET) == 0) + /* Codeset (or lack thereof) matches. */ + && !strcmp (territory ?: "", rhs_territory ?: "") + && !strcmp (modifier ?: "", rhs_modifier ?: "")) + /* We have a winner. */ + add_alias (ah, alias, replace, + normalized_name ?: name, &locrec_offset); + if (rhs_mask & XPG_NORM_CODESET) + free ((char *) rhs_normalized_codeset); + } + } + } + + /* Possibly not the whole line fits into the buffer. + Ignore the rest of the line. */ + while (strchr (cp, '\n') == NULL) + { + cp = buf; + if (fgets_unlocked (buf, BUFSIZ, fp) == NULL) + /* Make sure the inner loop will be left. The outer + loop will exit at the `feof' test. */ + *cp = '\n'; + } + } + + out: + fclose (fp); + } + + free (normalized_name); + + if (mask & XPG_NORM_CODESET) + free ((char *) normalized_codeset); + + return result; +} + + +int +add_locales_to_archive (size_t nlist, char *list[], bool replace) +{ + struct locarhandle ah; + int result = 0; + + /* Open the archive. This call never returns if we cannot + successfully open the archive. */ + ah.fname = NULL; + open_archive (&ah, false); + + while (nlist-- > 0) + { + const char *fname = *list++; + size_t fnamelen = strlen (fname); + struct stat64 st; + DIR *dirp; + struct dirent64 *d; + int seen; + locale_data_t data; + int cnt; + + if (! 
be_quiet) + printf (_("Adding %s\n"), fname); + + /* First see whether this really is a directory and whether it + contains all the require locale category files. */ + if (stat64 (fname, &st) < 0) + { + error (0, 0, _("stat of \"%s\" failed: %s: ignored"), fname, + strerror (errno)); + continue; + } + if (!S_ISDIR (st.st_mode)) + { + error (0, 0, _("\"%s\" is no directory; ignored"), fname); + continue; + } + + dirp = opendir (fname); + if (dirp == NULL) + { + error (0, 0, _("cannot open directory \"%s\": %s: ignored"), + fname, strerror (errno)); + continue; + } + + seen = 0; + while ((d = readdir64 (dirp)) != NULL) + { + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + if (strcmp (d->d_name, locnames[cnt]) == 0) + { + unsigned char d_type; + + /* We have an object of the required name. If it's + a directory we have to look at a file with the + prefix "SYS_". Otherwise we have found what we + are looking for. */ +#ifdef _DIRENT_HAVE_D_TYPE + d_type = d->d_type; + + if (d_type != DT_REG) +#endif + { + char fullname[fnamelen + 2 * strlen (d->d_name) + 7]; + +#ifdef _DIRENT_HAVE_D_TYPE + if (d_type == DT_UNKNOWN) +#endif + { + strcpy (stpcpy (stpcpy (fullname, fname), "/"), + d->d_name); + + if (stat64 (fullname, &st) == -1) + /* We cannot stat the file, ignore it. */ + break; + + d_type = IFTODT (st.st_mode); + } + + if (d_type == DT_DIR) + { + /* We have to do more tests. The file is a + directory and it therefore must contain a + regular file with the same name except a + "SYS_" prefix. */ + char *t = stpcpy (stpcpy (fullname, fname), "/"); + strcpy (stpcpy (stpcpy (t, d->d_name), "/SYS_"), + d->d_name); + + if (stat64 (fullname, &st) == -1) + /* There is no SYS_* file or we cannot + access it. */ + break; + + d_type = IFTODT (st.st_mode); + } + } + + /* If we found a regular file (eventually after + following a symlink) we are successful. 
*/ + if (d_type == DT_REG) + ++seen; + break; + } + } + + closedir (dirp); + + if (seen != __LC_LAST - 1) + { + /* We don't have all locale category files. Ignore the name. */ + error (0, 0, _("incomplete set of locale files in \"%s\""), + fname); + continue; + } + + /* Add the files to the archive. To do this we first compute + sizes and the MD5 sums of all the files. */ + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + char fullname[fnamelen + 2 * strlen (locnames[cnt]) + 7]; + int fd; + + strcpy (stpcpy (stpcpy (fullname, fname), "/"), locnames[cnt]); + fd = open64 (fullname, O_RDONLY); + if (fd == -1 || fstat64 (fd, &st) == -1) + { + /* Cannot read the file. */ + if (fd != -1) + close (fd); + break; + } + + if (S_ISDIR (st.st_mode)) + { + char *t; + close (fd); + t = stpcpy (stpcpy (fullname, fname), "/"); + strcpy (stpcpy (stpcpy (t, locnames[cnt]), "/SYS_"), + locnames[cnt]); + + fd = open64 (fullname, O_RDONLY); + if (fd == -1 || fstat64 (fd, &st) == -1 + || !S_ISREG (st.st_mode)) + { + if (fd != -1) + close (fd); + break; + } + } + + /* Map the file. */ + data[cnt].addr = mmap64 (NULL, st.st_size, PROT_READ, MAP_SHARED, + fd, 0); + if (data[cnt].addr == MAP_FAILED) + { + /* Cannot map it. */ + close (fd); + break; + } + + data[cnt].size = st.st_size; + __md5_buffer (data[cnt].addr, st.st_size, data[cnt].sum); + + /* We don't need the file descriptor anymore. */ + close (fd); + } + + if (cnt != __LC_LAST) + { + while (cnt-- > 0) + if (cnt != LC_ALL) + munmap (data[cnt].addr, data[cnt].size); + + error (0, 0, _("cannot read all files in \"%s\": ignored"), fname); + + continue; + } + + result |= add_locale_to_archive (&ah, basename (fname), data, replace); + + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + munmap (data[cnt].addr, data[cnt].size); + } + + /* We are done. 
*/ + close_archive (&ah); + + return result; +} + + +int +delete_locales_from_archive (size_t nlist, char *list[]) +{ + struct locarhandle ah; + struct locarhead *head; + struct namehashent *namehashtab; + + /* Open the archive. This call never returns if we cannot + successfully open the archive. */ + ah.fname = NULL; + open_archive (&ah, false); + + head = ah.addr; + namehashtab = (struct namehashent *) ((char *) ah.addr + + GET (head->namehash_offset)); + + while (nlist-- > 0) + { + const char *locname = *list++; + uint32_t hval; + unsigned int idx; + unsigned int incr; + + /* Search for this locale in the archive. */ + hval = archive_hashval (locname, strlen (locname)); + + idx = hval % GET (head->namehash_size); + incr = 1 + hval % (GET (head->namehash_size) - 2); + + /* If the name_offset field is zero this means this is no + deleted entry and therefore no entry can be found. */ + while (GET (namehashtab[idx].name_offset) != 0) + { + if (GET (namehashtab[idx].hashval) == hval + && (strcmp (locname, + ((char *) ah.addr + + GET (namehashtab[idx].name_offset))) + == 0)) + { + /* Found the entry. Now mark it as removed by zero-ing + the reference to the locale record. */ + SET (namehashtab[idx].locrec_offset, 0); + break; + } + + idx += incr; + if (idx >= GET (head->namehash_size)) + idx -= GET (head->namehash_size); + } + + if (GET (namehashtab[idx].name_offset) == 0 && ! 
be_quiet) + error (0, 0, _("locale \"%s\" not in archive"), locname); + } + + close_archive (&ah); + + return 0; +} + + +struct nameent +{ + char *name; + uint32_t locrec_offset; +}; + + +struct dataent +{ + const unsigned char *sum; + uint32_t file_offset; + uint32_t nlink; +}; + + +static int +nameentcmp (const void *a, const void *b) +{ + return strcmp (((const struct nameent *) a)->name, + ((const struct nameent *) b)->name); +} + + +static int +dataentcmp (const void *a, const void *b) +{ + if (((const struct dataent *) a)->file_offset + < ((const struct dataent *) b)->file_offset) + return -1; + + if (((const struct dataent *) a)->file_offset + > ((const struct dataent *) b)->file_offset) + return 1; + + return 0; +} + + +void +show_archive_content (const char *fname, int verbose) +{ + struct locarhandle ah; + struct locarhead *head; + struct namehashent *namehashtab; + struct nameent *names; + size_t cnt, used; + + /* Open the archive. This call never returns if we cannot + successfully open the archive. */ + ah.fname = fname; + open_archive (&ah, true); + + head = ah.addr; + + names = (struct nameent *) xmalloc (GET (head->namehash_used) + * sizeof (struct nameent)); + + namehashtab = (struct namehashent *) ((char *) ah.addr + + GET (head->namehash_offset)); + for (cnt = used = 0; cnt < GET (head->namehash_size); ++cnt) + if (GET (namehashtab[cnt].locrec_offset) != 0) + { + assert (used < GET (head->namehash_used)); + names[used].name = ah.addr + GET (namehashtab[cnt].name_offset); + names[used++].locrec_offset = GET (namehashtab[cnt].locrec_offset); + } + + /* Sort the names. 
*/ + qsort (names, used, sizeof (struct nameent), nameentcmp); + + if (verbose) + { + struct dataent *files; + struct sumhashent *sumhashtab; + int sumused; + + files = (struct dataent *) xmalloc (GET (head->sumhash_used) + * sizeof (struct dataent)); + + sumhashtab = (struct sumhashent *) ((char *) ah.addr + + GET (head->sumhash_offset)); + for (cnt = sumused = 0; cnt < GET (head->sumhash_size); ++cnt) + if (GET (sumhashtab[cnt].file_offset) != 0) + { + assert (sumused < GET (head->sumhash_used)); + files[sumused].sum = (const unsigned char *) sumhashtab[cnt].sum; + files[sumused].file_offset = GET (sumhashtab[cnt].file_offset); + files[sumused++].nlink = 0; + } + + /* Sort by file locations. */ + qsort (files, sumused, sizeof (struct dataent), dataentcmp); + + /* Compute nlink fields. */ + for (cnt = 0; cnt < used; ++cnt) + { + struct locrecent *locrec; + int idx; + + locrec = (struct locrecent *) ((char *) ah.addr + + names[cnt].locrec_offset); + for (idx = 0; idx < __LC_LAST; ++idx) + if (GET (locrec->record[LC_ALL].offset) != 0 + ? (idx == LC_ALL + || (GET (locrec->record[idx].offset) + < GET (locrec->record[LC_ALL].offset)) + || ((GET (locrec->record[idx].offset) + + GET (locrec->record[idx].len)) + > (GET (locrec->record[LC_ALL].offset) + + GET (locrec->record[LC_ALL].len)))) + : idx != LC_ALL) + { + struct dataent *data, dataent; + + dataent.file_offset = GET (locrec->record[idx].offset); + data = (struct dataent *) bsearch (&dataent, files, sumused, + sizeof (struct dataent), + dataentcmp); + assert (data != NULL); + ++data->nlink; + } + } + + /* Print it. 
*/ + for (cnt = 0; cnt < used; ++cnt) + { + struct locrecent *locrec; + int idx, i; + + locrec = (struct locrecent *) ((char *) ah.addr + + names[cnt].locrec_offset); + for (idx = 0; idx < __LC_LAST; ++idx) + if (idx != LC_ALL) + { + struct dataent *data, dataent; + + dataent.file_offset = GET (locrec->record[idx].offset); + if (GET (locrec->record[LC_ALL].offset) != 0 + && (dataent.file_offset + >= GET (locrec->record[LC_ALL].offset)) + && (dataent.file_offset + GET (locrec->record[idx].len) + <= (GET (locrec->record[LC_ALL].offset) + + GET (locrec->record[LC_ALL].len)))) + dataent.file_offset = GET (locrec->record[LC_ALL].offset); + + data = (struct dataent *) bsearch (&dataent, files, sumused, + sizeof (struct dataent), + dataentcmp); + printf ("%6d %7x %3d%c ", + GET (locrec->record[idx].len), + GET (locrec->record[idx].offset), + data->nlink, + (dataent.file_offset + == GET (locrec->record[LC_ALL].offset)) + ? '+' : ' '); + for (i = 0; i < 16; i += 4) + printf ("%02x%02x%02x%02x", + data->sum[i], data->sum[i + 1], + data->sum[i + 2], data->sum[i + 3]); + printf (" %s/%s\n", names[cnt].name, + idx == LC_MESSAGES ? "LC_MESSAGES/SYS_LC_MESSAGES" + : locnames[idx]); + } + } + } + else + for (cnt = 0; cnt < used; ++cnt) + puts (names[cnt].name); + + close_archive (&ah); + + exit (EXIT_SUCCESS); +} diff --git a/REORG.TODO/locale/programs/locfile-kw.gperf b/REORG.TODO/locale/programs/locfile-kw.gperf new file mode 100644 index 0000000000..3605d15c8e --- /dev/null +++ b/REORG.TODO/locale/programs/locfile-kw.gperf @@ -0,0 +1,201 @@ +%{ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#include "locfile-token.h" +%} +struct keyword_t ; +%% +escape_char, tok_escape_char, 0 +comment_char, tok_comment_char, 0 +repertoiremap, tok_repertoiremap, 0 +include, tok_include, 0 +LC_CTYPE, tok_lc_ctype, 0 +END, tok_end, 0 +copy, tok_copy, 0 +upper, tok_upper, 0 +lower, tok_lower, 0 +alpha, tok_alpha, 0 +digit, tok_digit, 0 +outdigit, tok_outdigit, 0 +alnum, tok_alnum, 0 +space, tok_space, 0 +cntrl, tok_cntrl, 0 +punct, tok_punct, 0 +graph, tok_graph, 0 +print, tok_print, 0 +xdigit, tok_xdigit, 0 +blank, tok_blank, 0 +charclass, tok_charclass, 0 +class, tok_class, 0 +charconv, tok_charconv, 0 +toupper, tok_toupper, 0 +tolower, tok_tolower, 0 +map, tok_map, 0 +translit_start, tok_translit_start, 0 +translit_end, tok_translit_end, 0 +translit_ignore, tok_translit_ignore, 0 +default_missing, tok_default_missing, 0 +LC_COLLATE, tok_lc_collate, 0 +coll_weight_max, tok_coll_weight_max, 0 +section-symbol, tok_section_symbol, 0 +collating-element, tok_collating_element, 0 +collating-symbol, tok_collating_symbol, 0 +symbol-equivalence, tok_symbol_equivalence, 0 +script, tok_script, 0 +order_start, tok_order_start, 0 +order_end, tok_order_end, 0 +from, tok_from, 0 +forward, tok_forward, 0 +backward, tok_backward, 0 +position, tok_position, 0 +UNDEFINED, tok_undefined, 0 +IGNORE, tok_ignore, 0 +reorder-after, tok_reorder_after, 0 +reorder-end, tok_reorder_end, 0 +reorder-sections-after, tok_reorder_sections_after, 0 +reorder-sections-end, tok_reorder_sections_end, 0 +define, tok_define, 0 +undef, tok_undef, 0 +ifdef, tok_ifdef, 0 +else, tok_else, 0 +elifdef, 
tok_elifdef, 0 +elifndef, tok_elifndef, 0 +endif, tok_endif, 0 +LC_MONETARY, tok_lc_monetary, 0 +int_curr_symbol, tok_int_curr_symbol, 0 +currency_symbol, tok_currency_symbol, 0 +mon_decimal_point, tok_mon_decimal_point, 0 +mon_thousands_sep, tok_mon_thousands_sep, 0 +mon_grouping, tok_mon_grouping, 0 +positive_sign, tok_positive_sign, 0 +negative_sign, tok_negative_sign, 0 +int_frac_digits, tok_int_frac_digits, 0 +frac_digits, tok_frac_digits, 0 +p_cs_precedes, tok_p_cs_precedes, 0 +p_sep_by_space, tok_p_sep_by_space, 0 +n_cs_precedes, tok_n_cs_precedes, 0 +n_sep_by_space, tok_n_sep_by_space, 0 +p_sign_posn, tok_p_sign_posn, 0 +n_sign_posn, tok_n_sign_posn, 0 +int_p_cs_precedes, tok_int_p_cs_precedes, 0 +int_p_sep_by_space, tok_int_p_sep_by_space, 0 +int_n_cs_precedes, tok_int_n_cs_precedes, 0 +int_n_sep_by_space, tok_int_n_sep_by_space, 0 +int_p_sign_posn, tok_int_p_sign_posn, 0 +int_n_sign_posn, tok_int_n_sign_posn, 0 +duo_int_curr_symbol, tok_duo_int_curr_symbol, 0 +duo_currency_symbol, tok_duo_currency_symbol, 0 +duo_int_frac_digits, tok_duo_int_frac_digits, 0 +duo_frac_digits, tok_duo_frac_digits, 0 +duo_p_cs_precedes, tok_duo_p_cs_precedes, 0 +duo_p_sep_by_space, tok_duo_p_sep_by_space, 0 +duo_n_cs_precedes, tok_duo_n_cs_precedes, 0 +duo_n_sep_by_space, tok_duo_n_sep_by_space, 0 +duo_int_p_cs_precedes, tok_duo_int_p_cs_precedes, 0 +duo_int_p_sep_by_space, tok_duo_int_p_sep_by_space, 0 +duo_int_n_cs_precedes, tok_duo_int_n_cs_precedes, 0 +duo_int_n_sep_by_space, tok_duo_int_n_sep_by_space, 0 +duo_p_sign_posn, tok_duo_p_sign_posn, 0 +duo_n_sign_posn, tok_duo_n_sign_posn, 0 +duo_int_p_sign_posn, tok_duo_int_p_sign_posn, 0 +duo_int_n_sign_posn, tok_duo_int_n_sign_posn, 0 +uno_valid_from, tok_uno_valid_from, 0 +uno_valid_to, tok_uno_valid_to, 0 +duo_valid_from, tok_duo_valid_from, 0 +duo_valid_to, tok_duo_valid_to, 0 +conversion_rate, tok_conversion_rate, 0 +LC_NUMERIC, tok_lc_numeric, 0 +decimal_point, tok_decimal_point, 0 +thousands_sep, tok_thousands_sep, 0 
+grouping, tok_grouping, 0 +LC_TIME, tok_lc_time, 0 +abday, tok_abday, 0 +day, tok_day, 0 +week, tok_week, 0 +abmon, tok_abmon, 0 +mon, tok_mon, 0 +d_t_fmt, tok_d_t_fmt, 0 +d_fmt, tok_d_fmt, 0 +t_fmt, tok_t_fmt, 0 +am_pm, tok_am_pm, 0 +t_fmt_ampm, tok_t_fmt_ampm, 0 +era, tok_era, 0 +era_year, tok_era_year, 0 +era_d_fmt, tok_era_d_fmt, 0 +era_d_t_fmt, tok_era_d_t_fmt, 0 +era_t_fmt, tok_era_t_fmt, 0 +alt_digits, tok_alt_digits, 0 +first_weekday, tok_first_weekday, 0 +first_workday, tok_first_workday, 0 +cal_direction, tok_cal_direction, 0 +timezone, tok_timezone, 0 +date_fmt, tok_date_fmt, 0 +LC_MESSAGES, tok_lc_messages, 0 +yesexpr, tok_yesexpr, 0 +noexpr, tok_noexpr, 0 +yesstr, tok_yesstr, 0 +nostr, tok_nostr, 0 +LC_PAPER, tok_lc_paper, 0 +height, tok_height, 0 +width, tok_width, 0 +LC_NAME, tok_lc_name, 0 +name_fmt, tok_name_fmt, 0 +name_gen, tok_name_gen, 0 +name_mr, tok_name_mr, 0 +name_mrs, tok_name_mrs, 0 +name_miss, tok_name_miss, 0 +name_ms, tok_name_ms, 0 +LC_ADDRESS, tok_lc_address, 0 +postal_fmt, tok_postal_fmt, 0 +country_name, tok_country_name, 0 +country_post, tok_country_post, 0 +country_ab2, tok_country_ab2, 0 +country_ab3, tok_country_ab3, 0 +country_num, tok_country_num, 0 +country_car, tok_country_car, 0 +country_isbn, tok_country_isbn, 0 +lang_name, tok_lang_name, 0 +lang_ab, tok_lang_ab, 0 +lang_term, tok_lang_term, 0 +lang_lib, tok_lang_lib, 0 +LC_TELEPHONE, tok_lc_telephone, 0 +tel_int_fmt, tok_tel_int_fmt, 0 +tel_dom_fmt, tok_tel_dom_fmt, 0 +int_select, tok_int_select, 0 +int_prefix, tok_int_prefix, 0 +LC_MEASUREMENT, tok_lc_measurement, 0 +measurement, tok_measurement, 0 +LC_IDENTIFICATION, tok_lc_identification, 0 +title, tok_title, 0 +source, tok_source, 0 +address, tok_address, 0 +contact, tok_contact, 0 +email, tok_email, 0 +tel, tok_tel, 0 +fax, tok_fax, 0 +language, tok_language, 0 +territory, tok_territory, 0 +audience, tok_audience, 0 +application, tok_application, 0 +abbreviation, tok_abbreviation, 0 +revision, tok_revision, 0 
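+date, tok_date, 0 +category, tok_category, 0 diff --git a/REORG.TODO/locale/programs/locfile-kw.h b/REORG.TODO/locale/programs/locfile-kw.h new file mode 100644 index 0000000000..1cdca1941b --- /dev/null +++ b/REORG.TODO/locale/programs/locfile-kw.h @@ -0,0 +1,621 @@ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -acCgopt -k'1,2,5,9,$' -L ANSI-C -N locfile_hash locfile-kw.gperf */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +#line 1 "locfile-kw.gperf" + +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see + <http://www.gnu.org/licenses/>. 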
*/ + +#include <string.h> + +#include "locfile-token.h" +#line 24 "locfile-kw.gperf" +struct keyword_t ; + +#define TOTAL_KEYWORDS 176 +#define MIN_WORD_LENGTH 3 +#define MAX_WORD_LENGTH 22 +#define MIN_HASH_VALUE 3 +#define MAX_HASH_VALUE 630 +/* maximum key range = 628, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (register const char *str, register unsigned int len) +{ + static const unsigned short asso_values[] = + { + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 5, 0, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 5, 631, 0, 0, 0, + 0, 0, 10, 0, 631, 631, 0, 631, 0, 5, + 631, 631, 0, 0, 0, 10, 631, 631, 631, 0, + 631, 631, 631, 631, 631, 0, 631, 145, 80, 25, + 15, 0, 180, 105, 10, 35, 631, 50, 80, 160, + 5, 130, 40, 45, 5, 0, 10, 35, 40, 35, + 5, 10, 0, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631 + }; + register int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[8]]; + /*FALLTHROUGH*/ + case 8: + case 7: + case 6: + case 5: + hval += asso_values[(unsigned char)str[4]]; + 
/*FALLTHROUGH*/ + case 4: + case 3: + case 2: + hval += asso_values[(unsigned char)str[1]]; + /*FALLTHROUGH*/ + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; +} + +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct keyword_t * +locfile_hash (register const char *str, register unsigned int len) +{ + static const struct keyword_t wordlist[] = + { + {""}, {""}, {""}, +#line 31 "locfile-kw.gperf" + {"END", tok_end, 0}, + {""}, {""}, +#line 70 "locfile-kw.gperf" + {"IGNORE", tok_ignore, 0}, +#line 129 "locfile-kw.gperf" + {"LC_TIME", tok_lc_time, 0}, +#line 30 "locfile-kw.gperf" + {"LC_CTYPE", tok_lc_ctype, 0}, + {""}, +#line 166 "locfile-kw.gperf" + {"LC_ADDRESS", tok_lc_address, 0}, +#line 151 "locfile-kw.gperf" + {"LC_MESSAGES", tok_lc_messages, 0}, +#line 159 "locfile-kw.gperf" + {"LC_NAME", tok_lc_name, 0}, +#line 156 "locfile-kw.gperf" + {"LC_PAPER", tok_lc_paper, 0}, +#line 184 "locfile-kw.gperf" + {"LC_MEASUREMENT", tok_lc_measurement, 0}, +#line 56 "locfile-kw.gperf" + {"LC_COLLATE", tok_lc_collate, 0}, + {""}, +#line 186 "locfile-kw.gperf" + {"LC_IDENTIFICATION", tok_lc_identification, 0}, +#line 199 "locfile-kw.gperf" + {"revision", tok_revision, 0}, +#line 69 "locfile-kw.gperf" + {"UNDEFINED", tok_undefined, 0}, +#line 125 "locfile-kw.gperf" + {"LC_NUMERIC", tok_lc_numeric, 0}, +#line 82 "locfile-kw.gperf" + {"LC_MONETARY", tok_lc_monetary, 0}, +#line 179 "locfile-kw.gperf" + {"LC_TELEPHONE", tok_lc_telephone, 0}, + {""}, {""}, {""}, +#line 75 "locfile-kw.gperf" + {"define", tok_define, 0}, +#line 152 "locfile-kw.gperf" + {"yesexpr", tok_yesexpr, 0}, +#line 141 "locfile-kw.gperf" + {"era_year", tok_era_year, 0}, + {""}, +#line 54 "locfile-kw.gperf" + {"translit_ignore", tok_translit_ignore, 0}, +#line 154 "locfile-kw.gperf" + {"yesstr", tok_yesstr, 0}, + {""}, +#line 89 
"locfile-kw.gperf" + {"negative_sign", tok_negative_sign, 0}, + {""}, +#line 137 "locfile-kw.gperf" + {"t_fmt", tok_t_fmt, 0}, +#line 157 "locfile-kw.gperf" + {"height", tok_height, 0}, + {""}, {""}, +#line 52 "locfile-kw.gperf" + {"translit_start", tok_translit_start, 0}, +#line 136 "locfile-kw.gperf" + {"d_fmt", tok_d_fmt, 0}, + {""}, +#line 53 "locfile-kw.gperf" + {"translit_end", tok_translit_end, 0}, +#line 94 "locfile-kw.gperf" + {"n_cs_precedes", tok_n_cs_precedes, 0}, +#line 144 "locfile-kw.gperf" + {"era_t_fmt", tok_era_t_fmt, 0}, +#line 39 "locfile-kw.gperf" + {"space", tok_space, 0}, +#line 72 "locfile-kw.gperf" + {"reorder-end", tok_reorder_end, 0}, +#line 73 "locfile-kw.gperf" + {"reorder-sections-after", tok_reorder_sections_after, 0}, + {""}, +#line 142 "locfile-kw.gperf" + {"era_d_fmt", tok_era_d_fmt, 0}, +#line 187 "locfile-kw.gperf" + {"title", tok_title, 0}, + {""}, {""}, +#line 149 "locfile-kw.gperf" + {"timezone", tok_timezone, 0}, + {""}, +#line 74 "locfile-kw.gperf" + {"reorder-sections-end", tok_reorder_sections_end, 0}, + {""}, {""}, {""}, +#line 95 "locfile-kw.gperf" + {"n_sep_by_space", tok_n_sep_by_space, 0}, + {""}, {""}, +#line 100 "locfile-kw.gperf" + {"int_n_cs_precedes", tok_int_n_cs_precedes, 0}, + {""}, {""}, {""}, +#line 26 "locfile-kw.gperf" + {"escape_char", tok_escape_char, 0}, + {""}, +#line 28 "locfile-kw.gperf" + {"repertoiremap", tok_repertoiremap, 0}, +#line 46 "locfile-kw.gperf" + {"charclass", tok_charclass, 0}, +#line 43 "locfile-kw.gperf" + {"print", tok_print, 0}, +#line 44 "locfile-kw.gperf" + {"xdigit", tok_xdigit, 0}, +#line 110 "locfile-kw.gperf" + {"duo_n_cs_precedes", tok_duo_n_cs_precedes, 0}, +#line 127 "locfile-kw.gperf" + {"thousands_sep", tok_thousands_sep, 0}, +#line 195 "locfile-kw.gperf" + {"territory", tok_territory, 0}, +#line 36 "locfile-kw.gperf" + {"digit", tok_digit, 0}, + {""}, {""}, +#line 92 "locfile-kw.gperf" + {"p_cs_precedes", tok_p_cs_precedes, 0}, + {""}, {""}, +#line 62 "locfile-kw.gperf" 
+ {"script", tok_script, 0}, +#line 29 "locfile-kw.gperf" + {"include", tok_include, 0}, + {""}, +#line 78 "locfile-kw.gperf" + {"else", tok_else, 0}, +#line 182 "locfile-kw.gperf" + {"int_select", tok_int_select, 0}, + {""}, {""}, {""}, +#line 132 "locfile-kw.gperf" + {"week", tok_week, 0}, +#line 33 "locfile-kw.gperf" + {"upper", tok_upper, 0}, + {""}, {""}, +#line 192 "locfile-kw.gperf" + {"tel", tok_tel, 0}, +#line 93 "locfile-kw.gperf" + {"p_sep_by_space", tok_p_sep_by_space, 0}, +#line 158 "locfile-kw.gperf" + {"width", tok_width, 0}, + {""}, +#line 98 "locfile-kw.gperf" + {"int_p_cs_precedes", tok_int_p_cs_precedes, 0}, + {""}, {""}, +#line 41 "locfile-kw.gperf" + {"punct", tok_punct, 0}, + {""}, {""}, +#line 101 "locfile-kw.gperf" + {"int_n_sep_by_space", tok_int_n_sep_by_space, 0}, + {""}, {""}, {""}, +#line 108 "locfile-kw.gperf" + {"duo_p_cs_precedes", tok_duo_p_cs_precedes, 0}, +#line 48 "locfile-kw.gperf" + {"charconv", tok_charconv, 0}, + {""}, +#line 47 "locfile-kw.gperf" + {"class", tok_class, 0}, +#line 114 "locfile-kw.gperf" + {"duo_int_n_cs_precedes", tok_duo_int_n_cs_precedes, 0}, +#line 115 "locfile-kw.gperf" + {"duo_int_n_sep_by_space", tok_duo_int_n_sep_by_space, 0}, +#line 111 "locfile-kw.gperf" + {"duo_n_sep_by_space", tok_duo_n_sep_by_space, 0}, +#line 119 "locfile-kw.gperf" + {"duo_int_n_sign_posn", tok_duo_int_n_sign_posn, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 58 "locfile-kw.gperf" + {"section-symbol", tok_section_symbol, 0}, +#line 183 "locfile-kw.gperf" + {"int_prefix", tok_int_prefix, 0}, + {""}, {""}, {""}, {""}, +#line 42 "locfile-kw.gperf" + {"graph", tok_graph, 0}, + {""}, {""}, +#line 99 "locfile-kw.gperf" + {"int_p_sep_by_space", tok_int_p_sep_by_space, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 112 "locfile-kw.gperf" + {"duo_int_p_cs_precedes", tok_duo_int_p_cs_precedes, 0}, +#line 113 "locfile-kw.gperf" + {"duo_int_p_sep_by_space", 
tok_duo_int_p_sep_by_space, 0}, +#line 109 "locfile-kw.gperf" + {"duo_p_sep_by_space", tok_duo_p_sep_by_space, 0}, +#line 118 "locfile-kw.gperf" + {"duo_int_p_sign_posn", tok_duo_int_p_sign_posn, 0}, +#line 155 "locfile-kw.gperf" + {"nostr", tok_nostr, 0}, + {""}, {""}, +#line 140 "locfile-kw.gperf" + {"era", tok_era, 0}, + {""}, +#line 84 "locfile-kw.gperf" + {"currency_symbol", tok_currency_symbol, 0}, + {""}, +#line 165 "locfile-kw.gperf" + {"name_ms", tok_name_ms, 0}, +#line 163 "locfile-kw.gperf" + {"name_mrs", tok_name_mrs, 0}, +#line 164 "locfile-kw.gperf" + {"name_miss", tok_name_miss, 0}, +#line 83 "locfile-kw.gperf" + {"int_curr_symbol", tok_int_curr_symbol, 0}, +#line 188 "locfile-kw.gperf" + {"source", tok_source, 0}, +#line 162 "locfile-kw.gperf" + {"name_mr", tok_name_mr, 0}, +#line 161 "locfile-kw.gperf" + {"name_gen", tok_name_gen, 0}, +#line 200 "locfile-kw.gperf" + {"date", tok_date, 0}, + {""}, {""}, +#line 189 "locfile-kw.gperf" + {"address", tok_address, 0}, +#line 160 "locfile-kw.gperf" + {"name_fmt", tok_name_fmt, 0}, +#line 32 "locfile-kw.gperf" + {"copy", tok_copy, 0}, +#line 103 "locfile-kw.gperf" + {"int_n_sign_posn", tok_int_n_sign_posn, 0}, + {""}, {""}, +#line 131 "locfile-kw.gperf" + {"day", tok_day, 0}, +#line 105 "locfile-kw.gperf" + {"duo_currency_symbol", tok_duo_currency_symbol, 0}, + {""}, {""}, {""}, +#line 150 "locfile-kw.gperf" + {"date_fmt", tok_date_fmt, 0}, +#line 64 "locfile-kw.gperf" + {"order_end", tok_order_end, 0}, +#line 117 "locfile-kw.gperf" + {"duo_n_sign_posn", tok_duo_n_sign_posn, 0}, + {""}, +#line 168 "locfile-kw.gperf" + {"country_name", tok_country_name, 0}, +#line 71 "locfile-kw.gperf" + {"reorder-after", tok_reorder_after, 0}, + {""}, {""}, +#line 153 "locfile-kw.gperf" + {"noexpr", tok_noexpr, 0}, +#line 50 "locfile-kw.gperf" + {"tolower", tok_tolower, 0}, +#line 196 "locfile-kw.gperf" + {"audience", tok_audience, 0}, + {""}, {""}, {""}, +#line 49 "locfile-kw.gperf" + {"toupper", tok_toupper, 0}, +#line 
68 "locfile-kw.gperf" + {"position", tok_position, 0}, + {""}, +#line 40 "locfile-kw.gperf" + {"cntrl", tok_cntrl, 0}, + {""}, +#line 27 "locfile-kw.gperf" + {"comment_char", tok_comment_char, 0}, +#line 88 "locfile-kw.gperf" + {"positive_sign", tok_positive_sign, 0}, + {""}, {""}, {""}, {""}, +#line 61 "locfile-kw.gperf" + {"symbol-equivalence", tok_symbol_equivalence, 0}, + {""}, +#line 102 "locfile-kw.gperf" + {"int_p_sign_posn", tok_int_p_sign_posn, 0}, +#line 173 "locfile-kw.gperf" + {"country_car", tok_country_car, 0}, + {""}, {""}, +#line 104 "locfile-kw.gperf" + {"duo_int_curr_symbol", tok_duo_int_curr_symbol, 0}, + {""}, {""}, +#line 135 "locfile-kw.gperf" + {"d_t_fmt", tok_d_t_fmt, 0}, + {""}, {""}, +#line 116 "locfile-kw.gperf" + {"duo_p_sign_posn", tok_duo_p_sign_posn, 0}, +#line 185 "locfile-kw.gperf" + {"measurement", tok_measurement, 0}, +#line 174 "locfile-kw.gperf" + {"country_isbn", tok_country_isbn, 0}, +#line 37 "locfile-kw.gperf" + {"outdigit", tok_outdigit, 0}, + {""}, {""}, +#line 143 "locfile-kw.gperf" + {"era_d_t_fmt", tok_era_d_t_fmt, 0}, + {""}, {""}, {""}, +#line 34 "locfile-kw.gperf" + {"lower", tok_lower, 0}, +#line 181 "locfile-kw.gperf" + {"tel_dom_fmt", tok_tel_dom_fmt, 0}, +#line 169 "locfile-kw.gperf" + {"country_post", tok_country_post, 0}, +#line 148 "locfile-kw.gperf" + {"cal_direction", tok_cal_direction, 0}, + {""}, +#line 139 "locfile-kw.gperf" + {"t_fmt_ampm", tok_t_fmt_ampm, 0}, +#line 91 "locfile-kw.gperf" + {"frac_digits", tok_frac_digits, 0}, + {""}, {""}, +#line 175 "locfile-kw.gperf" + {"lang_name", tok_lang_name, 0}, +#line 90 "locfile-kw.gperf" + {"int_frac_digits", tok_int_frac_digits, 0}, + {""}, +#line 121 "locfile-kw.gperf" + {"uno_valid_to", tok_uno_valid_to, 0}, +#line 126 "locfile-kw.gperf" + {"decimal_point", tok_decimal_point, 0}, + {""}, +#line 133 "locfile-kw.gperf" + {"abmon", tok_abmon, 0}, + {""}, {""}, {""}, {""}, +#line 107 "locfile-kw.gperf" + {"duo_frac_digits", tok_duo_frac_digits, 0}, +#line 180 
"locfile-kw.gperf" + {"tel_int_fmt", tok_tel_int_fmt, 0}, +#line 123 "locfile-kw.gperf" + {"duo_valid_to", tok_duo_valid_to, 0}, +#line 146 "locfile-kw.gperf" + {"first_weekday", tok_first_weekday, 0}, + {""}, +#line 130 "locfile-kw.gperf" + {"abday", tok_abday, 0}, + {""}, +#line 198 "locfile-kw.gperf" + {"abbreviation", tok_abbreviation, 0}, +#line 147 "locfile-kw.gperf" + {"first_workday", tok_first_workday, 0}, + {""}, {""}, +#line 97 "locfile-kw.gperf" + {"n_sign_posn", tok_n_sign_posn, 0}, + {""}, {""}, {""}, +#line 145 "locfile-kw.gperf" + {"alt_digits", tok_alt_digits, 0}, + {""}, {""}, +#line 128 "locfile-kw.gperf" + {"grouping", tok_grouping, 0}, + {""}, +#line 45 "locfile-kw.gperf" + {"blank", tok_blank, 0}, + {""}, {""}, +#line 194 "locfile-kw.gperf" + {"language", tok_language, 0}, +#line 120 "locfile-kw.gperf" + {"uno_valid_from", tok_uno_valid_from, 0}, + {""}, +#line 197 "locfile-kw.gperf" + {"application", tok_application, 0}, + {""}, +#line 80 "locfile-kw.gperf" + {"elifndef", tok_elifndef, 0}, + {""}, {""}, {""}, {""}, {""}, +#line 122 "locfile-kw.gperf" + {"duo_valid_from", tok_duo_valid_from, 0}, +#line 57 "locfile-kw.gperf" + {"coll_weight_max", tok_coll_weight_max, 0}, + {""}, +#line 79 "locfile-kw.gperf" + {"elifdef", tok_elifdef, 0}, +#line 67 "locfile-kw.gperf" + {"backward", tok_backward, 0}, +#line 106 "locfile-kw.gperf" + {"duo_int_frac_digits", tok_duo_int_frac_digits, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 96 "locfile-kw.gperf" + {"p_sign_posn", tok_p_sign_posn, 0}, + {""}, +#line 201 "locfile-kw.gperf" + {"category", tok_category, 0}, + {""}, {""}, {""}, {""}, +#line 134 "locfile-kw.gperf" + {"mon", tok_mon, 0}, + {""}, +#line 124 "locfile-kw.gperf" + {"conversion_rate", tok_conversion_rate, 0}, + {""}, {""}, {""}, {""}, {""}, +#line 63 "locfile-kw.gperf" + {"order_start", tok_order_start, 0}, + {""}, {""}, {""}, {""}, {""}, +#line 176 "locfile-kw.gperf" + {"lang_ab", tok_lang_ab, 0}, +#line 178 "locfile-kw.gperf" + 
{"lang_lib", tok_lang_lib, 0}, + {""}, {""}, {""}, +#line 190 "locfile-kw.gperf" + {"contact", tok_contact, 0}, + {""}, {""}, {""}, +#line 171 "locfile-kw.gperf" + {"country_ab3", tok_country_ab3, 0}, + {""}, {""}, {""}, +#line 191 "locfile-kw.gperf" + {"email", tok_email, 0}, +#line 170 "locfile-kw.gperf" + {"country_ab2", tok_country_ab2, 0}, + {""}, {""}, {""}, +#line 55 "locfile-kw.gperf" + {"default_missing", tok_default_missing, 0}, + {""}, {""}, +#line 193 "locfile-kw.gperf" + {"fax", tok_fax, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 172 "locfile-kw.gperf" + {"country_num", tok_country_num, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 51 "locfile-kw.gperf" + {"map", tok_map, 0}, +#line 65 "locfile-kw.gperf" + {"from", tok_from, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 86 "locfile-kw.gperf" + {"mon_thousands_sep", tok_mon_thousands_sep, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 81 "locfile-kw.gperf" + {"endif", tok_endif, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 76 "locfile-kw.gperf" + {"undef", tok_undef, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 59 "locfile-kw.gperf" + {"collating-element", tok_collating_element, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 66 "locfile-kw.gperf" + {"forward", tok_forward, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 85 "locfile-kw.gperf" + 
{"mon_decimal_point", tok_mon_decimal_point, 0}, + {""}, {""}, +#line 167 "locfile-kw.gperf" + {"postal_fmt", tok_postal_fmt, 0}, + {""}, {""}, {""}, {""}, {""}, +#line 60 "locfile-kw.gperf" + {"collating-symbol", tok_collating_symbol, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 35 "locfile-kw.gperf" + {"alpha", tok_alpha, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 38 "locfile-kw.gperf" + {"alnum", tok_alnum, 0}, + {""}, +#line 87 "locfile-kw.gperf" + {"mon_grouping", tok_mon_grouping, 0}, + {""}, +#line 177 "locfile-kw.gperf" + {"lang_term", tok_lang_term, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 77 "locfile-kw.gperf" + {"ifdef", tok_ifdef, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 138 "locfile-kw.gperf" + {"am_pm", tok_am_pm, 0} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0') + return &wordlist[key]; + } + } + return 0; +} diff --git a/REORG.TODO/locale/programs/locfile-token.h b/REORG.TODO/locale/programs/locfile-token.h new file mode 100644 index 0000000000..0c32f2c70b --- /dev/null +++ b/REORG.TODO/locale/programs/locfile-token.h @@ -0,0 +1,258 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _TOKEN_H +#define _TOKEN_H + /* Token codes. Keyword table entries (struct keyword_t below) carry one of these values in their `token' member; tok_none is fixed at 0 and all other values follow in declaration order. */ +enum token_t +{ + tok_none = 0, + + tok_eof, + tok_eol, + tok_bsymbol, + tok_ident, + tok_ellipsis2, + tok_ellipsis3, + tok_ellipsis4, + tok_ellipsis2_2, + tok_ellipsis4_2, + tok_semicolon, + tok_comma, + tok_open_brace, + tok_close_brace, + tok_charcode, + tok_ucs4, + tok_number, + tok_minus1, + tok_string, + tok_include, + + tok_escape_char, + tok_comment_char, + tok_charmap, + tok_end, + tok_g0esc, + tok_g1esc, + tok_g2esc, + tok_g3esc, + tok_escseq, + tok_addset, + + tok_charids, + + tok_code_set_name, + tok_mb_cur_max, + tok_mb_cur_min, + tok_charconv, + tok_width, + tok_width_variable, + tok_width_default, + tok_repertoiremap, + + tok_lc_ctype, + tok_copy, + /* Keep the following entries up to the next comment in this order! */ + tok_upper, + tok_lower, + tok_alpha, + tok_digit, + tok_xdigit, + tok_space, + tok_print, + tok_graph, + tok_blank, + tok_cntrl, + tok_punct, + tok_alnum, + /* OK, shuffling allowed again. 
*/ + tok_outdigit, + tok_charclass, + tok_class, + tok_toupper, + tok_tolower, + tok_map, + tok_translit_start, + tok_translit_end, + tok_translit_ignore, + tok_default_missing, + tok_lc_collate, + tok_coll_weight_max, + tok_section_symbol, + tok_collating_element, + tok_collating_symbol, + tok_symbol_equivalence, + tok_script, + tok_order_start, + tok_order_end, + tok_from, + tok_forward, + tok_backward, + tok_position, + tok_undefined, + tok_ignore, + tok_reorder_after, + tok_reorder_end, + tok_reorder_sections_after, + tok_reorder_sections_end, + tok_define, + tok_undef, + tok_ifdef, + tok_ifndef, /* NOTE(review): the locfile-kw.gperf keyword list shown in this diff has no `ifndef' entry mapping to this token -- confirm that is intended. */ + tok_else, + tok_elifdef, + tok_elifndef, + tok_endif, + tok_lc_monetary, + tok_int_curr_symbol, + tok_currency_symbol, + tok_mon_decimal_point, + tok_mon_thousands_sep, + tok_mon_grouping, + tok_positive_sign, + tok_negative_sign, + tok_int_frac_digits, + tok_frac_digits, + tok_p_cs_precedes, + tok_p_sep_by_space, + tok_n_cs_precedes, + tok_n_sep_by_space, + tok_p_sign_posn, + tok_n_sign_posn, + tok_int_p_cs_precedes, + tok_int_p_sep_by_space, + tok_int_n_cs_precedes, + tok_int_n_sep_by_space, + tok_int_p_sign_posn, + tok_int_n_sign_posn, + tok_duo_int_curr_symbol, + tok_duo_currency_symbol, + tok_duo_int_frac_digits, + tok_duo_frac_digits, + tok_duo_p_cs_precedes, + tok_duo_p_sep_by_space, + tok_duo_n_cs_precedes, + tok_duo_n_sep_by_space, + tok_duo_int_p_cs_precedes, + tok_duo_int_p_sep_by_space, + tok_duo_int_n_cs_precedes, + tok_duo_int_n_sep_by_space, + tok_duo_p_sign_posn, + tok_duo_n_sign_posn, + tok_duo_int_p_sign_posn, + tok_duo_int_n_sign_posn, + tok_uno_valid_from, + tok_uno_valid_to, + tok_duo_valid_from, + tok_duo_valid_to, + tok_conversion_rate, + tok_lc_numeric, + tok_decimal_point, + tok_thousands_sep, + tok_grouping, + tok_lc_time, + tok_abday, + tok_day, + tok_abmon, + tok_mon, + tok_d_t_fmt, + tok_d_fmt, + tok_t_fmt, + tok_am_pm, + tok_t_fmt_ampm, + tok_era, + tok_era_year, + tok_era_d_fmt, + tok_era_d_t_fmt, + tok_era_t_fmt, + tok_alt_digits, 
+ tok_week, + tok_first_weekday, + tok_first_workday, + tok_cal_direction, + tok_timezone, + tok_date_fmt, + tok_lc_messages, + tok_yesexpr, + tok_noexpr, + tok_yesstr, + tok_nostr, + tok_lc_paper, + tok_height, + tok_lc_name, + tok_name_fmt, + tok_name_gen, + tok_name_mr, + tok_name_mrs, + tok_name_miss, + tok_name_ms, + tok_lc_address, + tok_postal_fmt, + tok_country_name, + tok_country_post, + tok_country_ab2, + tok_country_ab3, + tok_country_num, + tok_country_car, + tok_country_isbn, + tok_lang_name, + tok_lang_ab, + tok_lang_term, + tok_lang_lib, + tok_lc_telephone, + tok_tel_int_fmt, + tok_tel_dom_fmt, + tok_int_select, + tok_int_prefix, + tok_lc_measurement, + tok_measurement, + tok_lc_identification, + tok_title, + tok_source, + tok_address, + tok_contact, + tok_email, + tok_tel, + tok_fax, + tok_language, + tok_territory, + tok_audience, + tok_application, + tok_abbreviation, + tok_revision, + tok_date, + tok_category, + + tok_error +}; + + /* One entry of a keyword lookup table: NAME is the keyword spelling and TOKEN the token code it maps to. NOTE(review): the meaning of SYMNAME_OR_IDENT is not visible here; the generated locale keyword table sets it to 0 for every entry. */ +struct keyword_t +{ + const char *name; + enum token_t token; + int symname_or_ident; + + /* Only for locdef file. */ + int locale; + enum token_t base; + enum token_t group; + enum token_t list; +}; + + +#endif /* token.h */ diff --git a/REORG.TODO/locale/programs/locfile.c b/REORG.TODO/locale/programs/locfile.c new file mode 100644 index 0000000000..0990ef11be --- /dev/null +++ b/REORG.TODO/locale/programs/locfile.c @@ -0,0 +1,1001 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <assert.h> +#include <wchar.h> + +#include "../../crypt/md5.h" +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" +#include "simple-hash.h" + +#include "locfile-kw.h" + +#define obstack_chunk_alloc xmalloc +#define obstack_chunk_free free + +/* Temporary storage of the locale data before writing it to the archive. */ +static locale_data_t to_archive; + + +int +locfile_read (struct localedef_t *result, const struct charmap_t *charmap) +{ + const char *filename = result->name; + const char *repertoire_name = result->repertoire_name; + int locale_mask = result->needed & ~result->avail; + struct linereader *ldfile; + int not_here = ALL_LOCALES; + + /* If no repertoire name was specified use the global one. */ + if (repertoire_name == NULL) + repertoire_name = repertoire_global; + + /* Open the locale definition file. 
*/ + ldfile = lr_open (filename, locfile_hash); + if (ldfile == NULL) + { + if (filename != NULL && filename[0] != '/') + { + char *i18npath = getenv ("I18NPATH"); + if (i18npath != NULL && *i18npath != '\0') + { + const size_t pathlen = strlen (i18npath); + char i18npathbuf[pathlen + 1]; + char path[strlen (filename) + 1 + pathlen + + sizeof ("/locales/") - 1]; + char *next; + i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1); + + while (ldfile == NULL + && (next = strsep (&i18npath, ":")) != NULL) + { + stpcpy (stpcpy (stpcpy (path, next), "/locales/"), filename); + + ldfile = lr_open (path, locfile_hash); + + if (ldfile == NULL) + { + stpcpy (stpcpy (stpcpy (path, next), "/"), filename); + + ldfile = lr_open (path, locfile_hash); + } + } + } + + /* Test in the default directory. */ + if (ldfile == NULL) + { + char path[strlen (filename) + 1 + sizeof (LOCSRCDIR)]; + + stpcpy (stpcpy (stpcpy (path, LOCSRCDIR), "/"), filename); + ldfile = lr_open (path, locfile_hash); + } + } + + if (ldfile == NULL) + return 1; + } + + /* Parse locale definition file and store result in RESULT. */ + while (1) + { + struct token *now = lr_token (ldfile, charmap, NULL, NULL, verbose); + enum token_t nowtok = now->tok; + struct token *arg; + + if (nowtok == tok_eof) + break; + + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + switch (nowtok) + { + case tok_escape_char: + case tok_comment_char: + /* We need an argument. */ + arg = lr_token (ldfile, charmap, NULL, NULL, verbose); + + if (arg->tok != tok_ident) + { + SYNTAX_ERROR (_("bad argument")); + continue; + } + + if (arg->val.str.lenmb != 1) + { + lr_error (ldfile, _("\ +argument to `%s' must be a single character"), + nowtok == tok_escape_char + ? 
"escape_char" : "comment_char"); + + lr_ignore_rest (ldfile, 0); + continue; + } + + if (nowtok == tok_escape_char) + ldfile->escape_char = *arg->val.str.startmb; + else + ldfile->comment_char = *arg->val.str.startmb; + break; + + case tok_repertoiremap: + /* We need an argument. */ + arg = lr_token (ldfile, charmap, NULL, NULL, verbose); + + if (arg->tok != tok_ident) + { + SYNTAX_ERROR (_("bad argument")); + continue; + } + + if (repertoire_name == NULL) + { + char *newp = alloca (arg->val.str.lenmb + 1); + + *((char *) mempcpy (newp, arg->val.str.startmb, + arg->val.str.lenmb)) = '\0'; + repertoire_name = newp; + } + break; + + case tok_lc_ctype: + ctype_read (ldfile, result, charmap, repertoire_name, + (locale_mask & CTYPE_LOCALE) == 0); + result->avail |= locale_mask & CTYPE_LOCALE; + not_here ^= CTYPE_LOCALE; + continue; + + case tok_lc_collate: + collate_read (ldfile, result, charmap, repertoire_name, + (locale_mask & COLLATE_LOCALE) == 0); + result->avail |= locale_mask & COLLATE_LOCALE; + not_here ^= COLLATE_LOCALE; + continue; + + case tok_lc_monetary: + monetary_read (ldfile, result, charmap, repertoire_name, + (locale_mask & MONETARY_LOCALE) == 0); + result->avail |= locale_mask & MONETARY_LOCALE; + not_here ^= MONETARY_LOCALE; + continue; + + case tok_lc_numeric: + numeric_read (ldfile, result, charmap, repertoire_name, + (locale_mask & NUMERIC_LOCALE) == 0); + result->avail |= locale_mask & NUMERIC_LOCALE; + not_here ^= NUMERIC_LOCALE; + continue; + + case tok_lc_time: + time_read (ldfile, result, charmap, repertoire_name, + (locale_mask & TIME_LOCALE) == 0); + result->avail |= locale_mask & TIME_LOCALE; + not_here ^= TIME_LOCALE; + continue; + + case tok_lc_messages: + messages_read (ldfile, result, charmap, repertoire_name, + (locale_mask & MESSAGES_LOCALE) == 0); + result->avail |= locale_mask & MESSAGES_LOCALE; + not_here ^= MESSAGES_LOCALE; + continue; + + case tok_lc_paper: + paper_read (ldfile, result, charmap, repertoire_name, + (locale_mask & 
PAPER_LOCALE) == 0); + result->avail |= locale_mask & PAPER_LOCALE; + not_here ^= PAPER_LOCALE; + continue; + + case tok_lc_name: + name_read (ldfile, result, charmap, repertoire_name, + (locale_mask & NAME_LOCALE) == 0); + result->avail |= locale_mask & NAME_LOCALE; + not_here ^= NAME_LOCALE; + continue; + + case tok_lc_address: + address_read (ldfile, result, charmap, repertoire_name, + (locale_mask & ADDRESS_LOCALE) == 0); + result->avail |= locale_mask & ADDRESS_LOCALE; + not_here ^= ADDRESS_LOCALE; + continue; + + case tok_lc_telephone: + telephone_read (ldfile, result, charmap, repertoire_name, + (locale_mask & TELEPHONE_LOCALE) == 0); + result->avail |= locale_mask & TELEPHONE_LOCALE; + not_here ^= TELEPHONE_LOCALE; + continue; + + case tok_lc_measurement: + measurement_read (ldfile, result, charmap, repertoire_name, + (locale_mask & MEASUREMENT_LOCALE) == 0); + result->avail |= locale_mask & MEASUREMENT_LOCALE; + not_here ^= MEASUREMENT_LOCALE; + continue; + + case tok_lc_identification: + identification_read (ldfile, result, charmap, repertoire_name, + (locale_mask & IDENTIFICATION_LOCALE) == 0); + result->avail |= locale_mask & IDENTIFICATION_LOCALE; + not_here ^= IDENTIFICATION_LOCALE; + continue; + + default: + SYNTAX_ERROR (_("\ +syntax error: not inside a locale definition section")); + continue; + } + + /* The rest of the line must be empty. */ + lr_ignore_rest (ldfile, 1); + } + + /* We read all of the file. */ + lr_close (ldfile); + + /* Mark the categories which are not contained in the file. We assume + them to be available and the default data will be used. */ + result->avail |= not_here; + + return 0; +} + + +/* Semantic checking of locale specifications. 
*/ + +static void (*const check_funcs[]) (struct localedef_t *, + const struct charmap_t *) = +{ + [LC_CTYPE] = ctype_finish, + [LC_COLLATE] = collate_finish, + [LC_MESSAGES] = messages_finish, + [LC_MONETARY] = monetary_finish, + [LC_NUMERIC] = numeric_finish, + [LC_TIME] = time_finish, + [LC_PAPER] = paper_finish, + [LC_NAME] = name_finish, + [LC_ADDRESS] = address_finish, + [LC_TELEPHONE] = telephone_finish, + [LC_MEASUREMENT] = measurement_finish, + [LC_IDENTIFICATION] = identification_finish +}; + +void +check_all_categories (struct localedef_t *definitions, + const struct charmap_t *charmap) +{ + int cnt; + + for (cnt = 0; cnt < sizeof (check_funcs) / sizeof (check_funcs[0]); ++cnt) + if (check_funcs[cnt] != NULL) + check_funcs[cnt] (definitions, charmap); +} + + +/* Writing the locale data files. All files use the same output_path. */ + +static void (*const write_funcs[]) (struct localedef_t *, + const struct charmap_t *, const char *) = +{ + [LC_CTYPE] = ctype_output, + [LC_COLLATE] = collate_output, + [LC_MESSAGES] = messages_output, + [LC_MONETARY] = monetary_output, + [LC_NUMERIC] = numeric_output, + [LC_TIME] = time_output, + [LC_PAPER] = paper_output, + [LC_NAME] = name_output, + [LC_ADDRESS] = address_output, + [LC_TELEPHONE] = telephone_output, + [LC_MEASUREMENT] = measurement_output, + [LC_IDENTIFICATION] = identification_output +}; + + +void +write_all_categories (struct localedef_t *definitions, + const struct charmap_t *charmap, const char *locname, + const char *output_path) +{ + int cnt; + + for (cnt = 0; cnt < sizeof (write_funcs) / sizeof (write_funcs[0]); ++cnt) + if (write_funcs[cnt] != NULL) + write_funcs[cnt] (definitions, charmap, output_path); + + if (! no_archive) + { + /* The data has to be added to the archive. Do this now. */ + struct locarhandle ah; + + /* Open the archive. This call never returns if we cannot + successfully open the archive. 
*/ + ah.fname = NULL; + open_archive (&ah, false); + + if (add_locale_to_archive (&ah, locname, to_archive, true) != 0) + error (EXIT_FAILURE, errno, _("cannot add to locale archive")); + + /* We are done. */ + close_archive (&ah); + } +} + + +/* Return a NULL terminated list of the directories next to output_path + that have the same owner, group, permissions and device as output_path. */ +static const char ** +siblings_uncached (const char *output_path) +{ + size_t len; + char *base, *p; + struct stat64 output_stat; + DIR *dirp; + int nelems; + const char **elems; + + /* Remove trailing slashes and trailing pathname component. */ + len = strlen (output_path); + base = (char *) alloca (len); + memcpy (base, output_path, len); + p = base + len; + while (p > base && p[-1] == '/') + p--; + if (p == base) + return NULL; + do + p--; + while (p > base && p[-1] != '/'); + if (p == base) + return NULL; + *--p = '\0'; + len = p - base; + + /* Get the properties of output_path. */ + if (lstat64 (output_path, &output_stat) < 0 || !S_ISDIR (output_stat.st_mode)) + return NULL; + + /* Iterate through the directories in base directory. 
*/ + dirp = opendir (base); + if (dirp == NULL) + return NULL; + nelems = 0; + elems = NULL; + for (;;) + { + struct dirent64 *other_dentry; + const char *other_name; + char *other_path; + struct stat64 other_stat; + + other_dentry = readdir64 (dirp); + if (other_dentry == NULL) + break; + + other_name = other_dentry->d_name; + if (strcmp (other_name, ".") == 0 || strcmp (other_name, "..") == 0) + continue; + + other_path = (char *) xmalloc (len + 1 + strlen (other_name) + 2); + memcpy (other_path, base, len); + other_path[len] = '/'; + strcpy (other_path + len + 1, other_name); + + if (lstat64 (other_path, &other_stat) >= 0 + && S_ISDIR (other_stat.st_mode) + && other_stat.st_uid == output_stat.st_uid + && other_stat.st_gid == output_stat.st_gid + && other_stat.st_mode == output_stat.st_mode + && other_stat.st_dev == output_stat.st_dev) + { + /* Found a subdirectory. Add a trailing slash and store it. */ + p = other_path + len + 1 + strlen (other_name); + *p++ = '/'; + *p = '\0'; + elems = (const char **) xrealloc ((char *) elems, + (nelems + 2) * sizeof (char **)); + elems[nelems++] = other_path; + } + else + free (other_path); + } + closedir (dirp); + + if (elems != NULL) + elems[nelems] = NULL; + return elems; +} + + +/* Return a NULL terminated list of the directories next to output_path + that have the same owner, group, permissions and device as output_path. + Cache the result for future calls. */ +static const char ** +siblings (const char *output_path) +{ + static const char *last_output_path; + static const char **last_result; + + if (output_path != last_output_path) + { + if (last_result != NULL) + { + const char **p; + + for (p = last_result; *p != NULL; p++) + free ((char *) *p); + free (last_result); + } + + last_output_path = output_path; + last_result = siblings_uncached (output_path); + } + return last_result; +} + + +/* Read as many bytes from a file descriptor as possible. 
*/ +static ssize_t +full_read (int fd, void *bufarea, size_t nbyte) +{ + char *buf = (char *) bufarea; + + while (nbyte > 0) + { + ssize_t retval = read (fd, buf, nbyte); + + if (retval == 0) + break; + else if (retval > 0) + { + buf += retval; + nbyte -= retval; + } + else if (errno != EINTR) + return retval; + } + return buf - (char *) bufarea; +} + + +/* Compare the contents of two regular files of the same size. Return 0 + if they are equal, 1 if they are different, or -1 if an error occurs. */ +static int +compare_files (const char *filename1, const char *filename2, size_t size, + size_t blocksize) +{ + int fd1, fd2; + int ret = -1; + + fd1 = open (filename1, O_RDONLY); + if (fd1 >= 0) + { + fd2 = open (filename2, O_RDONLY); + if (fd2 >= 0) + { + char *buf1 = (char *) xmalloc (2 * blocksize); + char *buf2 = buf1 + blocksize; + + ret = 0; + while (size > 0) + { + size_t bytes = (size < blocksize ? size : blocksize); + + if (full_read (fd1, buf1, bytes) < (ssize_t) bytes) + { + ret = -1; + break; + } + if (full_read (fd2, buf2, bytes) < (ssize_t) bytes) + { + ret = -1; + break; + } + if (memcmp (buf1, buf2, bytes) != 0) + { + ret = 1; + break; + } + size -= bytes; + } + + free (buf1); + close (fd2); + } + close (fd1); + } + return ret; +} + +/* True if the locale files use the opposite endianness to the + machine running localedef. */ +bool swap_endianness_p; + +/* When called outside a start_locale_structure/end_locale_structure + or start_locale_prelude/end_locale_prelude block, record that the + next byte in FILE's obstack will be the first byte of a new element. + Do likewise for the first call inside a start_locale_structure/ + end_locale_structure block. 
*/ +static void +record_offset (struct locale_file *file) +{ + if (file->structure_stage < 2) + { + assert (file->next_element < file->n_elements); + file->offsets[file->next_element++] + = (obstack_object_size (&file->data) + + (file->n_elements + 2) * sizeof (uint32_t)); + if (file->structure_stage == 1) + file->structure_stage = 2; + } +} + +/* Initialize FILE for a new output file. N_ELEMENTS is the number + of elements in the file. */ +void +init_locale_data (struct locale_file *file, size_t n_elements) +{ + file->n_elements = n_elements; + file->next_element = 0; + file->offsets = xmalloc (sizeof (uint32_t) * n_elements); + obstack_init (&file->data); + file->structure_stage = 0; +} + +/* Align the size of FILE's obstack object to BOUNDARY bytes. */ +void +align_locale_data (struct locale_file *file, size_t boundary) +{ + size_t size = -obstack_object_size (&file->data) & (boundary - 1); + obstack_blank (&file->data, size); + memset (obstack_next_free (&file->data) - size, 0, size); +} + +/* Record that FILE's next element contains no data. */ +void +add_locale_empty (struct locale_file *file) +{ + record_offset (file); +} + +/* Record that FILE's next element consists of SIZE bytes starting at DATA. */ +void +add_locale_raw_data (struct locale_file *file, const void *data, size_t size) +{ + record_offset (file); + obstack_grow (&file->data, data, size); +} + +/* Finish the current object on OBSTACK and use it as the data for FILE's + next element. */ +void +add_locale_raw_obstack (struct locale_file *file, struct obstack *obstack) +{ + size_t size = obstack_object_size (obstack); + record_offset (file); + obstack_grow (&file->data, obstack_finish (obstack), size); +} + +/* Use STRING as FILE's next element. */ +void +add_locale_string (struct locale_file *file, const char *string) +{ + record_offset (file); + obstack_grow (&file->data, string, strlen (string) + 1); +} + +/* Likewise for wide strings. 
*/ +void +add_locale_wstring (struct locale_file *file, const uint32_t *string) +{ + add_locale_uint32_array (file, string, wcslen ((const wchar_t *) string) + 1); +} + +/* Record that FILE's next element is the 32-bit integer VALUE. */ +void +add_locale_uint32 (struct locale_file *file, uint32_t value) +{ + align_locale_data (file, LOCFILE_ALIGN); + record_offset (file); + value = maybe_swap_uint32 (value); + obstack_grow (&file->data, &value, sizeof (value)); +} + +/* Record that FILE's next element is an array of N_ELEMS integers + starting at DATA. */ +void +add_locale_uint32_array (struct locale_file *file, + const uint32_t *data, size_t n_elems) +{ + align_locale_data (file, LOCFILE_ALIGN); + record_offset (file); + obstack_grow (&file->data, data, n_elems * sizeof (uint32_t)); + maybe_swap_uint32_obstack (&file->data, n_elems); +} + +/* Record that FILE's next element is the single byte given by VALUE. */ +void +add_locale_char (struct locale_file *file, char value) +{ + record_offset (file); + obstack_1grow (&file->data, value); +} + +/* Start building an element that contains several different pieces of data. + Subsequent calls to add_locale_* will add data to the same element up + till the next call to end_locale_structure. The element's alignment + is dictated by the first piece of data added to it. */ +void +start_locale_structure (struct locale_file *file) +{ + assert (file->structure_stage == 0); + file->structure_stage = 1; +} + +/* Finish a structure element that was started by start_locale_structure. + Empty structures are OK and behave like add_locale_empty. */ +void +end_locale_structure (struct locale_file *file) +{ + record_offset (file); + assert (file->structure_stage == 2); + file->structure_stage = 0; +} + +/* Start building data that goes before the next element's recorded offset. + Subsequent calls to add_locale_* will add data to the file without + treating any of it as the start of a new element. 
Calling + end_locale_prelude switches back to the usual behavior. */ +void +start_locale_prelude (struct locale_file *file) +{ + assert (file->structure_stage == 0); + file->structure_stage = 3; +} + +/* End a block started by start_locale_prelude. */ +void +end_locale_prelude (struct locale_file *file) +{ + assert (file->structure_stage == 3); + file->structure_stage = 0; +} + +/* Write a locale file, with contents given by FILE. */ +void +write_locale_data (const char *output_path, int catidx, const char *category, + struct locale_file *file) +{ + size_t cnt, step, maxiov; + int fd; + char *fname; + const char **other_paths; + uint32_t header[2]; + size_t n_elem; + struct iovec vec[3]; + + assert (file->n_elements == file->next_element); + header[0] = LIMAGIC (catidx); + header[1] = file->n_elements; + vec[0].iov_len = sizeof (header); + vec[0].iov_base = header; + vec[1].iov_len = sizeof (uint32_t) * file->n_elements; + vec[1].iov_base = file->offsets; + vec[2].iov_len = obstack_object_size (&file->data); + vec[2].iov_base = obstack_finish (&file->data); + maybe_swap_uint32_array (vec[0].iov_base, 2); + maybe_swap_uint32_array (vec[1].iov_base, file->n_elements); + n_elem = 3; + if (! no_archive) + { + /* The data will be added to the archive. For now we simply + generate the image which will be written. First determine + the size. */ + int cnt; + void *endp; + + to_archive[catidx].size = 0; + for (cnt = 0; cnt < n_elem; ++cnt) + to_archive[catidx].size += vec[cnt].iov_len; + + /* Allocate the memory for it. */ + to_archive[catidx].addr = xmalloc (to_archive[catidx].size); + + /* Fill it in. */ + for (cnt = 0, endp = to_archive[catidx].addr; cnt < n_elem; ++cnt) + endp = mempcpy (endp, vec[cnt].iov_base, vec[cnt].iov_len); + + /* Compute the MD5 sum for the data. 
*/ + __md5_buffer (to_archive[catidx].addr, to_archive[catidx].size, + to_archive[catidx].sum); + + return; + } + + fname = xmalloc (strlen (output_path) + 2 * strlen (category) + 7); + + /* Normally we write to the directory pointed to by the OUTPUT_PATH. + But for LC_MESSAGES we have to take care for the translation + data. This means we need to have a directory LC_MESSAGES in + which we place the file under the name SYS_LC_MESSAGES. */ + sprintf (fname, "%s%s", output_path, category); + fd = -2; + if (strcmp (category, "LC_MESSAGES") == 0) + { + struct stat64 st; + + if (stat64 (fname, &st) < 0) + { + if (mkdir (fname, 0777) >= 0) + { + fd = -1; + errno = EISDIR; + } + } + else if (!S_ISREG (st.st_mode)) + { + fd = -1; + errno = EISDIR; + } + } + + /* Create the locale file with nlinks == 1; this avoids crashing processes + which currently use the locale and damaging files belonging to other + locales as well. */ + if (fd == -2) + { + unlink (fname); + fd = creat (fname, 0666); + } + + if (fd == -1) + { + int save_err = errno; + + if (errno == EISDIR) + { + sprintf (fname, "%1$s%2$s/SYS_%2$s", output_path, category); + unlink (fname); + fd = creat (fname, 0666); + if (fd == -1) + save_err = errno; + } + + if (fd == -1) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, save_err, _("\ +cannot open output file `%s' for category `%s'"), fname, category)); + free (fname); + return; + } + } + +#ifdef UIO_MAXIOV + maxiov = UIO_MAXIOV; +#else + maxiov = sysconf (_SC_UIO_MAXIOV); +#endif + + /* Write the data using writev. But we must take care for the + limitation of the implementation. 
*/ + for (cnt = 0; cnt < n_elem; cnt += step) + { + step = n_elem - cnt; + if (maxiov > 0) + step = MIN (maxiov, step); + + if (writev (fd, &vec[cnt], step) < 0) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, errno, _("\ +failure while writing data for category `%s'"), category)); + break; + } + } + + close (fd); + + /* Compare the file with the locale data files for the same category in + other locales, and see if we can reuse it, to save disk space. */ + other_paths = siblings (output_path); + if (other_paths != NULL) + { + struct stat64 fname_stat; + + if (lstat64 (fname, &fname_stat) >= 0 + && S_ISREG (fname_stat.st_mode)) + { + const char *fname_tail = fname + strlen (output_path); + const char **other_p; + int seen_count; + ino_t *seen_inodes; + + seen_count = 0; + for (other_p = other_paths; *other_p; other_p++) + seen_count++; + seen_inodes = (ino_t *) xmalloc (seen_count * sizeof (ino_t)); + seen_count = 0; + + for (other_p = other_paths; *other_p; other_p++) + { + const char *other_path = *other_p; + size_t other_path_len = strlen (other_path); + char *other_fname; + struct stat64 other_fname_stat; + + other_fname = + (char *) xmalloc (other_path_len + strlen (fname_tail) + 1); + memcpy (other_fname, other_path, other_path_len); + strcpy (other_fname + other_path_len, fname_tail); + + if (lstat64 (other_fname, &other_fname_stat) >= 0 + && S_ISREG (other_fname_stat.st_mode) + /* Consider only files on the same device. + Otherwise hard linking won't work anyway. */ + && other_fname_stat.st_dev == fname_stat.st_dev + /* Consider only files with the same permissions. + Otherwise there are security risks. */ + && other_fname_stat.st_uid == fname_stat.st_uid + && other_fname_stat.st_gid == fname_stat.st_gid + && other_fname_stat.st_mode == fname_stat.st_mode + /* Don't compare fname with itself. */ + && other_fname_stat.st_ino != fname_stat.st_ino + /* Files must have the same size, otherwise they + cannot be the same. 
*/ + && other_fname_stat.st_size == fname_stat.st_size) + { + /* Skip this file if we have already read it (under a + different name). */ + int i; + + for (i = seen_count - 1; i >= 0; i--) + if (seen_inodes[i] == other_fname_stat.st_ino) + break; + if (i < 0) + { + /* Now compare fname and other_fname for real. */ + blksize_t blocksize; + +#ifdef _STATBUF_ST_BLKSIZE + blocksize = MAX (fname_stat.st_blksize, + other_fname_stat.st_blksize); + if (blocksize > 8 * 1024) + blocksize = 8 * 1024; +#else + blocksize = 8 * 1024; +#endif + + if (compare_files (fname, other_fname, + fname_stat.st_size, blocksize) == 0) + { + /* Found! other_fname is identical to fname. */ + /* Link other_fname to fname. But use a temporary + file, in case hard links don't work on the + particular filesystem. */ + char * tmp_fname = + (char *) xmalloc (strlen (fname) + 4 + 1); + + strcpy (stpcpy (tmp_fname, fname), ".tmp"); + + if (link (other_fname, tmp_fname) >= 0) + { + unlink (fname); + if (rename (tmp_fname, fname) < 0) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, errno, _("\ +cannot create output file `%s' for category `%s'"), fname, category)); + } + free (tmp_fname); + free (other_fname); + break; + } + free (tmp_fname); + } + + /* Don't compare with this file a second time. */ + seen_inodes[seen_count++] = other_fname_stat.st_ino; + } + } + free (other_fname); + } + free (seen_inodes); + } + } + + free (fname); +} + + +/* General handling of `copy'. 
*/ +void +handle_copy (struct linereader *ldfile, const struct charmap_t *charmap, + const char *repertoire_name, struct localedef_t *result, + enum token_t token, int locale, const char *locale_name, + int ignore_content) +{ + struct token *now; + int warned = 0; + + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_string) + lr_error (ldfile, _("expecting string argument for `copy'")); + else if (!ignore_content) + { + if (now->val.str.startmb == NULL) + lr_error (ldfile, _("\ +locale name should consist only of portable characters")); + else + { + (void) add_to_readlist (locale, now->val.str.startmb, + repertoire_name, 1, NULL); + result->copy_name[locale] = now->val.str.startmb; + } + } + + lr_ignore_rest (ldfile, now->tok == tok_string); + + /* The rest of the line must be empty and the next keyword must be + `END xxx'. */ + while ((now = lr_token (ldfile, charmap, result, NULL, verbose))->tok + != tok_end && now->tok != tok_eof) + { + if (warned == 0) + { + lr_error (ldfile, _("\ +no other keyword shall be specified when `copy' is used")); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + } + + if (now->tok != tok_eof) + { + /* Handle `END xxx'. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + + if (now->tok != token) + lr_error (ldfile, _("\ +`%1$s' definition does not end with `END %1$s'"), locale_name); + + lr_ignore_rest (ldfile, now->tok == token); + } + else + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), locale_name); +} diff --git a/REORG.TODO/locale/programs/locfile.h b/REORG.TODO/locale/programs/locfile.h new file mode 100644 index 0000000000..3407e13c13 --- /dev/null +++ b/REORG.TODO/locale/programs/locfile.h @@ -0,0 +1,279 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _LOCFILE_H +#define _LOCFILE_H 1 + +#include <byteswap.h> +#include <stdbool.h> +#include <stdint.h> +#include <sys/uio.h> + +#include "obstack.h" +#include "linereader.h" +#include "localedef.h" + +/* Structure for storing the contents of a category file. It is set up by + init_locale_data, filled by the add_locale_* functions and consumed by + write_locale_data. */ +struct locale_file +{ + /* N_ELEMENTS is the total number of elements the file will have; + NEXT_ELEMENT is the index of the next element to get an offset. + write_locale_data asserts that both are equal. */ + size_t n_elements, next_element; + /* Array of N_ELEMENTS element offsets. Each offset counts from the + start of the file, i.e. it includes the two header words and the + offset table itself. */ + uint32_t *offsets; + /* Obstack on which the data of all elements is accumulated. */ + struct obstack data; + /* 0 outside of any block, 1 inside a start_locale_structure block + before its offset is recorded, 2 inside such a block afterwards, + 3 inside a start_locale_prelude block. */ + int structure_stage; +}; + + +/* Macros used in the parser. SYNTAX_ERROR reports an error and skips the + rest of the line; it expects a variable named ldfile in scope. */ +#define SYNTAX_ERROR(string, args...) \ + do \ + { \ + lr_error (ldfile, string, ## args); \ + lr_ignore_rest (ldfile, 0); \ + } \ + while (0) + + +/* General handling of `copy'. */ +extern void handle_copy (struct linereader *ldfile, + const struct charmap_t *charmap, + const char *repertoire_name, + struct localedef_t *result, enum token_t token, + int locale, const char *locale_name, + int ignore_content); + +/* Found in locfile.c. */ +extern int locfile_read (struct localedef_t *result, + const struct charmap_t *charmap); + +/* Check validity of all the locale data. */ +extern void check_all_categories (struct localedef_t *definitions, + const struct charmap_t *charmap); + +/* Write out all locale categories. 
*/ +extern void write_all_categories (struct localedef_t *definitions, + const struct charmap_t *charmap, + const char *locname, + const char *output_path); + +extern bool swap_endianness_p; + +/* Change the output to be big-endian if BIG_ENDIAN is true and + little-endian otherwise. */ +static inline void +set_big_endian (bool big_endian) +{ + swap_endianness_p = (big_endian != (__BYTE_ORDER == __BIG_ENDIAN)); +} + +/* Munge VALUE so that, when stored, it has the correct byte order + for the output files. */ +static uint32_t +__attribute__ ((unused)) +maybe_swap_uint32 (uint32_t value) +{ + return swap_endianness_p ? bswap_32 (value) : value; +} + +/* Likewise, but munge an array of N uint32_ts starting at ARRAY. */ +static inline void +maybe_swap_uint32_array (uint32_t *array, size_t n) +{ + if (swap_endianness_p) + while (n-- > 0) + array[n] = bswap_32 (array[n]); +} + +/* Like maybe_swap_uint32_array, but the array of N elements is at + the end of OBSTACK's current object. */ +static inline void +maybe_swap_uint32_obstack (struct obstack *obstack, size_t n) +{ + maybe_swap_uint32_array ((uint32_t *) obstack_next_free (obstack) - n, n); +} + +/* Write out the data. 
*/ +extern void init_locale_data (struct locale_file *file, size_t n_elements); +extern void align_locale_data (struct locale_file *file, size_t boundary); +extern void add_locale_empty (struct locale_file *file); +extern void add_locale_raw_data (struct locale_file *file, const void *data, + size_t size); +extern void add_locale_raw_obstack (struct locale_file *file, + struct obstack *obstack); +extern void add_locale_string (struct locale_file *file, const char *string); +extern void add_locale_wstring (struct locale_file *file, + const uint32_t *string); +extern void add_locale_uint32 (struct locale_file *file, uint32_t value); +extern void add_locale_uint32_array (struct locale_file *file, + const uint32_t *data, size_t n_elems); +extern void add_locale_char (struct locale_file *file, char value); +extern void start_locale_structure (struct locale_file *file); +extern void end_locale_structure (struct locale_file *file); +extern void start_locale_prelude (struct locale_file *file); +extern void end_locale_prelude (struct locale_file *file); +extern void write_locale_data (const char *output_path, int catidx, + const char *category, struct locale_file *file); + + +/* Entrypoints for the parsers of the individual categories. */ + +/* Handle LC_CTYPE category. */ +extern void ctype_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void ctype_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void ctype_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); +extern uint32_t *find_translit (struct localedef_t *locale, + const struct charmap_t *charmap, uint32_t wch); + +/* Handle LC_COLLATE category. 
*/ +extern void collate_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void collate_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void collate_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_MONETARY category. */ +extern void monetary_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void monetary_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void monetary_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_NUMERIC category. */ +extern void numeric_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void numeric_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void numeric_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_MESSAGES category. */ +extern void messages_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void messages_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void messages_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_TIME category. 
*/ +extern void time_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void time_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void time_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_PAPER category. */ +extern void paper_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void paper_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void paper_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_NAME category. */ +extern void name_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void name_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void name_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_ADDRESS category. */ +extern void address_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void address_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void address_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_TELEPHONE category. 
*/ +extern void telephone_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void telephone_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void telephone_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_MEASUREMENT category. */ +extern void measurement_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void measurement_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void measurement_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_IDENTIFICATION category. */ +extern void identification_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void identification_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void identification_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +#endif /* locfile.h */ diff --git a/REORG.TODO/locale/programs/repertoire.c b/REORG.TODO/locale/programs/repertoire.c new file mode 100644 index 0000000000..61f2c055e7 --- /dev/null +++ b/REORG.TODO/locale/programs/repertoire.c @@ -0,0 +1,524 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <errno.h> +#include <error.h> +#include <limits.h> +#include <obstack.h> +#include <search.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdint.h> + +#include "localedef.h" +#include "linereader.h" +#include "charmap.h" +#include "repertoire.h" +#include "simple-hash.h" + + +/* Simple keyword hashing for the repertoiremap. */ +static const struct keyword_t *repertoiremap_hash (const char *str, + unsigned int len); +static void repertoire_new_char (struct linereader *lr, hash_table *ht, + hash_table *rt, struct obstack *ob, + uint32_t value, const char *from, + const char *to, int decimal_ellipsis); +static int repertoire_compare (const void *p1, const void *p2); + +/* Already known repertoire maps. */ +static void *known; + +/* List of repertoire maps which are not available and which have been + reported to not be. */ +static void *unavailable; + + +struct repertoire_t * +repertoire_read (const char *filename) +{ + struct linereader *repfile; + struct repertoire_t *result; + struct repertoire_t **resultp; + struct repertoire_t search; + int state; + char *from_name = NULL; + char *to_name = NULL; + enum token_t ellipsis = tok_none; + + search.name = filename; + resultp = tfind (&search, &known, &repertoire_compare); + if (resultp != NULL) + return *resultp; + + /* Determine path. 
*/ + repfile = lr_open (filename, repertoiremap_hash); + if (repfile == NULL) + { + if (strchr (filename, '/') == NULL) + { + char *i18npath = getenv ("I18NPATH"); + if (i18npath != NULL && *i18npath != '\0') + { + const size_t pathlen = strlen (i18npath); + char i18npathbuf[pathlen + 1]; + char path[strlen (filename) + 1 + pathlen + + sizeof ("/repertoiremaps/") - 1]; + char *next; + i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1); + + while (repfile == NULL + && (next = strsep (&i18npath, ":")) != NULL) + { + stpcpy (stpcpy (stpcpy (path, next), "/repertoiremaps/"), + filename); + + repfile = lr_open (path, repertoiremap_hash); + + if (repfile == NULL) + { + stpcpy (stpcpy (stpcpy (path, next), "/"), filename); + + repfile = lr_open (path, repertoiremap_hash); + } + } + } + + if (repfile == NULL) + { + /* Look in the systems charmap directory. */ + char *buf = xmalloc (strlen (filename) + 1 + + sizeof (REPERTOIREMAP_PATH)); + + stpcpy (stpcpy (stpcpy (buf, REPERTOIREMAP_PATH), "/"), + filename); + repfile = lr_open (buf, repertoiremap_hash); + + free (buf); + } + } + + if (repfile == NULL) + return NULL; + } + + /* We don't want symbolic names in string to be translated. */ + repfile->translate_strings = 0; + + /* Allocate room for result. */ + result = (struct repertoire_t *) xmalloc (sizeof (struct repertoire_t)); + memset (result, '\0', sizeof (struct repertoire_t)); + + result->name = xstrdup (filename); + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + obstack_init (&result->mem_pool); + + if (init_hash (&result->char_table, 256) + || init_hash (&result->reverse_table, 256) + || init_hash (&result->seq_table, 256)) + { + free (result); + return NULL; + } + + /* We use a state machine to describe the charmap description file + format. */ + state = 1; + while (1) + { + /* What's on? 
*/ + struct token *now = lr_token (repfile, NULL, NULL, NULL, verbose); + enum token_t nowtok = now->tok; + struct token *arg; + + if (nowtok == tok_eof) + break; + + switch (state) + { + case 1: + /* We haven't yet read any character definition. This is where + we accept escape_char and comment_char definitions. */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_escape_char || nowtok == tok_comment_char) + { + /* We know that we need an argument. */ + arg = lr_token (repfile, NULL, NULL, NULL, verbose); + + if (arg->tok != tok_ident) + { + lr_error (repfile, _("syntax error in prolog: %s"), + _("bad argument")); + + lr_ignore_rest (repfile, 0); + continue; + } + + if (arg->val.str.lenmb != 1) + { + lr_error (repfile, _("\ +argument to <%s> must be a single character"), + nowtok == tok_escape_char ? "escape_char" + : "comment_char"); + + lr_ignore_rest (repfile, 0); + continue; + } + + if (nowtok == tok_escape_char) + repfile->escape_char = *arg->val.str.startmb; + else + repfile->comment_char = *arg->val.str.startmb; + + lr_ignore_rest (repfile, 1); + continue; + } + + if (nowtok == tok_charids) + { + lr_ignore_rest (repfile, 1); + + state = 2; + continue; + } + + /* Otherwise we start reading the character definitions. */ + state = 2; + /* FALLTHROUGH */ + + case 2: + /* We are now are in the body. Each line + must have the format "%s %s %s\n" or "%s...%s %s %s\n". */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_end) + { + state = 90; + continue; + } + + if (nowtok != tok_bsymbol) + { + lr_error (repfile, + _("syntax error in repertoire map definition: %s"), + _("no symbolic name given")); + + lr_ignore_rest (repfile, 0); + continue; + } + + /* If the previous line was not completely correct free the + used memory. 
*/ + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + to_name = NULL; + + state = 3; + continue; + + case 3: + /* We have two possibilities: We can see an ellipsis or an + encoding value. */ + if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4 + || nowtok == tok_ellipsis2) + { + ellipsis = nowtok; + state = 4; + continue; + } + /* FALLTHROUGH */ + + case 5: + /* We expect a value of the form <Uxxxx> or <Uxxxxxxxx> where + the xxx mean a hexadecimal value. */ + state = 2; + + errno = 0; + if (nowtok != tok_ucs4) + { + lr_error (repfile, + _("syntax error in repertoire map definition: %s"), + _("no <Uxxxx> or <Uxxxxxxxx> value given")); + + lr_ignore_rest (repfile, 0); + continue; + } + + /* We've found a new valid definition. */ + repertoire_new_char (repfile, &result->char_table, + &result->reverse_table, &result->mem_pool, + now->val.ucs4, from_name, to_name, + ellipsis != tok_ellipsis2); + + /* Ignore the rest of the line. */ + lr_ignore_rest (repfile, 0); + + from_name = NULL; + to_name = NULL; + + continue; + + case 4: + if (nowtok != tok_bsymbol) + { + lr_error (repfile, + _("syntax error in repertoire map definition: %s"), + _("no symbolic name given for end of range")); + + lr_ignore_rest (repfile, 0); + state = 2; + continue; + } + + /* Copy the to-name in a safe place. 
*/ + to_name = (char *) obstack_copy0 (&result->mem_pool, + repfile->token.val.str.startmb, + repfile->token.val.str.lenmb); + + state = 5; + continue; + + case 90: + if (nowtok != tok_charids) + lr_error (repfile, _("\ +%1$s: definition does not end with `END %1$s'"), "CHARIDS"); + + lr_ignore_rest (repfile, nowtok == tok_charids); + break; + } + + break; + } + + if (state != 2 && state != 90 && !be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"), + repfile->fname)); + + lr_close (repfile); + + if (tsearch (result, &known, &repertoire_compare) == NULL) + /* Something went wrong. */ + WITH_CUR_LOCALE (error (0, errno, _("cannot save new repertoire map"))); + + return result; +} + + +void +repertoire_complain (const char *name) +{ + if (tfind (name, &unavailable, (__compar_fn_t) strcmp) == NULL) + { + WITH_CUR_LOCALE (error (0, errno, _("\ +repertoire map file `%s' not found"), name)); + + /* Remember that we reported this map. */ + tsearch (name, &unavailable, (__compar_fn_t) strcmp); + } +} + + +static int +repertoire_compare (const void *p1, const void *p2) +{ + struct repertoire_t *r1 = (struct repertoire_t *) p1; + struct repertoire_t *r2 = (struct repertoire_t *) p2; + + return strcmp (r1->name, r2->name); +} + + +static const struct keyword_t * +repertoiremap_hash (const char *str, unsigned int len) +{ + static const struct keyword_t wordlist[] = + { + {"escape_char", tok_escape_char, 0}, + {"comment_char", tok_comment_char, 0}, + {"CHARIDS", tok_charids, 0}, + {"END", tok_end, 0}, + }; + + if (len == 11 && memcmp (wordlist[0].name, str, 11) == 0) + return &wordlist[0]; + if (len == 12 && memcmp (wordlist[1].name, str, 12) == 0) + return &wordlist[1]; + if (len == 7 && memcmp (wordlist[2].name, str, 7) == 0) + return &wordlist[2]; + if (len == 3 && memcmp (wordlist[3].name, str, 3) == 0) + return &wordlist[3]; + + return NULL; +} + + +static void +repertoire_new_char (struct linereader *lr, hash_table *ht, hash_table *rt, + struct 
obstack *ob, uint32_t value, const char *from, + const char *to, int decimal_ellipsis) +{ + char *from_end; + char *to_end; + const char *cp; + char *buf = NULL; + int prefix_len, len1, len2; + unsigned long int from_nr, to_nr, cnt; + + if (to == NULL) + { + insert_entry (ht, from, strlen (from), + (void *) (unsigned long int) value); + /* Please note that it isn't a bug if a symbol is defined more + than once. All later definitions are simply discarded. */ + + insert_entry (rt, obstack_copy (ob, &value, sizeof (value)), + sizeof (value), (void *) from); + + return; + } + + /* We have a range: the names must have names with equal prefixes + and an equal number of digits, where the second number is greater + or equal than the first. */ + len1 = strlen (from); + len2 = strlen (to); + + if (len1 != len2) + { + invalid_range: + lr_error (lr, _("invalid names for character range")); + return; + } + + cp = &from[len1 - 1]; + if (decimal_ellipsis) + while (isdigit (*cp) && cp >= from) + --cp; + else + while (isxdigit (*cp) && cp >= from) + { + if (!isdigit (*cp) && !isupper (*cp)) + lr_error (lr, _("\ +hexadecimal range format should use only capital characters")); + --cp; + } + + prefix_len = (cp - from) + 1; + + if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) + goto invalid_range; + + errno = 0; + from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16); + if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE) + || ((to_nr = strtoul (&to[prefix_len], &to_end, + decimal_ellipsis ? 10 : 16)) == ULONG_MAX + && errno == ERANGE) + || *to_end != '\0') + { + lr_error (lr, _("<%s> and <%s> are invalid names for range"), + from, to); + return; + } + + if (from_nr > to_nr) + { + lr_error (lr, _("upper limit in range is smaller than lower limit")); + return; + } + + for (cnt = from_nr; cnt <= to_nr; ++cnt) + { + uint32_t this_value = value + (cnt - from_nr); + + obstack_printf (ob, decimal_ellipsis ? 
"%.*s%0*ld" : "%.*s%0*lX", + prefix_len, from, len1 - prefix_len, cnt); + obstack_1grow (ob, '\0'); + + insert_entry (ht, buf, len1, + (void *) (unsigned long int) this_value); + /* Please note we don't examine the return value since it is no error + if we have two definitions for a symbol. */ + + insert_entry (rt, obstack_copy (ob, &this_value, sizeof (this_value)), + sizeof (this_value), (void *) from); + } +} + + +uint32_t +repertoire_find_value (const struct repertoire_t *rep, const char *name, + size_t len) +{ + void *result; + + if (rep == NULL) + return ILLEGAL_CHAR_VALUE; + + if (find_entry ((hash_table *) &rep->char_table, name, len, &result) < 0) + return ILLEGAL_CHAR_VALUE; + + return (uint32_t) ((unsigned long int) result); +} + + +const char * +repertoire_find_symbol (const struct repertoire_t *rep, uint32_t ucs) +{ + void *result; + + if (rep == NULL) + return NULL; + + if (find_entry ((hash_table *) &rep->reverse_table, &ucs, sizeof (ucs), + &result) < 0) + return NULL; + + return (const char *) result; +} + + +struct charseq * +repertoire_find_seq (const struct repertoire_t *rep, uint32_t ucs) +{ + void *result; + + if (rep == NULL) + return NULL; + + if (find_entry ((hash_table *) &rep->seq_table, &ucs, sizeof (ucs), + &result) < 0) + return NULL; + + return (struct charseq *) result; +} diff --git a/REORG.TODO/locale/programs/repertoire.h b/REORG.TODO/locale/programs/repertoire.h new file mode 100644 index 0000000000..f07ffcf1f4 --- /dev/null +++ b/REORG.TODO/locale/programs/repertoire.h @@ -0,0 +1,64 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _REPERTOIREMAP_H +#define _REPERTOIREMAP_H 1 + +#include <obstack.h> +#include <stdint.h> + +#include "charmap.h" +#include "simple-hash.h" + +struct repertoire_t +{ + const char *name; + struct obstack mem_pool; + hash_table char_table; + hash_table reverse_table; + hash_table seq_table; +}; + + +/* We need one value to mark the error case. Let's use 0xffffffff. + I.e., it is placed in the last page of ISO 10646. For now only the + first is used and we have plenty of room. */ +#define ILLEGAL_CHAR_VALUE ((uint32_t) 0xffffffffu) + +/* Another value is needed to signal that a value is not yet determined. */ +#define UNINITIALIZED_CHAR_VALUE ((uint32_t) 0xfffffffeu) + + +/* Prototypes for repertoire map handling functions. */ +extern struct repertoire_t *repertoire_read (const char *filename); + +/* Report missing repertoire map. */ +extern void repertoire_complain (const char *name); + +/* Return UCS4 value of character with given NAME. */ +extern uint32_t repertoire_find_value (const struct repertoire_t *repertoire, + const char *name, size_t len); + +/* Return symbol for given UCS4 value. */ +extern const char *repertoire_find_symbol (const struct repertoire_t *repertoire, + uint32_t ucs); + +/* Query the has table to memoize mapping from UCS4 to byte sequences. 
*/ +extern struct charseq *repertoire_find_seq (const struct repertoire_t *rep, + uint32_t ucs); + +#endif /* repertoiremap.h */ diff --git a/REORG.TODO/locale/programs/simple-hash.c b/REORG.TODO/locale/programs/simple-hash.c new file mode 100644 index 0000000000..5e62e249a6 --- /dev/null +++ b/REORG.TODO/locale/programs/simple-hash.c @@ -0,0 +1,291 @@ +/* Implement simple hashing table with string based keys. + Copyright (C) 1994-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/types.h> + +#include <obstack.h> + +#ifdef HAVE_VALUES_H +# include <values.h> +#endif + +#include "simple-hash.h" + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + +#ifndef BITSPERBYTE +# define BITSPERBYTE 8 +#endif + +#define hashval_t uint32_t +#include "hashval.h" + +#include <programs/xmalloc.h> + +typedef struct hash_entry +{ + unsigned long used; + const void *key; + size_t keylen; + void *data; + struct hash_entry *next; +} +hash_entry; + +/* Prototypes for local functions. 
*/ +static void insert_entry_2 (hash_table *htab, const void *key, size_t keylen, + unsigned long hval, size_t idx, void *data); +static size_t lookup (const hash_table *htab, const void *key, size_t keylen, + unsigned long int hval); +static int is_prime (unsigned long int candidate); + + +int +init_hash (hash_table *htab, unsigned long int init_size) +{ + /* We need the size to be a prime. */ + init_size = next_prime (init_size); + + /* Initialize the data structure. */ + htab->size = init_size; + htab->filled = 0; + htab->first = NULL; + htab->table = (void *) xcalloc (init_size + 1, sizeof (hash_entry)); + if (htab->table == NULL) + return -1; + + obstack_init (&htab->mem_pool); + + return 0; +} + + +int +delete_hash (hash_table *htab) +{ + free (htab->table); + obstack_free (&htab->mem_pool, NULL); + return 0; +} + + +int +insert_entry (hash_table *htab, const void *key, size_t keylen, void *data) +{ + unsigned long int hval = compute_hashval (key, keylen); + hash_entry *table = (hash_entry *) htab->table; + size_t idx = lookup (htab, key, keylen, hval); + + if (table[idx].used) + /* We don't want to overwrite the old value. */ + return -1; + else + { + /* An empty bucket has been found. */ + insert_entry_2 (htab, obstack_copy (&htab->mem_pool, key, keylen), + keylen, hval, idx, data); + return 0; + } +} + +static void +insert_entry_2 (hash_table *htab, const void *key, size_t keylen, + unsigned long int hval, size_t idx, void *data) +{ + hash_entry *table = (hash_entry *) htab->table; + + table[idx].used = hval; + table[idx].key = key; + table[idx].keylen = keylen; + table[idx].data = data; + + /* List the new value in the list. 
*/ + if ((hash_entry *) htab->first == NULL) + { + table[idx].next = &table[idx]; + htab->first = &table[idx]; + } + else + { + table[idx].next = ((hash_entry *) htab->first)->next; + ((hash_entry *) htab->first)->next = &table[idx]; + htab->first = &table[idx]; + } + + ++htab->filled; + if (100 * htab->filled > 75 * htab->size) + { + /* Table is filled more than 75%. Resize the table. + Experiments have shown that for best performance, this threshold + must lie between 40% and 85%. */ + unsigned long int old_size = htab->size; + + htab->size = next_prime (htab->size * 2); + htab->filled = 0; + htab->first = NULL; + htab->table = (void *) xcalloc (1 + htab->size, sizeof (hash_entry)); + + for (idx = 1; idx <= old_size; ++idx) + if (table[idx].used) + insert_entry_2 (htab, table[idx].key, table[idx].keylen, + table[idx].used, + lookup (htab, table[idx].key, table[idx].keylen, + table[idx].used), + table[idx].data); + + free (table); + } +} + + +int +find_entry (const hash_table *htab, const void *key, size_t keylen, + void **result) +{ + hash_entry *table = (hash_entry *) htab->table; + size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); + + if (table[idx].used == 0) + return -1; + + *result = table[idx].data; + return 0; +} + + +int +set_entry (hash_table *htab, const void *key, size_t keylen, void *newval) +{ + hash_entry *table = (hash_entry *) htab->table; + size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); + + if (table[idx].used == 0) + return -1; + + table[idx].data = newval; + return 0; +} + + +int +iterate_table (const hash_table *htab, void **ptr, const void **key, + size_t *keylen, void **data) +{ + if (*ptr == NULL) + { + if (htab->first == NULL) + return -1; + *ptr = (void *) ((hash_entry *) htab->first)->next; + } + else + { + if (*ptr == htab->first) + return -1; + *ptr = (void *) (((hash_entry *) *ptr)->next); + } + + *key = ((hash_entry *) *ptr)->key; + *keylen = ((hash_entry *) *ptr)->keylen; + *data = 
((hash_entry *) *ptr)->data; + return 0; +} + + +/* References: + [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 + [Knuth] The Art of Computer Programming, part3 (6.4) */ + +static size_t +lookup (const hash_table *htab, const void *key, size_t keylen, + unsigned long int hval) +{ + unsigned long int hash; + size_t idx; + hash_entry *table = (hash_entry *) htab->table; + + /* First hash function: simply take the modul but prevent zero. */ + hash = 1 + hval % htab->size; + + idx = hash; + + if (table[idx].used) + { + if (table[idx].used == hval && table[idx].keylen == keylen + && memcmp (table[idx].key, key, keylen) == 0) + return idx; + + /* Second hash function as suggested in [Knuth]. */ + hash = 1 + hval % (htab->size - 2); + + do + { + if (idx <= hash) + idx = htab->size + idx - hash; + else + idx -= hash; + + /* If entry is found use it. */ + if (table[idx].used == hval && table[idx].keylen == keylen + && memcmp (table[idx].key, key, keylen) == 0) + return idx; + } + while (table[idx].used); + } + return idx; +} + + +unsigned long int +next_prime (unsigned long int seed) +{ + /* Make it definitely odd. */ + seed |= 1; + + while (!is_prime (seed)) + seed += 2; + + return seed; +} + + +static int +is_prime (unsigned long int candidate) +{ + /* No even number and none less than 10 will be passed here. */ + unsigned long int divn = 3; + unsigned long int sq = divn * divn; + + while (sq < candidate && candidate % divn != 0) + { + ++divn; + sq += 4 * divn; + ++divn; + } + + return candidate % divn != 0; +} diff --git a/REORG.TODO/locale/programs/simple-hash.h b/REORG.TODO/locale/programs/simple-hash.h new file mode 100644 index 0000000000..92ce9508e9 --- /dev/null +++ b/REORG.TODO/locale/programs/simple-hash.h @@ -0,0 +1,53 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _SIMPLE_HASH_H +#define _SIMPLE_HASH_H + +#include <inttypes.h> +#include <obstack.h> +#include <stdint.h> + +typedef struct hash_table +{ + unsigned long int size; + unsigned long int filled; + void *first; + void *table; + struct obstack mem_pool; +} +hash_table; + + +extern int init_hash (hash_table *htab, unsigned long int init_size) __THROW; +extern int delete_hash (hash_table *htab) __THROW; +extern int insert_entry (hash_table *htab, const void *key, size_t keylen, + void *data) __THROW; +extern int find_entry (const hash_table *htab, const void *key, size_t keylen, + void **result) __THROW; +extern int set_entry (hash_table *htab, const void *key, size_t keylen, + void *newval) __THROW; + +extern int iterate_table (const hash_table *htab, void **ptr, + const void **key, size_t *keylen, void **data) + __THROW; + +extern uint32_t compute_hashval (const void *key, size_t keylen) + __THROW; +extern unsigned long int next_prime (unsigned long int seed) __THROW; + +#endif /* simple-hash.h */ diff --git a/REORG.TODO/locale/programs/xmalloc.c b/REORG.TODO/locale/programs/xmalloc.c new file mode 100644 index 0000000000..92468b8c7f --- /dev/null +++ b/REORG.TODO/locale/programs/xmalloc.c @@ -0,0 +1,106 @@ +/* xmalloc.c -- malloc with out of memory checking + Copyright (C) 1990-2017 Free Software Foundation, Inc. 
/* This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#define VOID void

#include <sys/types.h>

#if STDC_HEADERS || _LIBC
#include <stdlib.h>
static VOID *fixup_null_alloc (size_t n) __THROW;
VOID *xmalloc (size_t n) __THROW;
VOID *xcalloc (size_t n, size_t s) __THROW;
VOID *xrealloc (VOID *p, size_t n) __THROW;
#else
VOID *calloc ();
VOID *malloc ();
VOID *realloc ();
void free ();
#endif

#include <libintl.h>
#include "error.h"

#ifndef _
# define _(str) gettext (str)
#endif

#ifndef EXIT_FAILURE
#define EXIT_FAILURE 4
#endif

/* Exit value when the requested amount of memory is not available.
   The caller may set it to some other value.  */
int xmalloc_exit_failure = EXIT_FAILURE;

/* Called after an allocation of N bytes returned a null pointer.
   A request for zero bytes is retried as a one-byte allocation, since
   an allocator may answer such a request with NULL.  In every other
   case, or if the retry fails too, "memory exhausted" is reported via
   error with status xmalloc_exit_failure.  */
static VOID *
fixup_null_alloc (size_t n)
{
  VOID *p;

  p = 0;
  if (n == 0)
    p = malloc ((size_t) 1);
  if (p == 0)
    error (xmalloc_exit_failure, 0, _("memory exhausted"));
  return p;
}

/* Allocate N bytes of memory dynamically, with error checking.  */

VOID *
xmalloc (size_t n)
{
  VOID *p;

  p = malloc (n);
  if (p == 0)
    p = fixup_null_alloc (n);
  return p;
}

/* Allocate memory for N elements of S bytes, with error checking.
   The memory is zero-filled.  */

VOID *
xcalloc (size_t n, size_t s)
{
  VOID *p;

  p = calloc (n, s);
  if (p == 0)
    /* The request is empty when either factor is zero, not only when N
       is.  Tell the fixup routine so: otherwise a null pointer returned
       for calloc (n, 0) would be reported as memory exhaustion.  */
    p = fixup_null_alloc ((n == 0 || s == 0) ? (size_t) 0 : (size_t) 1);
  return p;
}

/* Change the size of an allocated block of memory P to N bytes,
   with error checking.
   If P is NULL, run xmalloc.  */

VOID *
xrealloc (VOID *p, size_t n)
{
  if (p == 0)
    return xmalloc (n);
  p = realloc (p, n);
  if (p == 0)
    p = fixup_null_alloc (n);
  return p;
}

/* diff --git a/REORG.TODO/locale/programs/xstrdup.c b/REORG.TODO/locale/programs/xstrdup.c
   new file mode 100644
   index 0000000000..dcd89b160f
   --- /dev/null
   +++ b/REORG.TODO/locale/programs/xstrdup.c
   @@ -0,0 +1,36 @@  */
/* xstrdup.c -- copy a string with out of memory checking
   Copyright (C) 1990-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#if defined STDC_HEADERS || defined HAVE_STRING_H || _LIBC
# include <string.h>
#else
# include <strings.h>
#endif
void *xmalloc (size_t n) __THROW;
char *xstrdup (char *string) __THROW;

/* Return a newly allocated copy of STRING.  */
*/ + +char * +xstrdup (char *string) +{ + return strcpy (xmalloc (strlen (string) + 1), string); +} diff --git a/REORG.TODO/locale/setlocale.c b/REORG.TODO/locale/setlocale.c new file mode 100644 index 0000000000..19acc4b2c7 --- /dev/null +++ b/REORG.TODO/locale/setlocale.c @@ -0,0 +1,534 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <alloca.h> +#include <argz.h> +#include <errno.h> +#include <libc-lock.h> +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "localeinfo.h" + +#ifdef NL_CURRENT_INDIRECT + +/* For each category declare a special external symbol + _nl_current_CATEGORY_used with a weak reference. + This symbol will is defined in lc-CATEGORY.c and will be linked in + if anything uses _nl_current_CATEGORY (also defined in that module). + Also use a weak reference for the _nl_current_CATEGORY thread variable. */ + +# define DEFINE_CATEGORY(category, category_name, items, a) \ + extern char _nl_current_##category##_used; \ + weak_extern (_nl_current_##category##_used) \ + weak_extern (_nl_current_##category) +# include "categories.def" +# undef DEFINE_CATEGORY + +/* Now define a table of flags based on those special weak symbols' values. 
+ _nl_current_used[CATEGORY] will be zero if _nl_current_CATEGORY is not + linked in. */ +static char *const _nl_current_used[] = + { +# define DEFINE_CATEGORY(category, category_name, items, a) \ + [category] = &_nl_current_##category##_used, +# include "categories.def" +# undef DEFINE_CATEGORY + }; + +# define CATEGORY_USED(category) (_nl_current_used[category] != 0) + +#else + +/* The shared library always loads all the categories, + and the current global settings are kept in _nl_global_locale. */ + +# define CATEGORY_USED(category) (1) + +#endif + + +/* Define an array of category names (also the environment variable names). */ +const union catnamestr_t _nl_category_names attribute_hidden = + { + { +#define DEFINE_CATEGORY(category, category_name, items, a) \ + category_name, +#include "categories.def" +#undef DEFINE_CATEGORY + } + }; + +const uint8_t _nl_category_name_idxs[__LC_LAST] attribute_hidden = + { +#define DEFINE_CATEGORY(category, category_name, items, a) \ + [category] = offsetof (union catnamestr_t, CATNAMEMF (__LINE__)), +#include "categories.def" +#undef DEFINE_CATEGORY + }; + +/* An array of their lengths, for convenience. */ +const uint8_t _nl_category_name_sizes[] attribute_hidden = + { +#define DEFINE_CATEGORY(category, category_name, items, a) \ + [category] = sizeof (category_name) - 1, +#include "categories.def" +#undef DEFINE_CATEGORY + [LC_ALL] = sizeof ("LC_ALL") - 1 + }; + + +#ifdef NL_CURRENT_INDIRECT +# define WEAK_POSTLOAD(postload) weak_extern (postload) +#else +# define WEAK_POSTLOAD(postload) /* Need strong refs in static linking. */ +#endif + +/* Declare the postload functions used below. */ +#undef NO_POSTLOAD +#define NO_POSTLOAD _nl_postload_ctype /* Harmless thing known to exist. 
*/
+#define DEFINE_CATEGORY(category, category_name, items, postload) \
+extern void postload (void); WEAK_POSTLOAD (postload)
+#include "categories.def"
+#undef DEFINE_CATEGORY
+#undef NO_POSTLOAD
+
+/* Define an array indexed by category of postload functions to call after
+   loading and installing that category's data. */
+static void (*const _nl_category_postload[]) (void) =
+  {
+#define DEFINE_CATEGORY(category, category_name, items, postload) \
+    [category] = postload,
+#include "categories.def"
+#undef DEFINE_CATEGORY
+  };
+
+
+/* Lock for protecting global data. */
+__libc_rwlock_define_initialized (, __libc_setlocale_lock attribute_hidden)
+
+/* Defined in loadmsgcat.c. */
+extern int _nl_msg_cat_cntr;
+
+
+/* Use this when we come along an error. */
+/* Sets errno to EINVAL and returns NULL from the enclosing function, so
+   it may only be used inside functions returning a pointer.  */
+#define ERROR_RETURN \
+  do { \
+    __set_errno (EINVAL); \
+    return NULL; \
+  } while (0)
+
+
+/* Construct a new composite name. */
+/* If CATEGORY is LC_ALL, NEWNAMES[i] is the name for every category i;
+   otherwise NEWNAMES[0] is the new name for CATEGORY and all other
+   categories keep the names currently in _nl_global_locale.
+   Returns _nl_C_name (not allocated) when every category is named like
+   _nl_C_name or _nl_POSIX_name, a malloc'd copy of the common name when
+   all names are equal, and otherwise a malloc'd string of
+   "CATEGORY=NAME" clauses separated by ';'.  Returns NULL if malloc
+   fails.  */
+static char *
+new_composite_name (int category, const char *newnames[__LC_LAST])
+{
+  size_t last_len = 0;
+  size_t cumlen = 0;
+  int i;
+  char *new, *p;
+  int same = 1;
+
+  /* First pass: add up the space for "CATEGORY=NAME;" per category and
+     note whether every name equals NEWNAMES[0].  */
+  for (i = 0; i < __LC_LAST; ++i)
+    if (i != LC_ALL)
+      {
+        const char *name = (category == LC_ALL ? newnames[i] :
+                            category == i ? newnames[0] :
+                            _nl_global_locale.__names[i]);
+        last_len = strlen (name);
+        cumlen += _nl_category_name_sizes[i] + 1 + last_len + 1;
+        if (same && name != newnames[0] && strcmp (name, newnames[0]) != 0)
+          same = 0;
+      }
+
+  if (same)
+    {
+      /* All the categories use the same name. */
+      if (strcmp (newnames[0], _nl_C_name) == 0
+          || strcmp (newnames[0], _nl_POSIX_name) == 0)
+        return (char *) _nl_C_name;
+
+      /* LAST_LEN is the length of the last name examined; as all names
+         compared equal it is also the length of NEWNAMES[0].  */
+      new = malloc (last_len + 1);
+
+      return new == NULL ? NULL : memcpy (new, newnames[0], last_len + 1);
+    }
+
+  new = malloc (cumlen);
+  if (new == NULL)
+    return NULL;
+  p = new;
+  for (i = 0; i < __LC_LAST; ++i)
+    if (i != LC_ALL)
+      {
+        /* Add "CATEGORY=NAME;" to the string. */
+        const char *name = (category == LC_ALL ? newnames[i] :
+                            category == i ? newnames[0] :
+                            _nl_global_locale.__names[i]);
+        p = __stpcpy (p, _nl_category_names.str + _nl_category_name_idxs[i]);
+        *p++ = '=';
+        p = __stpcpy (p, name);
+        *p++ = ';';
+      }
+  p[-1] = '\0'; /* Clobber the last ';'. */
+  return new;
+}
+
+
+/* Put NAME in _nl_global_locale.__names. */
+/* The previous name is freed unless it is the static _nl_C_name, so the
+   slot takes over NAME.  Storing the pointer already in the slot is a
+   no-op.  */
+static void
+setname (int category, const char *name)
+{
+  if (_nl_global_locale.__names[category] == name)
+    return;
+
+  if (_nl_global_locale.__names[category] != _nl_C_name)
+    free ((char *) _nl_global_locale.__names[category]);
+
+  _nl_global_locale.__names[category] = name;
+}
+
+/* Put DATA in *_nl_current[CATEGORY]. */
+/* Nothing is stored when CATEGORY_USED (CATEGORY) is false.  After the
+   store the category's postload hook, if non-null, is called.  */
+static void
+setdata (int category, struct __locale_data *data)
+{
+  if (CATEGORY_USED (category))
+    {
+      _nl_global_locale.__locales[category] = data;
+      if (_nl_category_postload[category])
+        (*_nl_category_postload[category]) ();
+    }
+}
+
+/* Set the global locale for CATEGORY to LOCALE and return the resulting
+   name.  A null LOCALE only queries the current name of CATEGORY.  A
+   CATEGORY outside [0, __LC_LAST) fails through ERROR_RETURN.  */
+char *
+setlocale (int category, const char *locale)
+{
+  char *locale_path;
+  size_t locale_path_len;
+  const char *locpath_var;
+  char *composite;
+
+  /* Sanity check for CATEGORY argument. */
+  if (__builtin_expect (category, 0) < 0
+      || __builtin_expect (category, 0) >= __LC_LAST)
+    ERROR_RETURN;
+
+  /* Does user want name of current locale? */
+  if (locale == NULL)
+    return (char *) _nl_global_locale.__names[category];
+
+  /* Protect global data. */
+  __libc_rwlock_wrlock (__libc_setlocale_lock);
+
+  if (strcmp (locale, _nl_global_locale.__names[category]) == 0)
+    {
+      /* Changing to the same thing. */
+      __libc_rwlock_unlock (__libc_setlocale_lock);
+
+      return (char *) _nl_global_locale.__names[category];
+    }
+
+  /* We perhaps really have to load some data. So we determine the
+     path in which to look for the data now. The environment variable
+     `LOCPATH' must only be used when the binary has no SUID or SGID
+     bit set. If using the default path, we tell _nl_find_locale
+     by passing null and it can check the canonical locale archive.
*/
+  locale_path = NULL;
+  locale_path_len = 0;
+
+  locpath_var = getenv ("LOCPATH");
+  if (locpath_var != NULL && locpath_var[0] != '\0')
+    {
+      if (__argz_create_sep (locpath_var, ':',
+                             &locale_path, &locale_path_len) != 0
+          || __argz_add_sep (&locale_path, &locale_path_len,
+                             _nl_default_locale_path, ':') != 0)
+        {
+          __libc_rwlock_unlock (__libc_setlocale_lock);
+          /* The second call can fail after the first one has already
+             stored an allocation in LOCALE_PATH; do not leak it.
+             LOCALE_PATH was set to NULL above and free (NULL) is a
+             no-op.  */
+          free (locale_path);
+          return NULL;
+        }
+    }
+
+  if (category == LC_ALL)
+    {
+      /* The user wants to set all categories. The desired locales
+         for the individual categories can be selected by using a
+         composite locale name. This is a semi-colon separated list
+         of entries of the form `CATEGORY=VALUE'. */
+      const char *newnames[__LC_LAST];
+      struct __locale_data *newdata[__LC_LAST];
+      /* Copy of the locale argument, for in-place splitting. */
+      char *locale_copy = NULL;
+
+      /* Set all name pointers to the argument name. */
+      for (category = 0; category < __LC_LAST; ++category)
+        if (category != LC_ALL)
+          newnames[category] = (char *) locale;
+
+      if (__glibc_unlikely (strchr (locale, ';') != NULL))
+        {
+          /* This is a composite name. Make a copy and split it up. */
+          locale_copy = __strdup (locale);
+          if (__glibc_unlikely (locale_copy == NULL))
+            {
+              __libc_rwlock_unlock (__libc_setlocale_lock);
+              /* Release the LOCPATH search list built above.  */
+              free (locale_path);
+              return NULL;
+            }
+          char *np = locale_copy;
+          char *cp;
+          int cnt;
+
+          /* Each clause is "CATEGORY=VALUE"; NP points at the start of
+             the current clause and CP at its '='.  */
+          while ((cp = strchr (np, '=')) != NULL)
+            {
+              for (cnt = 0; cnt < __LC_LAST; ++cnt)
+                if (cnt != LC_ALL
+                    && (size_t) (cp - np) == _nl_category_name_sizes[cnt]
+                    && (memcmp (np, (_nl_category_names.str
+                                     + _nl_category_name_idxs[cnt]), cp - np)
+                        == 0))
+                  break;
+
+              if (cnt == __LC_LAST)
+                {
+                error_return:
+                  __libc_rwlock_unlock (__libc_setlocale_lock);
+                  /* Both buffers are owned by this call; the search
+                     list must be released here as well as the copy.  */
+                  free (locale_path);
+                  free (locale_copy);
+
+                  /* Bogus category name. */
+                  ERROR_RETURN;
+                }
+
+              /* Found the category this clause sets. */
+              newnames[cnt] = ++cp;
+              cp = strchr (cp, ';');
+              if (cp != NULL)
+                {
+                  /* Examine the next clause. */
+                  *cp = '\0';
+                  np = cp + 1;
+                }
+              else
+                /* This was the last clause. We are done.
*/
+                break;
+            }
+
+          /* An entry still pointing at LOCALE itself was never assigned
+             by a clause above.  */
+          for (cnt = 0; cnt < __LC_LAST; ++cnt)
+            if (cnt != LC_ALL && newnames[cnt] == locale)
+              /* The composite name did not specify all categories. */
+              goto error_return;
+        }
+
+      /* Load the new data for each category. */
+      /* CATEGORY counts down here.  A `break' leaves it >= 0, which is
+         how the code below recognizes a failure; a complete run ends
+         with CATEGORY == -1.  */
+      while (category-- > 0)
+        if (category != LC_ALL)
+          {
+            newdata[category] = _nl_find_locale (locale_path, locale_path_len,
+                                                 category,
+                                                 &newnames[category]);
+
+            if (newdata[category] == NULL)
+              {
+#ifdef NL_CURRENT_INDIRECT
+                if (newnames[category] == _nl_C_name)
+                  /* Null because it's the weak value of _nl_C_LC_FOO. */
+                  continue;
+#endif
+                break;
+              }
+
+            /* We must not simply free a global locale since we have
+               no control over the usage. So we mark it as
+               un-deletable. And yes, the 'if' is needed, the data
+               might be in read-only memory. */
+            if (newdata[category]->usage_count != UNDELETABLE)
+              newdata[category]->usage_count = UNDELETABLE;
+
+            /* Make a copy of locale name. */
+            /* A name equal to the currently installed one reuses that
+               pointer instead of allocating a duplicate.  */
+            if (newnames[category] != _nl_C_name)
+              {
+                if (strcmp (newnames[category],
+                            _nl_global_locale.__names[category]) == 0)
+                  newnames[category] = _nl_global_locale.__names[category];
+                else
+                  {
+                    newnames[category] = __strdup (newnames[category]);
+                    if (newnames[category] == NULL)
+                      break;
+                  }
+              }
+          }
+
+      /* Create new composite name. */
+      composite = (category >= 0
+                   ? NULL : new_composite_name (LC_ALL, newnames));
+      if (composite != NULL)
+        {
+          /* Now we have loaded all the new data. Put it in place. */
+          for (category = 0; category < __LC_LAST; ++category)
+            if (category != LC_ALL)
+              {
+                setdata (category, newdata[category]);
+                setname (category, newnames[category]);
+              }
+          setname (LC_ALL, composite);
+
+          /* We successfully loaded a new locale. Let the message catalog
+             functions know about this. */
+          ++_nl_msg_cat_cntr;
+        }
+      else
+        /* Failure: only the categories above the one that stopped the
+           load loop were processed.  Free their names unless they are
+           _nl_C_name or the pointer currently installed.  */
+        for (++category; category < __LC_LAST; ++category)
+          if (category != LC_ALL && newnames[category] != _nl_C_name
+              && newnames[category] != _nl_global_locale.__names[category])
+            free ((char *) newnames[category]);
+
+      /* Critical section left. */
+      __libc_rwlock_unlock (__libc_setlocale_lock);
+
+      /* Free the resources. */
+      free (locale_path);
+      free (locale_copy);
+
+      return composite;
+    }
+  else
+    {
+      struct __locale_data *newdata = NULL;
+      /* One-element array so it can be passed to new_composite_name,
+         which reads NEWNAMES[0] for a single category.  */
+      const char *newname[1] = { locale };
+
+      if (CATEGORY_USED (category))
+        {
+          /* Only actually load the data if anything will use it. */
+          newdata = _nl_find_locale (locale_path, locale_path_len, category,
+                                     &newname[0]);
+          if (newdata == NULL)
+            goto abort_single;
+
+          /* We must not simply free a global locale since we have no
+             control over the usage. So we mark it as un-deletable.
+
+             Note: do not remove the `if', it's necessary to cope with
+             the builtin locale data. */
+          if (newdata->usage_count != UNDELETABLE)
+            newdata->usage_count = UNDELETABLE;
+        }
+
+      /* Make a copy of locale name. */
+      if (newname[0] != _nl_C_name)
+        {
+          newname[0] = __strdup (newname[0]);
+          if (newname[0] == NULL)
+            goto abort_single;
+        }
+
+      /* Create new composite name. */
+      composite = new_composite_name (category, newname);
+      if (composite == NULL)
+        {
+          if (newname[0] != _nl_C_name)
+            free ((char *) newname[0]);
+
+          /* Say that we don't have any data loaded. */
+          /* The earlier failure paths jump here too, so the function
+             returns NULL through NEWNAME[0].  */
+        abort_single:
+          newname[0] = NULL;
+        }
+      else
+        {
+          if (CATEGORY_USED (category))
+            setdata (category, newdata);
+
+          setname (category, newname[0]);
+          setname (LC_ALL, composite);
+
+          /* We successfully loaded a new locale. Let the message catalog
+             functions know about this. */
+          ++_nl_msg_cat_cntr;
+        }
+
+      /* Critical section left. */
+      __libc_rwlock_unlock (__libc_setlocale_lock);
+
+      /* Free the resources (the locale path variable).
*/
+      free (locale_path);
+
+      return (char *) newname[0];
+    }
+}
+libc_hidden_def (setlocale)
+
+/* Release what was loaded for CATEGORY.  If HERE (the data currently in
+   use) is not C_DATA, the global locale is first switched to C_DATA and
+   _nl_C_name.  Then every entry of _nl_locale_file_list[CATEGORY] is
+   freed together with its filename, and its data is unloaded unless it
+   is null or C_DATA.  */
+static void __libc_freeres_fn_section
+free_category (int category,
+               struct __locale_data *here, struct __locale_data *c_data)
+{
+  struct loaded_l10nfile *runp = _nl_locale_file_list[category];
+
+  /* If this category is already "C" don't do anything. */
+  if (here != c_data)
+    {
+      /* We have to be prepared that sometime later we still
+         might need the locale information. */
+      setdata (category, c_data);
+      setname (category, _nl_C_name);
+    }
+
+  while (runp != NULL)
+    {
+      struct loaded_l10nfile *curr = runp;
+      struct __locale_data *data = (struct __locale_data *) runp->data;
+
+      if (data != NULL && data != c_data)
+        _nl_unload_locale (data);
+      /* Advance before CURR is freed.  */
+      runp = runp->next;
+      free ((char *) curr->filename);
+      free (curr);
+    }
+}
+
+/* This is called from iconv/gconv_db.c's free_mem, as locales must
+   be freed before freeing gconv steps arrays. */
+void __libc_freeres_fn_section
+_nl_locale_subfreeres (void)
+{
+#ifdef NL_CURRENT_INDIRECT
+  /* We don't use the loop because we want to have individual weak
+     symbol references here. */
+# define DEFINE_CATEGORY(category, category_name, items, a) \
+  if (CATEGORY_USED (category)) \
+    { \
+      extern struct __locale_data _nl_C_##category; \
+      weak_extern (_nl_C_##category) \
+      free_category (category, *_nl_current_##category, &_nl_C_##category); \
+    }
+# include "categories.def"
+# undef DEFINE_CATEGORY
+#else
+  int category;
+
+  for (category = 0; category < __LC_LAST; ++category)
+    if (category != LC_ALL)
+      free_category (category, _NL_CURRENT_DATA (category),
+                     _nl_C_locobj.__locales[category]);
+#endif
+
+  /* free_category handles the individual categories only; reset the
+     LC_ALL name here.  */
+  setname (LC_ALL, _nl_C_name);
+
+  /* This frees the data structures associated with the locale archive.
+     The locales from the archive are not in the file list, so we have
+     not called _nl_unload_locale on them above.
*/ + _nl_archive_subfreeres (); +} diff --git a/REORG.TODO/locale/strlen-hash.h b/REORG.TODO/locale/strlen-hash.h new file mode 100644 index 0000000000..d7597edbc2 --- /dev/null +++ b/REORG.TODO/locale/strlen-hash.h @@ -0,0 +1,51 @@ +/* Implements hashing function for string with known length. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sys/types.h> + +/* We assume to have `size_t' value with at least 32 bits. */ +#define HASHWORDBITS 32 + + +/* Defines the so called `hashpjw' function by P.J. Weinberger + [see Aho/Sethi/Ullman, COMPILERS: Principles, Techniques and Tools, + 1986, 1987 Bell Telephone Laboratories, Inc.] */ +static size_t hash_string (const char *__str_param, size_t __len); + +static inline size_t +hash_string (const char *str_param, size_t len) +{ + size_t hval, g; + const char *end_str = str_param + len; + + /* Compute the hash value for the given string. 
*/ + hval = len; + while (str_param != end_str) + { + hval <<= 4; + hval += (size_t) *str_param++; + g = hval & ((size_t) 0xf << (HASHWORDBITS - 4)); + if (g != 0) + { + hval ^= g >> (HASHWORDBITS - 8); + hval ^= g; + } + } + return hval; +} diff --git a/REORG.TODO/locale/tst-C-locale.c b/REORG.TODO/locale/tst-C-locale.c new file mode 100644 index 0000000000..4736c045f9 --- /dev/null +++ b/REORG.TODO/locale/tst-C-locale.c @@ -0,0 +1,497 @@ +/* Tests of C and POSIX locale contents. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <ctype.h> +#include <langinfo.h> +#include <limits.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> + + +static int +run_test (const char *locname) +{ + struct lconv *lc; + const char *str; + const wchar_t *wstr; + int result = 0; + locale_t loc; + + /* ISO C stuff. 
*/ + lc = localeconv (); + if (lc == NULL) + { + printf ("localeconv failed for locale %s\n", locname); + result = 1; + } + else + { +#define STRTEST(name, exp) \ + do \ + if (strcmp (lc->name, exp) != 0) \ + { \ + printf (#name " in locale %s wrong (is \"%s\", should be \"%s\")\n",\ + locname, lc->name, exp); \ + result = 1; \ + } \ + while (0) + STRTEST (decimal_point, "."); + STRTEST (thousands_sep, ""); + STRTEST (grouping, ""); + STRTEST (mon_decimal_point, ""); + STRTEST (mon_thousands_sep, ""); + STRTEST (mon_grouping, ""); + STRTEST (positive_sign, ""); + STRTEST (negative_sign, ""); + STRTEST (currency_symbol, ""); + STRTEST (int_curr_symbol, ""); + +#define CHARTEST(name, exp) \ + do \ + if (lc->name != exp) \ + { \ + printf (#name " in locale %s wrong (is %d, should be %d)\n", \ + locname, lc->name, CHAR_MAX); \ + result = 1; \ + } \ + while (0) + CHARTEST (frac_digits, CHAR_MAX); + CHARTEST (p_cs_precedes, CHAR_MAX); + CHARTEST (n_cs_precedes, CHAR_MAX); + CHARTEST (p_sep_by_space, CHAR_MAX); + CHARTEST (n_sep_by_space, CHAR_MAX); + CHARTEST (p_sign_posn, CHAR_MAX); + CHARTEST (n_sign_posn, CHAR_MAX); + CHARTEST (int_frac_digits, CHAR_MAX); + CHARTEST (int_p_cs_precedes, CHAR_MAX); + CHARTEST (int_n_cs_precedes, CHAR_MAX); + CHARTEST (int_p_sep_by_space, CHAR_MAX); + CHARTEST (int_n_sep_by_space, CHAR_MAX); + CHARTEST (int_p_sign_posn, CHAR_MAX); + CHARTEST (int_n_sign_posn, CHAR_MAX); + } + +#undef STRTEST +#define STRTEST(name, exp) \ + str = nl_langinfo (name); \ + if (strcmp (str, exp) != 0) \ + { \ + printf ("nl_langinfo(" #name ") in locale %s wrong " \ + "(is \"%s\", should be \"%s\")\n", locname, str, exp); \ + result = 1; \ + } +#define WSTRTEST(name, exp) \ + wstr = (wchar_t *) nl_langinfo (name); \ + if (wcscmp (wstr, exp) != 0) \ + { \ + printf ("nl_langinfo(" #name ") in locale %s wrong " \ + "(is \"%S\", should be \"%S\")\n", locname, wstr, exp); \ + result = 1; \ + } + + /* Unix stuff. 
*/ + STRTEST (ABDAY_1, "Sun"); + STRTEST (ABDAY_2, "Mon"); + STRTEST (ABDAY_3, "Tue"); + STRTEST (ABDAY_4, "Wed"); + STRTEST (ABDAY_5, "Thu"); + STRTEST (ABDAY_6, "Fri"); + STRTEST (ABDAY_7, "Sat"); + STRTEST (DAY_1, "Sunday"); + STRTEST (DAY_2, "Monday"); + STRTEST (DAY_3, "Tuesday"); + STRTEST (DAY_4, "Wednesday"); + STRTEST (DAY_5, "Thursday"); + STRTEST (DAY_6, "Friday"); + STRTEST (DAY_7, "Saturday"); + STRTEST (ABMON_1, "Jan"); + STRTEST (ABMON_2, "Feb"); + STRTEST (ABMON_3, "Mar"); + STRTEST (ABMON_4, "Apr"); + STRTEST (ABMON_5, "May"); + STRTEST (ABMON_6, "Jun"); + STRTEST (ABMON_7, "Jul"); + STRTEST (ABMON_8, "Aug"); + STRTEST (ABMON_9, "Sep"); + STRTEST (ABMON_10, "Oct"); + STRTEST (ABMON_11, "Nov"); + STRTEST (ABMON_12, "Dec"); + STRTEST (MON_1, "January"); + STRTEST (MON_2, "February"); + STRTEST (MON_3, "March"); + STRTEST (MON_4, "April"); + STRTEST (MON_5, "May"); + STRTEST (MON_6, "June"); + STRTEST (MON_7, "July"); + STRTEST (MON_8, "August"); + STRTEST (MON_9, "September"); + STRTEST (MON_10, "October"); + STRTEST (MON_11, "November"); + STRTEST (MON_12, "December"); + STRTEST (AM_STR, "AM"); + STRTEST (PM_STR, "PM"); + STRTEST (D_T_FMT, "%a %b %e %H:%M:%S %Y"); + STRTEST (D_FMT, "%m/%d/%y"); + STRTEST (T_FMT, "%H:%M:%S"); + STRTEST (T_FMT_AMPM, "%I:%M:%S %p"); + STRTEST (ERA, ""); + STRTEST (ERA_D_FMT, ""); + STRTEST (ERA_T_FMT, ""); + STRTEST (ERA_D_T_FMT, ""); + STRTEST (ALT_DIGITS, ""); + + STRTEST (RADIXCHAR, "."); + STRTEST (THOUSEP, ""); + + STRTEST (YESEXPR, "^[yY]"); + STRTEST (NOEXPR, "^[nN]"); + + /* Extensions. 
*/ + WSTRTEST (_NL_WABDAY_1, L"Sun"); + WSTRTEST (_NL_WABDAY_2, L"Mon"); + WSTRTEST (_NL_WABDAY_3, L"Tue"); + WSTRTEST (_NL_WABDAY_4, L"Wed"); + WSTRTEST (_NL_WABDAY_5, L"Thu"); + WSTRTEST (_NL_WABDAY_6, L"Fri"); + WSTRTEST (_NL_WABDAY_7, L"Sat"); + WSTRTEST (_NL_WDAY_1, L"Sunday"); + WSTRTEST (_NL_WDAY_2, L"Monday"); + WSTRTEST (_NL_WDAY_3, L"Tuesday"); + WSTRTEST (_NL_WDAY_4, L"Wednesday"); + WSTRTEST (_NL_WDAY_5, L"Thursday"); + WSTRTEST (_NL_WDAY_6, L"Friday"); + WSTRTEST (_NL_WDAY_7, L"Saturday"); + WSTRTEST (_NL_WABMON_1, L"Jan"); + WSTRTEST (_NL_WABMON_2, L"Feb"); + WSTRTEST (_NL_WABMON_3, L"Mar"); + WSTRTEST (_NL_WABMON_4, L"Apr"); + WSTRTEST (_NL_WABMON_5, L"May"); + WSTRTEST (_NL_WABMON_6, L"Jun"); + WSTRTEST (_NL_WABMON_7, L"Jul"); + WSTRTEST (_NL_WABMON_8, L"Aug"); + WSTRTEST (_NL_WABMON_9, L"Sep"); + WSTRTEST (_NL_WABMON_10, L"Oct"); + WSTRTEST (_NL_WABMON_11, L"Nov"); + WSTRTEST (_NL_WABMON_12, L"Dec"); + WSTRTEST (_NL_WMON_1, L"January"); + WSTRTEST (_NL_WMON_2, L"February"); + WSTRTEST (_NL_WMON_3, L"March"); + WSTRTEST (_NL_WMON_4, L"April"); + WSTRTEST (_NL_WMON_5, L"May"); + WSTRTEST (_NL_WMON_6, L"June"); + WSTRTEST (_NL_WMON_7, L"July"); + WSTRTEST (_NL_WMON_8, L"August"); + WSTRTEST (_NL_WMON_9, L"September"); + WSTRTEST (_NL_WMON_10, L"October"); + WSTRTEST (_NL_WMON_11, L"November"); + WSTRTEST (_NL_WMON_12, L"December"); + WSTRTEST (_NL_WAM_STR, L"AM"); + WSTRTEST (_NL_WPM_STR, L"PM"); + WSTRTEST (_NL_WD_T_FMT, L"%a %b %e %H:%M:%S %Y"); + WSTRTEST (_NL_WD_FMT, L"%m/%d/%y"); + WSTRTEST (_NL_WT_FMT, L"%H:%M:%S"); + WSTRTEST (_NL_WT_FMT_AMPM, L"%I:%M:%S %p"); + WSTRTEST (_NL_WERA_D_FMT, L""); + WSTRTEST (_NL_WERA_T_FMT, L""); + WSTRTEST (_NL_WERA_D_T_FMT, L""); + WSTRTEST (_NL_WALT_DIGITS, L""); + + STRTEST (_DATE_FMT, "%a %b %e %H:%M:%S %Z %Y"); + WSTRTEST (_NL_W_DATE_FMT, L"%a %b %e %H:%M:%S %Z %Y"); + + STRTEST (INT_CURR_SYMBOL, ""); + STRTEST (CURRENCY_SYMBOL, ""); + STRTEST (MON_DECIMAL_POINT, ""); + STRTEST (MON_THOUSANDS_SEP, ""); + 
STRTEST (MON_GROUPING, ""); + STRTEST (POSITIVE_SIGN, ""); + STRTEST (NEGATIVE_SIGN, ""); + STRTEST (GROUPING, ""); + + STRTEST (YESSTR, ""); + STRTEST (NOSTR, ""); + + /* Test the new locale mechanisms. */ + loc = newlocale (LC_ALL_MASK, locname, NULL); + if (loc == NULL) + { + printf ("cannot create locale object for locale %s\n", locname); + result = 1; + } + else + { + int c; + +#undef STRTEST +#define STRTEST(name, exp) \ + str = nl_langinfo_l (name, loc); \ + if (strcmp (str, exp) != 0) \ + { \ + printf ("nl_langinfo_l(" #name ") in locale %s wrong " \ + "(is \"%s\", should be \"%s\")\n", locname, str, exp); \ + result = 1; \ + } +#undef WSTRTEST +#define WSTRTEST(name, exp) \ + wstr = (wchar_t *) nl_langinfo_l (name, loc); \ + if (wcscmp (wstr, exp) != 0) \ + { \ + printf ("nl_langinfo_l(" #name ") in locale %s wrong " \ + "(is \"%S\", should be \"%S\")\n", locname, wstr, exp); \ + result = 1; \ + } + + /* Unix stuff. */ + STRTEST (ABDAY_1, "Sun"); + STRTEST (ABDAY_2, "Mon"); + STRTEST (ABDAY_3, "Tue"); + STRTEST (ABDAY_4, "Wed"); + STRTEST (ABDAY_5, "Thu"); + STRTEST (ABDAY_6, "Fri"); + STRTEST (ABDAY_7, "Sat"); + STRTEST (DAY_1, "Sunday"); + STRTEST (DAY_2, "Monday"); + STRTEST (DAY_3, "Tuesday"); + STRTEST (DAY_4, "Wednesday"); + STRTEST (DAY_5, "Thursday"); + STRTEST (DAY_6, "Friday"); + STRTEST (DAY_7, "Saturday"); + STRTEST (ABMON_1, "Jan"); + STRTEST (ABMON_2, "Feb"); + STRTEST (ABMON_3, "Mar"); + STRTEST (ABMON_4, "Apr"); + STRTEST (ABMON_5, "May"); + STRTEST (ABMON_6, "Jun"); + STRTEST (ABMON_7, "Jul"); + STRTEST (ABMON_8, "Aug"); + STRTEST (ABMON_9, "Sep"); + STRTEST (ABMON_10, "Oct"); + STRTEST (ABMON_11, "Nov"); + STRTEST (ABMON_12, "Dec"); + STRTEST (MON_1, "January"); + STRTEST (MON_2, "February"); + STRTEST (MON_3, "March"); + STRTEST (MON_4, "April"); + STRTEST (MON_5, "May"); + STRTEST (MON_6, "June"); + STRTEST (MON_7, "July"); + STRTEST (MON_8, "August"); + STRTEST (MON_9, "September"); + STRTEST (MON_10, "October"); + STRTEST (MON_11, 
"November"); + STRTEST (MON_12, "December"); + STRTEST (AM_STR, "AM"); + STRTEST (PM_STR, "PM"); + STRTEST (D_T_FMT, "%a %b %e %H:%M:%S %Y"); + STRTEST (D_FMT, "%m/%d/%y"); + STRTEST (T_FMT, "%H:%M:%S"); + STRTEST (T_FMT_AMPM, "%I:%M:%S %p"); + STRTEST (ERA, ""); + STRTEST (ERA_D_FMT, ""); + STRTEST (ERA_T_FMT, ""); + STRTEST (ERA_D_T_FMT, ""); + STRTEST (ALT_DIGITS, ""); + + STRTEST (RADIXCHAR, "."); + STRTEST (THOUSEP, ""); + + STRTEST (YESEXPR, "^[yY]"); + STRTEST (NOEXPR, "^[nN]"); + + /* Extensions. */ + WSTRTEST (_NL_WABDAY_1, L"Sun"); + WSTRTEST (_NL_WABDAY_2, L"Mon"); + WSTRTEST (_NL_WABDAY_3, L"Tue"); + WSTRTEST (_NL_WABDAY_4, L"Wed"); + WSTRTEST (_NL_WABDAY_5, L"Thu"); + WSTRTEST (_NL_WABDAY_6, L"Fri"); + WSTRTEST (_NL_WABDAY_7, L"Sat"); + WSTRTEST (_NL_WDAY_1, L"Sunday"); + WSTRTEST (_NL_WDAY_2, L"Monday"); + WSTRTEST (_NL_WDAY_3, L"Tuesday"); + WSTRTEST (_NL_WDAY_4, L"Wednesday"); + WSTRTEST (_NL_WDAY_5, L"Thursday"); + WSTRTEST (_NL_WDAY_6, L"Friday"); + WSTRTEST (_NL_WDAY_7, L"Saturday"); + WSTRTEST (_NL_WABMON_1, L"Jan"); + WSTRTEST (_NL_WABMON_2, L"Feb"); + WSTRTEST (_NL_WABMON_3, L"Mar"); + WSTRTEST (_NL_WABMON_4, L"Apr"); + WSTRTEST (_NL_WABMON_5, L"May"); + WSTRTEST (_NL_WABMON_6, L"Jun"); + WSTRTEST (_NL_WABMON_7, L"Jul"); + WSTRTEST (_NL_WABMON_8, L"Aug"); + WSTRTEST (_NL_WABMON_9, L"Sep"); + WSTRTEST (_NL_WABMON_10, L"Oct"); + WSTRTEST (_NL_WABMON_11, L"Nov"); + WSTRTEST (_NL_WABMON_12, L"Dec"); + WSTRTEST (_NL_WMON_1, L"January"); + WSTRTEST (_NL_WMON_2, L"February"); + WSTRTEST (_NL_WMON_3, L"March"); + WSTRTEST (_NL_WMON_4, L"April"); + WSTRTEST (_NL_WMON_5, L"May"); + WSTRTEST (_NL_WMON_6, L"June"); + WSTRTEST (_NL_WMON_7, L"July"); + WSTRTEST (_NL_WMON_8, L"August"); + WSTRTEST (_NL_WMON_9, L"September"); + WSTRTEST (_NL_WMON_10, L"October"); + WSTRTEST (_NL_WMON_11, L"November"); + WSTRTEST (_NL_WMON_12, L"December"); + WSTRTEST (_NL_WAM_STR, L"AM"); + WSTRTEST (_NL_WPM_STR, L"PM"); + WSTRTEST (_NL_WD_T_FMT, L"%a %b %e %H:%M:%S %Y"); + 
WSTRTEST (_NL_WD_FMT, L"%m/%d/%y"); + WSTRTEST (_NL_WT_FMT, L"%H:%M:%S"); + WSTRTEST (_NL_WT_FMT_AMPM, L"%I:%M:%S %p"); + WSTRTEST (_NL_WERA_D_FMT, L""); + WSTRTEST (_NL_WERA_T_FMT, L""); + WSTRTEST (_NL_WERA_D_T_FMT, L""); + WSTRTEST (_NL_WALT_DIGITS, L""); + + STRTEST (_DATE_FMT, "%a %b %e %H:%M:%S %Z %Y"); + WSTRTEST (_NL_W_DATE_FMT, L"%a %b %e %H:%M:%S %Z %Y"); + + STRTEST (INT_CURR_SYMBOL, ""); + STRTEST (CURRENCY_SYMBOL, ""); + STRTEST (MON_DECIMAL_POINT, ""); + STRTEST (MON_THOUSANDS_SEP, ""); + STRTEST (MON_GROUPING, ""); + STRTEST (POSITIVE_SIGN, ""); + STRTEST (NEGATIVE_SIGN, ""); + STRTEST (GROUPING, ""); + + STRTEST (YESSTR, ""); + STRTEST (NOSTR, ""); + + /* Character class tests. */ + for (c = 0; c < 128; ++c) + { +#define CLASSTEST(name) \ + if (is##name (c) != is##name##_l (c, loc)) \ + { \ + printf ("is%s('\\%o') != is%s_l('\\%o')\n", \ + #name, c, #name, c); \ + result = 1; \ + } + CLASSTEST (alnum); + CLASSTEST (alpha); + CLASSTEST (blank); + CLASSTEST (cntrl); + CLASSTEST (digit); + CLASSTEST (lower); + CLASSTEST (graph); + CLASSTEST (print); + CLASSTEST (punct); + CLASSTEST (space); + CLASSTEST (upper); + CLASSTEST (xdigit); + + /* Character mapping tests. */ +#define MAPTEST(name) \ + if (to##name (c) != to##name##_l (c, loc)) \ + { \ + printf ("to%s('\\%o') != to%s_l('\\%o'): '\\%o' vs '\\%o'\n", \ + #name, c, #name, c, \ + to##name (c), to##name##_l (c, loc)); \ + result = 1; \ + } + MAPTEST (lower); + MAPTEST (upper); + } + + /* Character class tests, this time for wide characters. Note that + this only works because we know that the internal encoding is + UCS4. 
*/ + for (c = 0; c < 128; ++c) + { +#undef CLASSTEST +#define CLASSTEST(name) \ + if (isw##name (c) != isw##name##_l (c, loc)) \ + { \ + printf ("isw%s('\\%o') != isw%s_l('\\%o')\n", \ + #name, c, #name, c); \ + result = 1; \ + } + CLASSTEST (alnum); + CLASSTEST (alpha); + CLASSTEST (blank); + CLASSTEST (cntrl); + CLASSTEST (digit); + CLASSTEST (lower); + CLASSTEST (graph); + CLASSTEST (print); + CLASSTEST (punct); + CLASSTEST (space); + CLASSTEST (upper); + CLASSTEST (xdigit); + + /* Character mapping tests. Note that + this only works because we know that the internal encoding is + UCS4. */ +#undef MAPTEST +#define MAPTEST(name) \ + if (tow##name (c) != tow##name##_l (c, loc)) \ + { \ + printf ("tow%s('\\%o') != tow%s_l('\\%o'): '\\%o' vs '\\%o'\n",\ + #name, c, #name, c, \ + tow##name (c), tow##name##_l (c, loc)); \ + result = 1; \ + } + MAPTEST (lower); + MAPTEST (upper); + } + + freelocale (loc); + } + + return result; +} + + +static int +do_test (void) +{ + int result; + + /* First use the name "C". */ + if (setlocale (LC_ALL, "C") == NULL) + { + puts ("cannot set C locale"); + result = 1; + } + else + result = run_test ("C"); + + /* Then the name "POSIX". 
*/ + if (setlocale (LC_ALL, "POSIX") == NULL) + { + puts ("cannot set POSIX locale"); + result = 1; + } + else + result |= run_test ("POSIX"); + + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/locale/tst-duplocale.c b/REORG.TODO/locale/tst-duplocale.c new file mode 100644 index 0000000000..53e5fbb8fe --- /dev/null +++ b/REORG.TODO/locale/tst-duplocale.c @@ -0,0 +1,14 @@ +#include <locale.h> +#include <stdio.h> + +static int +do_test (void) +{ + locale_t d = duplocale (LC_GLOBAL_LOCALE); + if (d != (locale_t) 0) + freelocale (d); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/locale/tst-locname.c b/REORG.TODO/locale/tst-locname.c new file mode 100644 index 0000000000..7eb71adfd8 --- /dev/null +++ b/REORG.TODO/locale/tst-locname.c @@ -0,0 +1,20 @@ +#include <langinfo.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> + +static int +do_test (void) +{ + const char *s = nl_langinfo (_NL_LOCALE_NAME (LC_CTYPE)); + if (s == NULL || strcmp (s, "C") != 0) + { + printf ("incorrect locale name returned: %s, expected \"C\"\n", s); + return 1; + } + + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/locale/uselocale.c b/REORG.TODO/locale/uselocale.c new file mode 100644 index 0000000000..9ef22d3cf9 --- /dev/null +++ b/REORG.TODO/locale/uselocale.c @@ -0,0 +1,75 @@ +/* uselocale -- fetch and set the current per-thread locale + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <locale.h>
+#include "localeinfo.h"
+#include <ctype.h>
+
+/* Switch the current thread's locale to NEWLOC.
+   If NEWLOC is null, instead just return the current setting.
+   The special value LC_GLOBAL_LOCALE is the initial setting
+   for all threads, and means the thread uses the global
+   setting controlled by `setlocale'.
+   Returns the setting that was in effect on entry; the global locale
+   object is reported as LC_GLOBAL_LOCALE. */
+locale_t
+__uselocale (locale_t newloc)
+{
+  locale_t oldloc = _NL_CURRENT_LOCALE;
+
+  if (newloc != NULL)
+    {
+      /* LC_GLOBAL_LOCALE is only a handle; what gets stored is the
+         address of the real global locale object. */
+      const locale_t locobj
+        = newloc == LC_GLOBAL_LOCALE ? &_nl_global_locale : newloc;
+      __libc_tsd_set (__locale_t, LOCALE, locobj);
+
+#ifdef NL_CURRENT_INDIRECT
+      /* Now we must update all the per-category thread-local variables to
+         point into the new current locale for this thread. The magic
+         symbols _nl_current_LC_FOO_used are defined to meaningless values
+         if _nl_current_LC_FOO was linked in. By using weak references to
+         both symbols and testing the address of _nl_current_LC_FOO_used,
+         we can avoid accessing the _nl_current_LC_FOO thread-local
+         variable at all when no code referring to it was linked in. We
+         need the special bogus symbol because while TLS symbols can be
+         weak, there is no reasonable way to test for the default-zero
+         value as with a heap symbol (taking the address would just use
+         some bogus offset from our thread pointer).
*/
+
+# define DEFINE_CATEGORY(category, category_name, items, a) \
+      { \
+        extern char _nl_current_##category##_used; \
+        weak_extern (_nl_current_##category##_used) \
+        weak_extern (_nl_current_##category) \
+        if (&_nl_current_##category##_used != 0) \
+          _nl_current_##category = &locobj->__locales[category]; \
+      }
+# include "categories.def"
+# undef DEFINE_CATEGORY
+#endif
+
+      /* Update the special tsd cache of some locale data. */
+      __libc_tsd_set (const uint16_t *, CTYPE_B, (void *) locobj->__ctype_b);
+      __libc_tsd_set (const int32_t *, CTYPE_TOLOWER,
+                      (void *) locobj->__ctype_tolower);
+      __libc_tsd_set (const int32_t *, CTYPE_TOUPPER,
+                      (void *) locobj->__ctype_toupper);
+    }
+
+  /* Translate the internal global object back into the public
+     LC_GLOBAL_LOCALE handle. */
+  return oldloc == &_nl_global_locale ? LC_GLOBAL_LOCALE : oldloc;
+}
+libc_hidden_def (__uselocale)
+weak_alias (__uselocale, uselocale)
diff --git a/REORG.TODO/locale/weight.h b/REORG.TODO/locale/weight.h
new file mode 100644
index 0000000000..0558123f34
--- /dev/null
+++ b/REORG.TODO/locale/weight.h
@@ -0,0 +1,149 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Ulrich Drepper, <drepper@cygnus.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#ifndef _WEIGHT_H_
+#define _WEIGHT_H_ 1
+
+#include <libc-diag.h>
+
+/* Find index of weight.
*/
+static inline int32_t __attribute__ ((always_inline))
+findidx (const int32_t *table,        /* Indexed by the first input byte.  */
+         const int32_t *indirect,     /* Read only when a range entry matches.  */
+         const unsigned char *extra,  /* Holds the entry lists searched below.  */
+         const unsigned char **cpp, size_t len) /* *CPP is advanced past the bytes consumed.  */
+{
+  int_fast32_t i = table[*(*cpp)++];
+  const unsigned char *cp;
+  const unsigned char *usrc;
+
+  if (i >= 0)
+    /* This is an index into the weight table.  Cool.  */
+    return i;
+
+  /* Oh well, more than one sequence starting with this byte.
+     Search for the correct one.  The negated table value is the
+     position in EXTRA where the list of candidate entries starts.  */
+  cp = &extra[-i];
+  usrc = *cpp;
+  --len; /* The first byte was already consumed above.  */
+  while (1)
+    {
+      size_t nhere;
+
+      /* The first thing is the index.  A non-negative value marks an
+         entry for one exact sequence; a negative value marks a range
+         entry whose negation is the base position in INDIRECT.  */
+      i = *((const int32_t *) cp);
+      cp += sizeof (int32_t);
+
+      /* Next is the length of the byte sequence.  These are always
+         short byte sequences so there is no reason to call any
+         function (even if they are inlined).  */
+      nhere = *cp++;
+
+      if (i >= 0)
+        {
+          /* It is a single character.  If it matches we found our
+             index.  Note that at the end of each list there is an
+             entry of length zero which represents the single byte
+             sequence.  The first (and here only) byte was tested
+             already.  */
+          size_t cnt;
+
+          /* With GCC 5.3 when compiling with -Os the compiler warns
+             that seq2.back_us, which becomes usrc, might be used
+             uninitialized.  This can't be true because we pass a length
+             of -1 for len at the same time which means that this loop
+             never executes.  */
+          DIAG_PUSH_NEEDS_COMMENT;
+          DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
+          for (cnt = 0; cnt < nhere && cnt < len; ++cnt)
+            if (cp[cnt] != usrc[cnt])
+              break;
+          DIAG_POP_NEEDS_COMMENT;
+
+          if (cnt == nhere)
+            {
+              /* Found it.  Consume the NHERE matched bytes as well.  */
+              *cpp += nhere;
+              return i;
+            }
+
+          /* Up to the next entry.  Skip the NHERE sequence bytes, then
+             pad CP up to a multiple of LOCFILE_ALIGN; the 1 presumably
+             accounts for the length byte read above (TODO confirm
+             against the locale file writer).  */
+          cp += nhere;
+          if (!LOCFILE_ALIGNED_P (1 + nhere))
+            cp += LOCFILE_ALIGN - (1 + nhere) % LOCFILE_ALIGN;
+        }
+      else
+        {
+          /* This is a range of characters.  First decide whether the
+             current byte sequence lies in the range.  The entry holds
+             the NHERE bytes of the range start followed by the NHERE
+             bytes of the range end.  */
+          size_t cnt;
+          size_t offset = 0; /* Stays 0 when the input equals the range start.  */
+
+          for (cnt = 0; cnt < nhere && cnt < len; ++cnt)
+            if (cp[cnt] != usrc[cnt])
+              break;
+
+          if (cnt != nhere)
+            {
+              if (cnt == len || cp[cnt] > usrc[cnt])
+                {
+                  /* Cannot be in this range.  Either the input ran out
+                     or it sorts before the range start.  Skip both
+                     sequences and any alignment padding.  */
+                  cp += 2 * nhere;
+                  if (!LOCFILE_ALIGNED_P (1 + 2 * nhere))
+                    cp += (LOCFILE_ALIGN
+                           - (1 + 2 * nhere) % LOCFILE_ALIGN);
+                  continue;
+                }
+
+              /* Test against the end of the range.  */
+              for (cnt = 0; cnt < nhere; ++cnt)
+                if (cp[nhere + cnt] != usrc[cnt])
+                  break;
+
+              if (cnt != nhere && cp[nhere + cnt] < usrc[cnt])
+                {
+                  /* Cannot be in this range.  The input sorts after
+                     the range end.  */
+                  cp += 2 * nhere;
+                  if (!LOCFILE_ALIGNED_P (1 + 2 * nhere))
+                    cp += (LOCFILE_ALIGN
+                           - (1 + 2 * nhere) % LOCFILE_ALIGN);
+                  continue;
+                }
+
+              /* This range matches the next characters.  Now find
+                 the offset in the indirect table.  Skip the bytes the
+                 input shares with the range start, then accumulate
+                 the remaining byte differences as base-256 digits.  */
+              for (cnt = 0; cp[cnt] == usrc[cnt]; ++cnt);
+
+              do
+                {
+                  offset <<= 8;
+                  offset += usrc[cnt] - cp[cnt];
+                }
+              while (++cnt < nhere);
+            }
+
+          *cpp += nhere;
+          return indirect[-i + offset];
+        }
+    }
+
+  /* NOTREACHED: the loop above is only ever left through a return.  */
+  return 0x43219876;
+}
+
+#endif /* weight.h */
diff --git a/REORG.TODO/locale/weightwc.h b/REORG.TODO/locale/weightwc.h
new file mode 100644
index 0000000000..97ce2b3dec
--- /dev/null
+++ b/REORG.TODO/locale/weightwc.h
@@ -0,0 +1,128 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Ulrich Drepper, <drepper@cygnus.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _WEIGHTWC_H_
+#define _WEIGHTWC_H_ 1
+
+#include <libc-diag.h>
+
+/* Find index of weight.
+
+   Wide character variant.  The first character at *CPP is consumed and
+   looked up with __collidx_table_lookup.  A non-negative result is
+   returned directly.  Otherwise its negation is the position in EXTRA
+   of a list of entries, each made of an index word, a length word
+   NHERE and the sequence data; the list is searched for an entry that
+   matches the input following the first character.  On a match *CPP is
+   advanced by NHERE.  For a range entry the value returned is read
+   from INDIRECT.  LEN bounds the comparisons and is decremented once
+   for the first character.  */
+static inline int32_t __attribute__ ((always_inline))
+findidx (const int32_t *table,
+         const int32_t *indirect,
+         const wint_t *extra,
+         const wint_t **cpp, size_t len)
+{
+  wint_t ch = *(*cpp)++;
+  int32_t i = __collidx_table_lookup ((const char *) table, ch);
+
+  if (i >= 0)
+    /* This is an index into the weight table.  Cool.  */
+    return i;
+
+  /* Oh well, more than one sequence starting with this byte.
+     Search for the correct one.  */
+  const int32_t *cp = (const int32_t *) &extra[-i];
+  --len;
+  while (1)
+    {
+      size_t nhere;
+      const int32_t *usrc = (const int32_t *) *cpp;
+
+      /* The first thing is the index.  A non-negative value marks an
+         entry for one exact sequence; a negative value marks a range
+         entry whose negation is the base position in INDIRECT.  */
+      i = *cp++;
+
+      /* Next is the length of the byte sequence.  These are always
+         short byte sequences so there is no reason to call any
+         function (even if they are inlined).  */
+      nhere = *cp++;
+
+      if (i >= 0)
+        {
+          /* It is a single character.  If it matches we found our
+             index.  Note that at the end of each list there is an
+             entry of length zero which represents the single byte
+             sequence.  The first (and here only) byte was tested
+             already.  */
+          size_t cnt;
+
+          /* With GCC 5.3 when compiling with -Os the compiler warns
+             that seq2.back_us, which becomes usrc, might be used
+             uninitialized.  This can't be true because we pass a length
+             of -1 for len at the same time which means that this loop
+             never executes.  */
+          DIAG_PUSH_NEEDS_COMMENT;
+          DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
+          for (cnt = 0; cnt < nhere && cnt < len; ++cnt)
+            if (cp[cnt] != usrc[cnt])
+              break;
+          DIAG_POP_NEEDS_COMMENT;
+
+          if (cnt == nhere)
+            {
+              /* Found it.  */
+              *cpp += nhere;
+              return i;
+            }
+
+          /* Up to the next entry.  No alignment padding is applied in
+             this variant; CP simply steps over the NHERE words.  */
+          cp += nhere;
+        }
+      else
+        {
+          /* This is a range of characters.  First decide whether the
+             current byte sequence lies in the range.  The entry holds
+             NHERE words for the range start followed by NHERE words
+             for the range end.  All but the last word must equal the
+             range start; the last word must lie between the last
+             words of start and end.
+             NOTE(review): NHERE is a size_t, so NHERE - 1 wraps around
+             if a range entry ever has length 0; presumably the locale
+             data never contains one (TODO confirm).  */
+          size_t cnt;
+          size_t offset;
+
+          for (cnt = 0; cnt < nhere - 1 && cnt < len; ++cnt)
+            if (cp[cnt] != usrc[cnt])
+              break;
+
+          if (cnt < nhere - 1) /* Prefix differs or the input ran out.  */
+            {
+              cp += 2 * nhere;
+              continue;
+            }
+
+          if (cp[nhere - 1] > usrc[nhere -1]) /* Below the range start.  */
+            {
+              cp += 2 * nhere;
+              continue;
+            }
+
+          if (cp[2 * nhere - 1] < usrc[nhere -1]) /* Above the range end.  */
+            {
+              cp += 2 * nhere;
+              continue;
+            }
+
+          /* This range matches the next characters.  Now find
+             the offset in the indirect table.  It is the distance of
+             the last input word from the last word of the range
+             start.  */
+          offset = usrc[nhere - 1] - cp[nhere - 1];
+          *cpp += nhere;
+
+          return indirect[-i + offset];
+        }
+    }
+
+  /* NOTREACHED: the loop above is only ever left through a return.  */
+  return 0x43219876;
+}
+
+#endif /* weightwc.h */
diff --git a/REORG.TODO/locale/xlocale.c b/REORG.TODO/locale/xlocale.c
new file mode 100644
index 0000000000..fc7e207ee8
--- /dev/null
+++ b/REORG.TODO/locale/xlocale.c
@@ -0,0 +1,54 @@
+/* C locale object.
+   Copyright (C) 2001-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <locale.h>
+#include "localeinfo.h"
+
+#define DEFINE_CATEGORY(category, category_name, items, a) \
+extern struct __locale_data _nl_C_##category;
+#include "categories.def"
+#undef DEFINE_CATEGORY
+
+/* Defined in locale/C-ctype.c.
*/
+extern const char _nl_C_LC_CTYPE_class[] attribute_hidden;
+extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden;
+extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden;
+
+/* Statically initialized locale object.  Every slot produced by the
+   DEFINE_CATEGORY expansion of categories.def points at the matching
+   _nl_C_<category> data, and every name, including the one stored
+   under LC_ALL, is _nl_C_name.  */
+const struct __locale_struct _nl_C_locobj attribute_hidden =
+  {
+    .__locales =
+    {
+#define DEFINE_CATEGORY(category, category_name, items, a) \
+      [category] = &_nl_C_##category,
+#include "categories.def"
+#undef DEFINE_CATEGORY
+    },
+    .__names =
+    {
+      [LC_ALL] = _nl_C_name, /* Set explicitly, outside the macro expansion.  */
+#define DEFINE_CATEGORY(category, category_name, items, a) \
+      [category] = _nl_C_name,
+#include "categories.def"
+#undef DEFINE_CATEGORY
+    }, /* In the three initializers below the cast binds tighter than
+          the addition, so each pointer is advanced by 128 elements of
+          its target type, not by 128 bytes.
+          NOTE(review): presumably the offset lets callers index the
+          tables with negative values down to -128; confirm against
+          the table layout in C-ctype.c.  */
+    .__ctype_b = (const unsigned short int *) _nl_C_LC_CTYPE_class + 128,
+    .__ctype_tolower = (const int *) _nl_C_LC_CTYPE_tolower + 128,
+    .__ctype_toupper = (const int *) _nl_C_LC_CTYPE_toupper + 128
+  };
diff --git a/REORG.TODO/locale/xlocale.h b/REORG.TODO/locale/xlocale.h
new file mode 100644
index 0000000000..20b2c1109f
--- /dev/null
+++ b/REORG.TODO/locale/xlocale.h
@@ -0,0 +1,44 @@
+/* Definition of locale datatype.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _XLOCALE_H
+#define _XLOCALE_H 1
+
+/* Structure for reentrant locale using functions.
This is an
+   (almost) opaque type for the user level programs.  The file and
+   this data structure is not standardized.  Don't rely on it.  It can
+   go away without warning.  */
+typedef struct __locale_struct
+{
+  /* Note: LC_ALL is not a valid index into this array.  One pointer to
+     the loaded data of each locale category.  */
+  struct __locale_data *__locales[13]; /* 13 = __LC_LAST. */
+
+  /* To increase the speed of this solution we add some special members.
+     They point into the character class table and the two case
+     conversion tables of the locale.  */
+  const unsigned short int *__ctype_b;
+  const int *__ctype_tolower;
+  const int *__ctype_toupper;
+
+  /* Note: LC_ALL is not a valid index into this array.
+     NOTE(review): the initializer of _nl_C_locobj in xlocale.c does
+     store a name under [LC_ALL] in this array, which contradicts the
+     sentence above; confirm which of the two is intended.  */
+  const char *__names[13];
+} *__locale_t; /* Names a pointer to the structure, not the structure.  */
+
+/* POSIX 2008 makes locale_t official.  */
+typedef __locale_t locale_t;
+
+#endif /* xlocale.h */ |