diff options
author | Roland McGrath <roland@gnu.org> | 1996-03-28 08:30:38 +0000 |
---|---|---|
committer | Roland McGrath <roland@gnu.org> | 1996-03-28 08:30:38 +0000 |
commit | 19bc17a90548ee427035994bbc4b14395723ff1f (patch) | |
tree | e7a17eda196c2610ca4be26c9e7985815162eafb /locale/programs | |
parent | 53f770e0f9d405ea8d1888254c6f7ce431b04c6e (diff) | |
download | glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.gz glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.xz glibc-19bc17a90548ee427035994bbc4b14395723ff1f.zip |
Thu Mar 28 03:25:10 1996 Roland McGrath <roland@charlie-brown.gnu.ai.mit.edu>
* intl/Makefile (copysrc): Add missing > in sed cmd. Sat Mar 23 17:52:49 1996 Ulrich Drepper <drepper@gnu.ai.mit.edu> * Makeconfig: Rename Makefile variable nlsdir to i18ndir and change value to $(datadir)/i18n. `nls' is not an appropriate name. * Makefile (subdirs): Add new subdir wctype. * ctype/ctype-info.c: Add new global variable __ctype_names and initialize from _nl_C_LC_CTYPE. * ctype/ctype.h: In P1003.3b/D11 `alnum' is a separate character class. Use bit 11. [_ISbit]: Protect definition of bitmasks because they are also used in wctype.h. * libio/genops.c (_IO_sputbackc, _IO_sungetc): Clear EOF flag after successfully pushing back a character. Fundamental changes in locale implementation. Almost nothing from the old code is used anymore. * locale/charmap.c, locale/collate.c, locale/config.h, locale/ctypedump.c, locale/hash.h, locale/keyword.gperf, locale/keyword.h, locale/loadlocale.c, locale/locale-ctype.c, locale/locale.c locale/localeconv.c, locale/localedef.c, locale/localedef.h, locale/locfile-hash.c, locale/locfile-lex.c, locale/locfile-parse.c, locale/messages.c, locale/monetary.c, locale/numeric.c, locale/setlocale.c, locale/token.h, locale/xmalloc.c: Removed. * locale/Makefile: Update for new locale implementation with program source code distributed in subdir. * locale/categories.def, locale/iso-4217.def: Updated file for new locale implementation. * locale/langinfo.h: Updated for new locale implementation. (ERA_D_T_FMT, ERA_T_FMT): New official values according to P1003.2b/D11. (_NL_COLLATE_NRULES, _NL_COLLATE_RULES, _NL_COLLATE_HASH_SIZE, _NL_COLLATE_HASH_LAYERS, _NL_COLLATE_TABLE_EB, _NL_COLLATE_TABLE_EL, _NL_COLLATE_UNDEFINED, _NL_COLLATE_EXTRA_EB, _NL_COLLATE_EXTRA_EL, _NL_CTYPE_NAMES_EB, _NL_CTYPE_NAMES_EL, _NL_CTYPE_HASH_SIZE, _NL_CTYPE_HASH_LAYERS, _NL_CTYPE_CLASS_NAMES, _NL_CTYPE_MAP_NAMES, _NL_CTYPE_WIDTH): New internal values for extended LC_CTYPE and LC_COLLATE implementation. * locale/simple-hash.c, locale/simple-hash.h, locale/xmalloc.c, locale/xstrdup.c: Helper functions for locale related programs. * locale/C-collate.c, locale/C-ctype.c, locale/C-messages.c, locale/C-monetary.c, locale/C-numeric.c, locale/C-time.c, locale/lc-collate.c, locale/lc-ctype.c, locale/lc-messages.c, locale/lc-monetary.c, locale/lc-numeric.c, locale/lc-time.c: New implementation of locale functions, and new generated "C" locale data. * locale/loadlocale.c: Now handles word fields in locale binary automatically by changing the endianess if necessary. * locale/localeinfo.h (LIMAGIC): Changed magic number because of incompatible changes. (locale_data): Changed definition to allow word as a value type. (coll_sort_rule): Values for collation sorting mode. (_NL_CURRENT_WORD): New macro to access word value of locale entry. (__collate_table, __collate_extra): Declare new global variables for collation tables. * locale/programs/charmap-kw.gperf, locale/programs/charmap-kw.h, locale/programs/charmap.c, locale/programs/charset.c, locale/programs/charset.h, locale/programs/config.h, locale/programs/ctypedump.c, locale/programs/ld-collate.c, locale/programs/ld-ctype.c, locale/programs/ld-messages.c, locale/programs/ld-monetary.c, locale/programs/ld-numeric.c, locale/programs/ld-time.c, locale/programs/linereader.c, locale/programs/linereader.h, locale/programs/locale.c, locale/programs/localedef.c, locale/programs/locales.h, locale/programs/locfile-kw.gperf, locale/programs/locfile-kw.h, locale/programs/locfile-token.h, locale/programs/locfile.c, locale/programs/locfile.h, locale/programs/stringtrans.c, locale/programs/stringtrans.h: Implementation of locale related programs. * locale/weight.h: Functions to access collation tables. * posix/unistd.h: Define _POSIX2_LOCALEDEF. * stdio-common/printf_fp.c: Fix bug with printing certain numbers < 10^-1. Reported by Bill Metzenthen. * stdio-common/tfformat.c: Add new test for above bug. * string/strcoll.c, string/strxfrm.c: Real implementation of string collation according to ISO C. * wctype/Makefile, wctype/cname-lookup.h, wctype/iswctype.c, wctype/test_wctype.c, wctype/towctrans.c, wctype/wcfuncs.c, wctype/wctrans.c, wctype/wctype.c, wctype/wctype.h: New files. Implementation of wide character classes and mapping.
Diffstat (limited to 'locale/programs')
25 files changed, 8782 insertions, 0 deletions
diff --git a/locale/programs/charmap-kw.gperf b/locale/programs/charmap-kw.gperf new file mode 100644 index 0000000000..8e00103882 --- /dev/null +++ b/locale/programs/charmap-kw.gperf @@ -0,0 +1,40 @@ +%{ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include <string.h> + +#include "locfile-token.h" +%} +struct keyword_t ; +%% +code_set_name, tok_code_set_name, 1 +mb_cur_max, tok_mb_cur_max, 1 +mb_cur_min, tok_mb_cur_min, 1 +escape_char, tok_escape_char, 1 +comment_char, tok_comment_char, 1 +g0esc, tok_g0esc, 1 +g1esc, tok_g1esc, 1 +g2esc, tok_g2esc, 1 +g3esc, tok_g3esc, 1 +CHARMAP, tok_charmap, 0 +END, tok_end, 0 +WIDTH, tok_width, 0 +WIDTH_VARIABLE, tok_width_variable, 0 +WIDTH_DEFAULT, tok_width_default, 0 diff --git a/locale/programs/charmap-kw.h b/locale/programs/charmap-kw.h new file mode 100644 index 0000000000..93326d0382 --- /dev/null +++ b/locale/programs/charmap-kw.h @@ -0,0 +1,117 @@ +/* C code produced by gperf version 2.5 (GNU C++ version) */ +/* Command-line: gperf -acCgopt -k1,2,5,$ -N charmap_hash programs/charmap-kw.gperf */ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include <string.h> + +#include "locfile-token.h" +struct keyword_t ; + +#define TOTAL_KEYWORDS 14 +#define MIN_WORD_LENGTH 3 +#define MAX_WORD_LENGTH 14 +#define MIN_HASH_VALUE 3 +#define MAX_HASH_VALUE 25 +/* maximum key range = 23, duplicates = 0 */ + +#ifdef __GNUC__ +inline +#endif +static unsigned int +hash (register const char *str, register int len) +{ + static const unsigned char asso_values[] = + { + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 14, 10, + 15, 4, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 0, 0, 0, + 26, 26, 0, 0, 26, 26, 26, 0, 0, 26, + 0, 26, 26, 26, 5, 26, 26, 0, 26, 26, + 26, 26, 26, 26, 26, 0, 26, 26, 0, 0, + 26, 0, 26, 0, 26, 26, 26, 26, 26, 0, + 15, 0, 0, 26, 0, 0, 26, 0, 26, 26, + 0, 26, 26, 26, 26, 26, 26, 26, + }; + register int hval = len; + + switch (hval) + { + default: + case 5: + hval += asso_values[(int) str[4]]; + case 4: + case 3: + case 2: + hval += asso_values[(int) str[1]]; + case 1: + hval += asso_values[(int) str[0]]; + break; + } + return hval + asso_values[(int) str[len - 1]]; +} + +#ifdef __GNUC__ +inline +#endif +const struct keyword_t * +charmap_hash (register const char *str, register int len) +{ + static const struct keyword_t wordlist[] = + { + {"",}, {"",}, {"",}, + {"END", tok_end, 0}, + {"",}, + {"WIDTH", tok_width, 0}, + {"",}, + {"CHARMAP", tok_charmap, 0}, + {"",}, + {"g3esc", tok_g3esc, 1}, + {"mb_cur_max", tok_mb_cur_max, 1}, + {"escape_char", tok_escape_char, 1}, + {"comment_char", tok_comment_char, 1}, + {"code_set_name", tok_code_set_name, 1}, + {"WIDTH_VARIABLE", tok_width_variable, 0}, + {"g1esc", tok_g1esc, 1}, + {"",}, {"",}, + {"WIDTH_DEFAULT", tok_width_default, 0}, + {"g0esc", tok_g0esc, 1}, + {"g2esc", tok_g2esc, 1}, + {"",}, {"",}, {"",}, {"",}, + {"mb_cur_min", tok_mb_cur_min, 1}, + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*s == *str && !strncmp (str + 1, s + 1, len - 1)) + return &wordlist[key]; + } + } + return 0; +} diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c new file mode 100644 index 0000000000..2b71821ec0 --- /dev/null +++ b/locale/programs/charmap.c @@ -0,0 +1,593 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <ctype.h> +#include <errno.h> +#include <libintl.h> +#include <obstack.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "error.h" +#include "linereader.h" +#include "charset.h" + + +/* Uncomment following line for production version. */ +/* define NDEBUG 1 */ +#include <assert.h> + + +/* Define the lookup function. */ +#include "charmap-kw.h" + + +void *xmalloc (size_t __n); + +/* Prototypes for local functions. */ +static struct charset_t *parse_charmap (const char *filename); + + + +struct charset_t * +charmap_read (const char *filename) +{ + const char *pathnfile; + struct charset_t *result = NULL; + + if (filename != NULL) + { + if (euidaccess (filename, R_OK) >= 0) + pathnfile = filename; + else + { + char *cp = xmalloc (strlen (filename) + sizeof CHARMAP_PATH + 1); + stpcpy (stpcpy (stpcpy (cp, CHARMAP_PATH), "/"), filename); + + pathnfile = (const char *) cp; + } + + result = parse_charmap (pathnfile); + + if (result == NULL) + error (0, errno, _("character map file `%s' not found"), filename); + } + + if (result == NULL) + { + pathnfile = CHARMAP_PATH "/" DEFAULT_CHARMAP; + + result = parse_charmap (pathnfile); + + if (result == NULL) + error (4, errno, _("default character map file `%s' not found"), + DEFAULT_CHARMAP); + } + + return result; +} + + +static struct charset_t * +parse_charmap (const char *filename) +{ + struct linereader *cmfile; + struct charset_t *result; + int state; + enum token_t expected_tok = tok_error; + const char *expected_str = NULL; + char *from_name = NULL; + char *to_name = NULL; + + /* Determine path. */ + cmfile = lr_open (filename, charmap_hash); + if (cmfile == NULL) + { + if (strchr (filename, '/') == NULL) + { + /* Look in the systems charmap directory. */ + char *buf = xmalloc (strlen (filename) + 1 + sizeof (CHARMAP_PATH)); + + stpcpy (stpcpy (stpcpy (buf, CHARMAP_PATH), "/"), filename); + cmfile = lr_open (buf, charmap_hash); + + if (cmfile == NULL) + free (buf); + } + + if (cmfile == NULL) + return NULL; + } + + /* Allocate room for result. */ + result = (struct charset_t *) xmalloc (sizeof (struct charset_t)); + memset (result, '\0', sizeof (struct charset_t)); + +#define obstack_chunk_alloc xmalloc +#define obstack_chunk_free free + obstack_init (&result->mem_pool); + + if (init_hash (&result->char_table, 256)) + { + free (result); + return NULL; + } + + /* We use a state machine to describe the charmap description file + format. */ + state = 1; + while (1) + { + /* What's on? */ + struct token *now = lr_token (cmfile, NULL); + enum token_t nowtok = now->tok; + struct token *arg; + + if (nowtok == tok_eof) + break; + + switch (state) + { + case 1: + /* The beginning. We expect the special declarations, EOL or + `CHARMAP'. */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_charmap) + { + from_name = NULL; + to_name = NULL; + + /* We have to set up the real work. Fill in some + default values. */ + if (result->mb_cur_max == 0) + result->mb_cur_max = 1; + if (result->mb_cur_min == 0) + result->mb_cur_min = result->mb_cur_max; + if (result->mb_cur_min > result->mb_cur_max) + { + error (0, 0, _("\ +%s: <mb_cur_max> must be greater than <mb_cur_min>\n"), + cmfile->fname); + + result->mb_cur_min = result->mb_cur_max; + } + + lr_ignore_rest (cmfile, 1); + + state = 2; + continue; + } + + if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max + && nowtok != tok_mb_cur_min && nowtok != tok_escape_char + && nowtok != tok_comment_char && nowtok != tok_g0esc + && nowtok != tok_g1esc && nowtok != tok_g2esc + && nowtok != tok_g3esc) + { + lr_error (cmfile, _("syntax error in prolog: %s"), + _("illegal definition")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + /* We know that we need an argument. */ + arg = lr_token (cmfile, NULL); + + switch (nowtok) + { + case tok_code_set_name: + if (arg->tok != tok_ident) + { + badarg: + lr_error (cmfile, _("syntax error in prolog: %s"), + _("bad argument")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + result->code_set_name = obstack_copy0 (&result->mem_pool, + arg->val.str.start, + arg->val.str.len); + + lr_ignore_rest (cmfile, 1); + continue; + + case tok_mb_cur_max: + case tok_mb_cur_min: + if (arg->tok != tok_number) + goto badarg; + + if (arg->val.num < 1 || arg->val.num > 4) + { + lr_error (cmfile, + _("value for <%s> must lie between 1 and 4"), + nowtok == tok_mb_cur_min ? "mb_cur_min" + : "mb_cur_max"); + + lr_ignore_rest (cmfile, 0); + continue; + } + if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0 + && arg->val.num < result->mb_cur_min) + || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0 + && arg->val.num > result->mb_cur_max)) + { + lr_error (cmfile, _("\ +value of <mb_cur_max> must be greater than the value of <mb_cur_min>")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_mb_cur_max) + result->mb_cur_max = arg->val.num; + else + result->mb_cur_min = arg->val.num; + + lr_ignore_rest (cmfile, 1); + continue; + + case tok_escape_char: + case tok_comment_char: + if (arg->tok != tok_ident) + goto badarg; + + if (arg->val.str.len != 1) + { + lr_error (cmfile, _("\ +argument to <%s> must be a single character"), + nowtok == tok_escape_char ? "escape_char" + : "comment_char"); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_escape_char) + cmfile->escape_char = *arg->val.str.start; + else + cmfile->comment_char = *arg->val.str.start; + + lr_ignore_rest (cmfile, 1); + continue; + + case tok_g0esc: + case tok_g1esc: + case tok_g2esc: + case tok_g3esc: + lr_ignore_rest (cmfile, 0); /* XXX */ + continue; + + default: + /* Cannot happen. */ + assert (! "Should not happen"); + } + break; + + case 2: + /* We have seen `CHARMAP' and now are in the body. Each line + must have the format "%s %s %s\n" or "%s...%s %s %s\n". */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_end) + { + expected_tok = tok_charmap; + expected_str = "CHARMAP"; + state = 90; + continue; + } + + if (nowtok != tok_bsymbol) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "CHARMAP", _("no symbolic name given")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + /* If the previous line was not completely correct free the + used memory. */ + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.start, + now->val.str.len); + to_name = NULL; + + state = 3; + continue; + + case 3: + /* We have two possibilities: We can see an ellipsis or an + encoding value. */ + if (nowtok == tok_ellipsis) + { + state = 4; + continue; + } + /* FALLTHROUGH */ + + case 5: + if (nowtok != tok_charcode && nowtok != tok_ucs2 + && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "CHARMAP", _("illegal encoding given")); + + lr_ignore_rest (cmfile, 0); + + state = 2; + continue; + } + + if (nowtok == tok_charcode) + /* Write char value in table. */ + charset_new_char (cmfile, result, now->val.charcode.nbytes, + now->val.charcode.val, from_name, to_name); + else + /* Determine ISO 10646 value and write into table. */ + charset_new_unicode (cmfile, result, now->val.charcode.nbytes, + now->val.charcode.val, from_name, to_name); + + /* Ignore trailing comment silently. */ + lr_ignore_rest (cmfile, 0); + + from_name = NULL; + to_name = NULL; + + state = 2; + continue; + + case 4: + if (nowtok != tok_bsymbol) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "CHARMAP", + _("no symbolic name given for end of range")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + /* If the previous line was not completely correct free the + used memory. */ + to_name = (char *) obstack_copy0 (&result->mem_pool, + cmfile->token.val.str.start, + cmfile->token.val.str.len); + + state = 3; + continue; + + case 90: + if (nowtok != expected_tok) + lr_error (cmfile, _("\ +`%1$s' definition does not end with `END %1$s'"), expected_str); + + lr_ignore_rest (cmfile, nowtok == expected_tok); + state = 91; + continue; + + case 91: + /* Waiting for WIDTH... */ + if (nowtok == tok_width_default) + { + state = 92; + continue; + } + + if (nowtok == tok_width) + { + lr_ignore_rest (cmfile, 1); + state = 93; + continue; + } + + if (nowtok == tok_width_variable) + { + lr_ignore_rest (cmfile, 1); + state = 98; + continue; + } + + lr_error (cmfile, _("\ +only WIDTH definitions are allowed to follow the CHARMAP definition")); + + lr_ignore_rest (cmfile, 0); + continue; + + case 92: + if (nowtok != tok_number) + lr_error (cmfile, _("value for %s must be an integer"), + "WIDTH_DEFAULT"); + else + result->width_default = now->val.num; + + lr_ignore_rest (cmfile, nowtok == tok_number); + + state = 91; + continue; + + case 93: + /* We now expect `END WIDTH' or lines of the format "%s %d\n" or + "%s...%s %d\n". */ + if (nowtok == tok_eol) + /* ignore empty lines. */ + continue; + + if (nowtok == tok_end) + { + expected_tok = tok_width; + expected_str = "WIDTH"; + state = 90; + continue; + } + + if (nowtok != tok_bsymbol) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH", _("no symbolic name given")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.start, + now->val.str.len); + to_name = NULL; + + state = 94; + continue; + + case 94: + if (nowtok == tok_ellipsis) + state = 95; + + case 96: + if (nowtok != tok_number) + lr_error (cmfile, _("value for %s must be an integer"), + "WIDTH"); + else + { + /* XXX Store width for chars. */ + from_name = NULL; + } + + lr_ignore_rest (cmfile, nowtok == tok_number); + + state = 93; + continue; + + case 95: + if (nowtok != tok_bsymbol) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH", _("no symbolic name given for end of range")); + + lr_ignore_rest (cmfile, 0); + + state = 93; + continue; + } + + to_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.start, + now->val.str.len); + + lr_ignore_rest (cmfile, 1); + + state = 96; + continue; + + case 98: + /* We now expect `END WIDTH_VARIABLE' or lines of the format + "%s\n" or "%s...%s\n". */ + if (nowtok == tok_eol) + /* ignore empty lines. */ + continue; + + if (nowtok == tok_end) + { + expected_tok = tok_width_variable; + expected_str = "WIDTH_VARIABLE"; + state = 90; + continue; + } + + if (nowtok != tok_bsymbol) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH_VARIABLE", _("no symbolic name given")); + + lr_ignore_rest (cmfile, 0); + + continue; + } + + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.start, + now->val.str.len); + to_name = NULL; + + state = 99; + continue; + + case 99: + if (nowtok == tok_ellipsis) + state = 100; + + /* Store info. */ + from_name = NULL; + + /* Warn */ + state = 98; + continue; + + case 100: + if (nowtok != tok_bsymbol) + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH_VARIABLE", + _("no symbolic name given for end of range")); + else + { + to_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.start, + now->val.str.len); + /* XXX Enter value into table. */ + } + + lr_ignore_rest (cmfile, nowtok == tok_bsymbol); + + state = 98; + continue; + + default: + error (5, 0, _("%s: error in state machine"), __FILE__); + /* NOTREACHED */ + } + break; + } + + if (state != 91) + error (0, 0, _("%s: premature end of file"), cmfile->fname); + + lr_close (cmfile); + + return result; +} diff --git a/locale/programs/charset.c b/locale/programs/charset.c new file mode 100644 index 0000000000..2e2f63bd9a --- /dev/null +++ b/locale/programs/charset.c @@ -0,0 +1,132 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <alloca.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "error.h" +#include "charset.h" + + +static void +insert_char (struct linereader *lr, struct charset_t *cs, int bytes, + unsigned int value, const char *from, const char *to); + + +void +charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes, + unsigned int value, const char *from, const char *to) +{ + if (bytes < cs->mb_cur_min) + lr_error (lr, _("too few bytes in character encoding")); + else if (bytes > cs->mb_cur_max) + lr_error (lr, _("too many bytes in character encoding")); + else + insert_char (lr, cs, bytes, value, from, to); +} + + +void +charset_new_unicode (struct linereader *lr, struct charset_t *cs, int bytes, + unsigned int value, const char *from, const char *to) +{ + /* For now: perhaps <Uxxxx> support will be removed again... */ + insert_char (lr, cs, bytes, value, from, to); +} + + +unsigned int +charset_find_value (const struct charset_t *cs, const char *name, size_t len) +{ + void *result; + + if (find_entry ((hash_table *) &cs->char_table, name, len, &result) < 0) + return ILLEGAL_CHAR_VALUE; + + return (unsigned int) result; +} + + +static void +insert_char (struct linereader *lr, struct charset_t *cs, int bytes, + unsigned int value, const char *from, const char *to) +{ + const char *cp; + char *buf; + int prefix_len, len1, len2; + unsigned int from_nr, to_nr, cnt; + + if (to == NULL) + { + if (insert_entry (&cs->char_table, from, strlen (from), (void *) value) + < 0) + lr_error (lr, _("duplicate character name `%s'"), from); + + return; + } + + /* We have a range: the names must have names with equal prefixes + and an equal number of digits, where the second number is greater + or equal than the first. */ + len1 = strlen (from); + len2 = strlen (to); + + if (len1 != len2) + { + illegal_range: + lr_error (lr, _("illegal names for character range")); + return; + } + + cp = &from[len1 - 1]; + while (isdigit (*cp) && cp >= from) + --cp; + + prefix_len = (cp - from) + 1; + + if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) + goto illegal_range; + + from_nr = strtoul (&from[prefix_len], NULL, 10); + to_nr = strtoul (&to[prefix_len], NULL, 10); + + if (from_nr > to_nr) + { + lr_error (lr, _("upper limit in range is not smaller then lower limit")); + return; + } + + buf = alloca (len1 + 1); + memcpy (buf, from, prefix_len); + + for (cnt = from_nr; cnt <= to_nr; ++cnt) + { + sprintf (&buf[prefix_len], "%0d", cnt); + + if (insert_entry (&cs->char_table, buf, len1, (void *) cnt) < 0) + lr_error (lr, _("duplicate character name `%s'"), buf); + } +} diff --git a/locale/programs/charset.h b/locale/programs/charset.h new file mode 100644 index 0000000000..222d468407 --- /dev/null +++ b/locale/programs/charset.h @@ -0,0 +1,61 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifndef _CHARSET_H +#define _CHARSET_H + +#include <obstack.h> + +#include "simple-hash.h" +#include "linereader.h" + + +struct charset_t +{ + const char *code_set_name; + int mb_cur_min; + int mb_cur_max; + int width_default; + + struct obstack mem_pool; + hash_table char_table; +}; + + +/* We need one value to mark the error case. Let's use 0xffffffff. + I.e., it is placed in the last page of ISO 10646. For now only the + first is used and we have plenty of room. */ +#define ILLEGAL_CHAR_VALUE 0xffffffffu + + +/* Prototypes for charmap handling functions. */ +struct charset_t *charmap_read (const char *filename); + +/* Prototypes for funciton to insert new character. */ +void charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes, + unsigned int value, const char *from, const char *to); + +void charset_new_unicode (struct linereader *lr, struct charset_t *cs, + int bytes, unsigned int value, const char *from, + const char *to); + +unsigned int charset_find_value (const struct charset_t *__cs, + const char *__name, size_t __len); + +#endif /* charset.h */ diff --git a/locale/programs/config.h b/locale/programs/config.h new file mode 100644 index 0000000000..64054657cb --- /dev/null +++ b/locale/programs/config.h @@ -0,0 +1,33 @@ +#ifndef _LD_CONFIG_H +#define _LD_CONFIG_H + +/* Use the internal textdomain used for libc messages. */ +#define PACKAGE _libc_intl_domainname +#ifndef VERSION +/* Get libc version number. */ +#include "../../version.h" +#endif + +#define DEFAULT_CHARMAP "POSIX" + +#ifndef PARAMS +# if __STDC__ +# define PARAMS(args) args +# else +# define PARAMS(args) () +# endif +#endif + + + +#define HAVE_VPRINTF 1 + + +typedef int wint_t; +typedef unsigned short int u16_t; + + + +int euidaccess (__const char *__name, int __type); + +#endif diff --git a/locale/programs/ctypedump.c b/locale/programs/ctypedump.c new file mode 100644 index 0000000000..2a6753495e --- /dev/null +++ b/locale/programs/ctypedump.c @@ -0,0 +1,163 @@ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#include <ctype.h> +#include <endian.h> +#include <stdlib.h> +#include <stdio.h> +#include <netinet/in.h> /* Just for htons() */ + +/*#include "localedef.h"*/ +#include "localeinfo.h" + + +/* FIXME: these values should be part of the LC_CTYPE information. */ +#define mb_cur_max 1 +#define mb_cur_min 1 + + +#define SWAP32(v) \ + ((u32_t) (((((u32_t) (v)) & 0x000000ff) << 24) \ + | ((((u32_t) (v)) & 0x0000ff00) << 8) \ + | ((((u32_t) (v)) & 0x00ff0000) >> 8) \ + | ((((u32_t) (v)) & 0xff000000) >> 24))) + + + +static inline void +print_short_in_char (unsigned short val) +{ + const unsigned char *p = (const unsigned char *) &val; + printf ("\"\\%03o\\%03o\"", p[0], p[1]); +} + + +static inline void +print_int_in_char (unsigned int val) +{ + const unsigned char *p = (const unsigned char *) &val; + printf ("\"\\%03o\\%03o\\%03o\\%03o\"", p[0], p[1], p[2], p[3]); +} + + +int +ctype_output (void) +{ + int ch; + int result = 0; + const char *locname = (getenv ("LC_ALL") ?: getenv ("LC_CTYPE") ?: + getenv ("LANG") ?: "POSIX"); + + puts ("#include <endian.h>\n"); + + if (mb_cur_max == 1) + { + printf ("const char _nl_%s_LC_CTYPE_class[] = \n", locname); + for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch) + { + if (((ch + 128) % 6) == 0) + printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch); + print_short_in_char (htons (__ctype_b [ch < 0 ? 256 + ch : ch])); + fputc (((ch + 128) % 6) == 5 ? '\n' : ' ', stdout); + } + puts (";"); + } + + printf ("#if BYTE_ORDER == %s\n", + BYTE_ORDER == LITTLE_ENDIAN ? "LITTLE_ENDIAN" : "BIG_ENDIAN"); + + if (mb_cur_max == 1) + { + printf ("const char _nl_%s_LC_CTYPE_toupper[] = \n", locname); + for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch) + { + if (((ch + 128) % 3) == 0) + printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch); + print_int_in_char (__ctype_toupper[ch < 0 ? 256 + ch : ch]); + fputc (((ch + 128) % 3) == 2 ? '\n' : ' ', stdout); + } + puts (";"); + + printf ("const char _nl_%s_LC_CTYPE_tolower[] = \n", locname); + for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch) + { + if (((ch + 128) % 3) == 0) + printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch); + print_int_in_char (__ctype_tolower[ch < 0 ? 256 + ch : ch]); + fputc (((ch + 128) % 3) == 2 ? '\n' : ' ', stdout); + } + puts (";"); + } + else + /* not implemented */; + + printf ("#elif BYTE_ORDER == %s\n", + BYTE_ORDER == LITTLE_ENDIAN ? "BIG_ENDIAN" : "LITTLE_ENDIAN"); + + if (mb_cur_max == 1) + { + printf ("const char _nl_%s_LC_CTYPE_toupper[] = \n", locname); + for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch) + { + if (((ch + 128) % 3) == 0) + printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch); + print_int_in_char (SWAP32 (__ctype_toupper[ch < 0 ? 256 + ch : ch])); + fputc (((ch + 128) % 3) == 2 ? '\n' : ' ', stdout); + } + puts (";"); + + printf ("const char _nl_%s_LC_CTYPE_tolower[] = \n", locname); + for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch) + { + if (((ch + 128) % 3) == 0) + printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch); + print_int_in_char (SWAP32 (__ctype_tolower[ch < 0 ? 256 + ch : ch])); + fputc (((ch + 128) % 3) == 2 ? '\n' : ' ', stdout); + } + puts (";"); + } + else + /* not implemented */; + + puts ("#else\n#error \"BYTE_ORDER\" BYTE_ORDER \" not handled.\"\n#endif\n"); + + printf("const struct locale_data _nl_%s_LC_CTYPE = \n\ +{\n\ + NULL, 0, /* no file mapped */\n\ + 5,\n\ + {\n\ + _nl_C_LC_CTYPE_class,\n\ +#ifdef BYTE_ORDER == LITTLE_ENDIAN\n\ + NULL, NULL,\n\ +#endif\n\ + _nl_C_LC_CTYPE_toupper,\n\ + _nl_C_LC_CTYPE_tolower,\n\ +#ifdef BYTE_ORDER == BIG_ENDIAN\n\ + NULL, NULL,\n\ +#endif\n\ + }\n\ +};\n", locname); + + return result; +} + +/* + * Local Variables: + * mode:c + * c-basic-offset:2 + * End: + */ diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c new file mode 100644 index 0000000000..0f3bcbca33 --- /dev/null +++ b/locale/programs/ld-collate.c @@ -0,0 +1,1549 @@ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <endian.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <obstack.h> +#include <stdlib.h> +#include <string.h> +#include <wcstr.h> + +#include "localeinfo.h" +#include "locales.h" +#include "simple-hash.h" +#include "stringtrans.h" + +/* Uncomment the following line in the production version. */ +/* define NDEBUG 1 */ +#include <assert.h> + + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define SWAPU32(w) \ + (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24)) + + +/* What kind of symbols get defined? */ +enum coll_symbol +{ + undefined, + ellipsis, + character, + element, + symbol +}; + + +typedef struct patch_t +{ + const char *fname; + size_t lineno; + const char *token; + union + { + unsigned int *pos; + size_t idx; + } where; + struct patch_t *next; +} patch_t; + + +typedef struct element_t +{ + const wchar_t *name; + unsigned int this_weight; + + struct element_t *next; + + unsigned int *ordering; + size_t ordering_len; +} element_t; + + +/* The real definition of the struct for the LC_CTYPE locale. */ +struct locale_collate_t +{ + /* Collate symbol table. Simple mapping to number. */ + hash_table symbols; + + /* The collation elements. */ + hash_table elements; + struct obstack element_mem; + + /* The result table. */ + hash_table result; + + /* Sorting rules given in order_start line. */ + int nrules; + int nrules_max; + enum coll_sort_rule *rules; + + /* Used while recognizing symbol composed of multiple tokens + (collating-element). */ + const char *combine_token; + size_t combine_token_len; + + /* How many sorting order specifications so far. */ + unsigned int order_cnt; + + /* Was lastline ellipsis? */ + int was_ellipsis; + /* Value of last entry if was character. */ + wchar_t last_char; + /* Current element. */ + element_t *current_element; + /* What kind of symbol is current element. */ + enum coll_symbol kind; + + /* While collecting the weigths we need some temporary space. */ + unsigned int current_order; + int *weight_cnt; + int weight_idx; + unsigned int *weight; + int nweight; + int nweight_max; + + /* Patch lists. */ + patch_t *current_patch; + patch_t *all_patches; + + /* Room for the UNDEFINED information. */ + element_t undefined; + unsigned int undefined_len; +}; + + +/* Be verbose? Defined in localedef.c. */ +extern int verbose; + + +void *xmalloc (size_t __n); +void *xrealloc (void *__p, size_t __n); + + +#define obstack_chunk_alloc xmalloc +#define obstack_chunk_free free + + +void +collate_startup (struct linereader *lr, struct localedef_t *locale, + struct charset_t *charset) +{ + struct locale_collate_t *collate; + + /* It is important that we always use UCS4 encoding for strings now. */ + encoding_method = ENC_UCS4; + + /* Allocate the needed room. */ + locale->categories[LC_COLLATE].collate = collate = + (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t)); + + /* Allocate hash table for collating elements. */ + if (init_hash (&collate->elements, 512)) + error (4, 0, _("memory exhausted")); + collate->combine_token = NULL; + obstack_init (&collate->element_mem); + + /* Allocate hash table for collating elements. */ + if (init_hash (&collate->symbols, 64)) + error (4, 0, _("memory exhausted")); + + /* Allocate hash table for result. */ + if (init_hash (&collate->result, 512)) + error (4, 0, _("memory exhausted")); + + collate->nrules = 0; + collate->nrules_max = 10; + collate->rules + = (enum coll_sort_rule *) xmalloc (collate->nrules_max + * sizeof (enum coll_sort_rule)); + + collate->order_cnt = 1; /* The smallest weight is 2. */ + + collate->was_ellipsis = 0; + collate->last_char = L'\0'; /* 0 because leading ellipsis is allowed. */ + + collate->all_patches = NULL; + + /* This tells us no UNDEFINED entry was found until now. */ + collate->undefined.this_weight = 0; + + lr->translate_strings = 0; +} + + +void +collate_finish (struct localedef_t *locale, struct charset_t *charset) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + patch_t *patch; + size_t cnt; + + /* Patch the constructed table so that forward references are + correctly filled. */ + for (patch = collate->all_patches; patch != NULL; patch = patch->next) + { + wchar_t wch; + size_t toklen = strlen (patch->token); + void *ptmp; + unsigned int value = 0; + + wch = charset_find_value (charset, patch->token, toklen); + if (wch != ILLEGAL_CHAR_VALUE) + { + element_t *runp; + + if (find_entry (&collate->result, &wch, sizeof (wchar_t), + (void *) &runp) < 0) + runp = NULL; + for (; runp != NULL; runp = runp->next) + if (runp->name[0] == wch && runp->name[1] == L'\0') + break; + + value = runp == NULL ? 0 : runp->this_weight; + } + else if (find_entry (&collate->elements, patch->token, toklen, &ptmp) + >= 0) + { + value = ((element_t *) ptmp)->this_weight; + } + else if (find_entry (&collate->symbols, patch->token, toklen, &ptmp) + >= 0) + { + value = (unsigned int) ptmp; + } + else + value = 0; + + if (value == 0) + error_with_loc (0, 0, patch->fname, patch->lineno, + _("no weight defined for symbol `%s'"), patch->token); + else + *patch->where.pos = value; + } + + /* If no definition for UNDEFINED is given, all characters in the + given charset must be specified. */ + if (collate->undefined.ordering == NULL) + { + /**************************************************************\ + |* XXX We should test whether really an unspecified character *| + |* exists before giving the message. *| + \**************************************************************/ + u32_t weight; + + error (0, 0, _("no definition of `UNDEFINED'")); + + collate->undefined.ordering_len = collate->nrules; + weight = ++collate->order_cnt; + + for (cnt = 0; cnt < collate->nrules; ++cnt) + { + u32_t one = 1; + obstack_grow (&collate->element_mem, &one, sizeof (one)); + } + + for (cnt = 0; cnt < collate->nrules; ++cnt) + obstack_grow (&collate->element_mem, &weight, sizeof (weight)); + + collate->undefined.ordering = obstack_finish (&collate->element_mem); + } + + collate->undefined_len = 2; /* For the name: 1 x wchar_t + L'\0'. */ + for (cnt = 0; cnt < collate->nrules; ++cnt) + collate->undefined_len += 1 + collate->undefined.ordering[cnt]; + + /* Collating symbols are not used anymore. */ + (void) delete_hash (&collate->symbols); +} + + + +void +collate_output (struct localedef_t *locale, const char *output_path) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + u32_t table_size, table_best, level_best, sum_best; + void *last; + element_t *pelem; + wchar_t *name; + size_t len; + const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE); + struct iovec iov[2 + nelems]; + struct locale_file data; + u32_t idx[nelems]; + struct obstack non_simple; + size_t cnt, entry_size; + u32_t undefined_offset = UINT_MAX; + u32_t *table, *extra, *table2, *extra2; + size_t extra_len; + + sum_best = UINT_MAX; + table_best = 0xffff; + level_best = 0xffff; + + /* Compute table size. */ + fputs (_("\ +Computing table size for collation information might take a while..."), + stderr); + for (table_size = 256; table_size < sum_best; ++table_size) + { + size_t hits[table_size]; + unsigned int worst = 1; + size_t cnt; + + last = NULL; + + for (cnt = 0; cnt < 256; ++cnt) + hits[cnt] = 1; + memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t)); + + while (iterate_table (&collate->result, &last, (const void **) &name, + &len, (void **) &pelem) >= 0) + if (pelem->ordering != NULL && pelem->name[0] > 0xff) + if (++hits[(unsigned int) pelem->name[0] % table_size] > worst) + { + worst = hits[(unsigned int) pelem->name[0] % table_size]; + if (table_size * worst > sum_best) + break; + } + + if (table_size * worst < sum_best) + { + sum_best = table_size * worst; + table_best = table_size; + level_best = worst; + } + } + assert (table_best != 0xffff || level_best != 0xffff); + fputs (_(" done\n"), stderr); + + obstack_init (&non_simple); + + data.magic = LIMAGIC (LC_COLLATE); + data.n = nelems; + iov[0].iov_base = (void *) &data; + iov[0].iov_len = sizeof (data); + + iov[1].iov_base = (void *) idx; + iov[1].iov_len = sizeof (idx); + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (u32_t); + + table = (u32_t *) alloca (collate->nrules * sizeof (u32_t)); + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len + = collate->nrules * sizeof (u32_t); + /* Another trick here. Describing the collation method needs only a + few bits (3, to be exact). But the binary file should be + accessible by maschines with both endianesses and so we store both + information in the same word. */ + for (cnt = 0; cnt < collate->nrules; ++cnt) + table[cnt] = collate->rules[cnt] | SWAPU32 (collate->rules[cnt]); + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (u32_t); + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len = sizeof (u32_t); + + entry_size = 1 + MAX (collate->nrules, 2); + + table = (u32_t *) alloca (table_best * level_best * entry_size + * sizeof (table[0])); + memset (table, '\0', table_best * level_best * entry_size + * sizeof (table[0])); + + + /* Macros for inserting in output table. */ +#define ADD_VALUE(expr) \ + do { \ + u32_t to_write = (u32_t) expr; \ + obstack_grow (&non_simple, &to_write, sizeof (to_write)); \ + } while (0) + +#define ADD_ELEMENT(pelem, len) \ + do { \ + size_t cnt, idx; \ + \ + ADD_VALUE (len); \ + \ + wlen = wcslen (pelem->name); \ + obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u32_t)); \ + \ + idx = collate->nrules; \ + for (cnt = 0; cnt < collate->nrules; ++cnt) \ + { \ + size_t disp; \ + \ + ADD_VALUE (pelem->ordering[cnt]); \ + for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \ + ADD_VALUE (pelem->ordering[idx++]); \ + } \ + } while (0) + +#define ADD_FORWARD(pelem) \ + do { \ + /* We leave a reference in the main table and put all \ + information in the table for the extended entries. */ \ + element_t *runp; \ + element_t *has_simple = NULL; \ + size_t wlen; \ + \ + table[(level * table_best + slot) * entry_size + 1] \ + = FORWARD_CHAR; \ + table[(level * table_best + slot) * entry_size + 2] \ + = obstack_object_size (&non_simple) / sizeof (u32_t); \ + \ + /* Here we have to construct the non-simple table entry. First \ + compute the total length of this entry. */ \ + for (runp = (pelem); runp != NULL; runp = runp->next) \ + if (runp->ordering != NULL) \ + { \ + u32_t value; \ + size_t cnt; \ + \ + value = 1 + wcslen (runp->name) + 1; \ + \ + for (cnt = 0; cnt < collate->nrules; ++cnt) \ + /* We have to take care for entries without ordering \ + information. While reading them they get inserted in the \ + table and later not removed when something goes wrong with \ + reading its weights. */ \ + { \ + value += 1 + runp->ordering[cnt]; \ + \ + if (runp->name[1] == L'\0') \ + has_simple = runp; \ + } \ + \ + ADD_ELEMENT (runp, value); \ + } \ + \ + if (has_simple == NULL) \ + { \ + size_t idx, cnt; \ + \ + ADD_VALUE (collate->undefined_len + 1); \ + \ + /* Add the name. */ \ + ADD_VALUE ((pelem)->name[0]); \ + ADD_VALUE (0); \ + \ + idx = collate->nrules; \ + for (cnt = 0; cnt < collate->nrules; ++cnt) \ + { \ + size_t disp; \ + \ + ADD_VALUE (collate->undefined.ordering[cnt]); \ + for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \ + { \ + if (collate->undefined.ordering[idx] == ELLIPSIS_CHAR) \ + ADD_VALUE ((pelem)->name[0]); \ + else \ + ADD_VALUE (collate->undefined.ordering[idx++]); \ + ++idx; \ + } \ + } \ + } \ + } while (0) + + + + /* Fill the table now. First we look for all the characters which + fit into one single byte. This speeds up the 8-bit string + functions. */ + last = NULL; + while (iterate_table (&collate->result, &last, (const void **) &name, + &len, (void **) &pelem) >= 0) + if (pelem->name[0] <= 0xff) + { + /* We have a single byte name. Now we must distinguish + between entries in simple form (i.e., only one value per + weight and no collation element starting with the same + character) and those which are not. */ + size_t slot = ((size_t) pelem->name[0]); + const size_t level = 0; + + table[slot * entry_size] = pelem->name[0]; + + if (pelem->name[1] == L'\0' && pelem->next == NULL + && pelem->ordering_len == collate->nrules) + { + /* Yes, we have a simple one. Lucky us. */ + size_t cnt; + + for (cnt = 0; cnt < collate->nrules; ++cnt) + table[slot * entry_size + 1 + cnt] + = pelem->ordering[collate->nrules + cnt]; + } + else + ADD_FORWARD (pelem); + } + + /* Now check for missing single byte entries. If one exist we fill + with the UNDEFINED entry. */ + for (cnt = 0; cnt < 256; ++cnt) + /* The first weight is never 0 for existing entries. */ + if (table[cnt * entry_size + 1] == 0) + { + /* We have to fill in the information from the UNDEFINED + entry. */ + table[cnt * entry_size] = (u32_t) cnt; + + if (collate->undefined.ordering_len == collate->nrules) + { + size_t inner; + + for (inner = 0; inner < collate->nrules; ++inner) + if (collate->undefined.ordering[collate->nrules + inner] + == ELLIPSIS_CHAR) + table[cnt * entry_size + 1 + inner] = cnt; + else + table[cnt * entry_size + 1 + inner] + = collate->undefined.ordering[collate->nrules + inner]; + } + else + { + if (undefined_offset != UINT_MAX) + { + table[cnt * entry_size + 1] = FORWARD_CHAR; + table[cnt * entry_size + 2] = undefined_offset; + } + else + { + const size_t slot = cnt; + const size_t level = 0; + + ADD_FORWARD (&collate->undefined); + undefined_offset = table[cnt * entry_size + 2]; + } + } + } + + /* Now we are ready for inserting the whole rest. */ + last = NULL; + while (iterate_table (&collate->result, &last, (const void **) &name, + &len, (void **) &pelem) >= 0) + if (pelem->name[0] > 0xff) + { + /* Find the position. */ + size_t slot = ((size_t) pelem->name[0]) % table_best; + size_t level = 0; + + while (table[(level * table_best + slot) * entry_size + 1] != 0) + ++level; + assert (level < level_best); + + if (pelem->name[1] == L'\0' && pelem->next == NULL + && pelem->ordering_len == collate->nrules) + { + /* Again a simple entry. */ + size_t inner; + + for (inner = 0; inner < collate->nrules; ++inner) + table[(level * table_best + slot) * entry_size + 1 + inner] + = pelem->ordering[collate->nrules + inner]; + } + else + ADD_FORWARD (pelem); + } + + /* Add the UNDEFINED entry. */ + { + /* Here we have to construct the non-simple table entry. */ + size_t idx, cnt; + + undefined_offset = obstack_object_size (&non_simple); + + idx = collate->nrules; + for (cnt = 0; cnt < collate->nrules; ++cnt) + { + size_t disp; + + ADD_VALUE (collate->undefined.ordering[cnt]); + for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) + ADD_VALUE (collate->undefined.ordering[idx++]); + } + } + + /* Finish the extra block. */ + extra_len = obstack_object_size (&non_simple); + extra = (u32_t *) obstack_finish (&non_simple); + assert ((extra_len % sizeof (u32_t)) == 0); + + /* Now we have to build the two array for the other byte ordering. */ + table2 = (u32_t *) alloca (table_best * level_best * entry_size + * sizeof (table[0])); + extra2 = (u32_t *) alloca (extra_len); + + for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt) + table2[cnt] = SWAPU32 (table[cnt]); + + for (cnt = 0; cnt < extra_len / sizeof (u32_t); ++cnt) + extra2[cnt] = SWAPU32 (extra2[cnt]); + + /* Store table adresses and lengths. */ +#if __BYTE_ORDER == __BIG_ENDIAN + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len + = table_best * level_best * entry_size * sizeof (table[0]); + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len + = table_best * level_best * entry_size * sizeof (table[0]); + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len; + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len; +#else + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len + = table_best * level_best * entry_size * sizeof (table[0]); + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len + = table_best * level_best * entry_size * sizeof (table[0]); + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len; + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len; +#endif + + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset; + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (u32_t); + + /* Update idx array. */ + idx[0] = iov[0].iov_len + iov[1].iov_len; + for (cnt = 1; cnt < nelems; ++cnt) + idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len; + + write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov); +} + + +void +collate_element_to (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + unsigned int value; + void *not_used; + + if (collate->combine_token != NULL) + { + free ((void *) collate->combine_token); + collate->combine_token = NULL; + } + + value = charset_find_value (charset, code->val.str.start, code->val.str.len); + if (value != ILLEGAL_CHAR_VALUE) + { + lr_error (lr, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in charset"), + code->val.str.len, code->val.str.start); + return; + } + + if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, + ¬_used) >= 0) + { + lr_error (lr, _("symbol for multicharacter collating element " + "`%.*s' duplicates other element definition"), + code->val.str.len, code->val.str.start); + return; + } + + if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, + ¬_used) >= 0) + { + lr_error (lr, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbol definition"), + code->val.str.len, code->val.str.start); + return; + } + + collate->combine_token = code->val.str.start; + collate->combine_token_len = code->val.str.len; +} + + +void +collate_element_from (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + element_t *elemp, *runp; + + /* CODE is a string. */ + elemp = (element_t *) obstack_alloc (&collate->element_mem, + sizeof (element_t)); + + /* We have to translate the string. It may contain <...> character + names. */ + elemp->name = (wchar_t *) translate_string (code->val.str.start, charset); + elemp->this_weight = 0; + elemp->ordering = NULL; + elemp->ordering_len = 0; + + free (code->val.str.start); + + if (elemp->name == NULL) + { + /* At least one character in the string is not defined. We simply + do nothing. */ + if (verbose) + lr_error (lr, _("\ +`from' string in collation element declaration contains unknown character")); + return; + } + + if (elemp->name[0] == L'\0' || elemp->name[1] == L'\0') + { + lr_error (lr, _("illegal colltion element")); + return; + } + + /* The entries in the linked lists of RESULT are sorting in + descending order. The order is important for the `strcoll' and + `wcscoll' functions. */ + if (find_entry (&collate->result, elemp->name, sizeof (wchar_t), + (void *) &runp) >= 0) + { + /* We already have an entry with this key. Check whether it is + identical. */ + element_t *prevp = NULL; + int cmpres; + + do + { + cmpres = wcscmp (elemp->name, runp->name); + if (cmpres <= 0) + break; + prevp = runp; + } + while ((runp = runp->next) != NULL); + + if (cmpres == 0) + lr_error (lr, _("duplicate collating element definition")); + else + { + elemp->next = runp; + if (prevp == NULL) + { + if (set_entry (&collate->result, elemp->name, sizeof (wchar_t), + elemp) < 0) + error (EXIT_FAILURE, 0, + _("\ +error while inserting collation element into hash table")); + } + else + prevp->next = elemp; + } + } + else + { + elemp->next = NULL; + if (insert_entry (&collate->result, elemp->name, sizeof (wchar_t), elemp) + < 0) + error (EXIT_FAILURE, errno, _("error while inserting to hash table")); + } + + if (insert_entry (&collate->elements, collate->combine_token, + collate->combine_token_len, (void *) elemp) < 0) + lr_error (lr, _("cannot insert new collating symbol definition: %s"), + strerror (errno)); +} + + +void +collate_symbol (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + wchar_t value; + void *not_used; + + value = charset_find_value (charset, code->val.str.start, code->val.str.len); + if (value != ILLEGAL_CHAR_VALUE) + { + lr_error (lr, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in charset"), + code->val.str.len, code->val.str.start); + return; + } + + if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, + ¬_used) >= 0) + { + lr_error (lr, _("symbol for multicharacter collating element " + "`%.*s' duplicates element definition"), + code->val.str.len, code->val.str.start); + return; + } + + if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len, + ¬_used) >= 0) + { + lr_error (lr, _("symbol for multicharacter collating element " + "`%.*s' duplicates other symbol definition"), + code->val.str.len, code->val.str.start); + return; + } + + if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len, + (void *) 0) < 0) + lr_error (lr, _("cannot insert new collating symbol definition: %s"), + strerror (errno)); +} + + +void +collate_new_order (struct linereader *lr, struct localedef_t *locale, + enum coll_sort_rule sort_rule) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + + if (collate->nrules >= collate->nrules_max) + { + collate->nrules_max *= 2; + collate->rules + = (enum coll_sort_rule *) xrealloc (collate->rules, + collate->nrules_max + * sizeof (enum coll_sort_rule)); + } + + collate->rules[collate->nrules++] = sort_rule; +} + + +void +collate_build_arrays (struct linereader *lr, struct localedef_t *locale) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + + collate->rules + = (enum coll_sort_rule *) xrealloc (collate->rules, + collate->nrules + * sizeof (enum coll_sort_rule)); + + /* Allocate arrays for temporary weights. */ + collate->weight_cnt = (int *) xmalloc (collate->nrules * sizeof (int)); + + /* Choose arbitrary start value for table size. */ + collate->nweight_max = 5 * collate->nrules; + collate->weight = (int *) xmalloc (collate->nweight_max * sizeof (int)); +} + + +int +collate_order_elem (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + const wchar_t zero = L'\0'; + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + int result = 0; + wchar_t value; + void *tmp; + int i; + + switch (code->tok) + { + case tok_bsymbol: + /* We have a string to find in one of the three hashing tables. */ + value = charset_find_value (charset, code->val.str.start, + code->val.str.len); + if (value != ILLEGAL_CHAR_VALUE) + { + element_t *lastp, *firstp; + + collate->kind = character; + + if (find_entry (&collate->result, &value, sizeof (wchar_t), + (void *) &firstp) < 0) + firstp = lastp = NULL; + else + { + /* The entry for the simple character is always found at + the end. */ + lastp = firstp; + while (lastp->next != NULL) + lastp = lastp->next; + + if (lastp->name[0] == value && lastp->name[1] == L'\0') + { + lr_error (lr, _("duplicate definition for character `%.*s'"), + code->val.str.len, code->val.str.start); + lr_ignore_rest (lr, 0); + result = -1; + break; + } + } + + collate->current_element + = (element_t *) obstack_alloc (&collate->element_mem, + sizeof (element_t)); + + obstack_grow (&collate->element_mem, &value, sizeof (value)); + obstack_grow (&collate->element_mem, &zero, sizeof (zero)); + + collate->current_element->name = + (const wchar_t *) obstack_finish (&collate->element_mem); + + collate->current_element->this_weight = ++collate->order_cnt; + + collate->current_element->next = NULL; + + if (firstp == NULL) + { + if (insert_entry (&collate->result, &value, sizeof (wchar_t), + (void *) collate->current_element) < 0) + { + lr_error (lr, _("cannot insert collation element `%.*s'"), + code->val.str.len, code->val.str.start); + exit (4); + } + } + else + lastp->next = collate->current_element; + } + else if (find_entry (&collate->elements, code->val.str.start, + code->val.str.len, &tmp) >= 0) + { + collate->current_element = (element_t *) tmp; + + if (collate->current_element->this_weight != 0) + { + lr_error (lr, _("\ +collation element `%.*s' appears more than once: ignore line"), + code->val.str.len, code->val.str.start); + lr_ignore_rest (lr, 0); + result = -1; + break; + } + + collate->kind = element; + collate->current_element->this_weight = ++collate->order_cnt; + } + else if (find_entry (&collate->symbols, code->val.str.start, + code->val.str.len, &tmp) >= 0) + { + unsigned int order = ++collate->order_cnt; + + if ((unsigned int) tmp != 0) + { + lr_error (lr, _("\ +collation symbol `.*s' appears more than once: ignore line"), + code->val.str.len, code->val.str.start); + lr_ignore_rest (lr, 0); + result = -1; + break; + } + + collate->kind = symbol; + + if (set_entry (&collate->symbols, code->val.str.start, + code->val.str.len, (void *) order) < 0) + { + lr_error (lr, _("cannot process order specification")); + exit (4); + } + } + else + { + if (verbose) + lr_error (lr, _("unknown symbol `%.*s': line ignored"), + code->val.str.len, code->val.str.start); + lr_ignore_rest (lr, 0); + + result = -1; + } + break; + + case tok_undefined: + collate->kind = undefined; + collate->current_element = &collate->undefined; + break; + + case tok_ellipsis: + if (collate->was_ellipsis) + { + lr_error (lr, _("\ +two lines in a row containing `...' are not allowed")); + result = -1; + } + else if (collate->kind != character) + { + /* An ellipsis requires the previous line to be an + character definition. */ + lr_error (lr, _("\ +line before ellipsis does not contain definition for character constant")); + lr_ignore_rest (lr, 0); + result = -1; + } + else + collate->kind = ellipsis; + break; + + default: + assert (! "illegal token in `collate_order_elem'"); + } + + /* Now it's time to handle the ellipsis in the previous line. We do + this only when the last line contained an definition for an + character, the current line also defines an character, the + character code for the later is bigger than the former. */ + if (collate->was_ellipsis) + { + if (collate->kind != character) + { + lr_error (lr, _("\ +line after ellipsis must contain character definition")); + lr_ignore_rest (lr, 0); + result = -1; + } + else if (collate->last_char > value) + { + lr_error (lr, _("end point of ellipsis range is bigger then start")); + lr_ignore_rest (lr, 0); + result = -1; + } + else + { + /* We can fill the arrays with the information we need. */ + wchar_t name[2]; + unsigned int *data; + size_t *ptr; + size_t cnt; + + name[0] = collate->last_char + 1; + name[1] = L'\0'; + + data = (unsigned int *) alloca ((collate->nrules + collate->nweight) + * sizeof (unsigned int)); + ptr = (size_t *) alloca (collate->nrules * sizeof (size_t)); + + if (data == NULL || ptr == NULL) + error (4, 0, _("memory exhausted")); + + /* Prepare data. Because the characters covered by an + ellipsis all have equal values we prepare the data once + and only change the variable number (if there are any). + PTR[...] will point to the entries which will have to be + fixed during the output loop. */ + for (cnt = 0; cnt < collate->nrules; ++cnt) + { + data[cnt] = collate->weight_cnt[cnt]; + ptr[cnt] = (cnt == 0 + ? collate->nweight + : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]); + } + + for (cnt = 0; cnt < collate->nweight; ++cnt) + data[collate->nrules + cnt] = collate->weight[cnt]; + + for (cnt = 0; cnt < collate->nrules; ++cnt) + if (data[ptr[cnt]] != ELLIPSIS_CHAR) + ptr[cnt] = 0; + + while (name[0] <= value) + { + element_t *pelem; + + pelem = (element_t *) obstack_alloc (&collate->element_mem, + sizeof (element_t)); + if (pelem == NULL) + error (4, 0, _("memory exhausted")); + + pelem->name + = (const wchar_t *) obstack_copy (&collate->element_mem, + name, 2 * sizeof (wchar_t)); + pelem->this_weight = ++collate->order_cnt; + + pelem->ordering_len = collate->nweight; + pelem->ordering + = (unsigned int *) obstack_copy (&collate->element_mem, data, + (collate->nrules + * pelem->ordering_len) + * sizeof (unsigned int)); + + /* `...' weights need to be adjusted. */ + for (cnt = 0; cnt < collate->nrules; ++cnt) + if (ptr[cnt] != 0) + pelem->ordering[ptr[cnt]] = pelem->this_weight; + + /* Insert new entry into result table. */ + if (find_entry (&collate->result, name, sizeof (wchar_t), + (void *) &pelem->next) >= 0) + { + if (set_entry (&collate->result, name, sizeof (wchar_t), + (void *) pelem->next) < 0) + error (4, 0, _("cannot insert into result table")); + } + else + if (insert_entry (&collate->result, name, sizeof (wchar_t), + (void *) pelem->next) < 0) + error (4, 0, _("cannot insert into result table")); + + /* Increment counter. */ + ++name[0]; + } + } + } + + /* Reset counters for weights. */ + collate->weight_idx = 0; + collate->nweight = 0; + for (i = 0; i < collate->nrules; ++i) + collate->weight_cnt[i] = 0; + collate->current_patch = NULL; + + return result; +} + + +int +collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + unsigned int here_weight; + wchar_t value; + void *tmp; + + assert (code->tok == tok_bsymbol); + + value = charset_find_value (charset, code->val.str.start, code->val.str.len); + if (value != ILLEGAL_CHAR_VALUE) + { + element_t *runp; + + if (find_entry (&collate->result, &value, sizeof (wchar_t), + (void *)&runp) < 0) + runp = NULL; + + while (runp != NULL + && (runp->name[0] != value || runp->name[1] != L'\0')) + runp = runp->next; + + here_weight = runp == NULL ? 0 : runp->this_weight; + } + else if (find_entry (&collate->elements, code->val.str.start, + code->val.str.len, &tmp) >= 0) + { + element_t *runp = (element_t *) tmp; + + here_weight = runp->this_weight; + } + else if (find_entry (&collate->symbols, code->val.str.start, + code->val.str.len, &tmp) >= 0) + { + here_weight = (unsigned int) tmp; + } + else + { + if (verbose) + lr_error (lr, _("unknown symbol `%.*s': line ignored"), + code->val.str.len, code->val.str.start); + lr_ignore_rest (lr, 0); + return -1; + } + + /* When we currently work on a collation symbol we do not expect any + weight. */ + if (collate->kind == symbol) + { + lr_error (lr, _("\ +specification of sorting weight for collation symbol does not make sense")); + lr_ignore_rest (lr, 0); + return -1; + } + + /* Add to the current collection of weights. */ + if (collate->nweight >= collate->nweight_max) + { + collate->nweight_max *= 2; + collate->weight = (unsigned int *) xrealloc (collate->weight, + collate->nweight_max); + } + + /* If the weight is currently not known, we remember to patch the + resulting tables. */ + if (here_weight == 0) + { + patch_t *newp; + + newp = (patch_t *) obstack_alloc (&collate->element_mem, + sizeof (patch_t)); + newp->fname = lr->fname; + newp->lineno = lr->lineno; + newp->token = (const char *) obstack_copy0 (&collate->element_mem, + code->val.str.start, + code->val.str.len); + newp->where.idx = collate->nweight++; + newp->next = collate->current_patch; + collate->current_patch = newp; + } + else + collate->weight[collate->nweight++] = here_weight; + ++collate->weight_cnt[collate->weight_idx]; + + return 0; +} + + +int +collate_next_weight (struct linereader *lr, struct localedef_t *locale) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + + if (collate->kind == symbol) + { + lr_error (lr, _("\ +specification of sorting weight for collation symbol does not make sense")); + lr_ignore_rest (lr, 0); + return -1; + } + + ++collate->weight_idx; + if (collate->weight_idx >= collate->nrules) + { + lr_error (lr, _("too many weights")); + lr_ignore_rest (lr, 0); + return -1; + } + + return 0; +} + + +int +collate_simple_weight (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + unsigned int value = 0; + + /* There current tokens can be `IGNORE', `...', or a string. */ + switch (code->tok) + { + case tok_ignore: + /* This token is allowed in all situations. */ + value = IGNORE_CHAR; + break; + + case tok_ellipsis: + /* The ellipsis is only allowed for the `...' or `UNDEFINED' + entry. */ + if (collate->kind != ellipsis && collate->kind != undefined) + { + lr_error (lr, _("\ +`...' must only be used in `...' and `UNDEFINED' entries")); + lr_ignore_rest (lr, 0); + return -1; + } + value = ELLIPSIS_CHAR; + break; + + case tok_string: + /* This can become difficult. We have to get the weights which + correspind the the single wide chars in the string. But some + of the `chars' might not be real characters, but collation + elements or symbols. And so the string decoder might have + signaled errors. The string at this point is not translated. + I.e., all <...> sequences are still there. */ + { + char *runp = code->val.str.start; + void *tmp; + + while (*runp != '\0') + { + char *startp = (char *) runp; + char *putp = (char *) runp; + wchar_t wch; + + /* Lookup weight for char and store it. */ + if (*runp == '<') + { + while (*++runp != '\0' && *runp != '>') + { + if (*runp == lr->escape_char) + if (*++runp == '\0') + { + lr_error (lr, _("unterminated weight name")); + lr_ignore_rest (lr, 0); + return -1; + } + *putp++ = *runp; + } + if (*runp == '>') + ++runp; + + if (putp == startp) + { + lr_error (lr, _("empty weight name: line ignored")); + lr_ignore_rest (lr, 0); + return -1; + } + + wch = charset_find_value (charset, startp, putp - startp); + if (wch != ILLEGAL_CHAR_VALUE) + { + element_t *pelem; + + if (find_entry (&collate->result, &wch, sizeof (wchar_t), + (void *)&pelem) < 0) + pelem = NULL; + + while (pelem != NULL + && (pelem->name[0] != wch + || pelem->name[1] != L'\0')) + pelem = pelem->next; + + value = pelem == NULL ? 0 : pelem->this_weight; + } + else if (find_entry (&collate->elements, startp, putp - startp, + &tmp) >= 0) + { + element_t *pelem = (element_t *) tmp; + + value = pelem->this_weight; + } + else if (find_entry (&collate->symbols, startp, putp - startp, + &tmp) >= 0) + { + value = (unsigned int) tmp; + } + else + { + if (verbose) + lr_error (lr, _("unknown symbol `%.*s': line ignored"), + putp - startp, startp); + lr_ignore_rest (lr, 0); + return -1; + } + } + else + { + element_t *wp; + wchar_t wch; + + if (*runp == lr->escape_char) + { + static char digits[] = "0123456789abcdef"; + char *dp; + int base; + + ++runp; + if (tolower (*runp) == 'x') + { + ++runp; + base = 16; + } + else if (tolower (*runp) == 'd') + { + ++runp; + base = 10; + } + else + base = 8; + + dp = strchr (digits, tolower (*runp)); + if (dp == NULL || (dp - digits) >= base) + { + illegal_char: + lr_error (lr, _("\ +illegal character constant in string")); + lr_ignore_rest (lr, 0); + return -1; + } + wch = dp - digits; + ++runp; + + dp = strchr (digits, tolower (*runp)); + if (dp == NULL || (dp - digits) >= base) + goto illegal_char; + wch *= base; + wch += dp - digits; + ++runp; + + if (base != 16) + { + dp = strchr (digits, tolower (*runp)); + if (dp != NULL && (dp - digits < base)) + { + wch *= base; + wch += dp - digits; + ++runp; + } + } + } + else + wch = (wchar_t) *runp++; + + /* Lookup the weight for WCH. */ + if (find_entry (&collate->result, &wch, sizeof (wch), + (void *)&wp) < 0) + wp = NULL; + + while (wp != NULL + && (wp->name[0] != wch || wp->name[1] != L'\0')) + wp = wp->next; + + value = wp == NULL ? 0 : wp->this_weight; + + /* To get the correct name for the error message. */ + putp = runp; + + /**************************************************\ + |* I know here is something wrong. Characters in *| + |* the string which are not in the <...> form *| + |* cannot be declared forward for now!!! *| + \**************************************************/ + } + + /* Store in weight array. */ + if (collate->nweight >= collate->nweight_max) + { + collate->nweight_max *= 2; + collate->weight + = (unsigned int *) xrealloc (collate->weight, + collate->nweight_max); + } + + if (value == 0) + { + patch_t *newp; + + newp = (patch_t *) obstack_alloc (&collate->element_mem, + sizeof (patch_t)); + newp->fname = lr->fname; + newp->lineno = lr->lineno; + newp->token + = (const char *) obstack_copy0 (&collate->element_mem, + startp, putp - startp); + newp->where.idx = collate->nweight++; + newp->next = collate->current_patch; + collate->current_patch = newp; + } + else + collate->weight[collate->nweight++] = value; + ++collate->weight_cnt[collate->weight_idx]; + } + } + return 0; + + default: + assert (! "should not happen"); + } + + + if (collate->nweight >= collate->nweight_max) + { + collate->nweight_max *= 2; + collate->weight = (unsigned int *) xrealloc (collate->weight, + collate->nweight_max); + } + + collate->weight[collate->nweight++] = value; + ++collate->weight_cnt[collate->weight_idx]; + + return 0; +} + + +void +collate_end_weight (struct linereader *lr, struct localedef_t *locale) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + element_t *pelem = collate->current_element; + + if (collate->kind == symbol) + { + /* We don't have to do anything. */ + collate->was_ellipsis = 0; + return; + } + + if (collate->kind == ellipsis) + { + /* Before the next line is processed the ellipsis is handled. */ + collate->was_ellipsis = 1; + return; + } + + assert (collate->kind == character || collate->kind == element + || collate->kind == undefined); + + /* Fill in the missing weights. */ + while (++collate->weight_idx < collate->nrules) + { + collate->weight[collate->nweight++] = pelem->this_weight; + ++collate->weight_cnt[collate->weight_idx]; + } + + /* Now we know how many ordering weights the current + character/element has. Allocate room in the element structure + and copy information. */ + pelem->ordering_len = collate->nweight; + + /* First we write an array with the number of values for each + weight. */ + obstack_grow (&collate->element_mem, collate->weight_cnt, + collate->nrules * sizeof (unsigned int)); + + /* Now the weights itselves. */ + obstack_grow (&collate->element_mem, collate->weight, + collate->nweight * sizeof (unsigned int)); + + /* Get result. */ + pelem->ordering = obstack_finish (&collate->element_mem); + + /* Now we handle the "patches". */ + while (collate->current_patch != NULL) + { + patch_t *this_patch; + + this_patch = collate->current_patch; + + this_patch->where.pos = &pelem->ordering[collate->nrules + + this_patch->where.idx]; + + collate->current_patch = this_patch->next; + this_patch->next = collate->all_patches; + collate->all_patches = this_patch; + } + + /* Set information for next round. */ + collate->was_ellipsis = 0; + if (collate->kind != undefined) + collate->last_char = pelem->name[0]; +} diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c new file mode 100644 index 0000000000..c1cc8e53b8 --- /dev/null +++ b/locale/programs/ld-ctype.c @@ -0,0 +1,1310 @@ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <endian.h> +#include <limits.h> +#include <string.h> + +#include "locales.h" +#include "localeinfo.h" +#include "langinfo.h" +#include "locfile-token.h" +#include "stringtrans.h" + +/* Uncomment the following line in the production version. */ +/* define NDEBUG 1 */ +#include <assert.h> + + +void *xmalloc (size_t __n); +void *xcalloc (size_t __n, size_t __s); +void *xrealloc (void *__ptr, size_t __n); + + +/* The bit used for representing a special class. */ +#define BITPOS(class) ((class) - tok_upper) +#define BIT(class) (1 << BITPOS (class)) + +#define ELEM(ctype, collection, idx, value) \ + *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \ + &ctype->collection##_act idx, value) + +#define SWAPU32(w) \ + (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24)) + +#define SWAPU16(w) \ + ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8)) + + +/* To be compatible with former implementations we for now restrict + the number of bits for character classes to 16. When compatibility + is not necessary anymore increase the number to 32. */ +#define char_class_t u16_t +#define CHAR_CLASS_TRANS SWAPU16 +#define char_class32_t u32_t +#define CHAR_CLASS32_TRANS SWAPU32 + + +/* The real definition of the struct for the LC_CTYPE locale. */ +struct locale_ctype_t +{ + unsigned int *charnames; + size_t charnames_max; + size_t charnames_act; + + /* We will allow up to 8 * sizeof(u32_t) - 1 character classes. */ +#define MAX_NR_CHARCLASS (8 * sizeof (u32_t) - 1) + int nr_charclass; + const char *classnames[MAX_NR_CHARCLASS]; + unsigned long int current_class_mask; + unsigned int last_class_char; + u32_t *class_collection; + size_t class_collection_max; + size_t class_collection_act; + unsigned long int class_done; + + /* If the following number ever turns out to be too small simply + increase it. But I doubt it will. --drepper@gnu */ +#define MAX_NR_CHARMAP 16 + const char *mapnames[MAX_NR_CHARMAP]; + u32_t *map_collection[MAX_NR_CHARMAP]; + unsigned int map_collection_max[MAX_NR_CHARMAP]; + unsigned int map_collection_act[MAX_NR_CHARMAP]; + size_t map_collection_nr; + size_t last_map_idx; + unsigned int from_map_char; + int toupper_done; + int tolower_done; + + /* The arrays for the binary representation. */ + u32_t plane_size; + u32_t plane_cnt; + char_class_t *ctype_b; + char_class32_t *ctype32_b; + u32_t *names_el; + u32_t *names_eb; + u32_t **map_eb; + u32_t **map_el; + u32_t *class_name_ptr; + u32_t *map_name_ptr; +}; + + +/* Prototypes for local functions. */ +static void ctype_class_newP (struct linereader *lr, + struct locale_ctype_t *ctype, const char *name); +static void ctype_map_newP (struct linereader *lr, + struct locale_ctype_t *ctype, + const char *name, struct charset_t *charset); +static u32_t *find_idx (struct locale_ctype_t *ctype, u32_t **table, + size_t *max, size_t *act, unsigned int idx); +static void set_class_defaults (struct locale_ctype_t *ctype, + struct charset_t *charset); +static void allocate_arrays (struct locale_ctype_t *ctype); + + +void +ctype_startup (struct linereader *lr, struct localedef_t *locale, + struct charset_t *charset) +{ + unsigned int cnt; + struct locale_ctype_t *ctype; + + /* It is important that we always use UCS1 encoding for strings now. */ + encoding_method = ENC_UCS1; + + /* Allocate the needed room. */ + locale->categories[LC_CTYPE].ctype = ctype = + (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t)); + + /* We have no names seen yet. */ + ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512; + ctype->charnames = + (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int)); + for (cnt = 0; cnt < 256; ++cnt) + ctype->charnames[cnt] = cnt; + ctype->charnames_act = 256; + + /* Fill character class information. */ + ctype->nr_charclass = 0; + ctype->current_class_mask = 0; + ctype->last_class_char = ILLEGAL_CHAR_VALUE; + /* The order of the following instructions determines the bit + positions! */ + ctype_class_newP (lr, ctype, "upper"); + ctype_class_newP (lr, ctype, "lower"); + ctype_class_newP (lr, ctype, "alpha"); + ctype_class_newP (lr, ctype, "digit"); + ctype_class_newP (lr, ctype, "xdigit"); + ctype_class_newP (lr, ctype, "space"); + ctype_class_newP (lr, ctype, "print"); + ctype_class_newP (lr, ctype, "graph"); + ctype_class_newP (lr, ctype, "blank"); + ctype_class_newP (lr, ctype, "cntrl"); + ctype_class_newP (lr, ctype, "punct"); + ctype_class_newP (lr, ctype, "alnum"); + + ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512; + ctype->class_collection = (u32_t *) xmalloc (sizeof (unsigned long int) + * ctype->class_collection_max); + memset (ctype->class_collection, '\0', + sizeof (unsigned long int) * ctype->class_collection_max); + ctype->class_collection_act = 256; + + /* Fill character map information. */ + ctype->map_collection_nr = 0; + ctype->last_map_idx = MAX_NR_CHARMAP; + ctype->from_map_char = ILLEGAL_CHAR_VALUE; + ctype_map_newP (lr, ctype, "toupper", charset); + ctype_map_newP (lr, ctype, "tolower", charset); + + /* Fill first 256 entries in `toupper' and `tolower' arrays. */ + for (cnt = 0; cnt < 256; ++cnt) + { + ctype->map_collection[0][cnt] = cnt; + ctype->map_collection[1][cnt] = cnt; + } +} + + +void +ctype_finish (struct localedef_t *locale, struct charset_t *charset) +{ + /* See POSIX.2, table 2-6 for the meaning of the following table. */ +#define NCLASS 12 + static const struct + { + const char *name; + const char allow[NCLASS]; + } + valid_table[NCLASS] = + { + /* The order is important. See token.h for more information. + M = Always, D = Default, - = Permitted, X = Mutually exclusive */ + { "upper", "--MX-XDDXXX-" }, + { "lower", "--MX-XDDXXX-" }, + { "alpha", "---X-XDDXXX-" }, + { "digit", "XXX--XDDXXX-" }, + { "xdigit", "-----XDDXXX-" }, + { "space", "XXXXX------X" }, + { "print", "---------X--" }, + { "graph", "---------X--" }, + { "blank", "XXXXXM-----X" }, + { "cntrl", "XXXXX-XX--XX" }, + { "punct", "XXXXX-DD-X-X" }, + { "alnum", "-----XDDXXX-" } + }; + size_t cnt; + int cls1, cls2; + unsigned int space_value; + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + + /* Set default value for classes not specified. */ + set_class_defaults (ctype, charset); + + /* Check according to table. */ + for (cnt = 0; cnt < ctype->class_collection_max; ++cnt) + { + unsigned long int tmp; + + tmp = ctype->class_collection[cnt]; + if (tmp == 0) + continue; + + for (cls1 = 0; cls1 < NCLASS; ++cls1) + if ((tmp & (1 << cls1)) != 0) + for (cls2 = 0; cls2 < NCLASS; ++cls2) + if (valid_table[cls1].allow[cls2] != '-') + { + int eq = (tmp & (1 << cls2)) != 0; + switch (valid_table[cls1].allow[cls2]) + { + case 'M': + if (!eq) + { + char buf[17]; + char *cp = buf; + unsigned int value; + + value = ctype->charnames[cnt]; + + if ((value & 0xff000000) != 0) + cp += sprintf (cp, "\\%o", (value >> 24) & 0xff); + if ((value & 0xffff0000) != 0) + cp += sprintf (cp, "\\%o", (value >> 16) & 0xff); + if ((value & 0xffffff00) != 0) + cp += sprintf (cp, "\\%o", (value >> 8) & 0xff); + sprintf (cp, "\\%o", value & 0xff); + + error (0, 0, _("\ +character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "", + cp, valid_table[cls1].name, + valid_table[cls2].name); + } + break; + + case 'X': + if (eq) + { + char buf[17]; + char *cp = buf; + unsigned int value; + + value = ctype->charnames[cnt]; + + if ((value & 0xff000000) != 0) + cp += sprintf (cp, "\\%o", value >> 24); + if ((value & 0xffff0000) != 0) + cp += sprintf (cp, "\\%o", (value >> 16) & 0xff); + if ((value & 0xffffff00) != 0) + cp += sprintf (cp, "\\%o", (value >> 8) & 0xff); + sprintf (cp, "\\%o", value & 0xff); + + error (0, 0, _("\ +character %s'%s' in class `%s' must not be in class `%s'"), + value > 256 ? "L" : "", cp, + valid_table[cls1].name, valid_table[cls2].name); + } + break; + + case 'D': + ctype->class_collection[cnt] |= 1 << cls2; + break; + + default: + error (5, 0, _("internal error in %s, line %u"), + __FUNCTION__, __LINE__); + } + } + } + + /* ... and now test <SP> as a special case. */ + space_value = charset_find_value (charset, "SP", 2); + if (space_value == ILLEGAL_CHAR_VALUE) + error (0, 0, _("character <SP> not defined in character map")); + else if ((cnt = BITPOS (tok_space), + (ELEM (ctype, class_collection, , space_value) + & BIT (tok_space)) == 0) + || (cnt = BITPOS (tok_blank), + (ELEM (ctype, class_collection, , space_value) + & BIT (tok_blank)) == 0)) + error (0, 0, _("<SP> character not in class `%s'"), + valid_table[cnt].name); + else if ((cnt = BITPOS (tok_punct), + (ELEM (ctype, class_collection, , space_value) + & BIT (tok_punct)) != 0) + || (cnt = BITPOS (tok_graph), + (ELEM (ctype, class_collection, , space_value) + & BIT (tok_graph)) + != 0)) + error (0, 0, _("<SP> character must not be in class `%s'"), + valid_table[cnt].name); + else + ELEM (ctype, class_collection, , space_value) |= BIT (tok_print); +} + + +void +ctype_output (struct localedef_t *locale, const char *output_path) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE) + + 2 * (ctype->map_collection_nr - 2)); + struct iovec iov[2 + nelems + (ctype->nr_charclass + 1) + + (ctype->map_collection_nr + 1)]; + struct locale_file data; + u32_t idx[nelems]; + size_t elem, cnt, offset; + + + if ((locale->binary & (1 << LC_CTYPE)) != 0) + { + iov[0].iov_base = ctype; + iov[0].iov_len = locale->len[LC_CTYPE]; + + write_locale_data (output_path, "LC_CTYPE", 1, iov); + + return; + } + + + /* Now prepare the output: Find the sizes of the table we can use. */ + allocate_arrays (ctype); + + data.magic = LIMAGIC (LC_CTYPE); + data.n = nelems; + iov[0].iov_base = (void *) &data; + iov[0].iov_len = sizeof (data); + + iov[1].iov_base = (void *) idx; + iov[1].iov_len = sizeof (idx); + + idx[0] = iov[0].iov_len + iov[1].iov_len; + offset = 0; + + for (elem = 0; elem < nelems; ++elem) + { + if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + switch (elem) + { +#define CTYPE_DATA(name, base, len) \ + case _NL_ITEM_INDEX (name): \ + iov[2 + elem].iov_base = base; \ + iov[2 + elem].iov_len = len; \ + break + + CTYPE_DATA (_NL_CTYPE_CLASS, + ctype->ctype_b, + (256 + 128) * sizeof (char_class_t)); + + CTYPE_DATA (_NL_CTYPE_TOUPPER_EB, + ctype->map_eb[0], + (ctype->plane_size * ctype->plane_cnt + 128) + * sizeof (u32_t)); + CTYPE_DATA (_NL_CTYPE_TOLOWER_EB, + ctype->map_eb[1], + (ctype->plane_size * ctype->plane_cnt + 128) + * sizeof (u32_t)); + + CTYPE_DATA (_NL_CTYPE_TOUPPER_EL, + ctype->map_el[0], + (ctype->plane_size * ctype->plane_cnt + 128) + * sizeof (u32_t)); + CTYPE_DATA (_NL_CTYPE_TOLOWER_EL, + ctype->map_el[1], + (ctype->plane_size * ctype->plane_cnt + 128) + * sizeof (u32_t)); + + CTYPE_DATA (_NL_CTYPE_CLASS32, + ctype->ctype32_b, + (ctype->plane_size * ctype->plane_cnt + * sizeof (char_class32_t))); + + CTYPE_DATA (_NL_CTYPE_NAMES_EB, + ctype->names_eb, + ctype->plane_size * ctype->plane_cnt * sizeof (u32_t)); + CTYPE_DATA (_NL_CTYPE_NAMES_EL, + ctype->names_el, + ctype->plane_size * ctype->plane_cnt * sizeof (u32_t)); + + CTYPE_DATA (_NL_CTYPE_HASH_SIZE, + &ctype->plane_size, sizeof (u32_t)); + CTYPE_DATA (_NL_CTYPE_HASH_LAYERS, + &ctype->plane_cnt, sizeof (u32_t)); + + CTYPE_DATA (_NL_CTYPE_CLASS_NAMES, + ctype->class_name_ptr, + ctype->nr_charclass * sizeof (u32_t)); + CTYPE_DATA (_NL_CTYPE_MAP_NAMES, + ctype->map_name_ptr, + ctype->map_collection_nr * sizeof (u32_t)); + + CTYPE_DATA (_NL_CTYPE_WIDTH, + NULL, 0); /* Not yet implemented. */ + + default: + assert (! "unknown CTYPE element"); + } + else + { + /* Handle extra maps. */ + size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1; + + if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0) + iov[2 + elem].iov_base = ctype->map_eb[nr]; + else + iov[2 + elem].iov_base = ctype->map_el[nr]; + + iov[2 + elem].iov_len = ((ctype->plane_size * ctype->plane_cnt + 128) + * sizeof (u32_t)); + } + + if (elem + 1 < nelems) + idx[elem + 1] = idx[elem] + iov[2 + elem].iov_len; + } + + offset = idx[elem - 1] + iov[2 + elem - 1].iov_len; + + /* The class name array. */ + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++elem) + { + iov[2 + elem].iov_base = (void *) ctype->classnames[cnt]; + iov[2 + elem].iov_len = strlen (ctype->classnames[cnt]) + 1; + + ctype->class_name_ptr[cnt] = offset; + offset += iov[2 + elem].iov_len; + } + iov[2 + elem].iov_base = (void *) ""; + iov[2 + elem].iov_len = 1; + ++elem; + + /* The map name array. */ + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++elem) + { + iov[2 + elem].iov_base = (void *) ctype->mapnames[cnt]; + iov[2 + elem].iov_len = strlen (ctype->mapnames[cnt]) + 1; + + ctype->map_name_ptr[cnt] = offset; + offset += iov[2 + elem].iov_len; + } + iov[2 + elem].iov_base = (void *) ""; + iov[2 + elem].iov_len = 1; + ++elem; + + assert (elem == nelems + ctype->nr_charclass + ctype->map_collection_nr + 2); + + write_locale_data (output_path, "LC_CTYPE", 2 + elem, iov); +} + + +/* Character class handling. */ +void +ctype_class_new (struct linereader *lr, struct localedef_t *locale, + enum token_t tok, struct token *code, + struct charset_t *charset) +{ + ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype, + code->val.str.start); +} + + +int +ctype_is_charclass (struct linereader *lr, struct localedef_t *locale, + const char *name) +{ + int cnt; + + for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt) + if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt]) + == 0) + return 1; + + return 0; +} + + +void +ctype_class_start (struct linereader *lr, struct localedef_t *locale, + enum token_t tok, const char *str, + struct charset_t *charset) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + int cnt; + + switch (tok) + { + case tok_upper: + str = "upper"; + break; + case tok_lower: + str = "lower"; + break; + case tok_alpha: + str = "alpha"; + break; + case tok_digit: + str = "digit"; + break; + case tok_xdigit: + str = "xdigit"; + break; + case tok_space: + str = "space"; + break; + case tok_print: + str = "print"; + break; + case tok_graph: + str = "graph"; + break; + case tok_blank: + str = "blank"; + break; + case tok_cntrl: + str = "cntrl"; + break; + case tok_punct: + str = "punct"; + break; + case tok_alnum: + str = "alnum"; + break; + case tok_ident: + break; + default: + assert (! "illegal token as class name: should not happen"); + } + + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (str, ctype->classnames[cnt]) == 0) + break; + + if (cnt >= ctype->nr_charclass) + assert (! "unknown class in class definition: should not happen"); + + ctype->class_done |= BIT (tok); + + ctype->current_class_mask = 1 << cnt; + ctype->last_class_char = ILLEGAL_CHAR_VALUE; +} + + +void +ctype_class_from (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + unsigned int value; + + value = charset_find_value (charset, code->val.str.start, code->val.str.len); + + ctype->last_class_char = value; + + if (value == ILLEGAL_CHAR_VALUE) + /* In the LC_CTYPE category it is no error when a character is + not found. This has to be ignored silently. */ + return; + + *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max, + &ctype->class_collection_act, value) + |= ctype->current_class_mask; +} + + +void +ctype_class_to (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + unsigned int value, cnt; + + value = charset_find_value (charset, code->val.str.start, code->val.str.len); + + assert (value >= ctype->last_class_char); + + for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt) + *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max, + &ctype->class_collection_act, cnt) + |= ctype->current_class_mask; + + ctype->last_class_char = ILLEGAL_CHAR_VALUE; +} + + +void +ctype_class_end (struct linereader *lr, struct localedef_t *locale) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + + /* We have no special actions to perform here. */ + ctype->current_class_mask = 0; + ctype->last_class_char = ILLEGAL_CHAR_VALUE; +} + + +/* Character map handling. */ +void +ctype_map_new (struct linereader *lr, struct localedef_t *locale, + enum token_t tok, struct token *code, + struct charset_t *charset) +{ + ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype, + code->val.str.start, charset); +} + + +int +ctype_is_charmap (struct linereader *lr, struct localedef_t *locale, + const char *name) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + size_t cnt; + + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + if (strcmp (name, ctype->mapnames[cnt]) == 0) + return 1; + + return 0; +} + + +void +ctype_map_start (struct linereader *lr, struct localedef_t *locale, + enum token_t tok, const char *name, struct charset_t *charset) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + size_t cnt; + + switch (tok) + { + case tok_toupper: + ctype->toupper_done = 1; + name = "toupper"; + break; + case tok_tolower: + ctype->tolower_done = 1; + name = "tolower"; + break; + case tok_ident: + break; + default: + assert (! "unknown token in category `LC_CTYPE' should not happen"); + } + + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + if (strcmp (name, ctype->mapnames[cnt]) == 0) + break; + + if (cnt == ctype->map_collection_nr) + assert (! "unknown token in category `LC_CTYPE' should not happen"); + + ctype->last_map_idx = cnt; + ctype->from_map_char = ILLEGAL_CHAR_VALUE; +} + + +void +ctype_map_from (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + unsigned int value; + + value = charset_find_value (charset, code->val.str.start, code->val.str.len); + + if (value == ILLEGAL_CHAR_VALUE) + /* In the LC_CTYPE category it is no error when a character is + not found. This has to be ignored silently. */ + return; + + assert (ctype->last_map_idx < ctype->map_collection_nr); + + ctype->from_map_char = value; +} + + +void +ctype_map_to (struct linereader *lr, struct localedef_t *locale, + struct token *code, struct charset_t *charset) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + unsigned int value; + + value = charset_find_value (charset, code->val.str.start, code->val.str.len); + + if (ctype->from_map_char == ILLEGAL_CHAR_VALUE + || value == ILLEGAL_CHAR_VALUE) + { + /* In the LC_CTYPE category it is no error when a character is + not found. This has to be ignored silently. */ + ctype->from_map_char = ILLEGAL_CHAR_VALUE; + return; + } + + *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx], + &ctype->map_collection_max[ctype->last_map_idx], + &ctype->map_collection_act[ctype->last_map_idx], + ctype->from_map_char) = value; + + ctype->from_map_char = ILLEGAL_CHAR_VALUE; +} + + +void +ctype_map_end (struct linereader *lr, struct localedef_t *locale) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + + ctype->last_map_idx = MAX_NR_CHARMAP; + ctype->from_map_char = ILLEGAL_CHAR_VALUE; +} + + +/* Local functions. */ +static void +ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name) +{ + int cnt; + + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (ctype->classnames[cnt], name) == 0) + break; + + if (cnt < ctype->nr_charclass) + { + lr_error (lr, _("character class `%s' already defined")); + return; + } + + if (ctype->nr_charclass == MAX_NR_CHARCLASS) + /* Exit code 2 is prescribed in P1003.2b. */ + error (2, 0, _("\ +implementation limit: no more than %d character classes allowed"), + MAX_NR_CHARCLASS); + + ctype->classnames[ctype->nr_charclass++] = name; +} + + +static void +ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name, struct charset_t *charset) +{ + size_t max_chars = 0; + int cnt; + + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + { + if (strcmp (ctype->mapnames[cnt], name) == 0) + break; + + if (max_chars < ctype->map_collection_max[cnt]) + max_chars = ctype->map_collection_max[cnt]; + } + + if (cnt < ctype->map_collection_nr) + { + lr_error (lr, _("character map `%s' already defined")); + return; + } + + if (ctype->map_collection_nr == MAX_NR_CHARMAP) + /* Exit code 2 is prescribed in P1003.2b. */ + error (2, 0, _("\ +implementation limit: no more than %d character maps allowed"), + MAX_NR_CHARMAP); + + ctype->mapnames[cnt] = name; + + if (max_chars == 0) + ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 + : 512; + else + ctype->map_collection_max[cnt] = max_chars; + + ctype->map_collection[cnt] = + (u32_t *) xmalloc (sizeof (u32_t) * ctype->map_collection_max[cnt]); + memset (ctype->map_collection[cnt], '\0', + sizeof (u32_t) * ctype->map_collection_max[cnt]); + ctype->map_collection_act[cnt] = 256; + + ++ctype->map_collection_nr; +} + + +static u32_t * +find_idx (struct locale_ctype_t *ctype, u32_t **table, size_t *max, + size_t *act, unsigned int idx) +{ + size_t cnt; + + if (idx < 256) + return &(*table)[idx]; + + for (cnt = 256; cnt < ctype->charnames_act; ++cnt) + if (ctype->charnames[cnt] == idx) + break; + + /* We have to distinguish two cases: the names is found or not. */ + if (cnt == ctype->charnames_act) + { + /* Extend the name array. */ + if (ctype->charnames_act == ctype->charnames_max) + { + ctype->charnames_max *= 2; + ctype->charnames = (unsigned int *) + xrealloc (ctype->charnames, + sizeof (unsigned int) * ctype->charnames_max); + } + ctype->charnames[ctype->charnames_act++] = idx; + } + + if (cnt >= *act) + { + if (cnt >= *max) + { + size_t old_max = *max; + do + *max *= 2; + while (*max <= cnt); + + *table = + (u32_t *) xrealloc (*table, *max * sizeof (unsigned long int)); + memset (&(*table)[old_max], '\0', (*max - old_max) * sizeof (u32_t)); + } + + (*table)[cnt] = 0; + *act = cnt; + } + + return &(*table)[cnt]; +} + + +static void +set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset) +{ + /* These function defines the default values for the classes and conversions + according to POSIX.2 2.5.2.1. + It may seem that the order of these if-blocks is arbitrary but it is NOT. + Don't move them unless you know what you do! */ + + void set_default (int bit, int from, int to) + { + char tmp[2]; + int ch; + /* Define string. */ + strcpy (tmp, "?"); + + for (ch = from; ch <= to; ++ch) + { + unsigned int value; + tmp[0] = ch; + + value = charset_find_value (charset, tmp, 1); + if (value == ILLEGAL_CHAR_VALUE) + { + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + tmp); + continue; + } + else + ELEM (ctype, class_collection, , value) |= bit; + } + } + + /* Set default values if keyword was not present. */ + if ((ctype->class_done & BIT (tok_upper)) == 0) + /* "If this keyword [lower] is not specified, the lowercase letters + `A' through `Z', ..., shall automatically belong to this class, + with implementation defined character values." [P1003.2, 2.5.2.1] */ + set_default (BIT (tok_upper), 'A', 'Z'); + + if ((ctype->class_done & BIT (tok_lower)) == 0) + /* "If this keyword [lower] is not specified, the lowercase letters + `a' through `z', ..., shall automatically belong to this class, + with implementation defined character values." [P1003.2, 2.5.2.1] */ + set_default (BIT (tok_lower), 'a', 'z'); + + if ((ctype->class_done & BIT (tok_alpha)) == 0) + { + /* Table 2-6 in P1003.2 says that characters in class `upper' or + class `lower' *must* be in class `alpha'. */ + unsigned long int mask = BIT (tok_upper) | BIT (tok_lower); + size_t cnt; + + for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & mask) != 0) + ctype->class_collection[cnt] |= BIT (tok_alpha); + } + + if ((ctype->class_done & BIT (tok_digit)) == 0) + /* "If this keyword [digit] is not specified, the digits `0' through + `9', ..., shall automatically belong to this class, with + implementation-defined character values." [P1003.2, 2.5.2.1] */ + set_default (BIT (tok_digit), '0', '9'); + + /* "Only characters specified for the `alpha' and `digit' keyword + shall be specified. Characters specified for the keyword `alpha' + and `digit' are automatically included in this class. */ + { + unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit); + size_t cnt; + + for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & mask) != 0) + ctype->class_collection[cnt] |= BIT (tok_alnum); + } + + if ((ctype->class_done & BIT (tok_space)) == 0) + /* "If this keyword [space] is not specified, the characters <space>, + <form-feed>, <newline>, <carriage-return>, <tab>, and + <vertical-tab>, ..., shall automatically belong to this class, + with implementation-defined character values." [P1003.2, 2.5.2.1] */ + { + unsigned int value; + + value = charset_find_value (charset, "space", 5); + if (value == ILLEGAL_CHAR_VALUE) + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + "<space>"); + else + ELEM (ctype, class_collection, , value) |= BIT (tok_space); + + value = charset_find_value (charset, "form-feed", 9); + if (value == ILLEGAL_CHAR_VALUE) + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + "<form-feed>"); + else + ELEM (ctype, class_collection, , value) |= BIT (tok_space); + + value = charset_find_value (charset, "newline", 7); + if (value == ILLEGAL_CHAR_VALUE) + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + "<newline>"); + else + ELEM (ctype, class_collection, , value) |= BIT (tok_space); + + value = charset_find_value (charset, "carriage-return", 15); + if (value == ILLEGAL_CHAR_VALUE) + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + "<carriage-return>"); + else + ELEM (ctype, class_collection, , value) |= BIT (tok_space); + + value = charset_find_value (charset, "tab", 3); + if (value == ILLEGAL_CHAR_VALUE) + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + "<tab>"); + else + ELEM (ctype, class_collection, , value) |= BIT (tok_space); + + value = charset_find_value (charset, "vertical-tab", 12); + if (value == ILLEGAL_CHAR_VALUE) + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + "<vertical-tab>"); + else + ELEM (ctype, class_collection, , value) |= BIT (tok_space); + } + + if ((ctype->class_done & BIT (tok_xdigit)) == 0) + /* "If this keyword is not specified, the digits `0' to `9', the + uppercase letters `A' through `F', and the lowercase letters `a' + through `f', ..., shell automatically belong to this class, with + implementation defined character values." [P1003.2, 2.5.2.1] */ + { + set_default (BIT (tok_xdigit), '0', '9'); + set_default (BIT (tok_xdigit), 'A', 'F'); + set_default (BIT (tok_xdigit), 'a', 'f'); + } + + if ((ctype->class_done & BIT (tok_blank)) == 0) + /* "If this keyword [blank] is unspecified, the characters <space> and + <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */ + { + unsigned int value; + + value = charset_find_value (charset, "space", 5); + if (value == ILLEGAL_CHAR_VALUE) + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + "<space>"); + else + ELEM (ctype, class_collection, , value) |= BIT (tok_blank); + + value = charset_find_value (charset, "tab", 3); + if (value == ILLEGAL_CHAR_VALUE) + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + "<tab>"); + else + ELEM (ctype, class_collection, , value) |= BIT (tok_blank); + } + + if ((ctype->class_done & BIT (tok_graph)) == 0) + /* "If this keyword [graph] is not specified, characters specified for + the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct', + shall belong to this character class." [P1003.2, 2.5.2.1] */ + { + unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | + BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); + size_t cnt; + + for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & mask) != 0) + ctype->class_collection[cnt] |= BIT (tok_graph); + } + + if ((ctype->class_done & BIT (tok_print)) == 0) + /* "If this keyword [print] is not provided, characters specified for + the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct', + and the <space> character shall belong to this character class." + [P1003.2, 2.5.2.1] */ + { + unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | + BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); + size_t cnt; + int space; + + for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & mask) != 0) + ctype->class_collection[cnt] |= BIT (tok_print); + + space = charset_find_value (charset, "space", 5); + if (space == ILLEGAL_CHAR_VALUE) + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + "<space>"); + else + ELEM (ctype, class_collection, , space) |= BIT (tok_print); + } + + if (ctype->toupper_done == 0) + /* "If this keyword [toupper] is not spcified, the lowercase letters + `a' through `z', and their corresponding uppercase letters `A' to + `Z', ..., shall automatically be included, with implementation- + defined character values." [P1003.2, 2.5.2.1] */ + { + char tmp[4]; + int ch; + + strcpy (tmp, "<?>"); + + for (ch = 'a'; ch <= 'z'; ++ch) + { + unsigned int value_from, value_to; + + tmp[1] = (char) ch; + + value_from = charset_find_value (charset, &tmp[1], 1); + if (value_from == ILLEGAL_CHAR_VALUE) + { + error (0, 0, _("\ +character `%c' not defined while needed as default value"), + tmp); + continue; + } + + /* This conversion is implementation defined. */ + tmp[1] = (char) (ch + ('A' - 'a')); + value_to = charset_find_value (charset, &tmp[1], 1); + if (value_to == -1) + { + error (0, 0, _("\ +character `%s' not defined while needed as default value"), + tmp); + continue; + } + + /* The index [0] is determined by the order of the + `ctype_map_newP' calls in `ctype_startup'. */ + ELEM (ctype, map_collection, [0], value_from) = value_to; + } + } + + if (ctype->tolower_done == 0) + /* "If this keyword [tolower] is not specified, the mapping shall be + the reverse mapping of the one specified to `toupper'." [P1003.2] */ + { + size_t cnt; + + for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt) + if (ctype->map_collection[0][cnt] != 0) + ELEM (ctype, map_collection, [1], + ctype->map_collection[0][cnt]) + = ctype->charnames[cnt]; + } +} + + +static void +allocate_arrays (struct locale_ctype_t *ctype) +{ + size_t idx; + + /* First we have to decide how we organize the arrays. It is easy for + a one-byte character set. But multi-byte character set cannot be + stored flat because they might be sparsly used. So we determine an + optimal hashing function for the used characters. + + We use a very trivial hashing function to store the sparse table. + CH % TABSIZE is used as an index. To solve multiple hits we have + N planes. This gurantees a fixed search time for a character [N + / 2]. In the following code we determine the minmum value for + TABSIZE * N, where TABSIZE >= 256. */ + size_t min_total = UINT_MAX; + size_t act_size = 256; + + fputs (_("\ +Computing table size for character classes might take a while..."), + stderr); + + while (act_size < min_total) + { + size_t cnt[act_size]; + size_t act_planes = 1; + + memset (cnt, '\0', sizeof cnt); + + for (idx = 0; idx < 256; ++idx) + cnt[idx] = 1; + + for (idx = 0; idx < ctype->charnames_act; ++idx) + if (ctype->charnames[idx] >= 256) + { + size_t nr = ctype->charnames[idx] % act_size; + + if (++cnt[nr] > act_planes) + { + act_planes = cnt[nr]; + if (act_size * act_planes >= min_total) + break; + } + } + + if (act_size * act_planes < min_total) + { + min_total = act_size * act_planes; + ctype->plane_size = act_size; + ctype->plane_cnt = act_planes; + } + + ++act_size; + } + + fprintf (stderr, _(" done\n")); + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define NAMES_B1 ctype->names_el +# define NAMES_B2 ctype->names_eb +#else +# define NAMES_B1 ctype->names_eb +# define NAMES_B2 ctype->names_el +#endif + + ctype->names_eb = (u32_t *) xcalloc (ctype->plane_size * ctype->plane_cnt, + sizeof (u32_t)); + ctype->names_el = (u32_t *) xcalloc (ctype->plane_size * ctype->plane_cnt, + sizeof (u32_t)); + + for (idx = 1; idx < 256; ++idx) + NAMES_B1[idx] = idx; + + /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */ + NAMES_B1[0] = 1; + + for (idx = 256; idx < ctype->charnames_act; ++idx) + { + size_t nr = (ctype->charnames[idx] % ctype->plane_size); + size_t depth = 0; + + while (NAMES_B1[nr + depth * ctype->plane_size]) + ++depth; + assert (depth < ctype->plane_cnt); + + NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx]; + + /* Now for faster access remember the index in the NAMES_B array. */ + ctype->charnames[idx] = nr + depth * ctype->plane_size; + } + NAMES_B1[0] = 0; + + for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx) + NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]); + + + /* You wonder about this amount of memory? This is only because some + users do not manage to address the array with unsigned values or + data types with range >= 256. '\200' would result in the array + index -128. To help these poor people we duplicate the entries for + 128 up to 255 below the entry for \0. */ + ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, + sizeof (char_class_t)); + ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size + * ctype->plane_cnt, + sizeof (char_class32_t)); + + /* Fill in the character class information. */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define TRANS(w) CHAR_CLASS_TRANS (w) +# define TRANS32(w) CHAR_CLASS32_TRANS (w) +#else +# define TRANS(w) (w) +# define TRANS32(w) (w) +#endif + + for (idx = 0; idx < ctype->class_collection_act; ++idx) + if (ctype->charnames[idx] < 256) + ctype->ctype_b[128 + ctype->charnames[idx]] + = TRANS (ctype->class_collection[idx]); + + /* Mirror first 128 entries. */ + for (idx = 0; idx < 128; ++idx) + ctype->ctype_b[idx] = ctype->ctype_b[256 + idx]; + + /* The 32 bit array contains all characters. */ + for (idx = 0; idx < ctype->class_collection_act; ++idx) + ctype->ctype32_b[ctype->charnames[idx]] + = TRANS32 (ctype->class_collection[idx]); + + /* Room for table of mappings. */ + ctype->map_eb = (u32_t **) xmalloc (ctype->map_collection_nr + * sizeof (u32_t *)); + ctype->map_el = (u32_t **) xmalloc (ctype->map_collection_nr + * sizeof (u32_t *)); + + /* Fill in all mappings. */ + for (idx = 0; idx < ctype->map_collection_nr; ++idx) + { + unsigned int idx2; + + /* Allocate table. */ + ctype->map_eb[idx] = (u32_t *) xmalloc ((ctype->plane_size + * ctype->plane_cnt + 128) + * sizeof (u32_t)); + ctype->map_el[idx] = (u32_t *) xmalloc ((ctype->plane_size + * ctype->plane_cnt + 128) + * sizeof (u32_t)); + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define MAP_B1 ctype->map_el +# define MAP_B2 ctype->map_eb +#else +# define MAP_B1 ctype->map_eb +# define MAP_B2 ctype->map_el +#endif + + /* Copy default value (identity mapping). */ + memcpy (&MAP_B1[idx][128], NAMES_B1, + ctype->plane_size * ctype->plane_cnt * sizeof (u32_t)); + + /* Copy values from collection. */ + for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2) + if (ctype->map_collection[idx][idx2] != 0) + MAP_B1[idx][128 + ctype->charnames[idx2]] = + ctype->map_collection[idx][idx2]; + + /* Mirror first 128 entries. */ + for (idx2 = 0; idx2 < 128; ++idx2) + MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2]; + + + /* And now the other byte order. */ + for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2) + MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]); + } + + /* Extra array for class and map names. */ + ctype->class_name_ptr = (u32_t *) xmalloc (ctype->nr_charclass + * sizeof (u32_t)); + ctype->map_name_ptr = (u32_t *) xmalloc (ctype->map_collection_nr + * sizeof (u32_t)); +} diff --git a/locale/programs/ld-messages.c b/locale/programs/ld-messages.c new file mode 100644 index 0000000000..ebd5054b02 --- /dev/null +++ b/locale/programs/ld-messages.c @@ -0,0 +1,237 @@ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <alloca.h> +#include <langinfo.h> +#include <string.h> +#include <sys/uio.h> + +#ifdef HAVE_REGEX +# include <regex.h> +#else +# include <rx.h> +#endif + +/* Undefine following line in production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + +#include "locales.h" +#include "stringtrans.h" +#include "localeinfo.h" + + +void *xmalloc (size_t __n); + + +/* The real definition of the struct for the LC_MESSAGES locale. */ +struct locale_messages_t +{ + const char *yesexpr; + const char *noexpr; + const char *yesstr; + const char *nostr; +}; + + +void +messages_startup (struct linereader *lr, struct localedef_t *locale, + struct charset_t *charset) +{ + struct locale_messages_t *messages; + + /* It is important that we always use UCS1 encoding for strings now. */ + encoding_method = ENC_UCS1; + + locale->categories[LC_MESSAGES].messages = messages = + (struct locale_messages_t *) xmalloc (sizeof (struct locale_messages_t)); + + memset (messages, '\0', sizeof (struct locale_messages_t)); +} + + +void +messages_finish (struct localedef_t *locale) +{ + struct locale_messages_t *messages + = locale->categories[LC_MESSAGES].messages; + + /* The fields YESSTR and NOSTR are optional. */ + if (messages->yesexpr == NULL) + error (0, 0, _("field `%s' in category `%s' undefined"), + "yesexpr", "LC_MESSAGES"); + else + { + int result; + regex_t re; + + /* Test whether it are correct regular expressions. */ + result = regcomp (&re, messages->yesexpr, REG_EXTENDED); + if (result != 0) + { + char errbuf[BUFSIZ]; + + (void) regerror (result, &re, errbuf, BUFSIZ); + error (0, 0, _("\ +no correct regular expression for field `%s' in category `%s': %s"), + "yesexpr", "LC_MESSAGES", errbuf); + } + } + + if (messages->noexpr == NULL) + error (0, 0, _("field `%s' in category `%s' undefined"), + "noexpr", "LC_MESSAGES"); + else + { + int result; + regex_t re; + + /* Test whether it are correct regular expressions. */ + result = regcomp (&re, messages->noexpr, REG_EXTENDED); + if (result != 0) + { + char errbuf[BUFSIZ]; + + (void) regerror (result, &re, errbuf, BUFSIZ); + error (0, 0, _("\ +no correct regular expression for field `%s' in category `%s': %s"), + "noexpr", "LC_MESSAGES", errbuf); + } + } +} + + +void +messages_output (struct localedef_t *locale, const char *output_path) +{ + struct locale_messages_t *messages + = locale->categories[LC_MESSAGES].messages; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)]; + struct locale_file data; + u32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)]; + size_t cnt = 0; + + if ((locale->binary & (1 << LC_MESSAGES)) != 0) + { + iov[0].iov_base = messages; + iov[0].iov_len = locale->len[LC_MESSAGES]; + + write_locale_data (output_path, "LC_MESSAGES", 1, iov); + + return; + } + + data.magic = LIMAGIC (LC_MESSAGES); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = (void *) (messages->yesexpr ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (messages->noexpr ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (messages->yesstr ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (messages->nostr ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + + assert (cnt + 1 == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)); + + write_locale_data (output_path, "LC_MESSAGES", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES), iov); +} + + +void +messages_add (struct linereader *lr, struct localedef_t *locale, + enum token_t tok, struct token *code, + struct charset_t *charset) +{ + struct locale_messages_t *messages + = locale->categories[LC_MESSAGES].messages; + + switch (tok) + { + case tok_yesexpr: + if (code->val.str.start == NULL) + { + lr_error (lr, _("unknown character in field `%s' of category `%s'"), + "yesexpr", "LC_MESSAGES"); + messages->yesexpr = ""; + } + else + messages->yesexpr = code->val.str.start; + break; + + case tok_noexpr: + if (code->val.str.start == NULL) + { + lr_error (lr, _("unknown character in field `%s' of category `%s'"), + "noexpr", "LC_MESSAGES"); + messages->noexpr = ""; + } + else + messages->noexpr = code->val.str.start; + break; + + case tok_yesstr: + if (code->val.str.start == NULL) + { + lr_error (lr, _("unknown character in field `%s' of category `%s'"), + "yesstr", "LC_MESSAGES"); + messages->yesstr = ""; + } + else + messages->yesstr = code->val.str.start; + break; + + case tok_nostr: + if (code->val.str.start == NULL) + { + lr_error (lr, _("unknown character in field `%s' of category `%s'"), + "nostr", "LC_MESSAGES"); + messages->nostr = ""; + } + else + messages->nostr = code->val.str.start; + break; + + default: + assert (! "unknown token in category `LC_MESSAGES': should not happen"); + } +} diff --git a/locale/programs/ld-monetary.c b/locale/programs/ld-monetary.c new file mode 100644 index 0000000000..18e27866fb --- /dev/null +++ b/locale/programs/ld-monetary.c @@ -0,0 +1,385 @@ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> + + +/* Undefine following line in production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + +#include "locales.h" +#include "localeinfo.h" +#include "stringtrans.h" + +void *xmalloc (size_t __n); +void *xrealloc (void *__ptr, size_t __n); + + +/* The real definition of the struct for the LC_NUMERIC locale. */ +struct locale_monetary_t +{ + const char *int_curr_symbol; + const char *currency_symbol; + const char *mon_decimal_point; + const char *mon_thousands_sep; + char *mon_grouping; + size_t mon_grouping_max; + size_t mon_grouping_act; + const char *positive_sign; + const char *negative_sign; + signed char int_frac_digits; + signed char frac_digits; + signed char p_cs_precedes; + signed char p_sep_by_space; + signed char n_cs_precedes; + signed char n_sep_by_space; + signed char p_sign_posn; + signed char n_sign_posn; +}; + + +/* The content iof the field int_curr_symbol has to be taken from + ISO-4217. We test for correct values. */ +#define DEFINE_INT_CURR(str) str, +static const char *const valid_int_curr[] = + { +# include "../iso-4217.def" + }; +#define NR_VALID_INT_CURR ((sizeof (valid_int_curr) \ + / sizeof (valid_int_curr[0]))) +#undef DEFINE_INT_CURR + + +/* Prototypes for local functions. */ +static int curr_strcmp(const char *s1, const char **s2); + + +void +monetary_startup (struct linereader *lr, struct localedef_t *locale, + struct charset_t *charset) +{ + struct locale_monetary_t *monetary; + + /* It is important that we always use UCS1 encoding for strings now. */ + encoding_method = ENC_UCS1; + + locale->categories[LC_MONETARY].monetary = monetary = + (struct locale_monetary_t *) xmalloc (sizeof (struct locale_monetary_t)); + + memset (monetary, '\0', sizeof (struct locale_monetary_t)); + + monetary->mon_grouping_max = 80; + monetary->mon_grouping = + (char *) xmalloc (monetary->mon_grouping_max); + monetary->mon_grouping_act = 0; + + monetary->int_frac_digits = -2; + monetary->frac_digits = -2; + monetary->p_cs_precedes = -2; + monetary->p_sep_by_space = -2; + monetary->n_cs_precedes = -2; + monetary->n_sep_by_space = -2; + monetary->p_sign_posn = -2; + monetary->n_sign_posn = -2; +} + + +void +monetary_finish (struct localedef_t *locale) +{ + struct locale_monetary_t *monetary + = locale->categories[LC_MONETARY].monetary; + +#define TEST_ELEM(cat) \ + if (monetary->cat == NULL) \ + error (0, 0, _("field `%s' in category `%s' not defined"), \ + #cat, "LC_MONETARY") + + TEST_ELEM (int_curr_symbol); + TEST_ELEM (currency_symbol); + TEST_ELEM (mon_decimal_point); + TEST_ELEM (mon_thousands_sep); + TEST_ELEM (positive_sign); + TEST_ELEM (negative_sign); + + /* The international currency symbol must come from ISO 4217. */ + if (monetary->int_curr_symbol != NULL) + { + if (strlen (monetary->int_curr_symbol) != 4) + error (0, 0, _("\ +value of field `int_curr_symbol' in category `LC_MONETARY' has wrong length")); + else if (bsearch (monetary->int_curr_symbol, valid_int_curr, + NR_VALID_INT_CURR, sizeof (const char *), + (comparison_fn_t) curr_strcmp) == NULL) + error (0, 0, _("\ +value of field `int_curr_symbol' in category `LC_MONETARY' does \ +not correspond to a valid name in ISO 4217")); + } + + /* The decimal point must not be empty. This is not said explicitly + in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be + != "". */ + if (monetary->mon_decimal_point[0] == '\0') + { + error (0, 0, _("\ +value for field `%s' in category `%s' must not be the empty string"), + "mon_decimal_point", "LC_MONETARY"); + } + + if (monetary->mon_grouping_act == 0) + error (0, 0, _("field `%s' in category `%s' not defined"), + "mon_grouping", "LC_MONETARY"); + +#undef TEST_ELEM +#define TEST_ELEM(cat, min, max) \ + if (monetary->cat == -2) \ + error (0, 0, _("field `%s' in category `%s' not defined"), \ + #cat, "LC_MONETARY"); \ + else if (monetary->cat < min || monetary->cat > max) \ + error (0, 0, _("\ +value for field `%s' in category `%s' must be in range %d...%d"), \ + #cat, "LC_MONETARY", min, max) + + TEST_ELEM (int_frac_digits, -128, 127); /* No range check. */ + TEST_ELEM (frac_digits, -128, 127); /* No range check. */ + TEST_ELEM (p_cs_precedes, -1, 1); + TEST_ELEM (p_sep_by_space, -1, 2); + TEST_ELEM (n_cs_precedes, -1, 1); + TEST_ELEM (n_sep_by_space, -1, 2); + TEST_ELEM (p_sign_posn, -1, 4); + TEST_ELEM (n_sign_posn, -1, 4); +} + + +void +monetary_output (struct localedef_t *locale, const char *output_path) +{ + struct locale_monetary_t *monetary + = locale->categories[LC_MONETARY].monetary; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)]; + struct locale_file data; + u32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)]; + size_t cnt = 0; + + if ((locale->binary & (1 << LC_MONETARY)) != 0) + { + iov[0].iov_base = monetary; + iov[0].iov_len = locale->len[LC_MONETARY]; + + write_locale_data (output_path, "LC_MONETARY", 1, iov); + + return; + } + + data.magic = LIMAGIC (LC_MONETARY); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = (void *) (monetary->int_curr_symbol ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (monetary->currency_symbol ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (monetary->mon_decimal_point ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (monetary->mon_thousands_sep ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = alloca (monetary->mon_grouping_act + 1); + iov[cnt].iov_len = monetary->mon_grouping_act + 1; + memcpy (iov[cnt].iov_base, monetary->mon_grouping, + monetary->mon_grouping_act); + ((char *) iov[cnt].iov_base)[monetary->mon_grouping_act] = '\0'; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (monetary->positive_sign ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (monetary->negative_sign ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->int_frac_digits; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->frac_digits; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->p_cs_precedes; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->p_sep_by_space; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->n_cs_precedes; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->n_sep_by_space; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->p_sign_posn; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->n_sign_posn; + iov[cnt].iov_len = 1; + + assert (cnt + 1 == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)); + + write_locale_data (output_path, "LC_MONETARY", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY), iov); +} + + +void +monetary_add (struct linereader *lr, struct localedef_t *locale, + enum token_t tok, struct token *code, + struct charset_t *charset) +{ + struct locale_monetary_t *monetary + = locale->categories[LC_MONETARY].monetary; + + switch (tok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + if (monetary->cat != NULL) \ + lr_error (lr, _("\ +field `%s' in category `%s' declared more than once"), \ + #cat, "LC_MONETARY"); \ + else if (code->val.str.start == NULL) \ + { \ + lr_error (lr, _("unknown character in field `%s' of category `%s'"),\ + #cat, "LC_MONETARY"); \ + monetary->cat = ""; \ + } \ + else \ + monetary->cat = code->val.str.start; \ + break + + STR_ELEM (int_curr_symbol); + STR_ELEM (currency_symbol); + STR_ELEM (mon_decimal_point); + STR_ELEM (mon_thousands_sep); + STR_ELEM (positive_sign); + STR_ELEM (negative_sign); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + if (monetary->cat != -2) \ + lr_error (lr, _("\ +field `%s' in category `%s' declared more than once"), \ + #cat, "LC_MONETARY"); \ + else \ + monetary->cat = code->val.num; \ + break + + INT_ELEM (int_frac_digits); + INT_ELEM (frac_digits); + INT_ELEM (p_cs_precedes); + INT_ELEM (p_sep_by_space); + INT_ELEM (n_cs_precedes); + INT_ELEM (n_sep_by_space); + INT_ELEM (p_sign_posn); + INT_ELEM (n_sign_posn); + + case tok_mon_grouping: + if (monetary->mon_grouping_act == monetary->mon_grouping_max) + { + monetary->mon_grouping_max *= 2; + monetary->mon_grouping = + (char *) xrealloc (monetary->mon_grouping, + monetary->mon_grouping_max); + } + if (monetary->mon_grouping[monetary->mon_grouping_act - 1] + == '\177') + lr_error (lr, _("\ +`-1' must be last entry in `%s' field in `%s' category"), + "mon_grouping", "LC_MONETARY"); + else + { + if (code->tok == tok_minus1) + monetary->mon_grouping[monetary->mon_grouping_act++] = '\177'; + else if (code->val.num == 0) + lr_error (lr, _("\ +values for field `%s' in category `%s' must not be zero"), + "mon_grouping", "LC_MONETARY"); + else if (code->val.num > 126) + lr_error (lr, _("\ +values for field `%s' in category `%s' must be smaller than 127"), + "mon_grouping", "LC_MONETARY"); + else + monetary->mon_grouping[monetary->mon_grouping_act++] + = code->val.num; + } + break; + + default: + assert (! "unknown token in category `LC_MONETARY': should not happen"); + } +} + + +static int +curr_strcmp(const char *s1, const char **s2) +{ + return strcmp (s1, *s2); +} diff --git a/locale/programs/ld-numeric.c b/locale/programs/ld-numeric.c new file mode 100644 index 0000000000..0b5fe2afe5 --- /dev/null +++ b/locale/programs/ld-numeric.c @@ -0,0 +1,208 @@ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <string.h> + +/* Undefine following line in production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + +#include "locales.h" +#include "localeinfo.h" +#include "stringtrans.h" + +void *xmalloc (size_t __n); +void *xrealloc (void *__ptr, size_t __n); + + +/* The real definition of the struct for the LC_NUMERIC locale. */ +struct locale_numeric_t +{ + const char *decimal_point; + const char *thousands_sep; + char *grouping; + size_t grouping_max; + size_t grouping_act; +}; + + +void +numeric_startup (struct linereader *lr, struct localedef_t *locale, + struct charset_t *charset) +{ + struct locale_numeric_t *numeric; + + /* It is important that we always use UCS1 encoding for strings now. */ + encoding_method = ENC_UCS1; + + locale->categories[LC_NUMERIC].numeric = numeric = + (struct locale_numeric_t *) xmalloc (sizeof (struct locale_numeric_t)); + + memset (numeric, '\0', sizeof (struct locale_numeric_t)); + + numeric->grouping_max = 80; + numeric->grouping = (char *) xmalloc (numeric->grouping_max); + numeric->grouping_act = 0; +} + + +void +numeric_finish (struct localedef_t *locale) +{ + struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; + +#define TEST_ELEM(cat) \ + if (numeric->cat == NULL) \ + error (0, 0, _("field `%s' in category `%s' not defined"), \ + #cat, "LC_NUMERIC") + + TEST_ELEM (decimal_point); + TEST_ELEM (thousands_sep); + + /* The decimal point must not be empty. This is not said explicitly + in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be + != "". */ + if (numeric->decimal_point[0] == '\0') + { + error (0, 0, _("\ +value for field `%s' in category `%s' must not be the empty string"), + "decimal_point", "LC_NUMERIC"); + } + + if (numeric->grouping_act == 0) + error (0, 0, _("field `%s' in category `%s' not defined"), + "grouping", "LC_NUMERIC"); +} + + +void +numeric_output (struct localedef_t *locale, const char *output_path) +{ + struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)]; + struct locale_file data; + u32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)]; + size_t cnt = 0; + + if ((locale->binary & (1 << LC_NUMERIC)) != 0) + { + iov[0].iov_base = numeric; + iov[0].iov_len = locale->len[LC_NUMERIC]; + + write_locale_data (output_path, "LC_NUMERIC", 1, iov); + + return; + } + + data.magic = LIMAGIC (LC_NUMERIC); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (numeric->decimal_point ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) (numeric->thousands_sep ?: ""); + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = alloca (numeric->grouping_act + 1); + iov[cnt].iov_len = numeric->grouping_act + 1; + memcpy (iov[cnt].iov_base, numeric->grouping, numeric->grouping_act); + ((char *) iov[cnt].iov_base)[numeric->grouping_act] = '\0'; + + assert (cnt + 1 == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)); + + write_locale_data (output_path, "LC_NUMERIC", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC), iov); +} + + +void +numeric_add (struct linereader *lr, struct localedef_t *locale, + enum token_t tok, struct token *code, + struct charset_t *charset) +{ + struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; + + switch (tok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + if (numeric->cat != NULL) \ + lr_error (lr, _("\ +field `%s' in category `%s' declared more than once"), \ + #cat, "LC_NUMERIC"); \ + else if (code->val.str.start == NULL) \ + { \ + lr_error (lr, _("unknown character in field `%s' of category `%s'"),\ + #cat, "LC_NUMERIC"); \ + numeric->cat = ""; \ + } \ + else \ + numeric->cat = code->val.str.start; \ + break + + STR_ELEM (decimal_point); + STR_ELEM (thousands_sep); + + case tok_grouping: + if (numeric->grouping_act == numeric->grouping_max) + { + numeric->grouping_max *= 2; + numeric->grouping = (char *) xrealloc (numeric->grouping, + numeric->grouping_max); + } + if (numeric->grouping_act > 0 + && (numeric->grouping[numeric->grouping_act - 1] == '\177')) + { + lr_error (lr, _("\ +`-1' must be last entry in `%s' field in `%s' category"), + "grouping", "LC_NUMERIC"); + --numeric->grouping_act; + } + + if (code->tok == tok_minus1) + numeric->grouping[numeric->grouping_act++] = '\177'; + else if (code->val.num > 126) + lr_error (lr, _("\ +values for field `%s' in category `%s' must be smaller than 127"), + "grouping", "LC_NUMERIC"); + else + numeric->grouping[numeric->grouping_act++] = code->val.num; + break; + + default: + assert (! "unknown token in category `LC_NUMERIC': should not happen"); + } +} diff --git a/locale/programs/ld-time.c b/locale/programs/ld-time.c new file mode 100644 index 0000000000..2587faccdc --- /dev/null +++ b/locale/programs/ld-time.c @@ -0,0 +1,310 @@ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <string.h> + +/* Undefine following line in production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + +#include "locales.h" +#include "localeinfo.h" +#include "stringtrans.h" + + +void *xmalloc (size_t __n); + + +/* The real definition of the struct for the LC_TIME locale. */ +struct locale_time_t +{ + const char *abday[7]; + size_t cur_num_abday; + const char *day[7]; + size_t cur_num_day; + const char *abmon[12]; + size_t cur_num_abmon; + const char *mon[12]; + size_t cur_num_mon; + const char *am_pm[2]; + size_t cur_num_am_pm; + const char *d_t_fmt; + const char *d_fmt; + const char *t_fmt; + const char *t_fmt_ampm; + const char *era; + const char *era_year; + const char *era_d_t_fmt; + const char *era_t_fmt; + const char *era_d_fmt; + const char *alt_digits[100]; + size_t cur_num_alt_digits; +}; + + +void +time_startup (struct linereader *lr, struct localedef_t *locale, + struct charset_t *charset) +{ + struct locale_time_t *time; + + /* It is important that we always use UCS1 encoding for strings now. */ + encoding_method = ENC_UCS1; + + locale->categories[LC_TIME].time = time = + (struct locale_time_t *) xmalloc (sizeof (struct locale_time_t)); + + memset (time, '\0', sizeof (struct locale_time_t)); +} + + +void +time_finish (struct localedef_t *locale) +{ + struct locale_time_t *time = locale->categories[LC_TIME].time; + +#define TESTARR_ELEM(cat, max) \ + if (time->cur_num_##cat == 0) \ + error (0, 0, _("field `%s' in category `%s' not defined"), \ + #cat, "LC_TIME"); \ + else if (time->cur_num_##cat != max) \ + error (0, 0, _("field `%s' in category `%s' has not enough values"), \ + #cat, "LC_TIME") + + TESTARR_ELEM (abday, 7); + TESTARR_ELEM (day, 7); + TESTARR_ELEM (abmon, 12); + TESTARR_ELEM (mon, 12); + TESTARR_ELEM (am_pm, 2); + +#define TEST_ELEM(cat) \ + if (time->cat == NULL) \ + error (0, 0, _("field `%s' in category `%s' not defined"), \ + #cat, "LC_TIME") + + TEST_ELEM (d_t_fmt); + TEST_ELEM (d_fmt); + TEST_ELEM (t_fmt); + TEST_ELEM (t_fmt_ampm); +} + + +void +time_output (struct localedef_t *locale, const char *output_path) +{ + struct locale_time_t *time = locale->categories[LC_TIME].time; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_TIME) + + time->cur_num_alt_digits]; + struct locale_file data; + u32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_TIME)]; + size_t cnt, last_idx, num; + + if ((locale->binary & (1 << LC_TIME)) != 0) + { + iov[0].iov_base = time; + iov[0].iov_len = locale->len[LC_TIME]; + + write_locale_data (output_path, "LC_TIME", 1, iov); + + return; + } + + data.magic = LIMAGIC (LC_TIME); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_TIME); + iov[0].iov_base = (void *) &data; + iov[0].iov_len = sizeof (data); + + iov[1].iov_base = (void *) idx; + iov[1].iov_len = sizeof (idx); + + idx[0] = iov[0].iov_len + iov[1].iov_len; + + /* The ab'days. */ + for (cnt = 0; cnt <= _NL_ITEM_INDEX (ABDAY_7); ++cnt) + { + iov[2 + cnt].iov_base = + (void *) (time->abday[cnt - _NL_ITEM_INDEX (ABDAY_1)] ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + } + + /* The days. */ + for (; cnt <= _NL_ITEM_INDEX (DAY_7); ++cnt) + { + iov[2 + cnt].iov_base = + (void *) (time->day[cnt - _NL_ITEM_INDEX (DAY_1)] ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + } + + /* The ab'mons. */ + for (; cnt <= _NL_ITEM_INDEX (ABMON_12); ++cnt) + { + iov[2 + cnt].iov_base = + (void *) (time->abmon[cnt - _NL_ITEM_INDEX (ABMON_1)] ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + } + + /* The mons. */ + for (; cnt <= _NL_ITEM_INDEX (MON_12); ++cnt) + { + iov[2 + cnt].iov_base = + (void *) (time->mon[cnt - _NL_ITEM_INDEX (MON_1)] ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + } + + /* AM/PM. */ + for (; cnt <= _NL_ITEM_INDEX (PM_STR); ++cnt) + { + iov[2 + cnt].iov_base = + (void *) (time->am_pm[cnt - _NL_ITEM_INDEX (AM_STR)] ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + } + + iov[2 + cnt].iov_base = (void *) (time->d_t_fmt ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + iov[2 + cnt].iov_base = (void *) (time->d_fmt ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + iov[2 + cnt].iov_base = (void *) (time->t_fmt ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + iov[2 + cnt].iov_base = (void *) (time->t_fmt_ampm ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + iov[2 + cnt].iov_base = (void *) (time->era ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + iov[2 + cnt].iov_base = (void *) (time->era_year ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + iov[2 + cnt].iov_base = (void *) (time->era_d_fmt ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + last_idx = ++cnt; + + idx[1 + last_idx] = idx[last_idx]; + for (num = 0; num < time->cur_num_alt_digits; ++num, ++cnt) + { + iov[2 + cnt].iov_base = (void *) (time->alt_digits[num] ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + last_idx] += iov[2 + cnt].iov_len; + } + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->era_d_t_fmt ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + + iov[2 + cnt].iov_base = (void *) (time->era_d_fmt ?: ""); + iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; + ++cnt; + + assert (cnt == (_NL_ITEM_INDEX (_NL_NUM_LC_TIME) - 1 + + time->cur_num_alt_digits)); + + write_locale_data (output_path, "LC_TIME", 2 + cnt, iov); +} + + +void +time_add (struct linereader *lr, struct localedef_t *locale, + enum token_t tok, struct token *code, + struct charset_t *charset) +{ + struct locale_time_t *time = locale->categories[LC_TIME].time; + + switch (tok) + { +#define STRARR_ELEM(cat, max) \ + case tok_##cat: \ + if (time->cur_num_##cat >= max) \ + lr_error (lr, _(" \ +too many values for field `%s' in category `LC_TIME'"), \ + #cat, "LC_TIME"); \ + else if (code->val.str.start == NULL) \ + { \ + lr_error (lr, _("unknown character in field `%s' of category `%s'"),\ + #cat, "LC_TIME"); \ + time->cat[time->cur_num_##cat++] = ""; \ + } \ + else \ + time->cat[time->cur_num_##cat++] \ + = code->val.str.start; \ + break + + STRARR_ELEM (abday, 7); + STRARR_ELEM (day, 7); + STRARR_ELEM (abmon, 12); + STRARR_ELEM (mon, 12); + STRARR_ELEM (am_pm, 2); + STRARR_ELEM (alt_digits, 100); + +#define STR_ELEM(cat) \ + case tok_##cat: \ + if (time->cat != NULL) \ + lr_error (lr, _("\ +field `%s' in category `%s' declared more than once"), \ + #cat, "LC_TIME"); \ + else if (code->val.str.start == NULL) \ + { \ + lr_error (lr, _("unknown character in field `%s' of category `%s'"),\ + #cat, "LC_TIME"); \ + time->cat = ""; \ + } \ + else \ + time->cat = code->val.str.start; \ + break + + STR_ELEM (d_t_fmt); + STR_ELEM (d_fmt); + STR_ELEM (t_fmt); + STR_ELEM (t_fmt_ampm); + STR_ELEM (era); + STR_ELEM (era_year); + STR_ELEM (era_d_t_fmt); + STR_ELEM (era_d_fmt); + STR_ELEM (era_t_fmt); + + default: + assert (! "unknown token in category `LC_TIME': should not happen"); + } +} diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c new file mode 100644 index 0000000000..e4a1305712 --- /dev/null +++ b/locale/programs/linereader.c @@ -0,0 +1,579 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <ctype.h> +#include <errno.h> +#include <libintl.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> + +#include "error.h" +#include "linereader.h" +#include "charset.h" +#include "stringtrans.h" + + +void *xmalloc (size_t __n); +void *xrealloc (void *__p, size_t __n); +char *xstrdup (const char *__str); + + +static struct token *get_toplvl_escape (struct linereader *lr); +static struct token *get_symname (struct linereader *lr); +static struct token *get_ident (struct linereader *lr); +static struct token *get_string (struct linereader *lr, + const struct charset_t *charset); + + +struct linereader * +lr_open (const char *fname, kw_hash_fct_t hf) +{ + FILE *fp; + struct linereader *result; + int n; + + if (fname == NULL || strcmp (fname, "-") == 0 + || strcmp (fname, "/dev/stdin") == 0) + fp = stdin; + else + { + fp = fopen (fname, "r"); + if (fp == NULL) + return NULL; + } + + result = (struct linereader *) xmalloc (sizeof (*result)); + + result->fp = fp; + result->fname = xstrdup (fname); + result->buf = NULL; + result->bufsize = 0; + result->lineno = 1; + result->idx = 0; + result->comment_char = '#'; + result->escape_char = '\\'; + result->translate_strings = 1; + + n = getdelim (&result->buf, &result->bufsize, '\n', result->fp); + if (n < 0) + { + int save = errno; + fclose (result->fp); + free (result); + errno = save; + return NULL; + } + + if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n') + n -= 2; + + result->buf[n] = '\0'; + result->bufact = n; + result->hash_fct = hf; + + return result; +} + + +int +lr_eof (struct linereader *lr) +{ + return lr->bufact = 0; +} + + +void +lr_close (struct linereader *lr) +{ + fclose (lr->fp); + free (lr->buf); + free (lr); +} + + +int +lr_next (struct linereader *lr) +{ + int n; + + n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp); + if (n < 0) + return -1; + + ++lr->lineno; + + if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n') + { + /* An escaped newline character is substituted with a single <SP>. */ + --n; + lr->buf[n - 1] = ' '; + } + + lr->buf[n] = '\0'; + lr->bufact = n; + lr->idx = 0; + + return 0; +} + + +/* Defined in error.c. */ +/* This variable is incremented each time `error' is called. */ +extern unsigned int error_message_count; + +/* The calling program should define program_name and set it to the + name of the executing program. */ +extern char *program_name; + + +struct token * +lr_token (struct linereader *lr, const struct charset_t *charset) +{ + int ch; + + while (1) + { + do + { + ch = lr_getc (lr); + + if (ch == '\n') + { + lr->token.tok = tok_eol; + return &lr->token; + } + } + while (isspace (ch)); + + if (ch == EOF) + { + lr->token.tok = tok_eof; + return &lr->token; + }; + + if (ch != lr->comment_char) + break; + + /* Ignore rest of line. */ + lr_ignore_rest (lr, 0); + lr->token.tok = tok_eol; + return &lr->token; + } + + /* Match escape sequences. */ + if (ch == lr->escape_char) + return get_toplvl_escape (lr); + + /* Match ellipsis. */ + if (ch == '.' && strncmp (&lr->buf[lr->idx], "..", 2) == 0) + { + lr_getc (lr); + lr_getc (lr); + lr->token.tok = tok_ellipsis; + return &lr->token; + } + + switch (ch) + { + case '<': + return get_symname (lr); + + case '0' ... '9': + lr->token.tok = tok_number; + lr->token.val.num = ch - '0'; + + while (isdigit (ch = lr_getc (lr))) + { + lr->token.val.num *= 10; + lr->token.val.num += ch - '0'; + } + if (isalpha (ch)) + lr_error (lr, _("garbage at end of digit")); + lr_ungetn (lr, 1); + + return &lr->token; + + case ';': + lr->token.tok = tok_semicolon; + return &lr->token; + + case ',': + lr->token.tok = tok_comma; + return &lr->token; + + case '(': + lr->token.tok = tok_open_brace; + return &lr->token; + + case ')': + lr->token.tok = tok_close_brace; + return &lr->token; + + case '"': + return get_string (lr, charset); + + case '-': + ch = lr_getc (lr); + if (ch == '1') + { + lr->token.tok = tok_minus1; + return &lr->token; + } + lr_ungetn (lr, 2); + break; + } + + return get_ident (lr); +} + + +static struct token * +get_toplvl_escape (struct linereader *lr) +{ + /* This is supposed to be a numeric value. We return the + numerical value and the number of bytes. */ + size_t start_idx = lr->idx - 1; + unsigned int value = 0; + int nbytes = 0; + int ch; + + do + { + unsigned int byte = 0; + unsigned int base = 8; + + ch = lr_getc (lr); + + if (ch == 'd') + { + base = 10; + ch = lr_getc (lr); + } + else if (ch == 'x') + { + base = 16; + ch = lr_getc (lr); + } + + if ((base == 16 && !isxdigit (ch)) + || (base != 16 && (ch < '0' || ch >= '0' + base))) + { + esc_error: + lr->token.val.str.start = &lr->buf[start_idx]; + + while (ch != EOF || !isspace (ch)) + ch = lr_getc (lr); + lr->token.val.str.len = lr->idx - start_idx; + + lr->token.tok = tok_error; + return &lr->token; + } + + if (isdigit (ch)) + byte = ch - '0'; + else + byte = tolower (ch) - 'a' + 10; + + ch = lr_getc (lr); + if ((base == 16 && !isxdigit (ch)) + || (base != 16 && (ch < '0' || ch >= '0' + base))) + goto esc_error; + + byte *= base; + if (isdigit (ch)) + byte += ch - '0'; + else + byte += tolower (ch) - 'a' + 10; + + ch = lr_getc (lr); + if (base != 16 && isdigit (ch)) + { + byte *= base; + base += ch - '0'; + + ch = lr_getc (lr); + } + + value *= 256; + value += byte; + + ++nbytes; + } + while (ch == lr->escape_char && nbytes < 4); + + if (!isspace (ch)) + lr_error (lr, _("garbage at end of character code specification")); + + lr_ungetn (lr, 1); + + lr->token.tok = tok_charcode; + lr->token.val.charcode.val = value; + lr->token.val.charcode.nbytes = nbytes; + + return &lr->token; +} + + +#define ADDC(ch) \ + do \ + { \ + if (bufact == bufmax) \ + { \ + bufmax *= 2; \ + buf = xrealloc (buf, bufmax); \ + } \ + buf[bufact++] = (ch); \ + } \ + while (0) + + +static struct token * +get_symname (struct linereader *lr) +{ + /* Symbol in brackets. We must distinguish three kinds: + 1. reserved words + 2. ISO 10646 position values + 3. all other. */ + char *buf; + size_t bufact = 0; + size_t bufmax = 56; + const struct keyword_t *kw; + int ch; + + buf = (char *) xmalloc (bufmax); + + do + { + ch = lr_getc (lr); + if (ch == lr->escape_char) + { + int c2 = lr_getc (lr); + ADDC (c2); + + if (c2 == '\n') + ch = '\n'; + } + else + ADDC (ch); + } + while (ch != '>' && ch != '\n'); + + if (ch == '\n') + lr_error (lr, _("unterminated symbolic name")); + + /* Test for ISO 10646 position value. */ + if (buf[0] == 'U' && (bufact == 6 || bufact == 10)) + { + char *cp = buf + 1; + while (cp < &buf[bufact - 1] && isxdigit (*cp)) + ++cp; + + if (cp == &buf[bufact - 1]) + { + /* Yes, it is. */ + lr->token.tok = bufact == 6 ? tok_ucs2 : tok_ucs4; + lr->token.val.charcode.val = strtoul (buf, NULL, 16); + lr->token.val.charcode.nbytes = lr->token.tok == tok_ucs2 ? 2 : 4; + + return &lr->token; + } + } + + /* It is a symbolic name. Test for reserved words. */ + kw = lr->hash_fct (buf, bufact - 1); + + if (kw != NULL && kw->symname_or_ident == 1) + { + lr->token.tok = kw->token; + free (buf); + } + else + { + lr->token.tok = tok_bsymbol; + + buf[bufact] = '\0'; + buf = xrealloc (buf, bufact + 1); + + lr->token.val.str.start = buf; + lr->token.val.str.len = bufact - 1; + } + + return &lr->token; +} + + +static struct token * +get_ident (struct linereader *lr) +{ + char *buf; + size_t bufact; + size_t bufmax = 56; + const struct keyword_t *kw; + int ch; + + buf = xmalloc (bufmax); + bufact = 0; + + ADDC (lr->buf[lr->idx - 1]); + + while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';' + && ch != '<' && ch != ',') + /* XXX Handle escape sequences? */ + ADDC (ch); + + lr_ungetn (lr, 1); + + kw = lr->hash_fct (buf, bufact); + + if (kw != NULL && kw->symname_or_ident == 0) + { + lr->token.tok = kw->token; + free (buf); + } + else + { + lr->token.tok = tok_ident; + + buf[bufact] = '\0'; + buf = xrealloc (buf, bufact + 1); + + lr->token.val.str.start = buf; + lr->token.val.str.len = bufact; + } + + return &lr->token; +} + + +static struct token * +get_string (struct linereader *lr, const struct charset_t *charset) +{ + int illegal_string = 0; + char *buf, *cp; + size_t bufact; + size_t bufmax = 56; + int ch; + + buf = xmalloc (bufmax); + bufact = 0; + + while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) + if (ch != '<' || charset == NULL) + { + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + break; + } + ADDC (ch); + } + else + { + /* We have to get the value of the symbol. */ + unsigned int value; + size_t startidx = bufact; + + if (!lr->translate_strings) + ADDC ('<'); + + while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF) + { + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + break; + } + ADDC (ch); + } + + if (ch == '\n' || ch == EOF) + lr_error (lr, _("unterminated string")); + else + if (!lr->translate_strings) + ADDC ('>'); + + if (lr->translate_strings) + { + value = charset_find_value (charset, &buf[startidx], + bufact - startidx); + if (value == ILLEGAL_CHAR_VALUE) + illegal_string = 1; + bufact = startidx; + + if (bufmax - bufact < 8) + { + bufmax *= 2; + buf = (char *) xrealloc (buf, bufmax); + } + + cp = &buf[bufact]; + if (encode_char (value, &cp)) + illegal_string = 1; + + bufact = cp - buf; + } + } + + /* Catch errors with trailing escape character. */ + if (bufact > 0 && buf[bufact - 1] == lr->escape_char + && (bufact == 1 || buf[bufact - 2] != lr->escape_char)) + { + lr_error (lr, _("illegal escape sequence at end of string")); + --bufact; + } + else if (ch == '\n' || ch == EOF) + lr_error (lr, _("unterminated string")); + + /* Terminate string if necessary. */ + if (lr->translate_strings) + { + cp = &buf[bufact]; + if (encode_char (0, &cp)) + illegal_string = 1; + + bufact = cp - buf; + } + else + ADDC ('\0'); + + lr->token.tok = tok_string; + + if (illegal_string) + { + free (buf); + lr->token.val.str.start = NULL; + lr->token.val.str.len = 0; + } + else + { + buf = xrealloc (buf, bufact + 1); + + lr->token.val.str.start = buf; + lr->token.val.str.len = bufact; + } + + return &lr->token; +} diff --git a/locale/programs/linereader.h b/locale/programs/linereader.h new file mode 100644 index 0000000000..b78697e87d --- /dev/null +++ b/locale/programs/linereader.h @@ -0,0 +1,158 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifndef _LINEREADER_H +#define _LINEREADER_H 1 + +#include <ctype.h> +#include <libintl.h> +#include <stdio.h> + +#include "error.h" +#include "locfile-token.h" + + +typedef const struct keyword_t *(*kw_hash_fct_t) (const char *, int); +struct charset_t; + + +struct token +{ + enum token_t tok; + union + { + struct + { + char *start; + size_t len; + } str; + unsigned long int num; + struct + { + unsigned int val; + int nbytes; + } charcode; + } val; +}; + + +struct linereader +{ + FILE *fp; + const char *fname; + char *buf; + size_t bufsize; + size_t bufact; + size_t lineno; + + size_t idx; + + char comment_char; + char escape_char; + + struct token token; + + int translate_strings; + + kw_hash_fct_t hash_fct; +}; + + +/* Functions defined in linereader.c. */ +struct linereader *lr_open (const char *fname, kw_hash_fct_t hf); +int lr_eof (struct linereader *lr); +void lr_close (struct linereader *lr); +int lr_next (struct linereader *lr); +struct token *lr_token (struct linereader *lr, + const struct charset_t *charset); + + +#define lr_error(lr, fmt, args...) \ + error_at_line (0, 0, lr->fname, lr->lineno, fmt, ## args) + + + +static inline int +lr_getc (struct linereader *lr) +{ + if (lr->idx == lr->bufact) + { + if (lr->bufact != 0) + if (lr_next (lr) < 0) + return EOF; + + if (lr->bufact == 0) + return EOF; + } + + return lr->buf[lr->idx] == '\32' ? EOF : lr->buf[lr->idx++]; +} + + +static inline int +lr_ungetc (struct linereader *lr, int ch) +{ + if (lr->idx == 0) + return -1; + + lr->buf[--lr->idx] = ch; + return 0; +} + + +static inline int +lr_ungetn (struct linereader *lr, int n) +{ + if (lr->idx < n) + return -1; + + lr->idx -= n; + return 0; +} + + +static inline void +lr_ignore_rest (struct linereader *lr, int verbose) +{ + if (verbose) + { + while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n' + && lr->buf[lr->idx] != lr->comment_char) + if (lr->buf[lr->idx] == '\0') + { + if (lr_next (lr) < 0) + return; + } + else + ++lr->idx; + + if (lr->buf[lr->idx] != '\n' &&lr->buf[lr->idx] != lr->comment_char) + lr_error (lr, _("trailing garbage at end of line")); + } + + /* Ignore continued line. */ + while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n') + if (lr_next (lr) < 0) + break; + + lr->idx = lr->bufact; +} + + +#endif /* linereader.h */ diff --git a/locale/programs/locale.c b/locale/programs/locale.c new file mode 100644 index 0000000000..4e4ff83a37 --- /dev/null +++ b/locale/programs/locale.c @@ -0,0 +1,544 @@ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#include <dirent.h> +#include <getopt.h> +#include <langinfo.h> +#include <libintl.h> +#include <limits.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> + +/*#include "localedef.h"*/ +#include "localeinfo.h" + + +/* If set dump C code describing the current locale. */ +static int do_dump; + +/* If set print the name of the category. */ +static int show_category_name; + +/* If set print the name of the item. */ +static int show_keyword_name; + +/* Long options. */ +static const struct option long_options[] = +{ + { "all-locales", no_argument, NULL, 'a' }, + { "category-name", no_argument, &show_category_name, 1 }, + { "charmaps", no_argument, NULL, 'm' }, + { "dump", no_argument, &do_dump, 1 }, + { "help", no_argument, NULL, 'h' }, + { "keyword-name", no_argument, &show_keyword_name, 1 }, + { "version", no_argument, NULL, 'v' }, + { NULL, 0, NULL, 0 } +}; + + +/* We don't have these constants defined because we don't use them. Give + default values. */ +#define CTYPE_MB_CUR_MIN 0 +#define CTYPE_MB_CUR_MAX 0 +#define CTYPE_HASH_SIZE 0 +#define CTYPE_HASH_LAYERS 0 +#define CTYPE_CLASS 0 +#define CTYPE_TOUPPER_EB 0 +#define CTYPE_TOLOWER_EB 0 +#define CTYPE_TOUPPER_EL 0 +#define CTYPE_TOLOWER_EL 0 + +/* XXX Hack */ +struct cat_item +{ + int item_id; + const char *name; + enum { std, opt } status; + enum value_type value_type; + int min; + int max; +}; + + +/* We have all categories defined in `categories.def'. Now construct + the description and data structure used for all categories. */ +#define DEFINE_CATEGORY(category, name, items, postload, in, check, out) \ + static struct cat_item category##_desc[] = \ + { \ + NO_PAREN items \ + }; + +#include "locale/aux/categories.def" +#undef DEFINE_CATEGORY + +static struct category category[] = + { +#define DEFINE_CATEGORY(category, name, items, postload, in, check, out) \ + { _NL_NUM_##category, name, NELEMS (category##_desc) - 1, \ + category##_desc, NULL, NULL, NULL, out }, +#include "locale/aux/categories.def" +#undef DEFINE_CATEGORY + }; +#define NCATEGORIES NELEMS (category) + + +/* Prototypes for local functions. */ +static void usage (int status) __attribute__ ((noreturn)); +static void write_locales (void); +static void write_charmaps (void); +static void show_locale_vars (void); +static void show_info (const char *name); +static void dump_category (const char *name); + + +int +main (int argc, char *argv[]) +{ + int optchar; + int do_all = 0; + int do_help = 0; + int do_version = 0; + int do_charmaps = 0; + + /* Set initial values for global varaibles. */ + do_dump = 0; + show_category_name = 0; + show_keyword_name = 0; + + /* Set locale. Do not set LC_ALL because the other categories must + not be affected (acccording to POSIX.2). */ + setlocale (LC_CTYPE, ""); + setlocale (LC_MESSAGES, ""); + + /* Initialize the message catalog. */ + textdomain (PACKAGE); + + while ((optchar = getopt_long (argc, argv, "achkmv", long_options, NULL)) + != EOF) + switch (optchar) + { + case '\0': + break; + case 'a': + do_all = 1; + break; + case 'c': + show_category_name = 1; + break; + case 'h': + do_help = 1; + break; + case 'k': + show_keyword_name = 1; + break; + case 'm': + do_charmaps = 1; + break; + case 'v': + do_version = 1; + break; + default: + error (1, 0, gettext ("illegal option \"%s\""), optarg); + break; + } + + /* Version information is requested. */ + if (do_version) + { + fprintf (stderr, "GNU %s %s\n", PACKAGE, VERSION); + exit (EXIT_SUCCESS); + } + + /* Help is requested. */ + if (do_help) + usage (EXIT_SUCCESS); + + /* Dump C code. */ + if (do_dump) + { + printf ("\ +/* Generated by GNU %s %s. */\n\ +\n\ +#include \"localeinfo.h\"\n", program_invocation_name, VERSION); + + while (optind < argc) + dump_category (argv[optind++]); + + exit (EXIT_SUCCESS); + } + + /* `-a' requests the names of all available locales. */ + if (do_all != 0) + { + write_locales (); + exit (EXIT_SUCCESS); + } + + /* `m' requests the names of all available charmaps. The names can be + used for the -f argument to localedef(3). */ + if (do_charmaps != 0) + { + write_charmaps (); + exit (EXIT_SUCCESS); + } + + /* If no real argument is given we have to print the contents of the + current locale definition variables. These are LANG and the LC_*. */ + if (optind == argc && show_keyword_name == 0 && show_category_name == 0) + { + show_locale_vars (); + exit (EXIT_SUCCESS); + } + + /* Process all given names. */ + while (optind < argc) + show_info (argv[optind++]); + + exit (EXIT_SUCCESS); +} + + +/* Display usage information and exit. */ +static void +usage(int status) +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, gettext ("Try `%s --help' for more information.\n"), + program_invocation_name); + else + printf(gettext ("\ +Usage: %s [OPTION]... name\n\ +Mandatory arguments to long options are mandatory for short options too.\n\ + -h, --help display this help and exit\n\ + -v, --version output version information and exit\n\ +\n\ + -a, --all-locales write names of available locales\n\ + -m, --charmaps write names of available charmaps\n\ +\n\ + -c, --category-name write names of selected categories\n\ + -k, --keyword-name write names of selected keywords\n\ +\n\ + --dump dump C code describing the current locale\n\ + (this code can be used in the C library)\n\ +"), program_invocation_name); + + exit (status); +} + + +/* Write the names of all available locales to stdout. */ +static void +write_locales (void) +{ + DIR *dir; + struct dirent *dirent; + + /* `POSIX' locale is always available (POSIX.2 4.34.3). */ + puts ("POSIX"); + + dir = opendir (LOCALE_PATH); + if (dir == NULL) + { + error (1, errno, gettext ("cannot read locale directory `%s'"), + LOCALE_PATH); + return; + } + + /* Now we can look for all files in the directory. */ + while ((dirent = readdir (dir)) != NULL) + if (strcmp (dirent->d_name, ".") != 0 + && strcmp (dirent->d_name, "..") != 0) + puts (dirent->d_name); + + closedir (dir); +} + + +/* Write the names of all available character maps to stdout. */ +static void +write_charmaps (void) +{ + DIR *dir; + struct dirent *dirent; + + dir = opendir (CHARMAP_PATH); + if (dir == NULL) + { + error (1, errno, gettext ("cannot read character map directory `%s'"), + CHARMAP_PATH); + return; + } + + /* Now we can look for all files in the directory. */ + while ((dirent = readdir (dir)) != NULL) + if (strcmp (dirent->d_name, ".") != 0 + && strcmp (dirent->d_name, "..") != 0) + puts (dirent->d_name); + + closedir (dir); +} + + +/* We have to show the contents of the environments determining the + locale. */ +static void +show_locale_vars (void) +{ + size_t cat_no; + const char *lcall = getenv ("LC_ALL"); + const char *lang = getenv ("LANG") ? : "POSIX"; + + void get_source (const char *name) + { + char *val = getenv (name); + + if (lcall != NULL || val == NULL) + printf ("%s=\"%s\"\n", name, lcall ? : lang); + else + printf ("%s=%s\n", name, val); + } + + /* LANG has to be the first value. */ + printf ("LANG=%s\n", lang); + + /* Now all categories in an unspecified order. */ + for (cat_no = 0; cat_no < NCATEGORIES; ++cat_no) + get_source (category[cat_no].name); + + /* The last is the LC_ALL value. */ + printf ("LC_ALL=%s\n", lcall ? : ""); +} + + +/* Show the information request for NAME. */ +static void +show_info (const char *name) +{ + size_t cat_no; + + void print_item (struct cat_item *item) + { + if (show_keyword_name != 0) + printf ("%s=", item->name); + + switch (item->value_type) + { + case string: + printf ("%s%s%s", show_keyword_name ? "\"" : "", + nl_langinfo (item->item_id) ? : "", + show_keyword_name ? "\"" : ""); + break; + case stringarray: + { + int cnt; + const char *val; + + if (show_keyword_name) + putchar ('"'); + + for (cnt = 0; cnt < item->max - 1; ++cnt) + { + val = nl_langinfo (item->item_id + cnt); + printf ("%s;", val ? : ""); + } + + val = nl_langinfo (item->item_id + cnt); + printf ("%s", val ? : ""); + + if (show_keyword_name) + putchar ('"'); + } + break; + case byte: + { + const char *val = nl_langinfo (item->item_id); + + if (val != NULL) + printf ("%d", *val == CHAR_MAX ? -1 : *val); + } + break; + case bytearray: + { + const char *val = nl_langinfo (item->item_id); + int cnt = val ? strlen (val) : 0; + + while (cnt > 1) + { + printf ("%d;", *val == CHAR_MAX ? -1 : *val); + --cnt; + ++val; + } + + printf ("%d", cnt == 0 || *val == CHAR_MAX ? -1 : *val); + } + break; + default: + } + putchar ('\n'); + } + + for (cat_no = 0; cat_no < NCATEGORIES; ++cat_no) + { + size_t item_no; + + if (category[cat_no].outfct != NULL) + /* Categories which need special handling of the output are + not written. This is especially for LC_CTYPE and LC_COLLATE. + It does not make sense to have this large number of cryptic + characters displayed. */ + continue; + + if (strcmp (name, category[cat_no].name) == 0) + /* Print the whole category. */ + { + if (show_category_name != 0) + puts (category[cat_no].name); + + for (item_no = 0; item_no < category[cat_no].number; ++item_no) + print_item (&category[cat_no].item_desc[item_no]); + + return; + } + + for (item_no = 0; item_no < category[cat_no].number; ++item_no) + if (strcmp (name, category[cat_no].item_desc[item_no].name) == 0) + { + if (show_category_name != 0) + puts (category[cat_no].name); + + print_item (&category[cat_no].item_desc[item_no]); + return; + } + } +} + + +static void +dump_category (const char *name) +{ + char *locname; + size_t cat_no, item_no, nstrings; + + for (cat_no = 0; cat_no < NCATEGORIES; ++cat_no) + if (strcmp (name, category[cat_no].name) == 0) + break; + + if (cat_no >= NCATEGORIES) + return; + + /* The NAME specifies a correct locale category. */ + if (category[cat_no].outfct != NULL) + { + category[cat_no].outfct (); + return; + } + + locname = (getenv ("LC_ALL") ?: getenv (name) ?: + getenv ("LANG") ?: (char *) "POSIX"); + + /* Determine the number of strings in advance. */ + nstrings = 0; + for (item_no = 0; item_no < category[cat_no].number; ++item_no) + switch (category[cat_no].item_desc[item_no].value_type) + { + case string: + case byte: + case bytearray: + ++nstrings; + break; + case stringarray: + nstrings += category[cat_no].item_desc[item_no].max; + default: + } + + printf ("\nconst struct locale_data _nl_%s_%s =\n{\n" + " NULL, 0, /* no file mapped */\n %Zu,\n {\n", + locname, name, nstrings); + + for (item_no = 0; item_no < category[cat_no].number; ++item_no) + switch (category[cat_no].item_desc[item_no].value_type) + { + case string: + { + const char *val = nl_langinfo ( + category[cat_no].item_desc[item_no].item_id); + + if (val != NULL) + printf (" \"%s\",\n", val); + else + puts (" NULL,"); + } + break; + case stringarray: + { + const char *val; + int cnt; + + for (cnt = 0; cnt < category[cat_no].item_desc[item_no].max; ++cnt) + { + val = nl_langinfo ( + category[cat_no].item_desc[item_no].item_id + cnt); + + if (val != NULL) + printf (" \"%s\",\n", val); + else + puts (" NULL,"); + } + } + break; + case byte: + { + const char *val = nl_langinfo ( + category[cat_no].item_desc[item_no].item_id); + + if (val != NULL) + printf (" \"\\%o\",\n", + *(unsigned char *) val ? : UCHAR_MAX); + else + puts (" NULL,"); + } + break; + case bytearray: + { + const char *bytes = nl_langinfo ( + category[cat_no].item_desc[item_no].item_id); + + if (bytes != NULL) + { + fputs (" \"", stdout); + if (*bytes != '\0') + do + printf ("\\%o", *(unsigned char *) bytes++); + while (*bytes != '\0'); + else + printf ("\\%o", UCHAR_MAX); + + puts ("\","); + } + else + puts (" NULL,"); + } + break; + default: + break; + } + + puts (" }\n};"); +} diff --git a/locale/programs/localedef.c b/locale/programs/localedef.c new file mode 100644 index 0000000000..a98bac4301 --- /dev/null +++ b/locale/programs/localedef.c @@ -0,0 +1,461 @@ +/* Copyright (C) 1995, 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <libintl.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "error.h" +#include "charset.h" +#include "locfile.h" + +/* Undefine the following line in the production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + + +/* List of locale definition files which are used in `copy' instructions. */ +struct copy_def_list_t +{ + struct copy_def_list_t *next; + + const char *name; + int mask; + + struct localedef_t *locale; + + struct + { + void *data; + size_t len; + } binary[6]; +}; + + +/* List of copied locales. */ +struct copy_def_list_t *copy_list; + +/* If this is defined be POSIX conform. */ +int posix_conformance; + +/* Name of the running program. */ +const char *program_name; + +/* If not zero give a lot more messages. */ +int verbose; + + + +/* Long options. */ +static const struct option long_options[] = +{ + { "charmap", required_argument, NULL, 'f' }, + { "code-set-name", required_argument, NULL, 'u' }, + { "help", no_argument, NULL, 'h' }, + { "force", no_argument, NULL, 'c' }, + { "inputfile", required_argument, NULL, 'i' }, + { "posix", no_argument, &posix_conformance, 1 }, + { "verbose", no_argument, &verbose, 1}, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } +}; + + +/* Prototypes for global functions. */ +void *xmalloc (size_t __n); + +/* Prototypes for local functions. */ +static void usage (int status) __attribute__ ((noreturn)); +static void error_print (void); +static const char *construct_output_path (const char *path); + + +int +main (int argc, char *argv[]) +{ + int optchar; + int do_help = 0; + int do_version = 0; + int force_output = 0; + const char *charmap_file = NULL; + const char *input_file = NULL; + const char *ucs_csn = NULL; + const char *output_path; + int cannot_write_why; + struct charset_t *charset; + struct localedef_t *localedef; + struct copy_def_list_t *act_add_locdef; + + /* Set initial values for global varaibles. */ + copy_list = NULL; + posix_conformance = getenv ("POSIXLY_CORRECT") != NULL; + program_name = argv[0]; + error_print_progname = error_print; + verbose = 0; + + /* Set locale. Do not set LC_ALL because the other categories must + not be affected (acccording to POSIX.2). */ + setlocale (LC_MESSAGES, ""); + setlocale (LC_CTYPE, ""); + + /* Initialize the message catalog. */ +#if 0 + /* In the final version for glibc we can use the variable. */ + textdomain (_libc_intl_domainname); +#else + textdomain ("SYS_libc"); +#endif + + while ((optchar = getopt_long (argc, argv, "cf:hi:u:vV", long_options, NULL)) + != EOF) + switch (optchar) + { + case '\0': /* Long option. */ + break; + + case 'c': + force_output = 1; + break; + + case 'f': + charmap_file = optarg; + break; + + case 'h': + do_help = 1; + break; + + case 'i': + input_file = optarg; + break; + + case 'u': + ucs_csn = optarg; + break; + + case 'v': + verbose = 1; + break; + + case 'V': + do_version = 1; + break; + + default: + usage (4); /* A value >3 is forced by POSIX. */ + break; + } + + /* POSIX.2 requires to be verbose about missing characters in the + character map. */ + verbose |= posix_conformance; + + /* Version information is requested. */ + if (do_version) + { + fprintf (stderr, "%s - GNU %s %s\n", program_name, PACKAGE, VERSION); + exit (0); + } + + /* Help is requested. */ + if (do_help) + /* Possible violation: POSIX.2 4.35.8 defines the return value 0 as + "No errors occured and the locale(s) were successfully created." + But giving a other value than 0 does not make sense here. It + is perhaps not that important because POSIX does not specify the + -h option for localedef. */ + usage (0); + + if (argc - optind != 1) + /* We need exactly one non-option parameter. */ + usage (4); + + /* The parameter describes the output path of the constructed files. + If the described files cannot be written return a NULL pointer. */ + output_path = construct_output_path (argv[optind]); + cannot_write_why = errno; + + /* Now that the parameters are processed we have to reset the local + ctype locale. (P1003.2 4.35.5.2) */ + setlocale (LC_CTYPE, "POSIX"); + + /* Look whether the system really allows locale definitions. POSIX + defines error code 3 for this situation so I think it must be + a fatal error (see P1003.2 4.35.8). */ + if (sysconf (_SC_2_LOCALEDEF) < 0) + error (3, 0, _("FATAL: system does not define `_POSIX2_LOCALEDEF'")); + + /* Process charmap file. */ + charset = charmap_read (charmap_file); + + /* Now read the locale file. */ + localedef = locfile_read (input_file, charset); + if (localedef->failed != 0) + error (4, errno, _("cannot open locale definition file `%s'"), input_file); + + /* Perhaps we saw some `copy' instructions. Process the given list. + We use a very simple algorithm: we look up the list from the + beginning every time. */ + do + { + int cat; + + for (act_add_locdef = copy_list; act_add_locdef != NULL; + act_add_locdef = act_add_locdef->next) + { + for (cat = LC_COLLATE; cat <= LC_MESSAGES; ++cat) + if ((act_add_locdef->mask & (1 << cat)) != 0) + { + act_add_locdef->mask &= ~(1 << cat); + break; + } + if (cat <= LC_MESSAGES) + break; + } + + if (act_add_locdef != NULL) + { + int avail = 0; + + if (act_add_locdef->locale == NULL) + act_add_locdef->locale = locfile_read (act_add_locdef->name, + charset); + + if (! act_add_locdef->locale->failed) + { + avail = act_add_locdef->locale->categories[cat].generic != NULL; + if (avail) + localedef->categories[cat].generic + = act_add_locdef->locale->categories[cat].generic; + } + + if (! avail) + { + const char *locale_names[] = { "LC_COLLATE", "LC_CTYPE", + "LC_MONETARY", "LC_NUMERIC", + "LC_TIME", "LC_MESSAGES" }; + char *fname; + int fd; + struct stat st; + + asprintf (&fname, LOCALE_PATH "/%s/%s", act_add_locdef->name, + locale_names[cat]); + fd = open (fname, O_RDONLY); + if (fd == -1) + { + free (fname); + + asprintf (&fname, LOCALE_PATH "/%s/%s/SYS_%s", + act_add_locdef->name, locale_names[cat], + locale_names[cat]); + + fd = open (fname, O_RDONLY); + if (fd == -1) + error (5, 0, _("\ +locale file `%s', used in `copy' statement, not found"), + act_add_locdef->name); + } + + if (fstat (fd, &st) < 0) + error (5, errno, _("\ +cannot `stat' locale file `%s'"), + fname); + + localedef->len[cat] = st.st_size; + localedef->categories[cat].generic + = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (localedef->categories[cat].generic == (void *) -1) + { + size_t left = st.st_size; + void *read_ptr; + + localedef->categories[cat].generic + = xmalloc (st.st_size); + read_ptr = localedef->categories[cat].generic; + + do + { + long int n; + n = read (fd, read_ptr, left); + if (n == 1) + error (5, errno, _("cannot read locale file `%s'"), + fname); + read_ptr += n; + left -= n; + } + while (left > 0); + } + + close (fd); + free (fname); + + localedef->binary |= 1 << cat; + } + } + } + while (act_add_locdef != NULL); + + /* Check the categories we processed in source form. */ + check_all_categories (localedef, charset); + + /* We are now able to write the data files. If warning were given we + do it only if it is explicitly requested (--force). */ + if (error_message_count == 0 || force_output != 0) + { + if (cannot_write_why != 0) + error (4, cannot_write_why, _("cannot write output files to `%s'"), + output_path); + else + write_all_categories (localedef, output_path); + } + else + error (4, 0, _("no output file produced because warning were issued")); + + /* This exit status is prescribed by POSIX.2 4.35.7. */ + exit (error_message_count != 0); +} + + +void +def_to_process (const char *name, int category) +{ + struct copy_def_list_t *new, **rp; + + for (rp = ©_list; *rp != NULL; rp = &(*rp)->next) + if (strcmp (name, (*rp)->name) == 0) + break; + + if (*rp == NULL) + { + size_t cnt; + + *rp = (struct copy_def_list_t *) xmalloc (sizeof (**rp)); + + (*rp)->next = NULL; + (*rp)->name = name; + (*rp)->mask = 0; + (*rp)->locale = NULL; + + for (cnt = 0; cnt < 6; ++cnt) + { + (*rp)->binary[cnt].data = NULL; + (*rp)->binary[cnt].len = 0; + } + } + new = *rp; + + if ((new->mask & category) != 0) + /* We already have the information. This cannot happen. */ + error (5, 0, _("\ +category data requested more than once: should not happen")); + + new->mask |= category; +} + + +/* Display usage information and exit. */ +static void +usage (int status) +{ + if (status != 0) + fprintf (stderr, _("Try `%s --help' for more information.\n"), + program_name); + else + printf (_("\ +Usage: %s [OPTION]... name\n\ +Mandatory arguments to long options are mandatory for short options too.\n\ + -c, --force create output even if warning messages were issued\n\ + -h, --help display this help and exit\n\ + -f, --charmap=FILE symbolic character names defined in FILE\n\ + -i, --inputfile=FILE source definitions are found in FILE\n\ + -u, --code-set-name=NAME specify code set for mapping ISO 10646 elements\n\ + -v, --verbose print more messages\n\ + -V, --version output version information and exit\n\ + --posix be strictly POSIX conform\n\ +\n\ +System's directory for character maps: %s\n\ + locale files : %s\n"), + program_name, CHARMAP_PATH, LOCALE_PATH); + + exit (status); +} + + +/* The address of this function will be assigned to the hook in the error + functions. */ +static void +error_print () +{ + /* We don't want the program name to be printed in messages. Emacs' + compile.el does not like this. */ +} + + +/* The parameter to localedef describes the output path. If it does + contain a '/' character it is a relativ path. Otherwise it names the + locale this definition is for. */ +static const char * +construct_output_path (const char *path) +{ + char *result; + + if (strchr (path, '/') == NULL) + { + /* This is a system path. */ + int path_max_len = pathconf (LOCALE_PATH, _PC_PATH_MAX) + 1; + result = (char *) xmalloc (path_max_len); + + snprintf (result, path_max_len, "%s/%s", LOCALE_PATH, path); + } + else + { + char *t; + /* This is a user path. */ + result = xmalloc (strlen (path) + 2); + t = stpcpy (result, path); + *t = '\0'; + } + + errno = 0; + + if (euidaccess (result, W_OK) == -1) + /* Perhaps the directory does not exist now. Try to create it. */ + if (errno == ENOENT) + { + errno = 0; + mkdir (result, 0777); + } + + strcat (result, "/"); + + return result; +} diff --git a/locale/programs/locales.h b/locale/programs/locales.h new file mode 100644 index 0000000000..3c7676b765 --- /dev/null +++ b/locale/programs/locales.h @@ -0,0 +1,207 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifndef _LOCALES_H +#define _LOCALES_H + +#include <ctype.h> + +/* Undefine following line in production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + +#include "linereader.h" +#include "locfile-token.h" +#include "charset.h" +#include "locfile.h" +#include "localeinfo.h" + + +/* Header of the locale data files. */ +struct locale_file +{ + int magic; + int n; +}; + + +/* Handle LC_CTYPE category. */ + +static inline unsigned int +charclass_to_bit (enum token_t tok) +{ + static unsigned int lastbit = _ISalnum; + + switch (tok) + { +#define CLASS(name) case tok_##name: return _IS##name + CLASS (upper); + CLASS (lower); + CLASS (alpha); + CLASS (digit); + CLASS (alnum); + CLASS (space); + CLASS (cntrl); + CLASS (punct); + CLASS (graph); + CLASS (print); + CLASS (xdigit); + CLASS (blank); +#undef CLASS + case tok_string: + lastbit <<= 1; + if (lastbit == 0ul) + /* Exit status 2 means a limitation in the implementation is + exceeded. */ + error (2, 0, _("too many character classes defined")); + return lastbit; + default: + assert (1 == 0); + } + return 0; +} + +/* Remember name of newly created charclass. */ +void ctype_startup (struct linereader *lr, struct localedef_t *locale, + struct charset_t *__charset); +void ctype_finish (struct localedef_t *__locale, struct charset_t *__charset); + +void ctype_output (struct localedef_t *locale, const char *output_path); + +int ctype_is_charclass (struct linereader *lr, struct localedef_t *locale, + const char *__name); +void ctype_class_new (struct linereader *lr, struct localedef_t *locale, + enum token_t __tok, struct token *__code, + struct charset_t *__charset); +void ctype_class_start (struct linereader *lr, struct localedef_t *locale, + enum token_t __tok, const char *__name, + struct charset_t *__charset); +void ctype_class_from (struct linereader *lr, struct localedef_t *locale, + struct token *__code, struct charset_t *__charset); +void ctype_class_to (struct linereader *lr, struct localedef_t *locale, + struct token *__code, struct charset_t *__charset); +void ctype_class_end (struct linereader *lr, struct localedef_t *locale); + +int ctype_is_charmap (struct linereader *lr, struct localedef_t *locale, + const char *__name); +void ctype_map_new (struct linereader *lr, struct localedef_t *locale, + enum token_t __tok, struct token *__code, + struct charset_t *__charset); +void ctype_map_start (struct linereader *lr, struct localedef_t *locale, + enum token_t __tok, const char *__name, + struct charset_t *__charset); +void ctype_map_from (struct linereader *lr, struct localedef_t *locale, + struct token *__code, struct charset_t *__charset); +void ctype_map_to (struct linereader *lr, struct localedef_t *locale, + struct token *__code, struct charset_t *__charset); +void ctype_map_end (struct linereader *lr, struct localedef_t *locale); + + +/* Handle LC_COLLATE category. */ + +void collate_startup (struct linereader *__lr, struct localedef_t *__locale, + struct charset_t *__charset); + +void collate_finish (struct localedef_t *__locale, + struct charset_t *__charset); + +void collate_output (struct localedef_t *locale, const char *output_path); + +void collate_element_to (struct linereader *__lr, struct localedef_t *__locale, + struct token *__code, struct charset_t *__charset); +void collate_element_from (struct linereader *__lr, + struct localedef_t *__locale, struct token *__code, + struct charset_t *__charset); +void collate_symbol (struct linereader *__lr, struct localedef_t *__locale, + struct token *__code, struct charset_t *__charset); +void collate_new_order (struct linereader *__lr, struct localedef_t *__locale, + enum coll_sort_rule __sort_rule); +void collate_build_arrays (struct linereader *__lr, + struct localedef_t *__locale); +int collate_order_elem (struct linereader *__lr, struct localedef_t *__locale, + struct token *__code, struct charset_t *__charset); +int collate_weight_bsymbol (struct linereader *__lr, + struct localedef_t *__locale, + struct token *__code, struct charset_t *__charset); +int collate_next_weight (struct linereader *__lr, + struct localedef_t *__locale); +int collate_simple_weight (struct linereader *__lr, + struct localedef_t *__locale, + struct token *__code, struct charset_t *__charset); +void collate_end_weight (struct linereader *__lr, + struct localedef_t *__locale); + + +/* Handle LC_MONETARY category. */ + +void monetary_startup (struct linereader *__lr, struct localedef_t *__locale, + struct charset_t *__charset); + +void monetary_finish (struct localedef_t *__locale); + +void monetary_output (struct localedef_t *locale, const char *output_path); + +void monetary_add (struct linereader *lr, struct localedef_t *locale, + enum token_t __tok, struct token *__code, + struct charset_t *__charset); + + +/* Handle LC_NUMERIC category. */ + +void numeric_startup (struct linereader *__lr, struct localedef_t *__locale, + struct charset_t *__charset); + +void numeric_finish (struct localedef_t *__locale); + +void numeric_output (struct localedef_t *locale, const char *output_path); + +void numeric_add (struct linereader *lr, struct localedef_t *locale, + enum token_t __tok, struct token *__code, + struct charset_t *__charset); + + +/* Handle LC_TIME category. */ + +void time_startup (struct linereader *__lr, struct localedef_t *__locale, + struct charset_t *__charset); + +void time_finish (struct localedef_t *__locale); + +void time_output (struct localedef_t *locale, const char *output_path); + +void time_add (struct linereader *lr, struct localedef_t *locale, + enum token_t __tok, struct token *__code, + struct charset_t *__charset); + + +/* Handle LC_MESSAGES category. */ + +void messages_startup (struct linereader *__lr, struct localedef_t *__locale, + struct charset_t *__charset); + +void messages_finish (struct localedef_t *__locale); + +void messages_output (struct localedef_t *locale, const char *output_path); + +void messages_add (struct linereader *lr, struct localedef_t *locale, + enum token_t __tok, struct token *__code, + struct charset_t *__charset); + + +#endif /* locales.h */ diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf new file mode 100644 index 0000000000..85e031c777 --- /dev/null +++ b/locale/programs/locfile-kw.gperf @@ -0,0 +1,99 @@ +%{ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include <string.h> + +#include "locfile-token.h" +%} +struct keyword_t ; +%% +escape_char, tok_escape_char, 0 +comment_char, tok_comment_char, 0 +LC_CTYPE, tok_lc_ctype, 0 +END, tok_end, 0 +copy, tok_copy, 0 +upper, tok_upper, 0 +lower, tok_lower, 0 +alpha, tok_alpha, 0 +digit, tok_digit, 0 +alnum, tok_alnum, 0 +space, tok_space, 0 +cntrl, tok_cntrl, 0 +punct, tok_punct, 0 +graph, tok_graph, 0 +print, tok_print, 0 +xdigit, tok_xdigit, 0 +blank, tok_blank, 0 +charclass, tok_charclass, 0 +charmap, tok_charmap, 0 +toupper, tok_toupper, 0 +tolower, tok_tolower, 0 +LC_COLLATE, tok_lc_collate, 0 +collating-element, tok_collating_element, 0 +collating-symbol, tok_collating_symbol, 0 +order_start, tok_order_start, 0 +order_end, tok_order_end, 0 +from, tok_from, 0 +forward, tok_forward, 0 +backward, tok_backward, 0 +position, tok_position, 0 +UNDEFINED, tok_undefined, 0 +IGNORE, tok_ignore, 0 +LC_MONETARY, tok_lc_monetary, 0 +int_curr_symbol, tok_int_curr_symbol, 0 +currency_symbol, tok_currency_symbol, 0 +mon_decimal_point, tok_mon_decimal_point, 0 +mon_thousands_sep, tok_mon_thousands_sep, 0 +mon_grouping, tok_mon_grouping, 0 +positive_sign, tok_positive_sign, 0 +negative_sign, tok_negative_sign, 0 +int_frac_digits, tok_int_frac_digits, 0 +frac_digits, tok_frac_digits, 0 +p_cs_precedes, tok_p_cs_precedes, 0 +p_sep_by_space, tok_p_sep_by_space, 0 +n_cs_precedes, tok_n_cs_precedes, 0 +n_sep_by_space, tok_n_sep_by_space, 0 +p_sign_posn, tok_p_sign_posn, 0 +n_sign_posn, tok_n_sign_posn, 0 +LC_NUMERIC, tok_lc_numeric, 0 +decimal_point, tok_decimal_point, 0 +thousands_sep, tok_thousands_sep, 0 +grouping, tok_grouping, 0 +LC_TIME, tok_lc_time, 0 +abday, tok_abday, 0 +day, tok_day, 0 +abmon, tok_abmon, 0 +mon, tok_mon, 0 +d_t_fmt, tok_d_t_fmt, 0 +d_fmt, tok_d_fmt, 0 +t_fmt, tok_t_fmt, 0 +am_pm, tok_am_pm, 0 +t_fmt_ampm, tok_t_fmt_ampm, 0 +era, tok_era, 0 +era_year, tok_era_year, 0 +era_d_fmt, tok_era_d_fmt, 0 +era_d_t_fmt, tok_era_d_t_fmt, 0 +era_t_fmt, tok_era_t_fmt, 0 +alt_digits, tok_alt_digits, 0 +LC_MESSAGES, tok_lc_messages, 0 +yesexpr, tok_yesexpr, 0 +noexpr, tok_noexpr, 0 +yesstr, tok_yesstr, 0 +nostr, tok_nostr, 0 diff --git a/locale/programs/locfile-kw.h b/locale/programs/locfile-kw.h new file mode 100644 index 0000000000..c892669893 --- /dev/null +++ b/locale/programs/locfile-kw.h @@ -0,0 +1,211 @@ +/* C code produced by gperf version 2.5 (GNU C++ version) */ +/* Command-line: gperf -acCgopt -k1,2,5,$ -N locfile_hash programs/locfile-kw.gperf */ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include <string.h> + +#include "locfile-token.h" +struct keyword_t ; + +#define TOTAL_KEYWORDS 73 +#define MIN_WORD_LENGTH 3 +#define MAX_WORD_LENGTH 17 +#define MIN_HASH_VALUE 3 +#define MAX_HASH_VALUE 185 +/* maximum key range = 183, duplicates = 0 */ + +#ifdef __GNUC__ +inline +#endif +static unsigned int +hash (register const char *str, register int len) +{ + static const unsigned char asso_values[] = + { + 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, + 186, 186, 186, 186, 186, 186, 186, 0, 0, 0, + 0, 0, 186, 0, 186, 186, 0, 186, 0, 35, + 186, 186, 0, 0, 0, 5, 186, 186, 186, 0, + 186, 186, 186, 186, 186, 15, 186, 0, 0, 5, + 15, 10, 55, 30, 15, 75, 186, 20, 5, 40, + 10, 0, 0, 186, 35, 30, 0, 70, 186, 10, + 20, 75, 186, 186, 186, 186, 186, 186, + }; + register int hval = len; + + switch (hval) + { + default: + case 5: + hval += asso_values[(int) str[4]]; + case 4: + case 3: + case 2: + hval += asso_values[(int) str[1]]; + case 1: + hval += asso_values[(int) str[0]]; + break; + } + return hval + asso_values[(int) str[len - 1]]; +} + +#ifdef __GNUC__ +inline +#endif +const struct keyword_t * +locfile_hash (register const char *str, register int len) +{ + static const struct keyword_t wordlist[] = + { + {"",}, {"",}, {"",}, + {"END", tok_end, 0}, + {"",}, {"",}, + {"IGNORE", tok_ignore, 0}, + {"LC_TIME", tok_lc_time, 0}, + {"LC_CTYPE", tok_lc_ctype, 0}, + {"",}, + {"alpha", tok_alpha, 0}, + {"LC_MESSAGES", tok_lc_messages, 0}, + {"",}, {"",}, + {"UNDEFINED", tok_undefined, 0}, + {"LC_NUMERIC", tok_lc_numeric, 0}, + {"",}, {"",}, + {"position", tok_position, 0}, + {"",}, + {"t_fmt", tok_t_fmt, 0}, + {"",}, + {"collating-element", tok_collating_element, 0}, + {"positive_sign", tok_positive_sign, 0}, + {"",}, + {"abmon", tok_abmon, 0}, + {"collating-symbol", tok_collating_symbol, 0}, + {"",}, {"",}, {"",}, + {"cntrl", tok_cntrl, 0}, + {"",}, {"",}, + {"backward", tok_backward, 0}, + {"",}, + {"d_fmt", tok_d_fmt, 0}, + {"",}, {"",}, {"",}, + {"p_sep_by_space", tok_p_sep_by_space, 0}, + {"print", tok_print, 0}, + {"",}, + {"toupper", tok_toupper, 0}, + {"negative_sign", tok_negative_sign, 0}, + {"",}, + {"LC_COLLATE", tok_lc_collate, 0}, + {"LC_MONETARY", tok_lc_monetary, 0}, + {"",}, + {"era", tok_era, 0}, + {"n_sep_by_space", tok_n_sep_by_space, 0}, + {"blank", tok_blank, 0}, + {"noexpr", tok_noexpr, 0}, + {"tolower", tok_tolower, 0}, + {"mon", tok_mon, 0}, + {"era_t_fmt", tok_era_t_fmt, 0}, + {"space", tok_space, 0}, + {"",}, + {"mon_thousands_sep", tok_mon_thousands_sep, 0}, + {"thousands_sep", tok_thousands_sep, 0}, + {"",}, + {"alt_digits", tok_alt_digits, 0}, + {"",}, + {"comment_char", tok_comment_char, 0}, + {"",}, + {"charclass", tok_charclass, 0}, + {"t_fmt_ampm", tok_t_fmt_ampm, 0}, + {"p_sign_posn", tok_p_sign_posn, 0}, + {"charmap", tok_charmap, 0}, + {"",}, + {"era_d_fmt", tok_era_d_fmt, 0}, + {"",}, + {"era_d_t_fmt", tok_era_d_t_fmt, 0}, + {"mon_decimal_point", tok_mon_decimal_point, 0}, + {"p_cs_precedes", tok_p_cs_precedes, 0}, + {"",}, + {"punct", tok_punct, 0}, + {"n_sign_posn", tok_n_sign_posn, 0}, + {"forward", tok_forward, 0}, + {"decimal_point", tok_decimal_point, 0}, + {"",}, + {"lower", tok_lower, 0}, + {"order_start", tok_order_start, 0}, + {"",}, + {"n_cs_precedes", tok_n_cs_precedes, 0}, + {"copy", tok_copy, 0}, + {"nostr", tok_nostr, 0}, + {"escape_char", tok_escape_char, 0}, + {"",}, {"",}, {"",}, + {"alnum", tok_alnum, 0}, + {"",}, + {"d_t_fmt", tok_d_t_fmt, 0}, + {"day", tok_day, 0}, + {"order_end", tok_order_end, 0}, + {"digit", tok_digit, 0}, + {"",}, {"",}, {"",}, {"",}, + {"graph", tok_graph, 0}, + {"",}, {"",}, + {"grouping", tok_grouping, 0}, + {"",}, + {"currency_symbol", tok_currency_symbol, 0}, + {"",}, {"",}, {"",}, {"",}, + {"int_curr_symbol", tok_int_curr_symbol, 0}, + {"",}, + {"mon_grouping", tok_mon_grouping, 0}, + {"",}, {"",}, {"",}, + {"xdigit", tok_xdigit, 0}, + {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, + {"am_pm", tok_am_pm, 0}, + {"yesstr", tok_yesstr, 0}, + {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, + {"from", tok_from, 0}, + {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, + {"",}, + {"upper", tok_upper, 0}, + {"frac_digits", tok_frac_digits, 0}, + {"yesexpr", tok_yesexpr, 0}, + {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, + {"abday", tok_abday, 0}, + {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, + {"era_year", tok_era_year, 0}, + {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, + {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, + {"",}, {"",}, {"",}, + {"int_frac_digits", tok_int_frac_digits, 0}, + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*s == *str && !strncmp (str + 1, s + 1, len - 1)) + return &wordlist[key]; + } + } + return 0; +} diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h new file mode 100644 index 0000000000..1c3cfdc9db --- /dev/null +++ b/locale/programs/locfile-token.h @@ -0,0 +1,147 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifndef _TOKEN_H +#define _TOKEN_H + +enum token_t +{ + tok_none = 0, + + tok_eof, + tok_eol, + tok_bsymbol, + tok_ident, + tok_ellipsis, + tok_semicolon, + tok_comma, + tok_open_brace, + tok_close_brace, + tok_charcode, + tok_ucs2, + tok_ucs4, + tok_number, + tok_minus1, + tok_string, + + tok_escape_char, + tok_comment_char, + tok_end, + tok_g0esc, + tok_g1esc, + tok_g2esc, + tok_g3esc, + + tok_code_set_name, + tok_mb_cur_max, + tok_mb_cur_min, + tok_charmap, + tok_width, + tok_width_variable, + tok_width_default, + + tok_lc_ctype, + tok_copy, + tok_upper, + tok_lower, + tok_alpha, + tok_digit, + tok_xdigit, + tok_space, + tok_print, + tok_graph, + tok_blank, + tok_cntrl, + tok_punct, + tok_alnum, + tok_charclass, + tok_toupper, + tok_tolower, + tok_lc_collate, + tok_collating_element, + tok_collating_symbol, + tok_order_start, + tok_order_end, + tok_from, + tok_forward, + tok_backward, + tok_position, + tok_undefined, + tok_ignore, + tok_lc_monetary, + tok_int_curr_symbol, + tok_currency_symbol, + tok_mon_decimal_point, + tok_mon_thousands_sep, + tok_mon_grouping, + tok_positive_sign, + tok_negative_sign, + tok_int_frac_digits, + tok_frac_digits, + tok_p_cs_precedes, + tok_p_sep_by_space, + tok_n_cs_precedes, + tok_n_sep_by_space, + tok_p_sign_posn, + tok_n_sign_posn, + tok_lc_numeric, + tok_decimal_point, + tok_thousands_sep, + tok_grouping, + tok_lc_time, + tok_abday, + tok_day, + tok_abmon, + tok_mon, + tok_d_t_fmt, + tok_d_fmt, + tok_t_fmt, + tok_am_pm, + tok_t_fmt_ampm, + tok_era, + tok_era_year, + tok_era_d_fmt, + tok_era_d_t_fmt, + tok_era_t_fmt, + tok_alt_digits, + tok_lc_messages, + tok_yesexpr, + tok_noexpr, + tok_yesstr, + tok_nostr, + + tok_error +}; + + +struct keyword_t +{ + const char *name; + enum token_t token; + int symname_or_ident; + + /* Only for locdef file. */ + int locale; + enum token_t base; + enum token_t group; + enum token_t list; +}; + + +#endif /* token.h */ diff --git a/locale/programs/locfile.c b/locale/programs/locfile.c new file mode 100644 index 0000000000..cb98a5d530 --- /dev/null +++ b/locale/programs/locfile.c @@ -0,0 +1,979 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <errno.h> +#include <fcntl.h> +#include <locale.h> +#include <malloc.h> +#include <string.h> +#include <unistd.h> +#include <sys/stat.h> +#include <sys/uio.h> + +#include "locfile.h" +#include "linereader.h" +#include "localeinfo.h" +#include "locales.h" + + +/* Uncomment the following line in the production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + +/* Define the lookup function. */ +#include "locfile-kw.h" + + +/* Some useful macros. */ +#define MIN(a, b) (__extension__ ({ typeof (a) _a = (a); \ + typeof (b) _b = (b); \ + _a < _b ? _a : _b; })) + + +void *xmalloc (size_t __n); +char *xstrdup (const char *__str); + +struct localedef_t * +locfile_read (const char *filename, struct charset_t *charset) +{ + struct linereader *ldfile; + struct localedef_t *result; + int state; + enum token_t expected_tok = tok_none; + const char *expected_str = NULL; + enum token_t ctype_tok_sym = tok_none; + const char *ctype_tok_str = NULL; + int copy_category = 0; + int cnt; + + /* Allocate space for result. */ + result = (struct localedef_t *) xmalloc (sizeof (struct localedef_t)); + memset (result, '\0', sizeof (struct localedef_t)); + + ldfile = lr_open (filename, locfile_hash); + if (ldfile == NULL) + { + if (filename[0] != '/') + { + char path[strlen (filename) + 1 + sizeof (LOCSRCDIR)]; + + stpcpy (stpcpy (stpcpy (path, LOCSRCDIR), "/"), filename); + ldfile = lr_open (path, locfile_hash); + } + + if (ldfile == NULL) + { + result->failed = 1; + return result; + } + } + +#define HANDLE_COPY(category, token, string) \ + if (nowtok == tok_copy) \ + { \ + copy_category = category; \ + expected_tok = token; \ + expected_str = string; \ + state = 8; \ + continue; \ + } \ + ++state + +#define LOCALE_PROLOG(token, string) \ + if (nowtok == tok_eol) \ + /* Ignore empty lines. */ \ + continue; \ + if (nowtok == tok_end) \ + { \ + expected_tok = token; \ + expected_str = string; \ + state = 4; \ + continue; \ + } \ + if (nowtok == tok_copy) \ + goto only_copy; + + +#define READ_STRING(fn, errlabel) \ + do \ + { \ + arg = lr_token (ldfile, charset); \ + if (arg->tok != tok_string) \ + goto errlabel; \ + fn (ldfile, result, nowtok, arg, charset); \ + lr_ignore_rest (ldfile, 1); \ + } \ + while (0) + +#define READ_STRING_LIST(fn, errlabel) \ + do \ + { \ + arg = lr_token (ldfile, charset); \ + while (arg->tok == tok_string) \ + { \ + fn (ldfile, result, nowtok, arg, charset); \ + arg = lr_token (ldfile, charset); \ + if (arg->tok != tok_semicolon) \ + break; \ + arg = lr_token (ldfile, charset); \ + } \ + if (arg->tok != tok_eol) \ + goto errlabel; \ + } \ + while (0) + +#define READ_NUMBER(fn, errlabel) \ + do \ + { \ + arg = lr_token (ldfile, charset); \ + if (arg->tok != tok_minus1 && arg->tok != tok_number) \ + goto errlabel; \ + fn (ldfile, result, nowtok, arg, charset); \ + lr_ignore_rest (ldfile, 1); \ + } \ + while (0) + +#define READ_NUMBER_LIST(fn, errlabel) \ + do \ + { \ + arg = lr_token (ldfile, charset); \ + while (arg->tok == tok_minus1 || arg->tok == tok_number) \ + { \ + fn (ldfile, result, nowtok, arg, charset); \ + arg = lr_token (ldfile, charset); \ + if (arg->tok != tok_semicolon) \ + break; \ + arg = lr_token (ldfile, charset); \ + } \ + if (arg->tok != tok_eol) \ + goto errlabel; \ + } \ + while (0) + +#define SYNTAX_ERROR(string) \ + lr_error (ldfile, string); \ + lr_ignore_rest (ldfile, 0); + + + /* Parse locale definition file and store result in RESULT. */ + state = 1; + while (1) + { + /* What's on? */ + struct token *now = lr_token (ldfile, charset); + enum token_t nowtok = now->tok; + struct token *arg; + + if (nowtok == tok_eof) + break; + + switch (state) + { + case 1: + /* The beginning. We expect the special declarations, EOL or + the start of any locale. */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + switch (nowtok) + { + case tok_escape_char: + case tok_comment_char: + /* We need an argument. */ + arg = lr_token (ldfile, charset); + + if (arg->tok != tok_ident) + { + SYNTAX_ERROR (_("bad argument")); + continue; + } + + if (arg->val.str.len != 1) + { + lr_error (ldfile, _("\ +argument to `%s' must be a single character"), + nowtok == tok_escape_char ? "escape_char" + : "comment_char"); + + lr_ignore_rest (ldfile, 0); + continue; + } + + if (nowtok == tok_escape_char) + ldfile->escape_char = *arg->val.str.start; + else + ldfile->comment_char = *arg->val.str.start; + break; + + case tok_lc_ctype: + state = 2; + break; + + case tok_lc_collate: + state = 10; + break; + + case tok_lc_monetary: + state = 20; + break; + + case tok_lc_numeric: + state = 30; + break; + + case tok_lc_time: + state = 40; + break; + + case tok_lc_messages: + state = 50; + break; + + default: + SYNTAX_ERROR (_("\ +syntax error: not inside a locale definition section")); + continue; + } + lr_ignore_rest (ldfile, 1); + continue; + + case 2: + HANDLE_COPY (LC_CTYPE, tok_lc_ctype, "LC_CYTPE"); + + ctype_startup (ldfile, result, charset); + /* FALLTHROUGH */ + + case 3: + /* Here we accept all the character classes, tolower/toupper, + and following ANSI C:1995 self-defined classes. */ + LOCALE_PROLOG (tok_lc_ctype, "LC_CTYPE"); + + if (nowtok == tok_charclass) + { + READ_STRING_LIST (ctype_class_new, bad_new_charclass); + continue; + bad_new_charclass: + SYNTAX_ERROR (_("\ +syntax error in definition of new character class")); + continue; + } + + if (nowtok == tok_charmap) + { + READ_STRING_LIST (ctype_map_new, bad_new_charmap); + continue; + bad_new_charmap: + SYNTAX_ERROR (_("\ +syntax error in definition of new character map")); + continue; + } + + if (nowtok == tok_upper || nowtok == tok_lower + || nowtok == tok_alpha || nowtok == tok_digit + || nowtok == tok_alnum || nowtok == tok_space + || nowtok == tok_cntrl || nowtok == tok_punct + || nowtok == tok_graph || nowtok == tok_print + || nowtok == tok_xdigit || nowtok == tok_blank) + { + ctype_tok_sym = nowtok; + ctype_tok_str = NULL; + state = 5; + continue; + } + + if (nowtok == tok_toupper|| nowtok == tok_tolower) + { + ctype_tok_sym = nowtok; + ctype_tok_str = NULL; + state = 6; + continue; + } + + if (nowtok != tok_ident) + goto bad_charclass; + + /* We possibly have a self-defined character class. */ + if (ctype_is_charclass (ldfile, result, now->val.str.start)) + { + ctype_tok_sym = nowtok; + ctype_tok_str = now->val.str.start; + state = 5; + continue; + } + + /* ...or a self-defined character map. */ + if (ctype_is_charmap (ldfile, result, now->val.str.start)) + { + ctype_tok_sym = nowtok; + ctype_tok_str = now->val.str.start; + state = 6; + continue; + } + + SYNTAX_ERROR (_("syntax error in definition of LC_CTYPE category")); + continue; + + case 4: + /* Handle `END xxx'. */ + if (nowtok != expected_tok) + lr_error (ldfile, _("\ +`%1$s' definition does not end with `END %1$s'"), expected_str); + + lr_ignore_rest (ldfile, nowtok == expected_tok); + state = 1; + continue; + + case 5: + /* Here we expect a semicolon separated list of bsymbols. The + bit to be set in the word is given in CHARCLASS_BIT. */ + arg = now; + + ctype_class_start (ldfile, result, ctype_tok_sym, ctype_tok_str, + charset); + + while (arg->tok != tok_eol) + { + /* Any token other than a bsymbol is an error. */ + if (arg->tok != tok_bsymbol) + { + bad_charclass: + SYNTAX_ERROR (_("\ +syntax error in character class definition")); + break; + } + + /* Lookup value for token and write into array. */ + ctype_class_from (ldfile, result, arg, charset); + + arg = lr_token (ldfile, charset); + if (arg->tok == tok_semicolon) + arg = lr_token (ldfile, charset); + else if (arg->tok != tok_eol) + goto bad_charclass; + + /* Look for ellipsis. */ + if (arg->tok == tok_ellipsis) + { + arg = lr_token (ldfile, charset); + if (arg->tok != tok_semicolon) + goto bad_charclass; + + arg = lr_token (ldfile, charset); + if (arg->tok != tok_bsymbol) + goto bad_charclass; + + /* Write range starting at LAST to ARG->VAL. */ + ctype_class_to (ldfile, result, arg, charset); + + arg = lr_token (ldfile, charset); + if (arg->tok == tok_semicolon) + arg = lr_token (ldfile, charset); + else if (arg->tok != tok_eol) + goto bad_charclass; + } + } + + /* Mark class as already seen. */ + ctype_class_end (ldfile, result); + state = 3; + + continue; + + case 6: + /* Here we expect a list of character mappings. Note: the + first opening brace is already matched. */ + ctype_map_start (ldfile, result, ctype_tok_sym, ctype_tok_str, + charset); + + while (1) + { + /* Match ( bsymbol , bsymbol ) */ + if (now->tok != tok_open_brace) + goto bad_charmap; + + now = lr_token (ldfile, charset); + if (now->tok != tok_bsymbol) + { + bad_charmap: + SYNTAX_ERROR (_("\ +syntax error in character mapping definition")); + state = 3; + break; + } + + /* Lookup arg and assign to FROM. */ + ctype_map_from (ldfile, result, now, charset); + + now = lr_token (ldfile, charset); + if (now->tok != tok_comma) + goto bad_charmap; + + now = lr_token (ldfile, charset); + if (now->tok != tok_bsymbol) + goto bad_charmap; + + /* Lookup arg and assign to TO. */ + ctype_map_to (ldfile, result, now, charset); + + now = lr_token (ldfile, charset); + if (now->tok != tok_close_brace) + goto bad_charmap; + + now = lr_token (ldfile, charset); + if (now->tok == tok_eol) + { + state = 3; + break; + } + if (now->tok != tok_semicolon) + goto bad_charmap; + + now = lr_token (ldfile, charset); + } + + ctype_map_end (ldfile, result); + continue; + + case 8: + { + /* We have seen `copy'. First match the argument. */ + int warned = 0; + + if (nowtok != tok_string) + lr_error (ldfile, _("expect string argument for `copy'")); + else + def_to_process (now->val.str.start, 1 << copy_category); + + lr_ignore_rest (ldfile, nowtok == tok_string); + + /* The rest of the line must be empty + and the next keyword must be `END xxx'. */ + + while (lr_token (ldfile, charset)->tok != tok_end) + { + if (warned == 0) + { + only_copy: + lr_error (ldfile, _("\ +no other keyword shall be specified when `copy' is used")); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + } + + state = 4; + } + continue; + + case 10: + HANDLE_COPY (LC_COLLATE, tok_lc_collate, "LC_COLLATE"); + + collate_startup (ldfile, result, charset); + /* FALLTHROUGH */ + + case 11: + /* Process the LC_COLLATE section. We expect `END LC_COLLATE' + any of the collation specifications, or any bsymbol. */ + LOCALE_PROLOG (tok_lc_collate, "LC_COLLATE"); + + if (nowtok == tok_order_start) + { + state = 12; + continue; + } + + if (nowtok != tok_collating_element + && nowtok != tok_collating_symbol) + { + bad_collation: + lr_error (ldfile, _("\ +syntax error in collation definition")); + lr_ignore_rest (ldfile, 0); + continue; + } + + /* Get argument. */ + arg = lr_token (ldfile, charset); + if (arg->tok != tok_bsymbol) + { + lr_error (ldfile, _("\ +collation symbol expected after `%s'"), + nowtok == tok_collating_element + ? "collating-element" : "collating-symbol"); + lr_ignore_rest (ldfile, 0); + continue; + } + + if (nowtok == tok_collating_element) + { + /* Save to-value as new name. */ + collate_element_to (ldfile, result, arg, charset); + + arg = lr_token (ldfile, charset); + if (arg->tok != tok_from) + { + lr_error (ldfile, _("\ +`from' expected after first argument to `collating-element'")); + lr_ignore_rest (ldfile, 0); + continue; + } + + arg = lr_token (ldfile, charset); + if (arg->tok != tok_string) + { + lr_error (ldfile, _("\ +from-value of `collating-element' must be a string")); + lr_ignore_rest (ldfile, 0); + continue; + } + + /* Enter new collating element. */ + collate_element_from (ldfile, result, arg, charset); + } + else + /* Enter new collating symbol into table. */ + collate_symbol (ldfile, result, arg, charset); + + lr_ignore_rest (ldfile, 1); + continue; + + case 12: + /* We parse the rest of the line containing `order_start'. + In any case we continue with parsing the symbols. */ + state = 13; + + cnt = 0; + while (now->tok != tok_eol) + { + int collation_method = 0; + + ++cnt; + + do + { + if (now->tok == tok_forward) + collation_method |= sort_forward; + else if (now->tok == tok_backward) + collation_method |= sort_backward; + else if (now->tok == tok_position) + collation_method |= sort_position; + else + { + lr_error (ldfile, _("unknown collation directive")); + lr_ignore_rest (ldfile, 0); + continue; + } + + now = lr_token (ldfile, charset); + } + while (now->tok == tok_comma + && (now == lr_token (ldfile, charset) != tok_none)); + + /* Check for consistency: forward and backwards are + mutually exclusive. */ + if ((collation_method & sort_forward) != 0 + && (collation_method & sort_backward) != 0) + { + lr_error (ldfile, _("\ +sorting order `forward' and `backward' are mutually exclusive")); + /* The recover clear the backward flag. */ + collation_method &= ~sort_backward; + } + + /* ??? I don't know whether this is correct but while + thinking about the `strcoll' functions I found that I + need a direction when performing position depended + collation. So I assume here that implicitly the + direction `forward' is given when `position' alone is + written. --drepper */ + if (collation_method == sort_position) + collation_method |= sort_forward; + + /* Enter info about next collation order. */ + collate_new_order (ldfile, result, collation_method); + + if (now->tok != tok_eol && now->tok != tok_semicolon) + { + lr_error (ldfile, _("\ +syntax error in `order_start' directive")); + lr_ignore_rest (ldfile, 0); + break; + } + + if (now->tok == tok_semicolon) + now = lr_token (ldfile, charset); + } + + /* If no argument to `order_start' is given, one `forward' + argument is implicitely assumed. */ + if (cnt == 0) + collate_new_order (ldfile, result, sort_forward); + + + /* We now know about all sorting rules. */ + collate_build_arrays (ldfile, result); + + continue; + + case 13: + /* We read one symbol a line until `order_end' is found. */ + { + static int last_correct = 1; + + if (nowtok == tok_order_end) + { + state = 14; + lr_ignore_rest (ldfile, 1); + continue; + } + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + continue; + + if (nowtok != tok_bsymbol && nowtok != tok_undefined + && nowtok != tok_ellipsis) + { + if (last_correct == 1) + { + lr_error (ldfile, _("\ +syntax error in collating order definition")); + last_correct = 0; + } + lr_ignore_rest (ldfile, 0); + continue; + } + else + { + last_correct = 1; + + /* Remember current token. */ + if (collate_order_elem (ldfile, result, now, charset) < 0) + continue; + } + + /* Read optional arguments. */ + arg = lr_token (ldfile, charset); + while (arg->tok != tok_eol) + { + if (arg->tok != tok_ignore && arg->tok != tok_ellipsis + && arg->tok != tok_bsymbol && arg->tok != tok_string) + break; + + if (arg->tok == tok_ignore || arg->tok == tok_ellipsis + || arg->tok == tok_string) + { + /* Call handler for simple weights. */ + if (collate_simple_weight (ldfile, result, arg, charset) + < 0) + goto illegal_weight; + + arg = lr_token (ldfile, charset); + } + else + do + { + /* Collect char. */ + int ok = collate_weight_bsymbol (ldfile, result, arg, + charset); + if (ok < 0) + goto illegal_weight; + + arg = lr_token (ldfile, charset); + } + while (arg->tok == tok_bsymbol); + + /* Are there more weights? */ + if (arg->tok != tok_semicolon) + break; + + /* Yes, prepare next weight. */ + if (collate_next_weight (ldfile, result) < 0) + goto illegal_weight; + + arg = lr_token (ldfile, charset); + } + + if (arg->tok != tok_eol) + { + SYNTAX_ERROR (_("syntax error in order specification")); + } + + collate_end_weight (ldfile, result); + illegal_weight: + } + continue; + + case 14: + /* Following to the `order_end' keyword we don't expect + anything but the `END'. */ + if (nowtok == tok_eol) + continue; + + if (nowtok != tok_end) + goto bad_collation; + + expected_tok = tok_lc_collate; + expected_str = "LC_COLLATE"; + state = 4; + + ldfile->translate_strings = 1; + continue; + + case 20: + HANDLE_COPY (LC_MONETARY, tok_lc_monetary, "LC_MONETARY"); + + monetary_startup (ldfile, result, charset); + /* FALLTHROUGH */ + + case 21: + LOCALE_PROLOG (tok_lc_monetary, "LC_MONETARY"); + + switch (nowtok) + { + case tok_int_curr_symbol: + case tok_currency_symbol: + case tok_mon_decimal_point: + case tok_mon_thousands_sep: + case tok_positive_sign: + case tok_negative_sign: + READ_STRING (monetary_add, bad_monetary); + break; + + case tok_int_frac_digits: + case tok_frac_digits: + case tok_p_cs_precedes: + case tok_p_sep_by_space: + case tok_n_cs_precedes: + case tok_n_sep_by_space: + case tok_p_sign_posn: + case tok_n_sign_posn: + READ_NUMBER (monetary_add, bad_monetary); + break; + + case tok_mon_grouping: + /* We have a semicolon separated list of integers. */ + READ_NUMBER_LIST (monetary_add, bad_monetary); + break; + + default: + bad_monetary: + SYNTAX_ERROR (_("syntax error in monetary locale definition")); + } + continue; + + case 30: + HANDLE_COPY (LC_NUMERIC, tok_lc_numeric, "LC_NUMERIC"); + + numeric_startup (ldfile, result, charset); + /* FALLTHROUGH */ + + case 31: + LOCALE_PROLOG (tok_lc_numeric, "LC_NUMERIC"); + + switch (nowtok) + { + case tok_decimal_point: + case tok_thousands_sep: + READ_STRING (numeric_add, bad_numeric); + break; + + case tok_grouping: + /* We have a semicolon separated list of integers. */ + READ_NUMBER_LIST (numeric_add, bad_numeric); + break; + + default: + bad_numeric: + SYNTAX_ERROR (_("syntax error in numeric locale definition")); + } + continue; + + case 40: + HANDLE_COPY (LC_TIME, tok_lc_time, "LC_TIME"); + + time_startup (ldfile, result, charset); + /* FALLTHROUGH */ + + case 41: + LOCALE_PROLOG (tok_lc_time, "LC_TIME"); + + switch (nowtok) + { + case tok_abday: + case tok_day: + case tok_abmon: + case tok_mon: + case tok_am_pm: + case tok_alt_digits: + READ_STRING_LIST (time_add, bad_time); + continue; + + case tok_d_t_fmt: + case tok_d_fmt: + case tok_t_fmt: + case tok_t_fmt_ampm: + case tok_era: + case tok_era_year: + case tok_era_d_t_fmt: + case tok_era_d_fmt: + case tok_era_t_fmt: + READ_STRING (time_add, bad_time); + break; + + default: + bad_time: + SYNTAX_ERROR (_("syntax error in time locale definition")); + } + continue; + + case 50: + HANDLE_COPY (LC_MESSAGES, tok_lc_messages, "LC_MESSAGES"); + + messages_startup (ldfile, result, charset); + /* FALLTHROUGH */ + + case 51: + LOCALE_PROLOG (tok_lc_messages, "LC_MESSAGES"); + + switch (nowtok) + { + case tok_yesexpr: + case tok_noexpr: + case tok_yesstr: + case tok_nostr: + READ_STRING (messages_add, bad_message); + break; + + default: + bad_message: + SYNTAX_ERROR (_("syntax error in message locale definition")); + } + continue; + + default: + error (5, 0, _("%s: error in state machine"), __FILE__); + /* NOTREACHED */ + } + + break; + } + + /* We read all of the file. */ + lr_close (ldfile); + + /* Let's see what information is available. */ + for (cnt = LC_CTYPE; cnt <= LC_MESSAGES; ++cnt) + if (result->categories[cnt].generic != NULL) + result->avail |= 1 << cnt; + + return result; +} + + +void +check_all_categories (struct localedef_t *locale, struct charset_t *charset) +{ + /* Call the finishing functions for all locales. */ + if ((locale->binary & (1 << LC_CTYPE)) == 0) + ctype_finish (locale, charset); + if ((locale->binary & (1 << LC_COLLATE)) == 0) + collate_finish (locale, charset); + if ((locale->binary & (1 << LC_MONETARY)) == 0) + monetary_finish (locale); + if ((locale->binary & (1 << LC_NUMERIC)) == 0) + numeric_finish (locale); + if ((locale->binary & (1 << LC_TIME)) == 0) + time_finish (locale); + if ((locale->binary & (1 << LC_MESSAGES)) == 0) + messages_finish (locale); +} + + +void +write_all_categories (struct localedef_t *locale, const char *output_path) +{ + /* Call all functions to write locale data. */ + ctype_output (locale, output_path); + collate_output (locale, output_path); + monetary_output (locale, output_path); + numeric_output (locale, output_path); + time_output (locale, output_path); + messages_output (locale, output_path); +} + + +void +write_locale_data (const char *output_path, const char *category, + size_t n_elem, struct iovec *vec) +{ + size_t cnt, step; + int fd; + char *fname; + + asprintf (&fname, "%s/%s", output_path, category); + fd = creat (fname, 0666); + if (fd == -1) + { + int save_err = errno; + + if (errno == EISDIR) + { + free (fname); + asprintf (&fname, "%1$s/%2$s/SYS_%2$s", output_path, category); + fd = creat (fname, 0666); + if (fd == -1) + save_err = errno; + } + + if (fd == -1) + { + error (0, save_err, _("cannot open output file for category `%s'"), + category); + return; + } + } + free (fname); + + /* Write the data using writev. But we must take care for the + limitation of the implementation. */ + for (cnt = 0; cnt < n_elem; cnt += step) + { + /* XXX Fixme: should be in libc header. */ +#ifndef MAX_IOVEC +# define MAX_IOVEC 8 +#endif + step = MIN (MAX_IOVEC, n_elem - cnt); + + if (writev (fd, &vec[cnt], step) < 0) + { + error (0, errno, _("failure while writing data for category `%s'"), + category); + break; + } + } + + close (fd); +} diff --git a/locale/programs/locfile.h b/locale/programs/locfile.h new file mode 100644 index 0000000000..e337e961ed --- /dev/null +++ b/locale/programs/locfile.h @@ -0,0 +1,75 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifndef _LOCFILE_H +#define _LOCFILE_H + +#include <sys/uio.h> + +#include "charset.h" + +/* Opaque types for the different loales. */ +struct locale_ctype_t; +struct locale_collate_t; +struct locale_monetary_t; +struct locale_numeric_t; +struct locale_time_t; +struct locale_messages_t; + +struct localedef_t +{ + int failed; + + int avail; + int binary; + + union + { + void *generic; + struct locale_ctype_t *ctype; + struct locale_collate_t *collate; + struct locale_monetary_t *monetary; + struct locale_numeric_t *numeric; + struct locale_time_t *time; + struct locale_messages_t *messages; + } categories[6]; + + size_t len[6]; +}; + + +/* Found in localedef.c. */ +void def_to_process (const char *name, int category); + + +/* Found in locfile.c. */ +struct localedef_t *locfile_read (const char *filename, + struct charset_t *charset); + +void check_all_categories (struct localedef_t *locale, + struct charset_t *charset); + +void write_all_categories (struct localedef_t *locale, + const char *output_path); + + +void write_locale_data (const char *output_path, const char *category, + size_t n_elem, struct iovec *vec); + +#endif /* locfile.h */ diff --git a/locale/programs/stringtrans.c b/locale/programs/stringtrans.c new file mode 100644 index 0000000000..bff5aa41a2 --- /dev/null +++ b/locale/programs/stringtrans.c @@ -0,0 +1,146 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +COntributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <assert.h> +#include <stdlib.h> + +#include "charset.h" +#include "stringtrans.h" + + +/* Global variable. */ +enum encoding_method encoding_method = ENC_UCS4; + + +void *xmalloc (size_t __n); +void *xrealloc (void *__p, size_t __n); + + +#define ADDC(ch) \ + do \ + { \ + if (bufact == bufmax) \ + { \ + bufmax *= 2; \ + buf = xrealloc (buf, bufmax); \ + } \ + buf[bufact++] = (ch); \ + } \ + while (0) + + +char * +translate_string (char *str, struct charset_t *charset) +{ + char *buf; + size_t bufact = 0; + size_t bufmax = 56; + + buf = (char *) xmalloc (bufmax); + + while (str[0] != '\0') + { + char *tp; + unsigned int value; + + if (str[0] != '<') + { + ADDC (*str++); + continue; + } + + tp = &str[1]; + while (tp[0] != '\0' && tp[0] != '>') + if (tp[0] == '\\') + if (tp[1] != '\0') + tp += 2; + else + ++tp; + else + ++tp; + + if (tp[0] == '\0') + { + free (buf); + return NULL; + } + + value = charset_find_value (charset, str + 1, tp - (str + 1)); + if (value == ILLEGAL_CHAR_VALUE) + { + free (buf); + return NULL; + } + else + { + /* Encode string using current method. */ + char *cp; + + if (bufmax - bufact < 8) + { + bufmax *= 2; + buf = (char *) xrealloc (buf, bufmax); + } + + cp = &buf[bufact]; + if (encode_char (value, &cp) < 0) + { + free (buf); + return NULL; + } + bufact = cp - buf; + } + + str = &tp[1]; + } + + ADDC ('\0'); + + return buf;; +} + + +int +encode_char (unsigned int value, char **cpp) +{ + switch (encoding_method) + { + case ENC_UCS1: + if (value > 255) + return -11; + *(*cpp)++ = (char) value; + break; + + case ENC_UCS4: + *(*cpp)++ = (char) (value >> 24); + *(*cpp)++ = (char) ((value >> 16) & 0xff); + *(*cpp)++ = (char) ((value >> 8) & 0xff); + *(*cpp)++ = (char) (value & 0xff); + break; + + default: + return -1; + } + + return 0; +} diff --git a/locale/programs/stringtrans.h b/locale/programs/stringtrans.h new file mode 100644 index 0000000000..3576ce445c --- /dev/null +++ b/locale/programs/stringtrans.h @@ -0,0 +1,38 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. +Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifndef _TRANSLATE_H +#define _TRANSLATE_H 1 + +enum encoding_method +{ + ENC_UCS1, + ENC_UCS4 +}; + + +extern enum encoding_method encoding_method; + + +char *translate_string (char *__str, struct charset_t *__charset); + +int encode_char (unsigned int __value, char **__cpp); + + +#endif /* translate.h */ |