diff options
Diffstat (limited to 'locale/programs/ld-collate.c')
-rw-r--r-- | locale/programs/ld-collate.c | 1819 |
1 files changed, 1545 insertions, 274 deletions
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 265bfd0af1..3c1267420c 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -1,6 +1,6 @@
 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
+   Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
@@ -21,32 +21,1034 @@
 # include <config.h>
 #endif
 
-#include <endian.h>
-#include <errno.h>
-#include <limits.h>
-#include <locale.h>
-#include <obstack.h>
+#include <error.h>
 #include <stdlib.h>
-#include <string.h>
-#include <wchar.h>
-#include <libintl.h>
 
+#include "charmap.h"
 #include "localeinfo.h"
-#include "locales.h"
-#include "simple-hash.h"
-#include "stringtrans.h"
-#include "strlen-hash.h"
+#include "linereader.h"
+#include "locfile.h"
+#include "localedef.h"
 
 /* Uncomment the following line in the production version.  */
 /* #define NDEBUG 1 */
 #include <assert.h>
 
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+
+/* Forward declaration.  */
+struct element_t;
+
+/* Node of the singly linked list of sections declared in the locale
+   definition.  The unnamed section and the error section embedded in
+   `struct locale_collate_t' use the same type.  */
+struct section_list
+{
+  struct section_list *next;	/* Next node of the list, or NULL.  */
+  /* Name of the section.  */
+  const char *name;
+  /* First element of this section.  */
+  struct element_t *first;
+  /* Last element of this section.  */
+  struct element_t *last;
+  /* These are the rules for this section: an array of sort rules which
+     is filled in when the `order_start' line of the section is read.  */
+  enum coll_sort_rule *rules;
+};
+
+/* Data type for collating element.  */
+struct element_t
+{
+  const char *mbs;		/* Multibyte string handed to new_element.  */
+  const uint32_t *wcs;		/* Wide character string handed to new_element.  */
+  int order;			/* Set to 0 on creation; presumably the position
+				   in the final order -- TODO confirm.  */
+
+  /* Array with one entry per sorting rule, allocated when the element is
+     placed in the order list.  A null entry stands for IGNORE.  */
+  struct element_t **weights;
+
+  /* Where does the definition come from.  */
+  const char *file;
+  size_t line;
+
+  /* Which section does this belong to.  */
+  struct section_list *section;
+
+  /* Predecessor and successor in the order list.  */
+  struct element_t *last;
+  struct element_t *next;
+};
+
+/* Data type for collating symbol.  */
+struct symbol_t
+{
+  /* Point to place in the order list.  NULL until a place is assigned.  */
+  struct element_t *order;
+
+  /* Where does the definition come from.  */
+  const char *file;
+  size_t line;
+};
+
+
+/* The real definition of the struct for the LC_COLLATE locale.  */
+struct locale_collate_t
+{
+  int col_weight_max;		/* Value of the `col_weight_max' keyword; -1 as
+				   long as the keyword has not been seen.  */
+  int cur_weight_max;		/* NOTE(review): not used by the code visible
+				   here; meaning to be confirmed.  */
+
+  /* List of known sections (scripts), most recently declared first.  */
+  struct section_list *sections;
+  /* Section the order definition currently being read belongs to.  */
+  struct section_list *current_section;
+  /* There always can be an unnamed section.  */
+  struct section_list unnamed_section;
+  /* To make handling of errors easier we have another section.  It is
+     selected when `order_start' names a section which is not known.  */
+  struct section_list error_section;
+
+  /* Number of sorting rules given in order_start line.  */
+  uint32_t nrules;
+
+  /* Start of the order list.  */
+  struct element_t *start;
+
+  /* The undefined element.  */
+  struct element_t undefined;
 
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
+  /* This is the cursor for `reorder_after' insertions.  New elements are
+     placed behind the element it points to.  */
+  struct element_t *cursor;
 
-#define SWAPU32(w) \
-  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
+  /* Remember whether last weight was an ellipsis.  */
+  int was_ellipsis;
+
+  /* Known collating elements, indexed by name.  */
+  hash_table elem_table;
+
+  /* Known collating symbols, indexed by name.  */
+  hash_table sym_table;
+
+  /* Known collation sequences, indexed by name.  */
+  hash_table seq_table;
+
+  /* Pool from which section nodes, elements, symbols and weight arrays
+     are allocated.  */
+  struct obstack mempool;
+
+  /* The LC_COLLATE category is a bit special as it is sometimes possible
+     that the definitions from more than one input file contain information.
+     Therefore we keep all relevant input in a list.  */
+  struct locale_collate_t *next;
+};
+
+
+/* We have a few global variables which are used for reading all
+   LC_COLLATE category descriptions in all files.
*/ +static int nrules; + + +static struct section_list * +make_seclist_elem (struct locale_collate_t *collate, const char *string, + struct section_list *next) +{ + struct section_list *newp; + + newp = (struct section_list *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->next = next; + newp->name = string; + newp->first = NULL; + + return newp; +} + + +static struct element_t * +new_element (struct locale_collate_t *collate, const char *mbs, + const uint32_t *wcs) +{ + struct element_t *newp; + + newp = (struct element_t *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->mbs = mbs; + newp->wcs = wcs; + newp->order = 0; + + newp->file = NULL; + newp->line = 0; + + newp->section = NULL; + + newp->last = NULL; + newp->next = NULL; + + return newp; +} + + +static struct symbol_t * +new_symbol (struct locale_collate_t *collate) +{ + struct symbol_t *newp; + newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp)); + + newp->order = NULL; + + newp->file = NULL; + newp->line = 0; + + return newp; +} + + +/* Test whether this name is already defined somewhere. 
*/ +static int +check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate, + struct charmap_t *charmap, struct repertoire_t *repertoire, + const char *symbol, size_t symbol_len) +{ + void *ignore = NULL; + + if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined in charmap"), symbol); + return 1; + } + + if (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined in repertoire"), symbol); + return 1; + } + + if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol); + return 1; + } + + if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined as collating element"), + symbol); + return 1; + } + + return 0; +} + + +/* Read the direction specification. */ +static void +read_directions (struct linereader *ldfile, struct token *arg, + struct charmap_t *charmap, struct repertoire_t *repertoire, + struct locale_collate_t *collate) +{ + int cnt = 0; + int max = nrules ?: 10; + enum coll_sort_rule *rules = calloc (max, sizeof (*rules)); + int warned = 0; + + while (1) + { + int valid = 0; + + if (arg->tok == tok_forward) + { + if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_forward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned twice in definition of weight %d"), + "LC_COLLATE", "forward", cnt + 1); + } + } + else + rules[cnt] |= sort_forward; + + valid = 1; + } + else if (arg->tok == tok_backward) + { + if (rules[cnt] & sort_forward) + { + if (! 
warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned twice in definition of weight %d"), + "LC_COLLATE", "backward", cnt + 1); + } + } + else + rules[cnt] |= sort_backward; + + valid = 1; + } + else if (arg->tok == tok_position) + { + if (rules[cnt] & sort_position) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned twice in definition of weight %d in category `%s'"), + "LC_COLLATE", "position", cnt + 1); + } + } + else + rules[cnt] |= sort_position; + + valid = 1; + } + + if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma + || arg->tok == tok_semicolon) + { + if (! valid && ! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + + /* See whether we have to increment the counter. */ + if (arg->tok != tok_comma && rules[cnt] != 0) + ++cnt; + + if (arg->tok == tok_eof || arg->tok == tok_eol) + /* End of line or file, so we exit the loop. */ + break; + + if (nrules == 0) + { + /* See whether we have enough room in the array. */ + if (cnt == max) + { + max += 10; + rules = (enum coll_sort_rule *) xrealloc (rules, + max + * sizeof (*rules)); + memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules)); + } + } + else + { + if (cnt == nrules) + { + /* There must not be any more rule. */ + if (! warned) + { + lr_error (ldfile, _("\ +%s: too many rules; first entry only had %d"), + "LC_COLLATE", nrules); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + break; + } + } + } + else + { + if (! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + } + + arg = lr_token (ldfile, charmap, repertoire); + } + + if (nrules == 0) + { + /* Now we know how many rules we have. 
*/ + nrules = cnt; + rules = (enum coll_sort_rule *) xrealloc (rules, + nrules * sizeof (*rules)); + } + else + { + if (cnt < nrules) + { + /* Not enough rules in this specification. */ + if (! warned) + lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE"); + + do + rules[cnt] = sort_forward; + while (++cnt < nrules); + } + } + + collate->current_section->rules = rules; +} + + +static void +insert_value (struct linereader *ldfile, struct token *arg, + struct charmap_t *charmap, struct repertoire_t *repertoire, + struct locale_collate_t *collate) +{ + /* First find out what kind of symbol this is. */ + struct charseq *seq; + uint32_t wc; + struct element_t *elem = NULL; + int weight_cnt; + + /* First determine the wide character. There must be such a value, + otherwise we ignore it (if it is no collatio symbol or element). */ + wc = repertoire_find_value (repertoire, arg->val.str.startmb, + arg->val.str.lenmb); + + /* Try to find the character in the charmap. */ + seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb); + + if (wc == ILLEGAL_CHAR_VALUE) + { + /* It's no character, so look through the collation elements and + symbol list. */ + void *result; + + if (find_entry (&collate->sym_table, arg->val.str.startmb, + arg->val.str.lenmb, &result) == 0) + { + /* It's a collation symbol. */ + struct symbol_t *sym = (struct symbol_t *) result; + elem = sym->order; + } + else if (find_entry (&collate->elem_table, arg->val.str.startmb, + arg->val.str.lenmb, &result) != 0) + /* It's also no collation element. Therefore ignore it. */ + return; + } + + /* XXX elem must be defined. */ + + /* Test whether this element is not already in the list. */ + if (elem->next != NULL) + { + lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"), + arg->val.str.startmb, arg->val.str.lenmb, + elem->file, elem->line); + return; + } + + /* Initialize all the fields. 
*/ + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + elem->last = collate->cursor; + elem->next = collate->cursor ? collate->cursor->next : NULL; + elem->weights = (struct element_t **) + obstack_alloc (&collate->mempool, nrules * sizeof (struct element_t *)); + memset (elem->weights, '\0', nrules * sizeof (struct element_t *)); + + if (collate->current_section->first == NULL) + collate->current_section->first = elem; + if (collate->current_section->last == collate->cursor) + collate->current_section->last = elem; + + collate->cursor = elem; + + /* Now read the rest of the line. */ + ldfile->return_widestr = 1; + + weight_cnt = 0; + do + { + arg = lr_token (ldfile, charmap, repertoire); + + if (arg->tok == tok_eof || arg->tok == tok_eol) + { + /* This means the rest of the line uses the current element + as the weight. */ + do + elem->weights[weight_cnt] = elem; + while (++weight_cnt < nrules); + + return; + } + + if (arg->tok == tok_ignore) + { + /* The weight for this level has to be ignored. We use the + null pointer to indicate this. */ + } + else if (arg->tok == tok_bsymbol) + { + + } + } + while (++weight_cnt < nrules); + + lr_ignore_rest (ldfile, weight_cnt == nrules); +} + + +static void +collate_startup (struct linereader *ldfile, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + { + struct locale_collate_t *collate; + + collate = locale->categories[LC_COLLATE].collate = + (struct locale_collate_t *) xcalloc (1, + sizeof (struct locale_collate_t)); + + /* Init the various data structures. 
*/ + init_hash (&collate->elem_table, 100); + init_hash (&collate->sym_table, 100); + init_hash (&collate->seq_table, 500); + obstack_init (&collate->mempool); + + collate->col_weight_max = -1; + } + + ldfile->translate_strings = 1; + ldfile->return_widestr = 0; +} + + +void +collate_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ +} + + +void +collate_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ +} + + +void +collate_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_collate_t *collate; + struct token *now; + struct token *arg; + enum token_t nowtok; + int state = 0; + int was_ellipsis = 0; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_COLLATE' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + if (nowtok == tok_copy) + { + state = 2; + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_string) + goto err_label; + /* XXX Use the name */ + lr_ignore_rest (ldfile, 1); + + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* Prepare the data structures. */ + collate_startup (ldfile, result, ignore_content); + collate = result->categories[LC_COLLATE].collate; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { + case tok_coll_weight_max: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok != tok_number) + goto err_label; + if (collate->col_weight_max != -1) + lr_error (ldfile, _("%s: duplicate definition of `%s'"), + "LC_COLLATE", "col_weight_max"); + else + collate->col_weight_max = arg->val.num; + lr_ignore_rest (ldfile, 1); + break; + + case tok_section_symbol: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else if (!ignore_content) + { + /* Check whether this section is already known. */ + struct section_list *known = collate->sections; + while (known != NULL) + if (strcmp (known->name, arg->val.str.startmb) == 0) + break; + + if (known != NULL) + { + lr_error (ldfile, + _("%s: duplicate declaration of section `%s'"), + "LC_COLLATE", arg->val.str.startmb); + free (arg->val.str.startmb); + } + else + collate->sections = make_seclist_elem (collate, + arg->val.str.startmb, + collate->sections); + + lr_ignore_rest (ldfile, known == NULL); + } + else + { + free (arg->val.str.startmb); + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_collating_element: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + + /* Next the `from' keyword. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_from) + { + free ((char *) symbol); + goto err_label; + } + + ldfile->return_widestr = 1; + + /* Finally the string with the replacement. 
*/ + arg = lr_token (ldfile, charmap, repertoire); + ldfile->return_widestr = 0; + if (arg->tok != tok_string) + goto err_label; + + if (!ignore_content) + { + if (symbol == NULL) + lr_error (ldfile, _("\ +%s: unknown character in collating element name"), + "LC_COLLATE"); + if (arg->val.str.startmb == NULL) + lr_error (ldfile, _("\ +%s: unknown character in collating element definition"), + "LC_COLLATE"); + if (arg->val.str.startwc == NULL) + lr_error (ldfile, _("\ +%s: unknown wide character in collating element definition"), + "LC_COLLATE"); + else if (arg->val.str.lenwc < 2) + lr_error (ldfile, _("\ +%s: substitution string in collating element definition must have at least two characters"), + "LC_COLLATE"); + + if (symbol != NULL) + { + /* The name is already defined. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + goto col_elem_free; + + if (insert_entry (&collate->elem_table, + symbol, symbol_len, + new_element (collate, + arg->val.str.startmb, + arg->val.str.startwc)) + < 0) + lr_error (ldfile, _("\ +error while adding collating element")); + } + else + goto col_elem_free; + } + else + { + col_elem_free: + if (symbol != NULL) + free ((char *) symbol); + if (arg->val.str.startmb != NULL) + free (arg->val.str.startmb); + if (arg->val.str.startwc != NULL) + free (arg->val.str.startwc); + } + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_collating_symbol: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + + if (!ignore_content) + { + if (symbol == NULL) + lr_error (ldfile, _("\ +%s: unknown character in collating symbol name"), + "LC_COLLATE"); + else + { + /* The name is already defined. 
*/ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + goto col_sym_free; + + if (insert_entry (&collate->sym_table, + symbol, symbol_len, + new_symbol (collate)) < 0) + lr_error (ldfile, _("\ +error while adding collating symbol")); + } + } + else + { + col_sym_free: + if (symbol != NULL) + free ((char *) symbol); + } + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_symbol_equivalence: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *newname = arg->val.str.startmb; + size_t newname_len = arg->val.str.lenmb; + const char *symname; + size_t symname_len; + struct symbol_t *symval; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + { + if (newname != NULL) + free ((char *) newname); + goto err_label; + } + + symname = arg->val.str.startmb; + symname_len = arg->val.str.lenmb; + + if (!ignore_content) + { + if (newname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition name"), + "LC_COLLATE"); + goto sym_equiv_free; + } + if (symname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition value"), + "LC_COLLATE"); + goto sym_equiv_free; + } + /* The name is already defined. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symname, symname_len)) + goto col_sym_free; + + /* See whether the symbol name is already defined. 
*/ + if (find_entry (&collate->sym_table, symname, symname_len, + (void **) &symval) != 0) + { + lr_error (ldfile, _("\ +%s: unknown symbol `%s' in equivalent definition"), + "LC_COLLATE", symname); + goto col_sym_free; + } + + if (insert_entry (&collate->sym_table, + newname, newname_len, symval) < 0) + { + lr_error (ldfile, _("\ +error while adding equivalent collating symbol")); + goto sym_equiv_free; + } + + free ((char *) symname); + } + else + { + sym_equiv_free: + if (newname != NULL) + free ((char *) newname); + if (symname != NULL) + free ((char *) symname); + } + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_order_start: + if (state != 0 && state != 1) + goto err_label; + state = 1; + + /* The 14652 draft does not specify whether all `order_start' lines + must contain the same number of sort-rules, but 14651 does. So + we require this here as well. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok == tok_bsymbol) + { + /* This better should be a section name. */ + struct section_list *sp = collate->sections; + while (sp != NULL + && strcmp (sp->name, arg->val.str.startmb) != 0) + sp = sp->next; + + if (sp == NULL) + { + lr_error (ldfile, _("\ +%s: unknown section name `%s'"), + "LC_COLLATE", arg->val.str.startmb); + /* We use the error section. */ + collate->current_section = &collate->error_section; + } + else + { + /* Remember this section. */ + collate->current_section = sp; + + /* One should not be allowed to open the same + section twice. */ + if (sp->first != NULL) + lr_error (ldfile, _("\ +%s: multiple order definitions for section `%s'"), + "LC_COLLATE", sp->name); + + /* Next should come the end of the line or a semicolon. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok == tok_eol) + { + uint32_t cnt; + + /* This means we have exactly one rule: `forward'. 
*/ + if (collate->nrules > 1) + lr_error (ldfile, _("\ +%s: invalid number of sorting rules"), + "LC_COLLATE"); + else + collate->nrules = 1; + sp->rules = obstack_alloc (&collate->mempool, + (sizeof (enum coll_sort_rule) + * collate->nrules)); + for (cnt = 0; cnt < collate->nrules; ++cnt) + sp->rules[cnt] = sort_forward; + + /* Next line. */ + break; + } + + /* Get the next token. */ + arg = lr_token (ldfile, charmap, repertoire); + } + } + else + { + /* There is no section symbol. Therefore we use the unnamed + section. */ + collate->current_section = &collate->unnamed_section; + + if (collate->unnamed_section.first != NULL) + lr_error (ldfile, _("\ +%s: multiple order definitions for unnamed section"), + "LC_COLLATE"); + } + + /* Now read the direction names. */ + read_directions (ldfile, arg, charmap, repertoire, collate); + break; + + case tok_order_end: + if (state != 1) + goto err_label; + state = 2; + lr_ignore_rest (ldfile, 1); + break; + + case tok_reorder_after: + if (state != 2 && state != 3) + goto err_label; + state = 3; + /* XXX get symbol */ + break; + + case tok_reorder_end: + if (state != 3) + goto err_label; + state = 4; + lr_ignore_rest (ldfile, 1); + break; + + case tok_bsymbol: + if (state != 1 && state != 3) + goto err_label; + + if (state == 3) + { + /* It is possible that we already have this collation sequence. + In this case we move the entry. */ + struct element_t *seqp; + + if (find_entry (&collate->seq_table, arg->val.str.startmb, + arg->val.str.lenmb, (void **) &seqp) == 0) + { + /* Remove the entry from the old position. */ + if (seqp->last == NULL) + collate->start = seqp->next; + else + seqp->last->next = seqp->next; + if (seqp->next != NULL) + seqp->next->last = seqp->last; + + /* We also have to check whether this entry is the + first or last of a section. */ + if (seqp->section->first == seqp) + { + if (seqp->section->first == seqp->section->last) + /* This setion has no content anymore. 
*/ + seqp->section->first = seqp->section->last = NULL; + else + seqp->section->first = seqp->next; + } + else if (seqp->section->last == seqp) + seqp->section->last = seqp->last; + + seqp->last = seqp->next = NULL; + } + } + + /* Now insert in the new place. */ + insert_value (ldfile, arg, charmap, repertoire, collate); + break; + + case tok_undefined: + if (state != 1) + goto err_label; + /* XXX handle UNDEFINED weight */ + break; + + case tok_ellipsis3: + if (state != 1 && state != 3) + goto err_label; + + was_ellipsis = 1; + /* XXX Read the remainder of the line and remember what are + the weights. */ + break; + + case tok_end: + /* Next we assume `LC_COLLATE'. */ + if (state == 0) + /* We must either see a copy statement or have ordering values. */ + lr_error (ldfile, _("%s: empty category description not allowed"), + "LC_COLLATE"); + else if (state == 1) + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + else if (state == 3) + error (0, 0, _("%s: missing `reorder-end' keyword"), + "LC_COLLATE"); + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE"); + else if (arg->tok != tok_lc_collate) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_collate); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); +} + + +#if 0 /* What kind of symbols get defined? 
*/ enum coll_symbol @@ -75,7 +1077,8 @@ typedef struct patch_t typedef struct element_t { - const wchar_t *name; + const char *namemb; + const uint32_t *namewc; unsigned int this_weight; struct element_t *next; @@ -95,12 +1098,12 @@ struct locale_collate_t hash_table elements; struct obstack element_mem; - /* The result table. */ - hash_table result; + /* The result tables. */ + hash_table resultmb; + hash_table resultwc; /* Sorting rules given in order_start line. */ - u_int32_t nrules; - u_int32_t nrules_max; + uint32_t nrules; enum coll_sort_rule *rules; /* Used while recognizing symbol composed of multiple tokens @@ -114,20 +1117,12 @@ struct locale_collate_t /* Was lastline ellipsis? */ int was_ellipsis; /* Value of last entry if was character. */ - wchar_t last_char; + uint32_t last_char; /* Current element. */ element_t *current_element; /* What kind of symbol is current element. */ enum coll_symbol kind; - /* While collecting the weights we need some temporary space. */ - unsigned int current_order; - int *weight_cnt; - unsigned int weight_idx; - unsigned int *weight; - size_t nweight; - size_t nweight_max; - /* Patch lists. */ patch_t *current_patch; patch_t *all_patches; @@ -135,6 +1130,10 @@ struct locale_collate_t /* Room for the UNDEFINED information. */ element_t undefined; unsigned int undefined_len; + + /* Script information. */ + const char **scripts; + unsigned int nscripts; }; @@ -142,25 +1141,22 @@ struct locale_collate_t extern int verbose; -void *xmalloc (size_t __n); -void *xrealloc (void *__p, size_t __n); - #define obstack_chunk_alloc malloc #define obstack_chunk_free free -void -collate_startup (struct linereader *lr, struct localedef_t *locale, - struct charset_t *charset) -{ - struct locale_collate_t *collate; +/* Prototypes for local functions. */ +static void collate_startup (struct linereader *ldfile, + struct localedef_t *locale, + struct charmap_t *charmap, int ignore_content); - /* We have a definition for LC_COLLATE. 
*/ - copy_posix.mask &= ~(1 << LC_COLLATE); - /* It is important that we always use UCS4 encoding for strings now. */ - encoding_method = ENC_UCS4; +static void +collate_startup (struct linereader *ldfile, struct localedef_t *locale, + struct charmap_t *charset, int ignore_content) +{ + struct locale_collate_t *collate; /* Allocate the needed room. */ locale->categories[LC_COLLATE].collate = collate = @@ -196,12 +1192,14 @@ collate_startup (struct linereader *lr, struct localedef_t *locale, /* This tells us no UNDEFINED entry was found until now. */ memset (&collate->undefined, '\0', sizeof (collate->undefined)); - lr->translate_strings = 0; + ldfile->translate_strings = 0; + ldfile->return_widestr = 0; } void -collate_finish (struct localedef_t *locale, struct charset_t *charset) +collate_finish (struct localedef_t *locale, struct charset_t *charset, + struct repertoire_t *repertoire) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; patch_t *patch; @@ -211,7 +1209,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) correctly filled. */ for (patch = collate->all_patches; patch != NULL; patch = patch->next) { - wchar_t wch; + uint32_t wch; size_t toklen = strlen (patch->token); void *ptmp; unsigned int value = 0; @@ -221,7 +1219,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) { element_t *runp; - if (find_entry (&collate->result, &wch, sizeof (wchar_t), + if (find_entry (&collate->result, &wch, sizeof (uint32_t), (void *) &runp) < 0) runp = NULL; for (; runp != NULL; runp = runp->next) @@ -262,9 +1260,9 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) |* XXX We should test whether really an unspecified character *| |* exists before giving the message. 
*| \**************************************************************/ - u_int32_t weight; + uint32_t weight; - if (/* XXX Remove the 0 & */ 0 && !be_quiet) + if (!be_quiet) error (0, 0, _("no definition of `UNDEFINED'")); collate->undefined.ordering_len = collate->nrules; @@ -272,7 +1270,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) for (cnt = 0; cnt < collate->nrules; ++cnt) { - u_int32_t one = 1; + uint32_t one = 1; obstack_grow (&collate->element_mem, &one, sizeof (one)); } @@ -282,7 +1280,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) collate->undefined.ordering = obstack_finish (&collate->element_mem); } - collate->undefined_len = 2; /* For the name: 1 x wchar_t + L'\0'. */ + collate->undefined_len = 2; /* For the name: 1 x uint32_t + L'\0'. */ for (cnt = 0; cnt < collate->nrules; ++cnt) collate->undefined_len += 1 + collate->undefined.ordering[cnt]; } @@ -291,40 +1289,40 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) void collate_output (struct localedef_t *locale, struct charset_t *charset, - const char *output_path) + struct repertoire_t *repertoire, const char *output_path) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - u_int32_t table_size, table_best, level_best, sum_best; + uint32_t table_size, table_best, level_best, sum_best; void *last; element_t *pelem; - wchar_t *name; + uint32_t *name; size_t len; const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE); struct iovec iov[2 + nelems]; struct locale_file data; - u_int32_t idx[nelems]; + uint32_t idx[nelems]; struct obstack non_simple; struct obstack string_pool; size_t cnt, entry_size; - u_int32_t undefined_offset = UINT_MAX; - u_int32_t *table, *extra, *table2, *extra2; + uint32_t undefined_offset = UINT_MAX; + uint32_t *table, *extra, *table2, *extra2; size_t extra_len; - u_int32_t element_hash_tab_size; - u_int32_t *element_hash_tab; - u_int32_t *element_hash_tab_ob; - u_int32_t 
element_string_pool_size; + uint32_t element_hash_tab_size; + uint32_t *element_hash_tab; + uint32_t *element_hash_tab_ob; + uint32_t element_string_pool_size; char *element_string_pool; - u_int32_t element_value_size; - wchar_t *element_value; - wchar_t *element_value_ob; - u_int32_t symbols_hash_tab_size; - u_int32_t *symbols_hash_tab; - u_int32_t *symbols_hash_tab_ob; - u_int32_t symbols_string_pool_size; + uint32_t element_value_size; + uint32_t *element_value; + uint32_t *element_value_ob; + uint32_t symbols_hash_tab_size; + uint32_t *symbols_hash_tab; + uint32_t *symbols_hash_tab_ob; + uint32_t symbols_string_pool_size; char *symbols_string_pool; - u_int32_t symbols_class_size; - u_int32_t *symbols_class; - u_int32_t *symbols_class_ob; + uint32_t symbols_class_size; + uint32_t *symbols_class; + uint32_t *symbols_class_ob; hash_table *hash_tab; unsigned int dummy_weights[collate->nrules + 1]; @@ -382,29 +1380,29 @@ Computing table size for collation information might take a while..."), iov[1].iov_len = sizeof (idx); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (u_int32_t); + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (uint32_t); - table = (u_int32_t *) alloca (collate->nrules * sizeof (u_int32_t)); + table = (uint32_t *) alloca (collate->nrules * sizeof (uint32_t)); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len - = collate->nrules * sizeof (u_int32_t); + = collate->nrules * sizeof (uint32_t); /* Another trick here. Describing the collation method needs only a few bits (3, to be exact). But the binary file should be accessible by machines with both endianesses and so we store both forms in the same word. 
*/ for (cnt = 0; cnt < collate->nrules; ++cnt) - table[cnt] = collate->rules[cnt] | SWAPU32 (collate->rules[cnt]); + table[cnt] = collate->rules[cnt] | bswap_32 (collate->rules[cnt]); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (u_int32_t); + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len - = sizeof (u_int32_t); + = sizeof (uint32_t); entry_size = 1 + MAX (collate->nrules, 2); - table = (u_int32_t *) alloca (table_best * level_best * entry_size + table = (uint32_t *) alloca (table_best * level_best * entry_size * sizeof (table[0])); memset (table, '\0', table_best * level_best * entry_size * sizeof (table[0])); @@ -413,7 +1411,7 @@ Computing table size for collation information might take a while..."), /* Macros for inserting in output table. */ #define ADD_VALUE(expr) \ do { \ - u_int32_t to_write = (u_int32_t) expr; \ + uint32_t to_write = (uint32_t) expr; \ obstack_grow (&non_simple, &to_write, sizeof (to_write)); \ } while (0) @@ -424,7 +1422,7 @@ Computing table size for collation information might take a while..."), ADD_VALUE (len); \ \ wlen = wcslen (pelem->name); \ - obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \ + obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (uint32_t)); \ \ idx = collate->nrules; \ for (cnt = 0; cnt < collate->nrules; ++cnt) \ @@ -448,14 +1446,14 @@ Computing table size for collation information might take a while..."), table[(level * table_best + slot) * entry_size + 1] \ = FORWARD_CHAR; \ table[(level * table_best + slot) * entry_size + 2] \ - = obstack_object_size (&non_simple) / sizeof (u_int32_t); \ + = obstack_object_size (&non_simple) / sizeof (uint32_t); \ \ /* Here we have to construct the non-simple table entry. 
First \ compute the total length of this entry. */ \ for (runp = (pelem); runp != NULL; runp = runp->next) \ if (runp->ordering != NULL) \ { \ - u_int32_t value; \ + uint32_t value; \ size_t cnt; \ \ value = 1 + wcslen (runp->name) + 1; \ @@ -491,7 +1489,7 @@ Computing table size for collation information might take a while..."), ADD_VALUE (collate->undefined.ordering[cnt]); \ for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \ { \ - if ((wchar_t) collate->undefined.ordering[idx] \ + if ((uint32_t) collate->undefined.ordering[idx] \ == ELLIPSIS_CHAR) \ ADD_VALUE ((pelem)->name[0]); \ else \ @@ -543,14 +1541,15 @@ Computing table size for collation information might take a while..."), { /* We have to fill in the information from the UNDEFINED entry. */ - table[cnt * entry_size] = (u_int32_t) cnt; + table[cnt * entry_size] = (uint32_t) cnt; if (collate->undefined.ordering_len == collate->nrules) { size_t inner; for (inner = 0; inner < collate->nrules; ++inner) - if ((wchar_t)collate->undefined.ordering[collate->nrules + inner] + if ((uint32_t)collate->undefined.ordering[collate->nrules + + inner] == ELLIPSIS_CHAR) table[cnt * entry_size + 1 + inner] = cnt; else @@ -609,8 +1608,6 @@ Computing table size for collation information might take a while..."), size_t idx, cnt; undefined_offset = obstack_object_size (&non_simple); - assert (undefined_offset % sizeof (u_int32_t) == 0); - undefined_offset /= sizeof (u_int32_t); idx = collate->nrules; for (cnt = 0; cnt < collate->nrules; ++cnt) @@ -625,19 +1622,19 @@ Computing table size for collation information might take a while..."), /* Finish the extra block. */ extra_len = obstack_object_size (&non_simple); - extra = (u_int32_t *) obstack_finish (&non_simple); - assert ((extra_len % sizeof (u_int32_t)) == 0); + extra = (uint32_t *) obstack_finish (&non_simple); + assert ((extra_len % sizeof (uint32_t)) == 0); /* Now we have to build the two array for the other byte ordering. 
*/ - table2 = (u_int32_t *) alloca (table_best * level_best * entry_size + table2 = (uint32_t *) alloca (table_best * level_best * entry_size * sizeof (table[0])); - extra2 = (u_int32_t *) alloca (extra_len); + extra2 = (uint32_t *) alloca (extra_len); for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt) - table2[cnt] = SWAPU32 (table[cnt]); + table2[cnt] = bswap_32 (table[cnt]); - for (cnt = 0; cnt < extra_len / sizeof (u_int32_t); ++cnt) - extra2[cnt] = SWAPU32 (extra[cnt]); + for (cnt = 0; cnt < extra_len / sizeof (uint32_t); ++cnt) + extra2[cnt] = bswap_32 (extra2[cnt]); /* We need a simple hashing table to get a collation-element->chars mapping. We again use internal hashing using a secondary hashing @@ -687,9 +1684,9 @@ Computing table size for collation information might take a while..."), element_hash_tab_size = 7; element_hash_tab = obstack_alloc (&non_simple, (2 * element_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); memset (element_hash_tab, '\377', (2 * element_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); ptr = NULL; while (iterate_table (&collate->elements, &ptr, (const void **) &key, @@ -698,7 +1695,7 @@ Computing table size for collation information might take a while..."), size_t hash_val = hash_string (key, keylen); size_t idx = hash_val % element_hash_tab_size; - if (element_hash_tab[2 * idx] != (~((u_int32_t) 0))) + if (element_hash_tab[2 * idx] != (~((uint32_t) 0))) { /* We need the second hashing function. 
*/ size_t c = 1 + (hash_val % (element_hash_tab_size - 2)); @@ -708,16 +1705,16 @@ Computing table size for collation information might take a while..."), idx -= element_hash_tab_size - c; else idx += c; - while (element_hash_tab[2 * idx] != (~((u_int32_t) 0))); + while (element_hash_tab[2 * idx] != (~((uint32_t) 0))); } element_hash_tab[2 * idx] = obstack_object_size (&non_simple); element_hash_tab[2 * idx + 1] = (obstack_object_size (&string_pool) - / sizeof (wchar_t)); + / sizeof (uint32_t)); obstack_grow0 (&non_simple, key, keylen); obstack_grow (&string_pool, data->name, - (wcslen (data->name) + 1) * sizeof (wchar_t)); + (wcslen (data->name) + 1) * sizeof (uint32_t)); } if (obstack_object_size (&non_simple) % 4 != 0) @@ -732,18 +1729,13 @@ Computing table size for collation information might take a while..."), /* Create the tables for the other byte order. */ element_hash_tab_ob = obstack_alloc (&non_simple, (2 * element_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); for (cnt = 0; cnt < 2 * element_hash_tab_size; ++cnt) - element_hash_tab_ob[cnt] = SWAPU32 (element_hash_tab[cnt]); + element_hash_tab_ob[cnt] = bswap_U32 (element_hash_tab[cnt]); element_value_ob = obstack_alloc (&string_pool, element_value_size); - if (sizeof (wchar_t) != 4) - { - fputs ("sizeof (wchar_t) != 4 currently not handled", stderr); - abort (); - } for (cnt = 0; cnt < element_value_size / 4; ++cnt) - element_value_ob[cnt] = SWAPU32 (element_value[cnt]); + element_value_ob[cnt] = bswap_32 (element_value[cnt]); } /* Store collation elements as map to collation class. 
There are @@ -757,9 +1749,9 @@ Computing table size for collation information might take a while..."), + collate->elements.filled + collate->symbols.filled)) / 3); symbols_hash_tab = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); memset (symbols_hash_tab, '\377', (2 * symbols_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); /* Now fill the array. First the symbols from the character set, then the collation elements and last the collation symbols. */ @@ -777,29 +1769,29 @@ Computing table size for collation information might take a while..."), { size_t hash_val; size_t idx; - u_int32_t word; + uint32_t word; unsigned int *weights; if (hash_tab == &charset->char_table || hash_tab == &collate->elements) { element_t *lastp, *firstp; - wchar_t dummy_name[2]; - const wchar_t *name; + uint32_t dummy_name[2]; + const uint32_t *name; size_t name_len; if (hash_tab == &charset->char_table) { - dummy_name[0] = (wchar_t) ((unsigned long int) data); + dummy_name[0] = (uint32_t) ((unsigned long int) data); dummy_name[1] = L'\0'; name = dummy_name; - name_len = sizeof (wchar_t); + name_len = sizeof (uint32_t); } else { element_t *elemp = (element_t *) data; name = elemp->name; - name_len = wcslen (name) * sizeof (wchar_t); + name_len = wcslen (name) * sizeof (uint32_t); } /* First check whether this character is used at all. 
*/ @@ -815,8 +1807,6 @@ Computing table size for collation information might take a while..."), lastp = firstp; while (lastp->next != NULL && wcscmp (name, lastp->name)) lastp = lastp->next; - if (lastp->ordering == NULL) - lastp = &collate->undefined; } weights = lastp->ordering; @@ -835,7 +1825,7 @@ Computing table size for collation information might take a while..."), hash_val = hash_string (key, keylen); idx = hash_val % symbols_hash_tab_size; - if (symbols_hash_tab[2 * idx] != (~((u_int32_t) 0))) + if (symbols_hash_tab[2 * idx] != (~((uint32_t) 0))) { /* We need the second hashing function. */ size_t c = 1 + (hash_val % (symbols_hash_tab_size - 2)); @@ -845,23 +1835,23 @@ Computing table size for collation information might take a while..."), idx -= symbols_hash_tab_size - c; else idx += c; - while (symbols_hash_tab[2 * idx] != (~((u_int32_t) 0))); + while (symbols_hash_tab[2 * idx] != (~((uint32_t) 0))); } symbols_hash_tab[2 * idx] = obstack_object_size (&string_pool); symbols_hash_tab[2 * idx + 1] = (obstack_object_size (&non_simple) - / sizeof (u_int32_t)); + / sizeof (uint32_t)); obstack_grow0 (&string_pool, key, keylen); /* Adding the first weight looks complicated. We have to deal with the kind it is stored and with the fact that original - form uses `unsigned int's while we need `u_int32_t' here. */ + form uses `unsigned int's while we need `uint32_t' here. */ word = weights[0]; - obstack_grow (&non_simple, &word, sizeof (u_int32_t)); + obstack_grow (&non_simple, &word, sizeof (uint32_t)); for (cnt = 0; cnt < weights[0]; ++cnt) { word = weights[collate->nrules + cnt]; - obstack_grow (&non_simple, &word, sizeof (u_int32_t)); + obstack_grow (&non_simple, &word, sizeof (uint32_t)); } } @@ -884,13 +1874,13 @@ Computing table size for collation information might take a while..."), /* Generate tables with other byte order. 
*/ symbols_hash_tab_ob = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); for (cnt = 0; cnt < 2 * symbols_hash_tab_size; ++cnt) - symbols_hash_tab_ob[cnt] = SWAPU32 (symbols_hash_tab[cnt]); + symbols_hash_tab_ob[cnt] = bswap_32 (symbols_hash_tab[cnt]); symbols_class_ob = obstack_alloc (&non_simple, symbols_class_size); for (cnt = 0; cnt < symbols_class_size / 4; ++cnt) - symbols_class_ob[cnt] = SWAPU32 (symbols_class[cnt]); + symbols_class_ob[cnt] = bswap_32 (symbols_class[cnt]); /* Store table addresses and lengths. */ @@ -925,34 +1915,34 @@ Computing table size for collation information might take a while..."), #endif iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (u_int32_t); + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_base = &element_hash_tab_size; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_len - = sizeof (u_int32_t); + = sizeof (uint32_t); #if __BYTE_ORDER == __BIG_ENDIAN iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base = element_hash_tab; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len - = 2 * element_hash_tab_size * sizeof (u_int32_t); + = 2 * element_hash_tab_size * sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base = element_hash_tab_ob; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len - = 2 * element_hash_tab_size * sizeof (u_int32_t); + = 2 * element_hash_tab_size * sizeof (uint32_t); #else iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base = element_hash_tab; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len - = 2 * element_hash_tab_size * sizeof (u_int32_t); + = 2 * element_hash_tab_size * sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base = element_hash_tab_ob; iov[2 + _NL_ITEM_INDEX 
(_NL_COLLATE_ELEM_HASH_EB)].iov_len - = 2 * element_hash_tab_size * sizeof (u_int32_t); + = 2 * element_hash_tab_size * sizeof (uint32_t); #endif iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_base @@ -985,28 +1975,28 @@ Computing table size for collation information might take a while..."), iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_base = &symbols_hash_tab_size; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_len - = sizeof (u_int32_t); + = sizeof (uint32_t); #if __BYTE_ORDER == __BIG_ENDIAN iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base = symbols_hash_tab; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len - = 2 * symbols_hash_tab_size * sizeof (u_int32_t); + = 2 * symbols_hash_tab_size * sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base = symbols_hash_tab_ob; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len - = 2 * symbols_hash_tab_size * sizeof (u_int32_t); + = 2 * symbols_hash_tab_size * sizeof (uint32_t); #else iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base = symbols_hash_tab; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len - = 2 * symbols_hash_tab_size * sizeof (u_int32_t); + = 2 * symbols_hash_tab_size * sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base = symbols_hash_tab_ob; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len - = 2 * symbols_hash_tab_size * sizeof (u_int32_t); + = 2 * symbols_hash_tab_size * sizeof (uint32_t); #endif iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_base @@ -1048,58 +2038,64 @@ Computing table size for collation information might take a while..."), } -void -collate_element_to (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +static int +collate_element_to (struct linereader *ldfile, + struct locale_collate_t *collate, + struct token *code, struct charmap_t *charmap, + struct repertoire_t *repertoire) { - struct 
locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - unsigned int value; + struct charseq *seq; + uint32_t value; void *not_used; - if (collate->combine_token != NULL) + seq = charmap_find_value (charmap, code->val.str.start, code->val.str.len); + if (seq != NULL) { - free ((void *) collate->combine_token); - collate->combine_token = NULL; + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in charmap"), + (int) code->val.str.len, code->val.str.start); + return 1; } - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); - if ((wchar_t) value != ILLEGAL_CHAR_VALUE) + value = repertoire_find_value (repertoire, code->val.str.start, + code->val.str.len); + if (value != ILLEGAL_CHAR_VALUE) { - lr_error (lr, _("symbol for multicharacter collating element " - "`%.*s' duplicates symbolic name in charset"), + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in repertoire"), (int) code->val.str.len, code->val.str.start); - return; + return 1; } if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, &not_used) >= 0) { - lr_error (lr, _("symbol for multicharacter collating element " - "`%.*s' duplicates element definition"), + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates other element definition"), (int) code->val.str.len, code->val.str.start); - return; + return 1; } if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, &not_used) >= 0) { - lr_error (lr, _("symbol for multicharacter collating element " + lr_error (ldfile, _("symbol for multicharacter collating element " "`%.*s' duplicates symbol definition"), (int) code->val.str.len, code->val.str.start); - return; + return 1; } - collate->combine_token = code->val.str.start; - collate->combine_token_len = code->val.str.len; + return 0; } -void -collate_element_from (struct linereader 
*lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +static void +collate_element_from (struct linereader *ldfile, + struct locale_collate_t *collate, + const char *to_str, struct token *code, + struct charmap_t *charmap, + struct repertoire_t *repertoire) { - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; element_t *elemp, *runp; /* CODE is a string. */ @@ -1108,33 +2104,26 @@ collate_element_from (struct linereader *lr, struct localedef_t *locale, /* We have to translate the string. It may contain <...> character names. */ - elemp->name = (wchar_t *) translate_string (code->val.str.start, charset); + elemp->namemb = code->val.str.startmb; + elemp->namewc = code->val.str.startwc; elemp->this_weight = 0; elemp->ordering = NULL; elemp->ordering_len = 0; - free (code->val.str.start); - - if (elemp->name == NULL) + if (elemp->namemb == NULL && elemp->namewc == NULL) { - /* At least one character in the string is not defined. We simply - do nothing. */ + /* The string contains characters which are not in the charmap nor + in the repertoire. Ignore the string. */ if (verbose) - lr_error (lr, _("\ + lr_error (ldfile, _("\ `from' string in collation element declaration contains unknown character")); return; } - if (elemp->name[0] == L'\0' || elemp->name[1] == L'\0') - { - lr_error (lr, _("illegal collation element")); - return; - } - /* The entries in the linked lists of RESULT are sorting in descending order. The order is important for the `strcoll' and `wcscoll' functions. */ - if (find_entry (&collate->result, elemp->name, sizeof (wchar_t), + if (find_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t), (void *) &runp) >= 0) { /* We already have an entry with this key. 
Check whether it is @@ -1144,7 +2133,49 @@ collate_element_from (struct linereader *lr, struct localedef_t *locale, do { - cmpres = wcscmp (elemp->name, runp->name); + cmpres = wcscmp (elemp->namewc, runp->namewc); + if (cmpres <= 0) + break; + prevp = runp; + } + while ((runp = runp->next) != NULL); + + if (cmpres == 0) + lr_error (ldfile, _("\ +duplicate collating element definition (repertoire)")); + else + { + elemp->next = runp; + if (prevp == NULL) + { + if (set_entry (&collate->resultwc, elemp->namewc, + sizeof (uint32_t), elemp) < 0) + error (EXIT_FAILURE, 0, _("\ +error while inserting collation element into hash table")); + } + else + prevp->next = elemp; + } + } + else + { + elemp->next = NULL; + if (insert_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t), + elemp) < 0) + error (EXIT_FAILURE, errno, _("error while inserting to hash table")); + } + + /* Now also insert the element definition in the multibyte table. */ + if (find_entry (&collate->resultmb, elemp->namemb, 1, (void *) &runp) >= 0) + { + /* We already have an entry with this key. Check whether it is + identical. 
*/ + element_t *prevp = NULL; + int cmpres; + + do + { + cmpres = strcmp (elemp->namemb, runp->namemb); if (cmpres <= 0) break; prevp = runp; @@ -1152,14 +2183,14 @@ collate_element_from (struct linereader *lr, struct localedef_t *locale, while ((runp = runp->next) != NULL); if (cmpres == 0) - lr_error (lr, _("duplicate collating element definition")); + lr_error (ldfile, _("\ +duplicate collating element definition (charmap)")); else { elemp->next = runp; if (prevp == NULL) { - if (set_entry (&collate->result, elemp->name, sizeof (wchar_t), - elemp) < 0) + if (set_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0) error (EXIT_FAILURE, 0, _("\ error while inserting collation element into hash table")); } @@ -1170,32 +2201,41 @@ error while inserting collation element into hash table")); else { elemp->next = NULL; - if (insert_entry (&collate->result, elemp->name, sizeof (wchar_t), elemp) - < 0) + if (insert_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0) error (EXIT_FAILURE, errno, _("error while inserting to hash table")); } - if (insert_entry (&collate->elements, collate->combine_token, - collate->combine_token_len, (void *) elemp) < 0) - lr_error (lr, _("cannot insert new collating symbol definition: %s"), + /* Finally install the mapping from the `to'-name to the `from'-name. 
*/ + if (insert_entry (&collate->elements, to_str, strlen (to_str), + (void *) elemp) < 0) + lr_error (ldfile, _("cannot insert new collating symbol definition: %s"), strerror (errno)); } -void -collate_symbol (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +static void +collate_symbol (struct linereader *ldfile, struct locale_collate_t *collate, + struct token *code, struct charmap_t *charmap, + struct repertoire_t *repertoire) { - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - wchar_t value; + uint32_t value; + struct charseq *seq; void *not_used; - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); + seq = charset_find_value (charmap, code->val.str.start, code->val.str.len); + if (seq != NULL) + { + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in charmap"), + (int) code->val.str.len, code->val.str.start); + return; + } + + value = repertoire (repertoire, code->val.str.start, code->val.str.len); if (value != ILLEGAL_CHAR_VALUE) { - lr_error (lr, _("symbol for multicharacter collating element " - "`%.*s' duplicates symbolic name in charset"), + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in repertoire"), (int) code->val.str.len, code->val.str.start); return; } @@ -1203,7 +2243,7 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale, if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, &not_used) >= 0) { - lr_error (lr, _("symbol for multicharacter collating element " + lr_error (ldfile, _("symbol for multicharacter collating element " "`%.*s' duplicates element definition"), (int) code->val.str.len, code->val.str.start); return; @@ -1212,7 +2252,7 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale, if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len, 
&not_used) >= 0) { - lr_error (lr, _("symbol for multicharacter collating element " + lr_error (ldfile, _("symbol for multicharacter collating element " "`%.*s' duplicates other symbol definition"), (int) code->val.str.len, code->val.str.start); return; @@ -1220,13 +2260,13 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale, if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len, (void *) 0) < 0) - lr_error (lr, _("cannot insert new collating symbol definition: %s"), + lr_error (ldfile, _("cannot insert new collating symbol definition: %s"), strerror (errno)); } void -collate_new_order (struct linereader *lr, struct localedef_t *locale, +collate_new_order (struct linereader *ldfile, struct localedef_t *locale, enum coll_sort_rule sort_rule) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; @@ -1245,7 +2285,7 @@ collate_new_order (struct linereader *lr, struct localedef_t *locale, void -collate_build_arrays (struct linereader *lr, struct localedef_t *locale) +collate_build_arrays (struct linereader *ldfile, struct localedef_t *locale) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; @@ -1264,13 +2304,13 @@ collate_build_arrays (struct linereader *lr, struct localedef_t *locale) int -collate_order_elem (struct linereader *lr, struct localedef_t *locale, +collate_order_elem (struct linereader *ldfile, struct localedef_t *locale, struct token *code, struct charset_t *charset) { - const wchar_t zero = L'\0'; + const uint32_t zero = L'\0'; struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; int result = 0; - wchar_t value; + uint32_t value; void *tmp; unsigned int i; @@ -1286,7 +2326,7 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, collate->kind = character; - if (find_entry (&collate->result, &value, sizeof (wchar_t), + if (find_entry (&collate->result, &value, sizeof (uint32_t), (void *) &firstp) < 0) firstp = lastp = NULL; else 
@@ -1299,9 +2339,10 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, if (lastp->name[0] == value && lastp->name[1] == L'\0') { - lr_error (lr, _("duplicate definition for character `%.*s'"), + lr_error (ldfile, + _("duplicate definition for character `%.*s'"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; break; } @@ -1315,7 +2356,7 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, obstack_grow (&collate->element_mem, &zero, sizeof (zero)); collate->current_element->name = - (const wchar_t *) obstack_finish (&collate->element_mem); + (const uint32_t *) obstack_finish (&collate->element_mem); collate->current_element->this_weight = ++collate->order_cnt; @@ -1323,10 +2364,10 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, if (firstp == NULL) { - if (insert_entry (&collate->result, &value, sizeof (wchar_t), + if (insert_entry (&collate->result, &value, sizeof (uint32_t), (void *) collate->current_element) < 0) { - lr_error (lr, _("cannot insert collation element `%.*s'"), + lr_error (ldfile, _("cannot insert collation element `%.*s'"), (int) code->val.str.len, code->val.str.start); exit (4); } @@ -1341,10 +2382,10 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, if (collate->current_element->this_weight != 0) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ collation element `%.*s' appears more than once: ignore line"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; break; } @@ -1359,10 +2400,10 @@ collation element `%.*s' appears more than once: ignore line"), if ((unsigned long int) tmp != 0ul) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ collation symbol `%.*s' appears more than once: ignore line"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; break; } @@ 
-1372,16 +2413,16 @@ collation symbol `%.*s' appears more than once: ignore line"), if (set_entry (&collate->symbols, code->val.str.start, code->val.str.len, (void *) order) < 0) { - lr_error (lr, _("cannot process order specification")); + lr_error (ldfile, _("cannot process order specification")); exit (4); } } else { if (verbose) - lr_error (lr, _("unknown symbol `%.*s': line ignored"), + lr_error (ldfile, _("unknown symbol `%.*s': line ignored"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; } @@ -1395,7 +2436,7 @@ collation symbol `%.*s' appears more than once: ignore line"), case tok_ellipsis: if (collate->was_ellipsis) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ two lines in a row containing `...' are not allowed")); result = -1; } @@ -1403,9 +2444,9 @@ two lines in a row containing `...' are not allowed")); { /* An ellipsis requires the previous line to be an character definition. */ - lr_error (lr, _("\ + lr_error (ldfile, _("\ line before ellipsis does not contain definition for character constant")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; } else @@ -1424,21 +2465,21 @@ line before ellipsis does not contain definition for character constant")); { if (collate->kind != character) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ line after ellipsis must contain character definition")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; } else if (collate->last_char > value) { - lr_error (lr, _("end point of ellipsis range is bigger then start")); - lr_ignore_rest (lr, 0); + lr_error (ldfile, _("end point of ellipsis range is bigger then start")); + lr_ignore_rest (ldfile, 0); result = -1; } else { /* We can fill the arrays with the information we need. 
*/ - wchar_t name[2]; + uint32_t name[2]; unsigned int *data; size_t *ptr; size_t cnt; @@ -1450,9 +2491,6 @@ line after ellipsis must contain character definition")); * sizeof (unsigned int)); ptr = (size_t *) alloca (collate->nrules * sizeof (size_t)); - if (data == NULL || ptr == NULL) - error (4, 0, _("memory exhausted")); - /* Prepare data. Because the characters covered by an ellipsis all have equal values we prepare the data once and only change the variable number (if there are any). @@ -1470,7 +2508,7 @@ line after ellipsis must contain character definition")); data[collate->nrules + cnt] = collate->weight[cnt]; for (cnt = 0; cnt < collate->nrules; ++cnt) - if ((wchar_t) data[ptr[cnt]] != ELLIPSIS_CHAR) + if ((uint32_t) data[ptr[cnt]] != ELLIPSIS_CHAR) ptr[cnt] = 0; while (name[0] <= value) @@ -1479,12 +2517,9 @@ line after ellipsis must contain character definition")); pelem = (element_t *) obstack_alloc (&collate->element_mem, sizeof (element_t)); - if (pelem == NULL) - error (4, 0, _("memory exhausted")); - pelem->name - = (const wchar_t *) obstack_copy (&collate->element_mem, - name, 2 * sizeof (wchar_t)); + = (const uint32_t *) obstack_copy (&collate->element_mem, + name, 2 * sizeof (uint32_t)); pelem->this_weight = ++collate->order_cnt; pelem->ordering_len = collate->nweight; @@ -1500,17 +2535,17 @@ line after ellipsis must contain character definition")); pelem->ordering[ptr[cnt]] = pelem->this_weight; /* Insert new entry into result table. 
*/ - if (find_entry (&collate->result, name, sizeof (wchar_t), + if (find_entry (&collate->result, name, sizeof (uint32_t), (void *) &pelem->next) >= 0) { - if (set_entry (&collate->result, name, sizeof (wchar_t), + if (set_entry (&collate->result, name, sizeof (uint32_t), (void *) pelem) < 0) error (4, 0, _("cannot insert into result table")); } else { pelem->next = NULL; - if (insert_entry (&collate->result, name, sizeof (wchar_t), + if (insert_entry (&collate->result, name, sizeof (uint32_t), (void *) pelem) < 0) error (4, 0, _("cannot insert into result table")); } @@ -1533,12 +2568,12 @@ line after ellipsis must contain character definition")); int -collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, +collate_weight_bsymbol (struct linereader *ldfile, struct localedef_t *locale, struct token *code, struct charset_t *charset) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; unsigned int here_weight; - wchar_t value; + uint32_t value; void *tmp; assert (code->tok == tok_bsymbol); @@ -1549,7 +2584,7 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, { element_t *runp; - if (find_entry (&collate->result, &value, sizeof (wchar_t), + if (find_entry (&collate->result, &value, sizeof (uint32_t), (void *)&runp) < 0) runp = NULL; @@ -1574,9 +2609,9 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, else { if (verbose) - lr_error (lr, _("unknown symbol `%.*s': line ignored"), + lr_error (ldfile, _("unknown symbol `%.*s': line ignored"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } @@ -1584,9 +2619,9 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, weight. 
*/ if (collate->kind == symbol) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ specification of sorting weight for collation symbol does not make sense")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } @@ -1606,8 +2641,8 @@ specification of sorting weight for collation symbol does not make sense")); newp = (patch_t *) obstack_alloc (&collate->element_mem, sizeof (patch_t)); - newp->fname = lr->fname; - newp->lineno = lr->lineno; + newp->fname = ldfile->fname; + newp->lineno = ldfile->lineno; newp->token = (const char *) obstack_copy0 (&collate->element_mem, code->val.str.start, code->val.str.len); @@ -1624,23 +2659,23 @@ specification of sorting weight for collation symbol does not make sense")); int -collate_next_weight (struct linereader *lr, struct localedef_t *locale) +collate_next_weight (struct linereader *ldfile, struct localedef_t *locale) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; if (collate->kind == symbol) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ specification of sorting weight for collation symbol does not make sense")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } ++collate->weight_idx; if (collate->weight_idx >= collate->nrules) { - lr_error (lr, _("too many weights")); - lr_ignore_rest (lr, 0); + lr_error (ldfile, _("too many weights")); + lr_ignore_rest (ldfile, 0); return -1; } @@ -1649,7 +2684,7 @@ specification of sorting weight for collation symbol does not make sense")); int -collate_simple_weight (struct linereader *lr, struct localedef_t *locale, +collate_simple_weight (struct linereader *ldfile, struct localedef_t *locale, struct token *code, struct charset_t *charset) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; @@ -1668,9 +2703,9 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, entry. 
*/ if (collate->kind != ellipsis && collate->kind != undefined) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ `...' must only be used in `...' and `UNDEFINED' entries")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } value = ELLIPSIS_CHAR; @@ -1691,18 +2726,18 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, { char *startp = (char *) runp; char *putp = (char *) runp; - wchar_t wch; + uint32_t wch; /* Lookup weight for char and store it. */ if (*runp == '<') { while (*++runp != '\0' && *runp != '>') { - if (*runp == lr->escape_char) + if (*runp == ldfile->escape_char) if (*++runp == '\0') { - lr_error (lr, _("unterminated weight name")); - lr_ignore_rest (lr, 0); + lr_error (ldfile, _("unterminated weight name")); + lr_ignore_rest (ldfile, 0); return -1; } *putp++ = *runp; @@ -1712,8 +2747,8 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, if (putp == startp) { - lr_error (lr, _("empty weight name: line ignored")); - lr_ignore_rest (lr, 0); + lr_error (ldfile, _("empty weight name: line ignored")); + lr_ignore_rest (ldfile, 0); return -1; } @@ -1723,7 +2758,7 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, { element_t *pelem; - if (find_entry (&collate->result, &wch, sizeof (wchar_t), + if (find_entry (&collate->result, &wch, sizeof (uint32_t), (void *)&pelem) < 0) pelem = NULL; @@ -1749,30 +2784,30 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, else { if (verbose) - lr_error (lr, _("unknown symbol `%.*s': line ignored"), + lr_error (ldfile, _("unknown symbol `%.*s': line ignored"), (int) (putp - startp), startp); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } } else { element_t *wp; - wchar_t wch; + uint32_t wch; - if (*runp == lr->escape_char) + if (*runp == ldfile->escape_char) { static const char digits[] = "0123456789abcdef"; const char *dp; int base; ++runp; - if (_tolower (*runp) == 'x') + if 
(tolower (*runp) == 'x') { ++runp; base = 16; } - else if (_tolower (*runp) == 'd') + else if (tolower (*runp) == 'd') { ++runp; base = 10; @@ -1780,19 +2815,19 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, else base = 8; - dp = strchr (digits, _tolower (*runp)); + dp = strchr (digits, tolower (*runp)); if (dp == NULL || (dp - digits) >= base) { illegal_char: - lr_error (lr, _("\ + lr_error (ldfile, _("\ illegal character constant in string")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } wch = dp - digits; ++runp; - dp = strchr (digits, _tolower (*runp)); + dp = strchr (digits, tolower (*runp)); if (dp == NULL || (dp - digits) >= base) goto illegal_char; wch *= base; @@ -1801,7 +2836,7 @@ illegal character constant in string")); if (base != 16) { - dp = strchr (digits, _tolower (*runp)); + dp = strchr (digits, tolower (*runp)); if (dp != NULL && (dp - digits < base)) { wch *= base; @@ -1811,7 +2846,7 @@ illegal character constant in string")); } } else - wch = (wchar_t) *runp++; + wch = (uint32_t) *runp++; /* Lookup the weight for WCH. 
*/ if (find_entry (&collate->result, &wch, sizeof (wch), @@ -1849,8 +2884,8 @@ illegal character constant in string")); newp = (patch_t *) obstack_alloc (&collate->element_mem, sizeof (patch_t)); - newp->fname = lr->fname; - newp->lineno = lr->lineno; + newp->fname = ldfile->fname; + newp->lineno = ldfile->lineno; newp->token = (const char *) obstack_copy0 (&collate->element_mem, startp, putp - startp); @@ -1885,7 +2920,7 @@ illegal character constant in string")); void -collate_end_weight (struct linereader *lr, struct localedef_t *locale) +collate_end_weight (struct linereader *ldfile, struct localedef_t *locale) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; element_t *pelem = collate->current_element; @@ -1951,3 +2986,239 @@ collate_end_weight (struct linereader *lr, struct localedef_t *locale) if (collate->kind != undefined) collate->last_char = pelem->name[0]; } + + +/* The parser for the LC_COLLATE section of the locale definition. */ +void +read_lc_collate (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, struct repertoire_t *repertoire, + int ignore_content) +{ + struct locale_collate_t *collate; + int did_copy = 0; + const char *save_str; + + /* The rest of the line containing `LC_COLLATE' must be free. */ + lr_ignore_rest (ldfile, 1); + + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, tok_lc_collate, LC_COLLATE, "LC_COLLATE", + ignore_content); + did_copy = 1; + } + + /* Prepare the data structures. */ + collate_startup (ldfile, result, charmap, ignore_content); + collate = result->categories[LC_COLLATE].collate; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines.
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { + case tok_coll_weight_max: + if (did_copy) + goto err_label; + /* The rest of the line must be a single integer value. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_number) + goto err_label; + /* We simply forget about the value we just read, the implementation + has no fixed limits. */ + lr_ignore_rest (ldfile, 1); + break; + + case tok_script: + if (did_copy) + goto err_label; + /* We expect the name of the script in brackets. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_bsymbol && now->tok != tok_ucs4) + goto err_label; + if (now->tok != tok_bsymbol) + { + lr_error (ldfile, _("\ +script name `%s' must not duplicate any known name"), + tok->val.str.startmb); + lr_ignore_rest (ldfile, 0); + break; + } + collate->scripts = xmalloc (collate->scripts, + (collate->nscripts + * sizeof (const char *))); + collate->scripts[collate->nscripts++] = tok->val.str.startmb; + lr_ignore_rest (ldfile, 1); + break; + + case tok_collating_element: + if (did_copy) + goto err_label; + /* Get the first argument, a symbol in brackets. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_bsymbol) + goto err_label; + /* Test it. */ + if (collate_element_to (ldfile, collate, now, charmap, repertoire)) + { + /* An error occurred. */ + lr_ignore_rest (ldfile, 0); + break; + } + save_str = tok->val.str.startmb; + /* Next comes `from'. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_from) + goto err_label; + /* Now comes a string. */ + now = lr_token (ldfile, charmap, repertoire); + if (now->tok != tok_string) + goto err_label; + collate_element_from (ldfile, collate, save_str, now, charmap, + repertoire); + /* The rest of the line should be empty. 
*/ + lr_ignore_rest (ldfile, 1); + break; + + case tok_collating_symbol: + if (did_copy) + goto err_label; + /* Get the argument, a single symbol in brackets. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_bsymbol) + goto err_label; + collate_symbol (ldfile, collate, now, charmap, repertoire); + break; + + case tok_order_start: + if (did_copy) + goto err_label; + + /* We expect now a scripting symbol or start right away + with the order keywords. Or we have no argument at all + in which means `forward'. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_eol) + { + static enum coll_sort_rule default_rule = sort_forward; + /* Use a single `forward' rule. */ + collate->nrules = 1; + collate->rules = &default_rule; + } + else + { + /* XXX We don't recognize the ISO 14651 extensions yet. */ + uint32_t nrules = 0; + uint32_t nrules_max = 32; + enum coll_sort_rule *rules = alloca (nrules_max + * sizeof (*rules)); + int saw_semicolon = 0; + + memset (rules, '\0', nrules_max * sizeof (*rules)); + do + { + if (now->tok != tok_forward && now->tok != tok_backward + && now->tok != tok_position) + goto err_label; + + if (saw_semicolon) + { + if (nrules == nrules_max) + { + newp = alloca (nrules_max * 2 * sizeof (*rules)); + rules = memcpy (newp, rules, + nrules_max * sizeof (*rules)); + memset (&rules[nrules_max], '\0', + nrules_max * sizeof (*rules)); + nrules_max *= 2; + } + ++nrules; + } + + switch (now->tok) + { + case tok_forward: + if ((rules[nrules] & sort_backward) != 0) + { + lr_error (ldfile, _("\ +`forward' and `backward' order exclude each other")); + lr_ignore_rest (ldfile, 0); + goto error_sort; + } + rules[nrules] |= sort_forward; + break; + case tok_backward: + if ((rules[nrules] & sort_forward) != 0) + { + lr_error (ldfile, _("\ +`forward' and `backward' order exclude each other")); + lr_ignore_rest (ldfile, 0); + goto error_sort; + } + rules[nrules] |= sort_backward; + break; + case tok_position: + rules[nrules] |= 
tok_position; + break; + } + + /* Get the next token. This is either the end of the line, + a comma or a semicolon. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_comma || now->tok == tok_semicolon) + { + saw_semicolon = now->tok == tok_semicolon; + now = lr_token (ldfile, charmap, NULL); + } + } + while (now->tok != tok_eol || now->tok != tok_eof); + + error_sort: + collate->nrules = nrules; + collate->rules = memcpy (xmalloc (nrules * sizeof (*rules)), + rules, nrules * sizeof (*rules)); + } + + /* Now read the rules. */ + read_rules (ldfile, collate, charmap, repertoire); + break; + + case tok_reorder_after: + break; + + case tok_reorder_script_after: + break; + + default: + err_label: + if (now->tok != tok_eof) + SYNTAX_ERROR (_("syntax error in %s locale definition"), + "LC_COLLATE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("premature end of file while reading category `%s'"), + "LC_COLLATE"); +} + +#endif |