diff options
Diffstat (limited to 'REORG.TODO/locale/programs/ld-collate.c')
-rw-r--r-- | REORG.TODO/locale/programs/ld-collate.c | 3978 |
1 files changed, 3978 insertions, 0 deletions
diff --git a/REORG.TODO/locale/programs/ld-collate.c b/REORG.TODO/locale/programs/ld-collate.c new file mode 100644 index 0000000000..cec848cb7c --- /dev/null +++ b/REORG.TODO/locale/programs/ld-collate.c @@ -0,0 +1,3978 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <errno.h> +#include <error.h> +#include <stdlib.h> +#include <wchar.h> +#include <stdint.h> +#include <sys/param.h> + +#include "localedef.h" +#include "charmap.h" +#include "localeinfo.h" +#include "linereader.h" +#include "locfile.h" +#include "elem-hash.h" + +/* Uncomment the following line in the production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + +static inline void +__attribute ((always_inline)) +obstack_int32_grow (struct obstack *obstack, int32_t data) +{ + assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack))); + data = maybe_swap_uint32 (data); + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (obstack, data); + else + obstack_grow (obstack, &data, sizeof (int32_t)); +} + +static inline void +__attribute ((always_inline)) +obstack_int32_grow_fast (struct obstack *obstack, int32_t data) +{ + assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack))); + data = maybe_swap_uint32 (data); + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (obstack, data); + else + obstack_grow (obstack, &data, sizeof (int32_t)); +} + +/* Forward declaration. */ +struct element_t; + +/* Data type for list of strings. */ +struct section_list +{ + /* Successor in the known_sections list. */ + struct section_list *def_next; + /* Successor in the sections list. */ + struct section_list *next; + /* Name of the section. */ + const char *name; + /* First element of this section. */ + struct element_t *first; + /* Last element of this section. */ + struct element_t *last; + /* These are the rules for this section. */ + enum coll_sort_rule *rules; + /* Index of the rule set in the appropriate section of the output file. */ + int ruleidx; +}; + +struct element_t; + +struct element_list_t +{ + /* Number of elements. */ + int cnt; + + struct element_t **w; +}; + +/* Data type for collating element. */ +struct element_t +{ + const char *name; + + const char *mbs; + size_t nmbs; + const uint32_t *wcs; + size_t nwcs; + int *mborder; + int wcorder; + + /* The following is a bit mask which bits are set if this element is + used in the appropriate level. Interesting for the singlebyte + weight computation. + + XXX The type here restricts the number of levels to 32. It could + be changed if necessary but I doubt this is necessary. */ + unsigned int used_in_level; + + struct element_list_t *weights; + + /* Nonzero if this is a real character definition. */ + int is_character; + + /* Order of the character in the sequence. This information will + be used in range expressions. */ + int mbseqorder; + int wcseqorder; + + /* Where does the definition come from. */ + const char *file; + size_t line; + + /* Which section does this belong to. */ + struct section_list *section; + + /* Predecessor and successor in the order list. */ + struct element_t *last; + struct element_t *next; + + /* Next element in multibyte output list. */ + struct element_t *mbnext; + struct element_t *mblast; + + /* Next element in wide character output list. */ + struct element_t *wcnext; + struct element_t *wclast; +}; + +/* Special element value. */ +#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1) +#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2) +#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3) + +/* Data type for collating symbol. */ +struct symbol_t +{ + const char *name; + + /* Point to place in the order list. */ + struct element_t *order; + + /* Where does the definition come from. */ + const char *file; + size_t line; +}; + +/* Sparse table of struct element_t *. */ +#define TABLE wchead_table +#define ELEMENT struct element_t * +#define DEFAULT NULL +#define ITERATE +#define NO_ADD_LOCALE +#include "3level.h" + +/* Sparse table of int32_t. */ +#define TABLE collidx_table +#define ELEMENT int32_t +#define DEFAULT 0 +#include "3level.h" + +/* Sparse table of uint32_t. */ +#define TABLE collseq_table +#define ELEMENT uint32_t +#define DEFAULT ~((uint32_t) 0) +#include "3level.h" + + +/* Simple name list for the preprocessor. */ +struct name_list +{ + struct name_list *next; + char str[0]; +}; + + +/* The real definition of the struct for the LC_COLLATE locale. */ +struct locale_collate_t +{ + int col_weight_max; + int cur_weight_max; + + /* List of known scripts. */ + struct section_list *known_sections; + /* List of used sections. */ + struct section_list *sections; + /* Current section using definition. */ + struct section_list *current_section; + /* There always can be an unnamed section. */ + struct section_list unnamed_section; + /* Flag whether the unnamed section has been defined. */ + bool unnamed_section_defined; + /* To make handling of errors easier we have another section. */ + struct section_list error_section; + /* Sometimes we are defining the values for collating symbols before + the first actual section. */ + struct section_list symbol_section; + + /* Start of the order list. */ + struct element_t *start; + + /* The undefined element. */ + struct element_t undefined; + + /* This is the cursor for `reorder_after' insertions. */ + struct element_t *cursor; + + /* This value is used when handling ellipsis. */ + struct element_t ellipsis_weight; + + /* Known collating elements. */ + hash_table elem_table; + + /* Known collating symbols. */ + hash_table sym_table; + + /* Known collation sequences. */ + hash_table seq_table; + + struct obstack mempool; + + /* The LC_COLLATE category is a bit special as it is sometimes possible + that the definitions from more than one input file contains information. + Therefore we keep all relevant input in a list. */ + struct locale_collate_t *next; + + /* Arrays with heads of the list for each of the leading bytes in + the multibyte sequences. */ + struct element_t *mbheads[256]; + + /* Arrays with heads of the list for each of the leading bytes in + the multibyte sequences. */ + struct wchead_table wcheads; + + /* The arrays with the collation sequence order. */ + unsigned char mbseqorder[256]; + struct collseq_table wcseqorder; + + /* State of the preprocessor. */ + enum + { + else_none = 0, + else_ignore, + else_seen + } + else_action; +}; + + +/* We have a few global variables which are used for reading all + LC_COLLATE category descriptions in all files. */ +static uint32_t nrules; + +/* List of defined preprocessor symbols. */ +static struct name_list *defined; + + +/* We need UTF-8 encoding of numbers. */ +static inline int +__attribute ((always_inline)) +utf8_encode (char *buf, int val) +{ + int retval; + + if (val < 0x80) + { + *buf++ = (char) val; + retval = 1; + } + else + { + int step; + + for (step = 2; step < 6; ++step) + if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0) + break; + retval = step; + + *buf = (unsigned char) (~0xff >> step); + --step; + do + { + buf[step] = 0x80 | (val & 0x3f); + val >>= 6; + } + while (--step > 0); + *buf |= val; + } + + return retval; +} + + +static struct section_list * +make_seclist_elem (struct locale_collate_t *collate, const char *string, + struct section_list *next) +{ + struct section_list *newp; + + newp = (struct section_list *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->next = next; + newp->name = string; + newp->first = NULL; + newp->last = NULL; + + return newp; +} + + +static struct element_t * +new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen, + const uint32_t *wcs, const char *name, size_t namelen, + int is_character) +{ + struct element_t *newp; + + newp = (struct element_t *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool, + name, namelen); + if (mbs != NULL) + { + newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen); + newp->nmbs = mbslen; + } + else + { + newp->mbs = NULL; + newp->nmbs = 0; + } + if (wcs != NULL) + { + size_t nwcs = wcslen ((wchar_t *) wcs); + uint32_t zero = 0; + /* Handle <U0000> as a single character. */ + if (nwcs == 0) + nwcs = 1; + obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t)); + obstack_grow (&collate->mempool, &zero, sizeof (uint32_t)); + newp->wcs = (uint32_t *) obstack_finish (&collate->mempool); + newp->nwcs = nwcs; + } + else + { + newp->wcs = NULL; + newp->nwcs = 0; + } + newp->mborder = NULL; + newp->wcorder = 0; + newp->used_in_level = 0; + newp->is_character = is_character; + + /* Will be assigned later. XXX */ + newp->mbseqorder = 0; + newp->wcseqorder = 0; + + /* Will be allocated later. */ + newp->weights = NULL; + + newp->file = NULL; + newp->line = 0; + + newp->section = collate->current_section; + + newp->last = NULL; + newp->next = NULL; + + newp->mbnext = NULL; + newp->mblast = NULL; + + newp->wcnext = NULL; + newp->wclast = NULL; + + return newp; +} + + +static struct symbol_t * +new_symbol (struct locale_collate_t *collate, const char *name, size_t len) +{ + struct symbol_t *newp; + + newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp)); + + newp->name = obstack_copy0 (&collate->mempool, name, len); + newp->order = NULL; + + newp->file = NULL; + newp->line = 0; + + return newp; +} + + +/* Test whether this name is already defined somewhere. */ +static int +check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, const char *symbol, + size_t symbol_len) +{ + void *ignore = NULL; + + if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%.*s' already defined in charmap"), + (int) symbol_len, symbol); + return 1; + } + + if (repertoire != NULL + && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) + == 0)) + { + lr_error (ldfile, _("`%.*s' already defined in repertoire"), + (int) symbol_len, symbol); + return 1; + } + + if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%.*s' already defined as collating symbol"), + (int) symbol_len, symbol); + return 1; + } + + if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%.*s' already defined as collating element"), + (int) symbol_len, symbol); + return 1; + } + + return 0; +} + + +/* Read the direction specification. */ +static void +read_directions (struct linereader *ldfile, struct token *arg, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, struct localedef_t *result) +{ + int cnt = 0; + int max = nrules ?: 10; + enum coll_sort_rule *rules = calloc (max, sizeof (*rules)); + int warned = 0; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + while (1) + { + int valid = 0; + + if (arg->tok == tok_forward) + { + if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_forward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned more than once in definition of weight %d"), + "LC_COLLATE", "forward", cnt + 1); + } + } + else + rules[cnt] |= sort_forward; + + valid = 1; + } + else if (arg->tok == tok_backward) + { + if (rules[cnt] & sort_forward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned more than once in definition of weight %d"), + "LC_COLLATE", "backward", cnt + 1); + } + } + else + rules[cnt] |= sort_backward; + + valid = 1; + } + else if (arg->tok == tok_position) + { + if (rules[cnt] & sort_position) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned more than once in definition of weight %d"), + "LC_COLLATE", "position", cnt + 1); + } + } + else + rules[cnt] |= sort_position; + + valid = 1; + } + + if (valid) + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + + if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma + || arg->tok == tok_semicolon) + { + if (! valid && ! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + + /* See whether we have to increment the counter. */ + if (arg->tok != tok_comma && rules[cnt] != 0) + { + /* Add the default `forward' if we have seen only `position'. */ + if (rules[cnt] == sort_position) + rules[cnt] = sort_position | sort_forward; + + ++cnt; + } + + if (arg->tok == tok_eof || arg->tok == tok_eol) + /* End of line or file, so we exit the loop. */ + break; + + if (nrules == 0) + { + /* See whether we have enough room in the array. */ + if (cnt == max) + { + max += 10; + rules = (enum coll_sort_rule *) xrealloc (rules, + max + * sizeof (*rules)); + memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules)); + } + } + else + { + if (cnt == nrules) + { + /* There must not be any more rule. */ + if (! warned) + { + lr_error (ldfile, _("\ +%s: too many rules; first entry only had %d"), + "LC_COLLATE", nrules); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + break; + } + } + } + else + { + if (! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + } + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + } + + if (nrules == 0) + { + /* Now we know how many rules we have. */ + nrules = cnt; + rules = (enum coll_sort_rule *) xrealloc (rules, + nrules * sizeof (*rules)); + } + else + { + if (cnt < nrules) + { + /* Not enough rules in this specification. */ + if (! warned) + lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE"); + + do + rules[cnt] = sort_forward; + while (++cnt < nrules); + } + } + + collate->current_section->rules = rules; +} + + +static struct element_t * +find_element (struct linereader *ldfile, struct locale_collate_t *collate, + const char *str, size_t len) +{ + void *result = NULL; + + /* Search for the entries among the collation sequences already define. */ + if (find_entry (&collate->seq_table, str, len, &result) != 0) + { + /* Nope, not define yet. So we see whether it is a + collation symbol. */ + void *ptr; + + if (find_entry (&collate->sym_table, str, len, &ptr) == 0) + { + /* It's a collation symbol. */ + struct symbol_t *sym = (struct symbol_t *) ptr; + result = sym->order; + + if (result == NULL) + result = sym->order = new_element (collate, NULL, 0, NULL, + NULL, 0, 0); + } + else if (find_entry (&collate->elem_table, str, len, &result) != 0) + { + /* It's also no collation element. So it is a character + element defined later. */ + result = new_element (collate, NULL, 0, NULL, str, len, 1); + /* Insert it into the sequence table. */ + insert_entry (&collate->seq_table, str, len, result); + } + } + + return (struct element_t *) result; +} + + +static void +unlink_element (struct locale_collate_t *collate) +{ + if (collate->cursor == collate->start) + { + assert (collate->cursor->next == NULL); + assert (collate->cursor->last == NULL); + collate->cursor = NULL; + } + else + { + if (collate->cursor->next != NULL) + collate->cursor->next->last = collate->cursor->last; + if (collate->cursor->last != NULL) + collate->cursor->last->next = collate->cursor->next; + collate->cursor = collate->cursor->last; + } +} + + +static void +insert_weights (struct linereader *ldfile, struct element_t *elem, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, struct localedef_t *result, + enum token_t ellipsis) +{ + int weight_cnt; + struct token *arg; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + /* Initialize all the fields. */ + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + + elem->last = collate->cursor; + elem->next = collate->cursor ? collate->cursor->next : NULL; + if (collate->cursor != NULL && collate->cursor->next != NULL) + collate->cursor->next->last = elem; + if (collate->cursor != NULL) + collate->cursor->next = elem; + if (collate->start == NULL) + { + assert (collate->cursor == NULL); + collate->start = elem; + } + + elem->section = collate->current_section; + + if (collate->current_section->first == NULL) + collate->current_section->first = elem; + if (collate->current_section->last == collate->cursor) + collate->current_section->last = elem; + + collate->cursor = elem; + + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t)); + memset (elem->weights, '\0', nrules * sizeof (struct element_list_t)); + + weight_cnt = 0; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + do + { + if (arg->tok == tok_eof || arg->tok == tok_eol) + break; + + if (arg->tok == tok_ignore) + { + /* The weight for this level has to be ignored. We use the + null pointer to indicate this. */ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = NULL; + elem->weights[weight_cnt].cnt = 1; + } + else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4) + { + char ucs4str[10]; + struct element_t *val; + char *symstr; + size_t symlen; + + if (arg->tok == tok_bsymbol) + { + symstr = arg->val.str.startmb; + symlen = arg->val.str.lenmb; + } + else + { + snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4); + symstr = ucs4str; + symlen = 9; + } + + val = find_element (ldfile, collate, symstr, symlen); + if (val == NULL) + break; + + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = val; + elem->weights[weight_cnt].cnt = 1; + } + else if (arg->tok == tok_string) + { + /* Split the string up in the individual characters and put + the element definitions in the list. */ + const char *cp = arg->val.str.startmb; + int cnt = 0; + struct element_t *charelem; + struct element_t **weights = NULL; + int max = 0; + + if (*cp == '\0') + { + lr_error (ldfile, _("%s: empty weight string not allowed"), + "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + break; + } + + do + { + if (*cp == '<') + { + /* Ahh, it's a bsymbol or an UCS4 value. If it's + the latter we have to unify the name. */ + const char *startp = ++cp; + size_t len; + + while (*cp != '>') + { + if (*cp == ldfile->escape_char) + ++cp; + if (*cp == '\0') + /* It's a syntax error. */ + goto syntax; + + ++cp; + } + + if (cp - startp == 5 && startp[0] == 'U' + && isxdigit (startp[1]) && isxdigit (startp[2]) + && isxdigit (startp[3]) && isxdigit (startp[4])) + { + unsigned int ucs4 = strtoul (startp + 1, NULL, 16); + char *newstr; + + newstr = (char *) xmalloc (10); + snprintf (newstr, 10, "U%08X", ucs4); + startp = newstr; + + len = 9; + } + else + len = cp - startp; + + charelem = find_element (ldfile, collate, startp, len); + ++cp; + } + else + { + /* People really shouldn't use characters directly in + the string. Especially since it's not really clear + what this means. We interpret all characters in the + string as if that would be bsymbols. Otherwise we + would have to match back to bsymbols somehow and this + is normally not what people normally expect. */ + charelem = find_element (ldfile, collate, cp++, 1); + } + + if (charelem == NULL) + { + /* We ignore the rest of the line. */ + lr_ignore_rest (ldfile, 0); + break; + } + + /* Add the pointer. */ + if (cnt >= max) + { + struct element_t **newp; + max += 10; + newp = (struct element_t **) + alloca (max * sizeof (struct element_t *)); + memcpy (newp, weights, cnt * sizeof (struct element_t *)); + weights = newp; + } + weights[cnt++] = charelem; + } + while (*cp != '\0'); + + /* Now store the information. */ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + cnt * sizeof (struct element_t *)); + memcpy (elem->weights[weight_cnt].w, weights, + cnt * sizeof (struct element_t *)); + elem->weights[weight_cnt].cnt = cnt; + + /* We don't need the string anymore. */ + free (arg->val.str.startmb); + } + else if (ellipsis != tok_none + && (arg->tok == tok_ellipsis2 + || arg->tok == tok_ellipsis3 + || arg->tok == tok_ellipsis4)) + { + /* It must be the same ellipsis as used in the initial column. */ + if (arg->tok != ellipsis) + lr_error (ldfile, _("\ +%s: weights must use the same ellipsis symbol as the name"), + "LC_COLLATE"); + + /* The weight for this level will depend on the element + iterating over the range. Put a placeholder. */ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2; + elem->weights[weight_cnt].cnt = 1; + } + else + { + syntax: + /* It's a syntax error. */ + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + break; + } + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + /* This better should be the end of the line or a semicolon. */ + if (arg->tok == tok_semicolon) + /* OK, ignore this and read the next token. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + else if (arg->tok != tok_eof && arg->tok != tok_eol) + { + /* It's a syntax error. */ + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + break; + } + } + while (++weight_cnt < nrules); + + if (weight_cnt < nrules) + { + /* This means the rest of the line uses the current element as + the weight. */ + do + { + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + if (ellipsis == tok_none) + elem->weights[weight_cnt].w[0] = elem; + else + elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2; + elem->weights[weight_cnt].cnt = 1; + } + while (++weight_cnt < nrules); + } + else + { + if (arg->tok == tok_ignore || arg->tok == tok_bsymbol) + { + /* Too many rule values. */ + lr_error (ldfile, _("%s: too many values"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + } + else + lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof); + } +} + + +static int +insert_value (struct linereader *ldfile, const char *symstr, size_t symlen, + const struct charmap_t *charmap, struct repertoire_t *repertoire, + struct localedef_t *result) +{ + /* First find out what kind of symbol this is. */ + struct charseq *seq; + uint32_t wc; + struct element_t *elem = NULL; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + /* Try to find the character in the charmap. */ + seq = charmap_find_value (charmap, symstr, symlen); + + /* Determine the wide character. */ + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, symstr, symlen); + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; + + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) + { + /* It's no character, so look through the collation elements and + symbol list. */ + void *ptr = elem; + if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0) + { + void *result; + struct symbol_t *sym = NULL; + + /* It's also collation element. Therefore it's either a + collating symbol or it's a character which is not + supported by the character set. In the later case we + simply create a dummy entry. */ + if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0) + { + /* It's a collation symbol. */ + sym = (struct symbol_t *) result; + + elem = sym->order; + } + + if (elem == NULL) + { + elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0); + + if (sym != NULL) + sym->order = elem; + else + /* Enter a fake element in the sequence table. This + won't cause anything in the output since there is + no multibyte or wide character associated with + it. */ + insert_entry (&collate->seq_table, symstr, symlen, elem); + } + } + else + /* Copy the result back. */ + elem = ptr; + } + else + { + /* Otherwise the symbols stands for a character. */ + void *ptr = elem; + if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0) + { + uint32_t wcs[2] = { wc, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, + seq != NULL ? (char *) seq->bytes : NULL, + seq != NULL ? seq->nbytes : 0, + wc == ILLEGAL_CHAR_VALUE ? NULL : wcs, + symstr, symlen, 1); + + /* And add it to the table. */ + if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0) + /* This cannot happen. */ + assert (! "Internal error"); + } + else + { + /* Copy the result back. */ + elem = ptr; + + /* Maybe the character was used before the definition. In this case + we have to insert the byte sequences now. */ + if (elem->mbs == NULL && seq != NULL) + { + elem->mbs = obstack_copy0 (&collate->mempool, + seq->bytes, seq->nbytes); + elem->nmbs = seq->nbytes; + } + + if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE) + { + uint32_t wcs[2] = { wc, 0 }; + + elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs)); + elem->nwcs = 1; + } + } + } + + /* Test whether this element is not already in the list. */ + if (elem->next != NULL || elem == collate->cursor) + { + lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"), + (int) symlen, symstr, elem->file, elem->line); + lr_ignore_rest (ldfile, 0); + return 1; + } + + insert_weights (ldfile, elem, charmap, repertoire, result, tok_none); + + return 0; +} + + +static void +handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen, + enum token_t ellipsis, const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct localedef_t *result) +{ + struct element_t *startp; + struct element_t *endp; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + /* Unlink the entry added for the ellipsis. */ + unlink_element (collate); + startp = collate->cursor; + + /* Process and add the end-entry. */ + if (symstr != NULL + && insert_value (ldfile, symstr, symlen, charmap, repertoire, result)) + /* Something went wrong with inserting the to-value. This means + we cannot process the ellipsis. */ + return; + + /* Reset the cursor. */ + collate->cursor = startp; + + /* Now we have to handle many different situations: + - we have to distinguish between the three different ellipsis forms + - the is the ellipsis at the beginning, in the middle, or at the end. + */ + endp = collate->cursor->next; + assert (symstr == NULL || endp != NULL); + + /* XXX The following is probably very wrong since also collating symbols + can appear in ranges. But do we want/can refine the test for that? */ +#if 0 + /* Both, the start and the end symbol, must stand for characters. */ + if ((startp != NULL && (startp->name == NULL || ! startp->is_character)) + || (endp != NULL && (endp->name == NULL|| ! endp->is_character))) + { + lr_error (ldfile, _("\ +%s: the start and the end symbol of a range must stand for characters"), + "LC_COLLATE"); + return; + } +#endif + + if (ellipsis == tok_ellipsis3) + { + /* One requirement we make here: the length of the byte + sequences for the first and end character must be the same. + This is mainly to prevent unwanted effects and this is often + not what is wanted. */ + size_t len = (startp->mbs != NULL ? startp->nmbs + : (endp->mbs != NULL ? endp->nmbs : 0)); + char mbcnt[len + 1]; + char mbend[len + 1]; + + /* Well, this should be caught somewhere else already. Just to + make sure. */ + assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0); + assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0); + + if (startp != NULL && endp != NULL + && startp->mbs != NULL && endp->mbs != NULL + && startp->nmbs != endp->nmbs) + { + lr_error (ldfile, _("\ +%s: byte sequences of first and last character must have the same length"), + "LC_COLLATE"); + return; + } + + /* Determine whether we have to generate multibyte sequences. */ + if ((startp == NULL || startp->mbs != NULL) + && (endp == NULL || endp->mbs != NULL)) + { + int cnt; + int ret; + + /* Prepare the beginning byte sequence. This is either from the + beginning byte sequence or it is all nulls if it was an + initial ellipsis. */ + if (startp == NULL || startp->mbs == NULL) + memset (mbcnt, '\0', len); + else + { + memcpy (mbcnt, startp->mbs, len); + + /* And increment it so that the value is the first one we will + try to insert. */ + for (cnt = len - 1; cnt >= 0; --cnt) + if (++mbcnt[cnt] != '\0') + break; + } + mbcnt[len] = '\0'; + + /* And the end sequence. */ + if (endp == NULL || endp->mbs == NULL) + memset (mbend, '\0', len); + else + memcpy (mbend, endp->mbs, len); + mbend[len] = '\0'; + + /* Test whether we have a correct range. */ + ret = memcmp (mbcnt, mbend, len); + if (ret >= 0) + { + if (ret > 0) + lr_error (ldfile, _("%s: byte sequence of first character of \ +range is not lower than that of the last character"), "LC_COLLATE"); + return; + } + + /* Generate the byte sequences data. */ + while (1) + { + struct charseq *seq; + + /* Quite a bit of work ahead. We have to find the character + definition for the byte sequence and then determine the + wide character belonging to it. */ + seq = charmap_find_symbol (charmap, mbcnt, len); + if (seq != NULL) + { + struct element_t *elem; + size_t namelen; + + /* I don't think this can ever happen. */ + assert (seq->name != NULL); + namelen = strlen (seq->name); + + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + namelen); + + /* Now we are ready to insert the new value in the + sequence. Find out whether the element is + already known. */ + void *ptr; + if (find_entry (&collate->seq_table, seq->name, namelen, + &ptr) != 0) + { + uint32_t wcs[2] = { seq->ucs4, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, mbcnt, len, + seq->ucs4 == ILLEGAL_CHAR_VALUE + ? NULL : wcs, seq->name, + namelen, 1); + + /* And add it to the table. */ + if (insert_entry (&collate->seq_table, seq->name, + namelen, elem) != 0) + /* This cannot happen. */ + assert (! "Internal error"); + } + else + /* Copy the result. */ + elem = ptr; + + /* Test whether this element is not already in the list. */ + if (elem->next != NULL || (collate->cursor != NULL + && elem->next == collate->cursor)) + { + lr_error (ldfile, _("\ +order for `%.*s' already defined at %s:%Zu"), + (int) namelen, seq->name, + elem->file, elem->line); + goto increment; + } + + /* Enqueue the new element. */ + elem->last = collate->cursor; + if (collate->cursor == NULL) + elem->next = NULL; + else + { + elem->next = collate->cursor->next; + elem->last->next = elem; + if (elem->next != NULL) + elem->next->last = elem; + } + if (collate->start == NULL) + { + assert (collate->cursor == NULL); + collate->start = elem; + } + collate->cursor = elem; + + /* Add the weight value. We take them from the + `ellipsis_weights' member of `collate'. */ + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, + nrules * sizeof (struct element_list_t)); + for (cnt = 0; cnt < nrules; ++cnt) + if (collate->ellipsis_weight.weights[cnt].cnt == 1 + && (collate->ellipsis_weight.weights[cnt].w[0] + == ELEMENT_ELLIPSIS2)) + { + elem->weights[cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + sizeof (struct element_t *)); + elem->weights[cnt].w[0] = elem; + elem->weights[cnt].cnt = 1; + } + else + { + /* Simply use the weight from `ellipsis_weight'. */ + elem->weights[cnt].w = + collate->ellipsis_weight.weights[cnt].w; + elem->weights[cnt].cnt = + collate->ellipsis_weight.weights[cnt].cnt; + } + } + + /* Increment for the next round. */ + increment: + for (cnt = len - 1; cnt >= 0; --cnt) + if (++mbcnt[cnt] != '\0') + break; + + /* Find out whether this was all. */ + if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0) + /* Yep, that's all. */ + break; + } + } + } + else + { + /* For symbolic range we naturally must have a beginning and an + end specified by the user. */ + if (startp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not directly follow `order_start'"), + "LC_COLLATE"); + else if (endp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not be directly followed by `order_end'"), + "LC_COLLATE"); + else + { + /* Determine the range. To do so we have to determine the + common prefix of the both names and then the numeric + values of both ends. */ + size_t lenfrom = strlen (startp->name); + size_t lento = strlen (endp->name); + char buf[lento + 1]; + int preflen = 0; + long int from; + long int to; + char *cp; + int base = ellipsis == tok_ellipsis2 ? 16 : 10; + + if (lenfrom != lento) + { + invalid_range: + lr_error (ldfile, _("\ +`%s' and `%.*s' are not valid names for symbolic range"), + startp->name, (int) lento, endp->name); + return; + } + + while (startp->name[preflen] == endp->name[preflen]) + if (startp->name[preflen] == '\0') + /* Nothing to be done. The start and end point are identical + and while inserting the end point we have already given + the user an error message. */ + return; + else + ++preflen; + + errno = 0; + from = strtol (startp->name + preflen, &cp, base); + if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; + + errno = 0; + to = strtol (endp->name + preflen, &cp, base); + if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; + + /* Copy the prefix. */ + memcpy (buf, startp->name, preflen); + + /* Loop over all values. */ + for (++from; from < to; ++from) + { + struct element_t *elem = NULL; + struct charseq *seq; + uint32_t wc; + int cnt; + + /* Generate the name. */ + sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX", + (int) (lenfrom - preflen), from); + + /* Look whether this name is already defined. */ + void *ptr; + if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0) + { + /* Copy back the result. */ + elem = ptr; + + if (elem->next != NULL || (collate->cursor != NULL + && elem->next == collate->cursor)) + { + lr_error (ldfile, _("\ +%s: order for `%.*s' already defined at %s:%Zu"), + "LC_COLLATE", (int) lenfrom, buf, + elem->file, elem->line); + continue; + } + + if (elem->name == NULL) + { + lr_error (ldfile, _("%s: `%s' must be a character"), + "LC_COLLATE", buf); + continue; + } + } + + if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL)) + { + /* Search for a character of this name. */ + seq = charmap_find_value (charmap, buf, lenfrom); + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, buf, lenfrom); + + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; + + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) + /* We don't know anything about a character with this + name. XXX Should we warn? */ + continue; + + if (elem == NULL) + { + uint32_t wcs[2] = { wc, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, + seq != NULL + ? (char *) seq->bytes : NULL, + seq != NULL ? seq->nbytes : 0, + wc == ILLEGAL_CHAR_VALUE + ? NULL : wcs, buf, lenfrom, 1); + } + else + { + /* Update the element. */ + if (seq != NULL) + { + elem->mbs = obstack_copy0 (&collate->mempool, + seq->bytes, seq->nbytes); + elem->nmbs = seq->nbytes; + } + + if (wc != ILLEGAL_CHAR_VALUE) + { + uint32_t zero = 0; + + obstack_grow (&collate->mempool, + &wc, sizeof (uint32_t)); + obstack_grow (&collate->mempool, + &zero, sizeof (uint32_t)); + elem->wcs = obstack_finish (&collate->mempool); + elem->nwcs = 1; + } + } + + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + elem->section = collate->current_section; + } + + /* Enqueue the new element. */ + elem->last = collate->cursor; + elem->next = collate->cursor->next; + elem->last->next = elem; + if (elem->next != NULL) + elem->next->last = elem; + collate->cursor = elem; + + /* Now add the weights. They come from the `ellipsis_weights' + member of `collate'. */ + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, + nrules * sizeof (struct element_list_t)); + for (cnt = 0; cnt < nrules; ++cnt) + if (collate->ellipsis_weight.weights[cnt].cnt == 1 + && (collate->ellipsis_weight.weights[cnt].w[0] + == ELEMENT_ELLIPSIS2)) + { + elem->weights[cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + sizeof (struct element_t *)); + elem->weights[cnt].w[0] = elem; + elem->weights[cnt].cnt = 1; + } + else + { + /* Simly use the weight from `ellipsis_weight'. */ + elem->weights[cnt].w = + collate->ellipsis_weight.weights[cnt].w; + elem->weights[cnt].cnt = + collate->ellipsis_weight.weights[cnt].cnt; + } + } + } + } +} + + +static void +collate_startup (struct linereader *ldfile, struct localedef_t *locale, + struct localedef_t *copy_locale, int ignore_content) +{ + if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL) + { + struct locale_collate_t *collate; + + if (copy_locale == NULL) + { + collate = locale->categories[LC_COLLATE].collate = + (struct locale_collate_t *) + xcalloc (1, sizeof (struct locale_collate_t)); + + /* Init the various data structures. */ + init_hash (&collate->elem_table, 100); + init_hash (&collate->sym_table, 100); + init_hash (&collate->seq_table, 500); + obstack_init (&collate->mempool); + + collate->col_weight_max = -1; + } + else + /* Reuse the copy_locale's data structures. */ + collate = locale->categories[LC_COLLATE].collate = + copy_locale->categories[LC_COLLATE].collate; + } + + ldfile->translate_strings = 0; + ldfile->return_widestr = 0; +} + + +void +collate_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + /* Now is the time when we can assign the individual collation + values for all the symbols. We have possibly different values + for the wide- and the multibyte-character symbols. This is done + since it might make a difference in the encoding if there is in + some cases no multibyte-character but there are wide-characters. + (The other way around it is not important since theencoded + collation value in the wide-character case is 32 bits wide and + therefore requires no encoding). + + The lowest collation value assigned is 2. Zero is reserved for + the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm' + functions and 1 is used to separate the individual passes for the + different rules. + + We also have to construct is list with all the bytes/words which + can come first in a sequence, followed by all the elements which + also start with this byte/word. The order is reverse which has + among others the important effect that longer strings are located + first in the list. This is required for the output data since + the algorithm used in `strcoll' etc depends on this. + + The multibyte case is easy. We simply sort into an array with + 256 elements. */ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + int mbact[nrules]; + int wcact; + int mbseqact; + int wcseqact; + struct element_t *runp; + int i; + int need_undefined = 0; + struct section_list *sect; + int ruleidx; + int nr_wide_elems = 0; + + if (collate == NULL) + { + /* No data, no check. */ + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"), + "LC_COLLATE")); + return; + } + + /* If this assertion is hit change the type in `element_t'. */ + assert (nrules <= sizeof (runp->used_in_level) * 8); + + /* Make sure that the `position' rule is used either in all sections + or in none. */ + for (i = 0; i < nrules; ++i) + for (sect = collate->sections; sect != NULL; sect = sect->next) + if (sect != collate->current_section + && sect->rules != NULL + && ((sect->rules[i] & sort_position) + != (collate->current_section->rules[i] & sort_position))) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: `position' must be used for a specific level in all sections or none"), + "LC_COLLATE")); + break; + } + + /* Find out which elements are used at which level. At the same + time we find out whether we have any undefined symbols. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL) + { + for (i = 0; i < nrules; ++i) + { + int j; + + for (j = 0; j < runp->weights[i].cnt; ++j) + /* A NULL pointer as the weight means IGNORE. */ + if (runp->weights[i].w[j] != NULL) + { + if (runp->weights[i].w[j]->weights == NULL) + { + WITH_CUR_LOCALE (error_at_line (0, 0, runp->file, + runp->line, + _("symbol `%s' not defined"), + runp->weights[i].w[j]->name)); + + need_undefined = 1; + runp->weights[i].w[j] = &collate->undefined; + } + else + /* Set the bit for the level. */ + runp->weights[i].w[j]->used_in_level |= 1 << i; + } + } + } + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Walk through the list of defined sequences and assign weights. Also + create the data structure which will allow generating the single byte + character based tables. + + Since at each time only the weights for each of the rules are + only compared to other weights for this rule it is possible to + assign more compact weight values than simply counting all + weights in sequence. We can assign weights from 3, one for each + rule individually and only for those elements, which are actually + used for this rule. + + Why is this important? It is not for the wide char table. But + it is for the singlebyte output since here larger numbers have to + be encoded to make it possible to emit the value as a byte + string. */ + for (i = 0; i < nrules; ++i) + mbact[i] = 2; + wcact = 2; + mbseqact = 0; + wcseqact = 0; + runp = collate->start; + while (runp != NULL) + { + /* Determine the order. */ + if (runp->used_in_level != 0) + { + runp->mborder = (int *) obstack_alloc (&collate->mempool, + nrules * sizeof (int)); + + for (i = 0; i < nrules; ++i) + if ((runp->used_in_level & (1 << i)) != 0) + runp->mborder[i] = mbact[i]++; + else + runp->mborder[i] = 0; + } + + if (runp->mbs != NULL) + { + struct element_t **eptr; + struct element_t *lastp = NULL; + + /* Find the point where to insert in the list. */ + eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]]; + while (*eptr != NULL) + { + if ((*eptr)->nmbs < runp->nmbs) + break; + + if ((*eptr)->nmbs == runp->nmbs) + { + int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs); + + if (c == 0) + { + /* This should not happen. It means that we have + to symbols with the same byte sequence. It is + of course an error. */ + WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file, + (*eptr)->line, + _("\ +symbol `%s' has the same encoding as"), (*eptr)->name); + error_at_line (0, 0, runp->file, + runp->line, + _("symbol `%s'"), + runp->name)); + goto dont_insert; + } + else if (c < 0) + /* Insert it here. */ + break; + } + + /* To the next entry. */ + lastp = *eptr; + eptr = &(*eptr)->mbnext; + } + + /* Set the pointers. */ + runp->mbnext = *eptr; + runp->mblast = lastp; + if (*eptr != NULL) + (*eptr)->mblast = runp; + *eptr = runp; + dont_insert: + ; + } + + if (runp->used_in_level) + { + runp->wcorder = wcact++; + + /* We take the opportunity to count the elements which have + wide characters. */ + ++nr_wide_elems; + } + + if (runp->is_character) + { + if (runp->nmbs == 1) + collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++; + + runp->wcseqorder = wcseqact++; + } + else if (runp->mbs != NULL && runp->weights != NULL) + /* This is for collation elements. */ + runp->wcseqorder = wcseqact++; + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Find out whether any of the `mbheads' entries is unset. In this + case we use the UNDEFINED entry. */ + for (i = 1; i < 256; ++i) + if (collate->mbheads[i] == NULL) + { + need_undefined = 1; + collate->mbheads[i] = &collate->undefined; + } + + /* Now to the wide character case. */ + collate->wcheads.p = 6; + collate->wcheads.q = 10; + wchead_table_init (&collate->wcheads); + + collate->wcseqorder.p = 6; + collate->wcseqorder.q = 10; + collseq_table_init (&collate->wcseqorder); + + /* Start adding. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->wcs != NULL) + { + struct element_t *e; + struct element_t **eptr; + struct element_t *lastp; + + /* Insert the collation sequence value. */ + if (runp->is_character) + collseq_table_add (&collate->wcseqorder, runp->wcs[0], + runp->wcseqorder); + + /* Find the point where to insert in the list. */ + e = wchead_table_get (&collate->wcheads, runp->wcs[0]); + eptr = &e; + lastp = NULL; + while (*eptr != NULL) + { + if ((*eptr)->nwcs < runp->nwcs) + break; + + if ((*eptr)->nwcs == runp->nwcs) + { + int c = wmemcmp ((wchar_t *) (*eptr)->wcs, + (wchar_t *) runp->wcs, runp->nwcs); + + if (c == 0) + { + /* This should not happen. It means that we have + two symbols with the same byte sequence. It is + of course an error. */ + WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file, + (*eptr)->line, + _("\ +symbol `%s' has the same encoding as"), (*eptr)->name); + error_at_line (0, 0, runp->file, + runp->line, + _("symbol `%s'"), + runp->name)); + goto dont_insertwc; + } + else if (c < 0) + /* Insert it here. */ + break; + } + + /* To the next entry. */ + lastp = *eptr; + eptr = &(*eptr)->wcnext; + } + + /* Set the pointers. */ + runp->wcnext = *eptr; + runp->wclast = lastp; + if (*eptr != NULL) + (*eptr)->wclast = runp; + *eptr = runp; + if (eptr == &e) + wchead_table_add (&collate->wcheads, runp->wcs[0], e); + dont_insertwc: + ; + } + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Now determine whether the UNDEFINED entry is needed and if yes, + whether it was defined. */ + collate->undefined.used_in_level = need_undefined ? ~0ul : 0; + if (collate->undefined.file == NULL) + { + if (need_undefined) + { + /* This seems not to be enforced by recent standards. Don't + emit an error, simply append UNDEFINED at the end. */ + if (0) + WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'"))); + + /* Add UNDEFINED at the end. */ + collate->undefined.mborder = + (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int)); + + for (i = 0; i < nrules; ++i) + collate->undefined.mborder[i] = mbact[i]++; + } + + /* In any case we will need the definition for the wide character + case. But we will not complain that it is missing since the + specification strangely enough does not seem to account for + this. */ + collate->undefined.wcorder = wcact++; + } + + /* Finally, try to unify the rules for the sections. Whenever the rules + for a section are the same as those for another section give the + ruleset the same index. Since there are never many section we can + use an O(n^2) algorithm here. */ + sect = collate->sections; + while (sect != NULL && sect->rules == NULL) + sect = sect->next; + + /* Bail out if we have no sections because of earlier errors. */ + if (sect == NULL) + { + WITH_CUR_LOCALE (error (EXIT_FAILURE, 0, + _("too many errors; giving up"))); + return; + } + + ruleidx = 0; + do + { + struct section_list *osect = collate->sections; + + while (osect != sect) + if (osect->rules != NULL + && memcmp (osect->rules, sect->rules, + nrules * sizeof (osect->rules[0])) == 0) + break; + else + osect = osect->next; + + if (osect == sect) + sect->ruleidx = ruleidx++; + else + sect->ruleidx = osect->ruleidx; + + /* Next section. */ + do + sect = sect->next; + while (sect != NULL && sect->rules == NULL); + } + while (sect != NULL); + /* We are currently not prepared for more than 128 rulesets. But this + should never really be a problem. */ + assert (ruleidx <= 128); +} + + +static int32_t +output_weight (struct obstack *pool, struct locale_collate_t *collate, + struct element_t *elem) +{ + size_t cnt; + int32_t retval; + + /* Optimize the use of UNDEFINED. */ + if (elem == &collate->undefined) + /* The weights are already inserted. */ + return 0; + + /* This byte can start exactly one collation element and this is + a single byte. We can directly give the index to the weights. */ + retval = obstack_object_size (pool); + + /* Construct the weight. */ + for (cnt = 0; cnt < nrules; ++cnt) + { + char buf[elem->weights[cnt].cnt * 7]; + int len = 0; + int i; + + for (i = 0; i < elem->weights[cnt].cnt; ++i) + /* Encode the weight value. We do nothing for IGNORE entries. */ + if (elem->weights[cnt].w[i] != NULL) + len += utf8_encode (&buf[len], + elem->weights[cnt].w[i]->mborder[cnt]); + + /* And add the buffer content. */ + obstack_1grow (pool, len); + obstack_grow (pool, buf, len); + } + + return retval | ((elem->section->ruleidx & 0x7f) << 24); +} + + +static int32_t +output_weightwc (struct obstack *pool, struct locale_collate_t *collate, + struct element_t *elem) +{ + size_t cnt; + int32_t retval; + + /* Optimize the use of UNDEFINED. */ + if (elem == &collate->undefined) + /* The weights are already inserted. */ + return 0; + + /* This byte can start exactly one collation element and this is + a single byte. We can directly give the index to the weights. */ + retval = obstack_object_size (pool) / sizeof (int32_t); + + /* Construct the weight. */ + for (cnt = 0; cnt < nrules; ++cnt) + { + int32_t buf[elem->weights[cnt].cnt]; + int i; + int32_t j; + + for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i) + if (elem->weights[cnt].w[i] != NULL) + buf[j++] = elem->weights[cnt].w[i]->wcorder; + + /* And add the buffer content. */ + obstack_int32_grow (pool, j); + + obstack_grow (pool, buf, j * sizeof (int32_t)); + maybe_swap_uint32_obstack (pool, j); + } + + return retval | ((elem->section->ruleidx & 0x7f) << 24); +} + +/* If localedef is every threaded, this would need to be __thread var. */ +static struct +{ + struct obstack *weightpool; + struct obstack *extrapool; + struct obstack *indpool; + struct locale_collate_t *collate; + struct collidx_table *tablewc; +} atwc; + +static void add_to_tablewc (uint32_t ch, struct element_t *runp); + +static void +add_to_tablewc (uint32_t ch, struct element_t *runp) +{ + if (runp->wcnext == NULL && runp->nwcs == 1) + { + int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate, + runp); + collidx_table_add (atwc.tablewc, ch, weigthidx); + } + else + { + /* As for the singlebyte table, we recognize sequences and + compress them. */ + + collidx_table_add (atwc.tablewc, ch, + -(obstack_object_size (atwc.extrapool) + / sizeof (uint32_t))); + + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Find out wether this is a single entry or we have more than + one consecutive entry. */ + if (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)) + { + int i; + struct element_t *series_startp = runp; + struct element_t *curp; + + /* Now add first the initial byte sequence. */ + added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t); + if (sizeof (int32_t) == sizeof (int)) + obstack_make_room (atwc.extrapool, added); + + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. */ + obstack_int32_grow_fast (atwc.extrapool, + -(obstack_object_size (atwc.indpool) + / sizeof (int32_t))); + obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1); + + do + runp = runp->wcnext; + while (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)); + + /* Now walk backward from here to the beginning. */ + curp = runp; + + for (i = 1; i < runp->nwcs; ++i) + obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]); + + /* Now find the end of the consecutive sequence and + add all the indeces in the indirect pool. */ + do + { + weightidx = output_weightwc (atwc.weightpool, atwc.collate, + curp); + obstack_int32_grow (atwc.indpool, weightidx); + + curp = curp->wclast; + } + while (curp != series_startp); + + /* Add the final weight. */ + weightidx = output_weightwc (atwc.weightpool, atwc.collate, + curp); + obstack_int32_grow (atwc.indpool, weightidx); + + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < curp->nwcs; ++i) + obstack_int32_grow (atwc.extrapool, curp->wcs[i]); + } + else + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). */ + int i; + + /* Output the weight info. */ + weightidx = output_weightwc (atwc.weightpool, atwc.collate, + runp); + + assert (runp->nwcs > 0); + added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t); + if (sizeof (int) == sizeof (int32_t)) + obstack_make_room (atwc.extrapool, added); + + obstack_int32_grow_fast (atwc.extrapool, weightidx); + obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1); + for (i = 1; i < runp->nwcs; ++i) + obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]); + } + + /* Next entry. */ + runp = runp->wcnext; + } + while (runp != NULL); + } +} + +void +collate_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE); + struct locale_file file; + size_t ch; + int32_t tablemb[256]; + struct obstack weightpool; + struct obstack extrapool; + struct obstack indirectpool; + struct section_list *sect; + struct collidx_table tablewc; + uint32_t elem_size; + uint32_t *elem_table; + int i; + struct element_t *runp; + + init_locale_data (&file, nelems); + add_locale_uint32 (&file, nrules); + + /* If we have no LC_COLLATE data emit only the number of rules as zero. */ + if (collate == NULL) + { + size_t idx; + for (idx = 1; idx < nelems; idx++) + { + /* The words have to be handled specially. */ + if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB)) + add_locale_uint32 (&file, 0); + else + add_locale_empty (&file); + } + write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file); + return; + } + + obstack_init (&weightpool); + obstack_init (&extrapool); + obstack_init (&indirectpool); + + /* Since we are using the sign of an integer to mark indirection the + offsets in the arrays we are indirectly referring to must not be + zero since -0 == 0. Therefore we add a bit of dummy content. */ + obstack_int32_grow (&extrapool, 0); + obstack_int32_grow (&indirectpool, 0); + + /* Prepare the ruleset table. */ + for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next) + if (sect->rules != NULL && sect->ruleidx == i) + { + int j; + + obstack_make_room (&weightpool, nrules); + + for (j = 0; j < nrules; ++j) + obstack_1grow_fast (&weightpool, sect->rules[j]); + ++i; + } + /* And align the output. */ + i = (nrules * i) % LOCFILE_ALIGN; + if (i > 0) + do + obstack_1grow (&weightpool, '\0'); + while (++i < LOCFILE_ALIGN); + + add_locale_raw_obstack (&file, &weightpool); + + /* Generate the 8-bit table. Walk through the lists of sequences + starting with the same byte and add them one after the other to + the table. In case we have more than one sequence starting with + the same byte we have to use extra indirection. + + First add a record for the NUL byte. This entry will never be used + so it does not matter. */ + tablemb[0] = 0; + + /* Now insert the `UNDEFINED' value if it is used. Since this value + will probably be used more than once it is good to store the + weights only once. */ + if (collate->undefined.used_in_level != 0) + output_weight (&weightpool, collate, &collate->undefined); + + for (ch = 1; ch < 256; ++ch) + if (collate->mbheads[ch]->mbnext == NULL + && collate->mbheads[ch]->nmbs <= 1) + { + tablemb[ch] = output_weight (&weightpool, collate, + collate->mbheads[ch]); + } + else + { + /* The entries in the list are sorted by length and then + alphabetically. This is the order in which we will add the + elements to the collation table. This allows simply walking + the table in sequence and stopping at the first matching + entry. Since the longer sequences are coming first in the + list they have the possibility to match first, just as it + has to be. In the worst case we are walking to the end of + the list where we put, if no singlebyte sequence is defined + in the locale definition, the weights for UNDEFINED. + + To reduce the length of the search list we compress them a bit. + This happens by collecting sequences of consecutive byte + sequences in one entry (having and begin and end byte sequence) + and add only one index into the weight table. We can find the + consecutive entries since they are also consecutive in the list. */ + struct element_t *runp = collate->mbheads[ch]; + struct element_t *lastp; + + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + + tablemb[ch] = -obstack_object_size (&extrapool); + + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Find out wether this is a single entry or we have more than + one consecutive entry. */ + if (runp->mbnext != NULL + && runp->nmbs == runp->mbnext->nmbs + && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0 + && (runp->mbs[runp->nmbs - 1] + == runp->mbnext->mbs[runp->nmbs - 1] + 1)) + { + int i; + struct element_t *series_startp = runp; + struct element_t *curp; + + /* Compute how much space we will need. */ + added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + + 2 * (runp->nmbs - 1)); + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + obstack_make_room (&extrapool, added); + + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. */ + obstack_int32_grow_fast (&extrapool, + -(obstack_object_size (&indirectpool) + / sizeof (int32_t))); + + /* Now search first the end of the series. */ + do + runp = runp->mbnext; + while (runp->mbnext != NULL + && runp->nmbs == runp->mbnext->nmbs + && memcmp (runp->mbs, runp->mbnext->mbs, + runp->nmbs - 1) == 0 + && (runp->mbs[runp->nmbs - 1] + == runp->mbnext->mbs[runp->nmbs - 1] + 1)); + + /* Now walk backward from here to the beginning. */ + curp = runp; + + assert (runp->nmbs <= 256); + obstack_1grow_fast (&extrapool, curp->nmbs - 1); + for (i = 1; i < curp->nmbs; ++i) + obstack_1grow_fast (&extrapool, curp->mbs[i]); + + /* Now find the end of the consecutive sequence and + add all the indeces in the indirect pool. */ + do + { + weightidx = output_weight (&weightpool, collate, curp); + obstack_int32_grow (&indirectpool, weightidx); + + curp = curp->mblast; + } + while (curp != series_startp); + + /* Add the final weight. */ + weightidx = output_weight (&weightpool, collate, curp); + obstack_int32_grow (&indirectpool, weightidx); + + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < curp->nmbs; ++i) + obstack_1grow_fast (&extrapool, curp->mbs[i]); + } + else + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). */ + int i; + + /* Output the weight info. */ + weightidx = output_weight (&weightpool, collate, runp); + + added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + + runp->nmbs - 1); + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + obstack_make_room (&extrapool, added); + + obstack_int32_grow_fast (&extrapool, weightidx); + assert (runp->nmbs <= 256); + obstack_1grow_fast (&extrapool, runp->nmbs - 1); + + for (i = 1; i < runp->nmbs; ++i) + obstack_1grow_fast (&extrapool, runp->mbs[i]); + } + + /* Add alignment bytes if necessary. */ + while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))) + obstack_1grow_fast (&extrapool, '\0'); + + /* Next entry. */ + lastp = runp; + runp = runp->mbnext; + } + while (runp != NULL); + + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + + /* If the final entry in the list is not a single character we + add an UNDEFINED entry here. */ + if (lastp->nmbs != 1) + { + int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1); + obstack_make_room (&extrapool, added); + + obstack_int32_grow_fast (&extrapool, 0); + /* XXX What rule? We just pick the first. */ + obstack_1grow_fast (&extrapool, 0); + /* Length is zero. */ + obstack_1grow_fast (&extrapool, 0); + + /* Add alignment bytes if necessary. */ + while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))) + obstack_1grow_fast (&extrapool, '\0'); + } + } + + /* Add padding to the tables if necessary. */ + while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool))) + obstack_1grow (&weightpool, 0); + + /* Now add the four tables. */ + add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256); + add_locale_raw_obstack (&file, &weightpool); + add_locale_raw_obstack (&file, &extrapool); + add_locale_raw_obstack (&file, &indirectpool); + + /* Now the same for the wide character table. We need to store some + more information here. */ + add_locale_empty (&file); + add_locale_empty (&file); + add_locale_empty (&file); + + /* Since we are using the sign of an integer to mark indirection the + offsets in the arrays we are indirectly referring to must not be + zero since -0 == 0. Therefore we add a bit of dummy content. */ + obstack_int32_grow (&extrapool, 0); + obstack_int32_grow (&indirectpool, 0); + + /* Now insert the `UNDEFINED' value if it is used. Since this value + will probably be used more than once it is good to store the + weights only once. */ + if (output_weightwc (&weightpool, collate, &collate->undefined) != 0) + abort (); + + /* Generate the table. Walk through the lists of sequences starting + with the same wide character and add them one after the other to + the table. In case we have more than one sequence starting with + the same byte we have to use extra indirection. */ + tablewc.p = 6; + tablewc.q = 10; + collidx_table_init (&tablewc); + + atwc.weightpool = &weightpool; + atwc.extrapool = &extrapool; + atwc.indpool = &indirectpool; + atwc.collate = collate; + atwc.tablewc = &tablewc; + + wchead_table_iterate (&collate->wcheads, add_to_tablewc); + + memset (&atwc, 0, sizeof (atwc)); + + /* Now add the four tables. */ + add_locale_collidx_table (&file, &tablewc); + add_locale_raw_obstack (&file, &weightpool); + add_locale_raw_obstack (&file, &extrapool); + add_locale_raw_obstack (&file, &indirectpool); + + /* Finally write the table with collation element names out. It is + a hash table with a simple function which gets the name of the + character as the input. One character might have many names. The + value associated with the name is an index into the weight table + where we are then interested in the first-level weight value. + + To determine how large the table should be we are counting the + elements have to put in. Since we are using internal chaining + using a secondary hash function we have to make the table a bit + larger to avoid extremely long search times. We can achieve + good results with a 40% larger table than there are entries. */ + elem_size = 0; + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character) + /* Yep, the element really counts. */ + ++elem_size; + + runp = runp->next; + } + /* Add 40% and find the next prime number. */ + elem_size = next_prime (elem_size * 1.4); + + /* Allocate the table. Each entry consists of two words: the hash + value and an index in a secondary table which provides the index + into the weight table and the string itself (so that a match can + be determined). */ + elem_table = (uint32_t *) obstack_alloc (&extrapool, + elem_size * 2 * sizeof (uint32_t)); + memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t)); + + /* Now add the elements. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character) + { + /* Compute the hash value of the name. */ + uint32_t namelen = strlen (runp->name); + uint32_t hash = elem_hash (runp->name, namelen); + size_t idx = hash % elem_size; +#ifndef NDEBUG + size_t start_idx = idx; +#endif + + if (elem_table[idx * 2] != 0) + { + /* The spot is already taken. Try iterating using the value + from the secondary hashing function. */ + size_t iter = hash % (elem_size - 2) + 1; + + do + { + idx += iter; + if (idx >= elem_size) + idx -= elem_size; + assert (idx != start_idx); + } + while (elem_table[idx * 2] != 0); + } + /* This is the spot where we will insert the value. */ + elem_table[idx * 2] = hash; + elem_table[idx * 2 + 1] = obstack_object_size (&extrapool); + + /* The string itself including length. */ + obstack_1grow (&extrapool, namelen); + obstack_grow (&extrapool, runp->name, namelen); + + /* And the multibyte representation. */ + obstack_1grow (&extrapool, runp->nmbs); + obstack_grow (&extrapool, runp->mbs, runp->nmbs); + + /* And align again to 32 bits. */ + if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0) + obstack_grow (&extrapool, "\0\0", + (sizeof (int32_t) + - ((1 + namelen + 1 + runp->nmbs) + % sizeof (int32_t)))); + + /* Now some 32-bit values: multibyte collation sequence, + wide char string (including length), and wide char + collation sequence. */ + obstack_int32_grow (&extrapool, runp->mbseqorder); + + obstack_int32_grow (&extrapool, runp->nwcs); + obstack_grow (&extrapool, runp->wcs, + runp->nwcs * sizeof (uint32_t)); + maybe_swap_uint32_obstack (&extrapool, runp->nwcs); + + obstack_int32_grow (&extrapool, runp->wcseqorder); + } + + runp = runp->next; + } + + /* Prepare to write out this data. */ + add_locale_uint32 (&file, elem_size); + add_locale_uint32_array (&file, elem_table, 2 * elem_size); + add_locale_raw_obstack (&file, &extrapool); + add_locale_raw_data (&file, collate->mbseqorder, 256); + add_locale_collseq_table (&file, &collate->wcseqorder); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file); + + obstack_free (&weightpool, NULL); + obstack_free (&extrapool, NULL); + obstack_free (&indirectpool, NULL); +} + + +static enum token_t +skip_to (struct linereader *ldfile, struct locale_collate_t *collate, + const struct charmap_t *charmap, int to_endif) +{ + while (1) + { + struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0); + enum token_t nowtok = now->tok; + + if (nowtok == tok_eof || nowtok == tok_end) + return nowtok; + + if (nowtok == tok_ifdef || nowtok == tok_ifndef) + { + lr_error (ldfile, _("%s: nested conditionals not supported"), + "LC_COLLATE"); + nowtok = skip_to (ldfile, collate, charmap, tok_endif); + if (nowtok == tok_eof || nowtok == tok_end) + return nowtok; + } + else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else)) + { + lr_ignore_rest (ldfile, 1); + return nowtok; + } + else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef)) + { + /* Do not read the rest of the line. */ + return nowtok; + } + else if (nowtok == tok_else) + { + lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE"); + } + + lr_ignore_rest (ldfile, 0); + } +} + + +void +collate_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_collate_t *collate; + struct token *now; + struct token *arg = NULL; + enum token_t nowtok; + enum token_t was_ellipsis = tok_none; + struct localedef_t *copy_locale = NULL; + /* Parsing state: + 0 - start + 1 - between `order-start' and `order-end' + 2 - after `order-end' + 3 - after `reorder-after', waiting for `reorder-end' + 4 - after `reorder-end' + 5 - after `reorder-sections-after', waiting for `reorder-sections-end' + 6 - after `reorder-sections-end' + */ + int state = 0; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_COLLATE' must be free. */ + lr_ignore_rest (ldfile, 1); + + while (1) + { + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + if (nowtok != tok_define) + break; + + if (ignore_content) + lr_ignore_rest (ldfile, 0); + else + { + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + else + { + /* Simply add the new symbol. */ + struct name_list *newsym = xmalloc (sizeof (*newsym) + + arg->val.str.lenmb + 1); + memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb); + newsym->str[arg->val.str.lenmb] = '\0'; + newsym->next = defined; + defined = newsym; + + lr_ignore_rest (ldfile, 1); + } + } + } + + if (nowtok == tok_copy) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_string) + { + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + + skip_category: + do + now = lr_token (ldfile, charmap, result, NULL, verbose); + while (now->tok != tok_eof && now->tok != tok_end); + + if (now->tok != tok_eof + || (now = lr_token (ldfile, charmap, result, NULL, verbose), + now->tok == tok_eof)) + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); + else if (now->tok != tok_lc_collate) + { + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + } + else + lr_ignore_rest (ldfile, 1); + + return; + } + + if (! ignore_content) + { + /* Get the locale definition. */ + copy_locale = load_locale (LC_COLLATE, now->val.str.startmb, + repertoire_name, charmap, NULL); + if ((copy_locale->avail & COLLATE_LOCALE) == 0) + { + /* Not yet loaded. So do it now. */ + if (locfile_read (copy_locale, charmap) != 0) + goto skip_category; + } + + if (copy_locale->categories[LC_COLLATE].collate == NULL) + return; + } + + lr_ignore_rest (ldfile, 1); + + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* Prepare the data structures. */ + collate_startup (ldfile, result, copy_locale, ignore_content); + collate = result->categories[LC_COLLATE].collate; + + while (1) + { + char ucs4buf[10]; + char *symstr; + size_t symlen; + + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { + case tok_copy: + /* Allow copying other locales. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_string) + goto err_label; + + if (! ignore_content) + load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name, + charmap, result); + + lr_ignore_rest (ldfile, 1); + break; + + case tok_coll_weight_max: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_number) + goto err_label; + if (collate->col_weight_max != -1) + lr_error (ldfile, _("%s: duplicate definition of `%s'"), + "LC_COLLATE", "col_weight_max"); + else + collate->col_weight_max = arg->val.num; + lr_ignore_rest (ldfile, 1); + break; + + case tok_section_symbol: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else if (!ignore_content) + { + /* Check whether this section is already known. */ + struct section_list *known = collate->sections; + while (known != NULL) + { + if (strcmp (known->name, arg->val.str.startmb) == 0) + break; + known = known->next; + } + + if (known != NULL) + { + lr_error (ldfile, + _("%s: duplicate declaration of section `%s'"), + "LC_COLLATE", arg->val.str.startmb); + free (arg->val.str.startmb); + } + else + collate->sections = make_seclist_elem (collate, + arg->val.str.startmb, + collate->sections); + + lr_ignore_rest (ldfile, known == NULL); + } + else + { + free (arg->val.str.startmb); + lr_ignore_rest (ldfile, 0); + } + break; + + case tok_collating_element: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 2) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + + /* Next the `from' keyword. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_from) + { + free ((char *) symbol); + goto err_label; + } + + ldfile->return_widestr = 1; + ldfile->translate_strings = 1; + + /* Finally the string with the replacement. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + + ldfile->return_widestr = 0; + ldfile->translate_strings = 0; + + if (arg->tok != tok_string) + goto err_label; + + if (!ignore_content && symbol != NULL) + { + /* The name is already defined. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + goto col_elem_free; + + if (arg->val.str.startmb != NULL) + insert_entry (&collate->elem_table, symbol, symbol_len, + new_element (collate, + arg->val.str.startmb, + arg->val.str.lenmb - 1, + arg->val.str.startwc, + symbol, symbol_len, 0)); + } + else + { + col_elem_free: + free ((char *) symbol); + free (arg->val.str.startmb); + free (arg->val.str.startwc); + } + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_collating_symbol: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 2) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + char *endsymbol = NULL; + size_t endsymbol_len = 0; + enum token_t ellipsis = tok_none; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4) + { + ellipsis = arg->tok; + + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + if (arg->tok != tok_bsymbol) + { + free (symbol); + goto err_label; + } + + endsymbol = arg->val.str.startmb; + endsymbol_len = arg->val.str.lenmb; + + lr_ignore_rest (ldfile, 1); + } + else if (arg->tok != tok_eol) + { + free (symbol); + goto err_label; + } + + if (!ignore_content) + { + if (symbol == NULL + || (ellipsis != tok_none && endsymbol == NULL)) + { + lr_error (ldfile, _("\ +%s: unknown character in collating symbol name"), + "LC_COLLATE"); + goto col_sym_free; + } + else if (ellipsis == tok_none) + { + /* A single symbol, no ellipsis. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + /* The name is already defined. */ + goto col_sym_free; + + insert_entry (&collate->sym_table, symbol, symbol_len, + new_symbol (collate, symbol, symbol_len)); + } + else if (symbol_len != endsymbol_len) + { + col_sym_inv_range: + lr_error (ldfile, + _("invalid names for character range")); + goto col_sym_free; + } + else + { + /* Oh my, we have to handle an ellipsis. First, as + usual, determine the common prefix and then + convert the rest into a range. */ + size_t prefixlen; + unsigned long int from; + unsigned long int to; + char *endp; + + for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen) + if (symbol[prefixlen] != endsymbol[prefixlen]) + break; + + /* Convert the rest into numbers. */ + symbol[symbol_len] = '\0'; + from = strtoul (&symbol[prefixlen], &endp, + ellipsis == tok_ellipsis2 ? 16 : 10); + if (*endp != '\0') + goto col_sym_inv_range; + + endsymbol[symbol_len] = '\0'; + to = strtoul (&endsymbol[prefixlen], &endp, + ellipsis == tok_ellipsis2 ? 16 : 10); + if (*endp != '\0') + goto col_sym_inv_range; + + if (from > to) + goto col_sym_inv_range; + + /* Now loop over all entries. */ + while (from <= to) + { + char *symbuf; + + symbuf = (char *) obstack_alloc (&collate->mempool, + symbol_len + 1); + + /* Create the name. */ + sprintf (symbuf, + ellipsis == tok_ellipsis2 + ? "%.*s%.*lX" : "%.*s%.*lu", + (int) prefixlen, symbol, + (int) (symbol_len - prefixlen), from); + + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbuf, symbol_len)) + /* The name is already defined. */ + goto col_sym_free; + + insert_entry (&collate->sym_table, symbuf, + symbol_len, + new_symbol (collate, symbuf, + symbol_len)); + + /* Increment the counter. */ + ++from; + } + + goto col_sym_free; + } + } + else + { + col_sym_free: + free (symbol); + free (endsymbol); + } + } + break; + + case tok_symbol_equivalence: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *newname = arg->val.str.startmb; + size_t newname_len = arg->val.str.lenmb; + const char *symname; + size_t symname_len; + void *symval; /* Actually struct symbol_t* */ + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + { + free ((char *) newname); + goto err_label; + } + + symname = arg->val.str.startmb; + symname_len = arg->val.str.lenmb; + + if (newname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition name"), + "LC_COLLATE"); + + sym_equiv_free: + free ((char *) newname); + free ((char *) symname); + break; + } + if (symname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition value"), + "LC_COLLATE"); + goto sym_equiv_free; + } + + /* See whether the symbol name is already defined. */ + if (find_entry (&collate->sym_table, symname, symname_len, + &symval) != 0) + { + lr_error (ldfile, _("\ +%s: unknown symbol `%s' in equivalent definition"), + "LC_COLLATE", symname); + goto sym_equiv_free; + } + + if (insert_entry (&collate->sym_table, + newname, newname_len, symval) < 0) + { + lr_error (ldfile, _("\ +error while adding equivalent collating symbol")); + goto sym_equiv_free; + } + + free ((char *) symname); + } + lr_ignore_rest (ldfile, 1); + break; + + case tok_script: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* We get told about the scripts we know. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + struct section_list *runp = collate->known_sections; + char *name; + + while (runp != NULL) + if (strncmp (runp->name, arg->val.str.startmb, + arg->val.str.lenmb) == 0 + && runp->name[arg->val.str.lenmb] == '\0') + break; + else + runp = runp->def_next; + + if (runp != NULL) + { + lr_error (ldfile, _("duplicate definition of script `%s'"), + runp->name); + lr_ignore_rest (ldfile, 0); + break; + } + + runp = (struct section_list *) xcalloc (1, sizeof (*runp)); + name = (char *) xmalloc (arg->val.str.lenmb + 1); + memcpy (name, arg->val.str.startmb, arg->val.str.lenmb); + name[arg->val.str.lenmb] = '\0'; + runp->name = name; + + runp->def_next = collate->known_sections; + collate->known_sections = runp; + } + lr_ignore_rest (ldfile, 1); + break; + + case tok_order_start: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 1 && state != 2) + goto err_label; + state = 1; + + /* The 14652 draft does not specify whether all `order_start' lines + must contain the same number of sort-rules, but 14651 does. So + we require this here as well. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_bsymbol) + { + /* This better should be a section name. */ + struct section_list *sp = collate->known_sections; + while (sp != NULL + && (sp->name == NULL + || strncmp (sp->name, arg->val.str.startmb, + arg->val.str.lenmb) != 0 + || sp->name[arg->val.str.lenmb] != '\0')) + sp = sp->def_next; + + if (sp == NULL) + { + lr_error (ldfile, _("\ +%s: unknown section name `%.*s'"), + "LC_COLLATE", (int) arg->val.str.lenmb, + arg->val.str.startmb); + /* We use the error section. */ + collate->current_section = &collate->error_section; + + if (collate->error_section.first == NULL) + { + /* Insert &collate->error_section at the end of + the collate->sections list. */ + if (collate->sections == NULL) + collate->sections = &collate->error_section; + else + { + sp = collate->sections; + while (sp->next != NULL) + sp = sp->next; + + sp->next = &collate->error_section; + } + collate->error_section.next = NULL; + } + } + else + { + /* One should not be allowed to open the same + section twice. */ + if (sp->first != NULL) + lr_error (ldfile, _("\ +%s: multiple order definitions for section `%s'"), + "LC_COLLATE", sp->name); + else + { + /* Insert sp in the collate->sections list, + right after collate->current_section. */ + if (collate->current_section != NULL) + { + sp->next = collate->current_section->next; + collate->current_section->next = sp; + } + else if (collate->sections == NULL) + /* This is the first section to be defined. */ + collate->sections = sp; + + collate->current_section = sp; + } + + /* Next should come the end of the line or a semicolon. */ + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + if (arg->tok == tok_eol) + { + uint32_t cnt; + + /* This means we have exactly one rule: `forward'. */ + if (nrules > 1) + lr_error (ldfile, _("\ +%s: invalid number of sorting rules"), + "LC_COLLATE"); + else + nrules = 1; + sp->rules = obstack_alloc (&collate->mempool, + (sizeof (enum coll_sort_rule) + * nrules)); + for (cnt = 0; cnt < nrules; ++cnt) + sp->rules[cnt] = sort_forward; + + /* Next line. */ + break; + } + + /* Get the next token. */ + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + } + } + else + { + /* There is no section symbol. Therefore we use the unnamed + section. */ + collate->current_section = &collate->unnamed_section; + + if (collate->unnamed_section_defined) + lr_error (ldfile, _("\ +%s: multiple order definitions for unnamed section"), + "LC_COLLATE"); + else + { + /* Insert &collate->unnamed_section at the beginning of + the collate->sections list. */ + collate->unnamed_section.next = collate->sections; + collate->sections = &collate->unnamed_section; + collate->unnamed_section_defined = true; + } + } + + /* Now read the direction names. */ + read_directions (ldfile, arg, charmap, repertoire, result); + + /* From now we need the strings untranslated. */ + ldfile->translate_strings = 0; + break; + + case tok_order_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 1) + goto err_label; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + + state = 2; + lr_ignore_rest (ldfile, 1); + break; + + case tok_reorder_after: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, arg->val.str.startmb, + arg->val.str.lenmb, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + } + else if (state == 0 && copy_locale == NULL) + goto err_label; + else if (state != 0 && state != 2 && state != 3) + goto err_label; + state = 3; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4) + { + /* Find this symbol in the sequence table. */ + char ucsbuf[10]; + char *startmb; + size_t lenmb; + struct element_t *insp; + int no_error = 1; + void *ptr; + + if (arg->tok == tok_bsymbol) + { + startmb = arg->val.str.startmb; + lenmb = arg->val.str.lenmb; + } + else + { + sprintf (ucsbuf, "U%08X", arg->val.ucs4); + startmb = ucsbuf; + lenmb = 9; + } + + if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0) + /* Yes, the symbol exists. Simply point the cursor + to it. */ + collate->cursor = (struct element_t *) ptr; + else + { + struct symbol_t *symbp; + void *ptr; + + if (find_entry (&collate->sym_table, startmb, lenmb, + &ptr) == 0) + { + symbp = ptr; + + if (symbp->order->last != NULL + || symbp->order->next != NULL) + collate->cursor = symbp->order; + else + { + /* This is a collating symbol but its position + is not yet defined. */ + lr_error (ldfile, _("\ +%s: order for collating symbol %.*s not yet defined"), + "LC_COLLATE", (int) lenmb, startmb); + collate->cursor = NULL; + no_error = 0; + } + } + else if (find_entry (&collate->elem_table, startmb, lenmb, + &ptr) == 0) + { + insp = (struct element_t *) ptr; + + if (insp->last != NULL || insp->next != NULL) + collate->cursor = insp; + else + { + /* This is a collating element but its position + is not yet defined. */ + lr_error (ldfile, _("\ +%s: order for collating element %.*s not yet defined"), + "LC_COLLATE", (int) lenmb, startmb); + collate->cursor = NULL; + no_error = 0; + } + } + else + { + /* This is bad. The symbol after which we have to + insert does not exist. */ + lr_error (ldfile, _("\ +%s: cannot reorder after %.*s: symbol not known"), + "LC_COLLATE", (int) lenmb, startmb); + collate->cursor = NULL; + no_error = 0; + } + } + + lr_ignore_rest (ldfile, no_error); + } + else + /* This must not happen. */ + goto err_label; + break; + + case tok_reorder_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + break; + + if (state != 3) + goto err_label; + state = 4; + lr_ignore_rest (ldfile, 1); + break; + + case tok_reorder_sections_after: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + } + else if (state == 3) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing `reorder-end' keyword"), "LC_COLLATE")); + state = 4; + } + else if (state != 2 && state != 4) + goto err_label; + state = 5; + + /* Get the name of the sections we are adding after. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_bsymbol) + { + /* Now find a section with this name. */ + struct section_list *runp = collate->sections; + + while (runp != NULL) + { + if (runp->name != NULL + && strlen (runp->name) == arg->val.str.lenmb + && memcmp (runp->name, arg->val.str.startmb, + arg->val.str.lenmb) == 0) + break; + + runp = runp->next; + } + + if (runp != NULL) + collate->current_section = runp; + else + { + /* This is bad. The section after which we have to + reorder does not exist. Therefore we cannot + process the whole rest of this reorder + specification. */ + lr_error (ldfile, _("%s: section `%.*s' not known"), + "LC_COLLATE", (int) arg->val.str.lenmb, + arg->val.str.startmb); + + do + { + lr_ignore_rest (ldfile, 0); + + now = lr_token (ldfile, charmap, result, NULL, verbose); + } + while (now->tok == tok_reorder_sections_after + || now->tok == tok_reorder_sections_end + || now->tok == tok_end); + + /* Process the token we just saw. */ + nowtok = now->tok; + continue; + } + } + else + /* This must not happen. */ + goto err_label; + break; + + case tok_reorder_sections_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + break; + + if (state != 5) + goto err_label; + state = 6; + lr_ignore_rest (ldfile, 1); + break; + + case tok_bsymbol: + case tok_ucs4: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 1 && state != 3 && state != 5) + goto err_label; + + if ((state == 0 || state == 5) && nowtok == tok_ucs4) + goto err_label; + + if (nowtok == tok_ucs4) + { + snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4); + symstr = ucs4buf; + symlen = 9; + } + else if (arg != NULL) + { + symstr = arg->val.str.startmb; + symlen = arg->val.str.lenmb; + } + else + { + lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE", + (int) ldfile->token.val.str.lenmb, + ldfile->token.val.str.startmb); + break; + } + + struct element_t *seqp; + if (state == 0) + { + /* We are outside an `order_start' region. This means + we must only accept definitions of values for + collation symbols since these are purely abstract + values and don't need directions associated. */ + void *ptr; + + if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0) + { + seqp = ptr; + + /* It's already defined. First check whether this + is really a collating symbol. */ + if (seqp->is_character) + goto err_label; + + goto move_entry; + } + else + { + void *result; + + if (find_entry (&collate->sym_table, symstr, symlen, + &result) != 0) + /* No collating symbol, it's an error. */ + goto err_label; + + /* Maybe this is the first time we define a symbol + value and it is before the first actual section. */ + if (collate->sections == NULL) + collate->sections = collate->current_section = + &collate->symbol_section; + } + + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, + charmap, repertoire, result); + + /* Remember that we processed the ellipsis. */ + was_ellipsis = tok_none; + + /* And don't add the value a second time. */ + break; + } + } + else if (state == 3) + { + /* It is possible that we already have this collation sequence. + In this case we move the entry. */ + void *sym; + void *ptr; + + /* If the symbol after which we have to insert was not found + ignore all entries. */ + if (collate->cursor == NULL) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0) + { + seqp = (struct element_t *) ptr; + goto move_entry; + } + + if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0 + && (seqp = ((struct symbol_t *) sym)->order) != NULL) + goto move_entry; + + if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0 + && (seqp = (struct element_t *) ptr, + seqp->last != NULL || seqp->next != NULL + || (collate->start != NULL && seqp == collate->start))) + { + move_entry: + /* Remove the entry from the old position. */ + if (seqp->last == NULL) + collate->start = seqp->next; + else + seqp->last->next = seqp->next; + if (seqp->next != NULL) + seqp->next->last = seqp->last; + + /* We also have to check whether this entry is the + first or last of a section. */ + if (seqp->section->first == seqp) + { + if (seqp->section->first == seqp->section->last) + /* This section has no content anymore. */ + seqp->section->first = seqp->section->last = NULL; + else + seqp->section->first = seqp->next; + } + else if (seqp->section->last == seqp) + seqp->section->last = seqp->last; + + /* Now insert it in the new place. */ + insert_weights (ldfile, seqp, charmap, repertoire, result, + tok_none); + break; + } + + /* Otherwise we just add a new entry. */ + } + else if (state == 5) + { + /* We are reordering sections. Find the named section. */ + struct section_list *runp = collate->sections; + struct section_list *prevp = NULL; + + while (runp != NULL) + { + if (runp->name != NULL + && strlen (runp->name) == symlen + && memcmp (runp->name, symstr, symlen) == 0) + break; + + prevp = runp; + runp = runp->next; + } + + if (runp == NULL) + { + lr_error (ldfile, _("%s: section `%.*s' not known"), + "LC_COLLATE", (int) symlen, symstr); + lr_ignore_rest (ldfile, 0); + } + else + { + if (runp != collate->current_section) + { + /* Remove the named section from the old place and + insert it in the new one. */ + prevp->next = runp->next; + + runp->next = collate->current_section->next; + collate->current_section->next = runp; + collate->current_section = runp; + } + + /* Process the rest of the line which might change + the collation rules. */ + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + if (arg->tok != tok_eof && arg->tok != tok_eol) + read_directions (ldfile, arg, charmap, repertoire, + result); + } + break; + } + else if (was_ellipsis != tok_none) + { + /* Using the information in the `ellipsis_weight' + element and this and the last value we have to handle + the ellipsis now. */ + assert (state == 1); + + handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap, + repertoire, result); + + /* Remember that we processed the ellipsis. */ + was_ellipsis = tok_none; + + /* And don't add the value a second time. */ + break; + } + + /* Now insert in the new place. */ + insert_value (ldfile, symstr, symlen, charmap, repertoire, result); + break; + + case tok_undefined: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 1) + goto err_label; + + if (was_ellipsis != tok_none) + { + lr_error (ldfile, + _("%s: cannot have `%s' as end of ellipsis range"), + "LC_COLLATE", "UNDEFINED"); + + unlink_element (collate); + was_ellipsis = tok_none; + } + + /* See whether UNDEFINED already appeared somewhere. */ + if (collate->undefined.next != NULL + || &collate->undefined == collate->cursor) + { + lr_error (ldfile, + _("%s: order for `%.*s' already defined at %s:%Zu"), + "LC_COLLATE", 9, "UNDEFINED", + collate->undefined.file, + collate->undefined.line); + lr_ignore_rest (ldfile, 0); + } + else + /* Parse the weights. */ + insert_weights (ldfile, &collate->undefined, charmap, + repertoire, result, tok_none); + break; + + case tok_ellipsis2: /* symbolic hexadecimal ellipsis */ + case tok_ellipsis3: /* absolute ellipsis */ + case tok_ellipsis4: /* symbolic decimal ellipsis */ + /* This is the symbolic (decimal or hexadecimal) or absolute + ellipsis. */ + if (was_ellipsis != tok_none) + goto err_label; + + if (state != 0 && state != 1 && state != 3) + goto err_label; + + was_ellipsis = nowtok; + + insert_weights (ldfile, &collate->ellipsis_weight, charmap, + repertoire, result, nowtok); + break; + + case tok_end: + seen_end: + /* Next we assume `LC_COLLATE'. */ + if (!ignore_content) + { + if (state == 0 && copy_locale == NULL) + /* We must either see a copy statement or have + ordering values. */ + lr_error (ldfile, + _("%s: empty category description not allowed"), + "LC_COLLATE"); + else if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + } + else if (state == 3) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing `reorder-end' keyword"), "LC_COLLATE")); + else if (state == 5) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing `reorder-sections-end' keyword"), "LC_COLLATE")); + } + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE"); + else if (arg->tok != tok_lc_collate) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_collate); + return; + + case tok_define: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + goto err_label; + + /* Simply add the new symbol. */ + struct name_list *newsym = xmalloc (sizeof (*newsym) + + arg->val.str.lenmb + 1); + memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb); + newsym->str[arg->val.str.lenmb] = '\0'; + newsym->next = defined; + defined = newsym; + + lr_ignore_rest (ldfile, 1); + break; + + case tok_undef: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + goto err_label; + + /* Remove _all_ occurrences of the symbol from the list. */ + struct name_list *prevdef = NULL; + struct name_list *curdef = defined; + while (curdef != NULL) + if (strncmp (arg->val.str.startmb, curdef->str, + arg->val.str.lenmb) == 0 + && curdef->str[arg->val.str.lenmb] == '\0') + { + if (prevdef == NULL) + defined = curdef->next; + else + prevdef->next = curdef->next; + + struct name_list *olddef = curdef; + curdef = curdef->next; + + free (olddef); + } + else + { + prevdef = curdef; + curdef = curdef->next; + } + + lr_ignore_rest (ldfile, 1); + break; + + case tok_ifdef: + case tok_ifndef: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + found_ifdef: + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + goto err_label; + lr_ignore_rest (ldfile, 1); + + if (collate->else_action == else_none) + { + curdef = defined; + while (curdef != NULL) + if (strncmp (arg->val.str.startmb, curdef->str, + arg->val.str.lenmb) == 0 + && curdef->str[arg->val.str.lenmb] == '\0') + break; + else + curdef = curdef->next; + + if ((nowtok == tok_ifdef && curdef != NULL) + || (nowtok == tok_ifndef && curdef == NULL)) + { + /* We have to use the if-branch. */ + collate->else_action = else_ignore; + } + else + { + /* We have to use the else-branch, if there is one. */ + nowtok = skip_to (ldfile, collate, charmap, 0); + if (nowtok == tok_else) + collate->else_action = else_seen; + else if (nowtok == tok_elifdef) + { + nowtok = tok_ifdef; + goto found_ifdef; + } + else if (nowtok == tok_elifndef) + { + nowtok = tok_ifndef; + goto found_ifdef; + } + else if (nowtok == tok_eof) + goto seen_eof; + else if (nowtok == tok_end) + goto seen_end; + } + } + else + { + /* XXX Should it really become necessary to support nested + preprocessor handling we will push the state here. */ + lr_error (ldfile, _("%s: nested conditionals not supported"), + "LC_COLLATE"); + nowtok = skip_to (ldfile, collate, charmap, 1); + if (nowtok == tok_eof) + goto seen_eof; + else if (nowtok == tok_end) + goto seen_end; + } + break; + + case tok_elifdef: + case tok_elifndef: + case tok_else: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + lr_ignore_rest (ldfile, 1); + + if (collate->else_action == else_ignore) + { + /* Ignore everything until the endif. */ + nowtok = skip_to (ldfile, collate, charmap, 1); + if (nowtok == tok_eof) + goto seen_eof; + else if (nowtok == tok_end) + goto seen_end; + } + else + { + assert (collate->else_action == else_none); + lr_error (ldfile, _("\ +%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE", + nowtok == tok_else ? "else" + : nowtok == tok_elifdef ? "elifdef" : "elifndef"); + } + break; + + case tok_endif: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + lr_ignore_rest (ldfile, 1); + + if (collate->else_action != else_ignore + && collate->else_action != else_seen) + lr_error (ldfile, _("\ +%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE"); + + /* XXX If we support nested preprocessor directives we pop + the state here. */ + collate->else_action = else_none; + break; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + seen_eof: + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); +} |