diff options
Diffstat (limited to 'iconv/gconv_db.c')
-rw-r--r-- | iconv/gconv_db.c | 453 |
1 files changed, 280 insertions, 173 deletions
diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c index e6253b8380..5269d792f6 100644 --- a/iconv/gconv_db.c +++ b/iconv/gconv_db.c @@ -1,5 +1,5 @@ /* Provide access to the collection of available transformation modules. - Copyright (C) 1997, 1998 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,9 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <limits.h> #include <search.h> #include <stdlib.h> #include <string.h> +#include <sys/param.h> #include <bits/libc-lock.h> #include <ldsodefs.h> @@ -32,8 +34,7 @@ void *__gconv_alias_db; /* Array with available modules. */ -size_t __gconv_nmodules; -struct gconv_module **__gconv_modules_db; +struct gconv_module *__gconv_modules_db; /* We modify global data. */ __libc_lock_define_initialized (static, lock) @@ -55,14 +56,20 @@ __gconv_alias_compare (const void *p1, const void *p2) struct derivation_step { const char *result_set; + size_t result_set_len; + int cost_lo; + int cost_hi; struct gconv_module *code; struct derivation_step *last; struct derivation_step *next; }; -#define NEW_STEP(result, module, last_mod) \ +#define NEW_STEP(result, hi, lo, module, last_mod) \ ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \ newp->result_set = result; \ + newp->result_set_len = strlen (result); \ + newp->cost_hi = hi; \ + newp->cost_lo = lo; \ newp->code = module; \ newp->last = last_mod; \ newp->next = NULL; \ @@ -224,7 +231,7 @@ gen_steps (struct derivation_step *best, const char *toset, { status = _CALL_DL_FCT (result[step_cnt].init_fct, (&result[step_cnt])); - + if (status != GCONV_OK) { failed = 1; @@ -276,9 +283,9 @@ find_derivation (const char *toset, const char *toset_expand, struct gconv_step **handle, size_t *nsteps) { __libc_lock_define_initialized (static, lock) - struct derivation_step *first, *current, **lastp, *best = NULL; - int best_cost_hi = 0; - int best_cost_lo = 0; + struct derivation_step *first, *current, **lastp, *solution = NULL; + int best_cost_hi = INT_MAX; + int best_cost_lo = INT_MAX; int result; result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset, @@ -299,202 +306,287 @@ find_derivation (const char *toset, const char *toset_expand, return result; } - /* ### TODO - For now we use a simple algorithm with quadratic runtime behaviour. + /* For now we use a simple algorithm with quadratic runtime behaviour. The task is to match the `toset' with any of the available rules, starting from FROMSET. */ if (fromset_expand != NULL) { - first = NEW_STEP (fromset_expand, NULL, NULL); - first->next = NEW_STEP (fromset, NULL, NULL); + first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL); + first->next = NEW_STEP (fromset, 0, 0, NULL, NULL); lastp = &first->next->next; } else { - first = NEW_STEP (fromset, NULL, NULL); + first = NEW_STEP (fromset, 0, 0, NULL, NULL); lastp = &first->next; } - current = first; - while (current != NULL) + for (current = first; current != NULL; current = current->next) { /* Now match all the available module specifications against the current charset name. If any of them matches check whether we already have a derivation for this charset. If yes, use the one with the lower costs. Otherwise add the new charset at the - end. */ - size_t cnt; - - for (cnt = 0; cnt < __gconv_nmodules; ++cnt) - { - const char *result_set = NULL; + end. + + The module database is organized in a tree form which allows to + search for prefixes. So we search for the first entry with a + matching prefix and any other matching entry can be found from + this place. */ + struct gconv_module *node = __gconv_modules_db; + + /* Maybe it is not necessary anymore to look for a solution for + this entry since the cost is already as high (or heigher) as + the cost for the best solution so far. */ + if (current->cost_hi > best_cost_hi + || (current->cost_hi == best_cost_hi + && current->cost_lo >= best_cost_lo)) + continue; + + while (node != NULL) + { + int cmpres = strncmp (current->result_set, node->from_constpfx, + MIN (current->result_set_len, + node->from_constpfx_len)); - if (__gconv_modules_db[cnt]->from_pattern == NULL) + if (cmpres == 0) { - if (__strcasecmp (current->result_set, - __gconv_modules_db[cnt]->from_constpfx) == 0) + /* Walk through the list of modules with this prefix and + try to match the name. */ + struct gconv_module *runp; + + if (current->result_set_len < node->from_constpfx_len) + /* Cannot possibly match. */ + break; + + /* Check all the modules with this prefix. */ + runp = node; + do { - if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0) - result_set = toset_expand ?: toset; + const char *result_set = NULL; + + if (runp->from_pattern == NULL) + { + /* This is a simple entry and therefore we have a + found an matching entry if the strings are really + equal. */ + if (current->result_set_len == runp->from_constpfx_len) + { + if (strcmp (runp->to_string, "-") == 0) + result_set = toset_expand ?: toset; + else + result_set = runp->to_string; + } + } else - result_set = __gconv_modules_db[cnt]->to_string; - } - } - else - /* We have a regular expression. First see if the prefix - matches. */ - if (__strncasecmp (current->result_set, - __gconv_modules_db[cnt]->from_constpfx, - __gconv_modules_db[cnt]->from_constpfx_len) - == 0) - { - /* First compile the regex if not already done. */ - if (__gconv_modules_db[cnt]->from_regex == NULL) - { - if (__regcomp (&__gconv_modules_db[cnt]->from_regex_mem, - __gconv_modules_db[cnt]->from_pattern, - REG_EXTENDED | REG_ICASE) != 0) - /* Something is wrong. Remember this. */ - __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L; - else - __gconv_modules_db[cnt]->from_regex - = &__gconv_modules_db[cnt]->from_regex_mem; - } - - if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L) - { - /* Try to match the from name. */ - regmatch_t match[4]; - - if (__regexec (__gconv_modules_db[cnt]->from_regex, - current->result_set, 4, match, 0) == 0 - && match[0].rm_so == 0 - && current->result_set[match[0].rm_eo] == '\0') - { - /* At least the whole <from> string is matched. - We must now match sed-like possible - subexpressions from the match to the - toset expression. */ + { + /* Compile the regular expression if necessary. */ + if (runp->from_regex == NULL) + { + if (__regcomp (&runp->from_regex_mem, + runp->from_pattern, + REG_EXTENDED | REG_ICASE) != 0) + /* Something is wrong. Remember this. */ + runp->from_regex = (regex_t *) -1L; + else + runp->from_regex = &runp->from_regex_mem; + } + + if (runp->from_regex != (regex_t *) -1L) + { + regmatch_t match[4]; + + /* Try to match the regular expression. */ + if (__regexec (runp->from_regex, current->result_set, + 4, match, 0) == 0 + && match[0].rm_so == 0 + && current->result_set[match[0].rm_eo] == '\0') + { + /* At least the whole <from> string is matched. + We must now match sed-like possible + subexpressions from the match to the + toset expression. */ #define ENSURE_LEN(LEN) \ if (wp + (LEN) >= constr + len - 1) \ { \ char *newp = alloca (len += 128); \ - memcpy (newp, constr, wp - constr); \ - wp = newp + (wp - constr); \ + wp = __mempcpy (newp, constr, wp - constr); \ constr = newp; \ } - size_t len = 128; - char *constr = alloca (len); - char *wp = constr; - const char *cp = __gconv_modules_db[cnt]->to_string; - - while (*cp != '\0') - { - if (*cp != '\\') - { - ENSURE_LEN (1); - *wp++ = *cp++; - } - else if (cp[1] == '\0') - /* Backslash at end of string. */ - break; - else - { - ++cp; - if (*cp == '\\') - { - *wp++ = *cp++; - ENSURE_LEN (1); - } - else if (*cp < '1' || *cp > '3') - break; - else - { - int idx = *cp - '0'; - if (match[idx].rm_so == -1) - /* No match. */ - break; - - ENSURE_LEN (match[idx].rm_eo - - match[idx].rm_so); - wp = __mempcpy (wp, - ¤t->result_set[match[idx].rm_so], - match[idx].rm_eo - - match[idx].rm_so); - ++cp; - } - } - } - if (*cp == '\0' && wp != constr) - { - /* Terminate the constructed string. */ - *wp = '\0'; - result_set = constr; - } - } - } - } - - if (result_set != NULL) - { - /* We managed to find a derivation. First see whether - this is what we are looking for. */ - if (__strcasecmp (result_set, toset) == 0 - || (toset_expand != NULL - && __strcasecmp (result_set, toset_expand) == 0)) - { - /* Determine the costs. If they are lower than the - previous solution (or this is the first solution) - remember this solution. */ - int cost_hi = __gconv_modules_db[cnt]->cost_hi; - int cost_lo = __gconv_modules_db[cnt]->cost_lo; - struct derivation_step *runp = current; - while (runp->code != NULL) - { - cost_hi += runp->code->cost_hi; - cost_lo += runp->code->cost_lo; - runp = runp->last; + size_t len = 128; + char *constr = alloca (len); + char *wp = constr; + const char *cp = runp->to_string; + + while (*cp != '\0') + { + if (*cp != '\\') + { + ENSURE_LEN (1); + *wp++ = *cp++; + } + else if (cp[1] == '\0') + /* Backslash at end of string. */ + break; + else + { + ++cp; + if (*cp == '\\') + { + *wp++ = *cp++; + ENSURE_LEN (1); + } + else if (*cp < '1' || *cp > '3') + break; + else + { + int idx = *cp - '0'; + if (match[idx].rm_so == -1) + /* No match. */ + break; + + ENSURE_LEN (match[idx].rm_eo + - match[idx].rm_so); + wp = __mempcpy (wp, + ¤t->result_set[match[idx].rm_so], + match[idx].rm_eo + - match[idx].rm_so); + ++cp; + } + } + } + if (*cp == '\0' && wp != constr) + { + /* Terminate the constructed string. */ + *wp = '\0'; + result_set = constr; + } + } + } } - if (best == NULL || cost_hi < best_cost_hi - || (cost_hi == best_cost_hi && cost_lo < best_cost_lo)) + + if (result_set != NULL) { - best = NEW_STEP (result_set, __gconv_modules_db[cnt], - current); - best_cost_hi = cost_hi; - best_cost_lo = cost_lo; + int cost_hi = runp->cost_hi + current->cost_hi; + int cost_lo = runp->cost_lo + current->cost_lo; + struct derivation_step *step; + + /* We managed to find a derivation. First see whether + this is what we are looking for. */ + if (__strcasecmp (result_set, toset) == 0 + || (toset_expand != NULL + && __strcasecmp (result_set, toset_expand) == 0)) + { + if (solution == NULL || cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + best_cost_hi = cost_hi; + best_cost_lo = cost_lo; + } + + /* Append this solution to list. */ + if (solution == NULL) + solution = NEW_STEP (result_set, 0, 0, runp, + current); + else + { + while (solution->next != NULL) + solution = solution->next; + + solution->next = NEW_STEP (result_set, 0, 0, + runp, current); + } + } + else if (cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + /* Append at the end if there is no entry with + this name. */ + for (step = first; step != NULL; step = step->next) + if (__strcasecmp (result_set, step->result_set) + == 0) + break; + + if (step == NULL) + { + *lastp = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + lastp = &(*lastp)->next; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + step->code = runp; + step->last = current; + + /* Update the cost for all steps. */ + for (step = first; step != NULL; + step = step->next) + { + struct derivation_step *back; + + if (step->code == NULL) + /* This is one of the entries we started + from. */ + continue; + + step->cost_hi = step->code->cost_hi; + step->cost_lo = step->code->cost_lo; + + for (back = step->last; back->code != NULL; + back = back->last) + { + step->cost_hi += back->code->cost_hi; + step->cost_lo += back->code->cost_lo; + } + } + + for (step = solution; step != NULL; + step = step->next) + { + step->cost_hi = (step->code->cost_hi + + step->last->cost_hi); + step->cost_lo = (step->code->cost_lo + + step->last->cost_lo); + + if (step->cost_hi < best_cost_hi + || (step->cost_hi == best_cost_hi + && step->cost_lo < best_cost_lo)) + { + solution = step; + best_cost_hi = step->cost_hi; + best_cost_lo = step->cost_lo; + } + } + } + } } + + runp = runp->same; } - else - { - /* Append at the end if there is no entry with this name. */ - struct derivation_step *runp = first; + while (runp != NULL); - while (runp != NULL) - { - if (__strcasecmp (result_set, runp->result_set) == 0) - break; - runp = runp->next; - } + if (current->result_set_len == node->from_constpfx_len) + break; - if (runp == NULL) - { - *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt], - current); - lastp = &(*lastp)->next; - } - } + node = node->matching; } + else if (cmpres < 0) + node = node->left; + else + node = node->right; } - - /* Go on with the next entry. */ - current = current->next; } - if (best != NULL) + if (solution != NULL) /* We really found a way to do the transformation. Now build a data structure describing the transformation steps.*/ - result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset, - handle, nsteps); + result = gen_steps (solution, toset_expand ?: toset, + fromset_expand ?: fromset, handle, nsteps); else { /* We haven't found a transformation. Clear the result values. */ @@ -620,20 +712,35 @@ __gconv_close_transform (struct gconv_step *steps, size_t nsteps) } +/* Free the modules mentioned. */ +static void +internal_function +free_modules_db (struct gconv_module *node) +{ + if (node->left != NULL) + free_modules_db (node->left); + if (node->right != NULL) + free_modules_db (node->right); + if (node->same != NULL) + free_modules_db (node->same); + do + { + struct gconv_module *act = node; + node = node->matching; + free (act); + } + while (node != NULL); +} + + /* Free all resources if necessary. */ static void __attribute__ ((unused)) free_mem (void) { - size_t cnt; - if (__gconv_alias_db != NULL) __tdestroy (__gconv_alias_db, free); - for (cnt = 0; cnt < __gconv_nmodules; ++cnt) - /* Modules which names do not start with a slash are builtin - transformations and the memory is not allocated dynamically. */ - if (__gconv_modules_db[cnt]->module_name[0] == '/') - free (__gconv_modules_db[cnt]); + free_modules_db (__gconv_modules_db); if (known_derivations != NULL) __tdestroy (known_derivations, free_derivation); |