diff options
-rw-r--r-- | ChangeLog | 35 | ||||
-rw-r--r-- | PROJECTS | 19 | ||||
-rw-r--r-- | iconv/Makefile | 9 | ||||
-rw-r--r-- | iconv/gconv_builtin.h | 47 | ||||
-rw-r--r-- | iconv/gconv_cache.c | 395 | ||||
-rw-r--r-- | iconv/gconv_conf.c | 31 | ||||
-rw-r--r-- | iconv/gconv_db.c | 68 | ||||
-rw-r--r-- | iconv/gconv_int.h | 18 | ||||
-rw-r--r-- | iconv/gconv_simple.c | 4 | ||||
-rw-r--r-- | iconv/iconvconfig.c | 1165 | ||||
-rw-r--r-- | iconv/iconvconfig.h | 67 | ||||
-rw-r--r-- | iconv/strtab.c | 294 |
12 files changed, 2058 insertions, 94 deletions
diff --git a/ChangeLog b/ChangeLog index e34991d6bb..93f9c57bf2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,38 @@ +2001-07-22 Ulrich Drepper <drepper@redhat.com> + + * iconv/gconv_builtin.c (struct builtin_map): Remove init and end + elements. + (BUILTIN_TRANSFORMATION): Remove Init and End parameters. + (__gconv_get_builtin_trans): Initialize __init_fct and __end_fct to + NULL. + * iconv/gconv_builtin.h: Remove NULL parameters for Init and End in + all BUILTIN_TRANSFORMATION calls. + * iconv/gconv_conf.c (BUILTIN_TRANSFORMATION): Remove Init and End + parameters. + * iconv/gconv_simple.c: Likewise. + * iconv/gconv_db.c (gen_steps): Internal converters don't have + initializers, move the code accordingly. + + * iconv/gconv_conf.c (__gconv_read_conf): Don't read configuration + file if STATIC_GCONV is defined. + + * iconv/gconv_conf.c (__gconv_path_envvar): New global variable. + (__gconv_get_path): Use it instead of call getenv. + (__gconv_read_conf): First see whether cache can be used. If yes, + don't do any work here. + * iconv/gconv_db.c (__gconv_release_step): Renamed from release_step + and exported. Change callers. + (__gconv_find_transform): First call __gconv_lookup_cache and only + continue if it signals no cache available. Remove some unnecessary + tests. + * iconv/gconv_int.h: Declare __gconv_path_envvar, __gconv_lookup_cache, + __gconv_release_step, and __gconv_loaded_cache. + * iconv/gconv_cache.c: New file. + * iconv/iconvconfig.c: New file. + * iconv/iconvconfig.h: New file. + * iconv/strtab.c: New file. + * iconv/Makefile: Add rules to build new files and programs. + 2001-07-20 Roland McGrath <roland@frob.com> * sysdeps/generic/device-nrs.h (DEV_TTY_P): Change argument type diff --git a/PROJECTS b/PROJECTS index e0feb7d1b1..9898c89e23 100644 --- a/PROJECTS +++ b/PROJECTS @@ -156,24 +156,7 @@ contact <bug-glibc@gnu.org>. the currently implemented methods. 
-[22] It should be possible to have the information gconv-modules in - a simple cache which is faster to access. Using libdb is probably - overkill and loading it would probably be slower than reading the - plain text file. But a file format with a simple hash table and - some data it points to should be fine. Probably it should be - two tables, one for the aliases, one for the mappings. The code - should start similar to this: - - if (stat ("gconv-modules", &stp) == 0 - && stat ("gconv-modules.db", &std) == 0 - && stp.st_mtime < std.st_mtime) - { - ... use the cache ... - { - else - { - ... use the plain file if it exists, otherwise the db ... - } +[22] Done. [23] The `strptime' function needs to be completed. This includes among diff --git a/iconv/Makefile b/iconv/Makefile index 1e385ca7b9..32fd60497d 100644 --- a/iconv/Makefile +++ b/iconv/Makefile @@ -26,11 +26,12 @@ include ../Makeconfig headers = iconv.h gconv.h routines = iconv_open iconv iconv_close \ gconv_open gconv gconv_close gconv_db gconv_conf \ - gconv_builtin gconv_simple gconv_trans + gconv_builtin gconv_simple gconv_trans gconv_cache ifeq ($(elf),yes) routines += gconv_dl else CFLAGS-gconv_db.c = -DSTATIC_GCONV +CFLAGS-gconv_cache.c = -DSTATIC_GCONV CFLAGS-gconv_simple.c = -DSTATIC_GCONV endif @@ -38,6 +39,7 @@ vpath %.c ../locale/programs iconv_prog-modules = iconv_charmap charmap charmap-dir linereader \ dummy-repertoire simple-hash xstrdup xmalloc +iconvconfig-modules = strtab xmalloc extra-objs = $(iconv_prog-modules:=.o) CFLAGS-iconv_prog.c = -I../locale/programs CFLAGS-iconv_charmap.c = -I../locale/programs @@ -50,10 +52,12 @@ tests = tst-iconv1 tst-iconv2 tst-iconv3 distribute = gconv_builtin.h gconv_int.h loop.c skeleton.c iconv_prog.h \ iconv_charmap.c dummy-repertoire.c gconv_charset.h -others = iconv_prog +others = iconv_prog iconvconfig install-others = $(inst_bindir)/iconv +CFLAGS-gconv_cache.c = -DGCONV_DIR='"$(gconvdir)"' CFLAGS-gconv_conf.c = -DGCONV_PATH='"$(gconvdir)"' 
+CFLAGS-iconvconfig.c = -DGCONV_PATH='"$(gconvdir)"' -DGCONV_DIR='"$(gconvdir)"' include ../Rules @@ -61,3 +65,4 @@ $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force) $(do-install-program) $(objpfx)iconv_prog: $(iconv_prog-modules:%=$(objpfx)%.o) +$(objpfx)iconvconfig: $(iconvconfig-modules:%=$(objpfx)%.o) diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h index b4c0c4eea4..86ff6abbc3 100644 --- a/iconv/gconv_builtin.h +++ b/iconv/gconv_builtin.h @@ -1,5 +1,5 @@ /* Builtin transformations. - Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -30,18 +30,14 @@ BUILTIN_ALIAS ("OSF00010105//", "ISO-10646/UCS4/") /* level 2 */ BUILTIN_ALIAS ("OSF00010106//", "ISO-10646/UCS4/") /* level 3 */ BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4", - __gconv_transform_internal_ucs4, NULL, NULL, - 4, 4, 4, 4) + __gconv_transform_internal_ucs4, 4, 4, 4, 4) BUILTIN_TRANSFORMATION ("ISO-10646/UCS4/", "INTERNAL", 1, "=ucs4->INTERNAL", - __gconv_transform_ucs4_internal, NULL, NULL, - 4, 4, 4, 4) + __gconv_transform_ucs4_internal, 4, 4, 4, 4) BUILTIN_TRANSFORMATION ("INTERNAL", "UCS-4LE//", 1, "=INTERNAL->ucs4le", - __gconv_transform_internal_ucs4le, NULL, NULL, - 4, 4, 4, 4) + __gconv_transform_internal_ucs4le, 4, 4, 4, 4) BUILTIN_TRANSFORMATION ("UCS-4LE//", "INTERNAL", 1, "=ucs4le->INTERNAL", - __gconv_transform_ucs4le_internal, NULL, NULL, - 4, 4, 4, 4) + __gconv_transform_ucs4le_internal, 4, 4, 4, 4) BUILTIN_ALIAS ("WCHAR_T//", "INTERNAL") @@ -51,15 +47,12 @@ BUILTIN_ALIAS ("ISO-IR-193//", "ISO-10646/UTF8/") BUILTIN_ALIAS ("OSF05010001//", "ISO-10646/UTF8/") BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UTF8/", 1, "=INTERNAL->utf8", - __gconv_transform_internal_utf8, NULL, NULL, - 4, 4, 1, 6) + __gconv_transform_internal_utf8, 4, 4, 1, 6) 
BUILTIN_TRANSFORMATION ("ISO-10646/UTF-8/", "INTERNAL", 1, "=utf8->INTERNAL", - __gconv_transform_utf8_internal, NULL, NULL, - 1, 6, 4, 4) + __gconv_transform_utf8_internal, 1, 6, 4, 4) BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "INTERNAL", 1, "=utf8->INTERNAL", - __gconv_transform_utf8_internal, NULL, NULL, - 1, 6, 4, 4) + __gconv_transform_utf8_internal, 1, 6, 4, 4) BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/") BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/") @@ -68,12 +61,10 @@ BUILTIN_ALIAS ("OSF00010101//", "ISO-10646/UCS2/") /* level 2 */ BUILTIN_ALIAS ("OSF00010102//", "ISO-10646/UCS2/") /* level 3 */ BUILTIN_TRANSFORMATION ("ISO-10646/UCS2/", "INTERNAL", 1, "=ucs2->INTERNAL", - __gconv_transform_ucs2_internal, NULL, NULL, - 2, 2, 4, 4) + __gconv_transform_ucs2_internal, 2, 2, 4, 4) BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS2/", 1, "=INTERNAL->ucs2", - __gconv_transform_internal_ucs2, NULL, NULL, - 4, 4, 2, 2) + __gconv_transform_internal_ucs2, 4, 4, 2, 2) BUILTIN_ALIAS ("ANSI_X3.4//", "ANSI_X3.4-1968//") @@ -90,12 +81,10 @@ BUILTIN_ALIAS ("CSASCII//", "ANSI_X3.4-1968//") BUILTIN_ALIAS ("OSF00010020//", "ANSI_X3.4-1968//") BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "INTERNAL", 1, "=ascii->INTERNAL", - __gconv_transform_ascii_internal, NULL, NULL, - 4, 4, 1, 1) + __gconv_transform_ascii_internal, 4, 4, 1, 1) BUILTIN_TRANSFORMATION ("INTERNAL", "ANSI_X3.4-1968//", 1, "=INTERNAL->ascii", - __gconv_transform_internal_ascii, NULL, NULL, - 4, 4, 1, 1) + __gconv_transform_internal_ascii, 4, 4, 1, 1) #if BYTE_ORDER == BIG_ENDIAN @@ -106,13 +95,11 @@ BUILTIN_ALIAS ("UCS-2LE//", "UNICODELITTLE//") BUILTIN_TRANSFORMATION ("UNICODELITTLE//", "INTERNAL", 1, "=ucs2reverse->INTERNAL", - __gconv_transform_ucs2reverse_internal, NULL, NULL, - 2, 2, 4, 4) + __gconv_transform_ucs2reverse_internal, 2, 2, 4, 4) BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODELITTLE//", 1, "=INTERNAL->ucs2reverse", - __gconv_transform_internal_ucs2reverse, NULL, NULL, - 4, 4, 2, 2) + 
__gconv_transform_internal_ucs2reverse, 4, 4, 2, 2) #else BUILTIN_ALIAS ("UNICODELITTLE//", "ISO-10646/UCS2/") BUILTIN_ALIAS ("UCS-2LE//", "ISO-10646/UCS2/") @@ -121,11 +108,9 @@ BUILTIN_ALIAS ("UCS-2BE//", "UNICODEBIG//") BUILTIN_TRANSFORMATION ("UNICODEBIG//", "INTERNAL", 1, "=ucs2reverse->INTERNAL", - __gconv_transform_ucs2reverse_internal, NULL, NULL, - 2, 2, 4, 4) + __gconv_transform_ucs2reverse_internal, 2, 2, 4, 4) BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1, "=INTERNAL->ucs2reverse", - __gconv_transform_internal_ucs2reverse, NULL, NULL, - 4, 4, 2, 2) + __gconv_transform_internal_ucs2reverse, 4, 4, 2, 2) #endif diff --git a/iconv/gconv_cache.c b/iconv/gconv_cache.c new file mode 100644 index 0000000000..e204cf1377 --- /dev/null +++ b/iconv/gconv_cache.c @@ -0,0 +1,395 @@ +/* Cache handling for iconv modules. + Copyright (C) 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include <dlfcn.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <gconv_int.h> +#include <iconvconfig.h> + +#include "../intl/hash-string.h" + +static void *cache; +static size_t cache_size; +static int cache_malloced; + + +int +internal_function +__gconv_load_cache (void) +{ + int fd; + struct stat64 st; + struct gconvcache_header *header; + + /* We cannot use the cache if the GCONV_PATH environment variable is + set. */ + __gconv_path_envvar = getenv ("GCONV_PATH"); + if (__gconv_path_envvar != NULL) + return -1; + + /* See whether the cache file exists. */ + fd = __open (GCONV_MODULES_CACHE, O_RDONLY); + if (__builtin_expect (fd, 0) == -1) + /* Not available. */ + return -1; + + /* Get information about the file. */ + if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) < 0 + /* We do not have to start looking at the file if it cannot contain + at least the cache header. */ + || st.st_size < sizeof (struct gconvcache_header)) + { + close_and_exit: + close (fd); + return -1; + } + + /* Make the file content available. */ + cache_size = st.st_size; +#ifdef _POSIX_MAPPED_FILES + cache = __mmap (NULL, cache_size, PROT_READ, MAP_SHARED, fd, 0); + if (__builtin_expect (cache == MAP_FAILED, 0)) +#endif + { + size_t already_read; + + cache = malloc (cache_size); + if (cache == NULL) + goto close_and_exit; + + already_read = 0; + do + { + ssize_t n = __read (fd, (char *) cache + already_read, + cache_size - already_read); + if (__builtin_expect (n, 0) == -1) + { + free (cache); + cache = NULL; + goto close_and_exit; + } + + already_read += n; + } + while (already_read < cache_size); + + cache_malloced = 1; + } + + /* We don't need the file descriptor anymore. */ + close (fd); + + /* Check the consistency. 
*/ + header = (struct gconvcache_header *) cache; + if (__builtin_expect (header->magic, GCONVCACHE_MAGIC) != GCONVCACHE_MAGIC + || __builtin_expect (header->string_offset >= cache_size, 0) + || __builtin_expect (header->hash_offset >= cache_size, 0) + || __builtin_expect (header->hash_size == 0, 0) + || __builtin_expect ((header->hash_offset + + header->hash_size * sizeof (struct hash_entry)) + > cache_size, 0) + || __builtin_expect (header->module_offset >= cache_size, 0) + || __builtin_expect (header->otherconv_offset > cache_size, 0)) + { + if (cache_malloced) + { + free (cache); + cache_malloced = 0; + } +#ifdef _POSIX_MAPPED_FILES + else + munmap (cache, cache_size); +#endif + cache = NULL; + + return -1; + } + + /* That worked. */ + return 0; +} + + +static int +internal_function +find_module_idx (const char *str, size_t *idxp) +{ + unsigned int idx; + unsigned int hval; + unsigned int hval2; + const struct gconvcache_header *header; + const char *strtab; + const struct hash_entry *hashtab; + unsigned int limit; + + header = (const struct gconvcache_header *) cache; + strtab = (char *) cache + header->string_offset; + hashtab = (struct hash_entry *) ((char *) cache + header->hash_offset); + + hval = hash_string (str); + idx = hval % header->hash_size; + hval2 = 1 + hval % (header->hash_size - 2); + + limit = cache_size - header->string_offset; + while (hashtab[idx].string_offset != 0) + if (hashtab[idx].string_offset < limit + && strcmp (str, strtab + hashtab[idx].string_offset) == 0) + { + *idxp = hashtab[idx].module_idx; + return 0; + } + else + if ((idx += hval2) >= header->hash_size) + idx -= header->hash_size; + + /* Nothing found. 
*/ + return -1; +} + + +static int +internal_function +find_module (const char *directory, const char *filename, + struct __gconv_step *result) +{ + size_t dirlen = strlen (directory); + size_t fnamelen = strlen (filename) + 1; + char *fullname; + int status = __GCONV_OK; + + fullname = (char *) malloc (dirlen + fnamelen); + if (fullname == NULL) + return __GCONV_NOMEM; + + memcpy (__mempcpy (fullname, directory, dirlen), filename, fnamelen); + + result->__shlib_handle = __gconv_find_shlib (fullname); + if (result->__shlib_handle == NULL) + return __GCONV_NOCONV; + + result->__modname = fullname; + result->__fct = result->__shlib_handle->fct; + result->__init_fct = result->__shlib_handle->init_fct; + result->__end_fct = result->__shlib_handle->end_fct; + result->__counter = 1; + + result->__data = NULL; + if (result->__init_fct != NULL) + status = DL_CALL_FCT (result->__init_fct, (result)); + + return status; +} + + +int +internal_function +__gconv_lookup_cache (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, int flags) +{ + const struct gconvcache_header *header; + const char *strtab; + size_t fromidx; + size_t toidx; + const struct module_entry *modtab; + const struct module_entry *from_module; + const struct module_entry *to_module; + struct __gconv_step *result; + + if (cache == NULL) + /* We have no cache available. 
*/ + return __GCONV_NODB; + + header = (const struct gconvcache_header *) cache; + strtab = (char *) cache + header->string_offset; + modtab = (const struct module_entry *) ((char *) cache + + header->module_offset); + + if (find_module_idx (fromset, &fromidx) != 0 + || (header->module_offset + (fromidx + 1) * sizeof (struct module_entry) + > cache_size)) + return __GCONV_NOCONV; + from_module = &modtab[fromidx]; + + if (find_module_idx (toset, &toidx) != 0 + || (header->module_offset + (toidx + 1) * sizeof (struct module_entry) + > cache_size)) + return __GCONV_NOCONV; + to_module = &modtab[toidx]; + + /* Avoid copy-only transformations if the user requests. */ + if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0) && fromidx == toidx) + return __GCONV_NOCONV; + + /* If there are special conversions available examine them first. */ + if (__builtin_expect (from_module->extra_offset, 0) != 0) + { + /* Search through the list to see whether there is a module + matching the destination character set. */ + const struct extra_entry *extra; + + /* Note the -1. This is due to the offset added in iconvconfig. + See there for more explanations. */ + extra = (const struct extra_entry *) ((char *) cache + + header->otherconv_offset + + from_module->extra_offset - 1); + while (extra->module_cnt != 0 + && extra->module[extra->module_cnt - 1].outname_offset != toidx) + extra = (const struct extra_entry *) ((char *) extra + + sizeof (struct extra_entry) + + (extra->module_cnt + * sizeof (struct extra_entry_module))); + + if (extra->module_cnt != 0) + { + /* Use the extra module. First determine how many steps. 
*/ + char *fromname; + int idx; + + *nsteps = extra->module_cnt; + *handle = result = + (struct __gconv_step *) malloc (extra->module_cnt + * sizeof (struct __gconv_step)); + if (result == NULL) + return __GCONV_NOMEM; + + fromname = (char *) strtab + from_module->canonname_offset; + idx = 0; + do + { + result[idx].__from_name = fromname; + fromname = result[idx].__to_name = + (char *) strtab + modtab[extra->module[idx].outname_offset].canonname_offset; + +#ifndef STATIC_GCONV + if (strtab[extra->module[idx].dir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res; + + res = find_module (strtab + extra->module[idx].dir_offset, + strtab + extra->module[idx].name_offset, + &result[idx]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + free (result); + goto try_internal; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + + extra->module[idx].name_offset, + &result[idx]); + + } + while (++idx < extra->module_cnt); + + return __GCONV_OK; + } + } + + try_internal: + /* See whether we can convert via the INTERNAL charset. */ + if (__builtin_expect (from_module->fromname_offset, 1) == 0 + || __builtin_expect (to_module->toname_offset, 1) == 0) + /* Not possible. Nothing we can do. */ + return __GCONV_NOCONV; + + /* Use the two modules. */ + result = (struct __gconv_step *) malloc (2 * sizeof (struct __gconv_step)); + if (result == NULL) + return __GCONV_NOMEM; + + *handle = result; + *nsteps = 2; + + /* Generate data structure for conversion to INTERNAL. */ + result[0].__from_name = (char *) strtab + from_module->canonname_offset; + result[0].__to_name = (char *) "INTERNAL"; + +#ifndef STATIC_GCONV + if (strtab[from_module->fromdir_offset] != '\0') + { + /* Load the module, return handle for it. 
*/ + int res = find_module (strtab + from_module->fromdir_offset, + strtab + from_module->fromname_offset, + &result[0]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + free (result); + return res; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + from_module->fromname_offset, + &result[0]); + + /* Generate data structure for conversion from INTERNAL. */ + result[1].__from_name = (char *) "INTERNAL"; + result[1].__to_name = (char *) strtab + to_module->canonname_offset; + +#ifndef STATIC_GCONV + if (strtab[to_module->todir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res = find_module (strtab + to_module->todir_offset, + strtab + to_module->toname_offset, + &result[1]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + __gconv_release_step (&result[0]); + free (result); + return res; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + to_module->toname_offset, &result[1]); + + return __GCONV_OK; +} + + +/* Free all resources if necessary. */ +static void __attribute__ ((unused)) +free_mem (void) +{ + if (cache_malloced) + free (cache); +#ifdef _POSIX_MAPPED_FILES + else + munmap (cache, cache_size); +#endif +} + +text_set_element (__libc_subfreeres, free_mem); diff --git a/iconv/gconv_conf.c b/iconv/gconv_conf.c index 9cedbb8406..db5bb476fd 100644 --- a/iconv/gconv_conf.c +++ b/iconv/gconv_conf.c @@ -60,8 +60,8 @@ static const char gconv_module_ext[] = MODULE_EXT; /* We have a few builtin transformations. 
*/ static struct gconv_module builtin_modules[] = { -#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, Init, End, MinF, \ - MaxF, MinT, MaxT) \ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) \ { \ from_string: From, \ to_string: To, \ @@ -79,8 +79,8 @@ static struct gconv_module builtin_modules[] = static const char *builtin_aliases[] = { -#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, Init, End, MinF, \ - MaxF, MinT, MaxT) +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) #define BUILTIN_ALIAS(From, To) From " " To, #include "gconv_builtin.h" @@ -92,6 +92,10 @@ static const char *builtin_aliases[] = #endif +/* Value of the GCONV_PATH environment variable. */ +const char *__gconv_path_envvar; + + /* Test whether there is already a matching module known. */ static int internal_function @@ -423,7 +427,6 @@ __gconv_get_path (void) if (result == NULL) { /* Determine the complete path first. */ - const char *user_path; char *gconv_path; size_t gconv_path_len; char *elem; @@ -433,8 +436,7 @@ __gconv_get_path (void) char *cwd; size_t cwdlen; - user_path = getenv ("GCONV_PATH"); - if (user_path == NULL) + if (__gconv_path_envvar == NULL) { /* No user-defined path. Make a modifiable copy of the default path. */ @@ -446,11 +448,12 @@ __gconv_get_path (void) else { /* Append the default path to the user-defined path. */ - size_t user_len = strlen (user_path); + size_t user_len = strlen (__gconv_path_envvar); gconv_path_len = user_len + 1 + sizeof (default_gconv_path); gconv_path = alloca (gconv_path_len); - __mempcpy (__mempcpy (__mempcpy (gconv_path, user_path, user_len), + __mempcpy (__mempcpy (__mempcpy (gconv_path, __gconv_path_envvar, + user_len), ":", 1), default_gconv_path, sizeof (default_gconv_path)); cwd = __getcwd (NULL, 0); @@ -530,6 +533,15 @@ __gconv_read_conf (void) int save_errno = errno; size_t cnt; + /* First see whether we should use the cache. 
*/ + if (__gconv_load_cache () == 0) + { + /* Yes, we are done. */ + __set_errno (save_errno); + return; + } + +#ifndef STATIC_GCONV /* Find out where we have to look. */ if (__gconv_path_elem == NULL) __gconv_get_path (); @@ -549,6 +561,7 @@ __gconv_read_conf (void) /* Read the next configuration file. */ read_conf_file (filename, elem, elem_len, &modules, &nmodules); } +#endif /* Add the internal modules. */ for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]); diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c index c661c3d472..92b520987b 100644 --- a/iconv/gconv_db.c +++ b/iconv/gconv_db.c @@ -178,8 +178,9 @@ free_derivation (void *p) /* Decrement the reference count for a single step in a steps array. */ -static inline void -release_step (struct __gconv_step *step) +void +internal_function +__gconv_release_step (struct __gconv_step *step) { if (--step->__counter == 0) { @@ -231,6 +232,9 @@ gen_steps (struct derivation_step *best, const char *toset, ? __strdup (current->result_set) : result[step_cnt + 1].__from_name); + result[step_cnt].__counter = 1; + result[step_cnt].__data = NULL; + #ifndef STATIC_GCONV if (current->code->module_name[0] == '/') { @@ -249,6 +253,22 @@ gen_steps (struct derivation_step *best, const char *toset, result[step_cnt].__fct = shlib_handle->fct; result[step_cnt].__init_fct = shlib_handle->init_fct; result[step_cnt].__end_fct = shlib_handle->end_fct; + + /* Call the init function. */ + if (result[step_cnt].__init_fct != NULL) + { + status = DL_CALL_FCT (result[step_cnt].__init_fct, + (&result[step_cnt])); + + if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) + { + failed = 1; + /* Make sure we unload this modules. 
*/ + --step_cnt; + result[step_cnt].__end_fct = NULL; + break; + } + } } else #endif @@ -256,25 +276,6 @@ gen_steps (struct derivation_step *best, const char *toset, __gconv_get_builtin_trans (current->code->module_name, &result[step_cnt]); - result[step_cnt].__counter = 1; - - /* Call the init function. */ - result[step_cnt].__data = NULL; - if (result[step_cnt].__init_fct != NULL) - { - status = DL_CALL_FCT (result[step_cnt].__init_fct, - (&result[step_cnt])); - - if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) - { - failed = 1; - /* Make sure we unload this modules. */ - --step_cnt; - result[step_cnt].__end_fct = NULL; - break; - } - } - current = current->last; } @@ -282,7 +283,7 @@ gen_steps (struct derivation_step *best, const char *toset, { /* Something went wrong while initializing the modules. */ while (++step_cnt < *nsteps) - release_step (&result[step_cnt]); + __gconv_release_step (&result[step_cnt]); free (result); *nsteps = 0; *handle = NULL; @@ -328,7 +329,7 @@ increment_counter (struct __gconv_step *steps, size_t nsteps) (after unloading) and this time loading failed!? */ --step->__counter; while (++cnt < nsteps) - release_step (&steps[cnt]); + __gconv_release_step (&steps[cnt]); result = __GCONV_NOCONV; break; } @@ -647,8 +648,8 @@ __gconv_find_transform (const char *toset, const char *fromset, struct __gconv_step **handle, size_t *nsteps, int flags) { - const char *fromset_expand = NULL; - const char *toset_expand = NULL; + const char *fromset_expand; + const char *toset_expand; int result; /* Ensure that the configuration data is read. */ @@ -657,6 +658,14 @@ __gconv_find_transform (const char *toset, const char *fromset, /* Acquire the lock. */ __libc_lock_lock (lock); + result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags); + if (result != __GCONV_NODB) + { + /* We have a cache and could resolve the request, successful or not. 
*/ + __libc_lock_unlock (lock); + return result; + } + /* If we don't have a module database return with an error. */ if (__gconv_modules_db == NULL) { @@ -665,11 +674,8 @@ __gconv_find_transform (const char *toset, const char *fromset, } /* See whether the names are aliases. */ - if (__gconv_alias_db != NULL) - { - fromset_expand = do_lookup_alias (fromset); - toset_expand = do_lookup_alias (toset); - } + fromset_expand = do_lookup_alias (fromset); + toset_expand = do_lookup_alias (toset); if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0) /* We are not supposed to create a pseudo transformation (means @@ -713,7 +719,7 @@ __gconv_close_transform (struct __gconv_step *steps, size_t nsteps) __libc_lock_lock (lock); while (nsteps-- > 0) - release_step (&steps[nsteps]); + __gconv_release_step (&steps[nsteps]); /* Release the lock. */ __libc_lock_unlock (lock); diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index 9641077bcc..c517bd9e97 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc. +/* Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -120,6 +120,9 @@ extern void *__gconv_alias_db; extern size_t __gconv_nmodules; extern struct gconv_module *__gconv_modules_db; +/* Value of the GCONV_PATH environment variable. */ +extern const char *__gconv_path_envvar; + /* The gconv functions expects the name to be in upper case and complete, including the trailing slashes if necessary. */ @@ -179,9 +182,22 @@ extern int __gconv_find_transform (const char *toset, const char *fromset, size_t *nsteps, int flags) internal_function; +/* Search for transformation in cache data. 
*/ +extern int __gconv_lookup_cache (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, + int flags) + internal_function; + +/* Free data associated with a step's structure. */ +extern void __gconv_release_step (struct __gconv_step *step) + internal_function; + /* Read all the configuration data and cache it. */ extern void __gconv_read_conf (void); +/* Try to read module cache file. */ +extern int __gconv_load_cache (void) internal_function; + /* Determine the directories we are looking in. */ extern void __gconv_get_path (void); diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index b0525e5c75..dbec90cb79 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -30,8 +30,8 @@ #include <sys/param.h> #define BUILTIN_ALIAS(s1, s2) /* nothing */ -#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, Init, End, MinF, \ - MaxF, MinT, MaxT) \ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) \ extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \ __const unsigned char **, __const unsigned char *, \ unsigned char **, size_t *, int, int); diff --git a/iconv/iconvconfig.c b/iconv/iconvconfig.c new file mode 100644 index 0000000000..05b3c9d9a7 --- /dev/null +++ b/iconv/iconvconfig.c @@ -0,0 +1,1165 @@ +/* Generate fastloading iconv module configuration files. + Copyright (C) 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <argp.h> +#include <assert.h> +#include <error.h> +#include <errno.h> +#include <fcntl.h> +#include <libintl.h> +#include <locale.h> +#include <mcheck.h> +#include <search.h> +#include <stdint.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/uio.h> + +#include "iconvconfig.h" + +/* Get libc version number. */ +#include "../version.h" + +#define PACKAGE _libc_intl_domainname + + +/* The hashing function we use. */ +#include "../intl/hash-string.h" + + +/* Types used. */ +struct module +{ + char *fromname; + struct Strent *fromname_strent; + char *filename; + struct Strent *filename_strent; + const char *directory; + struct Strent *directory_strent; + struct module *next; + int cost; + struct Strent *toname_strent; + char toname[0]; +}; + +struct alias +{ + char *fromname; + struct Strent *froment; + struct module *module; + struct Strent *toent; + char toname[0]; +}; + +struct name +{ + const char *name; + struct Strent *strent; + int module_idx; + uint32_t hashval; +}; + +struct name_info +{ + const char *canonical_name; + struct Strent *canonical_strent; + + struct module *from_internal; + struct module *to_internal; + + struct other_conv_list + { + int dest_idx; + struct other_conv + { + gidx_t module_idx; + struct module *module; + struct other_conv *next; + } other_conv; + struct other_conv_list *next; + } *other_conv_list; +}; + + +/* Name and version of program. 
*/ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +#define OPT_VERBOSE 1000 + +/* Definitions of arguments for argp functions. */ +static const struct argp_option options[] = +{ + { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Short description of program. */ +static const char doc[] = N_("\ +Create fastloading iconv module configuration file."); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("[DIR...]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. */ +static char *more_help (int key, const char *text, void *input); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + + +/* The function doing the actual work. */ +static int handle_dir (const char *dir); + +/* Add all known builtin conversions and aliases. */ +static void add_builtins (void); + +/* Create list of all aliases without circular aliases. */ +static void get_aliases (void); + +/* Create list of all modules. */ +static void get_modules (void); + +/* Get list of all the names and thereby indexing them. */ +static void generate_name_list (void); + +/* Collect information about all the names. */ +static void generate_name_info (void); + +/* Write the output file. */ +static int write_output (void); + + +/* Nonzero if verbose output is wanted. */ +static int verbose; + +/* Search tree of the modules we know. */ +static void *modules; + +/* Search tree of the aliases we know. */ +static void *aliases; + +/* Search tree for name to index mapping. */ +static void *names; + +/* Number of names we know about. */ +static int nnames; + +/* List of all aliases. 
*/ +static struct alias **alias_list; +static size_t nalias_list; +static size_t nalias_list_max; + +/* List of all modules. */ +static struct module **module_list; +static size_t nmodule_list; +static size_t nmodule_list_max; + +/* Names and information about them. */ +static struct name_info *name_info; +static size_t nname_info; + +/* Number of translations not from or to INTERNAL. */ +static size_t nextra_modules; + + +/* Names and aliases for the builtin transformations. */ +static struct +{ + const char *from; + const char *to; +} builtin_alias[] = + { +#define BUILTIN_ALIAS(alias, real) \ + { .from = alias, .to = real }, +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) +#include <gconv_builtin.h> + }; +#define nbuiltin_alias (sizeof (builtin_alias) / sizeof (builtin_alias[0])) + +static struct +{ + const char *from; + const char *to; + const char *module; + int cost; +} builtin_trans[] = + { +#define BUILTIN_ALIAS(alias, real) +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ + MinT, MaxT) \ + { .from = From, .to = To, .module = Name, .cost = Cost }, +#include <gconv_builtin.h> + }; +#define nbuiltin_trans (sizeof (builtin_trans) / sizeof (builtin_trans[0])) + + +/* Filename extension for the modules. */ +#ifndef MODULE_EXT +# define MODULE_EXT ".so" +#endif +static const char gconv_module_ext[] = MODULE_EXT; + + +extern void *xmalloc (size_t n) __attribute__ ((__malloc__)); +extern void *xcalloc (size_t n, size_t m) __attribute__ ((__malloc__)); +extern void *xrealloc (void *p, size_t n); + + +/* C string table handling. */ +struct Strtab; +struct Strent; + +/* Create new C string table object in memory. */ +extern struct Strtab *strtabinit (void); + +/* Free resources allocated for C string table ST. */ +extern void strtabfree (struct Strtab *st); + +/* Add string STR (length LEN is != 0) to C string table ST. 
*/ +extern struct Strent *strtabadd (struct Strtab *st, const char *str, + size_t len); + +/* Finalize string table ST and store size in *SIZE and return a pointer. */ +extern void *strtabfinalize (struct Strtab *st, size_t *size); + +/* Get offset in string table for string associated with SE. */ +extern size_t strtaboffset (struct Strent *se); + +/* String table we construct. */ +static struct Strtab *strtab; + + + +int +main (int argc, char *argv[]) +{ + int remaining; + int status = 0; + char *path; + char *tp; + + /* Enable memory use testing. */ + mcheck_pedantic (NULL); + + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); + + /* Set the text message domain. */ + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + /* Initialize the string table. */ + strtab = strtabinit (); + + /* Handle all directories mentioned. */ + while (remaining < argc) + status |= handle_dir (argv[remaining++]); + + /* In any case also handle the standard directory. */ + path = strdupa (GCONV_PATH); + tp = strtok (path, ":"); + while (tp != NULL) + { + status |= handle_dir (tp); + + tp = strtok (NULL, ":"); + } + + /* Add the builtin transformations and aliases without overwriting + anything. */ + add_builtins (); + + /* Store aliases in an array. */ + get_aliases (); + + /* Get list of all modules. */ + get_modules (); + + /* Generate list of all the names we know to handle in some way. */ + generate_name_list (); + + /* Now we know all the names we will handle, collect information + about them. */ + generate_name_info (); + + /* Write the output file. */ + status = write_output (); + + return status; +} + + +/* Handle program arguments. 
*/ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case OPT_VERBOSE: + verbose = 1; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + return strdup (gettext ("\ +Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n")); + default: + break; + } + return (char *) text; +} + + +/* Print the version information. */ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "iconvconfig (GNU %s) %s\n", PACKAGE, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), "2001"); + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); +} + + +static int +alias_compare (const void *p1, const void *p2) +{ + const struct alias *a1 = (const struct alias *) p1; + const struct alias *a2 = (const struct alias *) p2; + + return strcmp (a1->fromname, a2->fromname); +} + + +static void +new_alias (const char *fromname, size_t fromlen, const char *toname, + size_t tolen) +{ + struct alias *newp; + void **inserted; + + newp = (struct alias *) xmalloc (sizeof (struct alias) + fromlen + tolen); + + newp->fromname = mempcpy (newp->toname, toname, tolen); + memcpy (newp->fromname, fromname, fromlen); + newp->module = NULL; + + inserted = (void **) tsearch (newp, &aliases, alias_compare); + if (inserted == NULL) + error (EXIT_FAILURE, errno, gettext ("while inserting in search tree")); + if (*inserted != newp) + /* Something went wrong, free this entry. 
*/ + free (newp); + else + { + newp->froment = strtabadd (strtab, newp->fromname, fromlen); + newp->toent = strtabadd (strtab, newp->toname, tolen); + } +} + + +/* Add new alias. */ +static void +add_alias (char *rp) +{ + /* We now expect two more string. The strings are normalized + (converted to UPPER case) and strored in the alias database. */ + char *from; + char *to; + char *wp; + + while (isspace (*rp)) + ++rp; + from = wp = rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (*rp == '\0') + /* There is no `to' string on the line. Ignore it. */ + return; + *wp++ = '\0'; + to = ++rp; + while (isspace (*rp)) + ++rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (to == wp) + /* No `to' string, ignore the line. */ + return; + *wp++ = '\0'; + + new_alias (from, to - from, to, wp - to); +} + + +static void +append_alias (const void *nodep, VISIT value, int level) +{ + if (value != leaf && value != postorder) + return; + + if (nalias_list_max == nalias_list) + { + nalias_list_max += 50; + alias_list = (struct alias **) xrealloc (alias_list, + (nalias_list_max + * sizeof (struct alias *))); + } + + alias_list[nalias_list++] = *(struct alias **) nodep; +} + + +static void +get_aliases (void) +{ + twalk (aliases, append_alias); +} + + +static int +module_compare (const void *p1, const void *p2) +{ + const struct module *m1 = (const struct module *) p1; + const struct module *m2 = (const struct module *) p2; + int result; + + result = strcmp (m1->fromname, m2->fromname); + if (result == 0) + result = strcmp (m1->toname, m2->toname); + + return result; +} + + +/* Create new module record. 
*/ +static void +new_module (const char *fromname, size_t fromlen, const char *toname, + size_t tolen, const char *directory, + const char *filename, size_t filelen, int cost, size_t need_ext) +{ + struct module *new_module; + size_t dirlen = strlen (directory) + 1; + char *tmp; + void **inserted; + + new_module = (struct module *) xmalloc (sizeof (struct module) + + fromlen + tolen + filelen + + need_ext); + + new_module->fromname = mempcpy (new_module->toname, toname, tolen); + + new_module->filename = mempcpy (new_module->fromname, fromname, fromlen); + + new_module->cost = cost; + new_module->next = NULL; + + tmp = mempcpy (new_module->filename, filename, filelen); + if (need_ext) + { + memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext)); + filelen += sizeof (gconv_module_ext) - 1; + } + new_module->directory = directory; + + /* Now insert the new module data structure in our search tree. */ + inserted = (void **) tsearch (new_module, &modules, module_compare); + if (inserted == NULL) + error (EXIT_FAILURE, errno, "while inserting in search tree"); + if (*inserted != new_module) + free (new_module); + else + { + new_module->fromname_strent = strtabadd (strtab, new_module->fromname, + fromlen); + new_module->toname_strent = strtabadd (strtab, new_module->toname, + tolen); + new_module->filename_strent = strtabadd (strtab, new_module->filename, + filelen); + new_module->directory_strent = strtabadd (strtab, directory, dirlen); + } +} + + +/* Add new module. */ +static void +internal_function +add_module (char *rp, const char *directory) +{ + /* We expect now + 1. `from' name + 2. `to' name + 3. filename of the module + 4. 
an optional cost value + */ + char *from; + char *to; + char *module; + char *wp; + int need_ext; + int cost; + + while (isspace (*rp)) + ++rp; + from = rp; + while (*rp != '\0' && !isspace (*rp)) + { + *rp = toupper (*rp); + ++rp; + } + if (*rp == '\0') + return; + *rp++ = '\0'; + to = wp = rp; + while (isspace (*rp)) + ++rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (*rp == '\0') + return; + *wp++ = '\0'; + do + ++rp; + while (isspace (*rp)); + module = wp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = *rp++; + if (*rp == '\0') + { + /* There is no cost, use one by default. */ + *wp++ = '\0'; + cost = 1; + } + else + { + /* There might be a cost value. */ + char *endp; + + *wp++ = '\0'; + cost = strtol (rp, &endp, 10); + if (rp == endp || cost < 1) + /* No useful information. */ + cost = 1; + } + + if (module[0] == '\0') + /* No module name given. */ + return; + + /* See whether we must add the ending. */ + need_ext = 0; + if (wp - module < sizeof (gconv_module_ext) + || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, + sizeof (gconv_module_ext)) != 0) + /* We must add the module extension. */ + need_ext = sizeof (gconv_module_ext) - 1; + + assert (strlen (from) + 1 == to - from); + assert (strlen (to) + 1 == module - to); + assert (strlen (module) + 1 == wp - module); + + new_module (from, to - from, to, module - to, directory, module, wp - module, + cost, need_ext); +} + + +/* Read the config file and add the data for this directory to that. 
*/ +static int +handle_dir (const char *dir) +{ + char *infile; + FILE *fp; + char *line = NULL; + size_t linelen = 0; + size_t dirlen = strlen (dir); + + if (dir[dirlen - 1] != '/') + { + char *newp = (char *) xmalloc (dirlen + 2); + dir = memcpy (newp, dir, dirlen); + newp[dirlen++] = '/'; + newp[dirlen] = '\0'; + } + + infile = (char *) alloca (dirlen + sizeof "gconv-modules"); + strcpy (mempcpy (infile, dir, dirlen), "gconv-modules"); + + fp = fopen (infile, "r"); + if (fp == NULL) + { + error (0, errno, "cannot open `%s'", infile); + return 1; + } + + /* No threads present. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + while (!feof_unlocked (fp)) + { + char *rp, *endp, *word; + ssize_t n = __getdelim (&line, &linelen, '\n', fp); + + if (n < 0) + /* An error occurred. */ + break; + + rp = line; + /* Terminate the line (excluding comments or newline) with a NUL + byte to simplify the following code. */ + endp = strchr (rp, '#'); + if (endp != NULL) + *endp = '\0'; + else + if (rp[n - 1] == '\n') + rp[n - 1] = '\0'; + + while (isspace (*rp)) + ++rp; + + /* If this is an empty line go on with the next one. */ + if (rp == endp) + continue; + + word = rp; + while (*rp != '\0' && !isspace (*rp)) + ++rp; + + if (rp - word == sizeof ("alias") - 1 + && memcmp (word, "alias", sizeof ("alias") - 1) == 0) + add_alias (rp); + else if (rp - word == sizeof ("module") - 1 + && memcmp (word, "module", sizeof ("module") - 1) == 0) + add_module (rp, dir); + /* else */ + /* Otherwise ignore the line. */ + } + + free (line); + + fclose (fp); + + return 0; +} + + +static void +append_module (const void *nodep, VISIT value, int level) +{ + struct module *mo; + + if (value != leaf && value != postorder) + return; + + mo = *(struct module **) nodep; + + if (nmodule_list > 0 + && strcmp (module_list[nmodule_list - 1]->fromname, mo->fromname) == 0) + { + /* Same name. 
*/ + mo->next = module_list[nmodule_list - 1]; + module_list[nmodule_list - 1] = mo; + + return; + } + + if (nmodule_list_max == nmodule_list) + { + nmodule_list_max += 50; + module_list = (struct module **) xrealloc (module_list, + (nmodule_list_max + * sizeof (struct module *))); + } + + module_list[nmodule_list++] = mo; +} + + +static void +get_modules (void) +{ + twalk (modules, append_module); +} + + +static void +add_builtins (void) +{ + size_t cnt; + + /* Add all aliases. */ + for (cnt = 0; cnt < nbuiltin_alias; ++cnt) + new_alias (builtin_alias[cnt].from, + strlen (builtin_alias[cnt].from) + 1, + builtin_alias[cnt].to, + strlen (builtin_alias[cnt].to) + 1); + + /* add the builtin transformations. */ + for (cnt = 0; cnt < nbuiltin_trans; ++cnt) + new_module (builtin_trans[cnt].from, + strlen (builtin_trans[cnt].from) + 1, + builtin_trans[cnt].to, + strlen (builtin_trans[cnt].to) + 1, + "", builtin_trans[cnt].module, + strlen (builtin_trans[cnt].module) + 1, + builtin_trans[cnt].cost, 0); +} + + +static int +name_compare (const void *p1, const void *p2) +{ + const struct name *n1 = (const struct name *) p1; + const struct name *n2 = (const struct name *) p2; + + return strcmp (n1->name, n2->name); +} + + +static struct name * +new_name (const char *str, struct Strent *strent) +{ + struct name *newp = (struct name *) xmalloc (sizeof (struct name)); + + newp->name = str; + newp->strent = strent; + newp->module_idx = -1; + newp->hashval = hash_string (str); + + ++nnames; + + return newp; +} + + +static void +generate_name_list (void) +{ + size_t i; + + for (i = 0; i < nmodule_list; ++i) + { + struct module *runp; + + if (strcmp (module_list[i]->fromname, "INTERNAL") != 0) + tsearch (new_name (module_list[i]->fromname, + module_list[i]->fromname_strent), + &names, name_compare); + + for (runp = module_list[i]; runp != NULL; runp = runp->next) + if (strcmp (runp->toname, "INTERNAL") != 0) + tsearch (new_name (runp->toname, runp->toname_strent), + &names, 
name_compare); + } +} + + +static int +name_to_module_idx (const char *name, int add) +{ + struct name **res; + struct name fake_name = { .name = name }; + int idx; + + res = (struct name **) tfind (&fake_name, &names, name_compare); + if (res == NULL) + abort (); + + idx = (*res)->module_idx; + if (idx == -1 && add) + /* No module index assigned yet. */ + idx = (*res)->module_idx = nname_info++; + + return idx; +} + + +static void +generate_name_info (void) +{ + size_t i; + + name_info = (struct name_info *) xcalloc (nmodule_list, + sizeof (struct name_info)); + + for (i = 0; i < nmodule_list; ++i) + { + struct module *runp; + + for (runp = module_list[i]; runp != NULL; runp = runp->next) + if (strcmp (runp->fromname, "INTERNAL") == 0) + { + int idx = name_to_module_idx (runp->toname, 1); + name_info[idx].from_internal = runp; + assert (name_info[idx].canonical_name == NULL + || strcmp (name_info[idx].canonical_name, + runp->toname) == 0); + name_info[idx].canonical_name = runp->toname; + name_info[idx].canonical_strent = runp->toname_strent; + } + else if (strcmp (runp->toname, "INTERNAL") == 0) + { + int idx = name_to_module_idx (runp->fromname, 1); + name_info[idx].to_internal = runp; + assert (name_info[idx].canonical_name == NULL + || strcmp (name_info[idx].canonical_name, + runp->fromname) == 0); + name_info[idx].canonical_name = runp->fromname; + name_info[idx].canonical_strent = runp->fromname_strent; + } + else + { + /* This is a transformation not to or from the INTERNAL + encoding. 
*/ + int from_idx = name_to_module_idx (runp->fromname, 1); + int to_idx = name_to_module_idx (runp->toname, 1); + struct other_conv_list *newp; + + newp = (struct other_conv_list *) + xmalloc (sizeof (struct other_conv_list)); + newp->other_conv.module_idx = to_idx; + newp->other_conv.module = runp; + newp->other_conv.next = NULL; /* XXX Allow multiple module sequence */ + newp->dest_idx = to_idx; + newp->next = name_info[from_idx].other_conv_list; + name_info[from_idx].other_conv_list = newp; + assert (name_info[from_idx].canonical_name == NULL + || strcmp (name_info[from_idx].canonical_name, + runp->fromname) == 0); + name_info[from_idx].canonical_name = runp->fromname; + name_info[from_idx].canonical_strent = runp->fromname_strent; + + ++nextra_modules; + } + } + + /* Now add the module index information for all the aliases. */ + for (i = 0; i < nalias_list; ++i) + { + struct name fake_name = { .name = alias_list[i]->toname }; + struct name **tonamep; + + tonamep = (struct name **) tfind (&fake_name, &names, name_compare); + if (tonamep != NULL) + { + struct name *newp = new_name (alias_list[i]->fromname, + alias_list[i]->froment); + newp->module_idx = (*tonamep)->module_idx; + tsearch (newp, &names, name_compare); + } + } +} + + +static int +is_prime (unsigned long int candidate) +{ + /* No even number and none less than 10 will be passed here. */ + unsigned long int divn = 3; + unsigned long int sq = divn * divn; + + while (sq < candidate && candidate % divn != 0) + { + ++divn; + sq += 4 * divn; + ++divn; + } + + return candidate % divn != 0; +} + + +static uint32_t +next_prime (uint32_t seed) +{ + /* Make it definitely odd. */ + seed |= 1; + + while (!is_prime (seed)) + seed += 2; + + return seed; +} + + +/* Format of the output file. 
+ + Offset Length Description + 0000 4 Magic header bytes + 0004 4 Offset of string table (stoff) + 0008 4 Offset of name hashing table (hoff) + 000C 4 Hashing table size (hsize) + 0010 4 Offset of module table (moff) + 0014 4 Offset of other conversion module table (ooff) + + stoff ??? String table + + hoff 8*hsize Array of tuples + string table offset + module index + + moff ??? Array of tuples + canonical name offset + from-internal module dir name offset + from-internal module name off + to-internal module dir name offset + to-internal module name offset + offset into other conversion table + + ooff ??? Sequence of words + name offset + one or more of tuple + canonical name offset + module dir name offset + module name offset + (following last entry canocical name offset is 0) +*/ +static int +write_output (void) +{ + int fd; + char *string_table; + size_t string_table_size; + struct gconvcache_header header; + struct hash_entry *hash_table; + size_t hash_size; + struct module_entry *module_table; + char *extra_table; + char *cur_extra_table; + size_t n; + int idx; + struct iovec iov[6]; + static const gidx_t null_word; + size_t total; + + /* Function to insert the names. */ + void name_insert (const void *nodep, VISIT value, int level) + { + struct name *name; + unsigned int idx; + unsigned int hval2; + + if (value != leaf && value != postorder) + return; + + name = *(struct name **) nodep; + idx = name->hashval % hash_size; + hval2 = 1 + name->hashval % (hash_size - 2); + + while (hash_table[idx].string_offset != 0) + if ((idx += hval2) >= hash_size) + idx -= hash_size; + + hash_table[idx].string_offset = strtaboffset (name->strent); + + assert (name->module_idx != -1); + hash_table[idx].module_idx = name->module_idx; + } + + /* Open the output file. */ + fd = open (GCONV_MODULES_CACHE, O_TRUNC | O_CREAT | O_RDWR, 0644); + if (fd == -1) + return 1; + + /* Create the string table. 
*/ + string_table = strtabfinalize (strtab, &string_table_size); + + /* Create the hashing table. We know how many strings we have. + Creating a perfect hash table is not reasonable here. Therefore + we use open hashing and a table size which is the next prime 40% + larger than the number of strings. */ + hash_size = next_prime (nnames * 1.4); + hash_table = (struct hash_entry *) xcalloc (hash_size, + sizeof (struct hash_entry)); + /* Fill the hash table. */ + twalk (names, name_insert); + + /* Create the section for the module list. */ + module_table = (struct module_entry *) xcalloc (sizeof (struct module_entry), + nname_info); + + /* Allocate memory for the non-INTERNAL conversions. The allocated + memory can be more than is actually needed. */ + extra_table = (char *) xcalloc (sizeof (struct extra_entry) + + sizeof (gidx_t) + + sizeof (struct extra_entry_module), + nextra_modules); + cur_extra_table = extra_table; + + /* Fill in the module information. */ + for (n = 0; n < nname_info; ++n) + { + module_table[n].canonname_offset = + strtaboffset (name_info[n].canonical_strent); + + if (name_info[n].from_internal == NULL) + { + module_table[n].fromdir_offset = 0; + module_table[n].fromname_offset = 0; + } + else + { + module_table[n].fromdir_offset = + strtaboffset (name_info[n].from_internal->directory_strent); + module_table[n].fromname_offset = + strtaboffset (name_info[n].from_internal->filename_strent); + } + + if (name_info[n].to_internal == NULL) + { + module_table[n].todir_offset = 0; + module_table[n].toname_offset = 0; + } + else + { + module_table[n].todir_offset = + strtaboffset (name_info[n].to_internal->directory_strent); + module_table[n].toname_offset = + strtaboffset (name_info[n].to_internal->filename_strent); + } + + if (name_info[n].other_conv_list != NULL) + { + struct other_conv_list *other = name_info[n].other_conv_list; + + /* Store the reference. 
We add 1 to distinguish the entry + at offset zero from the case where no extra modules are + available. The file reader has to account for the + offset. */ + module_table[n].extra_offset = 1 + cur_extra_table - extra_table; + + do + { + struct other_conv *runp; + struct extra_entry *extra; + + /* Allocate new entry. */ + extra = (struct extra_entry *) cur_extra_table; + cur_extra_table += sizeof (struct extra_entry); + extra->module_cnt = 0; + + runp = &other->other_conv; + do + { + cur_extra_table += sizeof (struct extra_entry_module); + extra->module[extra->module_cnt].outname_offset = + runp->next == NULL + ? other->dest_idx : runp->next->module_idx; + extra->module[extra->module_cnt].dir_offset = + strtaboffset (runp->module->directory_strent); + extra->module[extra->module_cnt].name_offset = + strtaboffset (runp->module->filename_strent); + ++extra->module_cnt; + + runp = runp->next; + } + while (runp != NULL); + + other = other->next; + } + while (other != NULL); + + /* Final module_cnt is zero. 
*/ + *((gidx_t *) cur_extra_table) = 0; + cur_extra_table += sizeof (gidx_t); + } + } + + header.magic = GCONVCACHE_MAGIC; + + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct gconvcache_header); + total = iov[0].iov_len; + + header.string_offset = total; + iov[1].iov_base = string_table; + iov[1].iov_len = string_table_size; + total += iov[1].iov_len; + + idx = 2; + if ((string_table_size & (sizeof (gidx_t) - 1)) != 0) + { + iov[2].iov_base = (void *) &null_word; + iov[2].iov_len = (sizeof (gidx_t) + - (string_table_size & (sizeof (gidx_t) - 1))); + total += iov[2].iov_len; + ++idx; + } + + header.hash_offset = total; + header.hash_size = hash_size; + iov[idx].iov_base = hash_table; + iov[idx].iov_len = hash_size * sizeof (struct hash_entry); + total += iov[idx].iov_len; + ++idx; + + header.module_offset = total; + iov[idx].iov_base = module_table; + iov[idx].iov_len = nname_info * sizeof (struct module_entry); + total += iov[idx].iov_len; + ++idx; + + assert (cur_extra_table - extra_table + <= ((sizeof (struct extra_entry) + sizeof (gidx_t) + + sizeof (struct extra_entry_module)) + * nextra_modules)); + header.otherconv_offset = total; + iov[idx].iov_base = extra_table; + iov[idx].iov_len = cur_extra_table - extra_table; + total += iov[idx].iov_len; + ++idx; + + if (TEMP_FAILURE_RETRY (writev (fd, iov, idx)) != total) + { + int save_errno = errno; + close (fd); + unlink (GCONV_MODULES_CACHE); + error (EXIT_FAILURE, save_errno, gettext ("cannot write output file")); + } + + close (fd); + + return 0; +} diff --git a/iconv/iconvconfig.h b/iconv/iconvconfig.h new file mode 100644 index 0000000000..c52ceaefe8 --- /dev/null +++ b/iconv/iconvconfig.h @@ -0,0 +1,67 @@ +/* Copyright (C) 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <stdint.h> + + +typedef uint16_t gidx_t; + + +struct gconvcache_header +{ + uint32_t magic; + gidx_t string_offset; + gidx_t hash_offset; + gidx_t hash_size; + gidx_t module_offset; + gidx_t otherconv_offset; +}; + +struct hash_entry +{ + gidx_t string_offset; + gidx_t module_idx; +}; + +struct module_entry +{ + gidx_t canonname_offset; + gidx_t fromdir_offset; + gidx_t fromname_offset; + gidx_t todir_offset; + gidx_t toname_offset; + gidx_t extra_offset; +}; + +struct extra_entry +{ + gidx_t module_cnt; + struct extra_entry_module + { + gidx_t outname_offset; + gidx_t dir_offset; + gidx_t name_offset; + } module[0]; +}; + + +#define GCONVCACHE_MAGIC 0x20010324 + + +#define GCONV_MODULES_CACHE GCONV_DIR "/gconv-modules.cache" diff --git a/iconv/strtab.c b/iconv/strtab.c new file mode 100644 index 0000000000..5b3e3283e9 --- /dev/null +++ b/iconv/strtab.c @@ -0,0 +1,294 @@ +/* C string table handling. + Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Written by Ulrich Drepper <drepper@redhat.com>, 2000. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <assert.h> +#include <inttypes.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/param.h> + + +struct Strent +{ + const char *string; + size_t len; + struct Strent *next; + struct Strent *left; + struct Strent *right; + size_t offset; + char reverse[0]; +}; + + +struct memoryblock +{ + struct memoryblock *next; + char memory[0]; +}; + + +struct Strtab +{ + struct Strent *root; + struct memoryblock *memory; + char *backp; + size_t left; + size_t total; + + struct Strent null; +}; + + +/* Cache for the pagesize. We correct this value a bit so that `malloc' + is not allocating more than a page. 
*/ +static size_t ps; + + +extern void *xmalloc (size_t n) __attribute__ ((__malloc__)); + + +struct Strtab * +strtabinit (void) +{ + if (ps == 0) + { + ps = sysconf (_SC_PAGESIZE) - 2 * sizeof (void); + assert (sizeof (struct memoryblock) < ps); + } + + return (struct Strtab *) calloc (1, sizeof (struct Strtab)); +} + + +static void +morememory (struct Strtab *st, size_t len) +{ + struct memoryblock *newmem; + + if (len < ps) + len = ps; + newmem = (struct memoryblock *) malloc (len); + if (newmem == NULL) + abort (); + + newmem->next = st->memory; + st->memory = newmem; + st->backp = newmem->memory; + st->left = len; +} + + +void +strtabfree (struct Strtab *st) +{ + struct memoryblock *mb = st->memory; + + while (mb != NULL) + { + void *old = mb; + mb = mb->next; + free (old); + } + + free (st); +} + + +static struct Strent * +newstring (struct Strtab *st, const char *str, size_t len) +{ + struct Strent *newstr; + size_t align; + int i; + + /* Compute the string length if the caller doesn't know it. */ + if (len == 0) + len = strlen (str) + 1; + + /* Compute the amount of padding needed to make the structure aligned. */ + align = ((__alignof__ (struct Strent) + - (((uintptr_t) st->backp) + & (__alignof__ (struct Strent) - 1))) + & (__alignof__ (struct Strent) - 1)); + + /* Make sure there is enough room in the memory block. */ + if (st->left < align + sizeof (struct Strent) + len) + { + morememory (st, sizeof (struct Strent) + len); + align = 0; + } + + /* Create the reserved string. 
*/ + newstr = (struct Strent *) (st->backp + align); + newstr->string = str; + newstr->len = len; + newstr->next = NULL; + newstr->left = NULL; + newstr->right = NULL; + newstr->offset = 0; + for (i = len - 2; i >= 0; --i) + newstr->reverse[i] = str[len - 2 - i]; + newstr->reverse[len - 1] = '\0'; + st->backp += align + sizeof (struct Strent) + len; + st->left -= align + sizeof (struct Strent) + len; + + return newstr; +} + + +/* XXX This function should definitely be rewritten to use a balancing + tree algorith (AVL, red-black trees). For now a simple, correct + implementation is enough. */ +static struct Strent ** +searchstring (struct Strent **sep, struct Strent *newstr) +{ + int cmpres; + + /* More strings? */ + if (*sep == NULL) + { + *sep = newstr; + return sep; + } + + /* Compare the strings. */ + cmpres = memcmp ((*sep)->reverse, newstr->reverse, + MIN ((*sep)->len, newstr->len)); + if (cmpres == 0) + /* We found a matching string. */ + return sep; + else if (cmpres > 0) + return searchstring (&(*sep)->left, newstr); + else + return searchstring (&(*sep)->right, newstr); +} + + +/* Add new string. The actual string is assumed to be permanent. */ +struct Strent * +strtabadd (struct Strtab *st, const char *str, size_t len) +{ + struct Strent *newstr; + struct Strent **sep; + + /* Allocate memory for the new string and its associated information. */ + newstr = newstring (st, str, len); + + /* Search in the array for the place to insert the string. If there + is no string with matching prefix and no string with matching + leading substring, create a new entry. */ + sep = searchstring (&st->root, newstr); + if (*sep != newstr) + { + /* This is not the same entry. This means we have a prefix match. */ + if ((*sep)->len > newstr->len) + { + /* We have a new substring. This means we don't need the reverse + string of this entry anymore. 
*/ + st->backp -= newstr->len; + st->left += newstr->len; + + newstr->next = (*sep)->next; + (*sep)->next = newstr; + } + else if ((*sep)->len != newstr->len) + { + /* When we get here it means that the string we are about to + add has a common prefix with a string we already have but + it is longer. In this case we have to put it first. */ + newstr->next = *sep; + *sep = newstr; + + st->total += newstr->len - (*sep)->len; + } + else + { + /* We have an exact match. Free the memory we allocated. */ + st->left += st->backp - (char *) newstr; + st->backp = (char *) newstr; + + newstr = *sep; + } + } + else + st->total += newstr->len; + + return newstr; +} + + +static void +copystrings (struct Strent *nodep, char **freep, size_t *offsetp) +{ + struct Strent *subs; + + if (nodep->left != NULL) + copystrings (nodep->left, freep, offsetp); + + /* Process the current node. */ + nodep->offset = *offsetp; + *freep = (char *) mempcpy (*freep, nodep->string, nodep->len); + *offsetp += nodep->len; + + for (subs = nodep->next; subs != NULL; subs = subs->next) + { + assert (subs->len < nodep->len); + subs->offset = nodep->offset + nodep->len - subs->len; + } + + if (nodep->right != NULL) + copystrings (nodep->right, freep, offsetp); +} + + +void * +strtabfinalize (struct Strtab *st, size_t *size) +{ + size_t copylen; + char *endp; + char *retval; + + /* Fill in the information. */ + endp = retval = (char *) xmalloc (st->total + 1); + + /* Always put an empty string at the beginning so that a zero offset + can mean error. */ + *endp++ = '\0'; + + /* Now run through the tree and add all the string while also updating + the offset members of the elfstrent records. */ + copylen = 1; + copystrings (st->root, &endp, ©len); + assert (copylen == st->total + 1); + assert (endp = retval + st->total + 1); + *size = copylen; + + return retval; +} + + +size_t +strtaboffset (struct Strent *se) +{ + return se->offset; +} |