diff options
Diffstat (limited to 'REORG.TODO/iconv')
35 files changed, 10175 insertions, 0 deletions
diff --git a/REORG.TODO/iconv/Makefile b/REORG.TODO/iconv/Makefile new file mode 100644 index 0000000000..b2fead0479 --- /dev/null +++ b/REORG.TODO/iconv/Makefile @@ -0,0 +1,79 @@ +# Copyright (C) 1997-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# +# Makefile for iconv. +# +subdir := iconv + +include ../Makeconfig + +headers = iconv.h gconv.h +routines = iconv_open iconv iconv_close \ + gconv_open gconv gconv_close gconv_db gconv_conf \ + gconv_builtin gconv_simple gconv_trans gconv_cache +routines += gconv_dl + +vpath %.c ../locale/programs ../intl + +iconv_prog-modules = iconv_charmap charmap charmap-dir linereader \ + dummy-repertoire simple-hash xstrdup xmalloc +iconvconfig-modules = strtab xmalloc hash-string +extra-objs = $(iconv_prog-modules:=.o) $(iconvconfig-modules:=.o) +CFLAGS-iconv_prog.c = -I../locale/programs +CFLAGS-iconv_charmap.c = -I../locale/programs +CFLAGS-dummy-repertoire.c = -I../locale/programs +CFLAGS-charmap.c = -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \ + -DDEFAULT_CHARMAP=null_pointer -DNEED_NULL_POINTER +CFLAGS-linereader.c = -DNO_TRANSLITERATION +CFLAGS-simple-hash.c = -I../locale + +tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 + +others = iconv_prog iconvconfig +install-others-programs = 
$(inst_bindir)/iconv +install-sbin = iconvconfig + +CFLAGS-gconv_cache.c += -DGCONV_DIR='"$(gconvdir)"' +CFLAGS-gconv_conf.c = -DGCONV_PATH='"$(gconvdir)"' +CFLAGS-iconvconfig.c = -DGCONV_PATH='"$(gconvdir)"' -DGCONV_DIR='"$(gconvdir)"' + +# Set libof-* for each routine. +cpp-srcs-left := $(iconv_prog-modules) $(iconvconfig-modules) +lib := iconvprogs +include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + +ifeq ($(run-built-tests),yes) +xtests-special += $(objpfx)test-iconvconfig.out +endif + +include ../Rules + +$(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force) + $(do-install-program) + +$(objpfx)iconv_prog: $(iconv_prog-modules:%=$(objpfx)%.o) +$(objpfx)iconvconfig: $(iconvconfig-modules:%=$(objpfx)%.o) + +$(objpfx)test-iconvconfig.out: /dev/null $(objpfx)iconvconfig + (set -e; \ + tmp=$(objpfx)gconv-modules.cache.$$$$; \ + rm -f $$tmp; \ + $(make-test-out) --output=$$tmp --nostdlib $(inst_gconvdir); \ + cmp $$tmp $(inst_gconvdir)/gconv-modules.cache; \ + rm -f $$tmp) > $@; \ + $(evaluate-test) diff --git a/REORG.TODO/iconv/Versions b/REORG.TODO/iconv/Versions new file mode 100644 index 0000000000..60ab10a277 --- /dev/null +++ b/REORG.TODO/iconv/Versions @@ -0,0 +1,13 @@ +libc { + GLIBC_2.1 { + # i* + iconv; iconv_open; iconv_close; + } + GLIBC_PRIVATE { + # functions shared with iconv program + __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; + + # function used by the gconv modules + __gconv_transliterate; + } +} diff --git a/REORG.TODO/iconv/dummy-repertoire.c b/REORG.TODO/iconv/dummy-repertoire.c new file mode 100644 index 0000000000..a4db977951 --- /dev/null +++ b/REORG.TODO/iconv/dummy-repertoire.c @@ -0,0 +1,37 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +/* For iconv we don't have to handle repertoire maps. Provide dummy + definitions to allow the use of linereader.c unchanged. */ +#include <repertoire.h> + + +uint32_t +repertoire_find_value (const struct repertoire_t *repertoire, const char *name, + size_t len) +{ + return ILLEGAL_CHAR_VALUE; +} + + +const char * +repertoire_find_symbol (const struct repertoire_t *repertoire, uint32_t ucs) +{ + return NULL; +} diff --git a/REORG.TODO/iconv/gconv.c b/REORG.TODO/iconv/gconv.c new file mode 100644 index 0000000000..0aab0546b9 --- /dev/null +++ b/REORG.TODO/iconv/gconv.c @@ -0,0 +1,91 @@ +/* Convert characters in input buffer using conversion descriptor to + output buffer. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <dlfcn.h> +#include <stddef.h> +#include <sys/param.h> + +#include <gconv_int.h> +#include <sysdep.h> + + +int +internal_function +__gconv (__gconv_t cd, const unsigned char **inbuf, + const unsigned char *inbufend, unsigned char **outbuf, + unsigned char *outbufend, size_t *irreversible) +{ + size_t last_step; + int result; + + if (cd == (__gconv_t) -1L) + return __GCONV_ILLEGAL_DESCRIPTOR; + + last_step = cd->__nsteps - 1; + + assert (irreversible != NULL); + *irreversible = 0; + + cd->__data[last_step].__outbuf = outbuf != NULL ? *outbuf : NULL; + cd->__data[last_step].__outbufend = outbufend; + + __gconv_fct fct = cd->__steps->__fct; +#ifdef PTR_DEMANGLE + if (cd->__steps->__shlib_handle != NULL) + PTR_DEMANGLE (fct); +#endif + + if (inbuf == NULL || *inbuf == NULL) + { + /* We just flush. */ + result = DL_CALL_FCT (fct, + (cd->__steps, cd->__data, NULL, NULL, NULL, + irreversible, + cd->__data[last_step].__outbuf == NULL ? 2 : 1, + 0)); + + /* If the flush was successful clear the rest of the state. 
*/ + if (result == __GCONV_OK) + for (size_t cnt = 0; cnt <= last_step; ++cnt) + cd->__data[cnt].__invocation_counter = 0; + } + else + { + const unsigned char *last_start; + + assert (outbuf != NULL && *outbuf != NULL); + + do + { + last_start = *inbuf; + result = DL_CALL_FCT (fct, + (cd->__steps, cd->__data, inbuf, inbufend, + NULL, irreversible, 0, 0)); + } + while (result == __GCONV_EMPTY_INPUT && last_start != *inbuf + && *inbuf + cd->__steps->__min_needed_from <= inbufend); + } + + if (outbuf != NULL && *outbuf != NULL) + *outbuf = cd->__data[last_step].__outbuf; + + return result; +} diff --git a/REORG.TODO/iconv/gconv.h b/REORG.TODO/iconv/gconv.h new file mode 100644 index 0000000000..db678dba49 --- /dev/null +++ b/REORG.TODO/iconv/gconv.h @@ -0,0 +1,154 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This header provides no interface for a user to the internals of + the gconv implementation in the libc. Therefore there is no use + for these definitions beside for writing additional gconv modules. 
*/ + +#ifndef _GCONV_H +#define _GCONV_H 1 + +#include <features.h> +#include <bits/types/__mbstate_t.h> +#include <bits/types/wint_t.h> + +#define __need_size_t +#define __need_wchar_t +#include <stddef.h> + +/* ISO 10646 value used to signal invalid value. */ +#define __UNKNOWN_10646_CHAR ((wchar_t) 0xfffd) + +/* Error codes for gconv functions. */ +enum +{ + __GCONV_OK = 0, + __GCONV_NOCONV, + __GCONV_NODB, + __GCONV_NOMEM, + + __GCONV_EMPTY_INPUT, + __GCONV_FULL_OUTPUT, + __GCONV_ILLEGAL_INPUT, + __GCONV_INCOMPLETE_INPUT, + + __GCONV_ILLEGAL_DESCRIPTOR, + __GCONV_INTERNAL_ERROR +}; + + +/* Flags the `__gconv_open' function can set. */ +enum +{ + __GCONV_IS_LAST = 0x0001, + __GCONV_IGNORE_ERRORS = 0x0002, + __GCONV_SWAP = 0x0004, + __GCONV_TRANSLIT = 0x0008 +}; + + +/* Forward declarations. */ +struct __gconv_step; +struct __gconv_step_data; +struct __gconv_loaded_object; + + +/* Type of a conversion function. */ +typedef int (*__gconv_fct) (struct __gconv_step *, struct __gconv_step_data *, + const unsigned char **, const unsigned char *, + unsigned char **, size_t *, int, int); + +/* Type of a specialized conversion function for a single byte to INTERNAL. */ +typedef wint_t (*__gconv_btowc_fct) (struct __gconv_step *, unsigned char); + +/* Constructor and destructor for local data for conversion step. */ +typedef int (*__gconv_init_fct) (struct __gconv_step *); +typedef void (*__gconv_end_fct) (struct __gconv_step *); + + +/* Description of a conversion step. */ +struct __gconv_step +{ + struct __gconv_loaded_object *__shlib_handle; + const char *__modname; + + int __counter; + + char *__from_name; + char *__to_name; + + __gconv_fct __fct; + __gconv_btowc_fct __btowc_fct; + __gconv_init_fct __init_fct; + __gconv_end_fct __end_fct; + + /* Information about the number of bytes needed or produced in this + step. This helps optimizing the buffer sizes. 
*/ + int __min_needed_from; + int __max_needed_from; + int __min_needed_to; + int __max_needed_to; + + /* Flag whether this is a stateful encoding or not. */ + int __stateful; + + void *__data; /* Pointer to step-local data. */ +}; + +/* Additional data for steps in use of conversion descriptor. This is + allocated by the `init' function. */ +struct __gconv_step_data +{ + unsigned char *__outbuf; /* Output buffer for this step. */ + unsigned char *__outbufend; /* Address of first byte after the output + buffer. */ + + /* Is this the last module in the chain. */ + int __flags; + + /* Counter for number of invocations of the module function for this + descriptor. */ + int __invocation_counter; + + /* Flag whether this is an internal use of the module (in the mb*towc* + and wc*tomb* functions) or regular with iconv(3). */ + int __internal_use; + + __mbstate_t *__statep; + __mbstate_t __state; /* This element must not be used directly by + any module; always use STATEP! */ +}; + + +/* Combine conversion step description with data. */ +typedef struct __gconv_info +{ + size_t __nsteps; + struct __gconv_step *__steps; + __extension__ struct __gconv_step_data __data[0]; +} *__gconv_t; + +/* Transliteration using the locale's data. */ +extern int __gconv_transliterate (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char *inbufstart, + const unsigned char **inbufp, + const unsigned char *inbufend, + unsigned char **outbufstart, + size_t *irreversible); + +#endif /* gconv.h */ diff --git a/REORG.TODO/iconv/gconv_builtin.c b/REORG.TODO/iconv/gconv_builtin.c new file mode 100644 index 0000000000..111233dab5 --- /dev/null +++ b/REORG.TODO/iconv/gconv_builtin.c @@ -0,0 +1,87 @@ +/* Table for builtin transformation mapping. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> +#include <limits.h> +#include <stdint.h> +#include <string.h> + +#include <gconv_int.h> + +#include <assert.h> + + +static const struct builtin_map +{ + const char *name; + __gconv_fct fct; + __gconv_btowc_fct btowc_fct; + + int8_t min_needed_from; + int8_t max_needed_from; + int8_t min_needed_to; + int8_t max_needed_to; + +} map[] = +{ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ + { \ + .name = Name, \ + .fct = Fct, \ + .btowc_fct = BtowcFct, \ + \ + .min_needed_from = MinF, \ + .max_needed_from = MaxF, \ + .min_needed_to = MinT, \ + .max_needed_to = MaxT \ + }, +#define BUILTIN_ALIAS(From, To) + +#include <gconv_builtin.h> +}; + + +void +internal_function +__gconv_get_builtin_trans (const char *name, struct __gconv_step *step) +{ + size_t cnt; + + for (cnt = 0; cnt < sizeof (map) / sizeof (map[0]); ++cnt) + if (strcmp (name, map[cnt].name) == 0) + break; + + assert (cnt < sizeof (map) / sizeof (map[0])); + + step->__fct = map[cnt].fct; + step->__btowc_fct = map[cnt].btowc_fct; + step->__init_fct = NULL; + step->__end_fct = NULL; + step->__shlib_handle = NULL; + step->__modname = NULL; + + step->__min_needed_from = map[cnt].min_needed_from; + step->__max_needed_from = map[cnt].max_needed_from; + step->__min_needed_to = 
map[cnt].min_needed_to; + step->__max_needed_to = map[cnt].max_needed_to; + + /* None of the builtin converters handles stateful encoding. */ + step->__stateful = 0; +} diff --git a/REORG.TODO/iconv/gconv_builtin.h b/REORG.TODO/iconv/gconv_builtin.h new file mode 100644 index 0000000000..93e2e4d865 --- /dev/null +++ b/REORG.TODO/iconv/gconv_builtin.h @@ -0,0 +1,123 @@ +/* Builtin transformations. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* All encoding names must be in upper case. There must be no extra + spaces. 
*/ + +BUILTIN_ALIAS ("UCS4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("UCS-4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("UCS-4BE//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("CSUCS4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("ISO-10646//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("10646-1:1993//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("10646-1:1993/UCS4/", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("OSF00010104//", "ISO-10646/UCS4/") /* level 1 */ +BUILTIN_ALIAS ("OSF00010105//", "ISO-10646/UCS4/") /* level 2 */ +BUILTIN_ALIAS ("OSF00010106//", "ISO-10646/UCS4/") /* level 3 */ + +BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4", + __gconv_transform_internal_ucs4, NULL, 4, 4, 4, 4) +BUILTIN_TRANSFORMATION ("ISO-10646/UCS4/", "INTERNAL", 1, "=ucs4->INTERNAL", + __gconv_transform_ucs4_internal, NULL, 4, 4, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "UCS-4LE//", 1, "=INTERNAL->ucs4le", + __gconv_transform_internal_ucs4le, NULL, 4, 4, 4, 4) +BUILTIN_TRANSFORMATION ("UCS-4LE//", "INTERNAL", 1, "=ucs4le->INTERNAL", + __gconv_transform_ucs4le_internal, NULL, 4, 4, 4, 4) + +BUILTIN_ALIAS ("WCHAR_T//", "INTERNAL") + +BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("ISO-IR-193//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("OSF05010001//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("ISO-10646/UTF-8/", "ISO-10646/UTF8/") + +BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UTF8/", 1, "=INTERNAL->utf8", + __gconv_transform_internal_utf8, NULL, 4, 4, 1, 6) + +BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "INTERNAL", 1, "=utf8->INTERNAL", + __gconv_transform_utf8_internal, __gconv_btwoc_ascii, + 1, 6, 4, 4) + +BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("OSF00010100//", "ISO-10646/UCS2/") /* level 1 */ +BUILTIN_ALIAS ("OSF00010101//", "ISO-10646/UCS2/") /* level 2 */ +BUILTIN_ALIAS ("OSF00010102//", "ISO-10646/UCS2/") /* level 3 */ + +BUILTIN_TRANSFORMATION ("ISO-10646/UCS2/", "INTERNAL", 
1, "=ucs2->INTERNAL", + __gconv_transform_ucs2_internal, NULL, 2, 2, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS2/", 1, "=INTERNAL->ucs2", + __gconv_transform_internal_ucs2, NULL, 4, 4, 2, 2) + + +BUILTIN_ALIAS ("ANSI_X3.4//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ISO-IR-6//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ANSI_X3.4-1986//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ISO_646.IRV:1991//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ASCII//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ISO646-US//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("US-ASCII//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("US//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("IBM367//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("CP367//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("CSASCII//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("OSF00010020//", "ANSI_X3.4-1968//") + +BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "INTERNAL", 1, "=ascii->INTERNAL", + __gconv_transform_ascii_internal, __gconv_btwoc_ascii, + 1, 1, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "ANSI_X3.4-1968//", 1, "=INTERNAL->ascii", + __gconv_transform_internal_ascii, NULL, 4, 4, 1, 1) + + +#if BYTE_ORDER == BIG_ENDIAN +BUILTIN_ALIAS ("UNICODEBIG//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("UCS-2BE//", "ISO-10646/UCS2/") + +BUILTIN_ALIAS ("UCS-2LE//", "UNICODELITTLE//") + +BUILTIN_TRANSFORMATION ("UNICODELITTLE//", "INTERNAL", 1, + "=ucs2reverse->INTERNAL", + __gconv_transform_ucs2reverse_internal, NULL, + 2, 2, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODELITTLE//", 1, + "=INTERNAL->ucs2reverse", + __gconv_transform_internal_ucs2reverse, NULL, + 4, 4, 2, 2) +#else +BUILTIN_ALIAS ("UNICODELITTLE//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("UCS-2LE//", "ISO-10646/UCS2/") + +BUILTIN_ALIAS ("UCS-2BE//", "UNICODEBIG//") + +BUILTIN_TRANSFORMATION ("UNICODEBIG//", "INTERNAL", 1, + "=ucs2reverse->INTERNAL", + __gconv_transform_ucs2reverse_internal, NULL, + 2, 2, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1, + "=INTERNAL->ucs2reverse", + 
__gconv_transform_internal_ucs2reverse, NULL, + 4, 4, 2, 2) +#endif diff --git a/REORG.TODO/iconv/gconv_cache.c b/REORG.TODO/iconv/gconv_cache.c new file mode 100644 index 0000000000..0cadea3638 --- /dev/null +++ b/REORG.TODO/iconv/gconv_cache.c @@ -0,0 +1,472 @@ +/* Cache handling for iconv modules. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <dlfcn.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <gconv_int.h> +#include <iconvconfig.h> +#include <not-cancel.h> + +#include "../intl/hash-string.h" + +static void *gconv_cache; +static size_t cache_size; +static int cache_malloced; + + +void * +__gconv_get_cache (void) +{ + return gconv_cache; +} + + +int +internal_function +__gconv_load_cache (void) +{ + int fd; + struct stat64 st; + struct gconvcache_header *header; + + /* We cannot use the cache if the GCONV_PATH environment variable is + set. */ + __gconv_path_envvar = getenv ("GCONV_PATH"); + if (__gconv_path_envvar != NULL) + return -1; + + /* See whether the cache file exists. 
*/ + fd = open_not_cancel (GCONV_MODULES_CACHE, O_RDONLY, 0); + if (__builtin_expect (fd, 0) == -1) + /* Not available. */ + return -1; + + /* Get information about the file. */ + if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) < 0 + /* We do not have to start looking at the file if it cannot contain + at least the cache header. */ + || (size_t) st.st_size < sizeof (struct gconvcache_header)) + { + close_and_exit: + close_not_cancel_no_status (fd); + return -1; + } + + /* Make the file content available. */ + cache_size = st.st_size; +#ifdef _POSIX_MAPPED_FILES + gconv_cache = __mmap (NULL, cache_size, PROT_READ, MAP_SHARED, fd, 0); + if (__glibc_unlikely (gconv_cache == MAP_FAILED)) +#endif + { + size_t already_read; + + gconv_cache = malloc (cache_size); + if (gconv_cache == NULL) + goto close_and_exit; + + already_read = 0; + do + { + ssize_t n = __read (fd, (char *) gconv_cache + already_read, + cache_size - already_read); + if (__builtin_expect (n, 0) == -1) + { + free (gconv_cache); + gconv_cache = NULL; + goto close_and_exit; + } + + already_read += n; + } + while (already_read < cache_size); + + cache_malloced = 1; + } + + /* We don't need the file descriptor anymore. */ + close_not_cancel_no_status (fd); + + /* Check the consistency. 
*/ + header = (struct gconvcache_header *) gconv_cache; + if (__builtin_expect (header->magic, GCONVCACHE_MAGIC) != GCONVCACHE_MAGIC + || __builtin_expect (header->string_offset >= cache_size, 0) + || __builtin_expect (header->hash_offset >= cache_size, 0) + || __builtin_expect (header->hash_size == 0, 0) + || __builtin_expect ((header->hash_offset + + header->hash_size * sizeof (struct hash_entry)) + > cache_size, 0) + || __builtin_expect (header->module_offset >= cache_size, 0) + || __builtin_expect (header->otherconv_offset > cache_size, 0)) + { + if (cache_malloced) + { + free (gconv_cache); + cache_malloced = 0; + } +#ifdef _POSIX_MAPPED_FILES + else + __munmap (gconv_cache, cache_size); +#endif + gconv_cache = NULL; + + return -1; + } + + /* That worked. */ + return 0; +} + + +static int +internal_function +find_module_idx (const char *str, size_t *idxp) +{ + unsigned int idx; + unsigned int hval; + unsigned int hval2; + const struct gconvcache_header *header; + const char *strtab; + const struct hash_entry *hashtab; + unsigned int limit; + + header = (const struct gconvcache_header *) gconv_cache; + strtab = (char *) gconv_cache + header->string_offset; + hashtab = (struct hash_entry *) ((char *) gconv_cache + + header->hash_offset); + + hval = __hash_string (str); + idx = hval % header->hash_size; + hval2 = 1 + hval % (header->hash_size - 2); + + limit = cache_size - header->string_offset; + while (hashtab[idx].string_offset != 0) + if (hashtab[idx].string_offset < limit + && strcmp (str, strtab + hashtab[idx].string_offset) == 0) + { + *idxp = hashtab[idx].module_idx; + return 0; + } + else + if ((idx += hval2) >= header->hash_size) + idx -= header->hash_size; + + /* Nothing found. 
*/ + return -1; +} + + +#ifndef STATIC_GCONV +static int +internal_function +find_module (const char *directory, const char *filename, + struct __gconv_step *result) +{ + size_t dirlen = strlen (directory); + size_t fnamelen = strlen (filename) + 1; + char fullname[dirlen + fnamelen]; + int status = __GCONV_NOCONV; + + memcpy (__mempcpy (fullname, directory, dirlen), filename, fnamelen); + + result->__shlib_handle = __gconv_find_shlib (fullname); + if (result->__shlib_handle != NULL) + { + status = __GCONV_OK; + + result->__modname = NULL; + result->__fct = result->__shlib_handle->fct; + result->__init_fct = result->__shlib_handle->init_fct; + result->__end_fct = result->__shlib_handle->end_fct; + + /* These settings can be overridden by the init function. */ + result->__btowc_fct = NULL; + result->__data = NULL; + + /* Call the init function. */ + if (result->__init_fct != NULL) + { + __gconv_init_fct init_fct = result->__init_fct; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (init_fct); +#endif + status = DL_CALL_FCT (init_fct, (result)); + +#ifdef PTR_MANGLE + if (result->__btowc_fct != NULL) + PTR_MANGLE (result->__btowc_fct); +#endif + } + } + + return status; +} +#endif + + +int +internal_function +__gconv_compare_alias_cache (const char *name1, const char *name2, int *result) +{ + size_t name1_idx; + size_t name2_idx; + + if (gconv_cache == NULL) + return -1; + + if (find_module_idx (name1, &name1_idx) != 0 + || find_module_idx (name2, &name2_idx) != 0) + *result = strcmp (name1, name2); + else + *result = (int) (name1_idx - name2_idx); + + return 0; +} + + +int +internal_function +__gconv_lookup_cache (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, int flags) +{ + const struct gconvcache_header *header; + const char *strtab; + size_t fromidx; + size_t toidx; + const struct module_entry *modtab; + const struct module_entry *from_module; + const struct module_entry *to_module; + struct __gconv_step *result; + + if (gconv_cache 
== NULL) + /* We have no cache available. */ + return __GCONV_NODB; + + header = (const struct gconvcache_header *) gconv_cache; + strtab = (char *) gconv_cache + header->string_offset; + modtab = (const struct module_entry *) ((char *) gconv_cache + + header->module_offset); + + if (find_module_idx (fromset, &fromidx) != 0 + || (header->module_offset + (fromidx + 1) * sizeof (struct module_entry) + > cache_size)) + return __GCONV_NOCONV; + from_module = &modtab[fromidx]; + + if (find_module_idx (toset, &toidx) != 0 + || (header->module_offset + (toidx + 1) * sizeof (struct module_entry) + > cache_size)) + return __GCONV_NOCONV; + to_module = &modtab[toidx]; + + /* Avoid copy-only transformations if the user requests. */ + if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0) && fromidx == toidx) + return __GCONV_NULCONV; + + /* If there are special conversions available examine them first. */ + if (fromidx != 0 && toidx != 0 + && __builtin_expect (from_module->extra_offset, 0) != 0) + { + /* Search through the list to see whether there is a module + matching the destination character set. */ + const struct extra_entry *extra; + + /* Note the -1. This is due to the offset added in iconvconfig. + See there for more explanations. */ + extra = (const struct extra_entry *) ((char *) gconv_cache + + header->otherconv_offset + + from_module->extra_offset - 1); + while (extra->module_cnt != 0 + && extra->module[extra->module_cnt - 1].outname_offset != toidx) + extra = (const struct extra_entry *) ((char *) extra + + sizeof (struct extra_entry) + + (extra->module_cnt + * sizeof (struct extra_entry_module))); + + if (extra->module_cnt != 0) + { + /* Use the extra module. First determine how many steps. 
*/ + char *fromname; + int idx; + + *nsteps = extra->module_cnt; + *handle = result = + (struct __gconv_step *) malloc (extra->module_cnt + * sizeof (struct __gconv_step)); + if (result == NULL) + return __GCONV_NOMEM; + + fromname = (char *) strtab + from_module->canonname_offset; + idx = 0; + do + { + result[idx].__from_name = fromname; + fromname = result[idx].__to_name = + (char *) strtab + modtab[extra->module[idx].outname_offset].canonname_offset; + + result[idx].__counter = 1; + result[idx].__data = NULL; + +#ifndef STATIC_GCONV + if (strtab[extra->module[idx].dir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res; + + res = find_module (strtab + extra->module[idx].dir_offset, + strtab + extra->module[idx].name_offset, + &result[idx]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + free (result); + goto try_internal; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + + extra->module[idx].name_offset, + &result[idx]); + + } + while (++idx < extra->module_cnt); + + return __GCONV_OK; + } + } + + try_internal: + /* See whether we can convert via the INTERNAL charset. */ + if ((fromidx != 0 && __builtin_expect (from_module->fromname_offset, 1) == 0) + || (toidx != 0 && __builtin_expect (to_module->toname_offset, 1) == 0) + || (fromidx == 0 && toidx == 0)) + /* Not possible. Nothing we can do. */ + return __GCONV_NOCONV; + + /* We will use up to two modules. Always allocate room for two. */ + result = (struct __gconv_step *) malloc (2 * sizeof (struct __gconv_step)); + if (result == NULL) + return __GCONV_NOMEM; + + *handle = result; + *nsteps = 0; + + /* Generate data structure for conversion to INTERNAL. 
*/ + if (fromidx != 0) + { + result[0].__from_name = (char *) strtab + from_module->canonname_offset; + result[0].__to_name = (char *) "INTERNAL"; + + result[0].__counter = 1; + result[0].__data = NULL; + +#ifndef STATIC_GCONV + if (strtab[from_module->todir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res = find_module (strtab + from_module->todir_offset, + strtab + from_module->toname_offset, + &result[0]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + free (result); + return res; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + from_module->toname_offset, + &result[0]); + + ++*nsteps; + } + + /* Generate data structure for conversion from INTERNAL. */ + if (toidx != 0) + { + int idx = *nsteps; + + result[idx].__from_name = (char *) "INTERNAL"; + result[idx].__to_name = (char *) strtab + to_module->canonname_offset; + + result[idx].__counter = 1; + result[idx].__data = NULL; + +#ifndef STATIC_GCONV + if (strtab[to_module->fromdir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res = find_module (strtab + to_module->fromdir_offset, + strtab + to_module->fromname_offset, + &result[idx]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + if (idx != 0) + __gconv_release_step (&result[0]); + free (result); + return res; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + to_module->fromname_offset, + &result[idx]); + + ++*nsteps; + } + + return __GCONV_OK; +} + + +/* Free memory allocated for the transformation record. */ +void +internal_function +__gconv_release_cache (struct __gconv_step *steps, size_t nsteps) +{ + if (gconv_cache != NULL) + /* The only thing we have to deallocate is the record with the + steps. */ + free (steps); +} + + +/* Free all resources if necessary. 
*/ +libc_freeres_fn (free_mem) +{ + if (cache_malloced) + free (gconv_cache); +#ifdef _POSIX_MAPPED_FILES + else if (gconv_cache != NULL) + __munmap (gconv_cache, cache_size); +#endif +} diff --git a/REORG.TODO/iconv/gconv_charset.h b/REORG.TODO/iconv/gconv_charset.h new file mode 100644 index 0000000000..18d8bd6ae7 --- /dev/null +++ b/REORG.TODO/iconv/gconv_charset.h @@ -0,0 +1,57 @@ +/* Charset name normalization. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <ctype.h> +#include <locale.h> + + +static void +strip (char *wp, const char *s) +{ + int slash_count = 0; + + while (*s != '\0') + { + if (__isalnum_l (*s, _nl_C_locobj_ptr) + || *s == '_' || *s == '-' || *s == '.' 
|| *s == ',' || *s == ':') + *wp++ = __toupper_l (*s, _nl_C_locobj_ptr); + else if (*s == '/') + { + if (++slash_count == 3) + break; + *wp++ = '/'; + } + ++s; + } + + while (slash_count++ < 2) + *wp++ = '/'; + + *wp = '\0'; +} + + +static inline char * __attribute__ ((unused, always_inline)) +upstr (char *dst, const char *str) +{ + char *cp = dst; + while ((*cp++ = __toupper_l (*str++, _nl_C_locobj_ptr)) != '\0') + /* nothing */; + return dst; +} diff --git a/REORG.TODO/iconv/gconv_close.c b/REORG.TODO/iconv/gconv_close.c new file mode 100644 index 0000000000..4853dd8779 --- /dev/null +++ b/REORG.TODO/iconv/gconv_close.c @@ -0,0 +1,50 @@ +/* Release any resource associated with given conversion descriptor. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> + +#include <gconv_int.h> + + +int +internal_function +__gconv_close (__gconv_t cd) +{ + struct __gconv_step *srunp; + struct __gconv_step_data *drunp; + size_t nsteps; + + /* Free all resources by calling destructor functions and release + the implementations. 
*/ + srunp = cd->__steps; + nsteps = cd->__nsteps; + drunp = cd->__data; + do + { + if (!(drunp->__flags & __GCONV_IS_LAST) && drunp->__outbuf != NULL) + free (drunp->__outbuf); + } + while (!((drunp++)->__flags & __GCONV_IS_LAST)); + + /* Free the data allocated for the descriptor. */ + free (cd); + + /* Close the participating modules. */ + return __gconv_close_transform (srunp, nsteps); +} diff --git a/REORG.TODO/iconv/gconv_conf.c b/REORG.TODO/iconv/gconv_conf.c new file mode 100644 index 0000000000..5aa055de6e --- /dev/null +++ b/REORG.TODO/iconv/gconv_conf.c @@ -0,0 +1,616 @@ +/* Handle configuration data. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <search.h> +#include <stddef.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/param.h> + +#include <libc-lock.h> +#include <gconv_int.h> + + +/* This is the default path where we look for module lists. */ +static const char default_gconv_path[] = GCONV_PATH; + +/* The path elements, as determined by the __gconv_get_path function. 
+ All path elements end in a slash. */ +struct path_elem *__gconv_path_elem; +/* Maximum length of a single path element in __gconv_path_elem. */ +size_t __gconv_max_path_elem_len; + +/* We use the following struct if we couldn't allocate memory. */ +static const struct path_elem empty_path_elem = { NULL, 0 }; + +/* Name of the file containing the module information in the directories + along the path. */ +static const char gconv_conf_filename[] = "gconv-modules"; + +/* Filename extension for the modules. */ +#ifndef MODULE_EXT +# define MODULE_EXT ".so" +#endif +static const char gconv_module_ext[] = MODULE_EXT; + +/* We have a few builtin transformations. */ +static struct gconv_module builtin_modules[] = +{ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ + { \ + .from_string = From, \ + .to_string = To, \ + .cost_hi = Cost, \ + .cost_lo = INT_MAX, \ + .module_name = Name \ + }, +#define BUILTIN_ALIAS(From, To) + +#include "gconv_builtin.h" + +#undef BUILTIN_TRANSFORMATION +#undef BUILTIN_ALIAS +}; + +static const char builtin_aliases[] = +{ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) +#define BUILTIN_ALIAS(From, To) From "\0" To "\0" + +#include "gconv_builtin.h" + +#undef BUILTIN_TRANSFORMATION +#undef BUILTIN_ALIAS +}; + +#include <libio/libioP.h> +#define __getdelim(line, len, c, fp) _IO_getdelim (line, len, c, fp) + + +/* Value of the GCONV_PATH environment variable. */ +const char *__gconv_path_envvar; + + +/* Test whether there is already a matching module known. */ +static int +internal_function +detect_conflict (const char *alias) +{ + struct gconv_module *node = __gconv_modules_db; + + while (node != NULL) + { + int cmpres = strcmp (alias, node->from_string); + + if (cmpres == 0) + /* We have a conflict. 
*/ + return 1; + else if (cmpres < 0) + node = node->left; + else + node = node->right; + } + + return node != NULL; +} + + +/* The actual code to add aliases. */ +static void +add_alias2 (const char *from, const char *to, const char *wp, void *modules) +{ + /* Test whether this alias conflicts with any available module. */ + if (detect_conflict (from)) + /* It does conflict, don't add the alias. */ + return; + + struct gconv_alias *new_alias = (struct gconv_alias *) + malloc (sizeof (struct gconv_alias) + (wp - from)); + if (new_alias != NULL) + { + void **inserted; + + new_alias->fromname = memcpy ((char *) new_alias + + sizeof (struct gconv_alias), + from, wp - from); + new_alias->toname = new_alias->fromname + (to - from); + + inserted = (void **) __tsearch (new_alias, &__gconv_alias_db, + __gconv_alias_compare); + if (inserted == NULL || *inserted != new_alias) + /* Something went wrong, free this entry. */ + free (new_alias); + } +} + + +/* Add new alias. */ +static void +add_alias (char *rp, void *modules) +{ + /* We now expect two more strings. The strings are normalized + (converted to UPPER case) and stored in the alias database. */ + char *from, *to, *wp; + + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + from = wp = rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); + if (*rp == '\0') + /* There is no `to' string on the line. Ignore it. */ + return; + *wp++ = '\0'; + to = ++rp; + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); + if (to == wp) + /* No `to' string, ignore the line. */ + return; + *wp++ = '\0'; + + add_alias2 (from, to, wp, modules); +} + + +/* Insert a data structure for a new module in the search tree. 
*/ +static void +internal_function +insert_module (struct gconv_module *newp, int tobefreed) +{ + struct gconv_module **rootp = &__gconv_modules_db; + + while (*rootp != NULL) + { + struct gconv_module *root = *rootp; + int cmpres; + + cmpres = strcmp (newp->from_string, root->from_string); + if (cmpres == 0) + { + /* Both strings are identical. Insert the string at the + end of the `same' list if it is not already there. */ + while (strcmp (newp->from_string, root->from_string) != 0 + || strcmp (newp->to_string, root->to_string) != 0) + { + rootp = &root->same; + root = *rootp; + if (root == NULL) + break; + } + + if (root != NULL) + { + /* This is not a new conversion. But maybe the cost is + better. */ + if (newp->cost_hi < root->cost_hi + || (newp->cost_hi == root->cost_hi + && newp->cost_lo < root->cost_lo)) + { + newp->left = root->left; + newp->right = root->right; + newp->same = root->same; + *rootp = newp; + + free (root); + } + else if (tobefreed) + free (newp); + return; + } + + break; + } + else if (cmpres < 0) + rootp = &root->left; + else + rootp = &root->right; + } + + /* Plug in the new node here. */ + *rootp = newp; +} + + +/* Add new module. */ +static void +internal_function +add_module (char *rp, const char *directory, size_t dir_len, void **modules, + size_t *nmodules, int modcounter) +{ + /* We expect now + 1. `from' name + 2. `to' name + 3. filename of the module + 4. 
an optional cost value + */ + struct gconv_alias fake_alias; + struct gconv_module *new_module; + char *from, *to, *module, *wp; + int need_ext; + int cost_hi; + + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + from = rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + { + *rp = __toupper_l (*rp, _nl_C_locobj_ptr); + ++rp; + } + if (*rp == '\0') + return; + *rp++ = '\0'; + to = wp = rp; + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); + if (*rp == '\0') + return; + *wp++ = '\0'; + do + ++rp; + while (__isspace_l (*rp, _nl_C_locobj_ptr)); + module = wp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + *wp++ = *rp++; + if (*rp == '\0') + { + /* There is no cost, use one by default. */ + *wp++ = '\0'; + cost_hi = 1; + } + else + { + /* There might be a cost value. */ + char *endp; + + *wp++ = '\0'; + cost_hi = strtol (rp, &endp, 10); + if (rp == endp || cost_hi < 1) + /* No useful information. */ + cost_hi = 1; + } + + if (module[0] == '\0') + /* No module name given. */ + return; + if (module[0] == '/') + dir_len = 0; + + /* See whether we must add the ending. */ + need_ext = 0; + if (wp - module < (ptrdiff_t) sizeof (gconv_module_ext) + || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, + sizeof (gconv_module_ext)) != 0) + /* We must add the module extension. */ + need_ext = sizeof (gconv_module_ext) - 1; + + /* See whether we have already an alias with this name defined. */ + fake_alias.fromname = strndupa (from, to - from); + + if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) != NULL) + /* This module duplicates an alias. 
*/ + return; + + new_module = (struct gconv_module *) calloc (1, + sizeof (struct gconv_module) + + (wp - from) + + dir_len + need_ext); + if (new_module != NULL) + { + char *tmp; + + new_module->from_string = tmp = (char *) (new_module + 1); + tmp = __mempcpy (tmp, from, to - from); + + new_module->to_string = tmp; + tmp = __mempcpy (tmp, to, module - to); + + new_module->cost_hi = cost_hi; + new_module->cost_lo = modcounter; + + new_module->module_name = tmp; + + if (dir_len != 0) + tmp = __mempcpy (tmp, directory, dir_len); + + tmp = __mempcpy (tmp, module, wp - module); + + if (need_ext) + memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext)); + + /* Now insert the new module data structure in our search tree. */ + insert_module (new_module, 1); + } +} + + +/* Read the next configuration file. */ +static void +internal_function +read_conf_file (const char *filename, const char *directory, size_t dir_len, + void **modules, size_t *nmodules) +{ + /* Note the file is opened with cancellation in the I/O functions + disabled. */ + FILE *fp = fopen (filename, "rce"); + char *line = NULL; + size_t line_len = 0; + static int modcounter; + + /* Don't complain if a file is not present or readable, simply silently + ignore it. */ + if (fp == NULL) + return; + + /* No threads reading from this stream. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + /* Process the known entries of the file. Comments start with `#' and + end with the end of the line. Empty lines are ignored. */ + while (!feof_unlocked (fp)) + { + char *rp, *endp, *word; + ssize_t n = __getdelim (&line, &line_len, '\n', fp); + if (n < 0) + /* An error occurred. */ + break; + + rp = line; + /* Terminate the line (excluding comments or newline) by an NUL byte + to simplify the following code. 
*/ + endp = strchr (rp, '#'); + if (endp != NULL) + *endp = '\0'; + else + if (rp[n - 1] == '\n') + rp[n - 1] = '\0'; + + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + + /* If this is an empty line go on with the next one. */ + if (rp == endp) + continue; + + word = rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + + if (rp - word == sizeof ("alias") - 1 + && memcmp (word, "alias", sizeof ("alias") - 1) == 0) + add_alias (rp, *modules); + else if (rp - word == sizeof ("module") - 1 + && memcmp (word, "module", sizeof ("module") - 1) == 0) + add_module (rp, directory, dir_len, modules, nmodules, modcounter++); + /* else */ + /* Otherwise ignore the line. */ + } + + free (line); + + fclose (fp); +} + + +/* Determine the directories we are looking for data in. */ +void +internal_function +__gconv_get_path (void) +{ + struct path_elem *result; + __libc_lock_define_initialized (static, lock); + + __libc_lock_lock (lock); + + /* Make sure there wasn't a second thread doing it already. */ + result = (struct path_elem *) __gconv_path_elem; + if (result == NULL) + { + /* Determine the complete path first. */ + char *gconv_path; + size_t gconv_path_len; + char *elem; + char *oldp; + char *cp; + int nelems; + char *cwd; + size_t cwdlen; + + if (__gconv_path_envvar == NULL) + { + /* No user-defined path. Make a modifiable copy of the + default path. */ + gconv_path = strdupa (default_gconv_path); + gconv_path_len = sizeof (default_gconv_path); + cwd = NULL; + cwdlen = 0; + } + else + { + /* Append the default path to the user-defined path. */ + size_t user_len = strlen (__gconv_path_envvar); + + gconv_path_len = user_len + 1 + sizeof (default_gconv_path); + gconv_path = alloca (gconv_path_len); + __mempcpy (__mempcpy (__mempcpy (gconv_path, __gconv_path_envvar, + user_len), + ":", 1), + default_gconv_path, sizeof (default_gconv_path)); + cwd = __getcwd (NULL, 0); + cwdlen = __glibc_unlikely (cwd == NULL) ? 
0 : strlen (cwd); + } + assert (default_gconv_path[0] == '/'); + + /* In a first pass we calculate the number of elements. */ + oldp = NULL; + cp = strchr (gconv_path, ':'); + nelems = 1; + while (cp != NULL) + { + if (cp != oldp + 1) + ++nelems; + oldp = cp; + cp = strchr (cp + 1, ':'); + } + + /* Allocate the memory for the result. */ + result = (struct path_elem *) malloc ((nelems + 1) + * sizeof (struct path_elem) + + gconv_path_len + nelems + + (nelems - 1) * (cwdlen + 1)); + if (result != NULL) + { + char *strspace = (char *) &result[nelems + 1]; + int n = 0; + + /* Separate the individual parts. */ + __gconv_max_path_elem_len = 0; + elem = __strtok_r (gconv_path, ":", &gconv_path); + assert (elem != NULL); + do + { + result[n].name = strspace; + if (elem[0] != '/') + { + assert (cwd != NULL); + strspace = __mempcpy (strspace, cwd, cwdlen); + *strspace++ = '/'; + } + strspace = __stpcpy (strspace, elem); + if (strspace[-1] != '/') + *strspace++ = '/'; + + result[n].len = strspace - result[n].name; + if (result[n].len > __gconv_max_path_elem_len) + __gconv_max_path_elem_len = result[n].len; + + *strspace++ = '\0'; + ++n; + } + while ((elem = __strtok_r (NULL, ":", &gconv_path)) != NULL); + + result[n].name = NULL; + result[n].len = 0; + } + + __gconv_path_elem = result ?: (struct path_elem *) &empty_path_elem; + + free (cwd); + } + + __libc_lock_unlock (lock); +} + + +/* Read all configuration files found in the user-specified and the default + path. */ +void +attribute_hidden +__gconv_read_conf (void) +{ + void *modules = NULL; + size_t nmodules = 0; + int save_errno = errno; + size_t cnt; + + /* First see whether we should use the cache. */ + if (__gconv_load_cache () == 0) + { + /* Yes, we are done. */ + __set_errno (save_errno); + return; + } + +#ifndef STATIC_GCONV + /* Find out where we have to look. 
*/ + if (__gconv_path_elem == NULL) + __gconv_get_path (); + + for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt) + { + const char *elem = __gconv_path_elem[cnt].name; + size_t elem_len = __gconv_path_elem[cnt].len; + char *filename; + + /* No slash needs to be inserted between elem and gconv_conf_filename; + elem already ends in a slash. */ + filename = alloca (elem_len + sizeof (gconv_conf_filename)); + __mempcpy (__mempcpy (filename, elem, elem_len), + gconv_conf_filename, sizeof (gconv_conf_filename)); + + /* Read the next configuration file. */ + read_conf_file (filename, elem, elem_len, &modules, &nmodules); + } +#endif + + /* Add the internal modules. */ + for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]); + ++cnt) + { + struct gconv_alias fake_alias; + + fake_alias.fromname = (char *) builtin_modules[cnt].from_string; + + if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) + != NULL) + /* It'll conflict so don't add it. */ + continue; + + insert_module (&builtin_modules[cnt], 0); + } + + /* Add aliases for builtin conversions. */ + const char *cp = builtin_aliases; + do + { + const char *from = cp; + const char *to = __rawmemchr (from, '\0') + 1; + cp = __rawmemchr (to, '\0') + 1; + + add_alias2 (from, to, cp, modules); + } + while (*cp != '\0'); + + /* Restore the error number. */ + __set_errno (save_errno); +} + + + +/* Free all resources if necessary. */ +libc_freeres_fn (free_mem) +{ + if (__gconv_path_elem != NULL && __gconv_path_elem != &empty_path_elem) + free ((void *) __gconv_path_elem); +} diff --git a/REORG.TODO/iconv/gconv_db.c b/REORG.TODO/iconv/gconv_db.c new file mode 100644 index 0000000000..7893fadba1 --- /dev/null +++ b/REORG.TODO/iconv/gconv_db.c @@ -0,0 +1,870 @@ +/* Provide access to the collection of available transformation modules. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <limits.h> +#include <search.h> +#include <stdlib.h> +#include <string.h> +#include <sys/param.h> +#include <libc-lock.h> +#include <locale/localeinfo.h> + +#include <dlfcn.h> +#include <gconv_int.h> +#include <sysdep.h> + + +/* Simple data structure for alias mapping. We have two names, `from' + and `to'. */ +void *__gconv_alias_db; + +/* Array with available modules. */ +struct gconv_module *__gconv_modules_db; + +/* We modify global data. */ +__libc_lock_define_initialized (, __gconv_lock) + + +/* Provide access to module database. */ +struct gconv_module * +__gconv_get_modules_db (void) +{ + return __gconv_modules_db; +} + +void * +__gconv_get_alias_db (void) +{ + return __gconv_alias_db; +} + + +/* Function for searching alias. */ +int +__gconv_alias_compare (const void *p1, const void *p2) +{ + const struct gconv_alias *s1 = (const struct gconv_alias *) p1; + const struct gconv_alias *s2 = (const struct gconv_alias *) p2; + return strcmp (s1->fromname, s2->fromname); +} + + +/* To search for a derivation we create a list of intermediate steps. + Each element contains a pointer to the element which precedes it + in the derivation order. 
*/ +struct derivation_step +{ + const char *result_set; + size_t result_set_len; + int cost_lo; + int cost_hi; + struct gconv_module *code; + struct derivation_step *last; + struct derivation_step *next; +}; + +#define NEW_STEP(result, hi, lo, module, last_mod) \ + ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \ + newp->result_set = result; \ + newp->result_set_len = strlen (result); \ + newp->cost_hi = hi; \ + newp->cost_lo = lo; \ + newp->code = module; \ + newp->last = last_mod; \ + newp->next = NULL; \ + newp; }) + + +/* If a specific transformation is used more than once we should not need + to start looking for it again. Instead cache each successful result. */ +struct known_derivation +{ + const char *from; + const char *to; + struct __gconv_step *steps; + size_t nsteps; +}; + +/* Compare function for database of found derivations. */ +static int +derivation_compare (const void *p1, const void *p2) +{ + const struct known_derivation *s1 = (const struct known_derivation *) p1; + const struct known_derivation *s2 = (const struct known_derivation *) p2; + int result; + + result = strcmp (s1->from, s2->from); + if (result == 0) + result = strcmp (s1->to, s2->to); + return result; +} + +/* The search tree for known derivations. */ +static void *known_derivations; + +/* Look up whether given transformation was already requested before. */ +static int +internal_function +derivation_lookup (const char *fromset, const char *toset, + struct __gconv_step **handle, size_t *nsteps) +{ + struct known_derivation key = { fromset, toset, NULL, 0 }; + struct known_derivation **result; + + result = __tfind (&key, &known_derivations, derivation_compare); + + if (result == NULL) + return __GCONV_NOCONV; + + *handle = (*result)->steps; + *nsteps = (*result)->nsteps; + + /* Please note that we return GCONV_OK even if the last search for + this transformation was unsuccessful. */ + return __GCONV_OK; +} + +/* Add new derivation to list of known ones. 
*/ +static void +internal_function +add_derivation (const char *fromset, const char *toset, + struct __gconv_step *handle, size_t nsteps) +{ + struct known_derivation *new_deriv; + size_t fromset_len = strlen (fromset) + 1; + size_t toset_len = strlen (toset) + 1; + + new_deriv = (struct known_derivation *) + malloc (sizeof (struct known_derivation) + fromset_len + toset_len); + if (new_deriv != NULL) + { + new_deriv->from = (char *) (new_deriv + 1); + new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len), + toset, toset_len); + + new_deriv->steps = handle; + new_deriv->nsteps = nsteps; + + if (__tsearch (new_deriv, &known_derivations, derivation_compare) + == NULL) + /* There is some kind of memory allocation problem. */ + free (new_deriv); + } + /* Please note that we don't complain if the allocation failed. This + is not tragic, but in case we use the memory debugging facilities + not all memory will be freed. */ +} + +static void __libc_freeres_fn_section +free_derivation (void *p) +{ + struct known_derivation *deriv = (struct known_derivation *) p; + size_t cnt; + + for (cnt = 0; cnt < deriv->nsteps; ++cnt) + if (deriv->steps[cnt].__counter > 0 + && deriv->steps[cnt].__end_fct != NULL) + { + assert (deriv->steps[cnt].__shlib_handle != NULL); + + __gconv_end_fct end_fct = deriv->steps[cnt].__end_fct; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (end_fct); +#endif + DL_CALL_FCT (end_fct, (&deriv->steps[cnt])); + } + + /* Free the name strings. */ + if (deriv->steps != NULL) + { + free ((char *) deriv->steps[0].__from_name); + free ((char *) deriv->steps[deriv->nsteps - 1].__to_name); + free ((struct __gconv_step *) deriv->steps); + } + + free (deriv); +} + + +/* Decrement the reference count for a single step in a steps array. */ +void +internal_function +__gconv_release_step (struct __gconv_step *step) +{ + /* Skip builtin modules; they are not reference counted. 
*/ + if (step->__shlib_handle != NULL && --step->__counter == 0) + { + /* Call the destructor. */ + if (step->__end_fct != NULL) + { + assert (step->__shlib_handle != NULL); + + __gconv_end_fct end_fct = step->__end_fct; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (end_fct); +#endif + DL_CALL_FCT (end_fct, (step)); + } + +#ifndef STATIC_GCONV + /* Release the loaded module. */ + __gconv_release_shlib (step->__shlib_handle); + step->__shlib_handle = NULL; +#endif + } + else if (step->__shlib_handle == NULL) + /* Builtin modules should not have end functions. */ + assert (step->__end_fct == NULL); +} + +static int +internal_function +gen_steps (struct derivation_step *best, const char *toset, + const char *fromset, struct __gconv_step **handle, size_t *nsteps) +{ + size_t step_cnt = 0; + struct __gconv_step *result; + struct derivation_step *current; + int status = __GCONV_NOMEM; + char *from_name = NULL; + char *to_name = NULL; + + /* First determine number of steps. */ + for (current = best; current->last != NULL; current = current->last) + ++step_cnt; + + result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step) + * step_cnt); + if (result != NULL) + { + int failed = 0; + + status = __GCONV_OK; + *nsteps = step_cnt; + current = best; + while (step_cnt-- > 0) + { + if (step_cnt == 0) + { + result[step_cnt].__from_name = from_name = __strdup (fromset); + if (from_name == NULL) + { + failed = 1; + break; + } + } + else + result[step_cnt].__from_name = (char *)current->last->result_set; + + if (step_cnt + 1 == *nsteps) + { + result[step_cnt].__to_name = to_name + = __strdup (current->result_set); + if (to_name == NULL) + { + failed = 1; + break; + } + } + else + result[step_cnt].__to_name = result[step_cnt + 1].__from_name; + + result[step_cnt].__counter = 1; + result[step_cnt].__data = NULL; + +#ifndef STATIC_GCONV + if (current->code->module_name[0] == '/') + { + /* Load the module, return handle for it. 
*/ + struct __gconv_loaded_object *shlib_handle = + __gconv_find_shlib (current->code->module_name); + + if (shlib_handle == NULL) + { + failed = 1; + break; + } + + result[step_cnt].__shlib_handle = shlib_handle; + result[step_cnt].__modname = shlib_handle->name; + result[step_cnt].__fct = shlib_handle->fct; + result[step_cnt].__init_fct = shlib_handle->init_fct; + result[step_cnt].__end_fct = shlib_handle->end_fct; + + /* These settings can be overridden by the init function. */ + result[step_cnt].__btowc_fct = NULL; + + /* Call the init function. */ + __gconv_init_fct init_fct = result[step_cnt].__init_fct; + if (init_fct != NULL) + { + assert (result[step_cnt].__shlib_handle != NULL); + +# ifdef PTR_DEMANGLE + PTR_DEMANGLE (init_fct); +# endif + status = DL_CALL_FCT (init_fct, (&result[step_cnt])); + + if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) + { + failed = 1; + /* The init function failed, so its end function must not run: clear it on this step first, then step back so that the cleanup loop below unloads this module. */ + result[step_cnt].__end_fct = NULL; + --step_cnt; + break; + } + +# ifdef PTR_MANGLE + if (result[step_cnt].__btowc_fct != NULL) + PTR_MANGLE (result[step_cnt].__btowc_fct); +# endif + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (current->code->module_name, + &result[step_cnt]); + + current = current->last; + } + + if (__builtin_expect (failed, 0) != 0) + { + /* Something went wrong while initializing the modules. */ + while (++step_cnt < *nsteps) + __gconv_release_step (&result[step_cnt]); + free (result); + free (from_name); + free (to_name); + *nsteps = 0; + *handle = NULL; + if (status == __GCONV_OK) + status = __GCONV_NOCONV; + } + else + *handle = result; + } + else + { + *nsteps = 0; + *handle = NULL; + } + + return status; +} + + +#ifndef STATIC_GCONV +static int +internal_function +increment_counter (struct __gconv_step *steps, size_t nsteps) +{ + /* Increment the user counter. 
*/ + size_t cnt = nsteps; + int result = __GCONV_OK; + + while (cnt-- > 0) + { + struct __gconv_step *step = &steps[cnt]; + + if (step->__counter++ == 0) + { + /* Skip builtin modules. */ + if (step->__modname != NULL) + { + /* Reopen a previously used module. */ + step->__shlib_handle = __gconv_find_shlib (step->__modname); + if (step->__shlib_handle == NULL) + { + /* Oops, this is the second time we use this module + (after unloading) and this time loading failed!? */ + --step->__counter; + while (++cnt < nsteps) + __gconv_release_step (&steps[cnt]); + result = __GCONV_NOCONV; + break; + } + + /* The function addresses defined by the module may + have changed. */ + step->__fct = step->__shlib_handle->fct; + step->__init_fct = step->__shlib_handle->init_fct; + step->__end_fct = step->__shlib_handle->end_fct; + + /* These settings can be overridden by the init function. */ + step->__btowc_fct = NULL; + } + + /* Call the init function. */ + __gconv_init_fct init_fct = step->__init_fct; + if (init_fct != NULL) + { +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (init_fct); +#endif + DL_CALL_FCT (init_fct, (step)); + +#ifdef PTR_MANGLE + if (step->__btowc_fct != NULL) + PTR_MANGLE (step->__btowc_fct); +#endif + } + } + } + return result; +} +#endif + + +/* The main function: find a possible derivation from the `fromset' (either + the given name or the alias) to the `toset' (again with alias). */ +static int +internal_function +find_derivation (const char *toset, const char *toset_expand, + const char *fromset, const char *fromset_expand, + struct __gconv_step **handle, size_t *nsteps) +{ + struct derivation_step *first, *current, **lastp, *solution = NULL; + int best_cost_hi = INT_MAX; + int best_cost_lo = INT_MAX; + int result; + + /* Look whether an earlier call to `find_derivation' has already + computed a possible derivation. If so, return it immediately. 
*/ + result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset, + handle, nsteps); + if (result == __GCONV_OK) + { +#ifndef STATIC_GCONV + result = increment_counter (*handle, *nsteps); +#endif + return result; + } + + /* The task is to find a sequence of transformations, backed by the + existing modules - whether builtin or dynamically loadable -, + starting at `fromset' (or `fromset_expand') and ending at `toset' + (or `toset_expand'), and with minimal cost. + + For computer scientists, this is a shortest path search in the + graph where the nodes are all possible charsets and the edges are + the transformations listed in __gconv_modules_db. + + For now we use a simple algorithm with quadratic runtime behaviour. + A breadth-first search, starting at `fromset' and `fromset_expand'. + The list starting at `first' contains all nodes that have been + visited up to now, in the order in which they have been visited -- + excluding the goal nodes `toset' and `toset_expand' which get + managed in the list starting at `solution'. + `current' walks through the list starting at `first' and looks + which nodes are reachable from the current node, adding them to + the end of the list [`first' or `solution' respectively] (if + they are visited the first time) or updating them in place (if + they have have already been visited). + In each node of either list, cost_lo and cost_hi contain the + minimum cost over any paths found up to now, starting at `fromset' + or `fromset_expand', ending at that node. best_cost_lo and + best_cost_hi represent the minimum over the elements of the + `solution' list. 
*/ + + if (fromset_expand != NULL) + { + first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL); + first->next = NEW_STEP (fromset, 0, 0, NULL, NULL); + lastp = &first->next->next; + } + else + { + first = NEW_STEP (fromset, 0, 0, NULL, NULL); + lastp = &first->next; + } + + for (current = first; current != NULL; current = current->next) + { + /* Now match all the available module specifications against the + current charset name. If any of them matches check whether + we already have a derivation for this charset. If yes, use the + one with the lower costs. Otherwise add the new charset at the + end. + + The module database is organized in a tree form which allows + searching for prefixes. So we search for the first entry with a + matching prefix and any other matching entry can be found from + this place. */ + struct gconv_module *node; + + /* Maybe it is not necessary anymore to look for a solution for + this entry since the cost is already as high (or higher) as + the cost for the best solution so far. */ + if (current->cost_hi > best_cost_hi + || (current->cost_hi == best_cost_hi + && current->cost_lo >= best_cost_lo)) + continue; + + node = __gconv_modules_db; + while (node != NULL) + { + int cmpres = strcmp (current->result_set, node->from_string); + if (cmpres == 0) + { + /* Walk through the list of modules with this prefix and + try to match the name. */ + struct gconv_module *runp; + + /* Check all the modules with this prefix. */ + runp = node; + do + { + const char *result_set = (strcmp (runp->to_string, "-") == 0 + ? (toset_expand ?: toset) + : runp->to_string); + int cost_hi = runp->cost_hi + current->cost_hi; + int cost_lo = runp->cost_lo + current->cost_lo; + struct derivation_step *step; + + /* We managed to find a derivation. First see whether + we have reached one of the goal nodes. 
*/ + if (strcmp (result_set, toset) == 0 + || (toset_expand != NULL + && strcmp (result_set, toset_expand) == 0)) + { + /* Append to the `solution' list if there + is no entry with this name. */ + for (step = solution; step != NULL; step = step->next) + if (strcmp (result_set, step->result_set) == 0) + break; + + if (step == NULL) + { + step = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + step->next = solution; + solution = step; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + /* A better path was found for the node, + on the `solution' list. */ + step->code = runp; + step->last = current; + step->cost_hi = cost_hi; + step->cost_lo = cost_lo; + } + + /* Update best_cost accordingly. */ + if (cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + best_cost_hi = cost_hi; + best_cost_lo = cost_lo; + } + } + else if (cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + /* Append at the end of the `first' list if there + is no entry with this name. */ + for (step = first; step != NULL; step = step->next) + if (strcmp (result_set, step->result_set) == 0) + break; + + if (step == NULL) + { + *lastp = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + lastp = &(*lastp)->next; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + /* A better path was found for the node, + on the `first' list. */ + step->code = runp; + step->last = current; + + /* Update the cost for all steps. */ + for (step = first; step != NULL; + step = step->next) + /* But don't update the start nodes. 
*/ + if (step->code != NULL) + { + struct derivation_step *back; + int hi, lo; + + hi = step->code->cost_hi; + lo = step->code->cost_lo; + + for (back = step->last; back->code != NULL; + back = back->last) + { + hi += back->code->cost_hi; + lo += back->code->cost_lo; + } + + step->cost_hi = hi; + step->cost_lo = lo; + } + + /* Likewise for the nodes on the solution list. + Also update best_cost accordingly. */ + for (step = solution; step != NULL; + step = step->next) + { + step->cost_hi = (step->code->cost_hi + + step->last->cost_hi); + step->cost_lo = (step->code->cost_lo + + step->last->cost_lo); + + if (step->cost_hi < best_cost_hi + || (step->cost_hi == best_cost_hi + && step->cost_lo < best_cost_lo)) + { + best_cost_hi = step->cost_hi; + best_cost_lo = step->cost_lo; + } + } + } + } + + runp = runp->same; + } + while (runp != NULL); + + break; + } + else if (cmpres < 0) + node = node->left; + else + node = node->right; + } + } + + if (solution != NULL) + { + /* We really found a way to do the transformation. */ + + /* Choose the best solution. This is easy because we know that + the solution list has at most length 2 (one for every possible + goal node). */ + if (solution->next != NULL) + { + struct derivation_step *solution2 = solution->next; + + if (solution2->cost_hi < solution->cost_hi + || (solution2->cost_hi == solution->cost_hi + && solution2->cost_lo < solution->cost_lo)) + solution = solution2; + } + + /* Now build a data structure describing the transformation steps. */ + result = gen_steps (solution, toset_expand ?: toset, + fromset_expand ?: fromset, handle, nsteps); + } + else + { + /* We haven't found a transformation. Clear the result values. */ + *handle = NULL; + *nsteps = 0; + } + + /* Add result in any case to list of known derivations. */ + add_derivation (fromset_expand ?: fromset, toset_expand ?: toset, + *handle, *nsteps); + + return result; +} + + +/* Control of initialization. 
*/ +__libc_once_define (static, once); + + +static const char * +do_lookup_alias (const char *name) +{ + struct gconv_alias key; + struct gconv_alias **found; + + key.fromname = (char *) name; + found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare); + return found != NULL ? (*found)->toname : NULL; +} + + +int +internal_function +__gconv_compare_alias (const char *name1, const char *name2) +{ + int result; + + /* Ensure that the configuration data is read. */ + __libc_once (once, __gconv_read_conf); + + if (__gconv_compare_alias_cache (name1, name2, &result) != 0) + result = strcmp (do_lookup_alias (name1) ?: name1, + do_lookup_alias (name2) ?: name2); + + return result; +} + + +int +internal_function +__gconv_find_transform (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, + int flags) +{ + const char *fromset_expand; + const char *toset_expand; + int result; + + /* Ensure that the configuration data is read. */ + __libc_once (once, __gconv_read_conf); + + /* Acquire the lock. */ + __libc_lock_lock (__gconv_lock); + + result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags); + if (result != __GCONV_NODB) + { + /* We have a cache and could resolve the request, successful or not. */ + __libc_lock_unlock (__gconv_lock); + return result; + } + + /* If we don't have a module database return with an error. */ + if (__gconv_modules_db == NULL) + { + __libc_lock_unlock (__gconv_lock); + return __GCONV_NOCONV; + } + + /* See whether the names are aliases. */ + fromset_expand = do_lookup_alias (fromset); + toset_expand = do_lookup_alias (toset); + + if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0) + /* We are not supposed to create a pseudo transformation (means + copying) when the input and output character set are the same. 
*/ + && (strcmp (toset, fromset) == 0 + || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0) + || (fromset_expand != NULL + && (strcmp (toset, fromset_expand) == 0 + || (toset_expand != NULL + && strcmp (toset_expand, fromset_expand) == 0))))) + { + /* Both character sets are the same. */ + __libc_lock_unlock (__gconv_lock); + return __GCONV_NULCONV; + } + + result = find_derivation (toset, toset_expand, fromset, fromset_expand, + handle, nsteps); + + /* Release the lock. */ + __libc_lock_unlock (__gconv_lock); + + /* The following code is necessary since `find_derivation' will return + GCONV_OK even when no derivation was found but the same request + was processed before. I.e., negative results will also be cached. */ + return (result == __GCONV_OK + ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK) + : result); +} + + +/* Release the entries of the modules list. */ +int +internal_function +__gconv_close_transform (struct __gconv_step *steps, size_t nsteps) +{ + int result = __GCONV_OK; + size_t cnt; + + /* Acquire the lock. */ + __libc_lock_lock (__gconv_lock); + +#ifndef STATIC_GCONV + cnt = nsteps; + while (cnt-- > 0) + __gconv_release_step (&steps[cnt]); +#endif + + /* If we use the cache we free a bit more since we don't keep any + transformation records around, they are cheap enough to + recreate. */ + __gconv_release_cache (steps, nsteps); + + /* Release the lock. */ + __libc_lock_unlock (__gconv_lock); + + return result; +} + + +/* Free the modules mentioned. */ +static void +internal_function __libc_freeres_fn_section +free_modules_db (struct gconv_module *node) +{ + if (node->left != NULL) + free_modules_db (node->left); + if (node->right != NULL) + free_modules_db (node->right); + do + { + struct gconv_module *act = node; + node = node->same; + if (act->module_name[0] == '/') + free (act); + } + while (node != NULL); +} + + +/* Free all resources if necessary. */ +libc_freeres_fn (free_mem) +{ + /* First free locale memory. 
This needs to be done before freeing + derivations, as ctype cleanup functions dereference steps arrays which we + free below. */ + _nl_locale_subfreeres (); + + /* finddomain.c has similar problem. */ + extern void _nl_finddomain_subfreeres (void) attribute_hidden; + _nl_finddomain_subfreeres (); + + if (__gconv_alias_db != NULL) + __tdestroy (__gconv_alias_db, free); + + if (__gconv_modules_db != NULL) + free_modules_db (__gconv_modules_db); + + if (known_derivations != NULL) + __tdestroy (known_derivations, free_derivation); +} diff --git a/REORG.TODO/iconv/gconv_dl.c b/REORG.TODO/iconv/gconv_dl.c new file mode 100644 index 0000000000..241836204d --- /dev/null +++ b/REORG.TODO/iconv/gconv_dl.c @@ -0,0 +1,242 @@ +/* Handle loading/unloading of shared object for transformation. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <dlfcn.h> +#include <inttypes.h> +#include <search.h> +#include <stdlib.h> +#include <string.h> +#include <libc-lock.h> +#include <sys/param.h> + +#include <gconv_int.h> +#include <sysdep.h> + + +#ifdef DEBUG +/* For debugging purposes. */ +static void print_all (void); +#endif + + +/* This is a tuning parameter. 
If a transformation module is not used + anymore it gets not immediately unloaded. Instead we wait a certain + number of load attempts for further modules. If none of the + subsequent load attempts name the same object it finally gets unloaded. + Otherwise it is still available which hopefully is the frequent case. + The following number is the number of unloading attempts we wait + before unloading. */ +#define TRIES_BEFORE_UNLOAD 2 + +/* Root of the search tree of loaded objects. This is shared by all + threads, so access to it has to be serialized. */ +static void *loaded; + +/* Comparison function for the `loaded' tree: entries are ordered by name. */ +static int +known_compare (const void *p1, const void *p2) +{ + const struct __gconv_loaded_object *s1 = + (const struct __gconv_loaded_object *) p1; + const struct __gconv_loaded_object *s2 = + (const struct __gconv_loaded_object *) p2; + + return strcmp (s1->name, s2->name); +} + +/* Return the entry for the shared object NAME, creating the entry and + loading the object if necessary. Return NULL on failure. */ +struct __gconv_loaded_object * +internal_function +__gconv_find_shlib (const char *name) +{ + struct __gconv_loaded_object *found; + void *keyp; + + /* Search the tree of shared objects previously requested. Data in + the tree are `loaded_object' structures, whose first member is a + `const char *', the lookup key. The search returns a pointer to + the tree node structure; the first member of that is a pointer to + our structure (i.e. what will be a `loaded_object'); since the + first member of that is the lookup key string, &NAME is close + enough to a pointer to our structure to use as a lookup key that + will be passed to `known_compare' (above). */ + + keyp = __tfind (&name, &loaded, known_compare); + if (keyp == NULL) + { + /* This name was not known before. */ + size_t namelen = strlen (name) + 1; + + found = malloc (sizeof (struct __gconv_loaded_object) + namelen); + if (found != NULL) + { + /* Point the tree node at this new structure. 
*/ + found->name = (char *) memcpy (found + 1, name, namelen); + found->counter = -TRIES_BEFORE_UNLOAD - 1; + found->handle = NULL; + + if (__builtin_expect (__tsearch (found, &loaded, known_compare) + == NULL, 0)) + { + /* Something went wrong while inserting the entry. */ + free (found); + found = NULL; + } + } + } + else + found = *(struct __gconv_loaded_object **) keyp; + + /* Try to load the shared object if the usage count is 0. This + implies that if the shared object is not loadable, the handle is + NULL and the usage count > 0. */ + if (found != NULL) + { + if (found->counter < -TRIES_BEFORE_UNLOAD) + { + assert (found->handle == NULL); + found->handle = __libc_dlopen (found->name); + if (found->handle != NULL) + { + found->fct = __libc_dlsym (found->handle, "gconv"); + if (found->fct == NULL) + { + /* Argh, no conversion function. There is something + wrong here. */ + __gconv_release_shlib (found); + found = NULL; + } + else + { + found->init_fct = __libc_dlsym (found->handle, "gconv_init"); + found->end_fct = __libc_dlsym (found->handle, "gconv_end"); + +#ifdef PTR_MANGLE + PTR_MANGLE (found->fct); + if (found->init_fct != NULL) + PTR_MANGLE (found->init_fct); + if (found->end_fct != NULL) + PTR_MANGLE (found->end_fct); +#endif + + /* We have succeeded in loading the shared object. */ + found->counter = 1; + } + } + else + /* Error while loading the shared object. */ + found = NULL; + } + else if (found->handle != NULL) + found->counter = MAX (found->counter + 1, 1); + } + + return found; +} + + +/* This is very ugly but the tsearch functions provide no way to pass + information to the walker function. So we use a global variable. + It is MT safe since we use a lock. 
*/ +static struct __gconv_loaded_object *release_handle; + +static void +do_release_shlib (void *nodep, VISIT value, int level) +{ + struct __gconv_loaded_object *obj = *(struct __gconv_loaded_object **) nodep; + + if (value != preorder && value != leaf) + return; + + if (obj == release_handle) + { + /* This is the object being released. Decrement its + reference counter. */ + assert (obj->counter > 0); + --obj->counter; + } + else if (obj->counter <= 0 && obj->counter >= -TRIES_BEFORE_UNLOAD + && --obj->counter < -TRIES_BEFORE_UNLOAD && obj->handle != NULL) + { + /* Unused for more than TRIES_BEFORE_UNLOAD release passes: unload it. */ + __libc_dlclose (obj->handle); + obj->handle = NULL; + } +} + + +/* Notify system that a shared object is no longer needed. */ +void +internal_function +__gconv_release_shlib (struct __gconv_loaded_object *handle) +{ + /* Urgh, this is ugly but we have no other possibility. */ + release_handle = handle; + + /* Process all entries. Please note that we also visit entries + with release counts <= 0. This way we can finally unload them + if necessary. */ + __twalk (loaded, (__action_fn_t) do_release_shlib); +} + + +/* Free one entry of the `loaded' tree, closing its shared object first. */ +static void __libc_freeres_fn_section +do_release_all (void *nodep) +{ + struct __gconv_loaded_object *obj = (struct __gconv_loaded_object *) nodep; + + /* Unload the shared object. */ + if (obj->handle != NULL) + __libc_dlclose (obj->handle); + + free (obj); +} + +libc_freeres_fn (free_mem) +{ + __tdestroy (loaded, do_release_all); + loaded = NULL; +} + + +#ifdef DEBUG + +#include <stdio.h> + +static void +do_print (const void *nodep, VISIT value, int level) +{ + struct __gconv_loaded_object *obj = *(struct __gconv_loaded_object **) nodep; + + printf ("%10s: \"%s\", %d\n", + value == leaf ? "leaf" : + value == preorder ? "preorder" : + value == postorder ? 
"postorder" : "endorder", + obj->name, obj->counter); +} + +static void __attribute__ ((used)) +print_all (void) +{ + __twalk (loaded, do_print); +} +#endif diff --git a/REORG.TODO/iconv/gconv_int.h b/REORG.TODO/iconv/gconv_int.h new file mode 100644 index 0000000000..85a67ad31b --- /dev/null +++ b/REORG.TODO/iconv/gconv_int.h @@ -0,0 +1,287 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _GCONV_INT_H +#define _GCONV_INT_H 1 + +#include "gconv.h" +#include <stdlib.h> /* For alloca used in macro below. */ +#include <ctype.h> /* For __toupper_l used in macro below. */ +#include <string.h> /* For strlen et al used in macro below. */ +#include <libc-lock.h> + +__BEGIN_DECLS + + +/* Type to represent search path. */ +struct path_elem +{ + const char *name; + size_t len; +}; + +/* Variable with search path for `gconv' implementation. */ +extern struct path_elem *__gconv_path_elem attribute_hidden; +/* Maximum length of a single path element. */ +extern size_t __gconv_max_path_elem_len attribute_hidden; + + +/* Structure for alias definition. Simply two strings. */ +struct gconv_alias +{ + char *fromname; + char *toname; +}; + + +/* How many character should be converted in one call? 
*/ +#define GCONV_NCHAR_GOAL 8160 + + +/* Structure describing one loaded shared object. These are normally + objects that perform conversion, but as a special case the db shared + object is also handled. */ +struct __gconv_loaded_object +{ + /* Name of the object. It must be the first structure element. */ + const char *name; + + /* Reference counter for the db functionality. If no conversion is + needed we unload the db library. */ + int counter; + + /* The handle for the shared object. */ + void *handle; + + /* Pointers to the functions the module defines. */ + __gconv_fct fct; + __gconv_init_fct init_fct; + __gconv_end_fct end_fct; +}; + + +/* Description for an available conversion module. */ +struct gconv_module +{ + const char *from_string; + const char *to_string; + + int cost_hi; + int cost_lo; + + const char *module_name; + + struct gconv_module *left; /* Prefix smaller. */ + struct gconv_module *same; /* List of entries with identical prefix. */ + struct gconv_module *right; /* Prefix larger. */ +}; + + +/* Flags for `gconv_open'. */ +enum +{ + GCONV_AVOID_NOCONV = 1 << 0 +}; + +/* When GCONV_AVOID_NOCONV is set and no conversion is needed, + __GCONV_NULCONV should be returned. */ +enum +{ + __GCONV_NULCONV = -1 +}; + +/* Global variables. */ + +/* Database of alias names. */ +extern void *__gconv_alias_db attribute_hidden; + +/* Available modules, organized as a search tree (see `left'/`right'). */ +extern size_t __gconv_nmodules; +extern struct gconv_module *__gconv_modules_db attribute_hidden; + +/* Value of the GCONV_PATH environment variable. */ +extern const char *__gconv_path_envvar attribute_hidden; + +/* Lock for the conversion database content. */ +__libc_lock_define (extern, __gconv_lock attribute_hidden) + + +/* The gconv functions expect the name to be in upper case and complete, + including the trailing slashes if necessary. 
*/ +#define norm_add_slashes(str,suffix) \ + ({ \ + const char *cp = (str); \ + char *result; \ + char *tmp; \ + size_t cnt = 0; \ + const size_t suffix_len = strlen (suffix); \ + \ + while (*cp != '\0') \ + if (*cp++ == '/') \ + ++cnt; \ + \ + tmp = result = __alloca (cp - (str) + 3 + suffix_len); \ + cp = (str); \ + while (*cp != '\0') \ + *tmp++ = __toupper_l (*cp++, _nl_C_locobj_ptr); \ + if (cnt < 2) \ + { \ + *tmp++ = '/'; \ + if (cnt < 1) \ + { \ + *tmp++ = '/'; \ + if (suffix_len != 0) \ + tmp = __mempcpy (tmp, suffix, suffix_len); \ + } \ + } \ + *tmp = '\0'; \ + result; \ + }) + + +/* Return in *HANDLE descriptor for transformation from FROMSET to TOSET. */ +extern int __gconv_open (const char *toset, const char *fromset, + __gconv_t *handle, int flags) + internal_function; + +/* Free resources associated with transformation descriptor CD. */ +extern int __gconv_close (__gconv_t cd) + internal_function; + +/* Transform at most *INBYTESLEFT bytes from buffer starting at *INBUF + according to rules described by CD and place up to *OUTBYTESLEFT + bytes in buffer starting at *OUTBUF. Return number of non-identical + conversions in *IRREVERSIBLE if this pointer is not null. */ +extern int __gconv (__gconv_t cd, const unsigned char **inbuf, + const unsigned char *inbufend, unsigned char **outbuf, + unsigned char *outbufend, size_t *irreversible) + internal_function; + +/* Return in *HANDLE a pointer to an array with *NSTEPS elements describing + the single steps necessary for transformation from FROMSET to TOSET. */ +extern int __gconv_find_transform (const char *toset, const char *fromset, + struct __gconv_step **handle, + size_t *nsteps, int flags) + internal_function; + +/* Search for transformation in cache data. */ +extern int __gconv_lookup_cache (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, + int flags) + internal_function; + +/* Compare the two names for whether they are, after alias expansion, the + same. 
This function uses the cache and fails if none is + loaded. */ +extern int __gconv_compare_alias_cache (const char *name1, const char *name2, + int *result) internal_function; + +/* Free data associated with a step's structure. */ +extern void __gconv_release_step (struct __gconv_step *step) + internal_function; + +/* Read all the configuration data and cache it. */ +extern void __gconv_read_conf (void) attribute_hidden; + +/* Try to read module cache file. */ +extern int __gconv_load_cache (void) internal_function; + +/* Retrieve pointer to internal cache. */ +extern void *__gconv_get_cache (void); + +/* Retrieve pointer to internal module database. */ +extern struct gconv_module *__gconv_get_modules_db (void); + +/* Retrieve pointer to internal alias database. */ +extern void *__gconv_get_alias_db (void); + +/* Determine the directories we are looking in. */ +extern void __gconv_get_path (void) internal_function; + +/* Comparison function to search alias. */ +extern int __gconv_alias_compare (const void *p1, const void *p2) + attribute_hidden; + +/* Clear reference to transformation step implementations which might + cause the code to be unloaded. */ +extern int __gconv_close_transform (struct __gconv_step *steps, + size_t nsteps) + internal_function; + +/* Free all resources allocated for the transformation record when + using the cache. */ +extern void __gconv_release_cache (struct __gconv_step *steps, size_t nsteps) + internal_function; + +/* Load shared object named by NAME. If already loaded increment reference + count. */ +extern struct __gconv_loaded_object *__gconv_find_shlib (const char *name) + internal_function; + +/* Release shared object. If no further reference is available unload + the object. */ +extern void __gconv_release_shlib (struct __gconv_loaded_object *handle) + internal_function; + +/* Fill STEP with information about builtin module with NAME. 
*/ +extern void __gconv_get_builtin_trans (const char *name, + struct __gconv_step *step) + internal_function; + +libc_hidden_proto (__gconv_transliterate) + +/* Compare NAME1 and NAME2 after expanding codeset aliases. */ +extern int __gconv_compare_alias (const char *name1, const char *name2) + internal_function; + + +/* Builtin transformations. */ +#ifdef _LIBC +# define __BUILTIN_TRANSFORM(Name) \ + extern int Name (struct __gconv_step *step, \ + struct __gconv_step_data *data, \ + const unsigned char **inbuf, \ + const unsigned char *inbufend, \ + unsigned char **outbufstart, size_t *irreversible, \ + int do_flush, int consume_incomplete) + +__BUILTIN_TRANSFORM (__gconv_transform_ascii_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ascii); +__BUILTIN_TRANSFORM (__gconv_transform_utf8_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_utf8); +__BUILTIN_TRANSFORM (__gconv_transform_ucs2_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs2); +__BUILTIN_TRANSFORM (__gconv_transform_ucs2reverse_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs2reverse); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4); +__BUILTIN_TRANSFORM (__gconv_transform_ucs4_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le); +__BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_utf16); +__BUILTIN_TRANSFORM (__gconv_transform_utf16_internal); +# undef __BUILTIN_TRANSFORM + +/* Specialized conversion function for a single byte to INTERNAL, recognizing + only ASCII characters. */ +extern wint_t __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c); + +#endif + +__END_DECLS + +#endif /* gconv_int.h */ diff --git a/REORG.TODO/iconv/gconv_open.c b/REORG.TODO/iconv/gconv_open.c new file mode 100644 index 0000000000..ff4fd121eb --- /dev/null +++ b/REORG.TODO/iconv/gconv_open.c @@ -0,0 +1,208 @@ +/* Find matching transformation algorithms and initialize steps. 
+ Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <locale.h> +#include "../locale/localeinfo.h" +#include <stdlib.h> +#include <string.h> + +#include <gconv_int.h> + + +int +internal_function +__gconv_open (const char *toset, const char *fromset, __gconv_t *handle, + int flags) +{ + struct __gconv_step *steps; + size_t nsteps; + __gconv_t result = NULL; + size_t cnt = 0; + int res; + int conv_flags = 0; + const char *errhand; + const char *ignore; + bool translit = false; + + /* Find out whether any error handling method is specified. */ + errhand = strchr (toset, '/'); + if (errhand != NULL) + errhand = strchr (errhand + 1, '/'); + if (__glibc_likely (errhand != NULL)) + { + if (*++errhand == '\0') + errhand = NULL; + else + { + /* Make copy without the error handling description. */ + char *newtoset = (char *) alloca (errhand - toset + 1); + char *tok; + char *ptr = NULL /* Work around a bogus warning */; + + newtoset[errhand - toset] = '\0'; + toset = memcpy (newtoset, toset, errhand - toset); + + /* Find the appropriate transliteration handlers. 
*/ + tok = strdupa (errhand); + + tok = __strtok_r (tok, ",", &ptr); + while (tok != NULL) + { + if (__strcasecmp_l (tok, "TRANSLIT", _nl_C_locobj_ptr) == 0) + translit = true; + else if (__strcasecmp_l (tok, "IGNORE", _nl_C_locobj_ptr) == 0) + /* Set the flag to ignore all errors. */ + conv_flags |= __GCONV_IGNORE_ERRORS; + + tok = __strtok_r (NULL, ",", &ptr); + } + } + } + + /* For the source character set we ignore the error handler specification. + XXX Is this really always the best? */ + ignore = strchr (fromset, '/'); + if (ignore != NULL && (ignore = strchr (ignore + 1, '/')) != NULL + && *++ignore != '\0') + { + char *newfromset = (char *) alloca (ignore - fromset + 1); + + newfromset[ignore - fromset] = '\0'; + fromset = memcpy (newfromset, fromset, ignore - fromset); + } + + /* If the string is empty define this to mean the charset of the + currently selected locale. */ + if (strcmp (toset, "//") == 0) + { + const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); + size_t len = strlen (codeset); + char *dest; + toset = dest = (char *) alloca (len + 3); + memcpy (__mempcpy (dest, codeset, len), "//", 3); + } + if (strcmp (fromset, "//") == 0) + { + const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); + size_t len = strlen (codeset); + char *dest; + fromset = dest = (char *) alloca (len + 3); + memcpy (__mempcpy (dest, codeset, len), "//", 3); + } + + res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags); + if (res == __GCONV_OK) + { + /* Allocate room for handle. */ + result = (__gconv_t) malloc (sizeof (struct __gconv_info) + + (nsteps + * sizeof (struct __gconv_step_data))); + if (result == NULL) + res = __GCONV_NOMEM; + else + { + /* Remember the list of steps. */ + result->__steps = steps; + result->__nsteps = nsteps; + + /* Clear the array for the step data. */ + memset (result->__data, '\0', + nsteps * sizeof (struct __gconv_step_data)); + + /* Call all initialization functions for the transformation + step implementations. 
*/ + for (cnt = 0; cnt < nsteps; ++cnt) + { + size_t size; + + /* Would have to be done if we would not clear the whole + array above. */ +#if 0 + /* Reset the counter. */ + result->__data[cnt].__invocation_counter = 0; + + /* It's a regular use. */ + result->__data[cnt].__internal_use = 0; +#endif + + /* We use the `mbstate_t' member in DATA. */ + result->__data[cnt].__statep = &result->__data[cnt].__state; + + /* The builtin transliteration handling only + supports the internal encoding. */ + if (translit + && __strcasecmp_l (steps[cnt].__from_name, + "INTERNAL", _nl_C_locobj_ptr) == 0) + conv_flags |= __GCONV_TRANSLIT; + + /* If this is the last step we must not allocate an + output buffer. */ + if (cnt < nsteps - 1) + { + result->__data[cnt].__flags = conv_flags; + + /* Allocate the buffer. */ + size = (GCONV_NCHAR_GOAL * steps[cnt].__max_needed_to); + + result->__data[cnt].__outbuf = malloc (size); + if (result->__data[cnt].__outbuf == NULL) + { + res = __GCONV_NOMEM; + goto bail; + } + + result->__data[cnt].__outbufend = + result->__data[cnt].__outbuf + size; + } + else + { + /* Handle the last entry. */ + result->__data[cnt].__flags = conv_flags | __GCONV_IS_LAST; + + break; + } + } + } + + if (res != __GCONV_OK) + { + /* Something went wrong. Free all the resources. */ + int serrno; + bail: + serrno = errno; + + if (result != NULL) + { + while (cnt-- > 0) + free (result->__data[cnt].__outbuf); + + free (result); + result = NULL; + } + + __gconv_close_transform (steps, nsteps); + + __set_errno (serrno); + } + } + + *handle = result; + return res; +} diff --git a/REORG.TODO/iconv/gconv_simple.c b/REORG.TODO/iconv/gconv_simple.c new file mode 100644 index 0000000000..863d3dcc3f --- /dev/null +++ b/REORG.TODO/iconv/gconv_simple.c @@ -0,0 +1,1329 @@ +/* Simple transformations functions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <byteswap.h> +#include <dlfcn.h> +#include <endian.h> +#include <errno.h> +#include <gconv.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <sys/param.h> +#include <gconv_int.h> + +#define BUILTIN_ALIAS(s1, s2) /* nothing */ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ + extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \ + const unsigned char **, const unsigned char *, \ + unsigned char **, size_t *, int, int); +#include "gconv_builtin.h" + + +#ifndef EILSEQ +# define EILSEQ EINVAL +#endif + + +/* Specialized conversion function for a single byte to INTERNAL, recognizing + only ASCII characters. */ +wint_t +__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c) +{ + if (c < 0x80) + return c; + else + return WEOF; +} + + +/* Transform from the internal, UCS4-like format, to UCS4. The + difference between the internal ucs4 format and the real UCS4 + format is, if any, the endianess. 
The Unicode/ISO 10646 says that + unless some higher protocol specifies it differently, the byte + order is big endian.*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs4_loop +#define TO_LOOP internal_ucs4_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs4 +#define ONE_DIRECTION 0 + + +static inline int +__attribute ((always_inline)) +internal_ucs4_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +#if __BYTE_ORDER == __LITTLE_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; + uint32_t *outptr32 = (uint32_t *) outptr; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + *outptr32++ = bswap_32 (*(const uint32_t *) inptr); + + *inptrp = inptr; + *outptrp = (unsigned char *) outptr32; +#elif __BYTE_ORDER == __BIG_ENDIAN + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +#else +# error "This endianess is not supported." +#endif + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +#if !_STRING_ARCH_unaligned +static inline int +__attribute ((always_inline)) +internal_ucs4_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +# if __BYTE_ORDER == __LITTLE_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4) + { + outptr[0] = inptr[3]; + outptr[1] = inptr[2]; + outptr[2] = inptr[1]; + outptr[3] = inptr[0]; + } + + *inptrp = inptr; + *outptrp = outptr; +# elif __BYTE_ORDER == __BIG_ENDIAN + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +# else +# error "This endianess is not supported." +# endif + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} +#endif + + +static inline int +__attribute ((always_inline)) +internal_ucs4_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__glibc_unlikely (cnt < 4)) + { + /* Still not enough bytes. Store the ones in the input buffer. 
*/ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + +#if __BYTE_ORDER == __LITTLE_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; + +#elif __BYTE_ORDER == __BIG_ENDIAN + /* XXX unaligned */ + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; +#else +# error "This endianess is not supported." +#endif + *outptrp += 4; + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* Transform from UCS4 to the internal, UCS4-like format. Unlike + for the other direction we have to check for correct values here. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs4_internal_loop +#define TO_LOOP ucs4_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ucs4_internal +#define ONE_DIRECTION 0 + + +static inline int +__attribute ((always_inline)) +ucs4_internal_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + uint32_t inval; + +#if __BYTE_ORDER == __LITTLE_ENDIAN + inval = bswap_32 (*(const uint32_t *) inptr); +#else + inval = *(const uint32_t *) inptr; +#endif + + if (__glibc_unlikely (inval > 0x7fffffff)) + { + /* The value is too large. 
We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + + *((uint32_t *) outptr) = inval; + outptr += sizeof (uint32_t); + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +#if !_STRING_ARCH_unaligned +static inline int +__attribute ((always_inline)) +ucs4_internal_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + if (__glibc_unlikely (inptr[0] > 0x80)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. 
*/ + ++*irreversible; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + +# if __BYTE_ORDER == __LITTLE_ENDIAN + outptr[3] = inptr[0]; + outptr[2] = inptr[1]; + outptr[1] = inptr[2]; + outptr[0] = inptr[3]; +# else + outptr[0] = inptr[0]; + outptr[1] = inptr[1]; + outptr[2] = inptr[2]; + outptr[3] = inptr[3]; +# endif + outptr += 4; + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} +#endif + + +static inline int +__attribute ((always_inline)) +ucs4_internal_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + int flags = step_data->__flags; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__glibc_unlikely (cnt < 4)) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + + if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x80, + 0)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. 
*/ + if (!(flags & __GCONV_IGNORE_ERRORS)) + { + *inptrp -= cnt - (state->__count & 7); + return __GCONV_ILLEGAL_INPUT; + } + } + else + { +#if __BYTE_ORDER == __LITTLE_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; +#elif __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; +#endif + + *outptrp += 4; + } + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* Similarly for the little endian form. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs4le_loop +#define TO_LOOP internal_ucs4le_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs4le +#define ONE_DIRECTION 0 + + +static inline int +__attribute ((always_inline)) +internal_ucs4le_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +#if __BYTE_ORDER == __BIG_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; + uint32_t *outptr32 = (uint32_t *) outptr; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + *outptr32++ = bswap_32 (*(const uint32_t *) inptr); + outptr = (unsigned char *) outptr32; + + *inptrp = inptr; + *outptrp = outptr; +#elif __BYTE_ORDER == __LITTLE_ENDIAN + /* Simply copy the data. 
*/ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +#else +# error "This endianess is not supported." +#endif + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +#if !_STRING_ARCH_unaligned +static inline int +__attribute ((always_inline)) +internal_ucs4le_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +# if __BYTE_ORDER == __BIG_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4) + { + outptr[0] = inptr[3]; + outptr[1] = inptr[2]; + outptr[2] = inptr[1]; + outptr[3] = inptr[0]; + } + + *inptrp = inptr; + *outptrp = outptr; +# elif __BYTE_ORDER == __LITTLE_ENDIAN + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +# else +# error "This endianess is not supported." +# endif + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*inptrp + 4 > inend) + result = __GCONV_INCOMPLETE_INPUT; + else + { + assert (*outptrp + 4 > outend); + result = __GCONV_FULL_OUTPUT; + } + + return result; +} +#endif + + +static inline int +__attribute ((always_inline)) +internal_ucs4le_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__glibc_unlikely (cnt < 4)) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + +#if __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; + +#else + /* XXX unaligned */ + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; + +#endif + + *outptrp += 4; + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* And finally from UCS4-LE to the internal encoding. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs4le_internal_loop +#define TO_LOOP ucs4le_internal_loop /* This is not used. 
*/ +#define FUNCTION_NAME __gconv_transform_ucs4le_internal +#define ONE_DIRECTION 0 + + +static inline int +__attribute ((always_inline)) +ucs4le_internal_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + uint32_t inval; + +#if __BYTE_ORDER == __BIG_ENDIAN + inval = bswap_32 (*(const uint32_t *) inptr); +#else + inval = *(const uint32_t *) inptr; +#endif + + if (__glibc_unlikely (inval > 0x7fffffff)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + + *((uint32_t *) outptr) = inval; + outptr += sizeof (uint32_t); + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*inptrp + 4 > inend) + result = __GCONV_INCOMPLETE_INPUT; + else + { + assert (*outptrp + 4 > outend); + result = __GCONV_FULL_OUTPUT; + } + + return result; +} + +#if !_STRING_ARCH_unaligned +static inline int +__attribute ((always_inline)) +ucs4le_internal_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + if (__glibc_unlikely (inptr[3] > 0x80)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + +# if __BYTE_ORDER == __BIG_ENDIAN + outptr[3] = inptr[0]; + outptr[2] = inptr[1]; + outptr[1] = inptr[2]; + outptr[0] = inptr[3]; +# else + outptr[0] = inptr[0]; + outptr[1] = inptr[1]; + outptr[2] = inptr[2]; + outptr[3] = inptr[3]; +# endif + + outptr += 4; + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*inptrp + 4 > inend) + result = __GCONV_INCOMPLETE_INPUT; + else + { + assert (*outptrp + 4 > outend); + result = __GCONV_FULL_OUTPUT; + } + + return result; +} +#endif + + +static inline int +__attribute ((always_inline)) +ucs4le_internal_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + int flags = step_data->__flags; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__glibc_unlikely (cnt < 4)) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + + if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x80, + 0)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (!(flags & __GCONV_IGNORE_ERRORS)) + return __GCONV_ILLEGAL_INPUT; + } + else + { +#if __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; +#else + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; +#endif + + *outptrp += 4; + } + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* Convert from ISO 646-IRV to the internal (UCS4-like) format. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ascii_internal_loop +#define TO_LOOP ascii_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ascii_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (__glibc_unlikely (*inptr > '\x7f')) \ + { \ + /* The value is too large. We don't try transliteration here since \ + this is not an error because of the lack of possibilities to \ + represent the result. This is a genuine bug in the input since \ + ASCII does not allow such values. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + } \ + else \ + { \ + /* It's an one byte sequence. */ \ + *((uint32_t *) outptr) = *inptr++; \ + outptr += sizeof (uint32_t); \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 1 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ascii_loop +#define TO_LOOP internal_ascii_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ascii +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \ + { \ + UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + else \ + { \ + /* It's an one byte sequence. */ \ + *outptr++ = *((const uint32_t *) inptr); \ + inptr += sizeof (uint32_t); \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UTF-8. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 1 +#define MAX_NEEDED_TO 6 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_utf8_loop +#define TO_LOOP internal_utf8_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_utf8 +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t wc = *((const uint32_t *) inptr); \ + \ + if (__glibc_likely (wc < 0x80)) \ + /* It's an one byte sequence. */ \ + *outptr++ = (unsigned char) wc; \ + else if (__glibc_likely (wc <= 0x7fffffff \ + && (wc < 0xd800 || wc > 0xdfff))) \ + { \ + size_t step; \ + unsigned char *start; \ + \ + for (step = 2; step < 6; ++step) \ + if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \ + break; \ + \ + if (__glibc_unlikely (outptr + step > outend)) \ + { \ + /* Too long. */ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ + start = outptr; \ + *outptr = (unsigned char) (~0xff >> step); \ + outptr += step; \ + do \ + { \ + start[--step] = 0x80 | (wc & 0x3f); \ + wc >>= 6; \ + } \ + while (step > 1); \ + start[0] |= wc; \ + } \ + else \ + { \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + \ + inptr += 4; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UTF-8 to the internal (UCS4-like) format. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 6 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP utf8_internal_loop +#define TO_LOOP utf8_internal_loop /* This is not used. 
*/ +#define FUNCTION_NAME __gconv_transform_utf8_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + /* Next input byte. */ \ + uint32_t ch = *inptr; \ + \ + if (__glibc_likely (ch < 0x80)) \ + { \ + /* One byte sequence. */ \ + ++inptr; \ + } \ + else \ + { \ + uint_fast32_t cnt; \ + uint_fast32_t i; \ + \ + if (ch >= 0xc2 && ch < 0xe0) \ + { \ + /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \ + otherwise the wide character could have been represented \ + using a single byte. */ \ + cnt = 2; \ + ch &= 0x1f; \ + } \ + else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ + else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ + else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ + ch &= 0x03; \ + } \ + else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \ + { \ + /* We expect six bytes. */ \ + cnt = 6; \ + ch &= 0x01; \ + } \ + else \ + { \ + /* Search the end of this ill-formed UTF-8 character. This \ + is the next byte with (x & 0xc0) != 0x80. */ \ + i = 0; \ + do \ + ++i; \ + while (inptr + i < inend \ + && (*(inptr + i) & 0xc0) == 0x80 \ + && i < 5); \ + \ + errout: \ + STANDARD_FROM_LOOP_ERR_HANDLER (i); \ + } \ + \ + if (__glibc_unlikely (inptr + cnt > inend)) \ + { \ + /* We don't have enough input. But before we report that check \ + that all the bytes are correct. */ \ + for (i = 1; inptr + i < inend; ++i) \ + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + \ + if (__glibc_likely (inptr + i == inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + goto errout; \ + } \ + \ + /* Read the possible remaining bytes. 
*/ \ + for (i = 1; i < cnt; ++i) \ + { \ + uint32_t byte = inptr[i]; \ + \ + if ((byte & 0xc0) != 0x80) \ + /* This is an illegal encoding. */ \ + break; \ + \ + ch <<= 6; \ + ch |= byte & 0x3f; \ + } \ + \ + /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ + If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ + have been represented with fewer than cnt bytes. */ \ + if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ + /* Do not accept UTF-16 surrogates. */ \ + || (ch >= 0xd800 && ch <= 0xdfff)) \ + { \ + /* This is an illegal encoding. */ \ + goto errout; \ + } \ + \ + inptr += cnt; \ + } \ + \ + /* Now adjust the pointers and store the result. */ \ + *((uint32_t *) outptr) = ch; \ + outptr += sizeof (uint32_t); \ + } +#define LOOP_NEED_FLAGS + +#define STORE_REST \ + { \ + /* We store the remaining bytes while converting them into the UCS4 \ + format. We can assume that the first byte in the buffer is \ + correct and that it requires a larger number of bytes than there \ + are in the input buffer. */ \ + wint_t ch = **inptrp; \ + size_t cnt, r; \ + \ + state->__count = inend - *inptrp; \ + \ + assert (ch != 0xc0 && ch != 0xc1); \ + if (ch >= 0xc2 && ch < 0xe0) \ + { \ + /* We expect two bytes. The first byte cannot be 0xc0 or \ + 0xc1, otherwise the wide character could have been \ + represented using a single byte. */ \ + cnt = 2; \ + ch &= 0x1f; \ + } \ + else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ + else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ + else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ + ch &= 0x03; \ + } \ + else \ + { \ + /* We expect six bytes. */ \ + cnt = 6; \ + ch &= 0x01; \ + } \ + \ + /* The first byte is already consumed. 
*/ \ + r = cnt - 1; \ + while (++(*inptrp) < inend) \ + { \ + ch <<= 6; \ + ch |= **inptrp & 0x3f; \ + --r; \ + } \ + \ + /* Shift for the so far missing bytes. */ \ + ch <<= r * 6; \ + \ + /* Store the number of bytes expected for the entire sequence. */ \ + state->__count |= cnt << 8; \ + \ + /* Store the value. */ \ + state->__value.__wch = ch; \ + } + +#define UNPACK_BYTES \ + { \ + static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \ + wint_t wch = state->__value.__wch; \ + size_t ntotal = state->__count >> 8; \ + \ + inlen = state->__count & 255; \ + \ + bytebuf[0] = inmask[ntotal - 2]; \ + \ + do \ + { \ + if (--ntotal < inlen) \ + bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ + wch >>= 6; \ + } \ + while (ntotal > 1); \ + \ + bytebuf[0] |= wch; \ + } + +#define CLEAR_STATE \ + state->__count = 0 + + +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UCS2 to the internal (UCS4-like) format. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs2_internal_loop +#define TO_LOOP ucs2_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ucs2_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint16_t u1 = get16 (inptr); \ + \ + if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ + { \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (2); \ + } \ + \ + *((uint32_t *) outptr) = u1; \ + outptr += sizeof (uint32_t); \ + inptr += 2; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UCS2. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs2_loop +#define TO_LOOP internal_ucs2_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs2 +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t val = *((const uint32_t *) inptr); \ + \ + if (__glibc_unlikely (val >= 0x10000)) \ + { \ + UNICODE_TAG_HANDLER (val, 4); \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. \ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. If we let \ + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ + result = __GCONV_ILLEGAL_INPUT; \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ + ++*irreversible; \ + continue; \ + } \ + else \ + { \ + put16 (outptr, val); \ + outptr += sizeof (uint16_t); \ + inptr += 4; \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs2reverse_internal_loop +#define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/ +#define FUNCTION_NAME __gconv_transform_ucs2reverse_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint16_t u1 = bswap_16 (get16 (inptr)); \ + \ + if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ + { \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 2; \ + ++*irreversible; \ + continue; \ + } \ + \ + *((uint32_t *) outptr) = u1; \ + outptr += sizeof (uint32_t); \ + inptr += 2; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs2reverse_loop +#define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/ +#define FUNCTION_NAME __gconv_transform_internal_ucs2reverse +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t val = *((const uint32_t *) inptr); \ + if (__glibc_unlikely (val >= 0x10000)) \ + { \ + UNICODE_TAG_HANDLER (val, 4); \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. \ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. 
If we let \ + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 4; \ + ++*irreversible; \ + continue; \ + } \ + else \ + { \ + put16 (outptr, bswap_16 (val)); \ + outptr += sizeof (uint16_t); \ + inptr += 4; \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> diff --git a/REORG.TODO/iconv/gconv_trans.c b/REORG.TODO/iconv/gconv_trans.c new file mode 100644 index 0000000000..53b8822615 --- /dev/null +++ b/REORG.TODO/iconv/gconv_trans.c @@ -0,0 +1,239 @@ +/* Transliteration using the locale's data. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <assert.h> +#include <dlfcn.h> +#include <search.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +#include <libc-lock.h> +#include "gconv_int.h" +#include "../locale/localeinfo.h" + + +int +__gconv_transliterate (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char *inbufstart, + const unsigned char **inbufp, + const unsigned char *inbufend, + unsigned char **outbufstart, size_t *irreversible) +{ + /* Find out about the locale's transliteration. */ + uint_fast32_t size; + const uint32_t *from_idx; + const uint32_t *from_tbl; + const uint32_t *to_idx; + const uint32_t *to_tbl; + const uint32_t *winbuf; + const uint32_t *winbufend; + uint_fast32_t low; + uint_fast32_t high; + + /* The input buffer. There are actually 4-byte values. */ + winbuf = (const uint32_t *) *inbufp; + winbufend = (const uint32_t *) inbufend; + + __gconv_fct fct = step->__fct; +#ifdef PTR_DEMANGLE + if (step->__shlib_handle != NULL) + PTR_DEMANGLE (fct); +#endif + + /* If there is no transliteration information in the locale don't do + anything and return the error. */ + size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE); + if (size == 0) + goto no_rules; + + /* Get the rest of the values. */ + from_idx = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX); + from_tbl = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL); + to_idx = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX); + to_tbl = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL); + + /* Test whether there is enough input. */ + if (winbuf + 1 > winbufend) + return (winbuf == winbufend + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); + + /* The array starting at FROM_IDX contains indeces to the string table + in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we + are doing binary search. 
*/ + low = 0; + high = size; + while (low < high) + { + uint_fast32_t med = (low + high) / 2; + uint32_t idx; + int cnt; + + /* Compare the string at this index with the string at the current + position in the input buffer. */ + idx = from_idx[med]; + cnt = 0; + do + { + if (from_tbl[idx + cnt] != winbuf[cnt]) + /* Does not match. */ + break; + ++cnt; + } + while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend); + + if (cnt > 0 && from_tbl[idx + cnt] == L'\0') + { + /* Found a matching input sequence. Now try to convert the + possible replacements. */ + uint32_t idx2 = to_idx[med]; + + do + { + /* Determine length of replacement. */ + uint_fast32_t len = 0; + int res; + const unsigned char *toinptr; + unsigned char *outptr; + + while (to_tbl[idx2 + len] != L'\0') + ++len; + + /* Try this input text. */ + toinptr = (const unsigned char *) &to_tbl[idx2]; + outptr = *outbufstart; + res = DL_CALL_FCT (fct, + (step, step_data, &toinptr, + (const unsigned char *) &to_tbl[idx2 + len], + &outptr, NULL, 0, 0)); + if (res != __GCONV_ILLEGAL_INPUT) + { + /* If the conversion succeeds we have to increment the + input buffer. */ + if (res == __GCONV_EMPTY_INPUT) + { + *inbufp += cnt * sizeof (uint32_t); + ++*irreversible; + res = __GCONV_OK; + } + /* Do not increment the output pointer if we could not + store the entire output. */ + if (res != __GCONV_FULL_OUTPUT) + *outbufstart = outptr; + + return res; + } + + /* Next replacement. */ + idx2 += len + 1; + } + while (to_tbl[idx2] != L'\0'); + + /* Nothing found, continue searching. */ + } + else if (cnt > 0) + /* This means that the input buffer contents matches a prefix of + an entry. Since we cannot match it unless we get more input, + we will tell the caller about it. */ + return __GCONV_INCOMPLETE_INPUT; + + if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt]) + low = med + 1; + else + high = med; + } + + no_rules: + /* Maybe the character is supposed to be ignored. 
*/ + if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0) + { + int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN); + const uint32_t *ranges = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE); + const uint32_t wc = *(const uint32_t *) (*inbufp); + int i; + + /* Test whether there is enough input. */ + if (winbuf + 1 > winbufend) + return (winbuf == winbufend + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); + + for (i = 0; i < n; ranges += 3, ++i) + if (ranges[0] <= wc && wc <= ranges[1] + && (wc - ranges[0]) % ranges[2] == 0) + { + /* Matches the range. Ignore it. */ + *inbufp += 4; + ++*irreversible; + return __GCONV_OK; + } + else if (wc < ranges[0]) + /* There cannot be any other matching range since they are + sorted. */ + break; + } + + /* One last chance: use the default replacement. */ + if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0) + { + const uint32_t *default_missing = (const uint32_t *) + _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING); + const unsigned char *toinptr = (const unsigned char *) default_missing; + uint32_t len = _NL_CURRENT_WORD (LC_CTYPE, + _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN); + unsigned char *outptr; + int res; + + /* Test whether there is enough input. */ + if (winbuf + 1 > winbufend) + return (winbuf == winbufend + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); + + outptr = *outbufstart; + res = DL_CALL_FCT (fct, + (step, step_data, &toinptr, + (const unsigned char *) (default_missing + len), + &outptr, NULL, 0, 0)); + + if (res != __GCONV_ILLEGAL_INPUT) + { + /* If the conversion succeeds we have to increment the + input buffer. */ + if (res == __GCONV_EMPTY_INPUT) + { + /* This worked but is not reversible. */ + ++*irreversible; + *inbufp += 4; + res = __GCONV_OK; + } + *outbufstart = outptr; + + return res; + } + } + + /* Haven't found a match. 
*/ + return __GCONV_ILLEGAL_INPUT; +} +libc_hidden_def (__gconv_transliterate) diff --git a/REORG.TODO/iconv/iconv.c b/REORG.TODO/iconv/iconv.c new file mode 100644 index 0000000000..2c6f0f0bd1 --- /dev/null +++ b/REORG.TODO/iconv/iconv.c @@ -0,0 +1,95 @@ +/* Convert characters in input buffer using conversion descriptor to + output buffer. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> /* for NULL */ +#include <errno.h> +#include <iconv.h> + +#include <gconv_int.h> + +#include <assert.h> + + +size_t +iconv (iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, + size_t *outbytesleft) +{ + __gconv_t gcd = (__gconv_t) cd; + char *outstart = outbuf ? 
*outbuf : NULL; + size_t irreversible; + int result; + + if (__glibc_unlikely (inbuf == NULL || *inbuf == NULL)) + { + if (outbuf == NULL || *outbuf == NULL) + result = __gconv (gcd, NULL, NULL, NULL, NULL, &irreversible); + else + result = __gconv (gcd, NULL, NULL, (unsigned char **) outbuf, + (unsigned char *) (outstart + *outbytesleft), + &irreversible); + } + else + { + const char *instart = *inbuf; + + result = __gconv (gcd, (const unsigned char **) inbuf, + (const unsigned char *) (*inbuf + *inbytesleft), + (unsigned char **) outbuf, + (unsigned char *) (*outbuf + *outbytesleft), + &irreversible); + + *inbytesleft -= *inbuf - instart; + } + if (outstart != NULL) + *outbytesleft -= *outbuf - outstart; + + switch (__builtin_expect (result, __GCONV_OK)) + { + case __GCONV_ILLEGAL_DESCRIPTOR: + __set_errno (EBADF); + irreversible = (size_t) -1L; + break; + + case __GCONV_ILLEGAL_INPUT: + __set_errno (EILSEQ); + irreversible = (size_t) -1L; + break; + + case __GCONV_FULL_OUTPUT: + __set_errno (E2BIG); + irreversible = (size_t) -1L; + break; + + case __GCONV_INCOMPLETE_INPUT: + __set_errno (EINVAL); + irreversible = (size_t) -1L; + break; + + case __GCONV_EMPTY_INPUT: + case __GCONV_OK: + /* Nothing. */ + break; + + default: + assert (!"Nothing like this should happen"); + } + + return irreversible; +} diff --git a/REORG.TODO/iconv/iconv.h b/REORG.TODO/iconv/iconv.h new file mode 100644 index 0000000000..d5d9d00f6b --- /dev/null +++ b/REORG.TODO/iconv/iconv.h @@ -0,0 +1,55 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _ICONV_H +#define _ICONV_H 1 + +#include <features.h> +#define __need_size_t +#include <stddef.h> + + +__BEGIN_DECLS + +/* Identifier for conversion method from one codeset to another. */ +typedef void *iconv_t; + + +/* Allocate descriptor for code conversion from codeset FROMCODE to + codeset TOCODE. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern iconv_t iconv_open (const char *__tocode, const char *__fromcode); + +/* Convert at most *INBYTESLEFT bytes from *INBUF according to the + code conversion algorithm specified by CD and place up to + *OUTBYTESLEFT bytes in buffer at *OUTBUF. */ +extern size_t iconv (iconv_t __cd, char **__restrict __inbuf, + size_t *__restrict __inbytesleft, + char **__restrict __outbuf, + size_t *__restrict __outbytesleft); + +/* Free resources allocated for descriptor CD for code conversion. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int iconv_close (iconv_t __cd); + +__END_DECLS + +#endif /* iconv.h */ diff --git a/REORG.TODO/iconv/iconv_charmap.c b/REORG.TODO/iconv/iconv_charmap.c new file mode 100644 index 0000000000..b8ece3bda2 --- /dev/null +++ b/REORG.TODO/iconv/iconv_charmap.c @@ -0,0 +1,560 @@ +/* Convert using charmaps and possibly iconv(). + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <iconv.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "iconv_prog.h" + + +/* Prototypes for a few program-wide used functions. */ +#include <programs/xmalloc.h> + + +struct convtable +{ + int term[256 / 8]; + union + { + struct convtable *sub; + struct charseq *out; + } val[256]; +}; + + +static inline struct convtable * +allocate_table (void) +{ + return (struct convtable *) xcalloc (1, sizeof (struct convtable)); +} + + +static inline int +is_term (struct convtable *tbl, unsigned int idx) +{ + return tbl->term[idx / 8] & (1 << (idx % 8)); +} + + +static inline void +clear_term (struct convtable *tbl, unsigned int idx) +{ + tbl->term[idx / 8] &= ~(1 << (idx % 8)); +} + + +static inline void +set_term (struct convtable *tbl, unsigned int idx) +{ + tbl->term[idx / 8] |= 1 << (idx % 8); +} + + +/* Generate the conversion table. 
*/ +static struct convtable *use_from_charmap (struct charmap_t *from_charmap, + const char *to_code); +static struct convtable *use_to_charmap (const char *from_code, + struct charmap_t *to_charmap); +static struct convtable *use_both_charmaps (struct charmap_t *from_charmap, + struct charmap_t *to_charmap); + +/* Prototypes for the functions doing the actual work. */ +static int process_block (struct convtable *tbl, char *addr, size_t len, + FILE *output); +static int process_fd (struct convtable *tbl, int fd, FILE *output); +static int process_file (struct convtable *tbl, FILE *input, FILE *output); + + +/* Convert the input files ARGV[REMAINING] .. ARGV[ARGC - 1] (standard + input if there are none) with a byte-mapping table built from + FROM_CHARMAP and/or TO_CHARMAP and write the result to OUTPUT_FILE + (standard output if it is NULL or "-"). Returns EXIT_SUCCESS or + EXIT_FAILURE. */ +int +charmap_conversion (const char *from_code, struct charmap_t *from_charmap, + const char *to_code, struct charmap_t *to_charmap, + int argc, int remaining, char *argv[], + const char *output_file) +{ + struct convtable *cvtbl; + int status = EXIT_SUCCESS; + + /* We have three different cases to handle: + + - both, from_charmap and to_charmap, are available. This means we + can assume that the symbolic names match and use them to create + the mapping. + + - only from_charmap is available. In this case we can only hope that + the symbolic names used are of the <Uxxxx> form in which case we + can use a UCS4->"to_code" iconv() conversion for the second step. + + - only to_charmap is available. This is similar, only that we would + use iconv() for the "from_code"->UCS4 conversion. + + We first create a table which maps input bytes into output bytes. + Once this is done we can handle all three of the cases above + equally. */ + if (from_charmap != NULL) + { + if (to_charmap == NULL) + cvtbl = use_from_charmap (from_charmap, to_code); + else + cvtbl = use_both_charmaps (from_charmap, to_charmap); + } + else + { + assert (to_charmap != NULL); + cvtbl = use_to_charmap (from_code, to_charmap); + } + + /* If we couldn't generate a table stop now. */ + if (cvtbl == NULL) + return EXIT_FAILURE; + + /* Determine output file. 
*/ + FILE *output; + if (output_file != NULL && strcmp (output_file, "-") != 0) + { + output = fopen (output_file, "w"); + if (output == NULL) + error (EXIT_FAILURE, errno, _("cannot open output file")); + } + else + output = stdout; + + /* We can now start the conversion. */ + if (remaining == argc) + { + if (process_file (cvtbl, stdin, output) != 0) + status = EXIT_FAILURE; + } + else + do + { + int fd; + + if (verbose) + printf ("%s:\n", argv[remaining]); + if (strcmp (argv[remaining], "-") == 0) + fd = 0; + else + { + fd = open (argv[remaining], O_RDONLY); + + if (fd == -1) + { + error (0, errno, _("cannot open input file `%s'"), + argv[remaining]); + status = EXIT_FAILURE; + continue; + } + } + +#ifdef _POSIX_MAPPED_FILES + struct stat64 st; + char *addr; + /* We have possibilities for reading the input file. First try + to mmap() it since this will provide the fastest solution. */ + if (fstat64 (fd, &st) == 0 + && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, + fd, 0)) != MAP_FAILED)) + { + /* Yes, we can use mmap(). The descriptor is not needed + anymore. */ + if (close (fd) != 0) + error (EXIT_FAILURE, errno, + _("error while closing input `%s'"), argv[remaining]); + + if (process_block (cvtbl, addr, st.st_size, output) < 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + } + else +#endif /* _POSIX_MAPPED_FILES */ + { + /* Read the file in pieces. */ + if (process_fd (cvtbl, fd, output) != 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + /* We don't need the input file anymore. 
*/ + close (fd); + + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + + /* Now close the file. */ + close (fd); + } + } + while (++remaining < argc); + + /* Close the output file we opened ourselves so that buffered write + errors are detected instead of being lost at exit. */ + if (output != stdout && fclose (output) != 0) + { + error (0, errno, _("error while closing output file")); + status = EXIT_FAILURE; + } + + /* All done. */ + return status; +} + + +/* Add the IN->OUT mapping to TBL. OUT is potentially stored in the table. + IN is used only here, so it need not be kept live afterwards. */ +static void +add_bytes (struct convtable *tbl, const struct charseq *in, struct charseq *out) +{ + int n = 0; + unsigned int byte; + + assert (in->nbytes > 0); + + byte = ((unsigned char *) in->bytes)[n]; + while (n + 1 < in->nbytes) + { + if (is_term (tbl, byte) || tbl->val[byte].sub == NULL) + { + /* Note that we simply ignore a definition for a byte sequence + which is also the prefix for a longer one. */ + clear_term (tbl, byte); + tbl->val[byte].sub = + (struct convtable *) xcalloc (1, sizeof (struct convtable)); + } + + tbl = tbl->val[byte].sub; + + byte = ((unsigned char *) in->bytes)[++n]; + } + + /* Only add the new sequence if there is none yet and the byte sequence + is not part of an even longer one. */ + if (! is_term (tbl, byte) && tbl->val[byte].sub == NULL) + { + set_term (tbl, byte); + tbl->val[byte].out = out; + } +} + +/* Try to convert SEQ from WCHAR_T format using CD. + Returns a malloc'd struct or NULL. */ +static struct charseq * +convert_charseq (iconv_t cd, const struct charseq *seq) +{ + struct charseq *result = NULL; + + if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE) + { + /* There is a chance. Try the iconv module. */ + wchar_t inbuf[1] = { seq->ucs4 }; + unsigned char outbuf[64]; + char *inptr = (char *) inbuf; + size_t inlen = sizeof (inbuf); + char *outptr = (char *) outbuf; + size_t outlen = sizeof (outbuf); + + (void) iconv (cd, &inptr, &inlen, &outptr, &outlen); + + if (outptr != (char *) outbuf) + { + /* We got some output. Good, use it. 
*/ + outlen = sizeof (outbuf) - outlen; + assert ((char *) outbuf + outlen == outptr); + + result = xmalloc (sizeof (struct charseq) + outlen); + result->name = seq->name; + result->ucs4 = seq->ucs4; + result->nbytes = outlen; + memcpy (result->bytes, outbuf, outlen); + } + + /* Clear any possible state left behind. */ + (void) iconv (cd, NULL, NULL, NULL, NULL); + } + + return result; +} + + +static struct convtable * +use_from_charmap (struct charmap_t *from_charmap, const char *to_code) +{ + /* We iterate over all entries in the from_charmap and for those which + have a known UCS4 representation we use an iconv() call to determine + the mapping to the to_code charset. */ + struct convtable *rettbl; + iconv_t cd; + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + cd = iconv_open (to_code, "WCHAR_T"); + if (cd == (iconv_t) -1) + /* We cannot do anything. */ + return NULL; + + rettbl = allocate_table (); + + while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *in = data; + struct charseq *newp = convert_charseq (cd, in); + if (newp != NULL) + add_bytes (rettbl, in, newp); + } + + iconv_close (cd); + + return rettbl; +} + + +static struct convtable * +use_to_charmap (const char *from_code, struct charmap_t *to_charmap) +{ + /* We iterate over all entries in the to_charmap and for those which + have a known UCS4 representation we use an iconv() call to determine + the mapping to the from_code charset. */ + struct convtable *rettbl; + iconv_t cd; + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + /* Note that the conversion we use here is the reverse direction. Without + exhaustive search we cannot figure out which input yields the UCS4 + character we are looking for. Therefore we determine it the other + way round. */ + cd = iconv_open (from_code, "WCHAR_T"); + if (cd == (iconv_t) -1) + /* We cannot do anything. 
*/ + return NULL; + + rettbl = allocate_table (); + + while (iterate_table (&to_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *out = data; + struct charseq *newp = convert_charseq (cd, out); + if (newp != NULL) + { + add_bytes (rettbl, newp, out); + free (newp); + } + } + + iconv_close (cd); + + return rettbl; +} + + +static struct convtable * +use_both_charmaps (struct charmap_t *from_charmap, + struct charmap_t *to_charmap) +{ + /* In this case we iterate over all the entries in the from_charmap, + determine the internal name, and find an appropriate entry in the + to_charmap (if it exists). */ + struct convtable *rettbl = allocate_table (); + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *in = (struct charseq *) data; + struct charseq *out = charmap_find_value (to_charmap, key, keylen); + + if (out != NULL) + add_bytes (rettbl, in, out); + } + + return rettbl; +} + + +static int +process_block (struct convtable *tbl, char *addr, size_t len, FILE *output) +{ + size_t n = 0; + + while (n < len) + { + struct convtable *cur = tbl; + unsigned char *curp = (unsigned char *) addr; + unsigned int byte = *curp; + int cnt; + struct charseq *out; + + while (! is_term (cur, byte)) + if (cur->val[byte].sub == NULL) + { + /* This is an invalid sequence. Skip the first byte if we are + ignoring errors. Otherwise punt. */ + if (! omit_invalid) + { + error (0, 0, _("illegal input sequence at position %Zd"), n); + return -1; + } + + n -= curp - (unsigned char *) addr; + + byte = *(curp = (unsigned char *) ++addr); + if (++n >= len) + /* All converted. */ + return 0; + + cur = tbl; + } + else + { + cur = cur->val[byte].sub; + + if (++n >= len) + { + error (0, 0, _("\ +incomplete character or shift sequence at end of buffer")); + return -1; + } + + byte = *++curp; + } + + /* We found a final byte. Write the output bytes. 
*/ + out = cur->val[byte].out; + for (cnt = 0; cnt < out->nbytes; ++cnt) + fputc_unlocked (out->bytes[cnt], output); + + addr = (char *) curp + 1; + ++n; + } + + return 0; +} + + +static int +process_fd (struct convtable *tbl, int fd, FILE *output) +{ + /* We have a problem with reading from a descriptor since we must not + provide the iconv() function an incomplete character or shift + sequence at the end of the buffer. Since we have to deal with + arbitrary encodings we must read the whole text in a buffer and + process it in one step. */ + static char *inbuf = NULL; + static size_t maxlen = 0; + char *inptr = inbuf; + size_t actlen = 0; + + while (actlen < maxlen) + { + ssize_t n = read (fd, inptr, maxlen - actlen); + + if (n == 0) + /* No more text to read. */ + break; + + if (n == -1) + { + /* Error while reading. */ + error (0, errno, _("error while reading the input")); + return -1; + } + + inptr += n; + actlen += n; + } + + if (actlen == maxlen) + while (1) + { + ssize_t n; + char *new_inbuf; + + /* Increase the buffer. */ + new_inbuf = (char *) realloc (inbuf, maxlen + 32768); + if (new_inbuf == NULL) + { + error (0, errno, _("unable to allocate buffer for input")); + return -1; + } + inbuf = new_inbuf; + maxlen += 32768; + inptr = inbuf + actlen; + + do + { + n = read (fd, inptr, maxlen - actlen); + + if (n == 0) + /* No more text to read. */ + break; + + if (n == -1) + { + /* Error while reading. */ + error (0, errno, _("error while reading the input")); + return -1; + } + + inptr += n; + actlen += n; + } + while (actlen < maxlen); + + if (n == 0) + /* Break again so we leave both loops. */ + break; + } + + /* Now we have all the input in the buffer. Process it in one run. */ + return process_block (tbl, inbuf, actlen, output); +} + + +static int +process_file (struct convtable *tbl, FILE *input, FILE *output) +{ + /* This should be safe since we use this function only for `stdin' and + we haven't read anything so far. 
*/ + return process_fd (tbl, fileno (input), output); +} diff --git a/REORG.TODO/iconv/iconv_close.c b/REORG.TODO/iconv/iconv_close.c new file mode 100644 index 0000000000..b4b3aff082 --- /dev/null +++ b/REORG.TODO/iconv/iconv_close.c @@ -0,0 +1,36 @@ +/* Release any resource associated with given conversion descriptor. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <iconv.h> + +#include <gconv_int.h> + + +/* Release the conversion descriptor CD. Returns 0 on success. Returns + -1 with errno set to EBADF when CD is the invalid descriptor + (iconv_t) -1, and -1 when __gconv_close reports a failure. */ +int +iconv_close (iconv_t cd) +{ + /* Compare against the invalid descriptor using the descriptor type + itself, not a pointer to it. */ + if (__glibc_unlikely (cd == (iconv_t) -1L)) + { + __set_errno (EBADF); + return -1; + } + + return __gconv_close ((__gconv_t) cd) ? -1 : 0; +} diff --git a/REORG.TODO/iconv/iconv_open.c b/REORG.TODO/iconv/iconv_open.c new file mode 100644 index 0000000000..02e2b7d85e --- /dev/null +++ b/REORG.TODO/iconv/iconv_open.c @@ -0,0 +1,88 @@ +/* Get descriptor for character set conversion. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <alloca.h> +#include <errno.h> +#include <iconv.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +#include <gconv_int.h> +#include "gconv_charset.h" + + +iconv_t +iconv_open (const char *tocode, const char *fromcode) +{ + /* Normalize the name. We remove all characters beside alpha-numeric, + '_', '-', '/', '.', and ':'. */ + size_t tocode_len = strlen (tocode) + 3; + char *tocode_conv; + bool tocode_usealloca = __libc_use_alloca (tocode_len); + if (tocode_usealloca) + tocode_conv = (char *) alloca (tocode_len); + else + { + tocode_conv = (char *) malloc (tocode_len); + if (tocode_conv == NULL) + return (iconv_t) -1; + } + strip (tocode_conv, tocode); + tocode = (tocode_conv[2] == '\0' && tocode[0] != '\0' + ? upstr (tocode_conv, tocode) : tocode_conv); + + size_t fromcode_len = strlen (fromcode) + 3; + char *fromcode_conv; + bool fromcode_usealloca = __libc_use_alloca (fromcode_len); + if (fromcode_usealloca) + fromcode_conv = (char *) alloca (fromcode_len); + else + { + fromcode_conv = (char *) malloc (fromcode_len); + if (fromcode_conv == NULL) + { + if (! tocode_usealloca) + free (tocode_conv); + return (iconv_t) -1; + } + } + strip (fromcode_conv, fromcode); + fromcode = (fromcode_conv[2] == '\0' && fromcode[0] != '\0' + ? 
upstr (fromcode_conv, fromcode) : fromcode_conv); + + __gconv_t cd; + int res = __gconv_open (tocode, fromcode, &cd, 0); + + if (! fromcode_usealloca) + free (fromcode_conv); + if (! tocode_usealloca) + free (tocode_conv); + + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* We must set the error number according to the specs. */ + if (res == __GCONV_NOCONV || res == __GCONV_NODB) + __set_errno (EINVAL); + + cd = (iconv_t) -1; + } + + return (iconv_t) cd; +} diff --git a/REORG.TODO/iconv/iconv_prog.c b/REORG.TODO/iconv/iconv_prog.c new file mode 100644 index 0000000000..1397d2e9bd --- /dev/null +++ b/REORG.TODO/iconv/iconv_prog.c @@ -0,0 +1,803 @@ +/* Convert text in given files from the specified from-set to the to-set. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <argp.h> +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <iconv.h> +#include <langinfo.h> +#include <locale.h> +#include <search.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libintl.h> +#ifdef _POSIX_MAPPED_FILES +# include <sys/mman.h> +#endif +#include <charmap.h> +#include <gconv_int.h> +#include "iconv_prog.h" +#include "iconvconfig.h" + +/* Get libc version number. */ +#include "../version.h" + +#define PACKAGE _libc_intl_domainname + + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +#define OPT_VERBOSE 1000 +#define OPT_LIST 'l' + +/* Definitions of arguments for argp functions. */ +static const struct argp_option options[] = +{ + { NULL, 0, NULL, 0, N_("Input/Output format specification:") }, + { "from-code", 'f', N_("NAME"), 0, N_("encoding of original text") }, + { "to-code", 't', N_("NAME"), 0, N_("encoding for output") }, + { NULL, 0, NULL, 0, N_("Information:") }, + { "list", 'l', NULL, 0, N_("list all known coded character sets") }, + { NULL, 0, NULL, 0, N_("Output control:") }, + { NULL, 'c', NULL, 0, N_("omit invalid characters from output") }, + { "output", 'o', N_("FILE"), 0, N_("output file") }, + { "silent", 's', NULL, 0, N_("suppress warnings") }, + { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Short description of program. */ +static const char doc[] = N_("\ +Convert encoding of given files from one encoding to another."); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("[FILE...]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. 
*/ +static char *more_help (int key, const char *text, void *input); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + +/* Code sets to convert from and to respectively. An empty string as the + default causes the 'iconv_open' function to look up the charset of the + currently selected locale and use it. */ +static const char *from_code = ""; +static const char *to_code = ""; + +/* File to write output to. If NULL write to stdout. */ +static const char *output_file; + +/* Nonzero if verbose output is wanted. */ +int verbose; + +/* Nonzero if list of all coded character sets is wanted. */ +static int list; + +/* If nonzero omit invalid characters from output. */ +int omit_invalid; + +/* Prototypes for the functions doing the actual work. */ +static int process_block (iconv_t cd, char *addr, size_t len, FILE **output, + const char *output_file); +static int process_fd (iconv_t cd, int fd, FILE **output, + const char *output_file); +static int process_file (iconv_t cd, FILE *input, FILE **output, + const char *output_file); +static void print_known_names (void) internal_function; + + +/* Program entry point: parse the command line, then either print the list + of known coded character sets or go on to convert the input. */ +int +main (int argc, char *argv[]) +{ + int status = EXIT_SUCCESS; + int remaining; + iconv_t cd; + const char *orig_to_code; + struct charmap_t *from_charmap = NULL; + struct charmap_t *to_charmap = NULL; + + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); + + /* Set the text message domain. */ + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + /* List all coded character sets if wanted. */ + if (list) + { + print_known_names (); + exit (EXIT_SUCCESS); + } + + /* If we have to ignore errors make sure we use the appropriate name for + the to-character-set. 
*/ + orig_to_code = to_code; + if (omit_invalid) + { + const char *errhand = strchrnul (to_code, '/'); + int nslash = 2; + char *newp; + char *cp; + + if (*errhand == '/') + { + --nslash; + errhand = strchrnul (errhand + 1, '/'); + + if (*errhand == '/') + { + --nslash; + errhand = strchr (errhand, '\0'); + } + } + + /* Room for the name, the slashes still to be added, an optional ',' + followed by "IGNORE" (7 bytes), and the terminating NUL. */ + newp = (char *) alloca (errhand - to_code + nslash + 7 + 1); + cp = mempcpy (newp, to_code, errhand - to_code); + while (nslash-- > 0) + *cp++ = '/'; + if (cp[-1] != '/') + *cp++ = ','; + memcpy (cp, "IGNORE", sizeof ("IGNORE")); + + to_code = newp; + } + + /* POSIX 1003.2b introduces a silly thing: the arguments to -t and -f + can be file names of charmaps. In this case iconv will have to read + those charmaps and use them to do the conversion. But there are + holes in the specification. There is nothing said that if -f is a + charmap filename that -t must be, too. And vice versa. There is + also no word about the symbolic names used. What if they don't + match? */ + if (strchr (from_code, '/') != NULL) + /* The from-name might be a charmap file name. Try reading the + file. */ + from_charmap = charmap_read (from_code, /*0, 1*/1, 0, 0, 0); + + if (strchr (orig_to_code, '/') != NULL) + /* The to-name might be a charmap file name. Try reading the + file. */ + to_charmap = charmap_read (orig_to_code, /*0, 1,*/1, 0, 0, 0); + + + /* At this point we have to handle two cases. The first one is + where a charmap is used for the from- or to-charset, or both. We + handle this special since it is very different from the sane way of + doing things. The other case allows converting using the iconv() + function. */ + if (from_charmap != NULL || to_charmap != NULL) + /* Construct the conversion table and do the conversion. */ + status = charmap_conversion (from_code, from_charmap, to_code, to_charmap, + argc, remaining, argv, output_file); + else + { + /* Let's see whether we have these coded character sets. 
*/ + cd = iconv_open (to_code, from_code); + if (cd == (iconv_t) -1) + { + if (errno == EINVAL) + { + /* Try to be nice with the user and tell her which of the + two encoding names is wrong. This is possible because + all supported encodings can be converted from/to Unicode, + in other words, because the graph of encodings is + connected. */ + bool from_wrong = + (iconv_open ("UTF-8", from_code) == (iconv_t) -1 + && errno == EINVAL); + bool to_wrong = + (iconv_open (to_code, "UTF-8") == (iconv_t) -1 + && errno == EINVAL); + const char *from_pretty = + (from_code[0] ? from_code : nl_langinfo (CODESET)); + const char *to_pretty = + (orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET)); + + if (from_wrong) + { + if (to_wrong) + error (0, 0, + _("\ +conversions from `%s' and to `%s' are not supported"), + from_pretty, to_pretty); + else + error (0, 0, + _("conversion from `%s' is not supported"), + from_pretty); + } + else + { + if (to_wrong) + error (0, 0, + _("conversion to `%s' is not supported"), + to_pretty); + else + error (0, 0, + _("conversion from `%s' to `%s' is not supported"), + from_pretty, to_pretty); + } + + argp_help (&argp, stderr, ARGP_HELP_SEE, + program_invocation_short_name); + exit (1); + } + else + error (EXIT_FAILURE, errno, + _("failed to start conversion processing")); + } + + /* The output file. Will be opened when we are ready to produce + output. */ + FILE *output = NULL; + + /* Now process the remaining files. Write them to stdout or the file + specified with the `-o' parameter. If we have no file given as + the parameter process all from stdin. 
*/ + if (remaining == argc) + { + if (process_file (cd, stdin, &output, output_file) != 0) + status = EXIT_FAILURE; + } + else + do + { +#ifdef _POSIX_MAPPED_FILES + struct stat64 st; + char *addr; +#endif + int fd, ret; + + if (verbose) + fprintf (stderr, "%s:\n", argv[remaining]); + if (strcmp (argv[remaining], "-") == 0) + fd = 0; + else + { + fd = open (argv[remaining], O_RDONLY); + + if (fd == -1) + { + error (0, errno, _("cannot open input file `%s'"), + argv[remaining]); + status = EXIT_FAILURE; + continue; + } + } + +#ifdef _POSIX_MAPPED_FILES + /* We have possibilities for reading the input file. First try + to mmap() it since this will provide the fastest solution. */ + if (fstat64 (fd, &st) == 0 + && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, + fd, 0)) != MAP_FAILED)) + { + /* Yes, we can use mmap(). The descriptor is not needed + anymore. */ + if (close (fd) != 0) + error (EXIT_FAILURE, errno, + _("error while closing input `%s'"), + argv[remaining]); + + ret = process_block (cd, addr, st.st_size, &output, + output_file); + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + + if (ret != 0) + { + status = EXIT_FAILURE; + + if (ret < 0) + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + } + else +#endif /* _POSIX_MAPPED_FILES */ + { + /* Read the file in pieces. */ + ret = process_fd (cd, fd, &output, output_file); + + /* Now close the file. */ + close (fd); + + if (ret != 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + if (ret < 0) + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + } + } + while (++remaining < argc); + + /* Close the output file now. 
*/ + if (output != NULL && fclose (output)) + error (EXIT_FAILURE, errno, _("error while closing output file")); + } + + return status; +} + + +/* Handle program arguments. Each recognized key is stored in the + corresponding file-scope variable; any other key yields + ARGP_ERR_UNKNOWN. */ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case 'f': + from_code = arg; + break; + case 't': + to_code = arg; + break; + case 'o': + output_file = arg; + break; + case 's': + /* Nothing, for now at least. We are not giving out any information + about missing character or so. */ + break; + case 'c': + /* Omit invalid characters from output. */ + omit_invalid = 1; + break; + case OPT_VERBOSE: + verbose = 1; + break; + case OPT_LIST: + list = 1; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +/* Help filter for argp. For ARGP_KEY_HELP_EXTRA return a string allocated + by asprintf that names REPORT_BUGS_TO (NULL if asprintf fails); for every + other key return TEXT unchanged. */ +static char * +more_help (int key, const char *text, void *input) +{ + char *tp = NULL; + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + if (asprintf (&tp, gettext ("\ +For bug reporting instructions, please see:\n\ +%s.\n"), REPORT_BUGS_TO) < 0) + return NULL; + return tp; + default: + break; + } + return (char *) text; +} + + +/* Print the version information. */ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "iconv %s%s\n", PKGVERSION, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), "2017"); + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); +} + + +/* Write the bytes from OUTBUF up to (not including) OUTPTR to *OUTPUT. + If *OUTPUT is still NULL the output stream is selected first. */ +static int +write_output (const char *outbuf, const char *outptr, FILE **output, + const char *output_file) +{ + /* We have something to write out. */ + int errno_save = errno; + + if (*output == NULL) + { + /* Determine output file. 
*/ + if (output_file != NULL && strcmp (output_file, "-") != 0) + { + *output = fopen (output_file, "w"); + if (*output == NULL) + error (EXIT_FAILURE, errno, _("cannot open output file")); + } + else + *output = stdout; + } + + if (fwrite (outbuf, 1, outptr - outbuf, *output) < (size_t) (outptr - outbuf) + || ferror (*output)) + { + /* Error occurred while printing the result. */ + error (0, 0, _("\ +conversion stopped due to problem in writing the output")); + return -1; + } + + /* Restore the value errno had on entry; the caller inspects it after + this function returns. */ + errno = errno_save; + + return 0; +} + + +/* Convert the LEN bytes starting at ADDR using CD and hand the converted + data to write_output in pieces of at most OUTBUF_SIZE bytes. Returns + -1 if writing fails or iconv reports an error that is not tolerated, + otherwise the value of RET (0, or 1 after an EILSEQ that was skipped + because OMIT_INVALID is set). */ +static int +process_block (iconv_t cd, char *addr, size_t len, FILE **output, + const char *output_file) +{ +#define OUTBUF_SIZE 32768 + const char *start = addr; + char outbuf[OUTBUF_SIZE]; + char *outptr; + size_t outlen; + size_t n; + int ret = 0; + + while (len > 0) + { + outptr = outbuf; + outlen = OUTBUF_SIZE; + n = iconv (cd, &addr, &len, &outptr, &outlen); + + if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) + { + ret = 1; + if (len == 0) + n = 0; + else + errno = E2BIG; + } + + if (outptr != outbuf) + { + /* NOTE(review): on success this replaces a RET of 1 set just + above with 0 - confirm that dropping that status is intended. */ + ret = write_output (outbuf, outptr, output, output_file); + if (ret != 0) + break; + } + + if (n != (size_t) -1) + { + /* All the input text is processed. For state-dependent + character sets we have to flush the state now. */ + outptr = outbuf; + outlen = OUTBUF_SIZE; + n = iconv (cd, NULL, NULL, &outptr, &outlen); + + if (outptr != outbuf) + { + ret = write_output (outbuf, outptr, output, output_file); + if (ret != 0) + break; + } + + if (n != (size_t) -1) + break; + + if (omit_invalid && errno == EILSEQ) + { + ret = 1; + break; + } + } + + if (errno != E2BIG) + { + /* iconv() ran into a problem. */ + switch (errno) + { + case EILSEQ: + if (! 
omit_invalid) + error (0, 0, _("illegal input sequence at position %ld"), + (long int) (addr - start)); + break; + case EINVAL: + error (0, 0, _("\ +incomplete character or shift sequence at end of buffer")); + break; + case EBADF: + error (0, 0, _("internal error (illegal descriptor)")); + break; + default: + error (0, 0, _("unknown iconv() error %d"), errno); + break; + } + + return -1; + } + } + + return ret; +} + + +static int +process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) +{ + /* we have a problem with reading from a desriptor since we must not + provide the iconv() function an incomplete character or shift + sequence at the end of the buffer. Since we have to deal with + arbitrary encodings we must read the whole text in a buffer and + process it in one step. */ + static char *inbuf = NULL; + static size_t maxlen = 0; + char *inptr = NULL; + size_t actlen = 0; + + while (actlen < maxlen) + { + ssize_t n = read (fd, inptr, maxlen - actlen); + + if (n == 0) + /* No more text to read. */ + break; + + if (n == -1) + { + /* Error while reading. */ + error (0, errno, _("error while reading the input")); + return -1; + } + + inptr += n; + actlen += n; + } + + if (actlen == maxlen) + while (1) + { + ssize_t n; + char *new_inbuf; + + /* Increase the buffer. */ + new_inbuf = (char *) realloc (inbuf, maxlen + 32768); + if (new_inbuf == NULL) + { + error (0, errno, _("unable to allocate buffer for input")); + return -1; + } + inbuf = new_inbuf; + maxlen += 32768; + inptr = inbuf + actlen; + + do + { + n = read (fd, inptr, maxlen - actlen); + + if (n == 0) + /* No more text to read. */ + break; + + if (n == -1) + { + /* Error while reading. */ + error (0, errno, _("error while reading the input")); + return -1; + } + + inptr += n; + actlen += n; + } + while (actlen < maxlen); + + if (n == 0) + /* Break again so we leave both loops. */ + break; + } + + /* Now we have all the input in the buffer. Process it in one run. 
*/ + return process_block (cd, inbuf, actlen, output, output_file); +} + + +/* Convert the contents of the stream INPUT by passing its descriptor on + to process_fd. */ +static int +process_file (iconv_t cd, FILE *input, FILE **output, const char *output_file) +{ + /* This should be safe since we use this function only for `stdin' and + we haven't read anything so far. */ + return process_fd (cd, fileno (input), output, output_file); +} + + +/* Print all known character sets/encodings. */ +static void *printlist; +static size_t column; +static int not_first; + +/* twalk callback: add the `fromname' of the visited alias to PRINTLIST. */ +static void +insert_print_list (const void *nodep, VISIT value, int level) +{ + if (value == leaf || value == postorder) + { + const struct gconv_alias *s = *(const struct gconv_alias **) nodep; + tsearch (s->fromname, &printlist, (__compar_fn_t) strverscmp); + } +} + +/* twalk callback: print the visited name as one item of a comma-separated + list, starting a new output line once COLUMN plus the name length would + exceed 77. Trailing slashes are not printed and names without any + alphanumeric character are skipped. */ +static void +do_print_human (const void *nodep, VISIT value, int level) +{ + if (value == leaf || value == postorder) + { + const char *s = *(const char **) nodep; + size_t len = strlen (s); + size_t cnt; + + while (len > 0 && s[len - 1] == '/') + --len; + + for (cnt = 0; cnt < len; ++cnt) + if (isalnum (s[cnt])) + break; + if (cnt == len) + return; + + if (not_first) + { + putchar (','); + ++column; + + if (column > 2 && column + len > 77) + { + fputs ("\n ", stdout); + column = 2; + } + else + { + putchar (' '); + ++column; + } + } + else + not_first = 1; + + fwrite (s, len, 1, stdout); + column += len; + } +} + +/* twalk callback: print the visited name on a line of its own. */ +static void +do_print (const void *nodep, VISIT value, int level) +{ + if (value == leaf || value == postorder) + { + const char *s = *(const char **) nodep; + + puts (s); + } +} + +/* Add the from- and to-names of NODE, of all modules chained to it through + `same', and of both of its subtrees to PRINTLIST. The name "INTERNAL" + is left out. */ +static void +internal_function +add_known_names (struct gconv_module *node) +{ + if (node->left != NULL) + add_known_names (node->left); + if (node->right != NULL) + add_known_names (node->right); + do + { + if (strcmp (node->from_string, "INTERNAL") != 0) + tsearch (node->from_string, &printlist, (__compar_fn_t) strverscmp); + if (strcmp (node->to_string, "INTERNAL") != 0) + tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp); + +
node = node->same; + } + while (node != NULL); +} + + +/* Add every name referenced from the hash table of the gconv cache to + PRINTLIST, except "INTERNAL". Entries whose string offset is zero are + skipped. */ +static void +insert_cache (void) +{ + const struct gconvcache_header *header; + const char *strtab; + const struct hash_entry *hashtab; + size_t cnt; + + header = (const struct gconvcache_header *) __gconv_get_cache (); + strtab = (char *) header + header->string_offset; + hashtab = (struct hash_entry *) ((char *) header + header->hash_offset); + + for (cnt = 0; cnt < header->hash_size; ++cnt) + if (hashtab[cnt].string_offset != 0) + { + const char *str = strtab + hashtab[cnt].string_offset; + + if (strcmp (str, "INTERNAL") != 0) + tsearch (str, &printlist, (__compar_fn_t) strverscmp); + } +} + + +/* Collect all known names in PRINTLIST, from the cache if there is one and + otherwise from the alias and module databases, and print them. When + stdout is a terminal an explanatory header is printed first. */ +static void +internal_function +print_known_names (void) +{ + iconv_t h; + void *cache; + + /* We must initialize the internal databases first. */ + h = iconv_open ("L1", "L1"); + iconv_close (h); + + /* See whether we have a cache. */ + cache = __gconv_get_cache (); + if (cache != NULL) + /* Yep, use only this information. */ + insert_cache (); + else + { + struct gconv_module *modules; + + /* No, then use the information read from the gconv-modules file. + First add the aliases. */ + twalk (__gconv_get_alias_db (), insert_print_list); + + /* Add the from- and to-names from the known modules. */ + modules = __gconv_get_modules_db (); + if (modules != NULL) + add_known_names (modules); + } + + bool human_readable = isatty (fileno (stdout)); + + if (human_readable) + fputs (_("\ +The following list contains all the coded character sets known. This does\n\ +not necessarily mean that all combinations of these names can be used for\n\ +the FROM and TO command line parameters. One coded character set can be\n\ +listed with several different names (aliases).\n\n "), stdout); + + /* Now print the collected names. */ + column = 2; + twalk (printlist, human_readable ? 
do_print_human : do_print); + + if (human_readable && column != 0) + puts (""); +} diff --git a/REORG.TODO/iconv/iconv_prog.h b/REORG.TODO/iconv/iconv_prog.h new file mode 100644 index 0000000000..1571fc9181 --- /dev/null +++ b/REORG.TODO/iconv/iconv_prog.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _ICONV_PROG_H +#define _ICONV_PROG_H 1 + +#include <stdio.h> +#include <charmap.h> + + +/* Nonzero if verbose output is wanted. */ +extern int verbose; + +/* If nonzero omit invalid characters from output. */ +extern int omit_invalid; + +/* Perform the conversion using a charmap or two. */ +extern int charmap_conversion (const char *from_code, + struct charmap_t *from_charmap, + const char *to_code, + struct charmap_t *to_charmap, + int argc, int remaining, char *argv[], + const char *output_file); + + +#endif /* iconv_prog.h */ diff --git a/REORG.TODO/iconv/iconvconfig.c b/REORG.TODO/iconv/iconvconfig.c new file mode 100644 index 0000000000..9be4111a42 --- /dev/null +++ b/REORG.TODO/iconv/iconvconfig.c @@ -0,0 +1,1245 @@ +/* Generate fastloading iconv module configuration files. + Copyright (C) 2000-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <argp.h> +#include <assert.h> +#include <error.h> +#include <errno.h> +#include <fcntl.h> +#include <libintl.h> +#include <locale.h> +#include <mcheck.h> +#include <search.h> +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/cdefs.h> +#include <sys/uio.h> + +#include "iconvconfig.h" + +/* Get libc version number. */ +#include "../version.h" + +#define PACKAGE _libc_intl_domainname + + +/* The hashing function we use. */ +#include "../intl/hash-string.h" + + +/* Types used. 
*/ +struct module +{ + char *fromname; + struct Strent *fromname_strent; + char *filename; + struct Strent *filename_strent; + const char *directory; + struct Strent *directory_strent; + struct module *next; + int cost; + struct Strent *toname_strent; + char toname[0]; +}; + +struct alias +{ + char *fromname; + struct Strent *froment; + struct module *module; + struct Strent *toent; + char toname[0]; +}; + +struct name +{ + const char *name; + struct Strent *strent; + int module_idx; + uint32_t hashval; +}; + +struct name_info +{ + const char *canonical_name; + struct Strent *canonical_strent; + + struct module *from_internal; + struct module *to_internal; + + struct other_conv_list + { + int dest_idx; + struct other_conv + { + gidx_t module_idx; + struct module *module; + struct other_conv *next; + } other_conv; + struct other_conv_list *next; + } *other_conv_list; +}; + + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +/* Short description of program. */ +static const char doc[] = N_("\ +Create fastloading iconv module configuration file."); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("[DIR...]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. */ +static char *more_help (int key, const char *text, void *input); + +/* Definitions of arguments for argp functions. 
*/ +#define OPT_PREFIX 300 +#define OPT_NOSTDLIB 301 +static const struct argp_option options[] = +{ + { "prefix", OPT_PREFIX, N_("PATH"), 0, + N_("Prefix used for all file accesses") }, + { "output", 'o', N_("FILE"), 0, N_("\ +Put output in FILE instead of installed location\ + (--prefix does not apply to FILE)") }, + { "nostdlib", OPT_NOSTDLIB, NULL, 0, + N_("Do not search standard directories, only those on the command line") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + + +/* The function doing the actual work. */ +static int handle_dir (const char *dir); + +/* Add all known builtin conversions and aliases. */ +static void add_builtins (void); + +/* Create list of all aliases without circular aliases. */ +static void get_aliases (void); + +/* Create list of all modules. */ +static void get_modules (void); + +/* Get list of all the names and thereby indexing them. */ +static void generate_name_list (void); + +/* Collect information about all the names. */ +static void generate_name_info (void); + +/* Write the output file. */ +static int write_output (void); + + +/* Prefix to be used for all file accesses. */ +static const char *prefix = ""; +/* Its length. */ +static size_t prefix_len; + +/* Directory to place output file in. */ +static const char *output_file; +/* Its length. */ +static size_t output_file_len; + +/* If true, omit the GCONV_PATH directories and require some arguments. */ +static bool nostdlib; + +/* Search tree of the modules we know. */ +static void *modules; + +/* Search tree of the aliases we know. */ +static void *aliases; + +/* Search tree for name to index mapping. */ +static void *names; + +/* Number of names we know about. */ +static int nnames; + +/* List of all aliases. */ +static struct alias **alias_list; +static size_t nalias_list; +static size_t nalias_list_max; + +/* List of all modules. 
*/ +static struct module **module_list; +static size_t nmodule_list; +static size_t nmodule_list_max; + +/* Names and information about them. */ +static struct name_info *name_info; +static size_t nname_info; + +/* Number of translations not from or to INTERNAL. */ +static size_t nextra_modules; + + +/* Names and aliases for the builtin transformations. */ +static struct +{ + const char *from; + const char *to; +} builtin_alias[] = + { +#define BUILTIN_ALIAS(alias, real) \ + { .from = alias, .to = real }, +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) +#include <gconv_builtin.h> + }; +#undef BUILTIN_ALIAS +#undef BUILTIN_TRANSFORMATION +#define nbuiltin_alias (sizeof (builtin_alias) / sizeof (builtin_alias[0])) + +static struct +{ + const char *from; + const char *to; + const char *module; + int cost; +} builtin_trans[] = + { +#define BUILTIN_ALIAS(alias, real) +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ + { .from = From, .to = To, .module = Name, .cost = Cost }, +#include <gconv_builtin.h> + }; +#undef BUILTIN_ALIAS +#undef BUILTIN_TRANSFORMATION +#define nbuiltin_trans (sizeof (builtin_trans) / sizeof (builtin_trans[0])) + + +/* Filename extension for the modules. */ +#ifndef MODULE_EXT +# define MODULE_EXT ".so" +#endif +static const char gconv_module_ext[] = MODULE_EXT; + + +#include <programs/xmalloc.h> + + +/* C string table handling. */ +struct Strtab; +struct Strent; + +/* Create new C string table object in memory. */ +extern struct Strtab *strtabinit (void); + +/* Free resources allocated for C string table ST. */ +extern void strtabfree (struct Strtab *st); + +/* Add string STR (length LEN is != 0) to C string table ST. */ +extern struct Strent *strtabadd (struct Strtab *st, const char *str, + size_t len); + +/* Finalize string table ST and store size in *SIZE and return a pointer. 
*/ +extern void *strtabfinalize (struct Strtab *st, size_t *size); + +/* Get offset in string table for string associated with SE. */ +extern size_t strtaboffset (struct Strent *se); + +/* String table we construct. */ +static struct Strtab *strtab; + + + +int +main (int argc, char *argv[]) +{ + int remaining; + int status = 0; + + /* Enable memory use testing. */ + /* mcheck_pedantic (NULL); */ + mtrace (); + + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); + + /* Set the text message domain. */ + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + if (nostdlib && remaining == argc) + error (2, 0, _("Directory arguments required when using --nostdlib")); + + /* Initialize the string table. */ + strtab = strtabinit (); + + /* Handle all directories mentioned. */ + while (remaining < argc) + status |= handle_dir (argv[remaining++]); + + if (! nostdlib) + { + /* In any case also handle the standard directory. */ + char *path = strdupa (GCONV_PATH), *tp = strsep (&path, ":"); + while (tp != NULL) + { + status |= handle_dir (tp); + + tp = strsep (&path, ":"); + } + } + + /* Add the builtin transformations and aliases without overwriting + anything. */ + add_builtins (); + + /* Store aliases in an array. */ + get_aliases (); + + /* Get list of all modules. */ + get_modules (); + + /* Generate list of all the names we know to handle in some way. */ + generate_name_list (); + + /* Now we know all the names we will handle, collect information + about them. */ + generate_name_info (); + + /* Write the output file, but only if we haven't seen any error. */ + if (status == 0) + status = write_output (); + else + error (1, 0, _("no output file produced because warnings were issued")); + + return status; +} + + +/* Handle program arguments. 
*/ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case OPT_PREFIX: + prefix = arg; + prefix_len = strlen (prefix); + break; + case 'o': + output_file = arg; + output_file_len = strlen (output_file); + break; + case OPT_NOSTDLIB: + nostdlib = true; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + char *tp = NULL; + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + if (asprintf (&tp, gettext ("\ +For bug reporting instructions, please see:\n\ +%s.\n"), REPORT_BUGS_TO) < 0) + return NULL; + return tp; + default: + break; + } + return (char *) text; +} + + +/* Print the version information. */ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "iconvconfig %s%s\n", PKGVERSION, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. 
There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), "2017"); + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); +} + + +/* Comparison function for the alias search tree: order two `struct alias' + objects by their `fromname'. */ +static int +alias_compare (const void *p1, const void *p2) +{ + const struct alias *a1 = (const struct alias *) p1; + const struct alias *a2 = (const struct alias *) p2; + + return strcmp (a1->fromname, a2->fromname); +} + + +/* Create an alias record mapping FROMNAME to TONAME and insert it into the + alias search tree. FROMLEN and TOLEN are the number of bytes copied for + each name; both names are stored in the same allocation, TONAME first. */ +static void +new_alias (const char *fromname, size_t fromlen, const char *toname, + size_t tolen) +{ + struct alias *newp; + void **inserted; + + newp = (struct alias *) xmalloc (sizeof (struct alias) + fromlen + tolen); + + newp->fromname = mempcpy (newp->toname, toname, tolen); + memcpy (newp->fromname, fromname, fromlen); + newp->module = NULL; + + inserted = (void **) tsearch (newp, &aliases, alias_compare); + if (inserted == NULL) + error (EXIT_FAILURE, errno, gettext ("while inserting in search tree")); + if (*inserted != newp) + /* The tree already holds an entry that compares equal, so the new + record was not inserted; free it. */ + free (newp); + else + { + newp->froment = strtabadd (strtab, newp->fromname, fromlen); + newp->toent = strtabadd (strtab, newp->toname, tolen); + } +} + + +/* Add new alias. */ +static void +add_alias (char *rp) +{ + /* We now expect two more strings. The strings are normalized + (converted to UPPER case) and stored in the alias database. */ + char *from; + char *to; + char *wp; + + while (isspace (*rp)) + ++rp; + from = wp = rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (*rp == '\0') + /* There is no `to' string on the line. Ignore it. */ + return; + *wp++ = '\0'; + to = ++rp; + while (isspace (*rp)) + ++rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (to == wp) + /* No `to' string, ignore the line. 
*/ + return; + *wp++ = '\0'; + + assert (strlen (from) + 1 == (size_t) (to - from)); + assert (strlen (to) + 1 == (size_t) (wp - to)); + + new_alias (from, to - from, to, wp - to); +} + + +static void +append_alias (const void *nodep, VISIT value, int level) +{ + if (value != leaf && value != postorder) + return; + + if (nalias_list_max == nalias_list) + { + nalias_list_max += 50; + alias_list = (struct alias **) xrealloc (alias_list, + (nalias_list_max + * sizeof (struct alias *))); + } + + alias_list[nalias_list++] = *(struct alias **) nodep; +} + + +static void +get_aliases (void) +{ + twalk (aliases, append_alias); +} + + +static int +module_compare (const void *p1, const void *p2) +{ + const struct module *m1 = (const struct module *) p1; + const struct module *m2 = (const struct module *) p2; + int result; + + result = strcmp (m1->fromname, m2->fromname); + if (result == 0) + result = strcmp (m1->toname, m2->toname); + + return result; +} + + +/* Create new module record. */ +static void +new_module (const char *fromname, size_t fromlen, const char *toname, + size_t tolen, const char *directory, + const char *filename, size_t filelen, int cost, size_t need_ext) +{ + struct module *new_module; + size_t dirlen = strlen (directory) + 1; + char *tmp; + void **inserted; + + new_module = (struct module *) xmalloc (sizeof (struct module) + + fromlen + tolen + filelen + + need_ext); + + new_module->fromname = mempcpy (new_module->toname, toname, tolen); + + new_module->filename = mempcpy (new_module->fromname, fromname, fromlen); + + new_module->cost = cost; + new_module->next = NULL; + + tmp = mempcpy (new_module->filename, filename, filelen); + if (need_ext) + { + memcpy (tmp - 1, gconv_module_ext, need_ext + 1); + filelen += need_ext; + } + new_module->directory = directory; + + /* Now insert the new module data structure in our search tree. 
*/ + inserted = (void **) tsearch (new_module, &modules, module_compare); + if (inserted == NULL) + error (EXIT_FAILURE, errno, "while inserting in search tree"); /* NOTE(review): new_alias passes this message through gettext. */ + if (*inserted != new_module) + free (new_module); + else + { + new_module->fromname_strent = strtabadd (strtab, new_module->fromname, + fromlen); + new_module->toname_strent = strtabadd (strtab, new_module->toname, + tolen); + new_module->filename_strent = strtabadd (strtab, new_module->filename, + filelen); + new_module->directory_strent = strtabadd (strtab, directory, dirlen); + } +} + + +/* Add new module. RP points behind the keyword of a module line read + for DIRECTORY; the line is split into its fields in place. */ +static void +internal_function +add_module (char *rp, const char *directory) +{ + /* We expect now + 1. `from' name + 2. `to' name + 3. filename of the module + 4. an optional cost value + */ + char *from; + char *to; + char *module; + char *wp; + int need_ext; + int cost; + + while (isspace (*rp)) + ++rp; + from = rp; + while (*rp != '\0' && !isspace (*rp)) + { + *rp = toupper (*rp); + ++rp; + } + if (*rp == '\0') + return; + *rp++ = '\0'; + to = wp = rp; + while (isspace (*rp)) + ++rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (*rp == '\0') + return; + *wp++ = '\0'; + do + ++rp; + while (isspace (*rp)); + module = wp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = *rp++; + if (*rp == '\0') + { + /* There is no cost, use one by default. */ + *wp++ = '\0'; + cost = 1; + } + else + { + /* There might be a cost value. */ + char *endp; + + *wp++ = '\0'; + cost = strtol (rp, &endp, 10); + if (rp == endp || cost < 1) + /* No useful information, fall back to the default. */ + cost = 1; + } + + if (module[0] == '\0') + /* No module name given. */ + return; + + /* See whether we must add the ending. */ + need_ext = 0; + if ((size_t) (wp - module) < sizeof (gconv_module_ext) + || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, + sizeof (gconv_module_ext)) != 0) + /* We must add the module extension.
*/ + need_ext = sizeof (gconv_module_ext) - 1; + + assert (strlen (from) + 1 == (size_t) (to - from)); + assert (strlen (to) + 1 == (size_t) (module - to)); + assert (strlen (module) + 1 == (size_t) (wp - module)); + + new_module (from, to - from, to, module - to, directory, module, wp - module, + cost, need_ext); +} + + +/* Read the gconv-modules file in DIR and add its alias and module lines + to the database. Return zero on success and one if the file cannot + be opened. */ +static int +handle_dir (const char *dir) +{ + char *cp; + FILE *fp; + char *line = NULL; + size_t linelen = 0; + size_t dirlen = strlen (dir); + /* Make sure the directory name ends in a slash. The copy made here + is not freed: add_module hands DIR on to the module records. + NOTE(review): for an empty DIR, dirlen - 1 wraps around; confirm + callers never pass one. */ + if (dir[dirlen - 1] != '/') + { + char *newp = (char *) xmalloc (dirlen + 2); + dir = memcpy (newp, dir, dirlen); + newp[dirlen++] = '/'; + newp[dirlen] = '\0'; + } + + char infile[prefix_len + dirlen + sizeof "gconv-modules"]; + cp = infile; + if (dir[0] == '/') + cp = mempcpy (cp, prefix, prefix_len); + strcpy (mempcpy (cp, dir, dirlen), "gconv-modules"); + + fp = fopen (infile, "r"); + if (fp == NULL) + { + error (0, errno, "cannot open `%s'", infile); + return 1; + } + + /* No threads present. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + while (!feof_unlocked (fp)) + { + char *rp, *endp, *word; + ssize_t n = __getdelim (&line, &linelen, '\n', fp); + + if (n < 0) + /* An error occurred or nothing more could be read. */ + break; + + rp = line; + /* Terminate the line (excluding comments or newline) with a NUL + byte to simplify the following code. */ + endp = strchr (rp, '#'); + if (endp != NULL) + *endp = '\0'; + else + if (rp[n - 1] == '\n') + rp[n - 1] = '\0'; + + while (isspace (*rp)) + ++rp; + + /* If this is an empty line go on with the next one. */ + if (rp == endp) + continue; + + word = rp; + while (*rp != '\0' && !isspace (*rp)) + ++rp; + + if (rp - word == sizeof ("alias") - 1 + && memcmp (word, "alias", sizeof ("alias") - 1) == 0) + add_alias (rp); + else if (rp - word == sizeof ("module") - 1 + && memcmp (word, "module", sizeof ("module") - 1) == 0) + add_module (rp, dir); + /* else */ + /* Otherwise ignore the line.
*/ + } + + free (line); + + fclose (fp); + + return 0; +} + +/* twalk callback: add the module stored in NODEP to module_list. A + module whose FROMNAME equals that of the last list entry is chained + in front of that entry through its next pointer instead of getting + a slot of its own. */ +static void +append_module (const void *nodep, VISIT value, int level) +{ + struct module *mo; + + if (value != leaf && value != postorder) + return; + + mo = *(struct module **) nodep; + + if (nmodule_list > 0 + && strcmp (module_list[nmodule_list - 1]->fromname, mo->fromname) == 0) + { + /* Same name as the previous entry: make the module the new head + of that chain. */ + mo->next = module_list[nmodule_list - 1]; + module_list[nmodule_list - 1] = mo; + + return; + } + + if (nmodule_list_max == nmodule_list) + { + nmodule_list_max += 50; + module_list = (struct module **) xrealloc (module_list, + (nmodule_list_max + * sizeof (struct module *))); + } + + module_list[nmodule_list++] = mo; +} + +/* Flatten the module tree into module_list. */ +static void +get_modules (void) +{ + twalk (modules, append_module); +} + +/* Enter the builtin aliases and transformations into the database. + The builtin modules get an empty directory name and no extension. */ +static void +add_builtins (void) +{ + size_t cnt; + + /* Add all aliases. */ + for (cnt = 0; cnt < nbuiltin_alias; ++cnt) + new_alias (builtin_alias[cnt].from, + strlen (builtin_alias[cnt].from) + 1, + builtin_alias[cnt].to, + strlen (builtin_alias[cnt].to) + 1); + + /* Add the builtin transformations. */ + for (cnt = 0; cnt < nbuiltin_trans; ++cnt) + new_module (builtin_trans[cnt].from, + strlen (builtin_trans[cnt].from) + 1, + builtin_trans[cnt].to, + strlen (builtin_trans[cnt].to) + 1, + "", builtin_trans[cnt].module, + strlen (builtin_trans[cnt].module) + 1, + builtin_trans[cnt].cost, 0); +} + +/* Comparison function for the name search tree: order by name. */ +static int +name_compare (const void *p1, const void *p2) +{ + const struct name *n1 = (const struct name *) p1; + const struct name *n2 = (const struct name *) p2; + + return strcmp (n1->name, n2->name); +} + +/* Allocate a name record for STR (the string is not copied) with string + table entry STRENT and no module index assigned yet, and count it in + nnames. */ +static struct name * +new_name (const char *str, struct Strent *strent) +{ + struct name *newp = (struct name *) xmalloc (sizeof (struct name)); + + newp->name = str; + newp->strent = strent; + newp->module_idx = -1; + newp->hashval = __hash_string (str); + + ++nnames; + + return newp; +} + +/* Enter INTERNAL and the source and target names of all modules in + module_list into the name tree. */ +static void +generate_name_list (void) +{ + size_t i; + + /* A name we always need.
*/ + tsearch (new_name ("INTERNAL", strtabadd (strtab, "INTERNAL", + sizeof ("INTERNAL"))), + &names, name_compare); + /* All modules of one chain share their FROMNAME, so it is entered + once per chain; every TONAME in the chain is entered separately. */ + for (i = 0; i < nmodule_list; ++i) + { + struct module *runp; + + if (strcmp (module_list[i]->fromname, "INTERNAL") != 0) + tsearch (new_name (module_list[i]->fromname, + module_list[i]->fromname_strent), + &names, name_compare); + + for (runp = module_list[i]; runp != NULL; runp = runp->next) + if (strcmp (runp->toname, "INTERNAL") != 0) + tsearch (new_name (runp->toname, runp->toname_strent), + &names, name_compare); + } +} + +/* Return the module index recorded for NAME. NAME must be in the name + tree, otherwise the program aborts. If no index is assigned yet, + the next free one is assigned when ADD is nonzero and -1 is returned + when it is zero. */ +static int +name_to_module_idx (const char *name, int add) +{ + struct name **res; + struct name fake_name = { .name = name }; + int idx; + + res = (struct name **) tfind (&fake_name, &names, name_compare); + if (res == NULL) + abort (); + + idx = (*res)->module_idx; + if (idx == -1 && add) + /* No module index assigned yet. */ + idx = (*res)->module_idx = nname_info++; + + return idx; +} + +/* Fill the name_info array, which is indexed by module index, with the + modules converting from and to INTERNAL and the lists of other + conversions, then give every alias the index of its target name. */ +static void +generate_name_info (void) +{ + size_t i; + int idx; + + name_info = (struct name_info *) xcalloc (nmodule_list + 1, + sizeof (struct name_info)); + /* NOTE(review): indices are handed out per distinct name while + nmodule_list counts distinct source names only; confirm that names + occurring only as targets cannot overrun the array. */ + /* First add a special entry for the INTERNAL name. This must have + index zero.
*/ + idx = name_to_module_idx ("INTERNAL", 1); + name_info[0].canonical_name = "INTERNAL"; + name_info[0].canonical_strent = strtabadd (strtab, "INTERNAL", + sizeof ("INTERNAL")); + assert (nname_info == 1); + /* File every module under a name_info slot: conversions from INTERNAL + under their target name, all others under their source name. */ + for (i = 0; i < nmodule_list; ++i) + { + struct module *runp; + + for (runp = module_list[i]; runp != NULL; runp = runp->next) + if (strcmp (runp->fromname, "INTERNAL") == 0) + { + idx = name_to_module_idx (runp->toname, 1); + name_info[idx].from_internal = runp; + assert (name_info[idx].canonical_name == NULL + || strcmp (name_info[idx].canonical_name, + runp->toname) == 0); + name_info[idx].canonical_name = runp->toname; + name_info[idx].canonical_strent = runp->toname_strent; + } + else if (strcmp (runp->toname, "INTERNAL") == 0) + { + idx = name_to_module_idx (runp->fromname, 1); + name_info[idx].to_internal = runp; + assert (name_info[idx].canonical_name == NULL + || strcmp (name_info[idx].canonical_name, + runp->fromname) == 0); + name_info[idx].canonical_name = runp->fromname; + name_info[idx].canonical_strent = runp->fromname_strent; + } + else + { + /* This is a transformation not to or from the INTERNAL + encoding. Put it at the head of the list of such + conversions kept for its source name. */ + int from_idx = name_to_module_idx (runp->fromname, 1); + int to_idx = name_to_module_idx (runp->toname, 1); + struct other_conv_list *newp; + + newp = (struct other_conv_list *) + xmalloc (sizeof (struct other_conv_list)); + newp->other_conv.module_idx = to_idx; + newp->other_conv.module = runp; + newp->other_conv.next = NULL; /* XXX Allow multiple module sequence */ + newp->dest_idx = to_idx; + newp->next = name_info[from_idx].other_conv_list; + name_info[from_idx].other_conv_list = newp; + assert (name_info[from_idx].canonical_name == NULL + || strcmp (name_info[from_idx].canonical_name, + runp->fromname) == 0); + name_info[from_idx].canonical_name = runp->fromname; + name_info[from_idx].canonical_strent = runp->fromname_strent; + + ++nextra_modules; + } + } + + /* Now add the module index information for all the aliases.
*/ + for (i = 0; i < nalias_list; ++i) + { + struct name fake_name = { .name = alias_list[i]->toname }; + struct name **tonamep; + + tonamep = (struct name **) tfind (&fake_name, &names, name_compare); + if (tonamep != NULL) + { + struct name *newp = new_name (alias_list[i]->fromname, + alias_list[i]->froment); + newp->module_idx = (*tonamep)->module_idx; + tsearch (newp, &names, name_compare); + } + } +} + +/* Return nonzero if CANDIDATE has no odd divisor DIVN, starting at 3, + with DIVN * DIVN below CANDIDATE or equal to it. */ +static int +is_prime (unsigned long int candidate) +{ + /* No even number and none less than 10 will be passed here. */ + unsigned long int divn = 3; + unsigned long int sq = divn * divn; + + while (sq < candidate && candidate % divn != 0) + { + ++divn; + sq += 4 * divn; + ++divn; + } + + return candidate % divn != 0; +} + +/* Return the first number accepted by is_prime when counting up in + steps of two from SEED with its lowest bit set. */ +static uint32_t +next_prime (uint32_t seed) +{ + /* Make it definitely odd. */ + seed |= 1; + + while (!is_prime (seed)) + seed += 2; + + return seed; +} + + +/* Format of the output file. + + Offset Length Description + 0000 4 Magic header bytes + 0004 2 Offset of string table (stoff) + 0006 2 Offset of name hashing table (hoff) + 0008 2 Hashing table size (hsize) + 000A 2 Offset of module table (moff) + 000C 2 Offset of other conversion module table (ooff) + + stoff ??? String table + + hoff 4*hsize Array of tuples + string table offset + module index + + moff ??? Array of tuples + canonical name offset + from-internal module dir name offset + from-internal module name offset + to-internal module dir name offset + to-internal module name offset + offset into other conversion table + + ooff ??? One or more of + number of steps/modules + one or more of tuple + canonical name offset for output + module dir name offset + module name offset + (following last entry with step count 0) +*/ + +static struct hash_entry *hash_table; +static size_t hash_size; + +/* Function to insert the names.
*/ +static void name_insert (const void *nodep, VISIT value, int level) +{ + struct name *name; + unsigned int idx; + unsigned int hval2; + + if (value != leaf && value != postorder) + return; + + name = *(struct name **) nodep; + idx = name->hashval % hash_size; + hval2 = 1 + name->hashval % (hash_size - 2); + /* Probe in steps of hval2, wrapping around at the end of the table, + until a free slot (string offset zero) is found. */ + while (hash_table[idx].string_offset != 0) + if ((idx += hval2) >= hash_size) + idx -= hash_size; + + hash_table[idx].string_offset = strtaboffset (name->strent); + + assert (name->module_idx != -1); + hash_table[idx].module_idx = name->module_idx; +} +/* Write the cache into a temporary file and rename that to the final + name. Return one if the temporary file cannot be created and zero + on success; a failure to write, chmod or rename is reported through + error with EXIT_FAILURE. */ +static int +write_output (void) +{ + int fd; + char *string_table; + size_t string_table_size; + struct gconvcache_header header; + struct module_entry *module_table; + char *extra_table; + char *cur_extra_table; + size_t n; + int idx; + struct iovec iov[6]; + static const gidx_t null_word; + size_t total; + char finalname[prefix_len + sizeof GCONV_MODULES_CACHE]; + char tmpfname[(output_file == NULL ? sizeof finalname : output_file_len + 1) + + strlen (".XXXXXX")]; + + /* Open the output file. This is a temporary file named like the + final file plus a suffix filled in by mkstemp. */ + if (output_file == NULL) + { + assert (GCONV_MODULES_CACHE[0] == '/'); + strcpy (stpcpy (mempcpy (tmpfname, prefix, prefix_len), + GCONV_MODULES_CACHE), + ".XXXXXX"); + strcpy (mempcpy (finalname, prefix, prefix_len), GCONV_MODULES_CACHE); + } + else + strcpy (mempcpy (tmpfname, output_file, output_file_len), ".XXXXXX"); + fd = mkstemp (tmpfname); + if (fd == -1) + return 1; + + /* Create the string table. */ + string_table = strtabfinalize (strtab, &string_table_size); + + /* Create the hashing table. We know how many strings we have. + Creating a perfect hash table is not reasonable here. Therefore + we use open hashing and a table size which is the next prime 40% + larger than the number of strings. */ + hash_size = next_prime (nnames * 1.4); + hash_table = (struct hash_entry *) xcalloc (hash_size, + sizeof (struct hash_entry)); + /* Fill the hash table.
*/ + twalk (names, name_insert); + + /* Create the section for the module list, with one entry for each + assigned module index. */ + module_table = (struct module_entry *) xcalloc (sizeof (struct module_entry), + nname_info); + + /* Allocate memory for the non-INTERNAL conversions. The allocated + memory can be more than is actually needed. */ + extra_table = (char *) xcalloc (sizeof (struct extra_entry) + + sizeof (gidx_t) + + sizeof (struct extra_entry_module), + nextra_modules); + cur_extra_table = extra_table; + + /* Fill in the module information. Offsets of zero mark a missing + conversion from or to INTERNAL. */ + for (n = 0; n < nname_info; ++n) + { + module_table[n].canonname_offset = + strtaboffset (name_info[n].canonical_strent); + + if (name_info[n].from_internal == NULL) + { + module_table[n].fromdir_offset = 0; + module_table[n].fromname_offset = 0; + } + else + { + module_table[n].fromdir_offset = + strtaboffset (name_info[n].from_internal->directory_strent); + module_table[n].fromname_offset = + strtaboffset (name_info[n].from_internal->filename_strent); + } + + if (name_info[n].to_internal == NULL) + { + module_table[n].todir_offset = 0; + module_table[n].toname_offset = 0; + } + else + { + module_table[n].todir_offset = + strtaboffset (name_info[n].to_internal->directory_strent); + module_table[n].toname_offset = + strtaboffset (name_info[n].to_internal->filename_strent); + } + + if (name_info[n].other_conv_list != NULL) + { + struct other_conv_list *other = name_info[n].other_conv_list; + + /* Store the reference. We add 1 to distinguish the entry + at offset zero from the case where no extra modules are + available. The file reader has to account for the + offset. */ + module_table[n].extra_offset = 1 + cur_extra_table - extra_table; + + do + { + struct other_conv *runp; + struct extra_entry *extra; + + /* Allocate new entry.
*/ + extra = (struct extra_entry *) cur_extra_table; + cur_extra_table += sizeof (struct extra_entry); + extra->module_cnt = 0; + + runp = &other->other_conv; + do + { + cur_extra_table += sizeof (struct extra_entry_module); + extra->module[extra->module_cnt].outname_offset = + runp->next == NULL + ? other->dest_idx : runp->next->module_idx; + extra->module[extra->module_cnt].dir_offset = + strtaboffset (runp->module->directory_strent); + extra->module[extra->module_cnt].name_offset = + strtaboffset (runp->module->filename_strent); + ++extra->module_cnt; + + runp = runp->next; + } + while (runp != NULL); + + other = other->next; + } + while (other != NULL); + + /* Terminate the entries for this name: the final module_cnt is + zero. */ + *((gidx_t *) cur_extra_table) = 0; + cur_extra_table += sizeof (gidx_t); + } + } + + /* Clear padding. */ + memset (&header, 0, sizeof (struct gconvcache_header)); + + header.magic = GCONVCACHE_MAGIC; + + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct gconvcache_header); + total = iov[0].iov_len; + + header.string_offset = total; + iov[1].iov_base = string_table; + iov[1].iov_len = string_table_size; + total += iov[1].iov_len; + /* Pad the string table with zero bytes up to a multiple of the size + of gidx_t. */ + idx = 2; + if ((string_table_size & (sizeof (gidx_t) - 1)) != 0) + { + iov[2].iov_base = (void *) &null_word; + iov[2].iov_len = (sizeof (gidx_t) + - (string_table_size & (sizeof (gidx_t) - 1))); + total += iov[2].iov_len; + ++idx; + } + + header.hash_offset = total; + header.hash_size = hash_size; + iov[idx].iov_base = hash_table; + iov[idx].iov_len = hash_size * sizeof (struct hash_entry); + total += iov[idx].iov_len; + ++idx; + + header.module_offset = total; + iov[idx].iov_base = module_table; + iov[idx].iov_len = nname_info * sizeof (struct module_entry); + total += iov[idx].iov_len; + ++idx; + + assert ((size_t) (cur_extra_table - extra_table) + <= ((sizeof (struct extra_entry) + sizeof (gidx_t) + + sizeof (struct extra_entry_module)) + * nextra_modules)); + header.otherconv_offset = total; + iov[idx].iov_base = extra_table; +
iov[idx].iov_len = cur_extra_table - extra_table; + total += iov[idx].iov_len; + ++idx; + /* Write all sections with a single call; anything but a complete + write counts as failure. */ + if ((size_t) TEMP_FAILURE_RETRY (writev (fd, iov, idx)) != total + /* The file was created with mode 0600. Make it world-readable. */ + || fchmod (fd, 0644) != 0 + /* Rename the file, possibly replacing an old one. */ + || rename (tmpfname, output_file ?: finalname) != 0) + { + int save_errno = errno; + close (fd); + unlink (tmpfname); + error (EXIT_FAILURE, save_errno, + gettext ("cannot generate output file")); + } + + close (fd); + + return 0; +} diff --git a/REORG.TODO/iconv/iconvconfig.h b/REORG.TODO/iconv/iconvconfig.h new file mode 100644 index 0000000000..3f9fbdb1f4 --- /dev/null +++ b/REORG.TODO/iconv/iconvconfig.h @@ -0,0 +1,66 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>.
*/ + +#include <stdint.h> + +/* Type of the offsets, counts and indices stored in the cache file. */ +typedef uint16_t gidx_t; + +/* Header found at the start of the cache file. */ +struct gconvcache_header +{ + uint32_t magic; + gidx_t string_offset; + gidx_t hash_offset; + gidx_t hash_size; + gidx_t module_offset; + gidx_t otherconv_offset; +}; +/* Entry of the name hashing table. */ +struct hash_entry +{ + gidx_t string_offset; + gidx_t module_idx; +}; +/* Entry of the module table. */ +struct module_entry +{ + gidx_t canonname_offset; + gidx_t fromdir_offset; + gidx_t fromname_offset; + gidx_t todir_offset; + gidx_t toname_offset; + gidx_t extra_offset; +}; +/* Entry of the table of other conversions: a count followed by that + many module records. */ +struct extra_entry +{ + gidx_t module_cnt; + struct extra_entry_module + { + gidx_t outname_offset; + gidx_t dir_offset; + gidx_t name_offset; + } module[0]; +}; + +/* Value of the magic field of the header. */ +#define GCONVCACHE_MAGIC 0x20010324 + +/* File name of the cache, below GCONV_DIR. */ +#define GCONV_MODULES_CACHE GCONV_DIR "/gconv-modules.cache" diff --git a/REORG.TODO/iconv/loop.c b/REORG.TODO/iconv/loop.c new file mode 100644 index 0000000000..0160f72cd6 --- /dev/null +++ b/REORG.TODO/iconv/loop.c @@ -0,0 +1,523 @@ +/* Conversion loop frame work. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This file provides a frame for the reader loop in all conversion modules.
+ The actual code must (of course) be provided in the actual module source + code but certain actions can be written down generically, with some + customization options which are these: + + MIN_NEEDED_INPUT minimal number of input bytes needed for the next + conversion. + MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round + of conversion. + + MAX_NEEDED_INPUT you guess it, this is the maximal number of input + bytes needed. It defaults to MIN_NEEDED_INPUT + MAX_NEEDED_OUTPUT likewise for output bytes. + + LOOPFCT name of the function created. If not specified + the name is `loop' but this prevents the use + of multiple functions in the same file. + + BODY this is supposed to expand to the body of the loop. + The user must provide this. + + EXTRA_LOOP_DECLS extra arguments passed from conversion loop call. + + INIT_PARAMS code to define and initialize variables from params. + UPDATE_PARAMS code to store result in params. + + ONEBYTE_BODY body of the specialized conversion function for a + single byte from the current character set to INTERNAL. +*/ + +#include <assert.h> +#include <endian.h> +#include <gconv.h> +#include <stdint.h> +#include <string.h> +#include <wchar.h> +#include <sys/param.h> /* For MIN. */ +#define __need_size_t +#include <stddef.h> +#include <libc-diag.h> + +/* We have to provide support for machines which are not able to handle + unaligned memory accesses. Some of the character encodings have + representations with a fixed width of 2 or 4 bytes. But if we cannot + access unaligned memory we still have to read byte-wise. */ +#undef FCTNAME2 +#if _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED +/* We can handle unaligned memory access: read the values directly. */ +# define get16(addr) *((const uint16_t *) (addr)) +# define get32(addr) *((const uint32_t *) (addr)) + +/* We need no special support for writing values either.
*/ +# define put16(addr, val) *((uint16_t *) (addr)) = (val) +# define put32(addr, val) *((uint32_t *) (addr)) = (val) + +# define FCTNAME2(name) name +#else +/* The values are read and written one byte at a time. Distinguish + between big endian and little endian. */ +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define get16(addr) \ + (((const unsigned char *) (addr))[1] << 8 \ + | ((const unsigned char *) (addr))[0]) +# define get32(addr) \ + (((((const unsigned char *) (addr))[3] << 8 \ + | ((const unsigned char *) (addr))[2]) << 8 \ + | ((const unsigned char *) (addr))[1]) << 8 \ + | ((const unsigned char *) (addr))[0]) + +# define put16(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + ((unsigned char *) (addr))[1] = __val >> 8; \ + (void) 0; }) +# define put32(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[3] = __val; \ + (void) 0; }) +# else +# define get16(addr) \ + (((const unsigned char *) (addr))[0] << 8 \ + | ((const unsigned char *) (addr))[1]) +# define get32(addr) \ + (((((const unsigned char *) (addr))[0] << 8 \ + | ((const unsigned char *) (addr))[1]) << 8 \ + | ((const unsigned char *) (addr))[2]) << 8 \ + | ((const unsigned char *) (addr))[3]) + +# define put16(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[1] = __val; \ + ((unsigned char *) (addr))[0] = __val >> 8; \ + (void) 0; }) +# define put32(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[3] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[0] = __val; \ + (void) 0; }) +# endif + /* The byte-wise variant of a function gets a name of its own. */ +# define FCTNAME2(name) name##_unaligned +#endif +#define FCTNAME(name) FCTNAME2(name) + + +/* We need at least one byte for the next round.
*/ +#ifndef MIN_NEEDED_INPUT +# error "MIN_NEEDED_INPUT definition missing" +#elif MIN_NEEDED_INPUT < 1 +# error "MIN_NEEDED_INPUT must be >= 1" +#endif + +/* Let's see how many bytes we need at most. */ +#ifndef MAX_NEEDED_INPUT +# define MAX_NEEDED_INPUT MIN_NEEDED_INPUT +#endif + +/* We produce at least one byte in the next round. */ +#ifndef MIN_NEEDED_OUTPUT +# error "MIN_NEEDED_OUTPUT definition missing" +#elif MIN_NEEDED_OUTPUT < 1 +# error "MIN_NEEDED_OUTPUT must be >= 1" +#endif + +/* Let's see how many bytes we produce at most. */ +#ifndef MAX_NEEDED_OUTPUT +# define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT +#endif + +/* Default name for the function. */ +#ifndef LOOPFCT +# define LOOPFCT loop +#endif + +/* Make sure we have a loop body. */ +#ifndef BODY +# error "Definition of BODY missing for function" LOOPFCT +#endif + + +/* If no arguments have to be passed to the loop function define the macro + as empty. */ +#ifndef EXTRA_LOOP_DECLS +# define EXTRA_LOOP_DECLS +#endif + +/* Allow using UPDATE_PARAMS and REINIT_PARAMS in macros where an #ifdef + test isn't possible. */ +#ifndef UPDATE_PARAMS +# define UPDATE_PARAMS do { } while (0) +#endif +#ifndef REINIT_PARAMS +# define REINIT_PARAMS do { } while (0) +#endif + + +/* To make it easier for the writers of the modules, we define a macro + to test whether we have to ignore errors. It uses the variables + irreversible and flags of the function it is expanded in. */ +#define ignore_errors_p() \ + (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS)) + + +/* Error handling for the FROM_LOOP direction, with ignoring of errors. + Note that we cannot use the do while (0) trick since `break' and + `continue' must reach certain points. */ +#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + \ + if (! ignore_errors_p ()) \ + break; \ + \ + /* We ignore the invalid input byte sequence. */ \ + inptr += (Incr); \ + ++*irreversible; \ + /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \ + that "iconv -c" must give the same exitcode as "iconv".
*/ \ + continue; \ + } + +/* Error handling for the TO_LOOP direction, with use of transliteration/ + transcription functions and ignoring of errors. Note that we cannot use + the do while (0) trick since `break' and `continue' must reach certain + points. */ +#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + \ + if (irreversible == NULL) \ + /* This means we are in a call from __gconv_transliterate. In this \ + case we are not doing any error recovery ourselves. */ \ + break; \ + \ + /* If needed, flush any conversion state, so that __gconv_transliterate \ + starts with the current shift state. */ \ + UPDATE_PARAMS; \ + \ + /* First try the transliteration methods. */ \ + if ((step_data->__flags & __GCONV_TRANSLIT) != 0) \ + result = __gconv_transliterate \ + (step, step_data, *inptrp, \ + &inptr, inend, &outptr, irreversible); \ + \ + REINIT_PARAMS; \ + \ + /* If any of them recognized the input continue with the loop. */ \ + if (result != __GCONV_ILLEGAL_INPUT) \ + { \ + if (__glibc_unlikely (result == __GCONV_FULL_OUTPUT)) \ + break; \ + \ + continue; \ + } \ + \ + /* Next see whether we have to ignore the error. If not, stop. */ \ + if (! ignore_errors_p ()) \ + break; \ + \ + /* When we come here it means we ignore the character. */ \ + ++*irreversible; \ + inptr += Incr; \ + /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \ + that "iconv -c" must give the same exitcode as "iconv". */ \ + continue; \ + } + + +/* Handling of Unicode 3.1 TAG characters. Unicode recommends + "If language codes are not relevant to the particular processing + operation, then they should be ignored." This macro is usually + called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr). */ +#define UNICODE_TAG_HANDLER(Character, Incr) \ + { \ + /* TAG characters are those in the range U+E0000..U+E007F.
*/ \ + if (((Character) >> 7) == (0xe0000 >> 7)) \ + { \ + inptr += Incr; \ + continue; \ + } \ + } + + +/* The conversion loop. It runs BODY on the input from *INPTRP up to + INEND with output going to *OUTPTRP up to OUTEND, stores the final + positions back through both pointers and returns the status, as + defined in gconv.h. */ +static inline int +__attribute ((always_inline)) +FCTNAME (LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, const unsigned char *outend, + size_t *irreversible EXTRA_LOOP_DECLS) +{ +#ifdef LOOP_NEED_STATE + mbstate_t *state = step_data->__statep; +#endif +#ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +#endif +#ifdef LOOP_NEED_DATA + void *data = step->__data; +#endif + int result = __GCONV_EMPTY_INPUT; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + +#ifdef INIT_PARAMS + INIT_PARAMS; +#endif + + while (inptr != inend) + { + /* The `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are made to help the + compiler generate better code. They will be optimized away + since MIN_NEEDED_OUTPUT is always a constant. */ + if (MIN_NEEDED_INPUT > 1 + && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0)) + { + /* We don't have enough input for another complete input + character. */ + result = __GCONV_INCOMPLETE_INPUT; + break; + } + if ((MIN_NEEDED_OUTPUT != 1 + && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0)) + || (MIN_NEEDED_OUTPUT == 1 + && __builtin_expect (outptr >= outend, 0))) + { + /* Overflow in the output buffer. */ + result = __GCONV_FULL_OUTPUT; + break; + } + + /* Here comes the body the user provides. It can stop with + RESULT set to GCONV_INCOMPLETE_INPUT (if the input + characters vary in size), GCONV_ILLEGAL_INPUT, or + GCONV_FULL_OUTPUT (if the output characters vary in size). */ + BODY + } + + /* Update the pointers pointed to by the parameters. */ + *inptrp = inptr; + *outptrp = outptr; + UPDATE_PARAMS; + + return result; +} + + +/* Include the file a second time to define the function to handle + unaligned access.
*/ +#if !defined DEFINE_UNALIGNED && !_STRING_ARCH_unaligned \ + && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \ + && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0 +# undef get16 +# undef get32 +# undef put16 +# undef put32 +# undef unaligned + +# define DEFINE_UNALIGNED +# include "loop.c" +# undef DEFINE_UNALIGNED +#else +# if MAX_NEEDED_INPUT > 1 /* Define a function which runs BODY once on the bytes saved in the state plus new input. */ +# define SINGLE(fct) SINGLE2 (fct) +# define SINGLE2(fct) fct##_single +static inline int +__attribute ((always_inline)) +SINGLE(LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible EXTRA_LOOP_DECLS) +{ + mbstate_t *state = step_data->__statep; +# ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +# endif +# ifdef LOOP_NEED_DATA + void *data = step->__data; +# endif + int result = __GCONV_OK; + unsigned char bytebuf[MAX_NEEDED_INPUT]; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t inlen; + +# ifdef INIT_PARAMS + INIT_PARAMS; +# endif + +# ifdef UNPACK_BYTES + UNPACK_BYTES +# else + /* Add the bytes from the state to the input buffer. The low three + bits of __count tell how many bytes are taken from __value. */ + assert ((state->__count & 7) <= sizeof (state->__value)); + for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen) + bytebuf[inlen] = state->__value.__wchb[inlen]; +# endif + + /* Are there enough bytes in the input buffer? */ + if (MIN_NEEDED_INPUT > 1 + && __builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0)) + { + *inptrp = inend; +# ifdef STORE_REST + + /* Building with -O3 GCC emits a `array subscript is above array + bounds' warning. GCC BZ #64739 has been opened for this.
*/ + DIAG_PUSH_NEEDS_COMMENT; + DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Warray-bounds"); + while (inptr < inend) + bytebuf[inlen++] = *inptr++; + DIAG_POP_NEEDS_COMMENT; + + inptr = bytebuf; + inptrp = &inptr; + inend = &bytebuf[inlen]; + + STORE_REST +# else + /* We don't have enough input for another complete input + character. Append what there is to the bytes in the state. */ + while (inptr < inend) + state->__value.__wchb[inlen++] = *inptr++; +# endif + + return __GCONV_INCOMPLETE_INPUT; + } + + /* Is there enough space in the output buffer? */ + if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) + || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) + /* Overflow in the output buffer. */ + return __GCONV_FULL_OUTPUT; + + /* Now add characters from the normal input buffer. */ + do + bytebuf[inlen++] = *inptr++; + while (inlen < MAX_NEEDED_INPUT && inptr < inend); + + inptr = bytebuf; + inend = &bytebuf[inlen]; + /* Run the body exactly once. The loop construct is there for the + break and continue statements in the body. */ + do + { + BODY + } + while (0); + + /* Now we either have produced an output character and consumed all the + bytes from the state and at least one more, or the character is still + incomplete, or we have some other error (like illegal input character, + no space in output buffer). */ + if (__glibc_likely (inptr != bytebuf)) + { + /* We found a new character. */ + assert (inptr - bytebuf > (state->__count & 7)); + + *inptrp += inptr - bytebuf - (state->__count & 7); + *outptrp = outptr; + + result = __GCONV_OK; + + /* Clear the state buffer. */ +# ifdef CLEAR_STATE + CLEAR_STATE; +# else + state->__count &= ~7; +# endif + } + else if (result == __GCONV_INCOMPLETE_INPUT) + { + /* This can only happen if we have less than MAX_NEEDED_INPUT bytes + available. */ + assert (inend != &bytebuf[MAX_NEEDED_INPUT]); + + *inptrp += inend - bytebuf - (state->__count & 7); +# ifdef STORE_REST + inptrp = &inptr; + + STORE_REST +# else + /* We don't have enough input for another complete input + character.
*/ + assert (inend - inptr > (state->__count & ~7)); + assert (inend - inptr <= sizeof (state->__value)); + state->__count = (state->__count & ~7) | (inend - inptr); + inlen = 0; + while (inptr < inend) + state->__value.__wchb[inlen++] = *inptr++; +# endif + } + + return result; +} +# undef SINGLE +# undef SINGLE2 +# endif + + +# ifdef ONEBYTE_BODY +/* Define the shortcut function for btowc. */ +static wint_t +gconv_btowc (struct __gconv_step *step, unsigned char c) + ONEBYTE_BODY +# define FROM_ONEBYTE gconv_btowc +# endif + +#endif + +/* We remove the macro definitions so that we can include this file again + for the definition of another function. */ +#undef MIN_NEEDED_INPUT +#undef MAX_NEEDED_INPUT +#undef MIN_NEEDED_OUTPUT +#undef MAX_NEEDED_OUTPUT +#undef LOOPFCT +#undef BODY +#undef LOOPFCT +#undef EXTRA_LOOP_DECLS +#undef INIT_PARAMS +#undef UPDATE_PARAMS +#undef REINIT_PARAMS +#undef ONEBYTE_BODY +#undef UNPACK_BYTES +#undef CLEAR_STATE +#undef LOOP_NEED_STATE +#undef LOOP_NEED_FLAGS +#undef LOOP_NEED_DATA +#undef get16 +#undef get32 +#undef put16 +#undef put32 +#undef unaligned diff --git a/REORG.TODO/iconv/skeleton.c b/REORG.TODO/iconv/skeleton.c new file mode 100644 index 0000000000..a12119dc20 --- /dev/null +++ b/REORG.TODO/iconv/skeleton.c @@ -0,0 +1,821 @@ +/* Skeleton for a conversion module. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file can be included to provide definitions of several things
+   many modules have in common.  It can be customized using the following
+   macros:
+
+     DEFINE_INIT        define the default initializer.  This requires the
+                        following symbol to be defined.
+
+     CHARSET_NAME       string with official name of the coded character
+                        set (in all-caps)
+
+     DEFINE_FINI        define the default destructor function.
+
+     MIN_NEEDED_FROM    minimal number of bytes needed for the from-charset.
+     MIN_NEEDED_TO      likewise for the to-charset.
+
+     MAX_NEEDED_FROM    maximal number of bytes needed for the from-charset.
+                        This macro is optional, it defaults to MIN_NEEDED_FROM.
+     MAX_NEEDED_TO      likewise for the to-charset.
+
+     FROM_LOOP_MIN_NEEDED_FROM
+     FROM_LOOP_MAX_NEEDED_FROM
+                        minimal/maximal number of bytes needed on input
+                        of one round through the FROM_LOOP.  Defaults
+                        to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively.
+     FROM_LOOP_MIN_NEEDED_TO
+     FROM_LOOP_MAX_NEEDED_TO
+                        minimal/maximal number of bytes needed on output
+                        of one round through the FROM_LOOP.  Defaults
+                        to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively.
+     TO_LOOP_MIN_NEEDED_FROM
+     TO_LOOP_MAX_NEEDED_FROM
+                        minimal/maximal number of bytes needed on input
+                        of one round through the TO_LOOP.  Defaults
+                        to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively.
+     TO_LOOP_MIN_NEEDED_TO
+     TO_LOOP_MAX_NEEDED_TO
+                        minimal/maximal number of bytes needed on output
+                        of one round through the TO_LOOP.  Defaults
+                        to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively.
+
+     FROM_DIRECTION     this macro is supposed to return a value != 0
+                        if we convert from the current character set,
+                        otherwise it returns 0.
+
+     EMIT_SHIFT_TO_INIT this symbol is optional.
If it is defined it + defines some code which writes out a sequence + of bytes which bring the current state into + the initial state. + + FROM_LOOP name of the function implementing the conversion + from the current character set. + TO_LOOP likewise for the other direction + + ONE_DIRECTION optional. If defined to 1, only one conversion + direction is defined instead of two. In this + case, FROM_DIRECTION should be defined to 1, and + FROM_LOOP and TO_LOOP should have the same value. + + SAVE_RESET_STATE in case of an error we must reset the state for + the rerun so this macro must be defined for + stateful encodings. It takes an argument which + is nonzero when saving. + + RESET_INPUT_BUFFER If the input character sets allow this the macro + can be defined to reset the input buffer pointers + to cover only those characters up to the error. + + FUNCTION_NAME if not set the conversion function is named `gconv'. + + PREPARE_LOOP optional code preparing the conversion loop. Can + contain variable definitions. + END_LOOP also optional, may be used to store information + + EXTRA_LOOP_ARGS optional macro specifying extra arguments passed + to loop function. + + STORE_REST optional, needed only when MAX_NEEDED_FROM > 4. + This macro stores the seen but unconverted input bytes + in the state. + + FROM_ONEBYTE optional. If defined, should be the name of a + specialized conversion function for a single byte + from the current character set to INTERNAL. This + function has prototype + wint_t + FROM_ONEBYTE (struct __gconv_step *, unsigned char); + and does a special conversion: + - The input is a single byte. + - The output is a single uint32_t. + - The state before the conversion is the initial state; + the state after the conversion is irrelevant. + - No transliteration. + - __invocation_counter = 0. + - __internal_use = 1. + - do_flush = 0. 
+
+   Modules can use mbstate_t to store conversion state as follows:
+
+   * Bits 2..0 of '__count' contain the number of lookahead input bytes
+     stored in __value.__wchb.  Always zero if the converter never
+     returns __GCONV_INCOMPLETE_INPUT.
+
+   * Bits 31..3 of '__count' are module dependent shift state.
+
+   * __value: When STORE_REST/UNPACK_BYTES aren't defined and when the
+     converter has returned __GCONV_INCOMPLETE_INPUT, this contains
+     at most 4 lookahead bytes.  Converters with an mb_cur_max > 4
+     (currently only UTF-8) must find a way to store their state
+     in __value.__wch and define STORE_REST/UNPACK_BYTES appropriately.
+
+   When __value contains lookahead, __count must not be zero, because
+   the converter is not in the initial state then, and mbsinit() --
+   defined as a (__count == 0) test -- must reflect this.
+ */
+
+#include <assert.h>
+#include <gconv.h>
+#include <string.h>
+#define __need_size_t
+#define __need_NULL
+#include <stddef.h>
+
+#ifndef STATIC_GCONV
+# include <dlfcn.h>
+#endif
+
+#include <sysdep.h>
+#include <stdint.h>
+
+/* Unless the environment provides its own wrapper, calling the function
+   of another step is a plain function call.  */
+#ifndef DL_CALL_FCT
+# define DL_CALL_FCT(fct, args) fct args
+#endif
+
+/* The direction objects.  With the default initializer the direction is
+   encoded in step->__data: a null pointer means "from the current
+   character set", the all-ones pointer means the other direction.  A
+   module with its own initializer has to define FROM_DIRECTION itself.  */
+#if DEFINE_INIT
+# ifndef FROM_DIRECTION
+#  define FROM_DIRECTION_VAL NULL
+#  define TO_DIRECTION_VAL ((void *) ~((uintptr_t) 0))
+#  define FROM_DIRECTION (step->__data == FROM_DIRECTION_VAL)
+# endif
+#else
+# ifndef FROM_DIRECTION
+#  error "FROM_DIRECTION must be provided if non-default init is used"
+# endif
+#endif
+
+/* How many bytes are needed at most for the from-charset.  */
+#ifndef MAX_NEEDED_FROM
+# define MAX_NEEDED_FROM MIN_NEEDED_FROM
+#endif
+
+/* Same for the to-charset.  */
+#ifndef MAX_NEEDED_TO
+# define MAX_NEEDED_TO MIN_NEEDED_TO
+#endif
+
+/* Defaults for the per-direction min/max constants.
*/
+#ifndef FROM_LOOP_MIN_NEEDED_FROM
+# define FROM_LOOP_MIN_NEEDED_FROM MIN_NEEDED_FROM
+#endif
+#ifndef FROM_LOOP_MAX_NEEDED_FROM
+# define FROM_LOOP_MAX_NEEDED_FROM MAX_NEEDED_FROM
+#endif
+#ifndef FROM_LOOP_MIN_NEEDED_TO
+# define FROM_LOOP_MIN_NEEDED_TO MIN_NEEDED_TO
+#endif
+#ifndef FROM_LOOP_MAX_NEEDED_TO
+# define FROM_LOOP_MAX_NEEDED_TO MAX_NEEDED_TO
+#endif
+#ifndef TO_LOOP_MIN_NEEDED_FROM
+# define TO_LOOP_MIN_NEEDED_FROM MIN_NEEDED_TO
+#endif
+#ifndef TO_LOOP_MAX_NEEDED_FROM
+# define TO_LOOP_MAX_NEEDED_FROM MAX_NEEDED_TO
+#endif
+#ifndef TO_LOOP_MIN_NEEDED_TO
+# define TO_LOOP_MIN_NEEDED_TO MIN_NEEDED_FROM
+#endif
+#ifndef TO_LOOP_MAX_NEEDED_TO
+# define TO_LOOP_MAX_NEEDED_TO MAX_NEEDED_FROM
+#endif
+
+
+/* Define macros which can access unaligned buffers.  These macros are
+   supposed to be used only in code outside the inner loops.  For the inner
+   loops we have other definitions which allow optimized access.  */
+#if _STRING_ARCH_unaligned
+/* We can handle unaligned memory access.  */
+# define get16u(addr) *((const uint16_t *) (addr))
+# define get32u(addr) *((const uint32_t *) (addr))
+
+/* We need no special support for writing values either.  */
+# define put16u(addr, val) *((uint16_t *) (addr)) = (val)
+# define put32u(addr, val) *((uint32_t *) (addr)) = (val)
+#else
+/* Distinguish between big endian and little endian.  In both cases the
+   value is read and written one byte at a time so that no access wider
+   than a byte is made; only the order of the bytes differs.  */
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define get16u(addr) \
+     (((const unsigned char *) (addr))[1] << 8 \
+      | ((const unsigned char *) (addr))[0])
+#  define get32u(addr) \
+     (((((const unsigned char *) (addr))[3] << 8 \
+        | ((const unsigned char *) (addr))[2]) << 8 \
+       | ((const unsigned char *) (addr))[1]) << 8 \
+      | ((const unsigned char *) (addr))[0])
+
+#  define put16u(addr, val) \
+     ({ uint16_t __val = (val); \
+        ((unsigned char *) (addr))[0] = __val; \
+        ((unsigned char *) (addr))[1] = __val >> 8; \
+        (void) 0; })
+#  define put32u(addr, val) \
+     ({ uint32_t __val = (val); \
+        ((unsigned char *) (addr))[0] = __val; \
+        __val >>= 8; \
+        ((unsigned char *) (addr))[1] = __val; \
+        __val >>= 8; \
+        ((unsigned char *) (addr))[2] = __val; \
+        __val >>= 8; \
+        ((unsigned char *) (addr))[3] = __val; \
+        (void) 0; })
+# else
+#  define get16u(addr) \
+     (((const unsigned char *) (addr))[0] << 8 \
+      | ((const unsigned char *) (addr))[1])
+#  define get32u(addr) \
+     (((((const unsigned char *) (addr))[0] << 8 \
+        | ((const unsigned char *) (addr))[1]) << 8 \
+       | ((const unsigned char *) (addr))[2]) << 8 \
+      | ((const unsigned char *) (addr))[3])
+
+#  define put16u(addr, val) \
+     ({ uint16_t __val = (val); \
+        ((unsigned char *) (addr))[1] = __val; \
+        ((unsigned char *) (addr))[0] = __val >> 8; \
+        (void) 0; })
+#  define put32u(addr, val) \
+     ({ uint32_t __val = (val); \
+        ((unsigned char *) (addr))[3] = __val; \
+        __val >>= 8; \
+        ((unsigned char *) (addr))[2] = __val; \
+        __val >>= 8; \
+        ((unsigned char *) (addr))[1] = __val; \
+        __val >>= 8; \
+        ((unsigned char *) (addr))[0] = __val; \
+        (void) 0; })
+# endif
+#endif
+
+
+/* For conversions from a fixed width character set to another fixed width
+   character set we can define RESET_INPUT_BUFFER in a very fast way.
*/
+#if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE
+# if FROM_LOOP_MIN_NEEDED_FROM == FROM_LOOP_MAX_NEEDED_FROM \
+     && FROM_LOOP_MIN_NEEDED_TO == FROM_LOOP_MAX_NEEDED_TO \
+     && TO_LOOP_MIN_NEEDED_FROM == TO_LOOP_MAX_NEEDED_FROM \
+     && TO_LOOP_MIN_NEEDED_TO == TO_LOOP_MAX_NEEDED_TO
+/* We have to use these `if's here since the compiler cannot know that
+   (outbuf - outerr) is always divisible by FROM/TO_LOOP_MIN_NEEDED_TO.
+   The ?:1 avoids division by zero warnings that gcc 3.2 emits even for
+   obviously unreachable code.
+   The macro moves *inptrp back by the number of input bytes which
+   correspond to the (outbuf - outerr) output bytes that were not
+   accepted.  It uses the variables outbuf, outerr and inptrp of the
+   code it is expanded in.  */
+#  define RESET_INPUT_BUFFER \
+  if (FROM_DIRECTION) \
+    { \
+      if (FROM_LOOP_MIN_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_TO == 0) \
+        *inptrp -= (outbuf - outerr) \
+                   * (FROM_LOOP_MIN_NEEDED_FROM / FROM_LOOP_MIN_NEEDED_TO); \
+      else if (FROM_LOOP_MIN_NEEDED_TO % FROM_LOOP_MIN_NEEDED_FROM == 0) \
+        *inptrp -= (outbuf - outerr) \
+                   / (FROM_LOOP_MIN_NEEDED_TO / FROM_LOOP_MIN_NEEDED_FROM \
+                      ? : 1); \
+      else \
+        *inptrp -= ((outbuf - outerr) / FROM_LOOP_MIN_NEEDED_TO) \
+                   * FROM_LOOP_MIN_NEEDED_FROM; \
+    } \
+  else \
+    { \
+      if (TO_LOOP_MIN_NEEDED_FROM % TO_LOOP_MIN_NEEDED_TO == 0) \
+        *inptrp -= (outbuf - outerr) \
+                   * (TO_LOOP_MIN_NEEDED_FROM / TO_LOOP_MIN_NEEDED_TO); \
+      else if (TO_LOOP_MIN_NEEDED_TO % TO_LOOP_MIN_NEEDED_FROM == 0) \
+        *inptrp -= (outbuf - outerr) \
+                   / (TO_LOOP_MIN_NEEDED_TO / TO_LOOP_MIN_NEEDED_FROM ? : 1); \
+      else \
+        *inptrp -= ((outbuf - outerr) / TO_LOOP_MIN_NEEDED_TO) \
+                   * TO_LOOP_MIN_NEEDED_FROM; \
+    }
+# endif
+#endif
+
+
+/* The default init function.  It simply matches the name and initializes
+   the step data to point to one of the objects above.  It returns
+   __GCONV_NOCONV if CHARSET_NAME is neither the source nor the target
+   name of STEP, and __GCONV_OK otherwise.  */
+#if DEFINE_INIT
+# ifndef CHARSET_NAME
+#  error "CHARSET_NAME not defined"
+# endif
+
+extern int gconv_init (struct __gconv_step *step);
+int
+gconv_init (struct __gconv_step *step)
+{
+  /* Determine which direction.  The byte counts stored in STEP are those
+     of the loop which handles that direction.  */
+  if (strcmp (step->__from_name, CHARSET_NAME) == 0)
+    {
+      step->__data = FROM_DIRECTION_VAL;
+
+      step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
+      step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
+      step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
+      step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
+
+#ifdef FROM_ONEBYTE
+      step->__btowc_fct = FROM_ONEBYTE;
+#endif
+    }
+  else if (__builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) == 0)
+    {
+      step->__data = TO_DIRECTION_VAL;
+
+      step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
+      step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
+      step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
+      step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
+    }
+  else
+    return __GCONV_NOCONV;
+
+#ifdef SAVE_RESET_STATE /* Only stateful modules define this macro.  */
+  step->__stateful = 1;
+#else
+  step->__stateful = 0;
+#endif
+
+  return __GCONV_OK;
+}
+#endif
+
+
+/* The default destructor function does nothing at the moment and so
+   we don't define it at all.  But we still provide the macro just in
+   case we need it some day.  */
+#if DEFINE_FINI
+#endif
+
+
+/* If no arguments have to be passed to the loop function define the macro
+   as empty.  */
+#ifndef EXTRA_LOOP_ARGS
+# define EXTRA_LOOP_ARGS
+#endif
+
+
+/* This is the actual conversion function.  */
+#ifndef FUNCTION_NAME
+# define FUNCTION_NAME gconv
+#endif
+
+/* The macros are used to access the function to convert single characters.
*/
+#define SINGLE(fct) SINGLE2 (fct)
+#define SINGLE2(fct) fct##_single
+
+
+extern int FUNCTION_NAME (struct __gconv_step *step,
+                          struct __gconv_step_data *data,
+                          const unsigned char **inptrp,
+                          const unsigned char *inend,
+                          unsigned char **outbufstart, size_t *irreversible,
+                          int do_flush, int consume_incomplete);
+int
+FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
+               const unsigned char **inptrp, const unsigned char *inend,
+               unsigned char **outbufstart, size_t *irreversible, int do_flush,
+               int consume_incomplete)
+{
+  struct __gconv_step *next_step = step + 1;
+  struct __gconv_step_data *next_data = data + 1;
+  __gconv_fct fct = NULL;
+  int status;
+
+  /* FCT stays NULL for the last step; otherwise it is the conversion
+     function of the following step.  */
+  if ((data->__flags & __GCONV_IS_LAST) == 0)
+    {
+      fct = next_step->__fct;
+#ifdef PTR_DEMANGLE
+      if (next_step->__shlib_handle != NULL)
+        PTR_DEMANGLE (fct);
+#endif
+    }
+
+  /* If the function is called with no input this means we have to reset
+     to the initial state.  The possibly partly converted input is
+     dropped.  */
+  if (__glibc_unlikely (do_flush))
+    {
+      /* This should never happen during error handling.  */
+      assert (outbufstart == NULL);
+
+      status = __GCONV_OK;
+
+#ifdef EMIT_SHIFT_TO_INIT
+      if (do_flush == 1)
+        {
+          /* We preserve the initial values of the pointer variables.  */
+          unsigned char *outbuf = data->__outbuf;
+          unsigned char *outstart = outbuf;
+          unsigned char *outend = data->__outbufend;
+
+# ifdef PREPARE_LOOP
+          PREPARE_LOOP
+# endif
+
+# ifdef SAVE_RESET_STATE
+          SAVE_RESET_STATE (1);
+# endif
+
+          /* Emit the escape sequence to reset the state.  */
+          EMIT_SHIFT_TO_INIT;
+
+          /* Call the steps down the chain if there are any but only if we
+             successfully emitted the escape sequence.  This should only
+             fail if the output buffer is full.  If the input is invalid
+             it should be discarded since the user wants to start from a
+             clean state.  */
+          if (status == __GCONV_OK)
+            {
+              if (data->__flags & __GCONV_IS_LAST)
+                /* Store information about how many bytes are available.  */
+                data->__outbuf = outbuf;
+              else
+                {
+                  /* Write out all output which was produced.  */
+                  if (outbuf > outstart)
+                    {
+                      const unsigned char *outerr = outstart;
+                      int result;
+
+                      result = DL_CALL_FCT (fct, (next_step, next_data,
+                                                  &outerr, outbuf, NULL,
+                                                  irreversible, 0,
+                                                  consume_incomplete));
+
+                      if (result != __GCONV_EMPTY_INPUT)
+                        {
+                          if (__glibc_unlikely (outerr != outbuf))
+                            {
+                              /* We have a problem.  Undo the conversion.  */
+                              outbuf = outstart;
+
+                              /* Restore the state.  */
+# ifdef SAVE_RESET_STATE
+                              SAVE_RESET_STATE (0);
+# endif
+                            }
+
+                          /* Change the status.  */
+                          status = result;
+                        }
+                    }
+
+                  if (status == __GCONV_OK)
+                    /* Now flush the remaining steps.  */
+                    status = DL_CALL_FCT (fct, (next_step, next_data, NULL,
+                                                NULL, NULL, irreversible, 1,
+                                                consume_incomplete));
+                }
+            }
+        }
+      else
+#endif
+        {
+          /* Clear the state object.  There might be bytes in there from
+             previous calls with CONSUME_INCOMPLETE == 1.  But don't emit
+             escape sequences.  */
+          memset (data->__statep, '\0', sizeof (*data->__statep));
+
+          if (! (data->__flags & __GCONV_IS_LAST))
+            /* Now flush the remaining steps.  */
+            status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL,
+                                        NULL, irreversible, do_flush,
+                                        consume_incomplete));
+        }
+    }
+  else
+    {
+      /* We preserve the initial values of the pointer variables,
+         but only some conversion modules need it.  */
+      const unsigned char *inptr __attribute__ ((__unused__)) = *inptrp;
+      unsigned char *outbuf = (__builtin_expect (outbufstart == NULL, 1)
+                               ? data->__outbuf : *outbufstart);
+      unsigned char *outend = data->__outbufend;
+      unsigned char *outstart;
+      /* This variable is used to count the number of characters we
+         actually converted.  */
+      size_t lirreversible = 0;
+      size_t *lirreversiblep = irreversible ? &lirreversible : NULL;
+
+      /* The following assumes that encodings which have a variable length,
+         which might unalign a buffer even though it is aligned at the
+         beginning, either don't have the minimal number of bytes as a divisor
+         of the maximum length or have a minimum length of 1.  This is true
+         for all known and supported encodings.
+         We use && instead of || to combine the subexpression for the FROM
+         encoding and for the TO encoding, because usually one of them is
+         INTERNAL, for which the subexpression evaluates to 1, but INTERNAL
+         buffers are always aligned correctly.  */
+#define POSSIBLY_UNALIGNED \
+  (!_STRING_ARCH_unaligned \
+   && (((FROM_LOOP_MIN_NEEDED_FROM != 1 \
+         && FROM_LOOP_MAX_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_FROM == 0) \
+        && (FROM_LOOP_MIN_NEEDED_TO != 1 \
+            && FROM_LOOP_MAX_NEEDED_TO % FROM_LOOP_MIN_NEEDED_TO == 0)) \
+       || ((TO_LOOP_MIN_NEEDED_FROM != 1 \
+            && TO_LOOP_MAX_NEEDED_FROM % TO_LOOP_MIN_NEEDED_FROM == 0) \
+           && (TO_LOOP_MIN_NEEDED_TO != 1 \
+               && TO_LOOP_MAX_NEEDED_TO % TO_LOOP_MIN_NEEDED_TO == 0))))
+#if POSSIBLY_UNALIGNED
+      int unaligned;
+# define GEN_unaligned(name) GEN_unaligned2 (name)
+# define GEN_unaligned2(name) name##_unaligned
+#else
+# define unaligned 0
+#endif
+
+#ifdef PREPARE_LOOP
+      PREPARE_LOOP
+#endif
+
+#if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1
+      /* If the function is used to implement the mb*towc*() or wc*tomb*()
+         functions we must test whether any bytes from the last call are
+         stored in the `state' object.  */
+      if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1)
+           || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
+           || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION))
+          && consume_incomplete && (data->__statep->__count & 7) != 0)
+        {
+          /* Yep, we have some bytes left over.  Process them now.
+             But this must not happen while we are called from an
+             error handler.  */
+          assert (outbufstart == NULL);
+
+          /* NOTE: the preprocessor conditions below assemble a single
+             if/else statement; whether the `else' is present depends on
+             both loops needing more than one input byte.  */
+# if FROM_LOOP_MAX_NEEDED_FROM > 1
+          if (TO_LOOP_MAX_NEEDED_FROM == 1 || FROM_DIRECTION)
+            status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf,
+                                        outend, lirreversiblep
+                                        EXTRA_LOOP_ARGS);
+# endif
+# if !ONE_DIRECTION
+#  if FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1
+          else
+#  endif
+#  if TO_LOOP_MAX_NEEDED_FROM > 1
+            status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf,
+                                      outend, lirreversiblep EXTRA_LOOP_ARGS);
+#  endif
+# endif
+
+          if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
+            return status;
+        }
+#endif
+
+#if POSSIBLY_UNALIGNED
+      unaligned =
+        ((FROM_DIRECTION
+          && ((uintptr_t) inptr % FROM_LOOP_MIN_NEEDED_FROM != 0
+              || ((data->__flags & __GCONV_IS_LAST)
+                  && (uintptr_t) outbuf % FROM_LOOP_MIN_NEEDED_TO != 0)))
+         || (!FROM_DIRECTION
+             && (((data->__flags & __GCONV_IS_LAST)
+                  && (uintptr_t) outbuf % TO_LOOP_MIN_NEEDED_TO != 0)
+                 || (uintptr_t) inptr % TO_LOOP_MIN_NEEDED_FROM != 0)));
+#endif
+
+      while (1)
+        {
+          /* Remember the start value for this round.  */
+          inptr = *inptrp;
+          /* The outbuf buffer is empty.  */
+          outstart = outbuf;
+
+#ifdef SAVE_RESET_STATE
+          SAVE_RESET_STATE (1);
+#endif
+
+          if (__glibc_likely (!unaligned))
+            {
+              if (FROM_DIRECTION)
+                /* Run the conversion loop.  */
+                status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend,
+                                    lirreversiblep EXTRA_LOOP_ARGS);
+              else
+                /* Run the conversion loop.  */
+                status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend,
+                                  lirreversiblep EXTRA_LOOP_ARGS);
+            }
+#if POSSIBLY_UNALIGNED
+          else
+            {
+              if (FROM_DIRECTION)
+                /* Run the conversion loop.  */
+                status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend,
+                                                    &outbuf, outend,
+                                                    lirreversiblep
+                                                    EXTRA_LOOP_ARGS);
+              else
+                /* Run the conversion loop.  */
+                status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend,
+                                                  &outbuf, outend,
+                                                  lirreversiblep
+                                                  EXTRA_LOOP_ARGS);
+            }
+#endif
+
+          /* If we were called as part of an error handling module we
+             don't do anything else here.  */
+          if (__glibc_unlikely (outbufstart != NULL))
+            {
+              *outbufstart = outbuf;
+              return status;
+            }
+
+          /* We finished one use of the loops.  */
+          ++data->__invocation_counter;
+
+          /* If this is the last step leave the loop, there is nothing
+             we can do.  */
+          if (__glibc_unlikely (data->__flags & __GCONV_IS_LAST))
+            {
+              /* Store information about how many bytes are available.  */
+              data->__outbuf = outbuf;
+
+              /* Remember how many non-identical characters we
+                 converted in an irreversible way.
+                 NOTE(review): IRREVERSIBLE is dereferenced without a
+                 check here although it is tested against NULL above;
+                 presumably the last step always gets a valid pointer --
+                 verify against the callers.  */
+              *irreversible += lirreversible;
+
+              break;
+            }
+
+          /* Write out all output which was produced.  */
+          if (__glibc_likely (outbuf > outstart))
+            {
+              const unsigned char *outerr = data->__outbuf;
+              int result;
+
+              result = DL_CALL_FCT (fct, (next_step, next_data, &outerr,
+                                          outbuf, NULL, irreversible, 0,
+                                          consume_incomplete));
+
+              if (result != __GCONV_EMPTY_INPUT)
+                {
+                  if (__glibc_unlikely (outerr != outbuf))
+                    {
+#ifdef RESET_INPUT_BUFFER
+                      RESET_INPUT_BUFFER;
+#else
+                      /* We have a problem in one of the functions below.
+                         Undo the conversion up to the error point by
+                         running the loop again with the output limited
+                         to OUTERR.  */
+                      size_t nstatus __attribute__ ((unused));
+
+                      /* Reload the pointers.  */
+                      *inptrp = inptr;
+                      outbuf = outstart;
+
+                      /* Restore the state.  */
+# ifdef SAVE_RESET_STATE
+                      SAVE_RESET_STATE (0);
+# endif
+
+                      if (__glibc_likely (!unaligned))
+                        {
+                          if (FROM_DIRECTION)
+                            /* Run the conversion loop.  */
+                            nstatus = FROM_LOOP (step, data, inptrp, inend,
+                                                 &outbuf, outerr,
+                                                 lirreversiblep
+                                                 EXTRA_LOOP_ARGS);
+                          else
+                            /* Run the conversion loop.  */
+                            nstatus = TO_LOOP (step, data, inptrp, inend,
+                                               &outbuf, outerr,
+                                               lirreversiblep
+                                               EXTRA_LOOP_ARGS);
+                        }
+# if POSSIBLY_UNALIGNED
+                      else
+                        {
+                          if (FROM_DIRECTION)
+                            /* Run the conversion loop.  */
+                            nstatus = GEN_unaligned (FROM_LOOP) (step, data,
+                                                                 inptrp, inend,
+                                                                 &outbuf,
+                                                                 outerr,
+                                                                 lirreversiblep
+                                                                 EXTRA_LOOP_ARGS);
+                          else
+                            /* Run the conversion loop.  */
+                            nstatus = GEN_unaligned (TO_LOOP) (step, data,
+                                                               inptrp, inend,
+                                                               &outbuf, outerr,
+                                                               lirreversiblep
+                                                               EXTRA_LOOP_ARGS);
+                        }
+# endif
+
+                      /* We must run out of output buffer space in this
+                         rerun.  */
+                      assert (outbuf == outerr);
+                      assert (nstatus == __GCONV_FULL_OUTPUT);
+
+                      /* If we haven't consumed a single byte decrement
+                         the invocation counter.  */
+                      if (__glibc_unlikely (outbuf == outstart))
+                        --data->__invocation_counter;
+#endif /* reset input buffer */
+                    }
+
+                  /* Change the status.  */
+                  status = result;
+                }
+              else
+                /* All the output is consumed, we can make another run
+                   if everything was ok.  */
+                if (status == __GCONV_FULL_OUTPUT)
+                  {
+                    status = __GCONV_OK;
+                    outbuf = data->__outbuf;
+                  }
+            }
+
+          if (status != __GCONV_OK)
+            break;
+
+          /* Reset the output buffer pointer for the next round.  */
+          outbuf = data->__outbuf;
+        }
+
+#ifdef END_LOOP
+      END_LOOP
+#endif
+
+      /* If we are supposed to consume all characters, store all of the
+         remaining characters in the `state' object now.  */
+#if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1
+      if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1)
+           || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
+           || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION))
+          && __builtin_expect (consume_incomplete, 0)
+          && status == __GCONV_INCOMPLETE_INPUT)
+        {
+# ifdef STORE_REST
+          mbstate_t *state = data->__statep;
+
+          STORE_REST
+# else
+          /* Make sure the remaining bytes fit into the state object's
+             buffer.  */
+          assert (inend - *inptrp < 4);
+
+          /* Copy the rest of the input and record the byte count in the
+             low three bits of __count.  */
+          size_t cnt;
+          for (cnt = 0; *inptrp < inend; ++cnt)
+            data->__statep->__value.__wchb[cnt] = *(*inptrp)++;
+          data->__statep->__count &= ~7;
+          data->__statep->__count |= cnt;
+# endif
+        }
+#endif
+#undef unaligned
+#undef POSSIBLY_UNALIGNED
+    }
+
+  return status;
+}
+
+#undef DEFINE_INIT
+#undef CHARSET_NAME
+#undef DEFINE_FINI
+#undef MIN_NEEDED_FROM
+#undef MIN_NEEDED_TO
+#undef MAX_NEEDED_FROM
+#undef MAX_NEEDED_TO
+#undef FROM_LOOP_MIN_NEEDED_FROM
+#undef FROM_LOOP_MAX_NEEDED_FROM
+#undef FROM_LOOP_MIN_NEEDED_TO
+#undef FROM_LOOP_MAX_NEEDED_TO
+#undef TO_LOOP_MIN_NEEDED_FROM
+#undef TO_LOOP_MAX_NEEDED_FROM
+#undef TO_LOOP_MIN_NEEDED_TO
+#undef TO_LOOP_MAX_NEEDED_TO
+#undef FROM_DIRECTION
+#undef EMIT_SHIFT_TO_INIT
+#undef FROM_LOOP
+#undef TO_LOOP
+#undef ONE_DIRECTION
+#undef SAVE_RESET_STATE
+#undef RESET_INPUT_BUFFER
+#undef FUNCTION_NAME
+#undef PREPARE_LOOP
+#undef END_LOOP
+#undef EXTRA_LOOP_ARGS
+#undef STORE_REST
+#undef FROM_ONEBYTE
diff --git a/REORG.TODO/iconv/strtab.c b/REORG.TODO/iconv/strtab.c
new file mode 100644
index 0000000000..89b28c5dde
--- /dev/null
+++ b/REORG.TODO/iconv/strtab.c
@@ -0,0 +1,339 @@
+/* C string table handling.
+   Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   Written by Ulrich Drepper <drepper@redhat.com>, 2000.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/cdefs.h>
+#include <sys/param.h>
+
+
+struct Strent
+{
+  const char *string;      /* The caller's string; it is not copied.  */
+  size_t len;              /* Length including the terminating NUL.  */
+  struct Strent *next;     /* Shorter strings which are a tail of this one
+                              and share its place in the final table.  */
+  struct Strent *left;     /* Subtree of entries which compare lower.  */
+  struct Strent *right;    /* Subtree of entries which compare higher.  */
+  size_t offset;           /* Offset in the finalized table, set by
+                              strtabfinalize.  */
+  char reverse[0];         /* The string reversed (NUL-terminated); this
+                              is the key used for tree comparisons.  */
+};
+
+
+struct memoryblock
+{
+  struct memoryblock *next; /* Previously allocated block, or NULL.  */
+  char memory[0];           /* Storage the Strent records are carved from.  */
+};
+
+
+struct Strtab
+{
+  struct Strent *root;        /* Root of the search tree.  */
+  struct memoryblock *memory; /* Most recently allocated block.  */
+  char *backp;                /* First unused byte in that block.  */
+  size_t left;                /* Number of unused bytes in that block.  */
+  size_t total;               /* Bytes the strings need in the final table,
+                                 not counting its leading NUL byte.  */
+
+  struct Strent null;         /* Entry returned for the empty string.  */
+};
+
+
+/* Cache for the pagesize.  We correct this value a bit so that `malloc'
+   is not allocating more than a page.  */
+static size_t ps;
+
+
+#include <programs/xmalloc.h>
+
+/* Prototypes for our functions that are used from iconvconfig.c.  If
+   you change these, change also iconvconfig.c.  */
+/* Create new C string table object in memory.  */
+extern struct Strtab *strtabinit (void);
+
+/* Free resources allocated for C string table ST.  */
+extern void strtabfree (struct Strtab *st);
+
+/* Add string STR to C string table ST.  LEN is the length including the
+   terminating NUL; if it is zero the length is computed with strlen.  */
+extern struct Strent *strtabadd (struct Strtab *st, const char *str,
+                                 size_t len);
+
+/* Finalize string table ST and store size in *SIZE and return a pointer.  */
+extern void *strtabfinalize (struct Strtab *st, size_t *size);
+
+/* Get offset in string table for string associated with SE.
*/ +extern size_t strtaboffset (struct Strent *se); + + +struct Strtab * +strtabinit (void) +{ + struct Strtab *ret; + + if (ps == 0) + { + ps = sysconf (_SC_PAGESIZE) - 2 * sizeof (void *); + assert (sizeof (struct memoryblock) < ps); + } + + ret = (struct Strtab *) calloc (1, sizeof (struct Strtab)); + if (ret != NULL) + { + ret->null.len = 1; + ret->null.string = ""; + } + return ret; +} + + +static void +morememory (struct Strtab *st, size_t len) +{ + struct memoryblock *newmem; + + if (len < ps) + len = ps; + newmem = (struct memoryblock *) malloc (len); + if (newmem == NULL) + abort (); + + newmem->next = st->memory; + st->memory = newmem; + st->backp = newmem->memory; + st->left = len - offsetof (struct memoryblock, memory); +} + + +void +strtabfree (struct Strtab *st) +{ + struct memoryblock *mb = st->memory; + + while (mb != NULL) + { + void *old = mb; + mb = mb->next; + free (old); + } + + free (st); +} + + +static struct Strent * +newstring (struct Strtab *st, const char *str, size_t len) +{ + struct Strent *newstr; + size_t align; + int i; + + /* Compute the amount of padding needed to make the structure aligned. */ + align = ((__alignof__ (struct Strent) + - (((uintptr_t) st->backp) + & (__alignof__ (struct Strent) - 1))) + & (__alignof__ (struct Strent) - 1)); + + /* Make sure there is enough room in the memory block. */ + if (st->left < align + sizeof (struct Strent) + len) + { + morememory (st, sizeof (struct Strent) + len); + align = 0; + } + + /* Create the reserved string. 
*/ + newstr = (struct Strent *) (st->backp + align); + newstr->string = str; + newstr->len = len; + newstr->next = NULL; + newstr->left = NULL; + newstr->right = NULL; + newstr->offset = 0; + for (i = len - 2; i >= 0; --i) + newstr->reverse[i] = str[len - 2 - i]; + newstr->reverse[len - 1] = '\0'; + st->backp += align + sizeof (struct Strent) + len; + st->left -= align + sizeof (struct Strent) + len; + + return newstr; +} + + +/* XXX This function should definitely be rewritten to use a balancing + tree algorithm (AVL, red-black trees). For now a simple, correct + implementation is enough. */ +static struct Strent ** +searchstring (struct Strent **sep, struct Strent *newstr) +{ + int cmpres; + + /* More strings? */ + if (*sep == NULL) + { + *sep = newstr; + return sep; + } + + /* Compare the strings. */ + cmpres = memcmp ((*sep)->reverse, newstr->reverse, + MIN ((*sep)->len, newstr->len) - 1); + if (cmpres == 0) + /* We found a matching string. */ + return sep; + else if (cmpres > 0) + return searchstring (&(*sep)->left, newstr); + else + return searchstring (&(*sep)->right, newstr); +} + + +/* Add new string. The actual string is assumed to be permanent. */ +struct Strent * +strtabadd (struct Strtab *st, const char *str, size_t len) +{ + struct Strent *newstr; + struct Strent **sep; + + /* Compute the string length if the caller doesn't know it. */ + if (len == 0) + len = strlen (str) + 1; + + /* Make sure all "" strings get offset 0. */ + if (len == 1) + return &st->null; + + /* Allocate memory for the new string and its associated information. */ + newstr = newstring (st, str, len); + + /* Search in the array for the place to insert the string. If there + is no string with matching prefix and no string with matching + leading substring, create a new entry. */ + sep = searchstring (&st->root, newstr); + if (*sep != newstr) + { + /* This is not the same entry. This means we have a prefix match. 
*/ + if ((*sep)->len > newstr->len) + { + struct Strent *subs; + + for (subs = (*sep)->next; subs; subs = subs->next) + if (subs->len == newstr->len) + { + /* We have an exact match with a substring. Free the memory + we allocated. */ + st->left += st->backp - (char *) newstr; + st->backp = (char *) newstr; + + return subs; + } + + /* We have a new substring. This means we don't need the reverse + string of this entry anymore. */ + st->backp -= newstr->len; + st->left += newstr->len; + + newstr->next = (*sep)->next; + (*sep)->next = newstr; + } + else if ((*sep)->len != newstr->len) + { + /* When we get here it means that the string we are about to + add has a common prefix with a string we already have but + it is longer. In this case we have to put it first. */ + st->total += newstr->len - (*sep)->len; + newstr->next = *sep; + newstr->left = (*sep)->left; + newstr->right = (*sep)->right; + *sep = newstr; + } + else + { + /* We have an exact match. Free the memory we allocated. */ + st->left += st->backp - (char *) newstr; + st->backp = (char *) newstr; + + newstr = *sep; + } + } + else + st->total += newstr->len; + + return newstr; +} + + +static void +copystrings (struct Strent *nodep, char **freep, size_t *offsetp) +{ + struct Strent *subs; + + if (nodep->left != NULL) + copystrings (nodep->left, freep, offsetp); + + /* Process the current node. */ + nodep->offset = *offsetp; + *freep = (char *) mempcpy (*freep, nodep->string, nodep->len); + *offsetp += nodep->len; + + for (subs = nodep->next; subs != NULL; subs = subs->next) + { + assert (subs->len < nodep->len); + subs->offset = nodep->offset + nodep->len - subs->len; + } + + if (nodep->right != NULL) + copystrings (nodep->right, freep, offsetp); +} + + +void * +strtabfinalize (struct Strtab *st, size_t *size) +{ + size_t copylen; + char *endp; + char *retval; + + /* Fill in the information. 
*/ + endp = retval = (char *) xmalloc (st->total + 1); + + /* Always put an empty string at the beginning so that a zero offset + can mean error. */ + *endp++ = '\0'; + + /* Now run through the tree and add all the string while also updating + the offset members of the elfstrent records. */ + copylen = 1; + copystrings (st->root, &endp, ©len); + assert (copylen == st->total + 1); + assert (endp == retval + st->total + 1); + *size = copylen; + + return retval; +} + + +size_t +strtaboffset (struct Strent *se) +{ + return se->offset; +} diff --git a/REORG.TODO/iconv/tst-iconv1.c b/REORG.TODO/iconv/tst-iconv1.c new file mode 100644 index 0000000000..0609f50e50 --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv1.c @@ -0,0 +1,47 @@ +/* Test case by yaoz@nih.gov. */ + +#include <iconv.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> + +static int +do_test (void) +{ + char utf8[5]; + wchar_t ucs4[5]; + iconv_t cd; + char *inbuf; + char *outbuf; + size_t inbytes; + size_t outbytes; + size_t n; + + strcpy (utf8, "abcd"); + + /* From UTF8 to UCS4. */ + cd = iconv_open ("UCS4", "UTF8"); + if (cd == (iconv_t) -1) + { + perror ("iconv_open"); + return 1; + } + + inbuf = utf8; + inbytes = 4; + outbuf = (char *) ucs4; + outbytes = 4 * sizeof (wchar_t); /* "Argument list too long" error. */ + n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes); + if (n == (size_t) -1) + { + printf ("iconv: %m\n"); + iconv_close (cd); + return 1; + } + iconv_close (cd); + + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/iconv/tst-iconv2.c b/REORG.TODO/iconv/tst-iconv2.c new file mode 100644 index 0000000000..af78d78350 --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv2.c @@ -0,0 +1,102 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <iconv.h> +#include <mcheck.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +static int +do_test (void) +{ + char buf[3]; + const wchar_t wc[1] = L"a"; + iconv_t cd; + char *inptr; + size_t inlen; + char *outptr; + size_t outlen; + size_t n; + int e; + int result = 0; + + mtrace (); + + cd = iconv_open ("UCS4", "WCHAR_T"); + if (cd == (iconv_t) -1) + { + printf ("cannot convert from wchar_t to UCS4: %m\n"); + exit (1); + } + + inptr = (char *) wc; + inlen = sizeof (wchar_t); + outptr = buf; + outlen = 3; + + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + e = errno; + + if (n != (size_t) -1) + { + printf ("incorrect iconv() return value: %zd, expected -1\n", n); + result = 1; + } + + if (e != E2BIG) + { + printf ("incorrect error value: %s, expected %s\n", + strerror (e), strerror (E2BIG)); + result = 1; + } + + if (inptr != (char *) wc) + { + puts ("inptr changed"); + result = 1; + } + + if (inlen != sizeof (wchar_t)) + { + puts ("inlen changed"); + result = 1; + } + + if (outptr != buf) + { + puts ("outptr changed"); + result = 1; + } + + if (outlen != 3) + { + puts ("outlen changed"); + result = 1; + } + + iconv_close (cd); + + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git 
a/REORG.TODO/iconv/tst-iconv3.c b/REORG.TODO/iconv/tst-iconv3.c new file mode 100644 index 0000000000..b06f75f0bc --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv3.c @@ -0,0 +1,56 @@ +/* Contributed by Owen Taylor <otaylor@redhat.com>. */ + +#include <iconv.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> + +#define BUFSIZE 10000 + +static int +do_test (void) +{ + char inbuf[BUFSIZE]; + wchar_t outbuf[BUFSIZE]; + + iconv_t cd; + int i; + char *inptr; + char *outptr; + size_t inbytes_left, outbytes_left; + int count; + int result = 0; + + for (i=0; i < BUFSIZE; i++) + inbuf[i] = 'a'; + + cd = iconv_open ("UCS-4LE", "UTF-8"); + + inbytes_left = BUFSIZE; + outbytes_left = BUFSIZE * 4; + inptr = inbuf; + outptr = (char *) outbuf; + + count = iconv (cd, &inptr, &inbytes_left, &outptr, &outbytes_left); + + if (count < 0) + { + if (errno == E2BIG) + printf ("Received E2BIG\n"); + else + printf ("Received something else\n"); + + printf ("inptr change: %td\n", inptr - inbuf); + printf ("inlen change: %zd\n", BUFSIZE - inbytes_left); + printf ("outptr change: %td\n", outptr - (char *) outbuf); + printf ("outlen change: %zd\n", BUFSIZE * 4 - outbytes_left); + result = 1; + } + else + printf ("Succeeded\n"); + + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/iconv/tst-iconv4.c b/REORG.TODO/iconv/tst-iconv4.c new file mode 100644 index 0000000000..b5ff39306c --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv4.c @@ -0,0 +1,65 @@ +// Derived from BZ #9793 +#include <errno.h> +#include <iconv.h> +#include <stdio.h> + + +static int +do_test (void) +{ + iconv_t cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); + if (cd == (iconv_t) -1) + { + puts ("iconv_open failed"); + return 1; + } + + char input[2] = { 0xc2, 0xae }; /* Registered trademark */ + char *inptr = input; + size_t insize = sizeof (input); + char output[2]; /* Too short to contain "(R)". 
*/ + char *outptr = output; + size_t outsize = sizeof (output); + + size_t ret = iconv (cd, &inptr, &insize, &outptr, &outsize); + if (ret != (size_t) -1) + { + puts ("iconv succeeded"); + return 1; + } + if (errno != E2BIG) + { + puts ("iconv did not set errno to E2BIG"); + return 1; + } + int res = 0; + if (inptr != input) + { + puts ("inptr changed"); + res = 1; + } + if (insize != sizeof (input)) + { + puts ("insize changed"); + res = 1; + } + if (outptr != output) + { + puts ("outptr changed"); + res = 1; + } + if (outsize != sizeof (output)) + { + puts ("outsize changed"); + res = 1; + } + if (iconv_close (cd) == -1) + { + puts ("iconv_close failed"); + res = 1; + } + return res; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/iconv/tst-iconv5.c b/REORG.TODO/iconv/tst-iconv5.c new file mode 100644 index 0000000000..52f93d6695 --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv5.c @@ -0,0 +1,161 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by GOTO Masanori <gotom@debian.or.jp>, 2004 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <iconv.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> + +#define SIZE 256 /* enough room for conversion */ +#define SAMPLESTR "abc" + +struct unalign +{ + char str1[1]; + char str2[SIZE]; +}; + +struct convcode +{ + const char *tocode; + const char *fromcode; +}; + +/* test builtin transformation */ +static const struct convcode testcode[] = { + {"ASCII", "ASCII"}, + {"UTF-8", "ASCII"}, + {"UCS-2BE", "ASCII"}, + {"UCS-2LE", "ASCII"}, + {"UCS-4BE", "ASCII"}, + {"UCS-4LE", "ASCII"}, +}; + +static const int number = (int) sizeof (testcode) / sizeof (struct convcode); + +static int +convert (const char *tocode, const char *fromcode, char *inbufp, + size_t inbytesleft, char *outbufp, size_t outbytesleft) +{ + iconv_t *ic; + size_t outbytes = outbytesleft; + int ret; + + ic = iconv_open (tocode, fromcode); + if (ic == (iconv_t *) - 1) + { + printf ("iconv_open failed: from: %s, to: %s: %s", + fromcode, tocode, strerror (errno)); + return -1; + } + + while (inbytesleft > 0) + { + ret = iconv (ic, &inbufp, &inbytesleft, &outbufp, &outbytes); + if (ret == -1) + { + printf ("iconv failed: from: %s, to: %s: %s", + fromcode, tocode, strerror (errno)); + return -1; + } + } + + ret = iconv_close (ic); + if (ret == -1) + { + printf ("iconv_close failed: from: %s, to: %s: %s", + fromcode, tocode, strerror (errno)); + return -1; + } + + return outbytesleft - outbytes; +} + + +static int +test_unalign (const struct convcode *codes, const char *str, int len) +{ + struct unalign *inbufp, *outbufp; + char *inbuf, *outbuf; + size_t inbytesleft, outbytesleft; + int retlen; + + /* allocating unaligned buffer for both inbuf and outbuf */ + inbufp = (struct unalign *) malloc (sizeof (struct unalign)); + if (!inbufp) + { + printf ("no memory available\n"); + exit (1); + } + inbuf = inbufp->str2; + + outbufp = (struct unalign *) malloc (sizeof (struct unalign)); + if (!outbufp) + { + printf ("no memory available\n"); + exit (1); + } + 
outbuf = outbufp->str2; + + /* first iconv phase */ + memcpy (inbuf, str, len); + inbytesleft = len; + outbytesleft = sizeof (struct unalign); + retlen = convert (codes->tocode, codes->fromcode, inbuf, inbytesleft, + outbuf, outbytesleft); + if (retlen == -1) /* failed */ + return 1; + + /* second round trip iconv phase */ + memcpy (inbuf, outbuf, retlen); + inbytesleft = retlen; + outbytesleft = sizeof (struct unalign); + retlen = convert (codes->fromcode, codes->tocode, inbuf, inbytesleft, + outbuf, outbytesleft); + if (retlen == -1) /* failed */ + return 1; + + free (inbufp); + free (outbufp); + + return 0; +} + +static int +do_test (void) +{ + int i; + int ret = 0; + + for (i = 0; i < number; i++) + { + ret = test_unalign (&testcode[i], (char *) SAMPLESTR, sizeof (SAMPLESTR)); + if (ret) + break; + printf ("iconv: %s <-> %s: ok\n", + testcode[i].fromcode, testcode[i].tocode); + } + if (ret == 0) + printf ("Succeeded.\n"); + + return ret; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/iconv/tst-iconv6.c b/REORG.TODO/iconv/tst-iconv6.c new file mode 100644 index 0000000000..ace7dc68b2 --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv6.c @@ -0,0 +1,118 @@ +/* Testing ucs4le_internal_loop() in gconv_simple.c. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <inttypes.h> +#include <iconv.h> +#include <byteswap.h> +#include <endian.h> + +static int +do_test (void) +{ + iconv_t cd; + char *inptr; + size_t inlen; + char *outptr; + size_t outlen; + size_t n; + int e; + int result = 0; + +#if __BYTE_ORDER == __BIG_ENDIAN + /* On big-endian machines, ucs4le_internal_loop() swaps the bytes before + error checking. Thus the input values has to be swapped. */ +# define VALUE(val) bswap_32 (val) +#else +# define VALUE(val) val +#endif + uint32_t inbuf[3] = { VALUE (0x41), VALUE (0x80000000), VALUE (0x42) }; + uint32_t outbuf[3] = { 0, 0, 0 }; + + cd = iconv_open ("WCHAR_T", "UCS-4LE"); + if (cd == (iconv_t) -1) + { + printf ("cannot convert from UCS4LE to wchar_t: %m\n"); + return 1; + } + + inptr = (char *) inbuf; + inlen = sizeof (inbuf); + outptr = (char *) outbuf; + outlen = sizeof (outbuf); + + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + e = errno; + + if (n != (size_t) -1) + { + printf ("incorrect iconv() return value: %zd, expected -1\n", n); + result = 1; + } + + if (e != EILSEQ) + { + printf ("incorrect error value: %s, expected %s\n", + strerror (e), strerror (EILSEQ)); + result = 1; + } + + if (inptr != (char *) &inbuf[1]) + { + printf ("inptr=0x%p does not point to invalid character! Expected=0x%p\n" + , inptr, &inbuf[1]); + result = 1; + } + + if (inlen != sizeof (inbuf) - sizeof (uint32_t)) + { + printf ("inlen=%zd != %zd\n" + , inlen, sizeof (inbuf) - sizeof (uint32_t)); + result = 1; + } + + if (outptr != (char *) &outbuf[1]) + { + printf ("outptr=0x%p does not point to invalid character in inbuf! 
" + "Expected=0x%p\n" + , outptr, &outbuf[1]); + result = 1; + } + + if (outlen != sizeof (inbuf) - sizeof (uint32_t)) + { + printf ("outlen=%zd != %zd\n" + , outlen, sizeof (outbuf) - sizeof (uint32_t)); + result = 1; + } + + if (outbuf[0] != 0x41 || outbuf[1] != 0 || outbuf[2] != 0) + { + puts ("Characters conversion is incorrect!"); + result = 1; + } + + iconv_close (cd); + + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" |