about summary refs log tree commit diff
path: root/iconv
diff options
context:
space:
mode:
Diffstat (limited to 'iconv')
-rw-r--r-- iconv/gconv_builtin.c 9
-rw-r--r-- iconv/gconv_builtin.h 36
-rw-r--r-- iconv/gconv_cache.c 4
-rw-r--r-- iconv/gconv_conf.c 13
-rw-r--r-- iconv/gconv_db.c 7
-rw-r--r-- iconv/gconv_int.h 36
-rw-r--r-- iconv/gconv_simple.c 17
-rw-r--r-- iconv/iconvconfig.c 10
-rw-r--r-- iconv/loop.c 13
-rw-r--r-- iconv/skeleton.c 28
10 files changed, 127 insertions, 46 deletions
diff --git a/iconv/gconv_builtin.c b/iconv/gconv_builtin.c
index 45bd4e7e00..f653d6c7f3 100644
--- a/iconv/gconv_builtin.c
+++ b/iconv/gconv_builtin.c
@@ -1,5 +1,5 @@
 /* Table for builtin transformation mapping.
-   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1997-1999, 2000-2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -31,6 +31,7 @@ static struct builtin_map
 {
   const char *name;
   __gconv_fct fct;
+  __gconv_btowc_fct btowc_fct;
 
   int min_needed_from;
   int max_needed_from;
@@ -39,11 +40,12 @@ static struct builtin_map
 
 } map[] =
 {
-#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \
-			       MinT, MaxT) \
+#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
+			       MinF, MaxF, MinT, MaxT) \
   {									      \
     .name = Name,							      \
     .fct = Fct,								      \
+    .btowc_fct = BtowcFct,						      \
 									      \
     .min_needed_from = MinF,						      \
     .max_needed_from = MaxF,						      \
@@ -69,6 +71,7 @@ __gconv_get_builtin_trans (const char *name, struct __gconv_step *step)
   assert (cnt < sizeof (map) / sizeof (map[0]));
 
   step->__fct = map[cnt].fct;
+  step->__btowc_fct = map[cnt].btowc_fct;
   step->__init_fct = NULL;
   step->__end_fct = NULL;
   step->__shlib_handle = NULL;
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index 1a9d8a80f1..bd34c256dc 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -1,5 +1,5 @@
 /* Builtin transformations.
-   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1997-1999, 2000-2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -30,14 +30,14 @@ BUILTIN_ALIAS ("OSF00010105//", "ISO-10646/UCS4/") /* level 2 */
 BUILTIN_ALIAS ("OSF00010106//", "ISO-10646/UCS4/") /* level 3 */
 
 BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4",
-			__gconv_transform_internal_ucs4, 4, 4, 4, 4)
+			__gconv_transform_internal_ucs4, NULL, 4, 4, 4, 4)
 BUILTIN_TRANSFORMATION ("ISO-10646/UCS4/", "INTERNAL", 1, "=ucs4->INTERNAL",
-			__gconv_transform_ucs4_internal, 4, 4, 4, 4)
+			__gconv_transform_ucs4_internal, NULL, 4, 4, 4, 4)
 
 BUILTIN_TRANSFORMATION ("INTERNAL", "UCS-4LE//", 1, "=INTERNAL->ucs4le",
-			__gconv_transform_internal_ucs4le, 4, 4, 4, 4)
+			__gconv_transform_internal_ucs4le, NULL, 4, 4, 4, 4)
 BUILTIN_TRANSFORMATION ("UCS-4LE//", "INTERNAL", 1, "=ucs4le->INTERNAL",
-			__gconv_transform_ucs4le_internal, 4, 4, 4, 4)
+			__gconv_transform_ucs4le_internal, NULL, 4, 4, 4, 4)
 
 BUILTIN_ALIAS ("WCHAR_T//", "INTERNAL")
 
@@ -48,10 +48,11 @@ BUILTIN_ALIAS ("OSF05010001//", "ISO-10646/UTF8/")
 BUILTIN_ALIAS ("ISO-10646/UTF-8/", "ISO-10646/UTF8/")
 
 BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UTF8/", 1, "=INTERNAL->utf8",
-			__gconv_transform_internal_utf8, 4, 4, 1, 6)
+			__gconv_transform_internal_utf8, NULL, 4, 4, 1, 6)
 
 BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "INTERNAL", 1, "=utf8->INTERNAL",
-			__gconv_transform_utf8_internal, 1, 6, 4, 4)
+			__gconv_transform_utf8_internal, __gconv_btwoc_ascii,
+			1, 6, 4, 4)
 
 BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/")
 BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/")
@@ -60,10 +61,10 @@ BUILTIN_ALIAS ("OSF00010101//", "ISO-10646/UCS2/") /* level 2 */
 BUILTIN_ALIAS ("OSF00010102//", "ISO-10646/UCS2/") /* level 3 */
 
 BUILTIN_TRANSFORMATION ("ISO-10646/UCS2/", "INTERNAL", 1, "=ucs2->INTERNAL",
-			__gconv_transform_ucs2_internal, 2, 2, 4, 4)
+			__gconv_transform_ucs2_internal, NULL, 2, 2, 4, 4)
 
 BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS2/", 1, "=INTERNAL->ucs2",
-			__gconv_transform_internal_ucs2, 4, 4, 2, 2)
+			__gconv_transform_internal_ucs2, NULL, 4, 4, 2, 2)
 
 
 BUILTIN_ALIAS ("ANSI_X3.4//", "ANSI_X3.4-1968//")
@@ -80,10 +81,11 @@ BUILTIN_ALIAS ("CSASCII//", "ANSI_X3.4-1968//")
 BUILTIN_ALIAS ("OSF00010020//", "ANSI_X3.4-1968//")
 
 BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "INTERNAL", 1, "=ascii->INTERNAL",
-			__gconv_transform_ascii_internal, 4, 4, 1, 1)
+			__gconv_transform_ascii_internal, __gconv_btwoc_ascii,
+			1, 1, 4, 4) /* MinF, MaxF = 1 (ASCII); MinT, MaxT = 4.  */
 
 BUILTIN_TRANSFORMATION ("INTERNAL", "ANSI_X3.4-1968//", 1, "=INTERNAL->ascii",
-			__gconv_transform_internal_ascii, 4, 4, 1, 1)
+			__gconv_transform_internal_ascii, NULL, 4, 4, 1, 1)
 
 
 #if BYTE_ORDER == BIG_ENDIAN
@@ -94,11 +96,13 @@ BUILTIN_ALIAS ("UCS-2LE//", "UNICODELITTLE//")
 
 BUILTIN_TRANSFORMATION ("UNICODELITTLE//", "INTERNAL", 1,
 			"=ucs2reverse->INTERNAL",
-			__gconv_transform_ucs2reverse_internal, 2, 2, 4, 4)
+			__gconv_transform_ucs2reverse_internal, NULL,
+			2, 2, 4, 4)
 
 BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODELITTLE//", 1,
 			"=INTERNAL->ucs2reverse",
-			__gconv_transform_internal_ucs2reverse, 4, 4, 2, 2)
+			__gconv_transform_internal_ucs2reverse, NULL,
+			4, 4, 2, 2)
 #else
 BUILTIN_ALIAS ("UNICODELITTLE//", "ISO-10646/UCS2/")
 BUILTIN_ALIAS ("UCS-2LE//", "ISO-10646/UCS2/")
@@ -107,9 +111,11 @@ BUILTIN_ALIAS ("UCS-2BE//", "UNICODEBIG//")
 
 BUILTIN_TRANSFORMATION ("UNICODEBIG//", "INTERNAL", 1,
 			"=ucs2reverse->INTERNAL",
-			__gconv_transform_ucs2reverse_internal, 2, 2, 4, 4)
+			__gconv_transform_ucs2reverse_internal, NULL,
+			2, 2, 4, 4)
 
 BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1,
 			"=INTERNAL->ucs2reverse",
-			__gconv_transform_internal_ucs2reverse, 4, 4, 2, 2)
+			__gconv_transform_internal_ucs2reverse, NULL,
+			4, 4, 2, 2)
 #endif
diff --git a/iconv/gconv_cache.c b/iconv/gconv_cache.c
index 8f92cbaf67..882acc6fa4 100644
--- a/iconv/gconv_cache.c
+++ b/iconv/gconv_cache.c
@@ -201,7 +201,11 @@ find_module (const char *directory, const char *filename,
       result->__init_fct = result->__shlib_handle->init_fct;
       result->__end_fct = result->__shlib_handle->end_fct;
 
+      /* These settings can be overridden by the init function.  */
+      result->__btowc_fct = NULL;
       result->__data = NULL;
+
+      /* Call the init function.  */
       if (result->__init_fct != NULL)
 	status = DL_CALL_FCT (result->__init_fct, (result));
     }
diff --git a/iconv/gconv_conf.c b/iconv/gconv_conf.c
index cd5055c303..1262e76618 100644
--- a/iconv/gconv_conf.c
+++ b/iconv/gconv_conf.c
@@ -61,8 +61,8 @@ static const char gconv_module_ext[] = MODULE_EXT;
 /* We have a few builtin transformations.  */
 static struct gconv_module builtin_modules[] =
 {
-#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \
-			       MinT, MaxT) \
+#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
+			       MinF, MaxF, MinT, MaxT) \
   {									      \
     from_string: From,							      \
     to_string: To,							      \
@@ -73,18 +73,21 @@ static struct gconv_module builtin_modules[] =
 #define BUILTIN_ALIAS(From, To)
 
 #include "gconv_builtin.h"
-};
 
 #undef BUILTIN_TRANSFORMATION
 #undef BUILTIN_ALIAS
+};
 
 static const char *builtin_aliases[] =
 {
-#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \
-			       MinT, MaxT)
+#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
+			       MinF, MaxF, MinT, MaxT)
 #define BUILTIN_ALIAS(From, To) From " " To,
 
 #include "gconv_builtin.h"
+
+#undef BUILTIN_TRANSFORMATION
+#undef BUILTIN_ALIAS
 };
 
 #ifdef USE_IN_LIBIO
diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c
index 70c33df16d..020b556d5e 100644
--- a/iconv/gconv_db.c
+++ b/iconv/gconv_db.c
@@ -268,6 +268,9 @@ gen_steps (struct derivation_step *best, const char *toset,
 	      result[step_cnt].__init_fct = shlib_handle->init_fct;
 	      result[step_cnt].__end_fct = shlib_handle->end_fct;
 
+	      /* These settings can be overridden by the init function.  */
+	      result[step_cnt].__btowc_fct = NULL;
+
 	      /* Call the init function.  */
 	      if (result[step_cnt].__init_fct != NULL)
 		{
@@ -353,8 +356,12 @@ increment_counter (struct __gconv_step *steps, size_t nsteps)
 	      step->__fct = step->__shlib_handle->fct;
 	      step->__init_fct = step->__shlib_handle->init_fct;
 	      step->__end_fct = step->__shlib_handle->end_fct;
+
+	      /* These settings can be overridden by the init function.  */
+	      step->__btowc_fct = NULL;
 	    }
 
+	  /* Call the init function.  */
 	  if (step->__init_fct != NULL)
 	    DL_CALL_FCT (step->__init_fct, (step));
 	}
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 1ac878d900..dec29d9c14 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -264,7 +264,7 @@ extern int __gconv_transliterate (struct __gconv_step *step,
 
 /* Builtin transformations.  */
 #ifdef _LIBC
-# define __BUILTIN_TRANS(Name) \
+# define __BUILTIN_TRANSFORM(Name) \
   extern int Name (struct __gconv_step *step,				      \
 		   struct __gconv_step_data *data,			      \
 		   const unsigned char **inbuf,				      \
@@ -272,21 +272,25 @@ extern int __gconv_transliterate (struct __gconv_step *step,
 		   unsigned char **outbufstart, size_t *irreversible,	      \
 		   int do_flush, int consume_incomplete)
 
-__BUILTIN_TRANS (__gconv_transform_ascii_internal);
-__BUILTIN_TRANS (__gconv_transform_internal_ascii);
-__BUILTIN_TRANS (__gconv_transform_utf8_internal);
-__BUILTIN_TRANS (__gconv_transform_internal_utf8);
-__BUILTIN_TRANS (__gconv_transform_ucs2_internal);
-__BUILTIN_TRANS (__gconv_transform_internal_ucs2);
-__BUILTIN_TRANS (__gconv_transform_ucs2reverse_internal);
-__BUILTIN_TRANS (__gconv_transform_internal_ucs2reverse);
-__BUILTIN_TRANS (__gconv_transform_internal_ucs4);
-__BUILTIN_TRANS (__gconv_transform_ucs4_internal);
-__BUILTIN_TRANS (__gconv_transform_internal_ucs4le);
-__BUILTIN_TRANS (__gconv_transform_ucs4le_internal);
-__BUILTIN_TRANS (__gconv_transform_internal_utf16);
-__BUILTIN_TRANS (__gconv_transform_utf16_internal);
-# undef __BUITLIN_TRANS
+__BUILTIN_TRANSFORM (__gconv_transform_ascii_internal);
+__BUILTIN_TRANSFORM (__gconv_transform_internal_ascii);
+__BUILTIN_TRANSFORM (__gconv_transform_utf8_internal);
+__BUILTIN_TRANSFORM (__gconv_transform_internal_utf8);
+__BUILTIN_TRANSFORM (__gconv_transform_ucs2_internal);
+__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs2);
+__BUILTIN_TRANSFORM (__gconv_transform_ucs2reverse_internal);
+__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs2reverse);
+__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4);
+__BUILTIN_TRANSFORM (__gconv_transform_ucs4_internal);
+__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le);
+__BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal);
+__BUILTIN_TRANSFORM (__gconv_transform_internal_utf16);
+__BUILTIN_TRANSFORM (__gconv_transform_utf16_internal);
+# undef __BUILTIN_TRANSFORM /* Drop the declaration helper again.  */
+
+/* Specialized conversion function for a single byte to INTERNAL, recognizing
+   only ASCII characters.  */
+extern wint_t __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c);
 
 #endif
 
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 35346aa498..3937b95c92 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -28,10 +28,11 @@
 #include <string.h>
 #include <wchar.h>
 #include <sys/param.h>
+#include <gconv_int.h>
 
 #define BUILTIN_ALIAS(s1, s2) /* nothing */
-#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \
-			       MinT, MaxT) \
+#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
+			       MinF, MaxF, MinT, MaxT) \
   extern int Fct (struct __gconv_step *, struct __gconv_step_data *,	      \
 		  __const unsigned char **, __const unsigned char *,	      \
 		  unsigned char **, size_t *, int, int);
@@ -43,6 +44,18 @@
 #endif
 
 
+/* Convert the single byte C to INTERNAL: values below 0x80 (ASCII) are
+   returned unchanged, anything else yields WEOF.  STEP is not examined.  */
+wint_t
+__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
+{
+  if (c < 0x80)
+    return c;
+  else
+    return WEOF;
+}
+
+
 /* Transform from the internal, UCS4-like format, to UCS4.  The
    difference between the internal ucs4 format and the real UCS4
    format is, if any, the endianess.  The Unicode/ISO 10646 says that
diff --git a/iconv/iconvconfig.c b/iconv/iconvconfig.c
index 62e3e4e413..a3c0a4b01f 100644
--- a/iconv/iconvconfig.c
+++ b/iconv/iconvconfig.c
@@ -201,8 +201,8 @@ static struct
   {
 #define BUILTIN_ALIAS(alias, real) \
     { .from = alias, .to = real },
-#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \
-			       MinT, MaxT)
+#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
+			       MinF, MaxF, MinT, MaxT)
 #include <gconv_builtin.h>
   };
 #undef BUILTIN_ALIAS
@@ -218,11 +218,13 @@ static struct
 } builtin_trans[] =
   {
 #define BUILTIN_ALIAS(alias, real)
-#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \
-			       MinT, MaxT) \
+#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
+			       MinF, MaxF, MinT, MaxT) \
     { .from = From, .to = To, .module = Name, .cost = Cost },
 #include <gconv_builtin.h>
   };
+#undef BUILTIN_ALIAS
+#undef BUILTIN_TRANSFORMATION
 #define nbuiltin_trans (sizeof (builtin_trans) / sizeof (builtin_trans[0]))
 
 
diff --git a/iconv/loop.c b/iconv/loop.c
index deb0173930..b0b76f32e8 100644
--- a/iconv/loop.c
+++ b/iconv/loop.c
@@ -43,6 +43,9 @@
 
      INIT_PARAMS	code to define and initialize variables from params.
      UPDATE_PARAMS	code to store result in params.
+
+     ONEBYTE_BODY	body of the specialized conversion function for a
+			single byte from the current character set to INTERNAL.
 */
 
 #include <assert.h>
@@ -453,6 +456,15 @@ SINGLE(LOOPFCT) (struct __gconv_step *step,
 #endif
 
 
+#ifdef ONEBYTE_BODY
+/* btowc shortcut; ONEBYTE_BODY expands to the body, FROM_ONEBYTE names it.  */
+static wint_t
+gconv_btowc (struct __gconv_step *step, unsigned char c)
+  ONEBYTE_BODY
+# define FROM_ONEBYTE gconv_btowc
+#endif
+
+
 /* We remove the macro definitions so that we can include this file again
    for the definition of another function.  */
 #undef MIN_NEEDED_INPUT
@@ -465,6 +477,7 @@ SINGLE(LOOPFCT) (struct __gconv_step *step,
 #undef EXTRA_LOOP_DECLS
 #undef INIT_PARAMS
 #undef UPDATE_PARAMS
+#undef ONEBYTE_BODY
 #undef UNPACK_BYTES
 #undef LOOP_NEED_STATE
 #undef LOOP_NEED_FLAGS
diff --git a/iconv/skeleton.c b/iconv/skeleton.c
index edcd92eb87..579426c7e5 100644
--- a/iconv/skeleton.c
+++ b/iconv/skeleton.c
@@ -101,6 +101,26 @@
      EXTRA_LOOP_ARGS	optional macro specifying extra arguments passed
 			to loop function.
 
+     STORE_REST		optional, needed only when MAX_NEEDED_FROM > 4.
+			This macro stores the seen but unconverted input bytes
+			in the state.
+
+     FROM_ONEBYTE	optional.  If defined, should be the name of a
+			specialized conversion function for a single byte
+			from the current character set to INTERNAL.  This
+			function has prototype
+			   wint_t
+			   FROM_ONEBYTE (struct __gconv_step *, unsigned char);
+			and does a special conversion:
+			- The input is a single byte.
+			- The output is a single uint32_t.
+			- The state before the conversion is the initial state;
+			  the state after the conversion is irrelevant.
+			- No transliteration.
+			- __invocation_counter = 0.
+			- __internal_use = 1.
+			- do_flush = 0.
+
    Modules can use mbstate_t to store conversion state as follows:
 
    * Bits 2..0 of '__count' contain the number of lookahead input bytes
@@ -315,6 +335,10 @@ gconv_init (struct __gconv_step *step)
       step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
       step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
       step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
+
+#ifdef FROM_ONEBYTE
+      step->__btowc_fct = FROM_ONEBYTE;
+#endif
     }
   else if (__builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) == 0)
     {
@@ -796,10 +820,12 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 #undef EMIT_SHIFT_TO_INIT
 #undef FROM_LOOP
 #undef TO_LOOP
+#undef ONE_DIRECTION
 #undef SAVE_RESET_STATE
 #undef RESET_INPUT_BUFFER
 #undef FUNCTION_NAME
 #undef PREPARE_LOOP
 #undef END_LOOP
-#undef ONE_DIRECTION
+#undef EXTRA_LOOP_ARGS
 #undef STORE_REST
+#undef FROM_ONEBYTE