about summary refs log tree commit diff
path: root/iconv
diff options
context:
space:
mode:
Diffstat (limited to 'iconv')
-rw-r--r-- iconv/skeleton.c 234
1 files changed, 169 insertions, 65 deletions
diff --git a/iconv/skeleton.c b/iconv/skeleton.c
index 9a609c972e..edcd92eb87 100644
--- a/iconv/skeleton.c
+++ b/iconv/skeleton.c
@@ -37,6 +37,27 @@
 			This macro is optional, it defaults to MIN_NEEDED_FROM.
      MAX_NEEDED_TO	likewise for the to-charset.
 
+     FROM_LOOP_MIN_NEEDED_FROM
+     FROM_LOOP_MAX_NEEDED_FROM
+                        minimal/maximal number of bytes needed on input
+                        of one round through the FROM_LOOP.  Defaults
+                        to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively.
+     FROM_LOOP_MIN_NEEDED_TO
+     FROM_LOOP_MAX_NEEDED_TO
+                        minimal/maximal number of bytes needed on output
+                        of one round through the FROM_LOOP.  Defaults
+                        to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively.
+     TO_LOOP_MIN_NEEDED_FROM
+     TO_LOOP_MAX_NEEDED_FROM
+                        minimal/maximal number of bytes needed on input
+                        of one round through the TO_LOOP.  Defaults
+                        to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively.
+     TO_LOOP_MIN_NEEDED_TO
+     TO_LOOP_MAX_NEEDED_TO
+                        minimal/maximal number of bytes needed on output
+                        of one round through the TO_LOOP.  Defaults
+                        to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively.
+
      DEFINE_DIRECTION_OBJECTS
 			two objects will be defined to be used when the
 			`gconv' function must only distinguish two
@@ -50,11 +71,11 @@
 
      EMIT_SHIFT_TO_INIT	this symbol is optional.  If it is defined it
 			defines some code which writes out a sequence
-			of characters which bring the current state into
+			of bytes which bring the current state into
 			the initial state.
 
      FROM_LOOP		name of the function implementing the conversion
-			from the current characters.
+			from the current character set.
      TO_LOOP		likewise for the other direction
 
      ONE_DIRECTION	optional.  If defined to 1, only one conversion
@@ -79,6 +100,24 @@
 
      EXTRA_LOOP_ARGS	optional macro specifying extra arguments passed
 			to loop function.
+
+   Modules can use mbstate_t to store conversion state as follows:
+
+   * Bits 2..0 of '__count' contain the number of lookahead input bytes
+     stored in __value.__wchb.  Always zero if the converter never
+     returns __GCONV_INCOMPLETE_INPUT.
+
+   * Bits 31..3 of '__count' are module dependent shift state.
+
+   * __value: When STORE_REST/UNPACK_BYTES aren't defined and when the
+     converter has returned __GCONV_INCOMPLETE_INPUT, this contains
+     at most 4 lookahead bytes. Converters with an mb_cur_max > 4
+     (currently only UTF-8) must find a way to store their state
+     in __value.__wch and define STORE_REST/UNPACK_BYTES appropriately.
+
+   When __value contains lookahead, __count must not be zero, because
+   the converter is not in the initial state then, and mbsinit() --
+   defined as a (__count == 0) test -- must reflect this.
  */
 
 #include <assert.h>
@@ -121,6 +160,32 @@ static int to_object;
 # define MAX_NEEDED_TO		MIN_NEEDED_TO
 #endif
 
+/* Defaults for the per-direction min/max constants.  */
+#ifndef FROM_LOOP_MIN_NEEDED_FROM
+# define FROM_LOOP_MIN_NEEDED_FROM	MIN_NEEDED_FROM
+#endif
+#ifndef FROM_LOOP_MAX_NEEDED_FROM
+# define FROM_LOOP_MAX_NEEDED_FROM	MAX_NEEDED_FROM
+#endif
+#ifndef FROM_LOOP_MIN_NEEDED_TO
+# define FROM_LOOP_MIN_NEEDED_TO	MIN_NEEDED_TO
+#endif
+#ifndef FROM_LOOP_MAX_NEEDED_TO
+# define FROM_LOOP_MAX_NEEDED_TO	MAX_NEEDED_TO
+#endif
+#ifndef TO_LOOP_MIN_NEEDED_FROM
+# define TO_LOOP_MIN_NEEDED_FROM	MIN_NEEDED_TO
+#endif
+#ifndef TO_LOOP_MAX_NEEDED_FROM
+# define TO_LOOP_MAX_NEEDED_FROM	MAX_NEEDED_TO
+#endif
+#ifndef TO_LOOP_MIN_NEEDED_TO
+# define TO_LOOP_MIN_NEEDED_TO		MIN_NEEDED_FROM
+#endif
+#ifndef TO_LOOP_MAX_NEEDED_TO
+# define TO_LOOP_MAX_NEEDED_TO		MAX_NEEDED_FROM
+#endif
+
 
 /* Define macros which can access unaligned buffers.  These macros are
    supposed to be used only in code outside the inner loops.  For the inner
@@ -192,21 +257,40 @@ static int to_object;
 /* For conversions from a fixed width character set to another fixed width
    character set we can define RESET_INPUT_BUFFER in a very fast way.  */
 #if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE
-# if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO
-/* We have to use these `#if's here since the compiler cannot know that
-   (outbuf - outerr) is always divisible by MIN_NEEDED_TO.  We have to
-   use preprocessor arithmetic and no C code because gcc 3.2 complains
-   about division by zero even in obviously dead code.  */
-#  if MIN_NEEDED_FROM % MIN_NEEDED_TO == 0
-#   define RESET_INPUT_BUFFER \
-  *inptrp -= (outbuf - outerr) * (MIN_NEEDED_FROM / MIN_NEEDED_TO)
-#  elif MIN_NEEDED_TO % MIN_NEEDED_FROM == 0
-#   define RESET_INPUT_BUFFER \
-  *inptrp -= (outbuf - outerr) / (MIN_NEEDED_TO / MIN_NEEDED_FROM)
-#  else
-#   define RESET_INPUT_BUFFER \
-  *inptrp -= ((outbuf - outerr) / MIN_NEEDED_TO) * MIN_NEEDED_FROM
-#  endif
+# if FROM_LOOP_MIN_NEEDED_FROM == FROM_LOOP_MAX_NEEDED_FROM \
+     && FROM_LOOP_MIN_NEEDED_TO == FROM_LOOP_MAX_NEEDED_TO \
+     && TO_LOOP_MIN_NEEDED_FROM == TO_LOOP_MAX_NEEDED_FROM \
+     && TO_LOOP_MIN_NEEDED_TO == TO_LOOP_MAX_NEEDED_TO
+/* We have to use these `if's here since the compiler cannot know that
+   (outbuf - outerr) is always divisible by FROM/TO_LOOP_MIN_NEEDED_TO.
+   The ?:1 avoids division by zero warnings that gcc 3.2 emits even for
+   obviously unreachable code.  */
+#  define RESET_INPUT_BUFFER \
+  if (FROM_DIRECTION)							      \
+    {									      \
+      if (FROM_LOOP_MIN_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_TO == 0)	      \
+	*inptrp -= (outbuf - outerr)					      \
+		   * (FROM_LOOP_MIN_NEEDED_FROM / FROM_LOOP_MIN_NEEDED_TO);   \
+      else if (FROM_LOOP_MIN_NEEDED_TO % FROM_LOOP_MIN_NEEDED_FROM == 0)      \
+	*inptrp -= (outbuf - outerr)					      \
+		   / (FROM_LOOP_MIN_NEEDED_TO / FROM_LOOP_MIN_NEEDED_FROM     \
+		      ? : 1);						      \
+      else								      \
+	*inptrp -= ((outbuf - outerr) / FROM_LOOP_MIN_NEEDED_TO)	      \
+		   * FROM_LOOP_MIN_NEEDED_FROM;				      \
+    }									      \
+  else									      \
+    {									      \
+      if (TO_LOOP_MIN_NEEDED_FROM % TO_LOOP_MIN_NEEDED_TO == 0)		      \
+	*inptrp -= (outbuf - outerr)					      \
+		   * (TO_LOOP_MIN_NEEDED_FROM / TO_LOOP_MIN_NEEDED_TO);	      \
+      else if (TO_LOOP_MIN_NEEDED_TO % TO_LOOP_MIN_NEEDED_FROM == 0)	      \
+	*inptrp -= (outbuf - outerr)					      \
+		   / (TO_LOOP_MIN_NEEDED_TO / TO_LOOP_MIN_NEEDED_FROM ? : 1); \
+      else								      \
+	*inptrp -= ((outbuf - outerr) / TO_LOOP_MIN_NEEDED_TO)		      \
+		   * TO_LOOP_MIN_NEEDED_FROM;				      \
+    }
 # endif
 #endif
 
@@ -227,19 +311,19 @@ gconv_init (struct __gconv_step *step)
     {
       step->__data = &from_object;
 
-      step->__min_needed_from = MIN_NEEDED_FROM;
-      step->__max_needed_from = MAX_NEEDED_FROM;
-      step->__min_needed_to = MIN_NEEDED_TO;
-      step->__max_needed_to = MAX_NEEDED_TO;
+      step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
+      step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
+      step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
+      step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
     }
   else if (__builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) == 0)
     {
       step->__data = &to_object;
 
-      step->__min_needed_from = MIN_NEEDED_TO;
-      step->__max_needed_from = MAX_NEEDED_TO;
-      step->__min_needed_to = MIN_NEEDED_FROM;
-      step->__max_needed_to = MAX_NEEDED_FROM;
+      step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
+      step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
+      step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
+      step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
     }
   else
     return __GCONV_NOCONV;
@@ -403,27 +487,45 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 	 actually converted.  */
       size_t lirreversible = 0;
       size_t *lirreversiblep = irreversible ? &lirreversible : NULL;
-#if defined _STRING_ARCH_unaligned \
-    || MIN_NEEDED_FROM == 1 || MAX_NEEDED_FROM % MIN_NEEDED_FROM != 0 \
-    || MIN_NEEDED_TO == 1 || MAX_NEEDED_TO % MIN_NEEDED_TO != 0
-# define unaligned 0
-#else
+
+      /* The following assumes that encodings with a variable length, which
+	 might unalign a buffer even though it is aligned at the beginning,
+	 either don't have the minimal number of bytes as a divisor of the
+	 maximum length or have a minimum length of 1.  This is true for all
+	 known and supported encodings.
+	 We use && instead of || to combine the subexpressions for the FROM
+	 encoding and for the TO encoding, because usually one of them is
+	 INTERNAL, for which the subexpression evaluates to 1, but INTERNAL
+	 buffers are always aligned correctly.  */
+#define POSSIBLY_UNALIGNED \
+  (!defined _STRING_ARCH_unaligned					      \
+   && (((FROM_LOOP_MIN_NEEDED_FROM != 1					      \
+	 && FROM_LOOP_MAX_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_FROM == 0)	      \
+	&& (FROM_LOOP_MIN_NEEDED_TO != 1				      \
+	    && FROM_LOOP_MAX_NEEDED_TO % FROM_LOOP_MIN_NEEDED_TO == 0))	      \
+       || ((TO_LOOP_MIN_NEEDED_FROM != 1				      \
+	    && TO_LOOP_MAX_NEEDED_FROM % TO_LOOP_MIN_NEEDED_FROM == 0)	      \
+	   && (TO_LOOP_MIN_NEEDED_TO != 1				      \
+	       && TO_LOOP_MAX_NEEDED_TO % TO_LOOP_MIN_NEEDED_TO == 0))))
+#if POSSIBLY_UNALIGNED
       int unaligned;
 # define GEN_unaligned(name) GEN_unaligned2 (name)
 # define GEN_unaligned2(name) name##_unaligned
+#else
+# define unaligned 0
 #endif
 
 #ifdef PREPARE_LOOP
       PREPARE_LOOP
 #endif
 
-#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
+#if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1
       /* If the function is used to implement the mb*towc*() or wc*tomb*()
 	 functions we must test whether any bytes from the last call are
 	 stored in the `state' object.  */
-      if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
-	   || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
-	   || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
+      if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1)
+	   || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
+	   || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION))
 	  && consume_incomplete && (data->__statep->__count & 7) != 0)
 	{
 	  /* Yep, we have some bytes left over.  Process them now.
@@ -431,18 +533,20 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
              error handler.  */
 	  assert (outbufstart == NULL);
 
-# if MAX_NEEDED_FROM > 1
-	  if (MAX_NEEDED_TO == 1 || FROM_DIRECTION)
+# if FROM_LOOP_MAX_NEEDED_FROM > 1
+	  if (TO_LOOP_MAX_NEEDED_FROM == 1 || FROM_DIRECTION)
 	    status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf,
 					outend, lirreversiblep
 					EXTRA_LOOP_ARGS);
 # endif
-# if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION
+# if !ONE_DIRECTION
+#  if FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1
 	  else
-# endif
-# if MAX_NEEDED_TO > 1 && !ONE_DIRECTION
+#  endif
+#  if TO_LOOP_MAX_NEEDED_FROM > 1
 	    status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf,
 				      outend, lirreversiblep EXTRA_LOOP_ARGS);
+#  endif
 # endif
 
 	  if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
@@ -450,22 +554,16 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 	}
 #endif
 
-#if !defined _STRING_ARCH_unaligned \
-    && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
-    && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
-      /* The following assumes that encodings, which have a variable length
-	 what might unalign a buffer even though it is a aligned in the
-	 beginning, either don't have the minimal number of bytes as a divisor
-	 of the maximum length or have a minimum length of 1.  This is true
-	 for all known and supported encodings.  */
-      unaligned = ((FROM_DIRECTION
-		    && ((uintptr_t) inptr % MIN_NEEDED_FROM != 0
-			|| ((data->__flags & __GCONV_IS_LAST)
-			    && (uintptr_t) outbuf % MIN_NEEDED_TO != 0)))
-		   || (!FROM_DIRECTION
-		       && (((data->__flags & __GCONV_IS_LAST)
-			    && (uintptr_t) outbuf % MIN_NEEDED_FROM != 0)
-			   || (uintptr_t) inptr % MIN_NEEDED_TO != 0)));
+#if POSSIBLY_UNALIGNED
+      unaligned =
+	((FROM_DIRECTION
+	  && ((uintptr_t) inptr % FROM_LOOP_MIN_NEEDED_FROM != 0
+	      || ((data->__flags & __GCONV_IS_LAST)
+		  && (uintptr_t) outbuf % FROM_LOOP_MIN_NEEDED_TO != 0)))
+	 || (!FROM_DIRECTION
+	     && (((data->__flags & __GCONV_IS_LAST)
+		  && (uintptr_t) outbuf % TO_LOOP_MIN_NEEDED_TO != 0)
+		 || (uintptr_t) inptr % TO_LOOP_MIN_NEEDED_FROM != 0)));
 #endif
 
       while (1)
@@ -492,9 +590,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 		status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend,
 				  lirreversiblep EXTRA_LOOP_ARGS);
 	    }
-#if !defined _STRING_ARCH_unaligned \
-    && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
-    && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
+#if POSSIBLY_UNALIGNED
 	  else
 	    {
 	      if (FROM_DIRECTION)
@@ -589,9 +685,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 					       lirreversiblep
 					       EXTRA_LOOP_ARGS);
 			}
-# if !defined _STRING_ARCH_unaligned \
-     && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
-     && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
+# if POSSIBLY_UNALIGNED
 		      else
 			{
 			  if (FROM_DIRECTION)
@@ -650,10 +744,10 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 
       /* If we are supposed to consume all character store now all of the
 	 remaining characters in the `state' object.  */
-#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
-      if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
-	   || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
-	   || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
+#if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1
+      if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1)
+	   || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
+	   || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION))
 	  && __builtin_expect (consume_incomplete, 0)
 	  && status == __GCONV_INCOMPLETE_INPUT)
 	{
@@ -675,6 +769,8 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 # endif
 	}
 #endif
+#undef unaligned
+#undef POSSIBLY_UNALIGNED
     }
 
   return status;
@@ -687,6 +783,14 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 #undef MIN_NEEDED_TO
 #undef MAX_NEEDED_FROM
 #undef MAX_NEEDED_TO
+#undef FROM_LOOP_MIN_NEEDED_FROM
+#undef FROM_LOOP_MAX_NEEDED_FROM
+#undef FROM_LOOP_MIN_NEEDED_TO
+#undef FROM_LOOP_MAX_NEEDED_TO
+#undef TO_LOOP_MIN_NEEDED_FROM
+#undef TO_LOOP_MAX_NEEDED_FROM
+#undef TO_LOOP_MIN_NEEDED_TO
+#undef TO_LOOP_MAX_NEEDED_TO
 #undef DEFINE_DIRECTION_OBJECTS
 #undef FROM_DIRECTION
 #undef EMIT_SHIFT_TO_INIT