about summary refs log tree commit diff
path: root/iconv
diff options
context:
space:
mode:
Diffstat (limited to 'iconv')
-rw-r--r-- iconv/gconv_simple.c | 76
-rw-r--r-- iconv/loop.c | 33
2 files changed, 44 insertions, 65 deletions
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index b4dcf93c32..fbdac832e0 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -770,15 +770,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 	   this is not an error because of the lack of possibilities to	      \
 	   represent the result.  This is a genuine bug in the input since    \
 	   ASCII does not allow such values.  */			      \
-	if (! ignore_errors_p ())					      \
-	  {								      \
-	    /* This is no correct ANSI_X3.4-1968 character.  */		      \
-	    result = __GCONV_ILLEGAL_INPUT;				      \
-	    break;							      \
-	  }								      \
-									      \
-	++*irreversible;						      \
-	++inptr;							      \
+	STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
       }									      \
     else								      \
       /* It's an one byte sequence.  */					      \
@@ -808,7 +800,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
     if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0))	      \
       {									      \
 	UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);		      \
-	STANDARD_ERR_HANDLER (4);					      \
+	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
       }									      \
     else								      \
       /* It's an one byte sequence.  */					      \
@@ -872,7 +864,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
       }									      \
     else								      \
       {									      \
-	STANDARD_ERR_HANDLER (4);					      \
+	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
       }									      \
 									      \
     inptr += 4;								      \
@@ -951,24 +943,16 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 	  {								      \
 	    int skipped;						      \
 									      \
-	    if (! ignore_errors_p ())					      \
-	      {								      \
-		/* This is an illegal encoding.  */			      \
-		result = __GCONV_ILLEGAL_INPUT;				      \
-		break;							      \
-	      }								      \
-									      \
 	    /* Search the end of this ill-formed UTF-8 character.  This	      \
 	       is the next byte with (x & 0xc0) != 0x80.  */		      \
-	     skipped = 0;						      \
-	     do								      \
-	       {							      \
-		 ++inptr;						      \
-		 ++skipped;						      \
-	       }							      \
-	     while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
+	    skipped = 0;						      \
+	    do								      \
+	      ++skipped;						      \
+	    while (inptr + skipped < inend				      \
+		   && (*(inptr + skipped) & 0xc0) == 0x80		      \
+		   && skipped < 5);					      \
 									      \
-	     continue;							      \
+	    STANDARD_FROM_LOOP_ERR_HANDLER (skipped);			      \
 	  }								      \
 									      \
 	if (__builtin_expect (inptr + cnt > inend, 0))			      \
@@ -985,16 +969,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 		break;							      \
 	      }								      \
 									      \
-	    if (ignore_errors_p ())					      \
-	      {								      \
-		/* Ignore it.  */					      \
-		inptr += i;						      \
-		++*irreversible;					      \
-		continue;						      \
-	      }								      \
-									      \
-	    result = __GCONV_ILLEGAL_INPUT;				      \
-	    break;							      \
+	    STANDARD_FROM_LOOP_ERR_HANDLER (i);				      \
 	  }								      \
 									      \
 	/* Read the possible remaining bytes.  */			      \
@@ -1016,15 +991,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))		      \
 	  {								      \
 	    /* This is an illegal encoding.  */				      \
-	    if (ignore_errors_p ())					      \
-	      {								      \
-		inptr += i;						      \
-		++*irreversible;					      \
-		continue;						      \
-	      }								      \
-									      \
-	    result = __GCONV_ILLEGAL_INPUT;				      \
-	    break;							      \
+	    STANDARD_FROM_LOOP_ERR_HANDLER (i);				      \
 	  }								      \
 									      \
 	inptr += cnt;							      \
@@ -1164,14 +1131,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
       {									      \
 	/* Surrogate characters in UCS-2 input are not valid.  Reject	      \
 	   them.  (Catching this here is not security relevant.)  */	      \
-	if (! ignore_errors_p ())					      \
-	  {								      \
-	    result = __GCONV_ILLEGAL_INPUT;				      \
-	    break;							      \
-	  }								      \
-	inptr += 2;							      \
-	++*irreversible;						      \
-	continue;							      \
+	STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \
       }									      \
 									      \
     *((uint32_t *) outptr)++ = u1;					      \
@@ -1203,7 +1163,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
     if (__builtin_expect (val >= 0x10000, 0))				      \
       {									      \
 	UNICODE_TAG_HANDLER (val, 4);					      \
-	STANDARD_ERR_HANDLER (4);					      \
+	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
       }									      \
     else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))	      \
       {									      \
@@ -1213,11 +1173,9 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 	   surrogates pass through, attackers could make a security	      \
 	   hole exploit by synthesizing any desired plane 1-16		      \
 	   character.  */						      \
+	result = __GCONV_ILLEGAL_INPUT;					      \
 	if (! ignore_errors_p ())					      \
-	  {								      \
-	    result = __GCONV_ILLEGAL_INPUT;				      \
-	    break;							      \
-	  }								      \
+	  break;							      \
 	inptr += 4;							      \
 	++*irreversible;						      \
 	continue;							      \
@@ -1293,7 +1251,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
     if (__builtin_expect (val >= 0x10000, 0))				      \
       {									      \
 	UNICODE_TAG_HANDLER (val, 4);					      \
-	STANDARD_ERR_HANDLER (4);					      \
+	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
       }									      \
     else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))	      \
       {									      \
diff --git a/iconv/loop.c b/iconv/loop.c
index 0b1bce75c0..2fb73da7ea 100644
--- a/iconv/loop.c
+++ b/iconv/loop.c
@@ -178,10 +178,29 @@
   (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS))
 
 
-/* Error handling with transliteration/transcription function use and
-   ignoring of errors.  Note that we cannot use the do while (0) trick
-   since `break' and `continue' must reach certain points.  */
-#define STANDARD_ERR_HANDLER(Incr) \
+/* Error handling for the FROM_LOOP direction: skip Incr input bytes if
+   errors are ignored, otherwise stop.  We cannot use the do while (0)
+   trick since `break' and `continue' must reach the enclosing loop.  */
+#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
+  {									      \
+    result = __GCONV_ILLEGAL_INPUT;					      \
+									      \
+    if (! ignore_errors_p ())						      \
+      break;								      \
+									      \
+    /* We ignore the invalid input byte sequence.  */			      \
+    inptr += (Incr);							      \
+    ++*irreversible;							      \
+    /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+       that "iconv -c" must give the same exitcode as "iconv".  */	      \
+    continue;								      \
+  }
+
+/* Error handling for the TO_LOOP direction, with use of transliteration/
+   transcription functions and ignoring of errors.  Note that we cannot use
+   the do while (0) trick since `break' and `continue' must reach the
+   enclosing loop.  */
+#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
   {									      \
     struct __gconv_trans_data *trans;					      \
 									      \
@@ -212,14 +231,16 @@
     /* When we come here it means we ignore the character.  */		      \
     ++*irreversible;							      \
     inptr += Incr;							      \
+    /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+       that "iconv -c" must give the same exitcode as "iconv".  */	      \
     continue;								      \
   }
 
 
 /* Handling of Unicode 3.1 TAG characters.  Unicode recommends
    "If language codes are not relevant to the particular processing
-    operation, then they should be ignored."
-   This macro is usually called right before STANDARD_ERR_HANDLER (Incr).  */
+    operation, then they should be ignored."  This macro is usually
+   called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr).  */
 #define UNICODE_TAG_HANDLER(Character, Incr) \
   {									      \
     /* TAG characters are those in the range U+E0000..U+E007F.  */	      \