summary refs log tree commit diff
path: root/iconv/gconv_simple.c
diff options
context:
space:
mode:
Diffstat (limited to 'iconv/gconv_simple.c')
-rw-r--r-- iconv/gconv_simple.c 244
1 files changed, 183 insertions, 61 deletions
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 4010a6b326..7189ad3787 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -64,7 +64,7 @@ static const unsigned char encoding_byte[] =
 static inline int
 internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
 		    unsigned char **outptrp, unsigned char *outend,
-		    mbstate_t *state, void *data, size_t *converted)
+		    mbstate_t *state, int flags, void *data, size_t *converted)
 {
   const unsigned char *inptr = *inptrp;
   unsigned char *outptr = *outptrp;
@@ -104,7 +104,8 @@ static inline int
 internal_ucs4_loop_unaligned (const unsigned char **inptrp,
 			      const unsigned char *inend,
 			      unsigned char **outptrp, unsigned char *outend,
-			      mbstate_t *state, void *data, size_t *converted)
+			      mbstate_t *state, int flags, void *data,
+			      size_t *converted)
 {
   const unsigned char *inptr = *inptrp;
   unsigned char *outptr = *outptrp;
@@ -150,7 +151,8 @@ static inline int
 internal_ucs4_loop_single (const unsigned char **inptrp,
 			   const unsigned char *inend,
 			   unsigned char **outptrp, unsigned char *outend,
-			   mbstate_t *state, void *data, size_t *converted)
+			   mbstate_t *state, int flags, void *data,
+			   size_t *converted)
 {
   size_t cnt = state->__count & 7;
 
@@ -171,6 +173,8 @@ internal_ucs4_loop_single (const unsigned char **inptrp,
   (*outptrp)[1] = state->__value.__wchb[2];
   (*outptrp)[2] = state->__value.__wchb[1];
   (*outptrp)[3] = state->__value.__wchb[0];
+
+  *outptrp += 4;
 #elif __BYTE_ORDER == __BIG_ENDIAN
   /* XXX unaligned */
   *(*((uint32_t **) outptrp)++) = state->__value.__wch;
@@ -202,7 +206,7 @@ internal_ucs4_loop_single (const unsigned char **inptrp,
 static inline int
 ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
 		    unsigned char **outptrp, unsigned char *outend,
-		    mbstate_t *state, void *data, size_t *converted)
+		    mbstate_t *state, int flags, void *data, size_t *converted)
 {
   const unsigned char *inptr = *inptrp;
   unsigned char *outptr = *outptrp;
@@ -222,6 +226,13 @@ ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
 
       if (inval > 0x7fffffff)
 	{
+	  if (flags & __GCONV_IGNORE_ERRORS)
+	    {
+	      /* Just ignore this character.  */
+	      ++*converted;
+	      continue;
+	    }
+
 	  *inptrp = inptr;
 	  *outptrp = outptr;
 	  return __GCONV_ILLEGAL_INPUT;
@@ -249,7 +260,8 @@ static inline int
 ucs4_internal_loop_unaligned (const unsigned char **inptrp,
 			      const unsigned char *inend,
 			      unsigned char **outptrp, unsigned char *outend,
-			      mbstate_t *state, void *data, size_t *converted)
+			      mbstate_t *state, int flags, void *data,
+			      size_t *converted)
 {
   const unsigned char *inptr = *inptrp;
   unsigned char *outptr = *outptrp;
@@ -262,6 +274,13 @@ ucs4_internal_loop_unaligned (const unsigned char **inptrp,
       if (inptr[0] > 0x80)
 	{
 	  /* The value is too large.  */
+	  if (flags & __GCONV_IGNORE_ERRORS)
+	    {
+	      /* Just ignore this character.  */
+	      ++*converted;
+	      continue;
+	    }
+
 	  *inptrp = inptr;
 	  *outptrp = outptr;
  	  return __GCONV_ILLEGAL_INPUT;
@@ -312,7 +331,8 @@ static inline int
 ucs4_internal_loop_single (const unsigned char **inptrp,
 			   const unsigned char *inend,
 			   unsigned char **outptrp, unsigned char *outend,
-			   mbstate_t *state, void *data, size_t *converted)
+			   mbstate_t *state, int flags, void *data,
+			   size_t *converted)
 {
   size_t cnt = state->__count & 7;
 
@@ -329,21 +349,28 @@ ucs4_internal_loop_single (const unsigned char **inptrp,
     }
 
   if (((unsigned char *) state->__value.__wchb)[0] > 0x80)
-    /* The value is too large.  */
-    return __GCONV_ILLEGAL_INPUT;
-
+    {
+      /* The value is too large.  */
+      if (!(flags & __GCONV_IGNORE_ERRORS))
+	return __GCONV_ILLEGAL_INPUT;
+    }
+  else
+    {
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-  (*outptrp)[0] = state->__value.__wchb[3];
-  (*outptrp)[1] = state->__value.__wchb[2];
-  (*outptrp)[2] = state->__value.__wchb[1];
-  (*outptrp)[3] = state->__value.__wchb[0];
+      (*outptrp)[0] = state->__value.__wchb[3];
+      (*outptrp)[1] = state->__value.__wchb[2];
+      (*outptrp)[2] = state->__value.__wchb[1];
+      (*outptrp)[3] = state->__value.__wchb[0];
 #elif __BYTE_ORDER == __BIG_ENDIAN
-  (*outptrp)[0] = state->__value.__wchb[0];
-  (*outptrp)[1] = state->__value.__wchb[1];
-  (*outptrp)[2] = state->__value.__wchb[2];
-  (*outptrp)[3] = state->__value.__wchb[3];
+      (*outptrp)[0] = state->__value.__wchb[0];
+      (*outptrp)[1] = state->__value.__wchb[1];
+      (*outptrp)[2] = state->__value.__wchb[2];
+      (*outptrp)[3] = state->__value.__wchb[3];
 #endif
 
+      *outptrp += 4;
+    }
+
   /* Clear the state buffer.  */
   state->__count &= ~7;
 
@@ -367,7 +394,8 @@ ucs4_internal_loop_single (const unsigned char **inptrp,
 static inline int
 internal_ucs4le_loop (const unsigned char **inptrp, const unsigned char *inend,
 		      unsigned char **outptrp, unsigned char *outend,
-		      mbstate_t *state, void *data, size_t *converted)
+		      mbstate_t *state, int flags, void *data,
+		      size_t *converted)
 {
   const unsigned char *inptr = *inptrp;
   unsigned char *outptr = *outptrp;
@@ -407,7 +435,7 @@ static inline int
 internal_ucs4le_loop_unaligned (const unsigned char **inptrp,
 				const unsigned char *inend,
 				unsigned char **outptrp, unsigned char *outend,
-				mbstate_t *state, void *data,
+				mbstate_t *state, int flags, void *data,
 				size_t *converted)
 {
   const unsigned char *inptr = *inptrp;
@@ -454,7 +482,8 @@ static inline int
 internal_ucs4le_loop_single (const unsigned char **inptrp,
 			     const unsigned char *inend,
 			     unsigned char **outptrp, unsigned char *outend,
-			     mbstate_t *state, void *data, size_t *converted)
+			     mbstate_t *state, int flags, void *data,
+			     size_t *converted)
 {
   size_t cnt = state->__count & 7;
 
@@ -475,6 +504,8 @@ internal_ucs4le_loop_single (const unsigned char **inptrp,
   (*outptrp)[1] = state->__value.__wchb[2];
   (*outptrp)[2] = state->__value.__wchb[1];
   (*outptrp)[3] = state->__value.__wchb[0];
+
+  *outptrp += 4;
 #else
   /* XXX unaligned */
   *(*((uint32_t **) outptrp)++) = state->__value.__wch;
@@ -503,7 +534,8 @@ internal_ucs4le_loop_single (const unsigned char **inptrp,
 static inline int
 ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
 		      unsigned char **outptrp, unsigned char *outend,
-		      mbstate_t *state, void *data, size_t *converted)
+		      mbstate_t *state, int flags, void *data,
+		      size_t *converted)
 {
   const unsigned char *inptr = *inptrp;
   unsigned char *outptr = *outptrp;
@@ -522,7 +554,16 @@ ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
 #endif
 
       if (inval > 0x7fffffff)
-	return __GCONV_ILLEGAL_INPUT;
+	{
+	  if (flags & __GCONV_IGNORE_ERRORS)
+	    {
+	      /* Just ignore this character.  */
+	      ++*converted;
+	      continue;
+	    }
+
+	  return __GCONV_ILLEGAL_INPUT;
+	}
 
       *((uint32_t *) outptr)++ = bswap_32 (*(uint32_t *) inptr);
     }
@@ -546,7 +587,7 @@ static inline int
 ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
 				const unsigned char *inend,
 				unsigned char **outptrp, unsigned char *outend,
-				mbstate_t *state, void *data,
+				mbstate_t *state, int flags, void *data,
 				size_t *converted)
 {
   const unsigned char *inptr = *inptrp;
@@ -560,12 +601,18 @@ ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
       if (inptr[3] > 0x80)
 	{
 	  /* The value is too large.  */
+	  if (flags & __GCONV_IGNORE_ERRORS)
+	    {
+	      /* Just ignore this character.  */
+	      ++*converted;
+	      continue;
+	    }
+
 	  *inptrp = inptr;
 	  *outptrp = outptr;
  	  return __GCONV_ILLEGAL_INPUT;
 	}
 
-
 # if __BYTE_ORDER == __BIG_ENDIAN
       outptr[3] = inptr[0];
       outptr[2] = inptr[1];
@@ -577,6 +624,8 @@ ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
       outptr[2] = inptr[2];
       outptr[3] = inptr[3];
 # endif
+
+      outptr += 4;
     }
 
   *inptrp = inptr;
@@ -599,7 +648,8 @@ static inline int
 ucs4le_internal_loop_single (const unsigned char **inptrp,
 			     const unsigned char *inend,
 			     unsigned char **outptrp, unsigned char *outend,
-			     mbstate_t *state, void *data, size_t *converted)
+			     mbstate_t *state, int flags, void *data,
+			     size_t *converted)
 {
   size_t cnt = state->__count & 7;
 
@@ -616,21 +666,28 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
     }
 
   if (((unsigned char *) state->__value.__wchb)[3] > 0x80)
-    /* The value is too large.  */
-    return __GCONV_ILLEGAL_INPUT;
-
+    {
+      /* The value is too large.  */
+      if (!(flags & __GCONV_IGNORE_ERRORS))
+	return __GCONV_ILLEGAL_INPUT;
+    }
+  else
+    {
 #if __BYTE_ORDER == __BIG_ENDIAN
-  (*outptrp)[0] = state->__value.__wchb[3];
-  (*outptrp)[1] = state->__value.__wchb[2];
-  (*outptrp)[2] = state->__value.__wchb[1];
-  (*outptrp)[3] = state->__value.__wchb[0];
+      (*outptrp)[0] = state->__value.__wchb[3];
+      (*outptrp)[1] = state->__value.__wchb[2];
+      (*outptrp)[2] = state->__value.__wchb[1];
+      (*outptrp)[3] = state->__value.__wchb[0];
 #elif __BYTE_ORDER == __BIG_ENDIAN
-  (*outptrp)[0] = state->__value.__wchb[0];
-  (*outptrp)[1] = state->__value.__wchb[1];
-  (*outptrp)[2] = state->__value.__wchb[2];
-  (*outptrp)[3] = state->__value.__wchb[3];
+      (*outptrp)[0] = state->__value.__wchb[0];
+      (*outptrp)[1] = state->__value.__wchb[1];
+      (*outptrp)[2] = state->__value.__wchb[2];
+      (*outptrp)[3] = state->__value.__wchb[3];
 #endif
 
+      *outptrp += 4;
+    }
+
   /* Clear the state buffer.  */
   state->__count &= ~7;
 
@@ -658,14 +715,20 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
   {									      \
     if (*inptr > '\x7f')						      \
       {									      \
-	/* This is no correct ANSI_X3.4-1968 character.  */		      \
-	result = __GCONV_ILLEGAL_INPUT;					      \
-	break;								      \
-      }									      \
+	if (! ignore_errors_p ())					      \
+	  {								      \
+	    /* This is no correct ANSI_X3.4-1968 character.  */		      \
+	    result = __GCONV_ILLEGAL_INPUT;				      \
+	    break;							      \
+	  }								      \
 									      \
-    /* It's an one byte sequence.  */					      \
-    /* XXX unaligned.  */						      \
-    *((uint32_t *) outptr)++ = *inptr++;				      \
+	++*converted;							      \
+	++inptr; 							      \
+      }									      \
+    else								      \
+      /* It's an one byte sequence.  */					      \
+      /* XXX unaligned.  */						      \
+      *((uint32_t *) outptr)++ = *inptr++;				      \
   }
 #include <iconv/loop.c>
 #include <iconv/skeleton.c>
@@ -689,13 +752,20 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
   {									      \
     if (*((uint32_t *) inptr) > 0x7f)					      \
       {									      \
-	/* This is no correct ANSI_X3.4-1968 character.  */		      \
-	result = __GCONV_ILLEGAL_INPUT;					      \
-	break;								      \
-      }									      \
+	if (! ignore_errors_p ())					      \
+	  {								      \
+	    /* This is no correct ANSI_X3.4-1968 character.  */		      \
+	    result = __GCONV_ILLEGAL_INPUT;				      \
+	    break;							      \
+	  }								      \
 									      \
-    /* It's an one byte sequence.  */					      \
-    *outptr++ = *((uint32_t *) inptr)++;				      \
+	++*converted;							      \
+	inptr += 4; 							      \
+      }									      \
+    else								      \
+      /* It's an one byte sequence.  */					      \
+      /* XXX unaligned.  */						      \
+      *outptr++ = *((uint32_t *) inptr)++;				      \
   }
 #include <iconv/loop.c>
 #include <iconv/skeleton.c>
@@ -829,9 +899,26 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
 	  }								      \
 	else								      \
 	  {								      \
-	    /* This is an illegal encoding.  */				      \
-	    result = __GCONV_ILLEGAL_INPUT;				      \
-	    break;							      \
+	    int skipped;						      \
+									      \
+	    if (! ignore_errors_p ())					      \
+	      {								      \
+		/* This is an illegal encoding.  */			      \
+		result = __GCONV_ILLEGAL_INPUT;				      \
+		break;							      \
+	      }								      \
+									      \
+	    /* Search the end of this ill-formed UTF-8 character.  This	      \
+	       is the next byte with (x & 0xc0) != 0x80.  */		      \
+	     skipped = 0;						      \
+	     do								      \
+	       {							      \
+		 ++inptr;						      \
+		 ++skipped;						      \
+	       }							      \
+	     while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
+									      \
+	     continue;							      \
 	  }								      \
 									      \
 	if (NEED_LENGTH_TEST && inptr + cnt > inend)			      \
@@ -841,8 +928,23 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
 	    for (i = 1; inptr + i < inend; ++i)				      \
 	      if ((inptr[i] & 0xc0) != 0x80)				      \
 		break;							      \
-	    result = (inptr + i == inend				      \
-		      ? __GCONV_INCOMPLETE_INPUT : __GCONV_ILLEGAL_INPUT);    \
+									      \
+	    if (inptr + i == inend)					      \
+	      {								      \
+		result = __GCONV_INCOMPLETE_INPUT;			      \
+		break;							      \
+	      }								      \
+									      \
+	    /* This is an illegal character.  */			      \
+	    if (ignore_errors_p ())					      \
+	      {								      \
+		/* Ignore it.  */					      \
+		inptr += i;						      \
+		++*converted;						      \
+		continue;						      \
+	      }								      \
+									      \
+	    result = __GCONV_ILLEGAL_INPUT;				      \
 	    break;							      \
 	  }								      \
 									      \
@@ -858,13 +960,20 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
 	    ch <<= 6;							      \
 	    ch |= byte & 0x3f;						      \
 	  }								      \
- 									      \
+									      \
 	/* If i < cnt, some trail byte was not >= 0x80, < 0xc0.		      \
 	   If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could	      \
 	   have been represented with fewer than cnt bytes.  */		      \
-	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))	      \
+	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))		      \
 	  {								      \
 	    /* This is an illegal encoding.  */				      \
+	    if (ignore_errors_p ())					      \
+	      {								      \
+		inptr += i;						      \
+		++*converted;						      \
+		continue;						      \
+	      }								      \
+									      \
 	    result = __GCONV_ILLEGAL_INPUT;				      \
 	    break;							      \
 	  }								      \
@@ -1021,10 +1130,17 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
   {									      \
     if (*((uint32_t *) inptr) >= 0x10000)				      \
       {									      \
-	result = __GCONV_ILLEGAL_INPUT;					      \
-	break;								      \
+	if (! ignore_errors_p ())					      \
+	  {								      \
+	    result = __GCONV_ILLEGAL_INPUT;				      \
+	    break;							      \
+	  }								      \
+									      \
+	inptr += 4;							      \
+	++*converted;							      \
       }									      \
-    *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++;			      \
+    else 								      \
+      *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++;		      \
   }
 #include <iconv/loop.c>
 #include <iconv/skeleton.c>
@@ -1070,8 +1186,14 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
     uint32_t val = *((uint32_t *) inptr);				      \
     if (val >= 0x10000)							      \
       {									      \
-	result = __GCONV_ILLEGAL_INPUT;					      \
-	break;								      \
+	if (! ignore_errors_p ())					      \
+	  {								      \
+	    result = __GCONV_ILLEGAL_INPUT;				      \
+	    break;							      \
+	  }								      \
+									      \
+	inptr += 4;							      \
+	++*converted;							      \
       }									      \
     *((uint16_t *) outptr)++ = bswap_16 (val);				      \
     inptr += 4;								      \