about summary refs log tree commit diff
path: root/iconvdata/shift_jisx0213.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2002-09-24 04:19:03 +0000
committer Ulrich Drepper <drepper@redhat.com> 2002-09-24 04:19:03 +0000
commit fa00744e514a99087f5fe70cac9334b29a04c93a (patch)
tree afb188699c7ba3d139c0c1e5962a749bec6480b7 /iconvdata/shift_jisx0213.c
parent f2a444335f8deabb58145db315b33a87e4f576da (diff)
download glibc-fa00744e514a99087f5fe70cac9334b29a04c93a.tar.gz
glibc-fa00744e514a99087f5fe70cac9334b29a04c93a.tar.xz
glibc-fa00744e514a99087f5fe70cac9334b29a04c93a.zip
Update.
	* iconv/iconv_prog.c (main): Provide an error message that identifies
	the wrong encoding.

2002-09-22  Bruno Haible  <bruno@clisp.org>

	* iconvdata/tscii.c: New file.
	* iconvdata/testdata/TSCII: New file.
	* iconvdata/testdata/TSCII..UTF8: New file.
	* iconvdata/TSCII.precomposed: New file.
	* iconvdata/TSCII.irreversible: New file.
	* iconvdata/gconv-modules (TSCII): New module.
	* iconvdata/Makefile (modules): Add TSCII.
	(distribute): Add tscii.c.
	* iconvdata/tst-table-from.c (try, utf8_decode, main): Double output
	buffer size.
	* iconvdata/tst-tables.sh: Add TSCII.
	* iconvdata/TESTS: Add TSCII.

2002-09-22  Bruno Haible  <bruno@clisp.org>

	Revert 2002-04-18 patch.
	* iconvdata/euc-jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
	FROM_DIRECTION): Make the FROM direction stateful again.
	* iconvdata/shift_jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
	FROM_DIRECTION): Likewise.

2002-09-22  Bruno Haible  <bruno@clisp.org>
Diffstat (limited to 'iconvdata/shift_jisx0213.c')
-rw-r--r-- iconvdata/shift_jisx0213.c 189
1 files changed, 105 insertions, 84 deletions
diff --git a/iconvdata/shift_jisx0213.c b/iconvdata/shift_jisx0213.c
index d48d67ef0f..119e0017b9 100644
--- a/iconvdata/shift_jisx0213.c
+++ b/iconvdata/shift_jisx0213.c
@@ -67,7 +67,9 @@
     *statep = saved_state
 
 
-/* During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
+/* During Shift_JISX0213 to UCS-4 conversion, the COUNT element of the state
+   contains the last UCS-4 character, shifted by 3 bits.
+   During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
    contains the last two bytes to be output, shifted by 3 bits.  */
 
 /* Since this is a stateful encoding we have to provide code which resets
@@ -77,8 +79,17 @@
   if (data->__statep->__count != 0)					      \
     {									      \
       if (FROM_DIRECTION)						      \
-	/* We don't use shift states in the FROM_DIRECTION.  */		      \
-	data->__statep->__count = 0;					      \
+	{								      \
+	  if (__builtin_expect (outbuf + 4 <= outend, 1))		      \
+	    {								      \
+	      /* Write out the last character.  */			      \
+	      *((uint32_t *) outbuf)++ = data->__statep->__count >> 3;	      \
+	      data->__statep->__count = 0;				      \
+	    }								      \
+	  else								      \
+	    /* We don't have enough room in the output buffer.  */	      \
+	    status = __GCONV_FULL_OUTPUT;				      \
+	}								      \
       else								      \
 	{								      \
 	  if (__builtin_expect (outbuf + 2 <= outend, 1))		      \
@@ -104,106 +115,116 @@
 #define LOOPFCT			FROM_LOOP
 #define BODY \
   {									      \
-    uint32_t ch = *inptr;						      \
+    uint32_t ch;							      \
 									      \
-    if (ch < 0x80)							      \
-      {									      \
-	/* Plain ISO646-JP character.  */				      \
-	if (__builtin_expect (ch == 0x5c, 0))				      \
-	  ch = 0xa5;							      \
-	else if (__builtin_expect (ch == 0x7e, 0))			      \
-	  ch = 0x203e;							      \
-	++inptr;							      \
-      }									      \
-    else if (ch >= 0xa1 && ch <= 0xdf)					      \
+    /* Determine whether there is a buffered character pending.  */	      \
+    ch = *statep >> 3;							      \
+    if (__builtin_expect (ch == 0, 1))					      \
       {									      \
-	/* Half-width katakana.  */					      \
-	ch += 0xfec0;							      \
-	++inptr;							      \
-      }									      \
-    else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc))	      \
-      {									      \
-	/* Two byte character.  */					      \
-	uint32_t ch2;							      \
+	/* No - so look at the next input byte.  */			      \
+	ch = *inptr;							      \
 									      \
-	if (__builtin_expect (inptr + 1 >= inend, 0))			      \
+	if (ch < 0x80)							      \
 	  {								      \
-	    /* The second byte is not available.  */			      \
-	    result = __GCONV_INCOMPLETE_INPUT;				      \
-	    break;							      \
+	    /* Plain ISO646-JP character.  */				      \
+	    if (__builtin_expect (ch == 0x5c, 0))			      \
+	      ch = 0xa5;						      \
+	    else if (__builtin_expect (ch == 0x7e, 0))			      \
+	      ch = 0x203e;						      \
+	    ++inptr;							      \
 	  }								      \
-									      \
-	ch2 = inptr[1];							      \
-									      \
-	/* The second byte must be in the range 0x{40..7E,80..FC}.  */	      \
-	if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0))    \
+	else if (ch >= 0xa1 && ch <= 0xdf)				      \
 	  {								      \
-	    /* This is an illegal character.  */			      \
-	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
+	    /* Half-width katakana.  */					      \
+	    ch += 0xfec0;						      \
+	    ++inptr;							      \
 	  }								      \
-									      \
-	/* Convert to row and column.  */				      \
-	if (ch < 0xe0)							      \
-	  ch -= 0x81;							      \
-	else								      \
-	  ch -= 0xc1;							      \
-	if (ch2 < 0x80)							      \
-	  ch2 -= 0x40;							      \
-	else								      \
-	  ch2 -= 0x41;							      \
-	/* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb.  */			      \
-	ch = 2 * ch;							      \
-	if (ch2 >= 0x5e)						      \
-	  ch2 -= 0x5e, ch++;						      \
-	ch2 += 0x21;							      \
-	if (ch >= 0x5e)							      \
+	else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc))    \
 	  {								      \
-	    /* Handling of JISX 0213 plane 2 rows.  */			      \
-	    if (ch >= 0x67)						      \
-	      ch += 230;						      \
-	    else if (ch >= 0x63 || ch == 0x5f)				      \
-	      ch += 168;						      \
-	    else 							      \
-	      ch += 162;						      \
-	  }								      \
-									      \
-	ch = jisx0213_to_ucs4 (0x121 + ch, ch2);			      \
+	    /* Two byte character.  */					      \
+	    uint32_t ch2;						      \
 									      \
-	if (ch == 0)							      \
-	  {								      \
-	    /* This is an illegal character.  */			      \
-	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
-	  }								      \
+	    if (__builtin_expect (inptr + 1 >= inend, 0))		      \
+	      {								      \
+		/* The second byte is not available.  */		      \
+		result = __GCONV_INCOMPLETE_INPUT;			      \
+		break;							      \
+	      }								      \
 									      \
-	if (ch < 0x80)							      \
-	  {								      \
-	    /* It's a combining character.  */				      \
-	    uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];	      \
-	    uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];	      \
+	    ch2 = inptr[1];						      \
 									      \
-	    /* See whether we have room for two characters.  */		      \
-	    if (outptr + 8 <= outend)					      \
+	    /* The second byte must be in the range 0x{40..7E,80..FC}.  */    \
+	    if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0))\
 	      {								      \
-		inptr += 2;						      \
-		put32 (outptr, u1);					      \
-		outptr += 4;						      \
-		put32 (outptr, u2);					      \
-		outptr += 4;						      \
-		continue;						      \
+		/* This is an illegal character.  */			      \
+		STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
 	      }								      \
+									      \
+	    /* Convert to row and column.  */				      \
+	    if (ch < 0xe0)						      \
+	      ch -= 0x81;						      \
+	    else							      \
+	      ch -= 0xc1;						      \
+	    if (ch2 < 0x80)						      \
+	      ch2 -= 0x40;						      \
 	    else							      \
+	      ch2 -= 0x41;						      \
+	    /* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb.  */		      \
+	    ch = 2 * ch;						      \
+	    if (ch2 >= 0x5e)						      \
+	      ch2 -= 0x5e, ch++;					      \
+	    ch2 += 0x21;						      \
+	    if (ch >= 0x5e)						      \
 	      {								      \
+		/* Handling of JISX 0213 plane 2 rows.  */		      \
+		if (ch >= 0x67)						      \
+		  ch += 230;						      \
+		else if (ch >= 0x63 || ch == 0x5f)			      \
+		  ch += 168;						      \
+		else 							      \
+		  ch += 162;						      \
+	      }								      \
+									      \
+	    ch = jisx0213_to_ucs4 (0x121 + ch, ch2);			      \
+									      \
+	    if (ch == 0)						      \
+	      {								      \
+		/* This is an illegal character.  */			      \
+		STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
+	      }								      \
+									      \
+	    inptr += 2;							      \
+									      \
+	    if (ch < 0x80)						      \
+	      {								      \
+		/* It's a combining character.  */			      \
+		uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];	      \
+		uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];	      \
+									      \
+		put32 (outptr, u1);					      \
+		outptr += 4;						      \
+									      \
+		/* See whether we have room for two characters.  */	      \
+		if (outptr + 4 <= outend)				      \
+		  {							      \
+		    put32 (outptr, u2);					      \
+		    outptr += 4;					      \
+		    continue;						      \
+		  }							      \
+									      \
+		/* Otherwise store only the first character now, and	      \
+		   put the second one into the queue.  */		      \
+		*statep = u2 << 3;					      \
+		/* Tell the caller why we terminate the loop.  */	      \
 		result = __GCONV_FULL_OUTPUT;				      \
 		break;							      \
 	      }								      \
 	  }								      \
-									      \
-	inptr += 2;							      \
-      }									      \
-    else								      \
-      {									      \
-	/* This is illegal.  */						      \
-	STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
+	else								      \
+	  {								      \
+	    /* This is illegal.  */					      \
+	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
+	  }								      \
       }									      \
 									      \
     put32 (outptr, ch);							      \