about summary refs log tree commit diff
path: root/iconvdata/iso-2022-jp.c
diff options
context:
space:
mode:
Diffstat (limited to 'iconvdata/iso-2022-jp.c')
-rw-r--r-- iconvdata/iso-2022-jp.c 276
1 file changed, 156 insertions, 120 deletions
diff --git a/iconvdata/iso-2022-jp.c b/iconvdata/iso-2022-jp.c
index a7ec09b32d..10515d2dee 100644
--- a/iconvdata/iso-2022-jp.c
+++ b/iconvdata/iso-2022-jp.c
@@ -95,7 +95,14 @@ enum
   JISX0201_Kana_set,
   GB2312_set,
   KSC5601_set,
-  JISX0212_set,
+  JISX0212_set
+};
+
+/* The second value stored is the designation of the G2 set.  The following
+   values are possible:  */
+enum
+{
+  UNSPECIFIED_set = 0,
   ISO88591_set,
   ISO88597_set
 };
@@ -187,12 +194,13 @@ gconv_end (struct gconv_step *data)
 									      \
       if (dir == from_iso2022jp)					      \
 	/* It's easy, we don't have to emit anything, we just reset the	      \
-	   state for the input.  */					      \
+	   state for the input.  Note that this also clears the G2	      \
+	   designation.  */						      \
 	data->statep->count = ASCII_set;				      \
       else								      \
 	{								      \
 	  char *outbuf = data->outbuf;					      \
-	  								      \
+									      \
 	  /* We are not in the initial state.  To switch back we have	      \
 	     to emit the sequence `Esc ( B'.  */			      \
 	  if (outbuf + 3 > data->outbufend)				      \
@@ -205,6 +213,7 @@ gconv_end (struct gconv_step *data)
 	      *outbuf++ = '(';						      \
 	      *outbuf++ = 'B';						      \
 	      data->outbuf = outbuf;					      \
+	      /* Note that this also clears the G2 designation.  */	      \
 	      data->statep->count = ASCII_set;				      \
 	    }								      \
 	}								      \
@@ -319,23 +328,50 @@ gconv_end (struct gconv_step *data)
 	    if (inptr[2] == 'A')					      \
 	      {								      \
 		/* ISO 8859-1-GR selected.  */				      \
-		set = ISO88591_set;					      \
+		set2 = ISO88591_set;					      \
 		inptr += 3;						      \
 		continue;						      \
 	      }								      \
 	    else if (inptr[2] == 'F')					      \
 	      {								      \
 		/* ISO 8859-7-GR selected.  */				      \
-		set = ISO88597_set;					      \
+		set2 = ISO88597_set;					      \
 		inptr += 3;						      \
 		continue;						      \
 	      }								      \
 	  }								      \
       }									      \
 									      \
-    if (set == ASCII_set						      \
-	|| (var < ISO88591_set && (ch < 0x21 || ch == 0x7f))		      \
-	|| (var >= ISO88591_set && ch < 0x20))				      \
+    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N')		      \
+      {									      \
+	if (set2 == ISO88591_set)					      \
+	  {								      \
+	    ch = inptr[2] | 0x80;					      \
+	    inptr += 3;							      \
+	  }								      \
+	else if (set2 == ISO88597_set)					      \
+	  {								      \
+	    /* We use the table from the ISO 8859-7 module.  */		      \
+	    if (inptr[2] < 0x20 || inptr[2] > 0x80)			      \
+	      {								      \
+		result = GCONV_ILLEGAL_INPUT;				      \
+		break;							      \
+	      }								      \
+	    ch = iso88597_to_ucs4[inptr[2] - 0x20];			      \
+	    if (ch == 0)						      \
+	      {								      \
+		result = GCONV_ILLEGAL_INPUT;				      \
+		break;							      \
+	      }								      \
+	    inptr += 3;							      \
+	  }								      \
+	else								      \
+	  {								      \
+	    result = GCONV_ILLEGAL_INPUT;				      \
+	    break;							      \
+	  }								      \
+      }									      \
+    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f))		      \
       /* Almost done, just advance the input pointer.  */		      \
       ++inptr;								      \
     else if (set == JISX0201_Roman_set)					      \
@@ -360,24 +396,6 @@ gconv_end (struct gconv_step *data)
 	  }								      \
 	++inptr;							      \
       }									      \
-    else if (set == ISO88591_set)					      \
-      {									      \
-	/* This is quite easy.  All characters are defined and the	      \
-	   ISO 10646 value is computed by adding 0x80.  */		      \
-	ch |= 0x80;							      \
-	++inptr;							      \
-      }									      \
-    else if (set == ISO88597_set)					      \
-      {									      \
-	/* We use the table from the ISO 8859-7 module.  */		      \
-	ch = iso88597_to_ucs4[(ch & 0x7f) - 0x20];			      \
-	if (ch == 0)							      \
-	  {								      \
-	    result = GCONV_ILLEGAL_INPUT;				      \
-	    break;							      \
-	  }								      \
-	++inptr;							      \
-      }									      \
     else								      \
       {									      \
 	if (set == JISX0208_1978_set || set == JISX0208_1983_set)	      \
@@ -419,8 +437,8 @@ gconv_end (struct gconv_step *data)
     *((uint32_t *) outptr)++ = ch;					      \
   }
 #define EXTRA_LOOP_DECLS	, enum variant var, int *setp
-#define INIT_PARAMS		int set = *setp
-#define UPDATE_PARAMS		*setp = set
+#define INIT_PARAMS		int set = *setp % 0x100, set2 = *setp / 0x100
+#define UPDATE_PARAMS		*setp = (set2 << 8) + set
 #include <iconv/loop.c>
 
 
@@ -448,6 +466,9 @@ gconv_end (struct gconv_step *data)
 	    *outptr++ = ch;						      \
 	    written = 1;						      \
 	  }								      \
+	/* At the beginning of a line, G2 designation is cleared.  */	      \
+	if (var == iso2022jp2 && ch == 0x0a)				      \
+	  set2 = UNSPECIFIED_set;					      \
       }									      \
     else if (set == JISX0201_Roman_set)					      \
       {									      \
@@ -473,30 +494,6 @@ gconv_end (struct gconv_step *data)
 	else								      \
 	  written = UNKNOWN_10646_CHAR;					      \
       }									      \
-    else if (set == ISO88591_set)					      \
-      {									      \
-	if (ch >= 0x80 && ch <= 0xff)					      \
-	  {								      \
-	    *outptr++ = ch;						      \
-	    written = 1;						      \
-	  }								      \
-      }									      \
-    else if (set == ISO88597_set)					      \
-      {									      \
-	const struct gap *rp = from_idx;				      \
-									      \
-	while (ch > rp->end)						      \
-	  ++rp;								      \
-	if (ch >= rp->start)						      \
-	  {								      \
-	    unsigned char res = iso88597_from_ucs4[ch + rp->idx];	      \
-	    if (res != '\0')						      \
-	      {								      \
-		*outptr++ = res | 0x80;					      \
-		written = 1;						      \
-	      }								      \
-	  }								      \
-      }									      \
     else								      \
       {									      \
 	if (set == JISX0208_1978_set || set == JISX0208_1983_set)	      \
@@ -530,6 +527,38 @@ gconv_end (struct gconv_step *data)
 									      \
     if (written == UNKNOWN_10646_CHAR || written == 0)			      \
       {									      \
+	if (set2 == ISO88591_set)					      \
+	  {								      \
+	    if (ch >= 0x80 && ch <= 0xff) 				      \
+	      {								      \
+		*outptr++ = ESC;					      \
+		*outptr++ = 'N';					      \
+		*outptr++ = ch & 0x7f;					      \
+		written = 3;						      \
+	      }								      \
+	  }								      \
+	else if (set2 == ISO88597_set)					      \
+	  {								      \
+	    const struct gap *rp = from_idx;				      \
+									      \
+	    while (ch > rp->end)					      \
+	      ++rp;							      \
+	    if (ch >= rp->start)					      \
+	      {								      \
+		unsigned char res = iso88597_from_ucs4[ch - 0xa0 + rp->idx];  \
+		if (res != '\0')					      \
+		  {							      \
+		    *outptr++ = ESC;					      \
+		    *outptr++ = 'N';					      \
+		    *outptr++ = res;					      \
+		    written = 3;					      \
+		  }							      \
+	      }								      \
+	  }								      \
+      }									      \
+									      \
+    if (written == UNKNOWN_10646_CHAR || written == 0)			      \
+      {									      \
 	/* Either this is an unknown character or we have to switch	      \
 	   the currently selected character set.  The character sets	      \
 	   do not code entirely separate parts of ISO 10646 and		      \
@@ -541,31 +570,25 @@ gconv_end (struct gconv_step *data)
 	   later and now simply use a fixed order in which we test for	      \
 	   availability  */						      \
 									      \
-	/* First test whether we have at least three more bytes for	      \
-	   the escape sequence.  The two charsets which require four	      \
-	   bytes will be handled later.  */				      \
-	if (NEED_LENGTH_TEST && outptr + 3 > outend)			      \
-	  {								      \
-	    result = GCONV_FULL_OUTPUT;					      \
-	    break;							      \
-	  }								      \
-									      \
 	if (ch <= 0x7f)							      \
 	  {								      \
 	    /* We must encode using ASCII.  First write out the		      \
 	       escape sequence.  */					      \
-	    *outptr++ = ESC;						      \
-	    *outptr++ = '(';						      \
-	    *outptr++ = 'B';						      \
-	    set = ASCII_set;						      \
-									      \
-	    if (NEED_LENGTH_TEST && outptr == outend)			      \
+	    if (NEED_LENGTH_TEST && outptr + 4 > outend)		      \
 	      {								      \
 		result = GCONV_FULL_OUTPUT;				      \
 		break;							      \
 	      }								      \
 									      \
+	    *outptr++ = ESC;						      \
+	    *outptr++ = '(';						      \
+	    *outptr++ = 'B';						      \
+	    set = ASCII_set;						      \
 	    *outptr++ = ch;						      \
+									      \
+	    /* At the beginning of a line, G2 designation is cleared.  */     \
+	    if (var == iso2022jp2 && ch == 0x0a)			      \
+	      set2 = UNSPECIFIED_set; 					      \
 	  }								      \
 	else								      \
 	  {								      \
@@ -580,17 +603,16 @@ gconv_end (struct gconv_step *data)
 	    if (written != UNKNOWN_10646_CHAR && buf[0] < 0x80)		      \
 	      {								      \
 		/* We use JIS X 0201.  */				      \
-		*outptr++ = ESC;					      \
-		*outptr++ = '(';					      \
-		*outptr++ = 'J';					      \
-		set = JISX0201_Roman_set;				      \
-									      \
-		if (NEED_LENGTH_TEST && outptr == outend)		      \
+		if (NEED_LENGTH_TEST && outptr + 4 > outend)		      \
 		  {							      \
 		    result = GCONV_FULL_OUTPUT;				      \
 		    break;						      \
 		  }							      \
 									      \
+		*outptr++ = ESC;					      \
+		*outptr++ = '(';					      \
+		*outptr++ = 'J';					      \
+		set = JISX0201_Roman_set;				      \
 		*outptr++ = buf[0];					      \
 	      }								      \
 	    else							      \
@@ -599,17 +621,16 @@ gconv_end (struct gconv_step *data)
 		if (written != UNKNOWN_10646_CHAR)			      \
 		  {							      \
 		    /* We use JIS X 0208.  */				      \
-		    *outptr++ = ESC;					      \
-		    *outptr++ = '$';					      \
-		    *outptr++ = 'B';					      \
-		    set = JISX0208_1983_set;				      \
-									      \
-		    if (NEED_LENGTH_TEST && outptr + 2 > outend)	      \
+		    if (NEED_LENGTH_TEST && outptr + 5 > outend)	      \
 		      {							      \
 			result = GCONV_FULL_OUTPUT;			      \
 			break;						      \
 		      }							      \
 									      \
+		    *outptr++ = ESC;					      \
+		    *outptr++ = '$';					      \
+		    *outptr++ = 'B';					      \
+		    set = JISX0208_1983_set;				      \
 		    *outptr++ = buf[0];					      \
 		    *outptr++ = buf[1];					      \
 		  }							      \
@@ -625,7 +646,7 @@ gconv_end (struct gconv_step *data)
 		    if (written != UNKNOWN_10646_CHAR)			      \
 		      {							      \
 			/* We use JIS X 0212.  */			      \
-			if (NEED_LENGTH_TEST && outptr + 4 > outend)	      \
+			if (NEED_LENGTH_TEST && outptr + 6 > outend)	      \
 			  {						      \
 			    result = GCONV_FULL_OUTPUT;			      \
 			    break;					      \
@@ -635,13 +656,6 @@ gconv_end (struct gconv_step *data)
 			*outptr++ = '(';				      \
 			*outptr++ = 'D';				      \
 			set = JISX0212_set;				      \
-									      \
-			if (NEED_LENGTH_TEST && outptr + 2 > outend)	      \
-			  {						      \
-			    result = GCONV_FULL_OUTPUT;			      \
-			    break;					      \
-			  }						      \
-									      \
 			*outptr++ = buf[0];				      \
 			*outptr++ = buf[1];				      \
 		      }							      \
@@ -651,33 +665,33 @@ gconv_end (struct gconv_step *data)
 			if (written != UNKNOWN_10646_CHAR && buf[0] >= 0x80)  \
 			  {						      \
 			    /* We use JIS X 0201.  */			      \
-			    *outptr++ = ESC;				      \
-			    *outptr++ = '(';				      \
-			    *outptr++ = 'I';				      \
-			    set = JISX0201_Kana_set;			      \
-									      \
-			    if (NEED_LENGTH_TEST && outptr == outend)	      \
+			    if (NEED_LENGTH_TEST && outptr + 4 > outend)      \
 			      {						      \
 			        result = GCONV_FULL_OUTPUT;		      \
 			        break;					      \
 			      }						      \
 									      \
+			    *outptr++ = ESC;				      \
+			    *outptr++ = '(';				      \
+			    *outptr++ = 'I';				      \
+			    set = JISX0201_Kana_set;			      \
 			    *outptr++ = buf[0] - 0x80;			      \
 			  }						      \
 			else if (ch != 0xa5 && ch >= 0x80 && ch <= 0xff)      \
 			  {						      \
 			    /* ISO 8859-1 upper half.   */		      \
-			    *outptr++ = ESC;				      \
-			    *outptr++ = '.';				      \
-			    *outptr++ = 'A';				      \
-			    set = ISO88591_set;				      \
-									      \
-			    if (NEED_LENGTH_TEST && outptr == outend)	      \
+			    if (NEED_LENGTH_TEST && outptr + 6 > outend)      \
 			      {						      \
 				result = GCONV_FULL_OUTPUT;		      \
 				break;					      \
 			      }						      \
 									      \
+			    *outptr++ = ESC;				      \
+			    *outptr++ = '.';				      \
+			    *outptr++ = 'A';				      \
+			    set2 = ISO88591_set;			      \
+			    *outptr++ = ESC;				      \
+			    *outptr++ = 'N';				      \
 			    *outptr++ = ch;				      \
 			  }						      \
 			else						      \
@@ -686,28 +700,27 @@ gconv_end (struct gconv_step *data)
 			    if (written != UNKNOWN_10646_CHAR)		      \
 			      {						      \
 				/* We use GB 2312.  */			      \
-				*outptr++ = ESC;			      \
-				*outptr++ = '$';			      \
-				*outptr++ = 'A';			      \
-				set = GB2312_set;			      \
-									      \
-				if (NEED_LENGTH_TEST && outptr + 2 > outend)  \
+				if (NEED_LENGTH_TEST && outptr + 5 > outend)  \
 				  {					      \
 				    result = GCONV_FULL_OUTPUT;		      \
 				    break;				      \
 				  }					      \
 									      \
+				*outptr++ = ESC;			      \
+				*outptr++ = '$';			      \
+				*outptr++ = 'A';			      \
+				set = GB2312_set;			      \
 				*outptr++ = buf[0];			      \
 				*outptr++ = buf[1];			      \
 			      }						      \
 			    else					      \
 			      {						      \
-				written = ucs4_to_ksc5601 (ch, buf, 2);       \
+				written = ucs4_to_ksc5601 (ch, buf, 2);	      \
 				if (written != UNKNOWN_10646_CHAR)	      \
 				  {					      \
 				    /* We use KSC 5601.  */		      \
-				    if (NEED_LENGTH_TEST 		      \
-					&& outptr + 4 > outend)		      \
+				    if (NEED_LENGTH_TEST		      \
+					&& outptr + 6 > outend)		      \
 				      {					      \
 					result = GCONV_FULL_OUTPUT;	      \
 					break;				      \
@@ -717,21 +730,44 @@ gconv_end (struct gconv_step *data)
 				    *outptr++ = '(';			      \
 				    *outptr++ = 'C';			      \
 				    set = KSC5601_set;			      \
-									      \
-				    if (NEED_LENGTH_TEST		      \
-					&& outptr + 2 > outend)		      \
-				      {					      \
-					result = GCONV_FULL_OUTPUT;	      \
-					break;				      \
-				      }					      \
-									      \
 				    *outptr++ = buf[0];			      \
 				    *outptr++ = buf[1];			      \
 				  }					      \
 				else					      \
 				  {					      \
-				    result = GCONV_ILLEGAL_INPUT;	      \
-				    break;				      \
+				    const struct gap *rp = from_idx;	      \
+				    unsigned char gch = 0;		      \
+									      \
+				    while (ch > rp->end)		      \
+				      ++rp;				      \
+				    if (ch >= rp->start)		      \
+				      {					      \
+					ch = ch - 0xa0 + rp->idx;	      \
+					gch = iso88597_from_ucs4[ch];	      \
+				      }					      \
+									      \
+				    if (gch != 0)			      \
+				      {					      \
+					/* We use ISO 8859-7 greek.  */	      \
+					if (NEED_LENGTH_TEST		      \
+					    && outptr + 6 > outend)	      \
+					  {				      \
+					    result = GCONV_FULL_OUTPUT;	      \
+					    break;			      \
+					  }				      \
+					*outptr++ = ESC;		      \
+					*outptr++ = '.';		      \
+					*outptr++ = 'F';		      \
+					set2 = ISO88597_set;		      \
+					*outptr++ = ESC;		      \
+					*outptr++ = 'N';		      \
+					*outptr++ = gch;		      \
+				      }					      \
+				    else				      \
+				      {					      \
+					result = GCONV_ILLEGAL_INPUT;	      \
+					break;				      \
+				      }					      \
 				  }					      \
 			      }						      \
 			  }						      \
@@ -745,8 +781,8 @@ gconv_end (struct gconv_step *data)
     inptr += 4;								      \
   }
 #define EXTRA_LOOP_DECLS	, enum variant var, int *setp
-#define INIT_PARAMS		int set = *setp
-#define UPDATE_PARAMS		*setp = set
+#define INIT_PARAMS		int set = *setp % 0x100, set2 = *setp / 0x100
+#define UPDATE_PARAMS		*setp = (set2 << 8) + set
 #include <iconv/loop.c>