about summary refs log tree commit diff
path: root/iconv
diff options
context:
space:
mode:
Diffstat (limited to 'iconv')
-rw-r--r-- iconv/gconv_builtin.h 14
-rw-r--r-- iconv/gconv_int.h 2
-rw-r--r-- iconv/gconv_simple.c 172
3 files changed, 186 insertions, 2 deletions
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index e12f1e46ee..0c31890a37 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -1,5 +1,5 @@
 /* Builtin transformations.
-   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -79,3 +79,15 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "UNICODELITTLE//",
 			1, "=INTERNAL->ucs2little",
 			__gconv_transform_internal_ucs2little, NULL, NULL,
 			4, 4, 2, 2)
+
+BUILTIN_ALIAS ("UTF-16//", "UTF16//")	/* The steps below are registered under "UTF16//".  */
+/* INTERNAL -> UTF16 conversion step.  */
+BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "UTF16//",	/* 8: presumably strlen ("INTERNAL"); confirm.  */
+			1, "=INTERNAL->utf16",
+			__gconv_transform_internal_utf16, NULL, NULL,
+			4, 4, 2, 4)	/* Presumably min/max bytes from, min/max bytes to; confirm.  */
+/* UTF16 -> INTERNAL conversion step.  */
+BUILTIN_TRANSFORMATION (NULL, "UTF16//", 7, "INTERNAL",	/* 7: presumably strlen ("UTF16//"); confirm.  */
+			1, "=utf16->INTERNAL",
+			__gconv_transform_utf16_internal, NULL, NULL,
+			2, 4, 4, 4)	/* Presumably min/max bytes from, min/max bytes to; confirm.  */
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index d4ffa551d6..9b00e6522c 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -168,6 +168,8 @@ __BUILTIN_TRANS (__gconv_transform_internal_ucs2);
 __BUILTIN_TRANS (__gconv_transform_ucs2little_internal);
 __BUILTIN_TRANS (__gconv_transform_internal_ucs2little);
 __BUILTIN_TRANS (__gconv_transform_internal_ucs4);
+/* UTF-16 <-> INTERNAL converters; gconv_simple.c names them via FUNCTION_NAME.  */
+__BUILTIN_TRANS (__gconv_transform_internal_utf16);
+__BUILTIN_TRANS (__gconv_transform_utf16_internal);
-# undef __BUITLIN_TRANS
+/* The helper macro is only needed for the declarations above.  */
+# undef __BUILTIN_TRANS
 
 #endif
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 4084d04b44..74dbfc0356 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -1,5 +1,5 @@
 /* Simple transformations functions.
-   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -451,3 +451,173 @@ internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
 #endif
 #include <iconv/loop.c>
 #include <iconv/skeleton.c>
+
+
+/* Convert from the internal (UCS4-like) format to UTF-16.  */
+#define DEFINE_INIT		0	/* Presumably: no init hook; confirm in skeleton.c.  */
+#define DEFINE_FINI		0	/* Presumably: no fini hook; confirm in skeleton.c.  */
+#define MIN_NEEDED_FROM		4	/* One internal character is read as a uint32_t.  */
+#define MIN_NEEDED_TO		2	/* One 16-bit word for a value below 0x10000.  */
+#define MAX_NEEDED_TO		4	/* Two 16-bit words for a surrogate pair.  */
+#define FROM_DIRECTION		1	/* Presumably: only this one direction exists; confirm.  */
+#define FROM_LOOP		internal_utf16_loop
+#define TO_LOOP			internal_utf16_loop /* This is not used.  */
+#define FUNCTION_NAME		__gconv_transform_internal_utf16	/* Listed in gconv_builtin.h.  */
+/* NOTE(review): the following presumably parameterize <iconv/loop.c>; confirm there.  */
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
+#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
+#define LOOPFCT			FROM_LOOP
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define BODY \
+  {                                                                         \
+    /* Encode one internal (UCS4, host byte order) character as big        \
+       endian UTF-16.  This is the little endian host variant, so          \
+       every 16-bit word is byte-swapped before it is stored.  */          \
+    uint32_t c = *((uint32_t *) inptr);                                     \
+                                                                            \
+    if (c >= 0x110000 || (c >= 0xd800 && c < 0xe000))                      \
+      {                                                                     \
+        /* Beyond the UTF-16 range, or a surrogate code point that         \
+           would yield ill-formed UTF-16 output.  */                       \
+        result = GCONV_ILLEGAL_INPUT;                                       \
+        break;                                                              \
+      }                                                                     \
+                                                                            \
+    if (c >= 0x10000)                                                       \
+      {                                                                     \
+        /* A surrogate pair is needed: two words, four bytes.  */          \
+        if (NEED_LENGTH_TEST && outptr + 4 > outend)                       \
+          {                                                                 \
+            /* Overflow in the output buffer.  */                          \
+            result = GCONV_FULL_OUTPUT;                                     \
+            break;                                                          \
+          }                                                                 \
+                                                                            \
+        /* 0xd7c0 is 0xd800 - (0x10000 >> 10), so adding c >> 10           \
+           yields the high surrogate directly.  */                         \
+        *((uint16_t *) outptr)++ = bswap_16 (0xd7c0 + (c >> 10));          \
+        *((uint16_t *) outptr)++ = bswap_16 (0xdc00 + (c & 0x3ff));        \
+      }                                                                     \
+    else                                                                    \
+      *((uint16_t *) outptr)++ = bswap_16 ((uint16_t) c);                  \
+    inptr += 4;                                                             \
+  }
+#else
+# define BODY \
+  {                                                                         \
+    /* Encode one internal (UCS4, host byte order) character as UTF-16.    \
+       This is the variant for hosts that are not little endian, so the    \
+       16-bit words are stored without byte swapping.  */                  \
+    uint32_t c = *((uint32_t *) inptr);                                     \
+                                                                            \
+    if (c >= 0x110000 || (c >= 0xd800 && c < 0xe000))                      \
+      {                                                                     \
+        /* Beyond the UTF-16 range, or a surrogate code point that         \
+           would yield ill-formed UTF-16 output.  */                       \
+        result = GCONV_ILLEGAL_INPUT;                                       \
+        break;                                                              \
+      }                                                                     \
+                                                                            \
+    if (c >= 0x10000)                                                       \
+      {                                                                     \
+        /* A surrogate pair is needed: two words, four bytes.  */          \
+        if (NEED_LENGTH_TEST && outptr + 4 > outend)                       \
+          {                                                                 \
+            /* Overflow in the output buffer.  */                          \
+            result = GCONV_FULL_OUTPUT;                                     \
+            break;                                                          \
+          }                                                                 \
+                                                                            \
+        /* 0xd7c0 is 0xd800 - (0x10000 >> 10), so adding c >> 10           \
+           yields the high surrogate directly.  */                         \
+        *((uint16_t *) outptr)++ = 0xd7c0 + (c >> 10);                     \
+        *((uint16_t *) outptr)++ = 0xdc00 + (c & 0x3ff);                   \
+      }                                                                     \
+    else                                                                    \
+      *((uint16_t *) outptr)++ = c;                                        \
+    /* Consume the input word on both paths; the surrogate path must       \
+       advance too or the same character is converted again.  */           \
+    inptr += 4;                                                             \
+  }
+#endif
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>
+
+
+/* Convert from UTF-16 to the internal (UCS4-like) format.  */
+#define DEFINE_INIT		0	/* Presumably: no init hook; confirm in skeleton.c.  */
+#define DEFINE_FINI		0	/* Presumably: no fini hook; confirm in skeleton.c.  */
+#define MIN_NEEDED_FROM		2	/* One 16-bit word when it is not a surrogate.  */
+#define MAX_NEEDED_FROM		4	/* Two 16-bit words for a surrogate pair.  */
+#define MIN_NEEDED_TO		4	/* Every result is stored as one uint32_t.  */
+#define FROM_DIRECTION		1	/* Presumably: only this one direction exists; confirm.  */
+#define FROM_LOOP		utf16_internal_loop
+#define TO_LOOP			utf16_internal_loop /* This is not used.  */
+#define FUNCTION_NAME		__gconv_transform_utf16_internal	/* Listed in gconv_builtin.h.  */
+/* NOTE(review): the following presumably parameterize <iconv/loop.c>; confirm there.  */
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
+#define LOOPFCT			FROM_LOOP
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define BODY \
+  {                                                                         \
+    /* Decode one character from big endian UTF-16 into the internal       \
+       (UCS4, host byte order) format.  This is the little endian host     \
+       variant, so every 16-bit word is byte-swapped after loading.  */    \
+    uint16_t u1 = bswap_16 (*(uint16_t *) inptr);                           \
+                                                                            \
+    if (u1 < 0xd800 || u1 > 0xdfff)                                         \
+      {                                                                     \
+        /* No surrogate.  */                                                \
+        *((uint32_t *) outptr)++ = u1;                                      \
+        inptr += 2;                                                         \
+      }                                                                     \
+    else                                                                    \
+      {                                                                     \
+        uint16_t u2;                                                        \
+                                                                            \
+        if (u1 >= 0xdc00)                                                   \
+          {                                                                 \
+            /* A low surrogate must not come first; decoding it as a       \
+               high surrogate would yield a value above 0x10ffff.  */      \
+            result = GCONV_ILLEGAL_INPUT;                                   \
+            break;                                                          \
+          }                                                                 \
+                                                                            \
+        /* It's a high surrogate, so a second word must follow.  */        \
+        if (NEED_LENGTH_TEST && inptr + 4 > inend)                         \
+          {                                                                 \
+            /* We don't have enough input for another complete input       \
+               character.  */                                               \
+            result = GCONV_INCOMPLETE_INPUT;                                \
+            break;                                                          \
+          }                                                                 \
+                                                                            \
+        u2 = bswap_16 (((uint16_t *) inptr)[1]);                            \
+        if (u2 < 0xdc00 || u2 > 0xdfff)                                     \
+          {                                                                 \
+            /* This is no valid second word for a surrogate.  Note that    \
+               0xdfff itself is a valid low surrogate.  */                 \
+            result = GCONV_ILLEGAL_INPUT;                                   \
+            break;                                                          \
+          }                                                                 \
+                                                                            \
+        /* 0xd7c0 is 0xd800 - (0x10000 >> 10), which adds the 0x10000      \
+           offset of the supplementary planes back in.  */                 \
+        *((uint32_t *) outptr)++ = ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00);  \
+        inptr += 4;                                                         \
+      }                                                                     \
+  }
+#else
+# define BODY \
+  {                                                                         \
+    /* Decode one character from UTF-16 into the internal (UCS4, host      \
+       byte order) format.  This is the variant for hosts that are not     \
+       little endian, so the 16-bit words are used as loaded.  */          \
+    uint16_t u1 = *(uint16_t *) inptr;                                      \
+                                                                            \
+    if (u1 < 0xd800 || u1 > 0xdfff)                                         \
+      {                                                                     \
+        /* No surrogate.  */                                                \
+        *((uint32_t *) outptr)++ = u1;                                      \
+        inptr += 2;                                                         \
+      }                                                                     \
+    else                                                                    \
+      {                                                                     \
+        uint16_t u2;                                                        \
+                                                                            \
+        if (u1 >= 0xdc00)                                                   \
+          {                                                                 \
+            /* A low surrogate must not come first; decoding it as a       \
+               high surrogate would yield a value above 0x10ffff.  */      \
+            result = GCONV_ILLEGAL_INPUT;                                   \
+            break;                                                          \
+          }                                                                 \
+                                                                            \
+        /* It's a high surrogate, so a second word must follow.  */        \
+        if (NEED_LENGTH_TEST && inptr + 4 > inend)                         \
+          {                                                                 \
+            /* We don't have enough input for another complete input       \
+               character.  */                                               \
+            result = GCONV_INCOMPLETE_INPUT;                                \
+            break;                                                          \
+          }                                                                 \
+                                                                            \
+        u2 = ((uint16_t *) inptr)[1];                                       \
+        if (u2 < 0xdc00 || u2 > 0xdfff)                                     \
+          {                                                                 \
+            /* This is no valid second word for a surrogate.  Note that    \
+               0xdfff itself is a valid low surrogate.  */                 \
+            result = GCONV_ILLEGAL_INPUT;                                   \
+            break;                                                          \
+          }                                                                 \
+                                                                            \
+        /* 0xd7c0 is 0xd800 - (0x10000 >> 10), which adds the 0x10000      \
+           offset of the supplementary planes back in.  */                 \
+        *((uint32_t *) outptr)++ = ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00);  \
+        inptr += 4;                                                         \
+      }                                                                     \
+  }
+#endif
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>