about summary refs log tree commit diff
path: root/iconv
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1998-04-01 17:44:34 +0000
committer Ulrich Drepper <drepper@redhat.com> 1998-04-01 17:44:34 +0000
commit d2374599d441d86cbf4dab69b69d7563c1fcaaa0 (patch)
tree 4fdb62c712cf6f071567810400fe1348dc5ecf1b /iconv
parent 8fe0fd03e5fb2b5ce401fc313e714af874cf01b5 (diff)
download glibc-d2374599d441d86cbf4dab69b69d7563c1fcaaa0.tar.gz
glibc-d2374599d441d86cbf4dab69b69d7563c1fcaaa0.tar.xz
glibc-d2374599d441d86cbf4dab69b69d7563c1fcaaa0.zip
Update.
1998-04-01 17:38  Ulrich Drepper  <drepper@cygnus.com>

	* iconv/gconv_simple.c: New builtins for UCS en/decoding.
	* iconv/gconv_builtin.h: Add definitions for new builtins.
	* iconv/gconv.h: Add prototypes for new builtins.

	* iconvdata/Makefile (modules): Add ISO646.
	Add rules for ISO646 module.
	(distribute): Add iso646.c.
	* iconvdata/gconv-modules: Add module and alias definition for
	ISO646 charsets.
	* iconvdata/iso646.c: New file.
Diffstat (limited to 'iconv')
-rw-r--r-- iconv/gconv.h | 4
-rw-r--r-- iconv/gconv_builtin.h | 23
-rw-r--r-- iconv/gconv_simple.c | 251
3 files changed, 273 insertions, 5 deletions
diff --git a/iconv/gconv.h b/iconv/gconv.h
index 83f7a99541..139ca0d5c8 100644
--- a/iconv/gconv.h
+++ b/iconv/gconv.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1997, 1998 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -200,6 +200,8 @@ extern void __gconv_get_builtin_trans __P ((__const char *__name,
 __BUILTIN_TRANS (__gconv_transform_dummy);
 __BUILTIN_TRANS (__gconv_transform_ucs4_utf8);
 __BUILTIN_TRANS (__gconv_transform_utf8_ucs4);
+__BUILTIN_TRANS (__gconv_transform_ucs2_ucs4);
+__BUILTIN_TRANS (__gconv_transform_ucs4_ucs2);
 # undef __BUITLIN_TRANS
 
 extern int __gconv_transform_init_rstate __P ((struct gconv_step *__step,
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index 3990b2cdb3..8e5d692946 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -21,17 +21,32 @@
 BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/")
 BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/")
 
-BUILTIN_TRANSFORMATION ("([^/]+)/UCS4/([^/]*)", NULL, 0,
-			"\\1/UTF8/\\2", 1, "=ucs4->utf8",
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
+			"ISO-10646/UTF8/", 1, "=ucs4->utf8",
 			__gconv_transform_ucs4_utf8,
 			__gconv_transform_init_rstate,
 			__gconv_transform_end_rstate)
 
-BUILTIN_TRANSFORMATION ("([^/]+)/UTF-?8/([^/]*)", NULL, 0,
-			"\\1/UCS4/\\2", 1, "=utf8->ucs4",
+BUILTIN_TRANSFORMATION ("ISO-10646/UTF-?8/", "ISO-10646/UTF", 13,
+			"ISO-10646/UCS4/", 1, "=utf8->ucs4",
 			__gconv_transform_utf8_ucs4,
 			__gconv_transform_init_rstate,
 			__gconv_transform_end_rstate)
 
+BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/")
+BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/")
+
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "ISO-10646/UCS4/",
+			1, "=ucs2->ucs4",
+			__gconv_transform_ucs2_ucs4,
+			__gconv_transform_init_rstate,
+			__gconv_transform_end_rstate)
+
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15, "ISO-10646/UCS2/",
+			1, "=ucs4->ucs2",
+			__gconv_transform_ucs4_ucs2,
+			__gconv_transform_init_rstate,
+			__gconv_transform_end_rstate)
+
 BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy",
 			__gconv_transform_dummy, NULL, NULL)
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 6f399ccd1a..280ecf57b5 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -20,6 +20,7 @@
 
 #include <errno.h>
 #include <gconv.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <wchar.h>
@@ -289,3 +290,253 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
 
   return result;
 }
+
+
+/* Convert UCS2 text (host-order 16-bit units) in INBUF to UCS4.
+   On entry *INLEN is the number of input bytes; on return it is the
+   number of bytes not consumed.  Output is appended to DATA->outbuf
+   and, unless DATA->is_last, handed to the next step.  A non-zero
+   DO_FLUSH resets the state instead of converting.  If this is the
+   last step and WRITTEN is not NULL, *WRITTEN receives the number of
+   bytes produced.  Returns a GCONV_* status code.
+   NOTE(review): a trailing odd byte is saved in the state but is not
+   merged with later input here; INBUF is assumed 2-byte aligned.  */
+int
+__gconv_transform_ucs2_ucs4 (struct gconv_step *step,
+			     struct gconv_step_data *data, const char *inbuf,
+			     size_t *inlen, size_t *written, int do_flush)
+{
+  struct gconv_step *next_step = step + 1;
+  struct gconv_step_data *next_data = data + 1;
+  /* STEP + 1 is only a valid object if this is not the last step.  */
+  gconv_fct fct = data->is_last ? NULL : next_step->fct;
+  size_t do_write;
+  int result;
+
+  /* If the function is called with no input this means we have to reset
+     to the initial state.  The possibly partly converted input is
+     dropped.  */
+  if (do_flush)
+    {
+      /* Clear the state.  */
+      memset (data->data, '\0', sizeof (mbstate_t));
+      do_write = 0;
+
+      /* Call the steps down the chain if there are any.  */
+      if (data->is_last)
+	result = GCONV_OK;
+      else
+	result = (*fct) (next_step, next_data, NULL, 0, written, 1);
+    }
+  else
+    {
+      int save_errno = errno;
+      do_write = 0;
+
+      do
+	{
+	  const uint16_t *newinbuf = (const uint16_t *) inbuf;
+	  wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
+	  size_t actually = 0;
+
+	  while (data->outbufavail + 4 <= data->outbufsize
+		 && *inlen >= 2)
+	    {
+	      outbuf[actually++] = *newinbuf++;
+	      data->outbufavail += 4;
+	      *inlen -= 2;
+	    }
+
+	  if (*inlen == 1)
+	    {
+	      /* We have an incomplete input character.  */
+	      mbstate_t *state = (mbstate_t *) data->data;
+	      state->count = 1;
+	      state->value = *(const uint8_t *) newinbuf;
+	      --*inlen;
+	    }
+
+	  /* Remember how much we converted.  */
+	  do_write += actually * sizeof (wchar_t);
+
+	  /* Resume after the consumed input on the next round.  */
+	  inbuf = (const char *) newinbuf;
+
+	  if (*inlen == 0 && !mbsinit ((mbstate_t *) data->data))
+	    {
+	      /* We have an incomplete character at the end.  */
+	      result = GCONV_INCOMPLETE_INPUT;
+	      break;
+	    }
+
+	  if (data->is_last)
+	    {
+	      /* This is the last step.  */
+	      result = (data->outbufavail + sizeof (wchar_t) > data->outbufsize
+			? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
+	      break;
+	    }
+
+	  /* Status so far.  */
+	  result = GCONV_EMPTY_INPUT;
+
+	  if (data->outbufavail > 0)
+	    {
+	      /* Call the functions below in the chain.  */
+	      size_t newavail = data->outbufavail;
+
+	      result = (*fct) (next_step, next_data, data->outbuf, &newavail,
+			       written, 0);
+
+	      /* Correct the output buffer.  This must also record a fully
+		 drained buffer (NEWAVAIL == 0).  */
+	      if (newavail != data->outbufavail)
+		{
+		  memmove (data->outbuf,
+			   &data->outbuf[data->outbufavail - newavail],
+			   newavail);
+		  data->outbufavail = newavail;
+		}
+	    }
+	}
+      while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
+
+      __set_errno (save_errno);
+    }
+
+  if (written != NULL && data->is_last)
+    *written = do_write;
+
+  return result;
+}
+
+
+/* Convert UCS4 text (host-order wchar_t units) in INBUF to UCS2.
+   On entry *INLEN is the number of input bytes; on return it is the
+   number of bytes not consumed.  Output is appended to DATA->outbuf
+   and, unless DATA->is_last, handed to the next step.  A non-zero
+   DO_FLUSH resets the state instead of converting.  Values outside
+   0..0xffff (compared unsigned) yield GCONV_ILLEGAL_INPUT.  If this
+   is the last step and WRITTEN is not NULL, *WRITTEN receives the
+   number of characters converted.  */
+int
+__gconv_transform_ucs4_ucs2 (struct gconv_step *step,
+			     struct gconv_step_data *data, const char *inbuf,
+			     size_t *inlen, size_t *written, int do_flush)
+{
+  struct gconv_step *next_step = step + 1;
+  struct gconv_step_data *next_data = data + 1;
+  /* STEP + 1 is only a valid object if this is not the last step.  */
+  gconv_fct fct = data->is_last ? NULL : next_step->fct;
+  size_t do_write;
+  int result;
+
+  /* A flush request resets to the initial state.  The possibly partly
+     converted input is dropped.  */
+  if (do_flush)
+    {
+      /* Clear the state.  */
+      memset (data->data, '\0', sizeof (mbstate_t));
+      do_write = 0;
+
+      /* Call the steps down the chain if there are any.  */
+      if (data->is_last)
+	result = GCONV_OK;
+      else
+	{
+	  result = (*fct) (next_step, next_data, NULL, 0, written, 1);
+	  /* Clear output buffer.  */
+	  data->outbufavail = 0;
+	}
+    }
+  else
+    {
+      int save_errno = errno;
+      do_write = 0;
+
+      do
+	{
+	  const wchar_t *newinbuf = (const wchar_t *) inbuf;
+	  uint16_t *outbuf = (uint16_t *) &data->outbuf[data->outbufavail];
+	  size_t actually = 0;
+
+	  errno = 0;
+
+	  while (data->outbufavail + 2 <= data->outbufsize
+		 && *inlen >= 4)
+	    {
+	      if ((uint32_t) *newinbuf >= 0x10000)
+		{
+		  __set_errno (EILSEQ);
+		  break;
+		}
+	      outbuf[actually++] = (uint16_t) *newinbuf++;
+	      *inlen -= 4;
+	      data->outbufavail += 2;
+	    }
+
+	  if (*inlen < 4)
+	    {
+	      /* We have an incomplete input character.  */
+	      const uint8_t *rest = (const uint8_t *) newinbuf;
+	      mbstate_t *state = (mbstate_t *) data->data;
+	      state->count = *inlen;
+	      for (state->value = 0; *inlen > 0; --*inlen)
+		state->value = (state->value << 8) + *rest++;
+	    }
+
+	  /* Remember how much we converted and where to resume.  */
+	  do_write += (const char *) newinbuf - inbuf;
+	  inbuf = (const char *) newinbuf;
+
+	  /* Check whether an illegal character appeared.  */
+	  if (errno != 0)
+	    {
+	      result = GCONV_ILLEGAL_INPUT;
+	      break;
+	    }
+
+	  if (*inlen == 0 && !mbsinit ((mbstate_t *) data->data))
+	    {
+	      /* We have an incomplete character at the end.  */
+	      result = GCONV_INCOMPLETE_INPUT;
+	      break;
+	    }
+
+	  if (data->is_last)
+	    {
+	      /* This is the last step.  */
+	      result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
+	      break;
+	    }
+
+	  /* Status so far.  */
+	  result = GCONV_EMPTY_INPUT;
+
+	  if (data->outbufavail > 0)
+	    {
+	      /* Call the functions below in the chain.  */
+	      size_t newavail = data->outbufavail;
+	      result = (*fct) (next_step, next_data, data->outbuf, &newavail,
+			       written, 0);
+
+	      /* Correct the output buffer, also when it was fully drained.  */
+	      if (newavail != data->outbufavail)
+		{
+		  memmove (data->outbuf,
+			   &data->outbuf[data->outbufavail - newavail],
+			   newavail);
+		  data->outbufavail = newavail;
+		}
+	    }
+	}
+      while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
+
+      __set_errno (save_errno);
+    }
+
+  if (written != NULL && data->is_last)
+    *written = do_write / sizeof (wchar_t);
+
+  return result;
+}