about summary refs log tree commit diff
path: root/wcsmbs/mbrtoc16.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@gmail.com> 2012-01-07 10:52:53 -0500
committer Ulrich Drepper <drepper@gmail.com> 2012-01-07 10:52:53 -0500
commit 9954432e309c8fddaec2fe53e601702a5c981624 (patch)
tree 3eb7513694e25391b3393afbb847dbd85ebf097a /wcsmbs/mbrtoc16.c
parent c3a87236702cb73be1dada3438bbd3c3934e83f8 (diff)
download glibc-9954432e309c8fddaec2fe53e601702a5c981624.tar.gz
glibc-9954432e309c8fddaec2fe53e601702a5c981624.tar.xz
glibc-9954432e309c8fddaec2fe53e601702a5c981624.zip
More char16_t and char32_t support
It works now for UTF-8 locales
Diffstat (limited to 'wcsmbs/mbrtoc16.c')
-rw-r--r-- wcsmbs/mbrtoc16.c 52
1 files changed, 45 insertions, 7 deletions
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c
index 7b5822d690..df970fba4f 100644
--- a/wcsmbs/mbrtoc16.c
+++ b/wcsmbs/mbrtoc16.c
@@ -1,6 +1,6 @@
 /* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gnu.org>, 2011.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -43,20 +43,32 @@ static mbstate_t state;
 size_t
 mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
 {
-  char16_t buf[1];
+  if (ps == NULL)
+    ps = &state;
+
+  if (ps->__count & 0x80000000)
+    {
+      /* We have to return the second word for a surrogate.  */
+      ps->__count &= 0x7fffffff;
+      *pc16 = ps->__value.__wch;
+      ps->__value.__wch = L'\0';
+      return (size_t) -3;
+    }
+
+  char16_t buf[2];
   struct __gconv_step_data data;
   int status;
   size_t result;
   size_t dummy;
   const unsigned char *inbuf, *endbuf;
-  unsigned char *outbuf = (unsigned char *) (pc16 ?: buf);
+  unsigned char *outbuf = (unsigned char *) buf;
   const struct gconv_fcts *fcts;
 
   /* Set information for this step.  */
   data.__invocation_counter = 0;
   data.__internal_use = 1;
   data.__flags = __GCONV_IS_LAST;
-  data.__statep = ps ?: &state;
+  data.__statep = ps;
   data.__trans = NULL;
 
   /* A first special case is if S is NULL.  This means put PS in the
@@ -85,9 +97,22 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
   if (fcts->toc16->__shlib_handle != NULL)
     PTR_DEMANGLE (fct);
 #endif
+
+  /* We first have to check whether the character can be represented
+     without a surrogate.  If we immediately pass in a buffer large
+     enough to hold two char16_t values and the first character does
+     not require a surrogate, the routine will try to convert more
+     input if N is larger than needed for the first character.  */
   status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
 			      NULL, &dummy, 0, 1));
 
+  if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf)
+    {
+      data.__outbufend = outbuf + 2 * sizeof (char16_t);
+      status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
+				  NULL, &dummy, 0, 1));
+    }
+
   /* There must not be any problems with the conversion but illegal input
      characters.  The output buffer must be large enough, otherwise the
      definition of MB_CUR_MAX is not correct.  All the other possible
@@ -100,15 +125,28 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
   if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
       || status == __GCONV_FULL_OUTPUT)
     {
-      if (data.__outbuf != (unsigned char *) outbuf
-	  && *(char16_t *) outbuf == U('\0'))
+      if (pc16 != NULL)
+	*pc16 = buf[0];
+
+      if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0'))
 	{
 	  /* The converted character is the NUL character.  */
 	  assert (__mbsinit (data.__statep));
 	  result = 0;
 	}
       else
-	result = inbuf - (const unsigned char *) s;
+	{
+	  result = inbuf - (const unsigned char *) s;
+
+	  if (data.__outbuf != outbuf + 2)
+	    {
+	      /* This is a surrogate.  */
+	      assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff);
+	      assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff);
+	      ps->__count |= 0x80000000;
+	      ps->__value.__wch = buf[1];
+	    }
+	}
     }
   else if (status == __GCONV_INCOMPLETE_INPUT)
     result = (size_t) -2;