about summary refs log tree commit diff
path: root/wcsmbs
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2011-12-28 06:19:42 -0500
committerUlrich Drepper <drepper@gmail.com>2012-01-01 07:17:22 -0500
commitdb6af3ebf46a83b885455dc03a3c2c1c2c2dedec (patch)
tree942a59c7de0033cf9ab3231523130c323fa4b80c /wcsmbs
parent8ea79a616e43093f403927e425c197afe39196b7 (diff)
downloadglibc-db6af3ebf46a83b885455dc03a3c2c1c2c2dedec.tar.gz
glibc-db6af3ebf46a83b885455dc03a3c2c1c2c2dedec.tar.xz
glibc-db6af3ebf46a83b885455dc03a3c2c1c2c2dedec.zip
Add uchar.h support, part 1
c16 support for locales other than the C locale is still missing.
Diffstat (limited to 'wcsmbs')
-rw-r--r--wcsmbs/Makefile3
-rw-r--r--wcsmbs/Versions3
-rw-r--r--wcsmbs/c16rtomb.c121
-rw-r--r--wcsmbs/mbrtoc16.c122
-rw-r--r--wcsmbs/mbrtowc.c7
-rw-r--r--wcsmbs/uchar.h8
-rw-r--r--wcsmbs/wchar.h8
-rw-r--r--wcsmbs/wcrtomb.c7
-rw-r--r--wcsmbs/wcsmbsload.c90
-rw-r--r--wcsmbs/wcsmbsload.h7
10 files changed, 363 insertions, 13 deletions
diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile
index 0bb1740838..8c446e1fd3 100644
--- a/wcsmbs/Makefile
+++ b/wcsmbs/Makefile
@@ -40,7 +40,8 @@ routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \
 	    wcscasecmp wcsncase wcscasecmp_l wcsncase_l \
 	    wcsmbsload mbsrtowcs_l \
 	    isoc99_wscanf isoc99_vwscanf isoc99_fwscanf isoc99_vfwscanf \
-	    isoc99_swscanf isoc99_vswscanf
+	    isoc99_swscanf isoc99_vswscanf \
+	    mbrtoc16 c16rtomb
 
 strop-tests :=  wcscmp wmemcmp wcslen wcschr wcsrchr wcscpy
 tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \
diff --git a/wcsmbs/Versions b/wcsmbs/Versions
index b6dfa85a40..10bccc9539 100644
--- a/wcsmbs/Versions
+++ b/wcsmbs/Versions
@@ -28,4 +28,7 @@ libc {
     __isoc99_wscanf; __isoc99_vwscanf; __isoc99_fwscanf; __isoc99_vfwscanf;
     __isoc99_swscanf; __isoc99_vswscanf;
   }
+  GLIBC_2.16 {
+    mbrtoc16; c16rtomb; mbrtoc32; c32rtomb;
+  }
 }
diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c
new file mode 100644
index 0000000000..33e6b92d02
--- /dev/null
+++ b/wcsmbs/c16rtomb.c
@@ -0,0 +1,121 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <gconv.h>
+#include <stdlib.h>
+#include <uchar.h>
+#include <wcsmbsload.h>
+
+#include <sysdep.h>
+
+#ifndef EILSEQ
+# define EILSEQ EINVAL
+#endif
+
+#if __STDC__ >= 20100L
+# define u(c) U##c
+#else
+# define u(c) L##c
+#endif
+
+
+/* This is the private state used if PS is NULL.  */
+static mbstate_t state;
+
+size_t
+c16rtomb (char *s, char16_t c16, mbstate_t *ps)
+{
+  char buf[MB_CUR_MAX];
+  struct __gconv_step_data data;
+  int status;
+  size_t result;
+  size_t dummy;
+  const struct gconv_fcts *fcts;
+
+  /* Set information for this step.  */
+  data.__invocation_counter = 0;
+  data.__internal_use = 1;
+  data.__flags = __GCONV_IS_LAST;
+  data.__statep = ps ?: &state;
+  data.__trans = NULL;
+
+  /* A first special case is if S is NULL.  This means put PS in the
+     initial state.  */
+  if (s == NULL)
+    {
+      s = buf;
+      c16 = u('\0');
+    }
+
+  /* Tell where we want to have the result.  */
+  data.__outbuf = (unsigned char *) s;
+  data.__outbufend = (unsigned char *) s + MB_CUR_MAX;
+
+  /* Get the conversion functions.  */
+  fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
+  __gconv_fct fct = fcts->fromc16->__fct;
+#ifdef PTR_DEMANGLE
+  if (fcts->tomb->__shlib_handle != NULL)
+    PTR_DEMANGLE (fct);
+#endif
+
+  /* If C16 is the NUL character we write into the output buffer the byte
+     sequence necessary for PS to get into the initial state, followed
+     by a NUL byte.  */
+  if (c16 == L'\0')
+    {
+      status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL,
+				  NULL, &dummy, 1, 1));
+
+      if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT)
+	*data.__outbuf++ = '\0';
+    }
+  else
+    {
+      /* Do a normal conversion.  */
+      const unsigned char *inbuf = (const unsigned char *) &c16;
+
+      status = DL_CALL_FCT (fct,
+			    (fcts->fromc16, &data, &inbuf,
+			     inbuf + sizeof (char16_t), NULL, &dummy, 0, 1));
+    }
+
+  /* There must not be any problems with the conversion but illegal input
+     characters.  The output buffer must be large enough, otherwise the
+     definition of MB_CUR_MAX is not correct.  All the other possible
+     errors also must not happen.  */
+  assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
+	  || status == __GCONV_ILLEGAL_INPUT
+	  || status == __GCONV_INCOMPLETE_INPUT
+	  || status == __GCONV_FULL_OUTPUT);
+
+  if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
+      || status == __GCONV_FULL_OUTPUT)
+    result = data.__outbuf - (unsigned char *) s;
+  else
+    {
+      result = (size_t) -1;
+      __set_errno (EILSEQ);
+    }
+
+  return result;
+}
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c
new file mode 100644
index 0000000000..3a3a45ce1a
--- /dev/null
+++ b/wcsmbs/mbrtoc16.c
@@ -0,0 +1,122 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gnu.org>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <gconv.h>
+#include <uchar.h>
+#include <wcsmbsload.h>
+
+#include <sysdep.h>
+
+#ifndef EILSEQ
+# define EILSEQ EINVAL
+#endif
+
+#if __STDC__ >= 20100L
+# define U(c) U##c
+#else
+# define U(c) L##c
+#endif
+
+
+/* This is the private state used if PS is NULL.  */
+static mbstate_t state;
+
+size_t
+mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
+{
+  char16_t buf[1];
+  struct __gconv_step_data data;
+  int status;
+  size_t result;
+  size_t dummy;
+  const unsigned char *inbuf, *endbuf;
+  unsigned char *outbuf = (unsigned char *) (pc16 ?: buf);
+  const struct gconv_fcts *fcts;
+
+  /* Set information for this step.  */
+  data.__invocation_counter = 0;
+  data.__internal_use = 1;
+  data.__flags = __GCONV_IS_LAST;
+  data.__statep = ps ?: &state;
+  data.__trans = NULL;
+
+  /* A first special case is if S is NULL.  This means put PS in the
+     initial state.  */
+  if (s == NULL)
+    {
+      outbuf = (unsigned char *) buf;
+      s = "";
+      n = 1;
+    }
+
+  /* Tell where we want the result.  */
+  data.__outbuf = outbuf;
+  data.__outbufend = outbuf + sizeof (char16_t);
+
+  /* Get the conversion functions.  */
+  fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
+
+  /* Do a normal conversion.  */
+  inbuf = (const unsigned char *) s;
+  endbuf = inbuf + n;
+  if (__builtin_expect (endbuf < inbuf, 0))
+    endbuf = (const unsigned char *) ~(uintptr_t) 0;
+  __gconv_fct fct = fcts->toc16->__fct;
+#ifdef PTR_DEMANGLE
+  if (fcts->toc16->__shlib_handle != NULL)
+    PTR_DEMANGLE (fct);
+#endif
+  status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
+			      NULL, &dummy, 0, 1));
+
+  /* There must not be any problems with the conversion but illegal input
+     characters.  The output buffer must be large enough, otherwise the
+     definition of MB_CUR_MAX is not correct.  All the other possible
+     errors also must not happen.  */
+  assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
+	  || status == __GCONV_ILLEGAL_INPUT
+	  || status == __GCONV_INCOMPLETE_INPUT
+	  || status == __GCONV_FULL_OUTPUT);
+
+  if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
+      || status == __GCONV_FULL_OUTPUT)
+    {
+      if (data.__outbuf != (unsigned char *) outbuf
+	  && *(char16_t *) outbuf == U('\0'))
+	{
+	  /* The converted character is the NUL character.  */
+	  assert (__mbsinit (data.__statep));
+	  result = 0;
+	}
+      else
+	result = inbuf - (const unsigned char *) s;
+    }
+  else if (status == __GCONV_INCOMPLETE_INPUT)
+    result = (size_t) -2;
+  else
+    {
+      result = (size_t) -1;
+      __set_errno (EILSEQ);
+    }
+
+  return result;
+}
diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c
index b534571736..03b8348d30 100644
--- a/wcsmbs/mbrtowc.c
+++ b/wcsmbs/mbrtowc.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005
+/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
@@ -117,3 +117,8 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
 libc_hidden_def (__mbrtowc)
 weak_alias (__mbrtowc, mbrtowc)
 libc_hidden_weak (mbrtowc)
+
+/* There should be no difference between the UTF-32 handling required
+   by mbrtoc32 and the wchar_t handling which has long since been
+   implemented in mbrtowc.  */
+weak_alias (__mbrtowc, mbrtoc32)
diff --git a/wcsmbs/uchar.h b/wcsmbs/uchar.h
index 44637c3396..bb5f3ba35c 100644
--- a/wcsmbs/uchar.h
+++ b/wcsmbs/uchar.h
@@ -31,6 +31,14 @@
 #define __need_mbstate_t
 #include <wchar.h>
 
+#ifndef __mbstate_t_defined
+__BEGIN_NAMESPACE_C99
+/* Public type.  */
+typedef __mbstate_t mbstate_t;
+__END_NAMESPACE_C99
+# define __mbstate_t_defined 1
+#endif
+
 
 #ifdef __GNUC__
 /* Define the 16-bit and 32-bit character types.  Use the information
diff --git a/wcsmbs/wchar.h b/wcsmbs/wchar.h
index 2b35f51ad6..ccaaed8f49 100644
--- a/wcsmbs/wchar.h
+++ b/wcsmbs/wchar.h
@@ -77,8 +77,8 @@ __END_NAMESPACE_STD
 # endif
 #endif
 
-#if (defined _WCHAR_H || defined __need_mbstate_t) && !defined __mbstate_t_defined
-# define __mbstate_t_defined	1
+#if (defined _WCHAR_H || defined __need_mbstate_t) && !defined ____mbstate_t_defined
+# define ____mbstate_t_defined	1
 /* Conversion state information.  */
 typedef struct
 {
@@ -101,10 +101,14 @@ typedef struct
    defined.  */
 #ifdef _WCHAR_H
 
+# ifndef __mbstate_t_defined
 __BEGIN_NAMESPACE_C99
 /* Public type.  */
 typedef __mbstate_t mbstate_t;
 __END_NAMESPACE_C99
+#  define __mbstate_t_defined 1
+# endif
+
 #ifdef __USE_GNU
 __USING_NAMESPACE_C99(mbstate_t)
 #endif
diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c
index aa51b6891b..547b05aa9c 100644
--- a/wcsmbs/wcrtomb.c
+++ b/wcsmbs/wcrtomb.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996,1997,1998,2000,2002,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1996-1998,2000,2002,2005,2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
 
@@ -115,3 +115,8 @@ __wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
 }
 weak_alias (__wcrtomb, wcrtomb)
 libc_hidden_weak (wcrtomb)
+
+/* There should be no difference between the UTF-32 handling required
+   by c32rtomb and the wchar_t handling which has long since been
+   implemented in wcrtomb.  */
+weak_alias (__wcrtomb, c32rtomb)
diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
index 328f16497c..212a6c8135 100644
--- a/wcsmbs/wcsmbsload.c
+++ b/wcsmbs/wcsmbsload.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998-2002,2004,2005,2008,2010 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -67,6 +67,44 @@ static const struct __gconv_step to_mb =
   .__data = NULL
 };
 
+static const struct __gconv_step to_c16 =
+{
+  .__shlib_handle = NULL,
+  .__modname = NULL,
+  .__counter = INT_MAX,
+  .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
+  .__to_name = (char *) "UTF-16//",
+  .__fct = __gconv_transform_ascii_utf16,
+  .__btowc_fct = NULL,
+  .__init_fct = NULL,
+  .__end_fct = NULL,
+  .__min_needed_from = 1,
+  .__max_needed_from = 1,
+  .__min_needed_to = 4,
+  .__max_needed_to = 4,
+  .__stateful = 0,
+  .__data = NULL
+};
+
+static const struct __gconv_step from_c16 =
+{
+  .__shlib_handle = NULL,
+  .__modname = NULL,
+  .__counter = INT_MAX,
+  .__from_name = (char *) "UTF-16//",
+  .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
+  .__fct = __gconv_transform_utf16_ascii,
+  .__btowc_fct = NULL,
+  .__init_fct = NULL,
+  .__end_fct = NULL,
+  .__min_needed_from = 4,
+  .__max_needed_from = 4,
+  .__min_needed_to = 1,
+  .__max_needed_to = 1,
+  .__stateful = 0,
+  .__data = NULL
+};
+
 
 /* For the default locale we only have to handle ANSI_X3.4-1968.  */
 const struct gconv_fcts __wcsmbs_gconv_fcts_c =
@@ -74,7 +112,12 @@ const struct gconv_fcts __wcsmbs_gconv_fcts_c =
   .towc = (struct __gconv_step *) &to_wc,
   .towc_nsteps = 1,
   .tomb = (struct __gconv_step *) &to_mb,
-  .tomb_nsteps = 1
+  .tomb_nsteps = 1,
+
+  .toc16 = (struct __gconv_step *) &to_c16,
+  .toc16_nsteps = 1,
+  .fromc16 = (struct __gconv_step *) &from_c16,
+  .fromc16_nsteps = 1,
 };
 
 
@@ -191,6 +234,12 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
 					   &new_fcts->tomb_nsteps)
 			: NULL);
 
+      // XXX
+      new_fcts->toc16 = (struct __gconv_step *) &to_c16;
+      new_fcts->toc16_nsteps = 1;
+      new_fcts->fromc16 = (struct __gconv_step *) &from_c16;
+      new_fcts->fromc16_nsteps = 1;
+
       /* If any of the conversion functions is not available we don't
 	 use any since this would mean we cannot convert back and
 	 forth.*/
@@ -242,14 +291,36 @@ internal_function
 __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
 {
   copy->towc = __wcsmbs_getfct ("INTERNAL", name, &copy->towc_nsteps);
-  if (copy->towc != NULL)
+  if (copy->towc == NULL)
+    return 1;
+
+  copy->tomb = __wcsmbs_getfct (name, "INTERNAL", &copy->tomb_nsteps);
+  if (copy->tomb == NULL)
+    goto out_mb;
+
+#if 0
+  copy->fromc16 = __wcsmbs_getfct (name, "UTF-16//", &copy->fromc16_nsteps);
+  if (copy->fromc16 == NULL)
+    goto out_fromc16;
+
+  copy->toc16 = __wcsmbs_getfct ("UTF-16//", name, &copy->toc16_nsteps);
+  if (copy->toc16 == NULL)
+#else
+  if (0)
+#endif
     {
-      copy->tomb = __wcsmbs_getfct (name, "INTERNAL", &copy->tomb_nsteps);
-      if (copy->tomb == NULL)
-	__gconv_close_transform (copy->towc, copy->towc_nsteps);
+#if 0
+      __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
+    out_fromc16:
+      __gconv_close_transform (copy->tomb, copy->tomb_nsteps);
+#endif
+    out_mb:
+      __gconv_close_transform (copy->towc, copy->towc_nsteps);
+    out_wc:
+      return 1;
     }
 
-  return copy->towc == NULL || copy->tomb == NULL ? 1 : 0;
+  return 0;
 }
 
 void internal_function
@@ -264,6 +335,11 @@ _nl_cleanup_ctype (struct __locale_data *locale)
       /* Free the old conversions.  */
       __gconv_close_transform (data->tomb, data->tomb_nsteps);
       __gconv_close_transform (data->towc, data->towc_nsteps);
+#if 0
+      // XXX
+      __gconv_close_transform (data->fromc16, data->fromc16_nsteps);
+      __gconv_close_transform (data->toc16, data->toc16c_nsteps);
+#endif
       free ((char *) data);
     }
 }
diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h
index e2b1bfa9c8..064c41c82f 100644
--- a/wcsmbs/wcsmbsload.h
+++ b/wcsmbs/wcsmbsload.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998-2002, 2010 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2002, 2010, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -32,6 +32,11 @@ struct gconv_fcts
     size_t towc_nsteps;
     struct __gconv_step *tomb;
     size_t tomb_nsteps;
+
+    struct __gconv_step *toc16;
+    size_t toc16_nsteps;
+    struct __gconv_step *fromc16;
+    size_t fromc16_nsteps;
   };
 
 /* Set of currently active conversion functions.  */