summary refs log tree commit diff
path: root/wcsmbs/tst-c16-surrogate.c
diff options
context:
space:
mode:
author Joseph Myers <joseph@codesourcery.com> 2018-10-19 16:31:29 +0000
committer Joseph Myers <joseph@codesourcery.com> 2018-10-19 16:31:29 +0000
commit d0a74159792477e5922f53fa2aa6d58eb8265a14 (patch)
tree fcce06511d2a91afa649f629cde15875b6fe6f6a /wcsmbs/tst-c16-surrogate.c
parent f997b4be18f7e57d757d39e42f7715db26528aa0 (diff)
download glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.tar.gz
glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.tar.xz
glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.zip
Handle surrogate pairs in c16rtomb (bug 23794, DR#488, C2X).
The c16rtomb implementation has:

  // XXX The ISO C 11 spec I have does not say anything about handling
  // XXX surrogates in this interface.

The DR#488 resolution, as applied to C2X, requires surrogate pairs to
be handled here (so the first call returns 0 and stores the high
surrogate in the mbstate_t, while the second call combines the
surrogates, produces a multibyte character and returns the number of
bytes written).  This patch implements that.  (mbrtoc16 already
handled producing surrogates as output.)

Tested for x86_64.

	[BZ #23794]
	* wcsmbs/c16rtomb.c (c16rtomb): Save first character of surrogate
	pair and return 0 in that case, and use saved character to
	interpret following character.
	* wcsmbs/tst-c16-surrogate.c: New file.
	* wcsmbs/Makefile (tests): Add tst-c16-surrogate.c.
	[$(run-built-tests) = yes] ($(objpfx)tst-c16-surrogate.out):
	Depend on $(gen-locales).
Diffstat (limited to 'wcsmbs/tst-c16-surrogate.c')
-rw-r--r-- wcsmbs/tst-c16-surrogate.c 89
1 files changed, 89 insertions, 0 deletions
diff --git a/wcsmbs/tst-c16-surrogate.c b/wcsmbs/tst-c16-surrogate.c
new file mode 100644
index 0000000000..6a87ff8206
--- /dev/null
+++ b/wcsmbs/tst-c16-surrogate.c
@@ -0,0 +1,89 @@
+/* Test c16rtomb handling of surrogate pairs (DR#488, bug 23794).
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <uchar.h>
+#include <wchar.h>
+#include <array_length.h>
+#include <support/check.h>
+
+static int
+do_test (void)
+{ /* Checks c16rtomb on UTF-16 surrogates, after first requiring that
+     setlocale can select the de_DE.UTF-8 locale.  Valid surrogate
+     pairs are covered by the first loop and invalid sequences by the
+     second.  Always returns 0; the individual checks are made with
+     the TEST_VERIFY_EXIT and TEST_COMPARE macros.  */
+  TEST_VERIFY_EXIT (setlocale (LC_ALL, "de_DE.UTF-8") != NULL);
+  /* Test conversions of surrogate pairs.  Code points from 0x10000 up
+     to 0x10ffff are sampled in steps of 0x123.  Each one is split
+     into a high surrogate (0xd800 plus the upper bits of its offset
+     from 0x10000) and a low surrogate (0xdc00 plus the lower ten
+     bits of that offset).  Both are passed to c16rtomb with a null
+     state pointer; the first call is expected to return 0 and the
+     second 4.  The four bytes in buf are then decoded with mbrtowc
+     and the result is compared with the original code point.  */
+  for (char32_t c = 0x10000; c <= 0x10ffff; c += 0x123)
+    {
+      char32_t c_pos = c - 0x10000;
+      char16_t c_hi = (c_pos >> 10) + 0xd800;
+      char16_t c_lo = (c_pos & 0x3ff) + 0xdc00;
+      printf ("testing U+0x%08x (0x%x 0x%x)\n",
+	      (unsigned int) c, (unsigned int) c_hi, (unsigned int) c_lo);
+      char buf[16] = { 0 };
+      size_t ret_hi = c16rtomb (buf, c_hi, NULL);
+      TEST_COMPARE (ret_hi, 0);
+      size_t ret_lo = c16rtomb (buf, c_lo, NULL);
+      TEST_COMPARE (ret_lo, 4);
+      wchar_t wc = 0;
+      size_t ret_wc = mbrtowc (&wc, buf, 4, NULL);
+      TEST_COMPARE (ret_wc, 4);
+      TEST_COMPARE (wc, (wchar_t) c);
+    }
+  /* Test errors for invalid conversions.  Each row of err_cases holds
+     two UTF-16 code units that are fed to c16rtomb in order.  A
+     second unit of 0 marks a row whose first unit is itself expected
+     to be rejected; in that case the second unit is never passed.  */
+  static const char16_t err_cases[][2] =
+    {
+      /* High surrogate followed by non-surrogate.  */
+      { 0xd800, 0x1 },
+      /* High surrogate followed by another high surrogate.  */
+      { 0xd800, 0xd800 },
+      /* Low surrogate not following high surrogate.  */
+      { 0xdc00, 0 }
+    };
+  for (size_t i = 0; i < array_length (err_cases); i++)
+    {
+      char16_t c_hi = err_cases[i][0];
+      char16_t c_lo = err_cases[i][1];
+      printf ("testing error case: 0x%x 0x%x\n", (unsigned int) c_hi,
+	      (unsigned int) c_lo);
+      c16rtomb (NULL, 0, NULL); /* NOTE(review): presumably returns the
+                                   internal conversion state to its
+                                   initial state so that one case
+                                   cannot affect the next -- confirm
+                                   against the c16rtomb specification.  */
+      char buf[16] = { 0 };
+      errno = 0; /* Cleared so that the EILSEQ check below reflects only
+                    the c16rtomb call that follows.  */
+      size_t ret_hi = c16rtomb (buf, c_hi, NULL);
+      if (c_lo == 0)
+	{
+	  /* Unmatched low surrogate in first place.  */
+	  TEST_COMPARE (ret_hi, (size_t) -1);
+	  TEST_COMPARE (errno, EILSEQ);
+	}
+      else
+	{
+	  /* High surrogate; error in second place.  The first call is
+	     still expected to return 0, and errno is cleared again
+	     before the second call.  */
+	  TEST_COMPARE (ret_hi, 0);
+	  errno = 0;
+	  size_t ret_lo = c16rtomb (buf, c_lo, NULL);
+	  TEST_COMPARE (ret_lo, (size_t) -1);
+	  TEST_COMPARE (errno, EILSEQ);
+	}
+    }
+  return 0;
+}
+
+#include <support/test-driver.c>