about summary refs log tree commit diff
path: root/wcsmbs/c16rtomb.c
diff options
context:
space:
mode:
author Joseph Myers <joseph@codesourcery.com> 2018-10-19 16:31:29 +0000
committer Joseph Myers <joseph@codesourcery.com> 2018-10-19 16:31:29 +0000
commit d0a74159792477e5922f53fa2aa6d58eb8265a14 (patch)
tree fcce06511d2a91afa649f629cde15875b6fe6f6a /wcsmbs/c16rtomb.c
parent f997b4be18f7e57d757d39e42f7715db26528aa0 (diff)
download glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.tar.gz
glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.tar.xz
glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.zip
Handle surrogate pairs in c16rtomb (bug 23794, DR#488, C2X).
The c16rtomb implementation has:

  // XXX The ISO C 11 spec I have does not say anything about handling
  // XXX surrogates in this interface.

The DR#488 resolution, as applied to C2X, requires surrogate pairs to
be handled here (so the first call returns 0 and stores the high
surrogate in the mbstate_t, while the second call combines the
surrogates, produces a multibyte character and returns the number of
bytes written).  This patch implements that.  (mbrtoc16 already
handled producing surrogates as output.)

Tested for x86_64.

	[BZ #23794]
	* wcsmbs/c16rtomb.c (c16rtomb): Save first character of surrogate
	pair and return 0 in that case, and use saved character to
	interpret following character.
	* wcsmbs/tst-c16-surrogate.c: New file.
	* wcsmbs/Makefile (tests): Add tst-c16-surrogate.c.
	[$(run-built-tests) = yes] ($(objpfx)tst-c16-surrogate.out):
	Depend on $(gen-locales).
Diffstat (limited to 'wcsmbs/c16rtomb.c')
-rw-r--r-- wcsmbs/c16rtomb.c 41
1 files changed, 38 insertions, 3 deletions
diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c
index 48a63d067b..74950d8173 100644
--- a/wcsmbs/c16rtomb.c
+++ b/wcsmbs/c16rtomb.c
@@ -26,7 +26,42 @@ static mbstate_t state;
 size_t
 c16rtomb (char *s, char16_t c16, mbstate_t *ps)
 {
-  // XXX The ISO C 11 spec I have does not say anything about handling
-  // XXX surrogates in this interface.
-  return wcrtomb (s, c16, ps ?: &state);
+  wchar_t wc = c16;
+
+  if (ps == NULL)
+    ps = &state;
+
+  if (s == NULL)
+    {
+      /* Reset any state relating to surrogate pairs.  */
+      ps->__count &= 0x7fffffff;
+      ps->__value.__wch = 0;
+      wc = 0;
+    }
+
+  if (ps->__count & 0x80000000)
+    {
+      /* The previous call passed in the first surrogate of a
+	 surrogate pair.  */
+      ps->__count &= 0x7fffffff;
+      if (wc >= 0xdc00 && wc < 0xe000)
+	wc = (0x10000
+	      + ((ps->__value.__wch & 0x3ff) << 10)
+	      + (wc & 0x3ff));
+      else
+	/* This is not a low surrogate; ensure an EILSEQ error by
+	   trying to decode the high surrogate as a wide character on
+	   its own.  */
+	wc = ps->__value.__wch;
+      ps->__value.__wch = 0;
+    }
+  else if (wc >= 0xd800 && wc < 0xdc00)
+    {
+      /* The high part of a surrogate pair.  */
+      ps->__count |= 0x80000000;
+      ps->__value.__wch = wc;
+      return 0;
+    }
+
+  return wcrtomb (s, wc, ps);
 }