about summary refs log tree commit diff
path: root/wcsmbs/c8rtomb.c
diff options
context:
space:
mode:
Diffstat (limited to 'wcsmbs/c8rtomb.c')
-rw-r--r--wcsmbs/c8rtomb.c132
1 files changed, 132 insertions, 0 deletions
diff --git a/wcsmbs/c8rtomb.c b/wcsmbs/c8rtomb.c
new file mode 100644
index 0000000000..b564770eb5
--- /dev/null
+++ b/wcsmbs/c8rtomb.c
@@ -0,0 +1,132 @@
+/* UTF-8 to multibyte conversion.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <uchar.h>
+#include <wchar.h>
+
+
+/* This is the private state used if PS is NULL.  */
+static mbstate_t state;
+
+size_t
+c8rtomb (char *s, char8_t c8, mbstate_t *ps)
+{
+  /* This implementation depends on the converter invoked by wcrtomb not
+     needing to retain state in either the top most bit of ps->__count or
+     in ps->__value between invocations.  This implementation uses the
+     top most bit of ps->__count to indicate that trailing code units are
+     expected and uses ps->__value to store previously seen code units.  */
+
+  wchar_t wc;
+
+  if (ps == NULL)
+    ps = &state;
+
+  if (s == NULL)
+    {
+      /* if 's' is a null pointer, behave as if u8'\0' was passed as 'c8'.  If
+         this occurs for an incomplete code unit sequence, then an error will
+         be reported below.  */
+      c8 = u8""[0];
+    }
+
+  if (! (ps->__count & 0x80000000))
+    {
+      /* Initial state.  */
+      if ((c8 >= 0x80 && c8 <= 0xC1) || c8 >= 0xF5)
+	{
+	  /* An invalid lead code unit.  */
+	  __set_errno (EILSEQ);
+	  return -1;
+	}
+      if (c8 >= 0xC2)
+	{
+	  /* A valid lead code unit.  */
+	  ps->__count |= 0x80000000;
+	  ps->__value.__wchb[0] = c8;
+	  ps->__value.__wchb[3] = 1;
+	  return 0;
+	}
+      /* A single byte (ASCII) code unit.  */
+      wc = c8;
+    }
+  else
+    {
+      char8_t cu1 = ps->__value.__wchb[0];
+      if (ps->__value.__wchb[3] == 1)
+	{
+	  /* A single lead code unit was previously seen.  */
+	  if ((c8 < 0x80 || c8 > 0xBF)
+              || (cu1 == 0xE0 && c8 < 0xA0)
+              || (cu1 == 0xED && c8 > 0x9F)
+              || (cu1 == 0xF0 && c8 < 0x90)
+              || (cu1 == 0xF4 && c8 > 0x8F))
+	    {
+	      /* An invalid second code unit.  */
+	      __set_errno (EILSEQ);
+	      return -1;
+	    }
+	  if (cu1 >= 0xE0)
+	    {
+	      /* A three or four code unit sequence.  */
+	      ps->__value.__wchb[1] = c8;
+	      ++ps->__value.__wchb[3];
+	      return 0;
+	    }
+	  wc = ((cu1 & 0x1F) << 6)
+	       + (c8 & 0x3F);
+	}
+      else
+	{
+	  char8_t cu2 = ps->__value.__wchb[1];
+	  /* A three or four byte code unit sequence.  */
+	  if (c8 < 0x80 || c8 > 0xBF)
+	    {
+	      /* An invalid third or fourth code unit.  */
+	      __set_errno (EILSEQ);
+	      return -1;
+	    }
+	  if (ps->__value.__wchb[3] == 2 && cu1 >= 0xF0)
+	    {
+	      /* A four code unit sequence.  */
+	      ps->__value.__wchb[2] = c8;
+	      ++ps->__value.__wchb[3];
+	      return 0;
+	    }
+	  if (cu1 < 0xF0)
+	    {
+	      wc = ((cu1 & 0x0F) << 12)
+		   + ((cu2 & 0x3F) << 6)
+		   + (c8 & 0x3F);
+	    }
+	  else
+	    {
+	      char8_t cu3 = ps->__value.__wchb[2];
+	      wc = ((cu1 & 0x07) << 18)
+		   + ((cu2 & 0x3F) << 12)
+		   + ((cu3 & 0x3F) << 6)
+		   + (c8 & 0x3F);
+	    }
+	}
+      ps->__count &= 0x7fffffff;
+      ps->__value.__wch = 0;
+    }
+
+  return wcrtomb (s, wc, ps);
+}