about summary refs log tree commit diff
diff options
context:
space:
mode:
author Colin Leroy-Mira <colin@colino.net> 2023-08-29 08:02:50 +0200
committer Florian Weimer <fweimer@redhat.com> 2023-08-29 09:31:23 +0200
commit dfe8c445883a50a55564b02b6957257bfc510db4 (patch)
tree fcd0e0cdce35897c7c0efb94f8c7d54f0ab46377
parent c00b984fcd53f679ca2dafcd1aee2c89836e6e73 (diff)
download glibc-dfe8c445883a50a55564b02b6957257bfc510db4.tar.gz
glibc-dfe8c445883a50a55564b02b6957257bfc510db4.tar.xz
glibc-dfe8c445883a50a55564b02b6957257bfc510db4.zip
localedata: Translit common emojis to smileys [BZ #30649]
Add common emojis to the translit-able characters (mostly
faces and hearts), and translit them to old-fashioned
smileys.

Signed-off-by: Colin Leroy-Mira <colin@colino.net>
Reviewed-by: Florian Weimer <fweimer@redhat.com>
-rw-r--r-- localedata/Makefile 4
-rw-r--r-- localedata/locales/translit_emojis 91
-rw-r--r-- localedata/locales/translit_neutral 1
-rw-r--r-- localedata/tst-iconv-emojis-trans.c 117
4 files changed, 213 insertions, 0 deletions
diff --git a/localedata/Makefile b/localedata/Makefile
index 3619b6d47e..dd41db6d8f 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -1,4 +1,5 @@
 # Copyright (C) 1996-2023 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -164,6 +165,7 @@ tests = \
   bug-usesetlocale \
   tst-c-utf8-consistency \
   tst-digits \
+  tst-iconv-emojis-trans \
   tst-iconv-math-trans \
   tst-leaks \
   tst-mbswcs1 \
@@ -320,6 +322,8 @@ LOCALES := \
 
 include ../gen-locales.mk
 
+$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
+
 $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
 endif
 
diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
new file mode 100644
index 0000000000..cfb1964afa
--- /dev/null
+++ b/localedata/locales/translit_emojis
@@ -0,0 +1,91 @@
+escape_char /
+comment_char %
+
+% This file is part of the GNU C Library and contains locale data.
+% The Free Software Foundation and the GNU Toolchain Authors do not
+% claim any copyright interest in the locale data contained in this
+% file.  The foregoing does not affect the license of the GNU C
+% Library as a whole.  It does not exempt you from the conditions
+% of the license if your use would otherwise be governed by that
+% license.
+
+% Transliterations of emojis to ASCII smileys.
+
+LC_CTYPE
+
+translit_start
+
+♡ "/<3" % WHITE HEART SUIT
+♥ "/<3" % BLACK HEART SUIT
+❤ "/<3" % HEAVY BLACK HEART
+💙 "/<3" % BLUE HEART
+💓 "/<3" % BEATING HEART
+💔 "/<//3" % BROKEN HEART
+💖 "/<3" % SPARKLING HEART
+💗 "/<3" % GROWING HEART
+💚 "/<3" % GREEN HEART
+💛 "/<3" % YELLOW HEART
+💜 "/<3" % PURPLE HEART
+🖤 "/<3" % BLACK HEART
+🧡 "/<3" % ORANGE HEART
+🤍 "/<3" % WHITE HEART
+🤎 "/<3" % BROWN HEART
+😀 ":-D" % GRINNING FACE
+😁 ":-D" % GRINNING FACE WITH SMILING EYES
+😂 ":'D" % FACE WITH TEARS OF JOY
+😃 ":-D" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
+😄 ":-D" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
+😅 ":-D" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
+😆 ":-D" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
+😇 "O:-)" % SMILING FACE WITH HALO
+😈 "/>:)" % SMILING FACE WITH HORNS
+😉 ";-)" % WINKING FACE
+😊 ":-)" % SMILING FACE WITH SMILING EYES
+😋 ":-P" % FACE SAVOURING DELICIOUS FOOD
+😌 ":-)" % RELIEVED FACE
+😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES
+😎 "B-)" % SMILING FACE WITH SUNGLASSES
+😏 ";-)" % SMIRKING FACE
+😐 ":-|" % NEUTRAL FACE
+😑 ":-|" % EXPRESSIONLESS FACE
+😒 ":-|" % UNAMUSED FACE
+😓 ":'-|" % FACE WITH COLD SWEAT
+😔 ":-|" % PENSIVE FACE
+😕 ":-//" % CONFUSED FACE
+😖 ":-S" % CONFOUNDED FACE
+😗 ":-*" % KISSING FACE
+😘 ":-*" % FACE THROWING A KISS
+😙 ":-*" % KISSING FACE WITH SMILING EYES
+😚 ":-*" % KISSING FACE WITH CLOSED EYES
+😛 ":-P" % FACE WITH STUCK-OUT TONGUE
+😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
+😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
+😞 ":-(" % DISAPPOINTED FACE
+😟 ":-(" % WORRIED FACE
+😠 "/>:-(" % ANGRY FACE
+😡 ":-(" % POUTING FACE
+😢 ":'-(" % CRYING FACE
+😣 "X-(" % PERSEVERING FACE
+😦 ":-O" % FROWNING FACE WITH OPEN MOUTH
+😧 ":-O" % ANGUISHED FACE
+😨 ":-O" % FEARFUL FACE
+😩 ":-O" % WEARY FACE
+😭 ":<U0022>-(" % LOUDLY CRYING FACE
+😮 ":-O" % FACE WITH OPEN MOUTH
+😯 ":-O" % HUSHED FACE
+😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT
+😱 ":-O" % FACE SCREAMING IN FEAR
+😲 ":-O" % ASTONISHED FACE
+😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES
+😹 ":'-3" % CAT FACE WITH TEARS OF JOY
+😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH
+😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPE EYES
+😼 ";-3" % CAT FACE WITH WRY SMILE
+😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES
+🙁 ":-(" % SLIGHTLY FROWNING FACE
+🙂 ":-)" % SLIGHTLY SMILING FACE
+🙃 "(-:" % UPSIDE-DOWN FACE
+
+translit_end
+
+END LC_CTYPE
diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
index 72f66220b7..57412ae565 100644
--- a/localedata/locales/translit_neutral
+++ b/localedata/locales/translit_neutral
@@ -17,6 +17,7 @@ translit_start
 include "translit_circle";""
 include "translit_cjk_compat";""
 include "translit_compat";""
+include "translit_emojis";""
 include "translit_font";""
 include "translit_fraction";""
 include "translit_narrow";""
diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
new file mode 100644
index 0000000000..de4c20a279
--- /dev/null
+++ b/localedata/tst-iconv-emojis-trans.c
@@ -0,0 +1,117 @@
+/* Test some emoji transliterations
+
+   Copyright (C) 2019-2023 Free Software Foundation, Inc.
+   Copyright The GNU Toolchain Authors.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+
+/* Transliterate the emojis in STR to ASCII with iconv and compare with
+   EXPECTED.  Returns 0 if every check passes and 1 otherwise.  */
+static int
+do_test (void)
+{
+  /* Expected iconv return value: one non-reversible conversion per emoji.  */
+  const int num_emojis = 70;
+  const char str[] = "♡ ♥ ❤ 💙 💓 "
+                     "💔 💖 💗 💚 💛 "
+                     "💜 🖤 🧡 🤍 🤎 "
+                     "😀 😁 😂 😃 😄 "
+                     "😅 😆 😇 😈 😉 "
+                     "😊 😋 😌 😍 😎 "
+                     "😏 😐 😑 😒 😓 "
+                     "😔 😕 😖 😗 😘 "
+                     "😙 😚 😛 😜 😝 "
+                     "😞 😟 😠 😡 😢 "
+                     "😣 😦 😧 😨 😩 "
+                     "😭 😮 😯 😰 😱 "
+                     "😲 😸 😹 😺 😻 "
+                     "😼 😽 🙁 🙂 🙃";
+
+  const char expected[] = "<3 <3 <3 <3 <3 "
+                          "</3 <3 <3 <3 <3 "
+                          "<3 <3 <3 <3 <3 "
+                          ":-D :-D :'D :-D :-D "
+                          ":-D :-D O:-) >:) ;-) "
+                          ":-) :-P :-) :-* B-) "
+                          ";-) :-| :-| :-| :'-| "
+                          ":-| :-/ :-S :-* :-* "
+                          ":-* :-* :-P ;-P X-P "
+                          ":-( :-( >:-( :-( :'-( "
+                          "X-( :-O :-O :-O :-O "
+                          ":\"-( :-O :-O :'-O :-O "
+                          ":-O :-3 :'-3 :-3 :-3 "
+                          ";-3 :-3 :-( :-) (-:";
+
+  char *inptr = (char *) str;
+  size_t inlen = strlen (str) + 1;  /* +1: convert the final NUL too.  */
+  char outbuf[500];
+  char *outptr = outbuf;
+  size_t outlen = sizeof (outbuf);
+  int result = 0;
+
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    FAIL_EXIT1 ("setlocale failed");
+
+  iconv_t cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+  if (cd == (iconv_t) -1)
+    FAIL_EXIT1 ("iconv_open failed");
+
+  size_t n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  if (n != num_emojis)
+    {
+      if (n == (size_t) -1)
+        printf ("iconv() returned error: %m\n");
+      else
+        printf ("iconv() returned %zu, expected %d\n", n, num_emojis);
+      result = 1;
+    }
+  if (inlen != 0)
+    {
+      puts ("not all input consumed");
+      result = 1;
+    }
+  else if (inptr - str != strlen (str) + 1)
+    {
+      printf ("inptr wrong, advanced by %td\n", inptr - str);
+      result = 1;
+    }
+  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
+    {
+      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
+              (int) (sizeof (outbuf) - outlen), outbuf, expected);
+      result = 1;
+    }
+  else if (outlen != sizeof (outbuf) - sizeof (expected))
+    {
+      printf ("outlen wrong: %zu, expected %zu\n", outlen,
+              sizeof (outbuf) - sizeof (expected));
+      result = 1;
+    }
+  else
+    printf ("output is \"%s\" which is OK\n", outbuf);
+
+  iconv_close (cd);  /* Release the descriptor opened above.  */
+  return result;
+}
+
+#include <support/test-driver.c>