From 171e7fa4c1d9cbf0d8ff35ee795e1599913aa329 Mon Sep 17 00:00:00 2001
From: Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
Date: Fri, 12 May 2017 12:10:13 +0900
Subject: 41090: Replace iswprint() if unicode9 is enabled.

If wcwidth() or iswprint() is broken, force enable unicode9.
---
 configure.ac | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

(limited to 'configure.ac')

diff --git a/configure.ac b/configure.ac
index 911cc4547..88da89e3e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2589,16 +2589,21 @@ AC_HELP_STRING([--enable-unicode9], [compile with unicode9 character widths]),
   AC_DEFINE(ENABLE_UNICODE9)
 fi])
 
-AH_TEMPLATE([BROKEN_WCWIDTH],
-[Define to 1 if the wcwidth() function is present but broken.])
 AH_TEMPLATE([BROKEN_ISPRINT],
 [Define to 1 if the isprint() function is broken under UTF-8 locale.])
+
 if test x$zsh_cv_c_unicode_support = xyes; then
   AC_DEFINE(MULTIBYTE_SUPPORT)
 
-  dnl Test for a wcwidth() implementation that gives the wrong width for
-  dnl zero-width combining characters.
-  dnl For the test we use a combining acute accent (\u0301).
+  dnl Test if wcwidth() and/or iswprint() is broken for
+  dnl   zero-width combining characters, or
+  dnl   some characters in the Latin Extended-B block.
+  dnl If either of the functions is broken, both functions will be replaced
+  dnl by the ones from wcwidth9.h by defining ENABLE_UNICODE9. We will do
+  dnl this only if __STDC_ISO_10646__ is defined (or if building on macOS,
+  dnl where __STDC_ISO_10646__ is not defined but wchar_t is UCS).
+  dnl For the test we use a combining acute accent (\u0301) and
+  dnl a LATIN SMALL LETTER L WITH CURL (\u0234).
   dnl We input it as UTF-8 since that is the standard we can rely
   dnl upon most:  we can't rely on a wchar_t being stored as a
   dnl Unicode code point on all systems.
@@ -2607,9 +2612,8 @@ if test x$zsh_cv_c_unicode_support = xyes; then
   dnl - the programme compiled, linked and ran
   dnl - we successfully set a UTF-8 locale
   dnl - the locale we set plausibly converted the UTF-8 string
-  dnl   for a zero-width combining character (the only way to be
-  dnl   100% sure would be to output it and ask if it looked right)
-  dnl - the converted wide character gave a non-zero width.
+  dnl   into the correct wide character
+  dnl - but wcwidth() or iswprint() is broken for the converted wide character.
   dnl locale -a is a fallback; on most systems we should find en_US.UTF-8.
   [locale_prog='char *my_locales[] = {
   "en_US.UTF-8", "en_GB.UTF-8", "en.UTF-8", '
@@ -2621,32 +2625,38 @@ if test x$zsh_cv_c_unicode_support = xyes; then
   #include <stdlib.h>
   #include <locale.h>
   #include <wchar.h>
+  #include <wctype.h>
 
   int main() {
     char **localep;
     char comb_acute_mb[] = { (char)0xcc, (char)0x81 };
+    char u_0234[] = { (char)0xc8, (char)0xb4 };
     wchar_t wc;
+  #if !defined(__STDC_ISO_10646__) && !defined(__APPLE__)
+    return 1;
+  #endif
 
     for (localep = my_locales; *localep; localep++)
-      if (setlocale(LC_ALL, *localep) &&
-          mbtowc(&wc, comb_acute_mb, 2) == 2)
+      if (setlocale(LC_ALL, *localep))
 	  break;
     if (!*localep)
       return 1;
-    if (wcwidth(wc) == 0)
-      return 1;
-    return 0;
+    if (mbtowc(&wc, comb_acute_mb, 2) == 2 && (wcwidth(wc) != 0 || !iswprint(wc)))
+      return 0;
+    if (mbtowc(&wc, u_0234, 2) == 2 && (wcwidth(wc) != 1 || !iswprint(wc)))
+      return 0;
+    return 1;
   }
   "]
 
-  AC_CACHE_CHECK(if the wcwidth() function is broken,
+  AC_CACHE_CHECK(if the wcwidth() and/or iswprint() functions are broken,
   zsh_cv_c_broken_wcwidth,
   [AC_TRY_RUN([$locale_prog],
   zsh_cv_c_broken_wcwidth=yes,
   zsh_cv_c_broken_wcwidth=no,
   zsh_cv_c_broken_wcwidth=no)])
   if test x$zsh_cv_c_broken_wcwidth = xyes; then
-    AC_DEFINE(BROKEN_WCWIDTH)
+    AC_DEFINE(ENABLE_UNICODE9)
   fi
 
   dnl Check if isprint() behaves correctly under UTF-8 locale.
-- 
cgit 1.4.1