From 171e7fa4c1d9cbf0d8ff35ee795e1599913aa329 Mon Sep 17 00:00:00 2001 From: Jun-ichi Takimoto Date: Fri, 12 May 2017 12:10:13 +0900 Subject: 41090: Replace iswprint() if unicode9 is enabled. If wcwidth() or iswprint() is broken, force enable unicode9. --- configure.ac | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'configure.ac') diff --git a/configure.ac b/configure.ac index 911cc4547..88da89e3e 100644 --- a/configure.ac +++ b/configure.ac @@ -2589,16 +2589,21 @@ AC_HELP_STRING([--enable-unicode9], [compile with unicode9 character widths]), AC_DEFINE(ENABLE_UNICODE9) fi]) -AH_TEMPLATE([BROKEN_WCWIDTH], -[Define to 1 if the wcwidth() function is present but broken.]) AH_TEMPLATE([BROKEN_ISPRINT], [Define to 1 if the isprint() function is broken under UTF-8 locale.]) + if test x$zsh_cv_c_unicode_support = xyes; then AC_DEFINE(MULTIBYTE_SUPPORT) - dnl Test for a wcwidth() implementation that gives the wrong width for - dnl zero-width combining characters. - dnl For the test we use a combining acute accent (\u0301). + dnl Test if wcwidth() and/or iswprint() is broken for + dnl zero-width combining characters, or + dnl some characters in the Latin Extended-B. + dnl If either of the functions is broken, both functions will be replaced + dnl by the ones from wcwidth9.h by defining ENABLE_UNICODE9. We will do + dnl this only if __STDC_ISO_10646__ is defined (or if building on macOS, + dnl where __STDC_ISO_10646__ is not defined but wchar_t is UCS). + dnl For the test we use a combining acute accent (\u0301) or + dnl a LATIN SMALL LETTER L WITH CURL (\u0234). dnl We input it as UTF-8 since that is the standard we can rely dnl upon most: we can't rely on a wchar_t being stored as a dnl Unicode code point on all systems. 
@@ -2607,9 +2612,8 @@ if test x$zsh_cv_c_unicode_support = xyes; then dnl - the programme compiled, linked and ran dnl - we successfully set a UTF-8 locale dnl - the locale we set plausibly converted the UTF-8 string - dnl for a zero-width combining character (the only way to be - dnl 100% sure would be to output it and ask if it looked right) - dnl - the converted wide character gave a non-zero width. + dnl into the correct wide character + dnl - but wcwidth() or iswprint() is broken for the converted wide character. dnl locale -a is a fallback; on most systems we should find en_US.UTF-8. [locale_prog='char *my_locales[] = { "en_US.UTF-8", "en_GB.UTF-8", "en.UTF-8", ' @@ -2621,32 +2625,38 @@ if test x$zsh_cv_c_unicode_support = xyes; then #include <stdlib.h> #include <locale.h> #include <wchar.h> + #include <wctype.h> int main() { char **localep; char comb_acute_mb[] = { (char)0xcc, (char)0x81 }; + char u_0234[] = { (char)0xc8, (char)0xb4 }; wchar_t wc; + #if !defined(__STDC_ISO_10646__) && !defined(__APPLE__) + return 1; + #endif for (localep = my_locales; *localep; localep++) - if (setlocale(LC_ALL, *localep) && - mbtowc(&wc, comb_acute_mb, 2) == 2) + if (setlocale(LC_ALL, *localep)) break; if (!*localep) return 1; - if (wcwidth(wc) == 0) - return 1; - return 0; + if (mbtowc(&wc, comb_acute_mb, 2) == 2 && (wcwidth(wc) != 0 || !iswprint(wc))) + return 0; + if (mbtowc(&wc, u_0234, 2) == 2 && (wcwidth(wc) != 1 || !iswprint(wc))) + return 0; + return 1; } "] - AC_CACHE_CHECK(if the wcwidth() function is broken, + AC_CACHE_CHECK(if the wcwidth() and/or iswprint() functions are broken, zsh_cv_c_broken_wcwidth, [AC_TRY_RUN([$locale_prog], zsh_cv_c_broken_wcwidth=yes, zsh_cv_c_broken_wcwidth=no, zsh_cv_c_broken_wcwidth=no)]) if test x$zsh_cv_c_broken_wcwidth = xyes; then - AC_DEFINE(BROKEN_WCWIDTH) + AC_DEFINE(ENABLE_UNICODE9) fi dnl Check if isprint() behaves correctly under UTF-8 locale. -- cgit 1.4.1