diff options
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | posix/tst-regex.c | 49 | ||||
-rw-r--r-- | posix/tst-regex.input | 12 |
3 files changed, 41 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog index ddda8700c4..05b9ffc38e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2019-08-27 Paul Eggert <eggert@cs.ucla.edu> + + Fix posix/tst-regex by using UTF-8 and own test input + Problem reported by Stefan Liebler in: + https://sourceware.org/ml/libc-alpha/2019-08/msg00658.html + * posix/tst-regex.c: Convert this file from Latin-1 to UTF-8. + (do_test, test_expr): Adjust to the fact that this source file, + and the test data in ChangeLog.8, is now UTF-8 instead of Latin-1. + * posix/tst-regex.input: Copy from ChangeLog.old/ChangeLog.8, + so that it is now UTF-8. + 2019-08-28 Paul A. Clarke <pc@us.ibm.com> * sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_status_ISA300): Delete. diff --git a/posix/tst-regex.c b/posix/tst-regex.c index c5d802625a..6be1da404f 100644 --- a/posix/tst-regex.c +++ b/posix/tst-regex.c @@ -86,27 +86,26 @@ do_test (void) close (fd); - /* We have to convert a few things from Latin-1 to UTF-8. */ - cd = iconv_open ("UTF-8", "ISO-8859-1"); + /* We have to convert a few things from UTF-8 to Latin-1. */ + cd = iconv_open ("ISO-8859-1", "UTF-8"); if (cd == (iconv_t) -1) error (EXIT_FAILURE, errno, "cannot get conversion descriptor"); - /* For the second test we have to convert the file content to UTF-8. - Since the text is mostly ASCII it should be enough to allocate - twice as much memory for the UTF-8 text than for the Latin-1 - text. */ - umem = (char *) calloc (2, memlen); + /* For the second test we have to convert the file content to Latin-1. + This cannot grow the data. 
*/ + umem = (char *) malloc (memlen + 1); if (umem == NULL) error (EXIT_FAILURE, errno, "while allocating buffer"); inmem = mem; inlen = memlen; outmem = umem; - outlen = 2 * memlen - 1; + outlen = memlen; iconv (cd, &inmem, &inlen, &outmem, &outlen); umemlen = outmem - umem; if (inlen != 0) error (EXIT_FAILURE, errno, "cannot convert buffer"); + umem[umemlen] = '\0'; #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 # if _POSIX_CPUTIME == 0 @@ -124,11 +123,11 @@ do_test (void) /* Run the actual tests. All tests are run in a single-byte and a multi-byte locale. */ - result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 2, 2); + result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4); result |= test_expr ("G.ran", 2, 3); result |= test_expr ("G.\\{1\\}ran", 2, 3); result |= test_expr ("G.*ran", 3, 44); - result |= test_expr ("[äáàâ]", 0, 0); + result |= test_expr ("[äáàâ]", 0, 0); result |= test_expr ("Uddeborg", 2, 2); result |= test_expr (".Uddeborg", 2, 2); @@ -151,27 +150,27 @@ test_expr (const char *expr, int expected, int expectedicase) size_t outlen; char *uexpr; - /* First test: search with an ISO-8859-1 locale. */ - if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL) - error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1"); + /* First test: search with an UTF-8 locale. 
*/ + if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL) + error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8"); - printf ("\nTest \"%s\" with 8-bit locale\n", expr); + printf ("\nTest \"%s\" with multi-byte locale\n", expr); result = run_test (expr, mem, memlen, 0, expected); - printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr); + printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr); result |= run_test (expr, mem, memlen, 1, expectedicase); - printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr); + printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr); result |= run_test_backwards (expr, mem, memlen, 0, expected); - printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n", + printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n", expr); result |= run_test_backwards (expr, mem, memlen, 1, expectedicase); - /* Second test: search with an UTF-8 locale. */ - if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL) - error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8"); + /* Second test: search with an ISO-8859-1 locale. */ + if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL) + error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1"); inmem = (char *) expr; inlen = strlen (expr); - outlen = inlen * MB_CUR_MAX; + outlen = inlen; outmem = uexpr = alloca (outlen + 1); memset (outmem, '\0', outlen + 1); iconv (cd, &inmem, &inlen, &outmem, &outlen); @@ -179,13 +178,13 @@ test_expr (const char *expr, int expected, int expectedicase) error (EXIT_FAILURE, errno, "cannot convert expression"); /* Run the tests. 
*/ - printf ("\nTest \"%s\" with multi-byte locale\n", expr); + printf ("\nTest \"%s\" with 8-bit locale\n", expr); result |= run_test (uexpr, umem, umemlen, 0, expected); - printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr); + printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr); result |= run_test (uexpr, umem, umemlen, 1, expectedicase); - printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr); + printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr); result |= run_test_backwards (uexpr, umem, umemlen, 0, expected); - printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n", + printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n", expr); result |= run_test_backwards (uexpr, umem, umemlen, 1, expectedicase); diff --git a/posix/tst-regex.input b/posix/tst-regex.input index 52b85a6270..c48660d23a 100644 --- a/posix/tst-regex.input +++ b/posix/tst-regex.input @@ -6025,7 +6025,7 @@ (Host Address Functions): Use uint32_t consequently and add a number of clarifications for IPv4/IPv6, classless addresses. (Internet Namespace): Added some paragraphs about IPv6. - Based on suggestions by Francesco Potorti` <F.Potorti@cnuce.cnr.it>. + Based on suggestions by Francesco Potortì <F.Potorti@cnuce.cnr.it>. 1998-04-05 Philip Blundell <Philip.Blundell@pobox.com> @@ -6565,7 +6565,7 @@ * manual/examples/mkfsock.c (make_named_socket): Removed blank lines for clarification. (make_named_socket): Use strncpy instead of strcpy. - Reported by Francesco Potorti` <F.Potorti@cnuce.cnr.it>. + Reported by Francesco Potortì <F.Potorti@cnuce.cnr.it>. 1998-03-30 13:28 Ulrich Drepper <drepper@cygnus.com> @@ -7975,7 +7975,7 @@ * sysdeps/generic/getresuid.c (__getresuid): Use ISO C declaration style to avoid warnings. -1998-03-06 11:48 Mark M._Kettenis <kettenis@hall.phys.uva.nl> +1998-03-06 11:48 Mark M. 
Kettenis <kettenis@hall.phys.uva.nl> * elf/rtld.c (process_dl_debug): Fix typo: "DL_DEBUG" -> "LD_DEBUG". @@ -8314,7 +8314,7 @@ 1998-02-27 Ulrich Drepper <drepper@cygnus.com> * misc/efgcvt_r.c (APPEND): Handle printing of 0.0 correctly. - Reported by Göran Uddeborg <goeran@uddeborg.pp.se>. + Reported by Göran Uddeborg <goeran@uddeborg.pp.se>. * misc/tst-efgcvt.c (ecvt_tests): Add new test case for reported bug. @@ -8322,7 +8322,7 @@ 1998-02-25 Andreas Jaeger <aj@arthur.rhein-neckar.de> * manual/arith.texi (Old-style number conversion): Correct - typo. Reported by Göran Uddeborg <goeran@uddeborg.pp.se>. + typo. Reported by Göran Uddeborg <goeran@uddeborg.pp.se>. 1998-02-27 Ulrich Drepper <drepper@cygnus.com> @@ -12044,7 +12044,7 @@ * libio/stdio.h: Correct comment of sys_nerr/sys_errlist. -1997-11-25 Paul Eggert <eggert@shade.twinsun.com> +1997-11-25 Paul Eggert <eggert@twinsun.com> * strftime.c (strftime): No longer any need to undef or declare if emacs is defined. |