From eac4282fa6325e5633bdfee7a6afd9f943b34b1a Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 28 Jul 2000 17:45:15 +0000 Subject: Update. 2000-07-27 Jakub Jelinek * locale/indigits.h (indigit_value): Correct. * locale/indigitswc.h (indigitwc_value): Correct. * stdio-common/vfscanf.c (__vfscanf): Fix I18N number conversion, add GROUP checking for it, fix GROUP number conversion with strlen(thousands) > 1. Honour width correctly in the presence of floating decimal points and thousands separators. * stdio-common/tst-sscanf.c: New test. * stdio-common/Makefile: Add it to tests. * sysdeps/generic/strtol.c (strtol): Fix conversion if there are thousands separators and group argument is non-zero. Reported by Andi Kleen . --- ChangeLog | 16 ++- locale/indigits.h | 16 +-- locale/indigitswc.h | 4 +- localedata/ChangeLog | 5 + stdio-common/Makefile | 4 +- stdio-common/tst-sscanf.c | 121 +++++++++++++++++++++ stdio-common/vfscanf.c | 269 ++++++++++++++++++++++++++++++++-------------- sysdeps/generic/strtol.c | 62 ++++++++++- 8 files changed, 396 insertions(+), 101 deletions(-) create mode 100644 stdio-common/tst-sscanf.c diff --git a/ChangeLog b/ChangeLog index 4815a4afad..52beff8836 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2000-07-27 Jakub Jelinek + + * locale/indigits.h (indigit_value): Correct. + * locale/indigitswc.h (indigitwc_value): Correct. + * stdio-common/vfscanf.c (__vfscanf): Fix I18N number + conversion, add GROUP checking for it, fix GROUP number + conversion with strlen(thousands) > 1. + Honour width correctly in the presence of floating + decimal points and thousands separators. + * stdio-common/tst-sscanf.c: New test. + * stdio-common/Makefile: Add it to tests. + * sysdeps/generic/strtol.c (strtol): Fix conversion if there are + thousands separators and group argument is non-zero. + 2000-07-25 Bruno Haible * locale/Versions: Enumerate __*_l functions individually. 
__wctrans_l @@ -62,7 +76,7 @@ * sysdeps/unix/sysv/linux/bits/socket.h: Rename MSG_URG TO MSG_CONFIRM following kernel 2.4.0 (MSG_URG was never used). * sysdeps/unix/sysv/linux/mips/bits/socket.h: Likewise. - Reported by Andi Kleen . + Reported by Andi Kleen . * rt/tst-aio64.c: Add tests for aio_fsync64 and aio_cancel64. (do_wait): Test requests with aio_return64. diff --git a/locale/indigits.h b/locale/indigits.h index a5289cec06..7a45994de8 100644 --- a/locale/indigits.h +++ b/locale/indigits.h @@ -32,6 +32,7 @@ indigit_value (const char **s, size_t *len, int *decided) int from_level; int to_level; const char *mbdigits[10]; + int i; int n; if (*decided != -1) @@ -53,11 +54,12 @@ indigit_value (const char **s, size_t *len, int *decided) mbdigits[n] = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_MB + n); dlen = strlen (mbdigits[n]); - if (dlen <= len && memcmp (*s, mbdigits[n], dlen) == 0) + if (from_level == 0 && dlen <= *len + && memcmp (*s, mbdigits[n], dlen) == 0) { /* Found it. */ *s += dlen; - len -= dlen; + *len -= dlen; if (*decided == -1) *decided = 0; return n; @@ -68,18 +70,19 @@ indigit_value (const char **s, size_t *len, int *decided) } /* Now perform the remaining tests. */ - while (++from_level <= to_level) + for (i = 1; i <= to_level; ++i) { /* Search all ten digits of this level. */ for (n = 0; n < 10; ++n) { size_t dlen = strlen (mbdigits[n]); - if (dlen <= len && memcmp (*s, mbdigits[n], dlen) == 0) + if (i >= from_level && dlen <= *len + && memcmp (*s, mbdigits[n], dlen) == 0) { /* Found it. */ *s += dlen; - len -= dlen; + *len -= dlen; if (*decided == -1) *decided = from_level; return n; @@ -88,9 +91,6 @@ indigit_value (const char **s, size_t *len, int *decided) /* Advance the pointer to the next string. */ mbdigits[n] += dlen + 1; } - - /* Next level. */ - ++from_level; } /* If we reach this point no matching digit was found. 
*/ diff --git a/locale/indigitswc.h b/locale/indigitswc.h index 7bd871527e..9abe98e63b 100644 --- a/locale/indigitswc.h +++ b/locale/indigitswc.h @@ -48,6 +48,7 @@ indigitwc_value (wchar_t wc, int *decided) { /* Get the string for the digits with value N. */ wcdigits[n] = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n); + wcdigits[n] += from_level; if (wc == *wcdigits[n]) { @@ -78,9 +79,6 @@ indigitwc_value (wchar_t wc, int *decided) /* Advance the pointer to the next string. */ ++wcdigits[n]; } - - /* Next level. */ - ++from_level; } /* If we reach this point no matching digit was found. */ diff --git a/localedata/ChangeLog b/localedata/ChangeLog index 9162239c26..1d3c967774 100644 --- a/localedata/ChangeLog +++ b/localedata/ChangeLog @@ -1,3 +1,8 @@ +2000-07-28 Ulrich Drepper + + * locales/zh_TW: New file. + Contributed by Tung-Han Hsieh . + 2000-07-28 Andreas Jaeger * tst-langinfo.c: Include <string.h> for strcmp prototype. diff --git a/stdio-common/Makefile b/stdio-common/Makefile index 62289fef91..cde24ee94a 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -54,7 +54,7 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \ bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 \ tfformat tiformat tllformat tstdiomisc tst-printfsz tst-wc-printf \ scanf1 scanf2 scanf3 scanf4 scanf5 scanf7 scanf8 scanf9 scanf10 \ - scanf12 tst-tmpnam tst-cookie tst-obprintf + scanf12 tst-tmpnam tst-cookie tst-obprintf tst-sscanf test-srcs = tst-unbputc tst-printf @@ -78,6 +78,8 @@ CFLAGS-scanf4.c = -Wno-format CFLAGS-scanf7.c = -Wno-format CFLAGS-tst-printfsz.c = -Wno-format +tst-sscanf-ENV = LOCPATH=$(common-objpfx)localedata + $(inst_includedir)/bits/stdio_lim.h: $(common-objpfx)bits/stdio_lim.h $(do-install) diff --git a/stdio-common/tst-sscanf.c b/stdio-common/tst-sscanf.c new file mode 100644 index 0000000000..c0d3c36f4b --- /dev/null +++ b/stdio-common/tst-sscanf.c @@ -0,0 +1,121 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Jakub Jelinek , 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <stdlib.h> +#include <stdio.h> +#include <locale.h> + +const char *str_double[] = +{ + "-.10000E+020.20000E+020.25000E+010.40000E+010.50000E+010.12500E+01", + "0.10000E+020.20000E+020.25000E+010.40000E+010.50000E+010.12500E+01", + "-1234567E0198765432E0912345678901987654321091234567890198765432109", + "-0.1000E+020.20000E+020.25000E+010.40000E+010.50000E+010.12500E+01" +}; + +const double val_double[] = +{ + -.10000E+02, 0.20000E+02, 0.25000E+01, 0.40000E+01, 0.50000E+01, 0.12500E+01, + 0.10000E+02, 0.20000E+02, 0.25000E+01, 0.40000E+01, 0.50000E+01, 0.12500E+01, + -1234567E01, 98765432E09, 12345678901, 98765432109, 12345678901, 98765432109, + -0.1000E+02, 0.20000E+02, 0.25000E+01, 0.40000E+01, 0.50000E+01, 0.12500E+01 +}; + +const char *str_long[] = +{ + "-12345678987654321123456789987654321123456789987654321", + "-12345678987654321123456789987654321123456789987654321", + "-12,345,678987,654,321123,456,789987,654,321123,456,789987,654,321", + "-12,345,678987,654,321123,456,789987,654,321123,456,789987,654,321" +}; + +const char *fmt_long[] = +{ + "%9ld%9ld%9ld%9ld%9ld%9ld", + "%I9ld%I9ld%I9ld%I9ld%I9ld%I9ld", + 
"%'11ld%'11ld%'11ld%'11ld%'11ld%'11ld", + "%I'11ld%I'11ld%I'11ld%I'11ld%I'11ld%I'11ld" +}; + +const long int val_long[] = +{ + -12345678, 987654321, 123456789, 987654321, 123456789, 987654321 +}; + +int +main (void) +{ + double d[6]; + long l[6]; + int i, j; + int tst_locale; + int result = 0; + + tst_locale = 1; + if (tst_locale) + if (setlocale (LC_ALL, "en_US.ISO-8859-1") == NULL) + { + puts ("Failed to set en_US locale, skipping locale related tests"); + tst_locale = 0; + } + + for (i = 0; i < 4; ++i) + { + if (sscanf (str_double[i], "%11lf%11lf%11lf%11lf%11lf%11lf", + &d[0], &d[1], &d[2], &d[3], &d[4], &d[5]) != 6) + { + printf ("Double sscanf test %d wrong number of " + "assigned inputs\n", i); + result = 1; + } + else + for (j = 0; j < 6; ++j) + if (d[j] != val_double[6 * i + j]) + { + printf ("Double sscanf test %d failed (%g instead of %g)\n", + i, d[j], val_double[6 * i + j]); + result = 1; + break; + } + } + + for (i = 0; i < 4; ++i) + { + if (sscanf (str_long[i], fmt_long[i], + &l[0], &l[1], &l[2], &l[3], &l[4], &l[5]) != 6) + { + printf ("Integer sscanf test %d wrong number of " + "assigned inputs\n", i); + result = 1; + } + else + for (j = 0; j < 6; ++j) + if (l[j] != val_long[j]) + { + printf ("Integer sscanf test %d failed (%ld instead %ld)\n", + i, l[j], val_long[j]); + result = 1; + break; + } + + if (! tst_locale) + break; + } + exit (result); +} diff --git a/stdio-common/vfscanf.c b/stdio-common/vfscanf.c index 9457c4c17e..d618851a89 100644 --- a/stdio-common/vfscanf.c +++ b/stdio-common/vfscanf.c @@ -1213,6 +1213,7 @@ __vfscanf (FILE *s, const char *format, va_list argptr) { int from_level; int to_level; + int level; #ifdef COMPILE_WSCANF const wchar_t *wcdigits[10]; #else @@ -1229,99 +1230,196 @@ __vfscanf (FILE *s, const char *format, va_list argptr) _NL_CTYPE_INDIGITS_MB_LEN) - 1; #endif - /* In this round we get the pointer to the digit strings - and also perform the first round of comparisons. 
*/ - for (n = 0; n < 10; ++n) + /* Read the number into workspace. */ + while (c != EOF && width != 0) { - /* Get the string for the digits with value N. */ + /* In this round we get the pointer to the digit strings + and also perform the first round of comparisons. */ + for (n = 0; n < 10; ++n) + { + /* Get the string for the digits with value N. */ #ifdef COMPILE_WSCANF - wcdigits[n] = (const wchar_t *) - _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n); - if (c == *wcdigits[n]) - break; + wcdigits[n] = (const wchar_t *) + _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n); + wcdigits[n] += from_level; - /* Advance the pointer to the next string. */ - ++wcdigits[n]; + if (c == *wcdigits[n]) + { + to_level = from_level; + break; + } + + /* Advance the pointer to the next string. */ + ++wcdigits[n]; #else - size_t dlen; - size_t dcnt; + const char *cmpp; + int avail = width > 0 ? width : INT_MAX; - mbdigits[n] = _NL_CURRENT (LC_CTYPE, - _NL_CTYPE_INDIGITS0_MB + n); - dlen = strlen (mbdigits[n]); + mbdigits[n] = _NL_CURRENT (LC_CTYPE, + _NL_CTYPE_INDIGITS0_MB + n); - dcnt = 0; - do - { - if (c != mbdigits[n][dcnt]) - break; - c = inchar (); - } - while (--dcnt > 0); + for (level = 0; level < from_level; level++) + mbdigits[n] = strchr (mbdigits[n], '\0') + 1; - if (dcnt == 0) - /* We found it. */ - break; + cmpp = mbdigits[n]; + while (*cmpp == c && avail > 0) + { + if (*++cmpp == '\0') + break; + else + { + if ((c = inchar ()) == EOF) + break; + --avail; + } + } + + if (*cmpp == '\0') + { + if (width > 0) + width = avail; + to_level = from_level; + break; + } - /* Advance the pointer to the next string. */ - mbdigits[n] += dlen + 1; + /* We are pushing all read characters back. */ + if (cmpp > mbdigits[n]) + { + ungetc (c, s); + while (--cmpp > mbdigits[n]) + ungetc (*cmpp, s); + c = *cmpp; + } + + /* Advance the pointer to the next string. */ + mbdigits[n] = strchr (mbdigits[n], '\0') + 1; #endif - } + } - if (n == 10) - { - /* Have not yet found the digit. 
*/ - while (++from_level <= to_level) + if (n == 10) { - /* Search all ten digits of this level. */ - for (n = 0; n < 10; ++n) + /* Have not yet found the digit. */ + for (level = from_level + 1; level <= to_level; ++level) { + /* Search all ten digits of this level. */ + for (n = 0; n < 10; ++n) + { #ifdef COMPILE_WSCANF - if (c == *wcdigits[n]) - break; + if (c == *wcdigits[n]) + break; - /* Advance the pointer to the next string. */ - ++wcdigits[n]; + /* Advance the pointer to the next string. */ + ++wcdigits[n]; #else - size_t dlen = strlen (mbdigits[n]); - size_t dcnt; + const char *cmpp; + int avail = width > 0 ? width : INT_MAX; + + cmpp = mbdigits[n]; + while (*cmpp == c && avail > 0) + { + if (*++cmpp == '\0') + break; + else + { + if ((c = inchar ()) == EOF) + break; + --avail; + } + } - dcnt = 0; - do + if (*cmpp == '\0') + { + if (width > 0) + width = avail; + break; + } + + /* We are pushing all read characters back. */ + if (cmpp > mbdigits[n]) + { + ungetc (c, s); + while (--cmpp > mbdigits[n]) + ungetc (*cmpp, s); + c = *cmpp; + } + + /* Advance the pointer to the next string. */ + mbdigits[n] = strchr (mbdigits[n], '\0') + 1; +#endif + } + + if (n < 10) { - if (c != mbdigits[n][dcnt]) - break; - c = inchar (); + /* Found it. */ + from_level = level; + to_level = level; + break; } - while (--dcnt > 0); + } + } - if (dcnt == 0) - /* We found it. */ + if (n < 10) + c = L_('0') + n; + else if ((flags & GROUP) +#ifdef COMPILE_WSCANF + && thousands != L'\0' +#else + && thousands != NULL +#endif + ) + { + /* Try matching against the thousands separator. */ +#ifdef COMPILE_WSCANF + if (c != thousands) + break; +#else + const char *cmpp = thousands; + int avail = width > 0 ? width : INT_MAX; + + while (*cmpp == c && avail > 0) + { + ADDW (c); + if (*++cmpp == '\0') break; + else + { + if ((c = inchar ()) == EOF) + break; + --avail; + } + } - /* Advance the pointer to the next string. 
*/ - mbdigits[n] += dlen + 1; -#endif + if (*cmpp != '\0') + { + /* We are pushing all read characters back. */ + if (cmpp > thousands) + { + wpsize -= cmpp - thousands; + ungetc (c, s); + while (--cmpp > thousands) + ungetc (*cmpp, s); + c = *cmpp; + } + break; } - if (n < 10) - /* Found it. */ - break; + if (width > 0) + width = avail; - /* Next level. */ - ++from_level; + /* The last thousands character will be added back by + the ADDW below. */ + --wpsize; +#endif } - } + else + break; - if (n == 10) - { - /* Haven't found anything. Push the last character back - and return an error. */ - ungetc (c, s); - input_error (); - } + ADDW (c); + if (width > 0) + --width; - ADDW (L_('0') + n); + c = inchar (); + } } else /* Read the number into workspace. */ @@ -1351,20 +1449,24 @@ __vfscanf (FILE *s, const char *format, va_list argptr) int avail = width > 0 ? width : INT_MAX; while (*cmpp == c && avail > 0) - if (*++cmpp == '\0') - break; - else - { - if (inchar () == EOF) - break; - --avail; - } + { + ADDW (c); + if (*++cmpp == '\0') + break; + else + { + if ((c = inchar ()) == EOF) + break; + --avail; + } + } if (*cmpp != '\0') { - /* We are pushing all read character back. */ + /* We are pushing all read characters back. */ if (cmpp > thousands) { + wpsize -= cmpp - thousands; ungetc (c, s); while (--cmpp > thousands) ungetc (*cmpp, s); @@ -1372,9 +1474,13 @@ __vfscanf (FILE *s, const char *format, va_list argptr) } break; } + if (width > 0) - /* +1 because we substract below. */ - width = avail + 1; + width = avail; + + /* The last thousands character will be added back by + the ADDW below. */ + --wpsize; #endif } else @@ -1527,8 +1633,7 @@ __vfscanf (FILE *s, const char *format, va_list argptr) conv_error (); } if (width > 0) - /* +1 because we substract below. 
*/ - width = avail + 1; + width = avail; #endif } if (width > 0) @@ -1689,8 +1794,7 @@ __vfscanf (FILE *s, const char *format, va_list argptr) for (cmpp = decimal; *cmpp != '\0'; ++cmpp) ADDW (*cmpp); if (width > 0) - /* +1 because we substract below. */ - width = avail + 1; + width = avail; got_dot = 1; } else @@ -1727,8 +1831,7 @@ __vfscanf (FILE *s, const char *format, va_list argptr) for (cmpp = thousands; *cmpp != '\0'; ++cmpp) ADDW (*cmpp); if (width > 0) - /* +1 because we substract below. */ - width = avail + 1; + width = avail; } else { diff --git a/sysdeps/generic/strtol.c b/sysdeps/generic/strtol.c index 44e2104e18..de6f276131 100644 --- a/sysdeps/generic/strtol.c +++ b/sysdeps/generic/strtol.c @@ -256,6 +256,7 @@ INTERNAL (strtol) (nptr, endptr, base, group LOCALE_PARAM) wchar_t thousands = L'\0'; # else const char *thousands = NULL; + size_t thousands_len = 0; # endif /* The numeric grouping specification of the current locale, in the format described in <locale.h>. */ @@ -338,18 +339,25 @@ INTERNAL (strtol) (nptr, endptr, base, group LOCALE_PARAM) save = s; #ifdef USE_NUMBER_GROUPING - if (group) + if (base != 10) + grouping = NULL; + + if (grouping) { +# ifndef USE_WIDE_CHAR + thousands_len = strlen (thousands); +# endif + /* Find the end of the digit string and check its grouping. 
*/ end = s; if ( # ifdef USE_WIDE_CHAR *s != thousands # else - ({ for (cnt = 0; thousands[cnt] != '\0'; ++cnt) + ({ for (cnt = 0; cnt < thousands_len; ++cnt) if (thousands[cnt] != end[cnt]) break; - thousands[cnt] != '\0'; }) + cnt < thousands_len; }) # endif ) { @@ -358,10 +366,10 @@ INTERNAL (strtol) (nptr, endptr, base, group LOCALE_PARAM) # ifdef USE_WIDE_CHAR && c != thousands # else - && ({ for (cnt = 0; thousands[cnt] != '\0'; ++cnt) + && ({ for (cnt = 0; cnt < thousands_len; ++cnt) if (thousands[cnt] != end[cnt]) break; - thousands[cnt] != '\0'; }) + cnt < thousands_len; }) # endif && (!ISALPHA (c) || (int) (TOUPPER (c) - L_('A') + 10) >= base)) @@ -391,6 +399,28 @@ INTERNAL (strtol) (nptr, endptr, base, group LOCALE_PARAM) break; if (c >= L_('0') && c <= L_('9')) c -= L_('0'); +#ifdef USE_NUMBER_GROUPING +# ifdef USE_WIDE_CHAR + else if (grouping && c == thousands) + continue; +# else + else if (thousands_len) + { + for (cnt = 0; cnt < thousands_len; ++cnt) + if (thousands[cnt] != s[cnt]) + break; + if (cnt == thousands_len) + { + s += thousands_len - 1; + continue; + } + if (ISALPHA (c)) + c = TOUPPER (c) - L_('A') + 10; + else + break; + } +# endif +#endif else if (ISALPHA (c)) c = TOUPPER (c) - L_('A') + 10; else @@ -417,6 +447,28 @@ INTERNAL (strtol) (nptr, endptr, base, group LOCALE_PARAM) break; if (c >= L_('0') && c <= L_('9')) c -= L_('0'); +#ifdef USE_NUMBER_GROUPING +# ifdef USE_WIDE_CHAR + else if (grouping && c == thousands) + continue; +# else + else if (thousands_len) + { + for (cnt = 0; cnt < thousands_len; ++cnt) + if (thousands[cnt] != s[cnt]) + break; + if (cnt == thousands_len) + { + s += thousands_len - 1; + continue; + } + if (ISALPHA (c)) + c = TOUPPER (c) - L_('A') + 10; + else + break; + } +# endif +#endif else if (ISALPHA (c)) c = TOUPPER (c) - L_('A') + 10; else -- cgit 1.4.1