about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--localedata/ChangeLog4
-rw-r--r--localedata/Makefile6
-rw-r--r--localedata/tst-sscanf.c56
-rw-r--r--stdio-common/vfscanf.c242
5 files changed, 296 insertions, 17 deletions
diff --git a/ChangeLog b/ChangeLog
index d898823f51..be2556b4f2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2007-02-18  Ulrich Drepper  <drepper@redhat.com>
 
+	[BZ #2211]
+	* stdio-common/vfscanf.c: Handle localized digits etc for floating
+	point numbers.
+	Patch mostly by Hamed Malek <hamed@farsiweb.info>.
+
 	* stdio-common/vfscanf.c: Fix problems in width accounting.
 	* stdio-common/tst-sscanf.c (double_tests): New tests.
 	(main): Hook them up.
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index 9dfe469503..e1bc145ec3 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,5 +1,9 @@
 2007-02-18  Ulrich Drepper  <drepper@redhat.com>
 
+	* Makefile (tests): Add tst-sscanf.
+	(LOCALES): Add fa_IR.UTF-8.
+	* tst-sscanf.c: New file.
+
 	* da_DK.in: Adjust for unified collation.
 
 	* locales/vi_VN: Don't define HOK here as well.
diff --git a/localedata/Makefile b/localedata/Makefile
index db7094fabe..e518175b98 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1996-2002, 2003, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1996-2002, 2003, 2005, 2007 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -93,7 +93,7 @@ locale_test_suite := tst_iswalnum tst_iswalpha tst_iswcntrl            \
 
 tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
 	tst-leaks tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
-	tst-strfmon1
+	tst-strfmon1 tst-sscanf
 ifeq (yes,$(build-shared))
 ifneq (no,$(PERL))
 tests: $(objpfx)mtrace-tst-leaks
@@ -133,7 +133,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
 	   en_US.ISO-8859-1 ja_JP.EUC-JP da_DK.ISO-8859-1 \
 	   hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
 	   vi_VN.TCVN5712-1 nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 \
-	   tr_TR.UTF-8 cs_CZ.UTF-8 zh_TW.EUC-TW
+	   tr_TR.UTF-8 cs_CZ.UTF-8 zh_TW.EUC-TW fa_IR.UTF-8
 LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
 CHARMAPS := $(shell echo "$(LOCALES)" | \
 		    sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/localedata/tst-sscanf.c b/localedata/tst-sscanf.c
new file mode 100644
index 0000000000..89a77a14f8
--- /dev/null
+++ b/localedata/tst-sscanf.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <locale.h>
+#include <assert.h>
+
+#define P0 "\xDB\xB0"
+#define P1 "\xDB\xB1"
+#define P2 "\xDB\xB2"
+#define P3 "\xDB\xB3"
+#define P4 "\xDB\xB4"
+#define P5 "\xDB\xB5"
+#define P6 "\xDB\xB6"
+#define P7 "\xDB\xB7"
+#define P8 "\xDB\xB8"
+#define P9 "\xDB\xB9"
+#define PD "\xd9\xab"
+#define PT "\xd9\xac"
+
+static int
+check_sscanf (const char *s, const char *format, const float n)
+{
+  float f;
+
+  if (sscanf (s, format, &f) != 1)
+    {
+      printf ("nothing found for \"%s\"\n", s);
+      return 1;
+    }
+  if (f != n)
+    {
+      printf ("got %f expected %f from \"%s\"\n", f, n, s);
+      return 1;
+    }
+  return 0;
+}
+
+static int
+do_test (void)
+{
+  if (setlocale (LC_ALL, "fa_IR") == NULL)
+    {
+      puts ("cannot set fa_IR locale");
+      return 1;
+    }
+
+  int r = check_sscanf (P3 PD P1 P4, "%I8f", 3.14);
+  r |= check_sscanf (P3 PT P1 P4 P5, "%I'f", 3145);
+  r |= check_sscanf (P3 PD P1 P4 P1 P5 P9, "%If", 3.14159);
+  r |= check_sscanf ("-" P3 PD P1 P4 P1 P5, "%If", -3.1415);
+  r |= check_sscanf ("+" PD P1 P4 P1 P5, "%If", +.1415);
+  r |= check_sscanf (P3 PD P1 P4 P1 P5 "e+" P2, "%Ie", 3.1415e+2);
+
+  return r;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/stdio-common/vfscanf.c b/stdio-common/vfscanf.c
index 90e7e36a5f..cdb610dc0c 100644
--- a/stdio-common/vfscanf.c
+++ b/stdio-common/vfscanf.c
@@ -1,5 +1,4 @@
-/* Copyright (C) 1991-2002, 2003, 2004, 2005, 2006, 2007
-   Free Software Foundation, Inc.
+/* Copyright (C) 1991-2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -1264,13 +1263,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 			mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
 
 		      cmpp = mbdigits[n];
-		      while ((unsigned char) *cmpp == c && avail > 0)
+		      while ((unsigned char) *cmpp == c && avail >= 0)
 			{
 			  if (*++cmpp == '\0')
 			    break;
 			  else
 			    {
-			      if ((c = inchar ()) == EOF)
+			      if (avail == 0 || inchar () == EOF)
 				break;
 			      --avail;
 			    }
@@ -1317,13 +1316,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 			      int avail = width > 0 ? width : INT_MAX;
 
 			      cmpp = mbdigits[n];
-			      while ((unsigned char) *cmpp == c && avail > 0)
+			      while ((unsigned char) *cmpp == c && avail >= 0)
 				{
 				  if (*++cmpp == '\0')
 				    break;
 				  else
 				    {
-				      if ((c = inchar ()) == EOF)
+				      if (avail == 0 || inchar () == EOF)
 					break;
 				      --avail;
 				    }
@@ -1378,14 +1377,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 		      const char *cmpp = thousands;
 		      int avail = width > 0 ? width : INT_MAX;
 
-		      while ((unsigned char) *cmpp == c && avail > 0)
+		      while ((unsigned char) *cmpp == c && avail >= 0)
 			{
 			  ADDW (c);
 			  if (*++cmpp == '\0')
 			    break;
 			  else
 			    {
-			      if ((c = inchar ()) == EOF)
+			      if (avail == 0 || inchar () == EOF)
 				break;
 			      --avail;
 			    }
@@ -1450,14 +1449,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 			const char *cmpp = thousands;
 			int avail = width > 0 ? width : INT_MAX;
 
-			while ((unsigned char) *cmpp == c && avail > 0)
+			while ((unsigned char) *cmpp == c && avail >= 0)
 			  {
 			    ADDW (c);
 			    if (*++cmpp == '\0')
 			      break;
 			    else
 			      {
-				if ((c = inchar ()) == EOF)
+				if (avail == 0 || inchar () == EOF)
 				  break;
 				--avail;
 			      }
@@ -1753,12 +1752,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 
 		  if (! got_dot)
 		    {
-		      while ((unsigned char) *cmpp == c && avail > 0)
+		      while ((unsigned char) *cmpp == c && avail >= 0)
 			if (*++cmpp == '\0')
 			  break;
 			else
 			  {
-			    if (inchar () == EOF)
+			    if (avail == 0 || inchar () == EOF)
 			      break;
 			    --avail;
 			  }
@@ -1790,12 +1789,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 			    ++cmp2p;
 			  if (cmp2p - thousands == cmpp - decimal)
 			    {
-			      while ((unsigned char) *cmp2p == c && avail > 0)
+			      while ((unsigned char) *cmp2p == c && avail >= 0)
 				if (*++cmp2p == '\0')
 				  break;
 				else
 				  {
-				    if (inchar () == EOF)
+				    if (avail == 0 || inchar () == EOF)
 				      break;
 				    --avail;
 				  }
@@ -1828,6 +1827,221 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 		--width;
 	    }
 
+	  wctrans_t map;
+	  if (__builtin_expect ((flags & I18N) != 0, 0)
+	      /* Hexadecimal floats make no sense, fixing localized
+		 digits with ASCII letters.  */
+	      && !is_hexa
+	      /* Minimum requirement.  */
+	      && (wpsize == 0 || got_dot)
+	      && (map = __wctrans ("to_inpunct")) != NULL)
+	    {
+	      /* Reget the first character.  */
+	      inchar ();
+
+	      /* Localized digits, decimal points, and thousands
+		 separator.  */
+	      wint_t wcdigits[12];
+
+	      /* First get decimal equivalent to check if we read it
+		 or not.  */
+	      wcdigits[11] = __towctrans (L'.', map);
+
+	      /* If we have not read any character or have just read
+	         locale decimal point which matches the decimal point
+	         for localized FP numbers, then we may have localized
+	         digits.  Note, we test GOT_DOT above.  */
+#ifdef COMPILE_WSCANF
+	      if (wpsize == 0 || (wpsize == 1 && wcdigits[11] == decimal))
+#else
+	      char mbdigits[12][MB_LEN_MAX + 1];
+
+	      mbstate_t state;
+	      memset (&state, '\0', sizeof (state));
+
+	      bool match_so_far = wpsize == 0;
+	      size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
+	      if (mblen != (size_t) -1)
+		{
+		  mbdigits[11][mblen] = '\0';
+		  match_so_far |= (wpsize == strlen (decimal)
+				   && strcmp (decimal, mbdigits[11]) == 0);
+		}
+	      else
+		{
+		  size_t decimal_len = strlen (decimal);
+		  /* This should always be the case but the data comes
+		     from a file.  */
+		  if (decimal_len <= MB_LEN_MAX)
+		    {
+		      match_so_far |= wpsize == decimal_len;
+		      memcpy (mbdigits[11], decimal, decimal_len + 1);
+		    }
+		  else
+		    match_so_far = false;
+		}
+
+	      if (match_so_far)
+#endif
+		{
+		  int have_locthousands = true;
+		  /* Now get the digits and the thousands-sep equivalents.  */
+	          for (int n = 0; n < 11; ++n)
+		    {
+		      if (n < 10)
+			wcdigits[n] = __towctrans (L'0' + n, map);
+		      else if (n == 10)
+			wcdigits[10] = __towctrans (L',', map);
+
+#ifndef COMPILE_WSCANF
+		      memset (&state, '\0', sizeof (state));
+
+		      size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
+						&state);
+		      if (mblen == (size_t) -1)
+			{
+			  if (n == 10)
+			    {
+			      if (thousands == NULL || (flags & GROUP) == 0)
+				have_locthousands = false;
+			      else
+				{
+				  size_t thousands_len = strlen (thousands);
+				  if (thousands_len <= MB_LEN_MAX)
+				    memcpy (mbdigits[10], thousands,
+					    thousands_len + 1);
+				  else
+				    have_locthousands = false;
+				}
+			    }
+			  else
+			    /* Ignore checking against localized digits.  */
+			    goto no_i18nflt;
+			}
+		      else
+			mbdigits[n][mblen] = '\0';
+#endif
+		    }
+
+		  /* Start checking against localized digits, if
+		     convertion is done correctly. */
+		  while (1)
+		    {
+		      if (got_e && wp[wpsize - 1] == exp_char
+			  && (c == L_('-') || c == L_('+')))
+			ADDW (c);
+		      else if (wpsize > 0 && !got_e
+			       && (CHAR_T) TOLOWER (c) == exp_char)
+			{
+			  ADDW (exp_char);
+			  got_e = got_dot = 1;
+			}
+		      else
+			{
+			  /* Check against localized digits, decimal point,
+			     and thousands separator.  */
+			  int n;
+			  for (n = 0; n < 12; ++n)
+			    {
+#ifdef COMPILE_WSCANF
+			      if (c == wcdigits[n])
+				{
+				  if (n < 10)
+				    ADDW (L_('0') + n);
+				  else if (n == 11 && !got_dot)
+				    {
+				      ADDW (decimal);
+				      got_dot = 1;
+				    }
+				  else if (n == 10 && have_locthousands
+					   && ! got_dot)
+				    ADDW (thousands);
+				  else
+				    /* The last read character is not part
+				       of the number anymore.  */
+				    n = 12;
+
+				  break;
+				}
+#else
+			      const char *cmpp = mbdigits[n];
+			      int avail = width > 0 ? width : INT_MAX;
+
+			      while ((unsigned char) *cmpp == c && avail >= 0)
+				if (*++cmpp == '\0')
+				  break;
+				else
+				  {
+				    if (avail == 0 || inchar () == EOF)
+				      break;
+				    --avail;
+				  }
+			      if (*cmpp == '\0')
+				{
+				  if (width > 0)
+				    width = avail;
+
+				  if (n < 10)
+				    ADDW (L_('0') + n);
+				  else if (n == 11 && !got_dot)
+				    {
+				      /* Add all the characters.  */
+				      for (cmpp = decimal; *cmpp != '\0';
+					   ++cmpp)
+					ADDW ((unsigned char) *cmpp);
+
+				      got_dot = 1;
+				    }
+				  else if (n == 10 && (flags & GROUP) != 0
+					   && thousands != NULL && ! got_dot)
+				    {
+				      /* Add all the characters.  */
+				      for (cmpp = thousands; *cmpp != '\0';
+					   ++cmpp)
+					ADDW ((unsigned char) *cmpp);
+				    }
+				  else
+				    /* The last read character is not part
+				       of the number anymore.  */
+				      n = 12;
+
+				  break;
+				}
+
+			      /* We are pushing all read characters back.  */
+			      if (cmpp > mbdigits[n])
+				{
+				  ungetc (c, s);
+				  while (--cmpp > mbdigits[n])
+				    ungetc_not_eof ((unsigned char) *cmpp, s);
+				  c = (unsigned char) *cmpp;
+				}
+#endif
+			    }
+
+			  if (n >= 12)
+			    {
+			      /* The last read character is not part
+				 of the number anymore.  */
+			      ungetc (c, s);
+			      break;
+			    }
+			}
+
+		      if (width == 0 || inchar () == EOF)
+			break;
+
+		      if (width > 0)
+			--width;
+		    }
+		}
+
+#ifndef COMPILE_WSCANF
+	    no_i18nflt:
+	      ;
+#endif
+	    }
+
 	  /* Have we read any character?  If we try to read a number
 	     in hexadecimal notation and we have read only the `0x'
 	     prefix or no exponent this is an error.  */