summary refs log tree commit diff
path: root/stdio-common/vfscanf.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2007-02-19 04:40:58 +0000
committerUlrich Drepper <drepper@redhat.com>2007-02-19 04:40:58 +0000
commitecb72bd5b640497474ceae8d5e991b88c0c032e2 (patch)
tree97a7c41d486c34139416fb2b225854b0d074992d /stdio-common/vfscanf.c
parentac2ca0229ab39381e9e92e07f929703981f77a90 (diff)
downloadglibc-ecb72bd5b640497474ceae8d5e991b88c0c032e2.tar.gz
glibc-ecb72bd5b640497474ceae8d5e991b88c0c032e2.tar.xz
glibc-ecb72bd5b640497474ceae8d5e991b88c0c032e2.zip
[BZ #2211]
	* stdio-common/vfscanf.c: Handle localized digits etc for floating
	point numbers.
	Patch mostly by Hamed Malek <hamed@farsiweb.info>.
Diffstat (limited to 'stdio-common/vfscanf.c')
-rw-r--r--stdio-common/vfscanf.c242
1 files changed, 228 insertions, 14 deletions
diff --git a/stdio-common/vfscanf.c b/stdio-common/vfscanf.c
index 90e7e36a5f..cdb610dc0c 100644
--- a/stdio-common/vfscanf.c
+++ b/stdio-common/vfscanf.c
@@ -1,5 +1,4 @@
-/* Copyright (C) 1991-2002, 2003, 2004, 2005, 2006, 2007
-   Free Software Foundation, Inc.
+/* Copyright (C) 1991-2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -1264,13 +1263,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 			mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
 
 		      cmpp = mbdigits[n];
-		      while ((unsigned char) *cmpp == c && avail > 0)
+		      while ((unsigned char) *cmpp == c && avail >= 0)
 			{
 			  if (*++cmpp == '\0')
 			    break;
 			  else
 			    {
-			      if ((c = inchar ()) == EOF)
+			      if (avail == 0 || inchar () == EOF)
 				break;
 			      --avail;
 			    }
@@ -1317,13 +1316,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 			      int avail = width > 0 ? width : INT_MAX;
 
 			      cmpp = mbdigits[n];
-			      while ((unsigned char) *cmpp == c && avail > 0)
+			      while ((unsigned char) *cmpp == c && avail >= 0)
 				{
 				  if (*++cmpp == '\0')
 				    break;
 				  else
 				    {
-				      if ((c = inchar ()) == EOF)
+				      if (avail == 0 || inchar () == EOF)
 					break;
 				      --avail;
 				    }
@@ -1378,14 +1377,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 		      const char *cmpp = thousands;
 		      int avail = width > 0 ? width : INT_MAX;
 
-		      while ((unsigned char) *cmpp == c && avail > 0)
+		      while ((unsigned char) *cmpp == c && avail >= 0)
 			{
 			  ADDW (c);
 			  if (*++cmpp == '\0')
 			    break;
 			  else
 			    {
-			      if ((c = inchar ()) == EOF)
+			      if (avail == 0 || inchar () == EOF)
 				break;
 			      --avail;
 			    }
@@ -1450,14 +1449,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 			const char *cmpp = thousands;
 			int avail = width > 0 ? width : INT_MAX;
 
-			while ((unsigned char) *cmpp == c && avail > 0)
+			while ((unsigned char) *cmpp == c && avail >= 0)
 			  {
 			    ADDW (c);
 			    if (*++cmpp == '\0')
 			      break;
 			    else
 			      {
-				if ((c = inchar ()) == EOF)
+				if (avail == 0 || inchar () == EOF)
 				  break;
 				--avail;
 			      }
@@ -1753,12 +1752,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 
 		  if (! got_dot)
 		    {
-		      while ((unsigned char) *cmpp == c && avail > 0)
+		      while ((unsigned char) *cmpp == c && avail >= 0)
 			if (*++cmpp == '\0')
 			  break;
 			else
 			  {
-			    if (inchar () == EOF)
+			    if (avail == 0 || inchar () == EOF)
 			      break;
 			    --avail;
 			  }
@@ -1790,12 +1789,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 			    ++cmp2p;
 			  if (cmp2p - thousands == cmpp - decimal)
 			    {
-			      while ((unsigned char) *cmp2p == c && avail > 0)
+			      while ((unsigned char) *cmp2p == c && avail >= 0)
 				if (*++cmp2p == '\0')
 				  break;
 				else
 				  {
-				    if (inchar () == EOF)
+				    if (avail == 0 || inchar () == EOF)
 				      break;
 				    --avail;
 				  }
@@ -1828,6 +1827,221 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 		--width;
 	    }
 
+	  wctrans_t map;
+	  if (__builtin_expect ((flags & I18N) != 0, 0)
+	      /* Hexadecimal floats make no sense, fixing localized
+		 digits with ASCII letters.  */
+	      && !is_hexa
+	      /* Minimum requirement.  */
+	      && (wpsize == 0 || got_dot)
+	      && (map = __wctrans ("to_inpunct")) != NULL)
+	    {
+	      /* Reget the first character.  */
+	      inchar ();
+
+	      /* Localized digits, decimal points, and thousands
+		 separator.  */
+	      wint_t wcdigits[12];
+
+	      /* First get decimal equivalent to check if we read it
+		 or not.  */
+	      wcdigits[11] = __towctrans (L'.', map);
+
+	      /* If we have not read any character or have just read
+	         locale decimal point which matches the decimal point
+	         for localized FP numbers, then we may have localized
+	         digits.  Note, we test GOT_DOT above.  */
+#ifdef COMPILE_WSCANF
+	      if (wpsize == 0 || (wpsize == 1 && wcdigits[11] == decimal))
+#else
+	      char mbdigits[12][MB_LEN_MAX + 1];
+
+	      mbstate_t state;
+	      memset (&state, '\0', sizeof (state));
+
+	      bool match_so_far = wpsize == 0;
+	      size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
+	      if (mblen != (size_t) -1)
+		{
+		  mbdigits[11][mblen] = '\0';
+		  match_so_far |= (wpsize == strlen (decimal)
+				   && strcmp (decimal, mbdigits[11]) == 0);
+		}
+	      else
+		{
+		  size_t decimal_len = strlen (decimal);
+		  /* This should always be the case but the data comes
+		     from a file.  */
+		  if (decimal_len <= MB_LEN_MAX)
+		    {
+		      match_so_far |= wpsize == decimal_len;
+		      memcpy (mbdigits[11], decimal, decimal_len + 1);
+		    }
+		  else
+		    match_so_far = false;
+		}
+
+	      if (match_so_far)
+#endif
+		{
+		  int have_locthousands = true;
+		  /* Now get the digits and the thousands-sep equivalents.  */
+	          for (int n = 0; n < 11; ++n)
+		    {
+		      if (n < 10)
+			wcdigits[n] = __towctrans (L'0' + n, map);
+		      else if (n == 10)
+			wcdigits[10] = __towctrans (L',', map);
+
+#ifndef COMPILE_WSCANF
+		      memset (&state, '\0', sizeof (state));
+
+		      size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
+						&state);
+		      if (mblen == (size_t) -1)
+			{
+			  if (n == 10)
+			    {
+			      if (thousands == NULL || (flags & GROUP) == 0)
+				have_locthousands = false;
+			      else
+				{
+				  size_t thousands_len = strlen (thousands);
+				  if (thousands_len <= MB_LEN_MAX)
+				    memcpy (mbdigits[10], thousands,
+					    thousands_len + 1);
+				  else
+				    have_locthousands = false;
+				}
+			    }
+			  else
+			    /* Ignore checking against localized digits.  */
+			    goto no_i18nflt;
+			}
+		      else
+			mbdigits[n][mblen] = '\0';
+#endif
+		    }
+
+		  /* Start checking against localized digits, if
+		     convertion is done correctly. */
+		  while (1)
+		    {
+		      if (got_e && wp[wpsize - 1] == exp_char
+			  && (c == L_('-') || c == L_('+')))
+			ADDW (c);
+		      else if (wpsize > 0 && !got_e
+			       && (CHAR_T) TOLOWER (c) == exp_char)
+			{
+			  ADDW (exp_char);
+			  got_e = got_dot = 1;
+			}
+		      else
+			{
+			  /* Check against localized digits, decimal point,
+			     and thousands separator.  */
+			  int n;
+			  for (n = 0; n < 12; ++n)
+			    {
+#ifdef COMPILE_WSCANF
+			      if (c == wcdigits[n])
+				{
+				  if (n < 10)
+				    ADDW (L_('0') + n);
+				  else if (n == 11 && !got_dot)
+				    {
+				      ADDW (decimal);
+				      got_dot = 1;
+				    }
+				  else if (n == 10 && have_locthousands
+					   && ! got_dot)
+				    ADDW (thousands);
+				  else
+				    /* The last read character is not part
+				       of the number anymore.  */
+				    n = 12;
+
+				  break;
+				}
+#else
+			      const char *cmpp = mbdigits[n];
+			      int avail = width > 0 ? width : INT_MAX;
+
+			      while ((unsigned char) *cmpp == c && avail >= 0)
+				if (*++cmpp == '\0')
+				  break;
+				else
+				  {
+				    if (avail == 0 || inchar () == EOF)
+				      break;
+				    --avail;
+				  }
+			      if (*cmpp == '\0')
+				{
+				  if (width > 0)
+				    width = avail;
+
+				  if (n < 10)
+				    ADDW (L_('0') + n);
+				  else if (n == 11 && !got_dot)
+				    {
+				      /* Add all the characters.  */
+				      for (cmpp = decimal; *cmpp != '\0';
+					   ++cmpp)
+					ADDW ((unsigned char) *cmpp);
+
+				      got_dot = 1;
+				    }
+				  else if (n == 10 && (flags & GROUP) != 0
+					   && thousands != NULL && ! got_dot)
+				    {
+				      /* Add all the characters.  */
+				      for (cmpp = thousands; *cmpp != '\0';
+					   ++cmpp)
+					ADDW ((unsigned char) *cmpp);
+				    }
+				  else
+				    /* The last read character is not part
+				       of the number anymore.  */
+				      n = 12;
+
+				  break;
+				}
+
+			      /* We are pushing all read characters back.  */
+			      if (cmpp > mbdigits[n])
+				{
+				  ungetc (c, s);
+				  while (--cmpp > mbdigits[n])
+				    ungetc_not_eof ((unsigned char) *cmpp, s);
+				  c = (unsigned char) *cmpp;
+				}
+#endif
+			    }
+
+			  if (n >= 12)
+			    {
+			      /* The last read character is not part
+				 of the number anymore.  */
+			      ungetc (c, s);
+			      break;
+			    }
+			}
+
+		      if (width == 0 || inchar () == EOF)
+			break;
+
+		      if (width > 0)
+			--width;
+		    }
+		}
+
+#ifndef COMPILE_WSCANF
+	    no_i18nflt:
+	      ;
+#endif
+	    }
+
 	  /* Have we read any character?  If we try to read a number
 	     in hexadecimal notation and we have read only the `0x'
 	     prefix or no exponent this is an error.  */