about summary refs log tree commit diff
path: root/stdio-common/vfscanf.c
diff options
context:
space:
mode:
Diffstat (limited to 'stdio-common/vfscanf.c')
-rw-r--r--stdio-common/vfscanf.c345
1 files changed, 272 insertions, 73 deletions
diff --git a/stdio-common/vfscanf.c b/stdio-common/vfscanf.c
index 28f8ceb820..70d8bf0b18 100644
--- a/stdio-common/vfscanf.c
+++ b/stdio-common/vfscanf.c
@@ -1,20 +1,20 @@
 /* Copyright (C) 1991, 92, 93, 94, 95, 96 Free Software Foundation, Inc.
-This file is part of the GNU C Library.
+   This file is part of the GNU C Library.
 
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
 
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
 
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA.  */
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
 
 #include "../locale/localeinfo.h"
 #include <errno.h>
@@ -24,6 +24,7 @@ Cambridge, MA 02139, USA.  */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wctype.h>
 #include <libc-lock.h>
 
 #ifdef	__GNUC__
@@ -55,6 +56,12 @@ Cambridge, MA 02139, USA.  */
 # define va_list	_IO_va_list
 # define ungetc(c, s)	(--read_in, _IO_ungetc (c, s))
 # define inchar()	((c = _IO_getc_unlocked (s)), (void) ++read_in, c)
+# define encode_error()	do {						      \
+			  if (errp != NULL) *errp |= 4;			      \
+			  _IO_funlockfile (s);				      \
+			  __set_errno (EILSEQ);				      \
+			  return done;					      \
+			} while (0)
 # define conv_error()	do {						      \
 			  if (errp != NULL) *errp |= 2;			      \
 			  _IO_funlockfile (s);				      \
@@ -88,6 +95,11 @@ Cambridge, MA 02139, USA.  */
 #else
 # define ungetc(c, s)	(--read_in, ungetc (c, s))
 # define inchar()	((c = getc (s)), (void) ++read_in, c)
+# define encode_error()	do {						      \
+			  _IO_funlockfile (s);				      \
+			  __set_errno (EILSEQ);				      \
+			  return done;					      \
+			} while (0)
 # define conv_error()	do {						      \
 			  funlockfile (s);				      \
 			  return done;					      \
@@ -173,6 +185,7 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
     } num;
   /* Character-buffer pointer.  */
   char *str = NULL;
+  wchar_t *wstr = NULL;
   char **strptr = NULL;
   size_t strsize = 0;
   /* We must not react on white spaces immediately because they can
@@ -435,40 +448,138 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
 	  break;
 
 	case 'c':	/* Match characters.  */
-	  if (!(flags & SUPPRESS))
+	  if ((flags & LONG) == 0)
 	    {
-	      str = ARG (char *);
-	      if (str == NULL)
-		conv_error ();
-	    }
+	      if (!(flags & SUPPRESS))
+		{
+		  str = ARG (char *);
+		  if (str == NULL)
+		    conv_error ();
+		}
 
-	  c = inchar ();
-	  if (c == EOF)
-	    input_error ();
+	      c = inchar ();
+	      if (c == EOF)
+		input_error ();
 
-	  if (width == -1)
-	    width = 1;
+	      if (width == -1)
+		width = 1;
 
-	  if (!(flags & SUPPRESS))
-	    {
-	      do
-		*str++ = c;
-	      while (--width > 0 && inchar () != EOF);
+	      if (!(flags & SUPPRESS))
+		{
+		  do
+		    *str++ = c;
+		  while (--width > 0 && inchar () != EOF);
+		}
+	      else
+		while (--width > 0 && inchar () != EOF);
+
+	      if (width > 0)
+		/* I.e., EOF was read.  */
+		--read_in;
+
+	      if (!(flags & SUPPRESS))
+		++done;
+
+	      break;
 	    }
-	  else
-	    while (--width > 0 && inchar () != EOF);
+	  /* FALLTHROUGH */
+	case 'C':
+	  /* Get UTF-8 encoded wide character.  Here we assume (as in
+	     other parts of the libc) that we only have to handle
+	     UTF-8.  */
+	  {
+	    wint_t val;
+	    size_t cnt = 0;
+	    int first = 1;
 
-	  if (width > 0)
-	    /* I.e., EOF was read.  */
-	    --read_in;
+	    if (!(flags & SUPPRESS))
+	      {
+		wstr = ARG (wchar_t *);
+		if (str == NULL)
+		  conv_error ();
+	      }
 
-	  if (!(flags & SUPPRESS))
-	    ++done;
+	    do
+	      {
+#define NEXT_WIDE_CHAR(First)						      \
+		c = inchar ();						      \
+		if (c == EOF)						      \
+		  /* EOF is only an error for the first character.  */	      \
+		  if (First)						      \
+		    input_error ();					      \
+		  else							      \
+		    break;						      \
+		val = c;						      \
+		if (val >= 0x80)					      \
+		  {							      \
+		    if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe)	      \
+		      encode_error ();					      \
+		    if ((c & 0xe0) == 0xc0)				      \
+		      {							      \
+			/* We expect two bytes.  */			      \
+			cnt = 1;					      \
+			val &= 0x1f;					      \
+		      }							      \
+		    else if ((c & 0xf0) == 0xe0)			      \
+		      {							      \
+			/* We expect three bytes.  */			      \
+			cnt = 2;					      \
+			val &= 0x0f;					      \
+		      }							      \
+		    else if ((c & 0xf8) == 0xf0)			      \
+		      {							      \
+			/* We expect four bytes.  */			      \
+			cnt = 3;					      \
+			val &= 0x07;					      \
+		      }							      \
+		    else if ((c & 0xfc) == 0xf8)			      \
+		      {							      \
+			/* We expect five bytes.  */			      \
+			cnt = 4;					      \
+			val &= 0x03;					      \
+		      }							      \
+		    else						      \
+		      {							      \
+			/* We expect six bytes.  */			      \
+			cnt = 5;					      \
+			val &= 0x01;					      \
+		      }							      \
+		    							      \
+		    do							      \
+		      {							      \
+			c = inchar ();					      \
+			if (c == EOF					      \
+			    || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe)      \
+			  encode_error ();				      \
+			val <<= 6;					      \
+			val |= c & 0x3f;				      \
+		      }							      \
+		    while (--cnt > 0);					      \
+		  }							      \
+									      \
+		if (!(flags & SUPPRESS))				      \
+		  *wstr++ = val;					      \
+		first = 0
+
+		NEXT_WIDE_CHAR (first);
+	      }
+	    while (--width > 0);
 
+	    if (width > 0)
+	      /* I.e., EOF was read.  */
+	      --read_in;
+
+	    if (!(flags & SUPPRESS))
+	      ++done;
+	  }
 	  break;
 
 	case 's':		/* Read a string.  */
-#define STRING_ARG							      \
+	  if (flags & LONG)
+	    /* We have to process a wide character string.  */
+	    goto wide_char_string;
+
+#define STRING_ARG(Str, Type)						      \
 	  if (!(flags & SUPPRESS))					      \
 	    {								      \
 	      if (flags & MALLOC)					      \
@@ -479,14 +590,15 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
 		    conv_error ();					      \
 		  /* Allocate an initial buffer.  */			      \
 		  strsize = 100;					      \
-		  *strptr = str = malloc (strsize);			      \
+		  *strptr = malloc (strsize * sizeof (Type));		      \
+		  Str = (Type *) *strptr;				      \
 		}							      \
 	      else							      \
-		str = ARG (char *);					      \
-	      if (str == NULL)						      \
+		Str = ARG (Type *);					      \
+	      if (Str == NULL)						      \
 		conv_error ();						      \
 	    }
-	  STRING_ARG;
+	  STRING_ARG (str, char);
 
 	  c = inchar ();
 	  if (c == EOF)
@@ -499,43 +611,44 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
 		  ungetc (c, s);
 		  break;
 		}
-#define	STRING_ADD_CHAR(c)						      \
+#define	STRING_ADD_CHAR(Str, c, Type)					      \
 	      if (!(flags & SUPPRESS))					      \
 		{							      \
-		  *str++ = c;						      \
-		  if ((flags & MALLOC) && str == *strptr + strsize)	      \
+		  *Str++ = c;						      \
+		  if ((flags & MALLOC) && (char *) Str == *strptr + strsize)  \
 		    {							      \
 		      /* Enlarge the buffer.  */			      \
-		      str = realloc (*strptr, strsize * 2);		      \
-		      if (str == NULL)					      \
+		      Str = realloc (*strptr, strsize * 2 * sizeof (Type));   \
+		      if (Str == NULL)					      \
 			{						      \
 			  /* Can't allocate that much.  Last-ditch effort.  */\
-			  str = realloc (*strptr, strsize + 1);		      \
-			  if (str == NULL)				      \
+			  Str = realloc (*strptr,			      \
+					 (strsize + 1) * sizeof (Type));      \
+			  if (Str == NULL)				      \
 			    {						      \
 			      /* We lose.  Oh well.			      \
 				 Terminate the string and stop converting,    \
 				 so at least we don't skip any input.  */     \
-			      (*strptr)[strsize] = '\0';		      \
+			      ((Type *) (*strptr))[strsize] = '\0';	      \
 			      ++done;					      \
 			      conv_error ();				      \
 			    }						      \
 			  else						      \
 			    {						      \
-			      *strptr = str;				      \
-			      str += strsize;				      \
+			      *strptr = (char *) Str;			      \
+			      Str = ((Type *) *strptr) + strsize;	      \
 			      ++strsize;				      \
 			    }						      \
 			}						      \
 		      else						      \
 			{						      \
-			  *strptr = str;				      \
-			  str += strsize;				      \
+			  *strptr = (char *) Str;			      \
+			  Str = ((Type *) *strptr) + strsize;		      \
 			  strsize *= 2;					      \
 			}						      \
 		    }							      \
 		}
-	      STRING_ADD_CHAR (c);
+	      STRING_ADD_CHAR (str, c, char);
 	    } while ((width <= 0 || --width > 0) && inchar () != EOF);
 
 	  if (!(flags & SUPPRESS))
@@ -545,6 +658,43 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
 	    }
 	  break;
 
+	case 'S':
+	  /* Wide character string.  */
+	wide_char_string:
+	  {
+	    wint_t val;
+	    int first = 1;
+	    STRING_ARG (wstr, wchar_t);
+
+	    do
+	      {
+		size_t cnt = 0;
+		NEXT_WIDE_CHAR (first);
+
+		if (iswspace (val))
+		  {
+		    /* XXX We would have to push back the whole wide char
+		       with possibly many bytes.  But since scanf does
+		       not make a difference for white space characters
+		       we can simply push back a simple <SP> which is
+		       guaranteed to be in the [:space:] class.  */
+		    ungetc (' ', s);
+		    break;
+		  }
+
+		STRING_ADD_CHAR (wstr, val, wchar_t);
+		first = 0;
+	      }
+	    while (width <= 0 || --width > 0);
+
+	    if (!(flags & SUPPRESS))
+	      {
+		*wstr = L'\0';
+		++done;
+	      }
+	  }
+	  break;
+
 	case 'x':	/* Hexadecimal integer.  */
 	case 'X':	/* Ditto.  */
 	  base = 16;
@@ -765,11 +915,19 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
 	  break;
 
 	case '[':	/* Character class.  */
-	  STRING_ARG;
+	  if (flags & LONG)
+	    {
+	      STRING_ARG (wstr, wchar_t);
+	      c = '\0';		/* This is to keep gcc quiet.  */
+	    }
+	  else
+	    {
+	      STRING_ARG (str, char);
 
-	  c = inchar ();
-	  if (c == EOF)
-	    input_error ();
+	      c = inchar ();
+	      if (c == EOF)
+		input_error ();
+	    }
 
 	  if (*f == '^')
 	    {
@@ -814,31 +972,72 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
 	    }
 	  if (fc == '\0')
 	    {
-	      ungetc (c, s);
+	      if (!(flags & LONG))
+		ungetc (c, s);
 	      conv_error();
 	    }
 
-	  num.ul = read_in - 1;	/* -1 because we already read one char.  */
-	  do
+	  if (flags & LONG)
 	    {
-	      if (wp[c] == not_in)
+	      wint_t val;
+	      int first = 1;
+
+	      do
 		{
-		  ungetc (c, s);
-		  break;
+		  size_t cnt = 0;
+		  NEXT_WIDE_CHAR (first);
+		  if (val > 255 || wp[val] == not_in)
+		    {
+		      /* XXX We have a problem here.  We read a wide
+			 character and this possibly took several
+			 bytes.  But we can only push back one single
+			 character.  To be sure we don't create wrong
+			 input we push it back only in case it is
+			 representable within one byte.  */
+		      if (val < 0x80)
+			ungetc (val, s);
+		      break;
+		    }
+		  STRING_ADD_CHAR (wstr, val, wchar_t);
+		  if (width > 0)
+		    --width;
+		  first = 0;
 		}
-	      STRING_ADD_CHAR (c);
-	      if (width > 0)
-		--width;
-	    }
-	  while (width != 0 && inchar () != EOF);
+	      while (width != 0);
 
-	  if (read_in == num.ul)
-	    conv_error ();
+	      if (first)
+		conv_error ();
 
-	  if (!(flags & SUPPRESS))
+	      if (!(flags & SUPPRESS))
+		{
+		  *wstr = L'\0';
+		  ++done;
+		}
+	    }
+	  else
 	    {
-	      *str = '\0';
-	      ++done;
+	      num.ul = read_in - 1; /* -1 because we already read one char.  */
+	      do
+		{
+		  if (wp[c] == not_in)
+		    {
+		      ungetc (c, s);
+		      break;
+		    }
+		  STRING_ADD_CHAR (str, c, char);
+		  if (width > 0)
+		    --width;
+		}
+	      while (width != 0 && inchar () != EOF);
+
+	      if (read_in == num.ul)
+		conv_error ();
+
+	      if (!(flags & SUPPRESS))
+		{
+		  *str = '\0';
+		  ++done;
+		}
 	    }
 	  break;