about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Weimer <fweimer@redhat.com>2020-03-19 18:32:28 -0300
committerFlorian Weimer <fweimer@redhat.com>2022-08-30 10:05:43 +0200
commitae7748e67fb77b6ba8751001718c911d5209400c (patch)
tree52ac12f05a071b1937cfbd2a6b79bb805dcf742d
parent174d0b61c7f280374ca6d527b08bce795cd93af1 (diff)
downloadglibc-ae7748e67fb77b6ba8751001718c911d5209400c.tar.gz
glibc-ae7748e67fb77b6ba8751001718c911d5209400c.tar.xz
glibc-ae7748e67fb77b6ba8751001718c911d5209400c.zip
stdio: Remove memory leak from multibyte convertion [BZ#25691]
This is an updated version of a previous patch [1] with the
following changes:

  - Use compiler overflow builtins on done_add_func function.
  - Define the scratch +utstring_converted_wide_string using
    CHAR_T.
  - Added a testcase and mention the bug report.

Both default and wide printf functions might leak memory when
manipulate multibyte characters conversion depending of the size
of the input (whether __libc_use_alloca trigger or not the fallback
heap allocation).

This patch fixes it by removing the extra memory allocation on
string formatting with conversion parts.

The testcase uses input argument size that trigger memory leaks
on unpatched code (using a scratch buffer the threashold to use
heap allocation is lower).

Checked on x86_64-linux-gnu and i686-linux-gnu.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

[1] https://sourceware.org/pipermail/libc-alpha/2017-June/082098.html

(cherry picked from commit 3cc4a8367c23582b7db14cf4e150e4068b7fd461)
-rw-r--r--NEWS1
-rw-r--r--stdio-common/vfprintf-internal.c324
2 files changed, 181 insertions, 144 deletions
diff --git a/NEWS b/NEWS
index a3278be684..2310a9ab60 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,7 @@ The following bugs are resolved with this release:
   [25635] arm: Wrong sysdep order selection for soft-fp
   [25639] localedata: Some names of days and months wrongly spelt in
     Occitan
+  [25691] stdio: Remove memory leak from multibyte convertion
   [25715] system() returns wrong errors when posix_spawn fails
   [25810] x32: Incorrect syscall entries with pointer, off_t and size_t
   [25896] Incorrect prctl
diff --git a/stdio-common/vfprintf-internal.c b/stdio-common/vfprintf-internal.c
index 3be92d4b6e..7cb9333c53 100644
--- a/stdio-common/vfprintf-internal.c
+++ b/stdio-common/vfprintf-internal.c
@@ -31,6 +31,7 @@
 #include <locale/localeinfo.h>
 #include <stdio.h>
 #include <scratch_buffer.h>
+#include <intprops.h>
 
 /* This code is shared between the standard stdio implementation found
    in GNU C library and the libio implementation originally found in
@@ -119,22 +120,38 @@
   while (0)
 #endif
 
-#define done_add(val) \
-  do {									      \
-    unsigned int _val = val;						      \
-    assert ((unsigned int) done < (unsigned int) INT_MAX);		      \
-    if (__glibc_unlikely (INT_MAX - done < _val))			      \
-      {									      \
-	done = -1;							      \
-	 __set_errno (EOVERFLOW);					      \
-	goto all_done;							      \
-      }									      \
-    done += _val;							      \
-  } while (0)
+/* Add LENGTH to DONE.  Return the new value of DONE, or -1 on
+   overflow (and set errno accordingly).  */
+static inline int
+done_add_func (size_t length, int done)
+{
+  if (done < 0)
+    return done;
+  int ret;
+  if (INT_ADD_WRAPV (done, length, &ret))
+    {
+      __set_errno (EOVERFLOW);
+      return -1;
+    }
+  return ret;
+}
+
+#define done_add(val)							\
+  do									\
+    {									\
+      /* Ensure that VAL has a type similar to int.  */			\
+      _Static_assert (sizeof (val) == sizeof (int), "value int size");	\
+      _Static_assert ((__typeof__ (val)) -1 < 0, "value signed");	\
+      done = done_add_func ((val), done);				\
+      if (done < 0)							\
+	goto all_done;							\
+    }									\
+  while (0)
 
 #ifndef COMPILE_WPRINTF
 # define vfprintf	__vfprintf_internal
 # define CHAR_T		char
+# define OTHER_CHAR_T   wchar_t
 # define UCHAR_T	unsigned char
 # define INT_T		int
 typedef const char *THOUSANDS_SEP_T;
@@ -143,25 +160,14 @@ typedef const char *THOUSANDS_SEP_T;
 # define STR_LEN(Str)	strlen (Str)
 
 # define PUT(F, S, N)	_IO_sputn ((F), (S), (N))
-# define PAD(Padchar) \
-  do {									      \
-    if (width > 0)							      \
-      {									      \
-	ssize_t written = _IO_padn (s, (Padchar), width);		      \
-	if (__glibc_unlikely (written != width))			      \
-	  {								      \
-	    done = -1;							      \
-	    goto all_done;						      \
-	  }								      \
-	done_add (written);						      \
-      }									      \
-  } while (0)
 # define PUTC(C, F)	_IO_putc_unlocked (C, F)
 # define ORIENT		if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, -1) != -1)\
 			  return -1
+# define CONVERT_FROM_OTHER_STRING __wcsrtombs
 #else
 # define vfprintf	__vfwprintf_internal
 # define CHAR_T		wchar_t
+# define OTHER_CHAR_T   char
 /* This is a hack!!!  There should be a type uwchar_t.  */
 # define UCHAR_T	unsigned int /* uwchar_t */
 # define INT_T		wint_t
@@ -173,21 +179,9 @@ typedef wchar_t THOUSANDS_SEP_T;
 # include <_itowa.h>
 
 # define PUT(F, S, N)	_IO_sputn ((F), (S), (N))
-# define PAD(Padchar) \
-  do {									      \
-    if (width > 0)							      \
-      {									      \
-	ssize_t written = _IO_wpadn (s, (Padchar), width);		      \
-	if (__glibc_unlikely (written != width))			      \
-	  {								      \
-	    done = -1;							      \
-	    goto all_done;						      \
-	  }								      \
-	done_add (written);						      \
-      }									      \
-  } while (0)
 # define PUTC(C, F)	_IO_putwc_unlocked (C, F)
 # define ORIENT		if (_IO_fwide (s, 1) != 1) return -1
+# define CONVERT_FROM_OTHER_STRING __mbsrtowcs
 
 # undef _itoa
 # define _itoa(Val, Buf, Base, Case) _itowa (Val, Buf, Base, Case)
@@ -196,6 +190,33 @@ typedef wchar_t THOUSANDS_SEP_T;
 # define EOF WEOF
 #endif
 
+static inline int
+pad_func (FILE *s, CHAR_T padchar, int width, int done)
+{
+  if (width > 0)
+    {
+      ssize_t written;
+#ifndef COMPILE_WPRINTF
+      written = _IO_padn (s, padchar, width);
+#else
+      written = _IO_wpadn (s, padchar, width);
+#endif
+      if (__glibc_unlikely (written != width))
+	return -1;
+      return done_add_func (width, done);
+    }
+  return done;
+}
+
+#define PAD(Padchar)							\
+  do									\
+    {									\
+      done = pad_func (s, (Padchar), width, done);			\
+      if (done < 0)							\
+	goto all_done;							\
+    }									\
+  while (0)
+
 #include "_i18n_number.h"
 
 /* Include the shared code for parsing the format string.  */
@@ -215,24 +236,115 @@ typedef wchar_t THOUSANDS_SEP_T;
     }									      \
   while (0)
 
-#define outstring(String, Len)						      \
-  do									      \
-    {									      \
-      assert ((size_t) done <= (size_t) INT_MAX);			      \
-      if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len))		      \
-	{								      \
-	  done = -1;							      \
-	  goto all_done;						      \
-	}								      \
-      if (__glibc_unlikely (INT_MAX - done < (Len)))			      \
-      {									      \
-	done = -1;							      \
-	 __set_errno (EOVERFLOW);					      \
-	goto all_done;							      \
-      }									      \
-      done += (Len);							      \
-    }									      \
-  while (0)
+static inline int
+outstring_func (FILE *s, const UCHAR_T *string, size_t length, int done)
+{
+  assert ((size_t) done <= (size_t) INT_MAX);
+  if ((size_t) PUT (s, string, length) != (size_t) (length))
+    return -1;
+  return done_add_func (length, done);
+}
+
+#define outstring(String, Len)						\
+  do									\
+    {									\
+      const void *string_ = (String);					\
+      done = outstring_func (s, string_, (Len), done);			\
+      if (done < 0)							\
+	goto all_done;							\
+    }									\
+   while (0)
+
+/* Write the string SRC to S.  If PREC is non-negative, write at most
+   PREC bytes.  If LEFT is true, perform left justification.  */
+static int
+outstring_converted_wide_string (FILE *s, const OTHER_CHAR_T *src, int prec,
+				 int width, bool left, int done)
+{
+  /* Use a small buffer to combine processing of multiple characters.
+     CONVERT_FROM_OTHER_STRING expects the buffer size in (wide)
+     characters, and buf_length counts that.  */
+  enum { buf_length = 256 / sizeof (CHAR_T) };
+  CHAR_T buf[buf_length];
+  _Static_assert (sizeof (buf) > MB_LEN_MAX,
+		  "buffer is large enough for a single multi-byte character");
+
+  /* Add the initial padding if needed.  */
+  if (width > 0 && !left)
+    {
+      /* Make a first pass to find the output width, so that we can
+	 add the required padding.  */
+      mbstate_t mbstate = { 0 };
+      const OTHER_CHAR_T *src_copy = src;
+      size_t total_written;
+      if (prec < 0)
+	total_written = CONVERT_FROM_OTHER_STRING
+	  (NULL, &src_copy, 0, &mbstate);
+      else
+	{
+	  /* The source might not be null-terminated.  Enforce the
+	     limit manually, based on the output length.  */
+	  total_written = 0;
+	  size_t limit = prec;
+	  while (limit > 0 && src_copy != NULL)
+	    {
+	      size_t write_limit = buf_length;
+	      if (write_limit > limit)
+		write_limit = limit;
+	      size_t written = CONVERT_FROM_OTHER_STRING
+		(buf, &src_copy, write_limit, &mbstate);
+	      if (written == (size_t) -1)
+		return -1;
+	      if (written == 0)
+		break;
+	      total_written += written;
+	      limit -= written;
+	    }
+	}
+
+      /* Output initial padding.  */
+      if (total_written < width)
+	{
+	  done = pad_func (s, L_(' '), width - total_written, done);
+	  if (done < 0)
+	    return done;
+	}
+    }
+
+  /* Convert the input string, piece by piece.  */
+  size_t total_written = 0;
+  {
+    mbstate_t mbstate = { 0 };
+    /* If prec is negative, remaining is not decremented, otherwise,
+      it serves as the write limit.  */
+    size_t remaining = -1;
+    if (prec >= 0)
+      remaining = prec;
+    while (remaining > 0 && src != NULL)
+      {
+	size_t write_limit = buf_length;
+	if (remaining < write_limit)
+	  write_limit = remaining;
+	size_t written = CONVERT_FROM_OTHER_STRING
+	  (buf, &src, write_limit, &mbstate);
+	if (written == (size_t) -1)
+	  return -1;
+	if (written == 0)
+	  break;
+	done = outstring_func (s, (const UCHAR_T *) buf, written, done);
+	if (done < 0)
+	  return done;
+	total_written += written;
+	if (prec >= 0)
+	  remaining -= written;
+      }
+  }
+
+  /* Add final padding.  */
+  if (width > 0 && left && total_written < width)
+    return pad_func (s, L_(' '), width - total_written, done);
+  return done;
+}
 
 /* For handling long_double and longlong we use the same flag.  If
    `long' and `long long' are effectively the same type define it to
@@ -1022,7 +1134,6 @@ static const uint8_t jump_table[] =
     LABEL (form_string):						      \
       {									      \
 	size_t len;							      \
-	int string_malloced;						      \
 									      \
 	/* The string argument could in fact be `char *' or `wchar_t *'.      \
 	   But this should not make a difference here.  */		      \
@@ -1034,7 +1145,6 @@ static const uint8_t jump_table[] =
 	/* Entry point for printing other strings.  */			      \
       LABEL (print_string):						      \
 									      \
-	string_malloced = 0;						      \
 	if (string == NULL)						      \
 	  {								      \
 	    /* Write "(null)" if there's space.  */			      \
@@ -1051,41 +1161,12 @@ static const uint8_t jump_table[] =
 	  }								      \
 	else if (!is_long && spec != L_('S'))				      \
 	  {								      \
-	    /* This is complicated.  We have to transform the multibyte	      \
-	       string into a wide character string.  */			      \
-	    const char *mbs = (const char *) string;			      \
-	    mbstate_t mbstate;						      \
-									      \
-	    len = prec != -1 ? __strnlen (mbs, (size_t) prec) : strlen (mbs); \
-									      \
-	    /* Allocate dynamically an array which definitely is long	      \
-	       enough for the wide character version.  Each byte in the	      \
-	       multi-byte string can produce at most one wide character.  */  \
-	    if (__glibc_unlikely (len > SIZE_MAX / sizeof (wchar_t)))	      \
-	      {								      \
-		__set_errno (EOVERFLOW);				      \
-		done = -1;						      \
-		goto all_done;						      \
-	      }								      \
-	    else if (__libc_use_alloca (len * sizeof (wchar_t)))	      \
-	      string = (CHAR_T *) alloca (len * sizeof (wchar_t));	      \
-	    else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t)))    \
-		     == NULL)						      \
-	      {								      \
-		done = -1;						      \
-		goto all_done;						      \
-	      }								      \
-	    else							      \
-	      string_malloced = 1;					      \
-									      \
-	    memset (&mbstate, '\0', sizeof (mbstate_t));		      \
-	    len = __mbsrtowcs (string, &mbs, len, &mbstate);		      \
-	    if (len == (size_t) -1)					      \
-	      {								      \
-		/* Illegal multibyte character.  */			      \
-		done = -1;						      \
-		goto all_done;						      \
-	      }								      \
+	    done = outstring_converted_wide_string			      \
+	      (s, (const char *) string, prec, width, left, done);	      \
+	    if (done < 0)						      \
+	      goto all_done;						      \
+	    /* The padding has already been written.  */		      \
+	    break;							      \
 	  }								      \
 	else								      \
 	  {								      \
@@ -1108,8 +1189,6 @@ static const uint8_t jump_table[] =
 	outstring (string, len);					      \
 	if (left)							      \
 	  PAD (L' ');							      \
-	if (__glibc_unlikely (string_malloced))				      \
-	  free (string);						      \
       }									      \
       break;
 #else
@@ -1158,7 +1237,6 @@ static const uint8_t jump_table[] =
     LABEL (form_string):						      \
       {									      \
 	size_t len;							      \
-	int string_malloced;						      \
 									      \
 	/* The string argument could in fact be `char *' or `wchar_t *'.      \
 	   But this should not make a difference here.  */		      \
@@ -1170,7 +1248,6 @@ static const uint8_t jump_table[] =
 	/* Entry point for printing other strings.  */			      \
       LABEL (print_string):						      \
 									      \
-	string_malloced = 0;						      \
 	if (string == NULL)						      \
 	  {								      \
 	    /* Write "(null)" if there's space.  */			      \
@@ -1196,51 +1273,12 @@ static const uint8_t jump_table[] =
 	  }								      \
 	else								      \
 	  {								      \
-	    const wchar_t *s2 = (const wchar_t *) string;		      \
-	    mbstate_t mbstate;						      \
-									      \
-	    memset (&mbstate, '\0', sizeof (mbstate_t));		      \
-									      \
-	    if (prec >= 0)						      \
-	      {								      \
-		/* The string `s2' might not be NUL terminated.  */	      \
-		if (__libc_use_alloca (prec))				      \
-		  string = (char *) alloca (prec);			      \
-		else if ((string = (char *) malloc (prec)) == NULL)	      \
-		  {							      \
-		    done = -1;						      \
-		    goto all_done;					      \
-		  }							      \
-		else							      \
-		  string_malloced = 1;					      \
-		len = __wcsrtombs (string, &s2, prec, &mbstate);	      \
-	      }								      \
-	    else							      \
-	      {								      \
-		len = __wcsrtombs (NULL, &s2, 0, &mbstate);		      \
-		if (len != (size_t) -1)					      \
-		  {							      \
-		    assert (__mbsinit (&mbstate));			      \
-		    s2 = (const wchar_t *) string;			      \
-		    if (__libc_use_alloca (len + 1))			      \
-		      string = (char *) alloca (len + 1);		      \
-		    else if ((string = (char *) malloc (len + 1)) == NULL)    \
-		      {							      \
-			done = -1;					      \
-			goto all_done;					      \
-		      }							      \
-		    else						      \
-		      string_malloced = 1;				      \
-		    (void) __wcsrtombs (string, &s2, len + 1, &mbstate);      \
-		  }							      \
-	      }								      \
-									      \
-	    if (len == (size_t) -1)					      \
-	      {								      \
-		/* Illegal wide-character string.  */			      \
-		done = -1;						      \
-		goto all_done;						      \
-	      }								      \
+	    done = outstring_converted_wide_string			      \
+	      (s, (const wchar_t *) string, prec, width, left, done);	      \
+	    if (done < 0)						      \
+	      goto all_done;						      \
+	    /* The padding has already been written.  */		      \
+	    break;							      \
 	  }								      \
 									      \
 	if ((width -= len) < 0)						      \
@@ -1254,8 +1292,6 @@ static const uint8_t jump_table[] =
 	outstring (string, len);					      \
 	if (left)							      \
 	  PAD (' ');							      \
-	if (__glibc_unlikely (string_malloced))			              \
-	  free (string);						      \
       }									      \
       break;
 #endif