about summary refs log tree commit diff
diff options
context:
space:
mode:
authorTulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>2015-04-16 11:02:14 -0300
committerTulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>2015-04-16 11:02:14 -0300
commit072e4453edcad72cf72cdb46b07d98e088c60c55 (patch)
treeda163ad435fa1b38680b4868633fee9efd01ffad
parentae75fdc50acbdcf5bfda6c3151c537f2ab8ce637 (diff)
parent4d54424420c6300efbf57a7b9aa8635a8b8c1942 (diff)
downloadglibc-072e4453edcad72cf72cdb46b07d98e088c60c55.tar.gz
glibc-072e4453edcad72cf72cdb46b07d98e088c60c55.tar.xz
glibc-072e4453edcad72cf72cdb46b07d98e088c60c55.zip
Merge release/2.20/master into ibm/2.20/master
Conflicts:
	NEWS
	stdio-common/Makefile
-rw-r--r--ChangeLog142
-rw-r--r--NEWS8
-rw-r--r--locale/weight.h13
-rw-r--r--locale/weightwc.h13
-rw-r--r--localedata/sort-test.sh7
-rw-r--r--localedata/xfrm-test.c52
-rw-r--r--misc/sys/cdefs.h10
-rw-r--r--nscd/nscd.c45
-rw-r--r--nscd/nscd_conf.c3
-rw-r--r--po/fr.po4
-rw-r--r--posix/fnmatch.c8
-rw-r--r--posix/fnmatch_loop.c17
-rw-r--r--posix/regcomp.c10
-rw-r--r--posix/regex_internal.h7
-rw-r--r--posix/regexec.c8
-rw-r--r--stdio-common/Makefile2
-rw-r--r--string/strcoll_l.c9
-rw-r--r--string/strxfrm_l.c491
-rw-r--r--sysdeps/aarch64/start.S1
-rw-r--r--sysdeps/unix/sysv/linux/mips/vfork.S1
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.c9
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.h4
-rw-r--r--sysdeps/x86_64/multiarch/memcpy.S2
-rw-r--r--sysdeps/x86_64/multiarch/memcpy_chk.S2
-rw-r--r--sysdeps/x86_64/multiarch/memmove.c2
-rw-r--r--sysdeps/x86_64/multiarch/memmove_chk.c2
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy.S2
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy_chk.S2
28 files changed, 704 insertions, 172 deletions
diff --git a/ChangeLog b/ChangeLog
index f856a8aca2..b19db3c9db 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -310,6 +310,148 @@
 	* sysdeps/powerpc/powerpc64/power6/memset.S: Likewise.
 	* sysdeps/powerpc/powerpc64/power7/memset.S: Likewise.
 
+2015-02-16  Paul Pluzhnikov  <ppluzhnikov@google.com>
+
+	[BZ #16618]
+	* stdio-common/tst-sscanf.c (main): Test for buffer overflow.
+	* stdio-common/vfscanf.c (_IO_vfscanf_internal): Compute needed
+	size in bytes. Store needed elements in wpmax. Use needed size
+	in bytes for extend_alloca.
+
+2015-02-16  H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #17801]
+	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
+	Set the bit_AVX_Fast_Unaligned_Load bit for AVX2.
+	* sysdeps/x86_64/multiarch/init-arch.h (bit_AVX_Fast_Unaligned_Load):
+	New.
+	(index_AVX_Fast_Unaligned_Load): Likewise.
+	(HAS_AVX_FAST_UNALIGNED_LOAD): Likewise.
+	* sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Check the
+	bit_AVX_Fast_Unaligned_Load bit instead of the bit_AVX_Usable bit.
+	* sysdeps/x86_64/multiarch/memcpy_chk.S (__memcpy_chk): Likewise.
+	* sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Likewise.
+	* sysdeps/x86_64/multiarch/mempcpy_chk.S (__mempcpy_chk): Likewise.
+	* sysdeps/x86_64/multiarch/memmove.c (__libc_memmove): Replace
+	HAS_AVX with HAS_AVX_FAST_UNALIGNED_LOAD.
+	* sysdeps/x86_64/multiarch/memmove_chk.c (__memmove_chk): Likewise.
+
+2015-02-16  Leonhard Holz  <leonhard.holz@web.de>
+
+	[BZ #16009]
+	* string/strxfrm_l.c (STRXFRM): Allocate fixed size cache for
+	weights and rules. Use do_xfrm_cached if data fits in cache,
+	do_xfrm otherwise.  Moved former main loop to...
+	* (do_xfrm_cached): New function.
+	* (do_xfrm): Non-caching version of do_xfrm_cached. Uses
+	find_idx, find_position and stack_push.
+	* (find_idx): New function.
+	* (find_position): Likewise.
+	* localedata/sort-test.sh: Added test run for do_xfrm.
+	* localedata/xfrm-test.c (main): Added command line option
+	-nocache to run the test with strings that are too large for
+	the STRXFRM cache.
+
+2015-02-16  Kostya Serebryany  <konstantin.s.serebryany@gmail.com>
+	    Roland McGrath  <roland@hack.frob.com>
+
+	* locale/weight.h: Add include guard.
+	(findidx): Make static rather than auto; take new parameters
+	TABLE, INDIRECT, and EXTRA instead of getting them as outer locals.
+	* locale/weightwc.h: Likewise.
+	* posix/fnmatch_loop.c
+	(FCT): Change type of EXTRA from int32_t to wint_t.
+	Don't include either header inside the function.
+	Call FINDIDX rather than findidx, and pass new arguments.
+	#undef FINDIDX at the end of the file.
+	* posix/fnmatch.c [_LIBC]: #include <locale/weight.h> and define
+	FINDIDX before including fnmatch_loop.c for the non-wide version.
+	[_LIBC] [HANDLE_MULTIBYTE]: #define findidx to findidxwc around
+	#include <locale/weightwc.h>, and define FINDIDX to findidxwc
+	for the wide version.
+	* posix/regcomp.c [_LIBC]: #include <locale/weight.h>.
+	(build_equiv_class) [_LIBC]: Don't #include it inside the function.
+	Pass new arguments to findidx.
+	* posix/regexec.c [RE_ENABLE_I18N] [_LIBC]: #include <locale/weight.h>.
+	[RE_ENABLE_I18N] (check_node_accept_bytes) [_LIBC]:
+	Don't #include it inside the function.  Pass new arguments to findidx.
+	* posix/regex_internal.h
+	[!NOT_IN_libc] [_LIBC]: #include <locale/weight.h>.
+	(re_string_elem_size_at): Don't #include it inside the function.
+	Pass new arguments to findidx.
+	* string/strcoll_l.c: #include WEIGHT_H at top level.
+	(get_next_seq): Don't #include it inside the function.
+	Pass new arguments to findidx.
+	(get_next_seq_nocache): Likewise.
+	* string/strxfrm_l.c: #include WEIGHT_H at top level.
+	(STRXFRM): Don't #include it inside the function.
+	Pass new arguments to findidx.
+
+2014-12-16  Florian Weimer  <fweimer@redhat.com>
+
+	[BZ #17630]
+	* resolv/nss_dns/dns-network.c (getanswer_r): Iterate over alias
+	names.
+
+2014-12-15  Jeff Law  <law@redhat.com>
+
+	[BZ #16617]
+	* stdio-common/vfprintf.c (vfprintf): Allocate large specs array
+	on the heap.  (CVE-2012-3406)
+	* stdio-common/bug23-2.c, stdio-common/bug23-3.c: New file.
+	* stdio-common/bug23-4.c: New file.  Test case by Joseph Myers.
+	* stdio-common/Makefile (tests): Add bug23-2, bug23-3, bug23-4.
+
+2014-11-24  Siddhesh Poyarekar  <siddhesh@redhat.com>
+
+	[BZ #17266]
+	* misc/sys/cdefs.h: Define __extern_always_inline for clang
+	4.2 and newer.
+
+2014-11-19  Carlos O'Donell  <carlos@redhat.com>
+	    Florian Weimer  <fweimer@redhat.com>
+	    Joseph Myers  <joseph@codesourcery.com>
+	    Adam Conrad  <adconrad@0c3.net>
+	    Andreas Schwab  <schwab@suse.de>
+	    Brooks  <bmoses@google.com>
+
+	[BZ #17625]
+	* wordexp-test.c (__dso_handle): Add prototype.
+	(__register_atfork): Likewise.
+	(__app_register_atfork): New function.
+	(registered_forks): New global.
+	(register_fork): New function.
+	(test_case): Add 3 new tests for WRDE_CMDSUB.
+	(main): Call __app_register_atfork.
+	(testit): If WRDE_NOCMD set registered_forks to zero, run test, and if
+	fork count is non-zero fail the test.
+	* posix/wordexp.c (exec_comm): Return WRDE_CMDSUB if WRDE_NOCMD flag
+	is set.
+	(parse_dollars): Remove check for WRDE_NOCMD.
+	(parse_dquote): Likewise.
+
+2014-11-10  Renlin Li  <Renlin.Li@arm.com>
+
+	[BZ #17555]
+	* sysdeps/aarch64/start.S (_start): Delete x29 overwritten assignment.
+
+2014-10-22  Maciej W. Rozycki  <macro@codesourcery.com>
+
+	[BZ #17485]
+	* sysdeps/unix/sysv/linux/mips/vfork.S (__libc_vfork): Define.
+
+2014-10-08  Roland McGrath  <roland@hack.frob.com>
+
+	[BZ #17460]
+	* nscd/nscd.c (more_help): Rewrite list of tables collection
+	using xstrdup and asprintf.
+
+	* nscd/nscd_conf.c: Remove local xstrdup declaration.
+
+2014-10-09  Allan McRae  <allan@archlinux.org>
+
+	* po/fr.po: Update French translation from translation project.
+
 2014-09-16  Siddhesh Poyarekar  <siddhesh@redhat.com>
 
 	[BZ #17370]
diff --git a/NEWS b/NEWS
index 60ece7dab6..4d94c08377 100644
--- a/NEWS
+++ b/NEWS
@@ -9,7 +9,8 @@ Version 2.20.1
 
 * The following bugs are resolved with this release:
 
-  16617, 16618, 17266, 17370, 17371, 17625, 17630, 18104.
+  16009, 16617, 16618, 17266, 17370, 17371, 17460, 17485, 17555, 17625,
+  17630, 17801.
 
 * Added support for TSX lock elision of pthread mutexes on powerpc32, powerpc64
   and powerpc64le.  This may improve lock scaling of existing programs on
@@ -22,6 +23,11 @@ Version 2.20.1
   implementations for powerpc64/powerpc64le.
   Implemented by Adhemerval Zanella (IBM).
 
+* CVE-2015-1472 Under certain conditions wscanf can allocate too little
+  memory for the to-be-scanned arguments and overflow the allocated
+  buffer.  The implementation now correctly computes the required buffer
+  size when using malloc.
+
 * CVE-2104-7817 The wordexp function could ignore the WRDE_NOCMD flag
   under certain input conditions resulting in the execution of a shell for
   command substitution when the applicaiton did not request it. The
diff --git a/locale/weight.h b/locale/weight.h
index 9eb8ac666a..9d93fdc5c4 100644
--- a/locale/weight.h
+++ b/locale/weight.h
@@ -16,10 +16,15 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#ifndef _WEIGHT_H_
+#define _WEIGHT_H_	1
+
 /* Find index of weight.  */
-auto inline int32_t
-__attribute ((always_inline))
-findidx (const unsigned char **cpp, size_t len)
+static inline int32_t __attribute__ ((always_inline))
+findidx (const int32_t *table,
+	 const int32_t *indirect,
+	 const unsigned char *extra,
+	 const unsigned char **cpp, size_t len)
 {
   int_fast32_t i = table[*(*cpp)++];
   const unsigned char *cp;
@@ -130,3 +135,5 @@ findidx (const unsigned char **cpp, size_t len)
   /* NOTREACHED */
   return 0x43219876;
 }
+
+#endif	/* weight.h */
diff --git a/locale/weightwc.h b/locale/weightwc.h
index 8f047e3ba7..0f70b00658 100644
--- a/locale/weightwc.h
+++ b/locale/weightwc.h
@@ -16,10 +16,15 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#ifndef _WEIGHTWC_H_
+#define _WEIGHTWC_H_	1
+
 /* Find index of weight.  */
-auto inline int32_t
-__attribute ((always_inline))
-findidx (const wint_t **cpp, size_t len)
+static inline int32_t __attribute__ ((always_inline))
+findidx (const int32_t *table,
+	 const int32_t *indirect,
+	 const wint_t *extra,
+	 const wint_t **cpp, size_t len)
 {
   wint_t ch = *(*cpp)++;
   int32_t i = __collidx_table_lookup ((const char *) table, ch);
@@ -109,3 +114,5 @@ findidx (const wint_t **cpp, size_t len)
   /* NOTREACHED */
   return 0x43219876;
 }
+
+#endif	/* weightwc.h */
diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh
index e37129a032..3cb57fb45c 100644
--- a/localedata/sort-test.sh
+++ b/localedata/sort-test.sh
@@ -53,11 +53,18 @@ for l in $lang; do
    ${common_objpfx}localedata/xfrm-test $id < $cns.in \
    > ${common_objpfx}localedata/$cns.xout || here=1
   cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1
+  ${test_program_prefix_before_env} \
+   ${run_program_env} \
+   LC_ALL=$l ${test_program_prefix_after_env} \
+   ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \
+   > ${common_objpfx}localedata/$cns.nocache.xout || here=1
+  cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout || here=1
   if test $here -eq 0; then
     echo "$l xfrm-test OK"
   else
     echo "$l xfrm-test FAIL"
     diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/  /'
+    diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout | sed 's/^/  /'
     status=1
   fi
 done
diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c
index d2aba7d26e..5cf29f60c9 100644
--- a/localedata/xfrm-test.c
+++ b/localedata/xfrm-test.c
@@ -23,7 +23,10 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdbool.h>
 
+/* Keep in sync with string/strxfrm_l.c.  */
+#define SMALL_STR_SIZE 4095
 
 struct lines
 {
@@ -37,6 +40,7 @@ int
 main (int argc, char *argv[])
 {
   int result = 0;
+  bool nocache = false;
   size_t nstrings, nstrings_max;
   struct lines *strings;
   char *line = NULL;
@@ -44,7 +48,18 @@ main (int argc, char *argv[])
   size_t n;
 
   if (argc < 2)
-    error (1, 0, "usage: %s <random seed>", argv[0]);
+    error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]);
+
+  if (argc == 3)
+    {
+      if (strcmp (argv[2], "-nocache") == 0)
+	nocache = true;
+      else
+	{
+	  printf ("Unknown option %s!\n", argv[2]);
+	  exit (1);
+	}
+    }
 
   setlocale (LC_ALL, "");
 
@@ -59,9 +74,9 @@ main (int argc, char *argv[])
 
   while (1)
     {
-      char saved, *newp;
-      int needed;
-      int l;
+      char saved, *word, *newp;
+      size_t l, line_len, needed;
+
       if (getline (&line, &len, stdin) < 0)
 	break;
 
@@ -83,10 +98,35 @@ main (int argc, char *argv[])
 
       saved = line[l];
       line[l] = '\0';
-      needed = strxfrm (NULL, line, 0);
+
+      if (nocache)
+	{
+	  line_len = strlen (line);
+	  word = malloc (line_len + SMALL_STR_SIZE + 1);
+	  if (word == NULL)
+	    {
+	      printf ("malloc failed: %m\n");
+	      exit (1);
+	    }
+	  memset (word, ' ', SMALL_STR_SIZE);
+	  memcpy (word + SMALL_STR_SIZE, line, line_len);
+	  word[line_len + SMALL_STR_SIZE] = '\0';
+	}
+      else
+        word = line;
+
+      needed = strxfrm (NULL, word, 0);
       newp = malloc (needed + 1);
-      strxfrm (newp, line, needed + 1);
+      if (newp == NULL)
+	{
+	  printf ("malloc failed: %m\n");
+	  exit (1);
+	}
+      strxfrm (newp, word, needed + 1);
       strings[nstrings].xfrm = newp;
+
+      if (nocache)
+	free (word);
       line[l] = saved;
       ++nstrings;
     }
diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h
index 01e81ba9f1..711ac1d912 100644
--- a/misc/sys/cdefs.h
+++ b/misc/sys/cdefs.h
@@ -321,8 +321,14 @@
    inline semantics, unless -fgnu89-inline is used.  Using __GNUC_STDC_INLINE__
    or __GNUC_GNU_INLINE is not a good enough check for gcc because gcc versions
    older than 4.3 may define these macros and still not guarantee GNU inlining
-   semantics.  */
-#if !defined __cplusplus || __GNUC_PREREQ (4,3)
+   semantics.
+
+   clang++ identifies itself as gcc-4.2, but has support for GNU inlining
+   semantics, that can be checked fot by using the __GNUC_STDC_INLINE_ and
+   __GNUC_GNU_INLINE__ macro definitions.  */
+#if (!defined __cplusplus || __GNUC_PREREQ (4,3) \
+     || (defined __clang__ && (defined __GNUC_STDC_INLINE__ \
+			       || defined __GNUC_GNU_INLINE__)))
 # if defined __GNUC_STDC_INLINE__ || defined __cplusplus
 #  define __extern_inline extern __inline __attribute__ ((__gnu_inline__))
 #  define __extern_always_inline \
diff --git a/nscd/nscd.c b/nscd/nscd.c
index 7131ead8cb..b7704b37f8 100644
--- a/nscd/nscd.c
+++ b/nscd/nscd.c
@@ -451,33 +451,36 @@ parse_opt (int key, char *arg, struct argp_state *state)
 static char *
 more_help (int key, const char *text, void *input)
 {
-  char *tables, *tp = NULL;
-
   switch (key)
     {
     case ARGP_KEY_HELP_EXTRA:
       {
-	dbtype cnt;
+	/* We print some extra information.  */
 
-	tables = xmalloc (sizeof (dbnames) + 1);
-	for (cnt = 0; cnt < lastdb; cnt++)
+	char *tables = xstrdup (dbnames[0]);
+	for (dbtype i = 1; i < lastdb; ++i)
 	  {
-	    strcat (tables, dbnames[cnt]);
-	    strcat (tables, " ");
+	    char *more_tables;
+	    if (asprintf (&more_tables, "%s %s", tables, dbnames[i]) < 0)
+	      more_tables = NULL;
+	    free (tables);
+	    if (more_tables == NULL)
+	      return NULL;
+	    tables = more_tables;
 	  }
-      }
 
-      /* We print some extra information.  */
-      if (asprintf (&tp, gettext ("\
+	char *tp;
+	if (asprintf (&tp, gettext ("\
 Supported tables:\n\
 %s\n\
 \n\
 For bug reporting instructions, please see:\n\
 %s.\n\
 "), tables, REPORT_BUGS_TO) < 0)
-	tp = NULL;
-      free (tables);
-      return tp;
+	  tp = NULL;
+	free (tables);
+	return tp;
+      }
 
     default:
       break;
@@ -622,15 +625,15 @@ monitor_child (int fd)
 	}
 
       if (WIFEXITED (status))
-        {
-          child_ret = WEXITSTATUS (status);
-          fprintf (stderr, _("child exited with status %d\n"), child_ret);
-        }
+	{
+	  child_ret = WEXITSTATUS (status);
+	  fprintf (stderr, _("child exited with status %d\n"), child_ret);
+	}
       if (WIFSIGNALED (status))
-        {
-          child_ret = WTERMSIG (status);
-          fprintf (stderr, _("child terminated by signal %d\n"), child_ret);
-        }
+	{
+	  child_ret = WTERMSIG (status);
+	  fprintf (stderr, _("child terminated by signal %d\n"), child_ret);
+	}
     }
 
   /* We have the child status, so exit with that code.  */
diff --git a/nscd/nscd_conf.c b/nscd/nscd_conf.c
index 7856ed9b5a..c8e194d3e2 100644
--- a/nscd/nscd_conf.c
+++ b/nscd/nscd_conf.c
@@ -32,9 +32,6 @@
 #include "dbg_log.h"
 #include "nscd.h"
 
-/* Wrapper functions with error checking for standard functions.  */
-extern char *xstrdup (const char *s);
-
 
 /* Names of the databases.  */
 const char *const dbnames[lastdb] =
diff --git a/po/fr.po b/po/fr.po
index fbf564a69d..bb7a9240c9 100644
--- a/po/fr.po
+++ b/po/fr.po
@@ -6673,11 +6673,11 @@ msgstr "Erreur d'entrée/sortie sur l'hôte cible"
 
 #: sysdeps/gnu/errlist.c:1399
 msgid "No medium found"
-msgstr "Aucun medium trouvé"
+msgstr "Aucun médium trouvé"
 
 #: sysdeps/gnu/errlist.c:1407
 msgid "Wrong medium type"
-msgstr "Mauvais type de medium"
+msgstr "Mauvais type de médium"
 
 #: sysdeps/gnu/errlist.c:1415
 msgid "Required key not available"
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index c330a122ab..85a6ec2263 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -221,6 +221,8 @@ __wcschrnul (s, c)
 # define MEMCHR(S, C, N) memchr (S, C, N)
 # define STRCOLL(S1, S2) strcoll (S1, S2)
 # define WIDE_CHAR_VERSION 0
+# include <locale/weight.h>
+# define FINDIDX findidx
 # include "fnmatch_loop.c"
 
 
@@ -246,6 +248,12 @@ __wcschrnul (s, c)
 #  define MEMCHR(S, C, N) wmemchr (S, C, N)
 #  define STRCOLL(S1, S2) wcscoll (S1, S2)
 #  define WIDE_CHAR_VERSION 1
+/* Change the name the header defines so it doesn't conflict with
+   the <locale/weight.h> version included above.  */
+#  define findidx findidxwc
+#  include <locale/weightwc.h>
+#  undef findidx
+#  define FINDIDX findidxwc
 
 #  undef IS_CHAR_CLASS
 /* We have to convert the wide character string in a multibyte string.  But
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 1957397d24..db6d9d7c56 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -376,7 +376,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			const int32_t *table;
 # if WIDE_CHAR_VERSION
 			const int32_t *weights;
-			const int32_t *extra;
+			const wint_t *extra;
 # else
 			const unsigned char *weights;
 			const unsigned char *extra;
@@ -385,19 +385,12 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			int32_t idx;
 			const UCHAR *cp = (const UCHAR *) str;
 
-			/* This #include defines a local function!  */
-# if WIDE_CHAR_VERSION
-#  include <locale/weightwc.h>
-# else
-#  include <locale/weight.h>
-# endif
-
 # if WIDE_CHAR_VERSION
 			table = (const int32_t *)
 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 			weights = (const int32_t *)
 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
-			extra = (const int32_t *)
+			extra = (const wint_t *)
 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 			indirect = (const int32_t *)
 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
@@ -412,7 +405,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 # endif
 
-			idx = findidx (&cp, 1);
+			idx = FINDIDX (table, indirect, extra, &cp, 1);
 			if (idx != 0)
 			  {
 			    /* We found a table entry.  Now see whether the
@@ -422,7 +415,8 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			    int32_t idx2;
 			    const UCHAR *np = (const UCHAR *) n;
 
-			    idx2 = findidx (&np, string_end - n);
+			    idx2 = FINDIDX (table, indirect, extra,
+					    &np, string_end - n);
 			    if (idx2 != 0
 				&& (idx >> 24) == (idx2 >> 24)
 				&& len == weights[idx2 & 0xffffff])
@@ -1277,3 +1271,4 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 #undef L
 #undef BTOWC
 #undef WIDE_CHAR_VERSION
+#undef FINDIDX
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 8f2747b3c9..897fe276a3 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -19,6 +19,10 @@
 
 #include <stdint.h>
 
+#ifdef _LIBC
+# include <locale/weight.h>
+#endif
+
 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
 					  size_t length, reg_syntax_t syntax);
 static void re_compile_fastmap_iter (regex_t *bufp,
@@ -3426,8 +3430,6 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
       int32_t idx1, idx2;
       unsigned int ch;
       size_t len;
-      /* This #include defines a local function!  */
-# include <locale/weight.h>
       /* Calculate the index for equivalence class.  */
       cp = name;
       table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
@@ -3437,7 +3439,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
 						   _NL_COLLATE_EXTRAMB);
       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
 						_NL_COLLATE_INDIRECTMB);
-      idx1 = findidx (&cp, -1);
+      idx1 = findidx (table, indirect, extra, &cp, -1);
       if (BE (idx1 == 0 || *cp != '\0', 0))
 	/* This isn't a valid character.  */
 	return REG_ECOLLATE;
@@ -3448,7 +3450,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
 	{
 	  char_buf[0] = ch;
 	  cp = char_buf;
-	  idx2 = findidx (&cp, 1);
+	  idx2 = findidx (table, indirect, extra, &cp, 1);
 /*
 	  idx2 = table[ch];
 */
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 4bbf6a8276..d1ed3dc46a 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -733,6 +733,10 @@ re_string_wchar_at (const re_string_t *pstr, int idx)
 }
 
 # ifndef NOT_IN_libc
+#  ifdef _LIBC
+#   include <locale/weight.h>
+#  endif
+
 static int
 internal_function __attribute__ ((pure, unused))
 re_string_elem_size_at (const re_string_t *pstr, int idx)
@@ -740,7 +744,6 @@ re_string_elem_size_at (const re_string_t *pstr, int idx)
 #  ifdef _LIBC
   const unsigned char *p, *extra;
   const int32_t *table, *indirect;
-#   include <locale/weight.h>
   uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 
   if (nrules != 0)
@@ -751,7 +754,7 @@ re_string_elem_size_at (const re_string_t *pstr, int idx)
       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
 						_NL_COLLATE_INDIRECTMB);
       p = pstr->mbs + idx;
-      findidx (&p, pstr->len - idx);
+      findidx (table, indirect, extra, &p, pstr->len - idx);
       return p - pstr->mbs - idx;
     }
   else
diff --git a/posix/regexec.c b/posix/regexec.c
index 7032da75aa..c840b38fc3 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -3749,6 +3749,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
    one collating element like '.', '[a-z]', opposite to the other nodes
    can only accept one byte.  */
 
+# ifdef _LIBC
+#  include <locale/weight.h>
+# endif
+
 static int
 internal_function
 check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
@@ -3868,8 +3872,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 	  const int32_t *table, *indirect;
 	  const unsigned char *weights, *extra;
 	  const char *collseqwc;
-	  /* This #include defines a local function!  */
-#  include <locale/weight.h>
 
 	  /* match with collating_symbol?  */
 	  if (cset->ncoll_syms)
@@ -3925,7 +3927,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 	      indirect = (const int32_t *)
 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
-	      int32_t idx = findidx (&cp, elem_len);
+	      int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
 	      if (idx > 0)
 		for (i = 0; i < cset->nequiv_classes; ++i)
 		  {
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 24e8496f75..e5e45b6135 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -57,7 +57,7 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \
 	 bug19 bug19a tst-popen2 scanf13 scanf14 scanf15 bug20 bug21 bug22 \
 	 scanf16 scanf17 tst-setvbuf1 tst-grouping bug23 bug24 \
 	 bug-vfprintf-nargs tst-long-dbl-fphex tst-fphex-wide tst-sprintf3 \
-	 bug25 tst-printf-round bug23-2 bug23-3 bug23-4
+	 bug25 tst-printf-round bug23-2 bug23-3 bug23-4 bug26
 
 test-srcs = tst-unbputc tst-printf
 
diff --git a/string/strcoll_l.c b/string/strcoll_l.c
index 10ce4a67ce..d4f42a32e5 100644
--- a/string/strcoll_l.c
+++ b/string/strcoll_l.c
@@ -41,6 +41,7 @@
 #define CONCAT1(a,b) a##b
 
 #include "../locale/localeinfo.h"
+#include WEIGHT_H
 
 /* Track status while looking for sequences in a string.  */
 typedef struct
@@ -152,7 +153,6 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
 	      const USTRING_TYPE *weights, const int32_t *table,
 	      const USTRING_TYPE *extra, const int32_t *indirect)
 {
-#include WEIGHT_H
   size_t val = seq->val = 0;
   int len = seq->len;
   size_t backw_stop = seq->backw_stop;
@@ -194,7 +194,7 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
 
 	  while (*us != L('\0'))
 	    {
-	      int32_t tmp = findidx (&us, -1);
+	      int32_t tmp = findidx (table, indirect, extra, &us, -1);
 	      rulearr[idxmax] = tmp >> 24;
 	      idxarr[idxmax] = tmp & 0xffffff;
 	      idxcnt = idxmax++;
@@ -242,7 +242,6 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets,
 		      const USTRING_TYPE *extra, const int32_t *indirect,
 		      int pass)
 {
-#include WEIGHT_H
   size_t val = seq->val = 0;
   int len = seq->len;
   size_t backw_stop = seq->backw_stop;
@@ -285,7 +284,7 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets,
 	      us = seq->back_us;
 	      while (i < backw)
 		{
-		  int32_t tmp = findidx (&us, -1);
+		  int32_t tmp = findidx (table, indirect, extra, &us, -1);
 		  idx = tmp & 0xffffff;
 		  i++;
 		}
@@ -300,7 +299,7 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets,
 
 	  while (*us != L('\0'))
 	    {
-	      int32_t tmp = findidx (&us, -1);
+	      int32_t tmp = findidx (table, indirect, extra, &us, -1);
 	      unsigned char rule = tmp >> 24;
 	      prev_idx = idx;
 	      idx = tmp & 0xffffff;
diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c
index 04b9338f05..95ffd6fdf1 100644
--- a/string/strxfrm_l.c
+++ b/string/strxfrm_l.c
@@ -40,8 +40,24 @@
 #define CONCAT(a,b) CONCAT1(a,b)
 #define CONCAT1(a,b) a##b
 
+/* Maximum string size that is calculated with cached indices.  Right now this
+   is an arbitrary value open to optimizations.  SMALL_STR_SIZE * 4 has to be
+   lower than __MAX_ALLOCA_CUTOFF.  Keep localedata/xfrm-test.c in sync.  */
+#define SMALL_STR_SIZE 4095
+
 #include "../locale/localeinfo.h"
+#include WEIGHT_H
 
+/* Group locale data for shorter parameter lists.  */
+typedef struct
+{
+  uint_fast32_t nrules;
+  unsigned char *rulesets;
+  USTRING_TYPE *weights;
+  int32_t *table;
+  USTRING_TYPE *extra;
+  int32_t *indirect;
+} locale_data_t;
 
 #ifndef WIDE_CHAR_VERSION
 
@@ -80,115 +96,325 @@ utf8_encode (char *buf, int val)
 }
 #endif
 
+/* Find next weight and rule index.  Inlined since called for every char.  */
+static __always_inline size_t
+find_idx (const USTRING_TYPE **us, int32_t *weight_idx,
+	  unsigned char *rule_idx, const locale_data_t *l_data, const int pass)
+{
+  int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us,
+			 -1);
+  *rule_idx = tmp >> 24;
+  int32_t idx = tmp & 0xffffff;
+  size_t len = l_data->weights[idx++];
+
+  /* Skip over indices of previous levels.  */
+  for (int i = 0; i < pass; i++)
+    {
+      idx += len;
+      len = l_data->weights[idx++];
+    }
 
-size_t
-STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
+  *weight_idx = idx;
+  return len;
+}
+
+static int
+find_position (const USTRING_TYPE *us, const locale_data_t *l_data,
+	       const int pass)
 {
-  struct __locale_data *current = l->__locales[LC_COLLATE];
-  uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
-  /* We don't assign the following values right away since it might be
-     unnecessary in case there are no rules.  */
-  const unsigned char *rulesets;
-  const int32_t *table;
-  const USTRING_TYPE *weights;
-  const USTRING_TYPE *extra;
-  const int32_t *indirect;
+  int32_t weight_idx;
+  unsigned char rule_idx;
+  const USTRING_TYPE *usrc = us;
+
+  find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass);
+  return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position;
+}
+
+/* Do the transformation.  */
+static size_t
+do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n,
+	 const locale_data_t *l_data)
+{
+  int32_t weight_idx;
+  unsigned char rule_idx;
   uint_fast32_t pass;
-  size_t needed;
+  size_t needed = 0;
   size_t last_needed;
-  const USTRING_TYPE *usrc;
-  size_t srclen = STRLEN (src);
-  int32_t *idxarr;
-  unsigned char *rulearr;
-  size_t idxmax;
-  size_t idxcnt;
-  int use_malloc;
-
-#include WEIGHT_H
 
-  if (nrules == 0)
+  /* Now the passes over the weights.  */
+  for (pass = 0; pass < l_data->nrules; ++pass)
     {
-      if (n != 0)
-	STPNCPY (dest, src, MIN (srclen + 1, n));
+      size_t backw_len = 0;
+      last_needed = needed;
+      const USTRING_TYPE *cur = usrc;
+      const USTRING_TYPE *backw_start = NULL;
 
-      return srclen;
-    }
+       /* We assume that if a rule has defined `position' in one section
+         this is true for all of them.  */
+      int position = find_position (cur, l_data, pass);
 
-  rulesets = (const unsigned char *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
-  table = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
-  weights = (const USTRING_TYPE *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
-  extra = (const USTRING_TYPE *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
-  indirect = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
-  use_malloc = 0;
+      if (position == 0)
+	{
+	  while (*cur != L('\0'))
+	    {
+	      const USTRING_TYPE *pos = cur;
+	      size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
+				     pass);
+	      int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
 
-  assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
-  assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
-  assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
-  assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
+	      if ((rule & sort_forward) != 0)
+		{
+		  /* Handle the pushed backward sequence.  */
+		  if (backw_start != NULL)
+		    {
+		      for (size_t i = backw_len; i > 0; )
+			{
+			  int32_t weight_idx;
+			  unsigned char rule_idx;
+			  size_t len = find_idx (&backw_start, &weight_idx,
+						 &rule_idx, l_data, pass);
+			  if (needed + i < n)
+			    for (size_t j = len; j > 0; j--)
+			      dest[needed + i - j] =
+				l_data->weights[weight_idx++];
+
+			  i -= len;
+			}
 
-  /* Handle an empty string as a special case.  */
-  if (srclen == 0)
-    {
-      if (n != 0)
-	*dest = L('\0');
-      return 0;
-    }
+		      needed += backw_len;
+		      backw_start = NULL;
+		      backw_len = 0;
+		    }
 
-  /* We need the elements of the string as unsigned values since they
-     are used as indeces.  */
-  usrc = (const USTRING_TYPE *) src;
-
-  /* Perform the first pass over the string and while doing this find
-     and store the weights for each character.  Since we want this to
-     be as fast as possible we are using `alloca' to store the temporary
-     values.  But since there is no limit on the length of the string
-     we have to use `malloc' if the string is too long.  We should be
-     very conservative here.  */
-  if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1)))
-    {
-      idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1));
-      rulearr = (unsigned char *) &idxarr[srclen];
-
-      if (idxarr == NULL)
-	/* No memory.  Well, go with the stack then.
-
-	   XXX Once this implementation is stable we will handle this
-	   differently.  Instead of precomputing the indeces we will
-	   do this in time.  This means, though, that this happens for
-	   every pass again.  */
-	goto try_stack;
-      use_malloc = 1;
-    }
-  else
-    {
-    try_stack:
-      idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
-      rulearr = (unsigned char *) alloca (srclen + 1);
+		  /* Now handle the forward element.  */
+		  if (needed + len < n)
+		    while (len-- > 0)
+		      dest[needed++] = l_data->weights[weight_idx++];
+		  else
+		    /* No more characters fit into the buffer.  */
+		    needed += len;
+		}
+	      else
+		{
+		  /* Remember start of the backward sequence & track length.  */
+		  if (backw_start == NULL)
+		    backw_start = pos;
+		  backw_len += len;
+		}
+	    }
+
+
+	  /* Handle the pushed backward sequence.  */
+	  if (backw_start != NULL)
+	    {
+	      for (size_t i = backw_len; i > 0; )
+		{
+		  size_t len = find_idx (&backw_start, &weight_idx, &rule_idx,
+					 l_data, pass);
+		  if (needed + i < n)
+		    for (size_t j = len; j > 0; j--)
+		      dest[needed + i - j] =
+			l_data->weights[weight_idx++];
+
+		  i -= len;
+		}
+
+	      needed += backw_len;
+	    }
+	}
+      else
+	{
+	  int val = 1;
+#ifndef WIDE_CHAR_VERSION
+	  char buf[7];
+	  size_t buflen;
+#endif
+	  size_t i;
+
+	  while (*cur != L('\0'))
+	    {
+	      const USTRING_TYPE *pos = cur;
+	      size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
+				     pass);
+	      int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
+
+	      if ((rule & sort_forward) != 0)
+		{
+		  /* Handle the pushed backward sequence.  */
+		  if (backw_start != NULL)
+		    {
+		      for (size_t p = backw_len; p > 0; p--)
+			{
+			  size_t len;
+			  int32_t weight_idx;
+			  unsigned char rule_idx;
+			  const USTRING_TYPE *backw_cur = backw_start;
+
+			  /* To prevent a warning init the used vars.  */
+			  len = find_idx (&backw_cur, &weight_idx,
+					  &rule_idx, l_data, pass);
+
+			  for (i = 1; i < p; i++)
+			    len = find_idx (&backw_cur, &weight_idx,
+					    &rule_idx, l_data, pass);
+
+			  if (len != 0)
+			    {
+#ifdef WIDE_CHAR_VERSION
+			      if (needed + 1 + len < n)
+				{
+				  dest[needed] = val;
+				  for (i = 0; i < len; ++i)
+				    dest[needed + 1 + i] =
+				      l_data->weights[weight_idx + i];
+				}
+			      needed += 1 + len;
+#else
+			      buflen = utf8_encode (buf, val);
+			      if (needed + buflen + len < n)
+				{
+				  for (i = 0; i < buflen; ++i)
+				    dest[needed + i] = buf[i];
+				  for (i = 0; i < len; ++i)
+				    dest[needed + buflen + i] =
+				      l_data->weights[weight_idx + i];
+				}
+			      needed += buflen + len;
+#endif
+			      val = 1;
+			    }
+			  else
+			    ++val;
+			}
+
+		      backw_start = NULL;
+		      backw_len = 0;
+		    }
+
+		  /* Now handle the forward element.  */
+		  if (len != 0)
+		    {
+#ifdef WIDE_CHAR_VERSION
+		      if (needed + 1 + len < n)
+			{
+			  dest[needed] = val;
+			  for (i = 0; i < len; ++i)
+			    dest[needed + 1 + i] =
+			      l_data->weights[weight_idx + i];
+			}
+		      needed += 1 + len;
+#else
+		      buflen = utf8_encode (buf, val);
+		      if (needed + buflen + len < n)
+			{
+			  for (i = 0; i < buflen; ++i)
+			    dest[needed + i] = buf[i];
+			  for (i = 0; i < len; ++i)
+			    dest[needed + buflen + i] =
+			      l_data->weights[weight_idx + i];
+			}
+		      needed += buflen + len;
+#endif
+		      val = 1;
+		    }
+		  else
+		    ++val;
+		}
+	      else
+		{
+		  /* Remember start of the backward sequence & track length.  */
+		  if (backw_start == NULL)
+		    backw_start = pos;
+		  backw_len++;
+		}
+	    }
+
+	  /* Handle the pushed backward sequence.  */
+	  if (backw_start != NULL)
+	    {
+	      for (size_t p = backw_len; p > 0; p--)
+		{
+		  size_t len;
+		  int32_t weight_idx;
+		  unsigned char rule_idx;
+		  const USTRING_TYPE *backw_cur = backw_start;
+
+		  /* To prevent a warning init the used vars.  */
+		  len = find_idx (&backw_cur, &weight_idx,
+				  &rule_idx, l_data, pass);
+
+		  for (i = 1; i < p; i++)
+		    len = find_idx (&backw_cur, &weight_idx,
+				    &rule_idx, l_data, pass);
+
+		  if (len != 0)
+		    {
+#ifdef WIDE_CHAR_VERSION
+		      if (needed + 1 + len < n)
+			{
+			  dest[needed] = val;
+			  for (i = 0; i < len; ++i)
+			    dest[needed + 1 + i] =
+			      l_data->weights[weight_idx + i];
+			}
+		      needed += 1 + len;
+#else
+		      buflen = utf8_encode (buf, val);
+		      if (needed + buflen + len < n)
+			{
+			  for (i = 0; i < buflen; ++i)
+			    dest[needed + i] = buf[i];
+			  for (i = 0; i < len; ++i)
+			    dest[needed + buflen + i] =
+			      l_data->weights[weight_idx + i];
+			}
+		      needed += buflen + len;
+#endif
+		      val = 1;
+		    }
+		  else
+		    ++val;
+		}
+	    }
+	}
+
+      /* Finally store the byte to separate the passes or terminate
+	 the string.  */
+      if (needed < n)
+	dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0');
+      ++needed;
     }
 
-  idxmax = 0;
-  do
+  /* This is a little optimization: many collation specifications have
+     a `position' rule at the end and if no non-ignored character
+     is found the last \1 byte is immediately followed by a \0 byte
+     signalling this.  We can avoid the \1 byte(s).  */
+  if (needed > 2 && needed == last_needed + 1)
     {
-      int32_t tmp = findidx (&usrc, -1);
-      rulearr[idxmax] = tmp >> 24;
-      idxarr[idxmax] = tmp & 0xffffff;
-
-      ++idxmax;
+      /* Remove the \1 byte.  */
+      if (--needed <= n)
+	dest[needed - 1] = L('\0');
     }
-  while (*usrc != L('\0'));
 
-  /* This element is only read, the value never used but to determine
-     another value which then is ignored.  */
-  rulearr[idxmax] = '\0';
+  /* Return the number of bytes/words we need, but don't count the NUL
+     byte/word at the end.  */
+  return needed - 1;
+}
+
+/* Do the transformation using weight-index and rule cache.  */
+static size_t
+do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data,
+		size_t idxmax, int32_t *idxarr, const unsigned char *rulearr)
+{
+  uint_fast32_t nrules = l_data->nrules;
+  unsigned char *rulesets = l_data->rulesets;
+  USTRING_TYPE *weights = l_data->weights;
+  uint_fast32_t pass;
+  size_t needed = 0;
+  size_t last_needed;
+  size_t idxcnt;
 
-  /* Now the passes over the weights.  We now use the indeces we found
-     before.  */
-  needed = 0;
+  /* Now the passes over the weights.  */
   for (pass = 0; pass < nrules; ++pass)
     {
       size_t backw_stop = ~0ul;
@@ -434,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
 	dest[needed - 1] = L('\0');
     }
 
-  /* Free the memory if needed.  */
-  if (use_malloc)
-    free (idxarr);
-
   /* Return the number of bytes/words we need, but don't count the NUL
      byte/word at the end.  */
   return needed - 1;
 }
+
+size_t
+STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
+{
+  locale_data_t l_data;
+  struct __locale_data *current = l->__locales[LC_COLLATE];
+  l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
+
+  /* Handle byte comparison case.  */
+  if (l_data.nrules == 0)
+    {
+      size_t srclen = STRLEN (src);
+
+      if (n != 0)
+	STPNCPY (dest, src, MIN (srclen + 1, n));
+
+      return srclen;
+    }
+
+  /* Handle an empty string, code hereafter relies on strlen (src) > 0.  */
+  if (*src == L('\0'))
+    {
+      if (n != 0)
+	*dest = L('\0');
+      return 0;
+    }
+
+  /* Get the locale data.  */
+  l_data.rulesets = (unsigned char *)
+    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
+  l_data.table = (int32_t *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
+  l_data.weights = (USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
+  l_data.extra = (USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
+  l_data.indirect = (int32_t *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
+
+  assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0);
+  assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0);
+  assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0);
+  assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0);
+
+  /* We need the elements of the string as unsigned values since they
+     are used as indeces.  */
+  const USTRING_TYPE *usrc = (const USTRING_TYPE *) src;
+
+  /* Allocate cache for small strings on the stack and fill it with weight and
+     rule indices.  If the cache size is not sufficient, continue with the
+     uncached xfrm version.  */
+  size_t idxmax = 0;
+  const USTRING_TYPE *cur = usrc;
+  int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t));
+  unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1);
+
+  do
+    {
+      int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur,
+			     -1);
+      rulearr[idxmax] = tmp >> 24;
+      idxarr[idxmax] = tmp & 0xffffff;
+
+      ++idxmax;
+    }
+  while (*cur != L('\0') && idxmax < SMALL_STR_SIZE);
+
+  /* This element is only read, the value never used but to determine
+     another value which then is ignored.  */
+  rulearr[idxmax] = '\0';
+
+  /* Do the transformation.  */
+  if (*cur == L('\0'))
+    return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr);
+  else
+    return do_xfrm (usrc, dest, n, &l_data);
+}
 libc_hidden_def (STRXFRM)
 
 #ifndef WIDE_CHAR_VERSION
diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S
index 35d603ae9e..69b45eac44 100644
--- a/sysdeps/aarch64/start.S
+++ b/sysdeps/aarch64/start.S
@@ -47,7 +47,6 @@ _start:
 	/* Create an initial frame with 0 LR and FP */
 	mov	x29, #0
 	mov	x30, #0
-	mov	x29, sp
 
 	/* Setup rtld_fini in argument register */
 	mov	x5, x0
diff --git a/sysdeps/unix/sysv/linux/mips/vfork.S b/sysdeps/unix/sysv/linux/mips/vfork.S
index 80c362d6eb..2c1a747db3 100644
--- a/sysdeps/unix/sysv/linux/mips/vfork.S
+++ b/sysdeps/unix/sysv/linux/mips/vfork.S
@@ -108,3 +108,4 @@ L(error):
 
 libc_hidden_def(__vfork)
 weak_alias (__vfork, vfork)
+strong_alias (__vfork, __libc_vfork)
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 2a6dcb78d8..f7c1bbe0db 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -167,9 +167,14 @@ __init_cpu_features (void)
 	  /* Determine if AVX is usable.  */
 	  if (CPUID_AVX)
 	    __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
-	  /* Determine if AVX2 is usable.  */
+#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
+# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
+#endif
+	  /* Determine if AVX2 is usable.  Unaligned load with 256-bit
+	     AVX registers are faster on processors with AVX2.  */
 	  if (CPUID_AVX2)
-	    __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable;
+	    __cpu_features.feature[index_AVX2_Usable]
+	      |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
 	  /* Determine if FMA is usable.  */
 	  if (CPUID_FMA)
 	    __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index ef0abbd226..2fc7c7ceec 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -25,6 +25,7 @@
 #define bit_FMA4_Usable			(1 << 8)
 #define bit_Slow_SSE4_2			(1 << 9)
 #define bit_AVX2_Usable			(1 << 10)
+#define bit_AVX_Fast_Unaligned_Load	(1 << 11)
 
 /* CPUID Feature flags.  */
 
@@ -74,6 +75,7 @@
 # define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Slow_SSE4_2		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_AVX2_Usable		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -169,6 +171,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_FMA4_Usable		FEATURE_INDEX_1
 # define index_Slow_SSE4_2		FEATURE_INDEX_1
 # define index_AVX2_Usable		FEATURE_INDEX_1
+# define index_AVX_Fast_Unaligned_Load	FEATURE_INDEX_1
 
 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
@@ -181,5 +184,6 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_AVX2			HAS_ARCH_FEATURE (AVX2_Usable)
 # define HAS_FMA			HAS_ARCH_FEATURE (FMA_Usable)
 # define HAS_FMA4			HAS_ARCH_FEATURE (FMA4_Usable)
+# define HAS_AVX_FAST_UNALIGNED_LOAD	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
 
 #endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index e666695407..10bbd39631 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -33,7 +33,7 @@ ENTRY(__new_memcpy)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__memcpy_avx_unaligned(%rip), %rax
-	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
+	testl	$bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
 	jz 1f
 	ret
 1:	leaq	__memcpy_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 076b19a9ea..30cca20330 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -39,7 +39,7 @@ ENTRY(__memcpy_chk)
 	testl	$bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
 	jz	2f
 	leaq	__memcpy_chk_ssse3_back(%rip), %rax
-	testl   $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
+	testl   $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
 	jz  2f
 	leaq    __memcpy_chk_avx_unaligned(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 0c9af7e4df..2c86a4a476 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -49,7 +49,7 @@ extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden;
    ifunc symbol properly.  */
 extern __typeof (__redirect_memmove) __libc_memmove;
 libc_ifunc (__libc_memmove,
-	    HAS_AVX
+	    HAS_AVX_FAST_UNALIGNED_LOAD
 	    ? __memmove_avx_unaligned
 	    : (HAS_SSSE3
 	       ? (HAS_FAST_COPY_BACKWARD
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 44344f2820..5ffcaecce4 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -30,7 +30,7 @@ extern __typeof (__memmove_chk) __memmove_chk_avx_unaligned attribute_hidden;
 #include "debug/memmove_chk.c"
 
 libc_ifunc (__memmove_chk,
-	    HAS_AVX ? __memmove_chk_avx_unaligned :
+	    HAS_AVX_FAST_UNALIGNED_LOAD ? __memmove_chk_avx_unaligned :
 	    (HAS_SSSE3
 	    ? (HAS_FAST_COPY_BACKWARD
 	       ? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index 7589d8c1ec..e205ef5557 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -37,7 +37,7 @@ ENTRY(__mempcpy)
 	testl	$bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
 	jz	2f
 	leaq	__mempcpy_ssse3_back(%rip), %rax
-	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
+	testl	$bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
 	jz	2f
 	leaq	__mempcpy_avx_unaligned(%rip), %rax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 88e0b74e83..dd777dfa48 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -39,7 +39,7 @@ ENTRY(__mempcpy_chk)
 	testl	$bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
 	jz	2f
 	leaq	__mempcpy_chk_ssse3_back(%rip), %rax
-	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
+	testl	$bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
 	jz	2f
 	leaq	__mempcpy_chk_avx_unaligned(%rip), %rax
 2:	ret