diff options
author | Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> | 2015-04-16 11:02:14 -0300 |
---|---|---|
committer | Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> | 2015-04-16 11:02:14 -0300 |
commit | 072e4453edcad72cf72cdb46b07d98e088c60c55 (patch) | |
tree | da163ad435fa1b38680b4868633fee9efd01ffad | |
parent | ae75fdc50acbdcf5bfda6c3151c537f2ab8ce637 (diff) | |
parent | 4d54424420c6300efbf57a7b9aa8635a8b8c1942 (diff) | |
download | glibc-072e4453edcad72cf72cdb46b07d98e088c60c55.tar.gz glibc-072e4453edcad72cf72cdb46b07d98e088c60c55.tar.xz glibc-072e4453edcad72cf72cdb46b07d98e088c60c55.zip |
Merge release/2.20/master into ibm/2.20/master
Conflicts: NEWS stdio-common/Makefile
-rw-r--r-- | ChangeLog | 142 | ||||
-rw-r--r-- | NEWS | 8 | ||||
-rw-r--r-- | locale/weight.h | 13 | ||||
-rw-r--r-- | locale/weightwc.h | 13 | ||||
-rw-r--r-- | localedata/sort-test.sh | 7 | ||||
-rw-r--r-- | localedata/xfrm-test.c | 52 | ||||
-rw-r--r-- | misc/sys/cdefs.h | 10 | ||||
-rw-r--r-- | nscd/nscd.c | 45 | ||||
-rw-r--r-- | nscd/nscd_conf.c | 3 | ||||
-rw-r--r-- | po/fr.po | 4 | ||||
-rw-r--r-- | posix/fnmatch.c | 8 | ||||
-rw-r--r-- | posix/fnmatch_loop.c | 17 | ||||
-rw-r--r-- | posix/regcomp.c | 10 | ||||
-rw-r--r-- | posix/regex_internal.h | 7 | ||||
-rw-r--r-- | posix/regexec.c | 8 | ||||
-rw-r--r-- | stdio-common/Makefile | 2 | ||||
-rw-r--r-- | string/strcoll_l.c | 9 | ||||
-rw-r--r-- | string/strxfrm_l.c | 491 | ||||
-rw-r--r-- | sysdeps/aarch64/start.S | 1 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/mips/vfork.S | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/init-arch.c | 9 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/init-arch.h | 4 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy.S | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy_chk.S | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memmove.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memmove_chk.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/mempcpy.S | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/mempcpy_chk.S | 2 |
28 files changed, 704 insertions, 172 deletions
diff --git a/ChangeLog b/ChangeLog index f856a8aca2..b19db3c9db 100644 --- a/ChangeLog +++ b/ChangeLog @@ -310,6 +310,148 @@ * sysdeps/powerpc/powerpc64/power6/memset.S: Likewise. * sysdeps/powerpc/powerpc64/power7/memset.S: Likewise. +2015-02-16 Paul Pluzhnikov <ppluzhnikov@google.com> + + [BZ #16618] + * stdio-common/tst-sscanf.c (main): Test for buffer overflow. + * stdio-common/vfscanf.c (_IO_vfscanf_internal): Compute needed + size in bytes. Store needed elements in wpmax. Use needed size + in bytes for extend_alloca. + +2015-02-16 H.J. Lu <hongjiu.lu@intel.com> + + [BZ #17801] + * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): + Set the bit_AVX_Fast_Unaligned_Load bit for AVX2. + * sysdeps/x86_64/multiarch/init-arch.h (bit_AVX_Fast_Unaligned_Load): + New. + (index_AVX_Fast_Unaligned_Load): Likewise. + (HAS_AVX_FAST_UNALIGNED_LOAD): Likewise. + * sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Check the + bit_AVX_Fast_Unaligned_Load bit instead of the bit_AVX_Usable bit. + * sysdeps/x86_64/multiarch/memcpy_chk.S (__memcpy_chk): Likewise. + * sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Likewise. + * sysdeps/x86_64/multiarch/mempcpy_chk.S (__mempcpy_chk): Likewise. + * sysdeps/x86_64/multiarch/memmove.c (__libc_memmove): Replace + HAS_AVX with HAS_AVX_FAST_UNALIGNED_LOAD. + * sysdeps/x86_64/multiarch/memmove_chk.c (__memmove_chk): Likewise. + +2015-02-16 Leonhard Holz <leonhard.holz@web.de> + + [BZ #16009] + * string/strxfrm_l.c (STRXFRM): Allocate fixed size cache for + weights and rules. Use do_xfrm_cached if data fits in cache, + do_xfrm otherwise. Moved former main loop to... + * (do_xfrm_cached): New function. + * (do_xfrm): Non-caching version of do_xfrm_cached. Uses + find_idx, find_position and stack_push. + * (find_idx): New function. + * (find_position): Likewise. + * localedata/sort-test.sh: Added test run for do_xfrm. + * localedata/xfrm-test.c (main): Added command line option + -nocache to run the test with strings that are too large for + the STRXFRM cache. + +2015-02-16 Kostya Serebryany <konstantin.s.serebryany@gmail.com> + Roland McGrath <roland@hack.frob.com> + + * locale/weight.h: Add include guard. + (findidx): Make static rather than auto; take new parameters + TABLE, INDIRECT, and EXTRA instead of getting them as outer locals. + * locale/weightwc.h: Likewise. + * posix/fnmatch_loop.c + (FCT): Change type of EXTRA from int32_t to wint_t. + Don't include either header inside the function. + Call FINDIDX rather than findidx, and pass new arguments. + #undef FINDIDX at the end of the file. + * posix/fnmatch.c [_LIBC]: #include <locale/weight.h> and define + FINDIDX before including fnmatch_loop.c for the non-wide version. + [_LIBC] [HANDLE_MULTIBYTE]: #define findidx to findidxwc around + #include <locale/weightwc.h>, and define FINDIDX to findidxwc + for the wide version. + * posix/regcomp.c [_LIBC]: #include <locale/weight.h>. + (build_equiv_class) [_LIBC]: Don't #include it inside the function. + Pass new arguments to findidx. + * posix/regexec.c [RE_ENABLE_I18N] [_LIBC]: #include <locale/weight.h>. + [RE_ENABLE_I18N] (check_node_accept_bytes) [_LIBC]: + Don't #include it inside the function. Pass new arguments to findidx. + * posix/regex_internal.h + [!NOT_IN_libc] [_LIBC]: #include <locale/weight.h>. + (re_string_elem_size_at): Don't #include it inside the function. + Pass new arguments to findidx. + * string/strcoll_l.c: #include WEIGHT_H at top level. + (get_next_seq): Don't #include it inside the function. + Pass new arguments to findidx. + (get_next_seq_nocache): Likewise. + * string/strxfrm_l.c: #include WEIGHT_H at top level. + (STRXFRM): Don't #include it inside the function. + Pass new arguments to findidx. + +2014-12-16 Florian Weimer <fweimer@redhat.com> + + [BZ #17630] + * resolv/nss_dns/dns-network.c (getanswer_r): Iterate over alias + names. + +2014-12-15 Jeff Law <law@redhat.com> + + [BZ #16617] + * stdio-common/vfprintf.c (vfprintf): Allocate large specs array + on the heap. (CVE-2012-3406) + * stdio-common/bug23-2.c, stdio-common/bug23-3.c: New file. + * stdio-common/bug23-4.c: New file. Test case by Joseph Myers. + * stdio-common/Makefile (tests): Add bug23-2, bug23-3, bug23-4. + +2014-11-24 Siddhesh Poyarekar <siddhesh@redhat.com> + + [BZ #17266] + * misc/sys/cdefs.h: Define __extern_always_inline for clang + 4.2 and newer. + +2014-11-19 Carlos O'Donell <carlos@redhat.com> + Florian Weimer <fweimer@redhat.com> + Joseph Myers <joseph@codesourcery.com> + Adam Conrad <adconrad@0c3.net> + Andreas Schwab <schwab@suse.de> + Brooks <bmoses@google.com> + + [BZ #17625] + * wordexp-test.c (__dso_handle): Add prototype. + (__register_atfork): Likewise. + (__app_register_atfork): New function. + (registered_forks): New global. + (register_fork): New function. + (test_case): Add 3 new tests for WRDE_CMDSUB. + (main): Call __app_register_atfork. + (testit): If WRDE_NOCMD set registered_forks to zero, run test, and if + fork count is non-zero fail the test. + * posix/wordexp.c (exec_comm): Return WRDE_CMDSUB if WRDE_NOCMD flag + is set. + (parse_dollars): Remove check for WRDE_NOCMD. + (parse_dquote): Likewise. + +2014-11-10 Renlin Li <Renlin.Li@arm.com> + + [BZ #17555] + * sysdeps/aarch64/start.S (_start): Delete x29 overwritten assignment. + +2014-10-22 Maciej W. Rozycki <macro@codesourcery.com> + + [BZ #17485] + * sysdeps/unix/sysv/linux/mips/vfork.S (__libc_vfork): Define. + +2014-10-08 Roland McGrath <roland@hack.frob.com> + + [BZ #17460] + * nscd/nscd.c (more_help): Rewrite list of tables collection + using xstrdup and asprintf. + + * nscd/nscd_conf.c: Remove local xstrdup declaration. + +2014-10-09 Allan McRae <allan@archlinux.org> + + * po/fr.po: Update French translation from translation project. + 2014-09-16 Siddhesh Poyarekar <siddhesh@redhat.com> [BZ #17370] diff --git a/NEWS b/NEWS index 60ece7dab6..4d94c08377 100644 --- a/NEWS +++ b/NEWS @@ -9,7 +9,8 @@ Version 2.20.1 * The following bugs are resolved with this release: - 16617, 16618, 17266, 17370, 17371, 17625, 17630, 18104. + 16009, 16617, 16618, 17266, 17370, 17371, 17460, 17485, 17555, 17625, + 17630, 17801. * Added support for TSX lock elision of pthread mutexes on powerpc32, powerpc64 and powerpc64le. This may improve lock scaling of existing programs on @@ -22,6 +23,11 @@ Version 2.20.1 implementations for powerpc64/powerpc64le. Implemented by Adhemerval Zanella (IBM). +* CVE-2015-1472 Under certain conditions wscanf can allocate too little + memory for the to-be-scanned arguments and overflow the allocated + buffer. The implementation now correctly computes the required buffer + size when using malloc. + * CVE-2104-7817 The wordexp function could ignore the WRDE_NOCMD flag under certain input conditions resulting in the execution of a shell for command substitution when the applicaiton did not request it. The diff --git a/locale/weight.h b/locale/weight.h index 9eb8ac666a..9d93fdc5c4 100644 --- a/locale/weight.h +++ b/locale/weight.h @@ -16,10 +16,15 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#ifndef _WEIGHT_H_ +#define _WEIGHT_H_ 1 + /* Find index of weight. */ -auto inline int32_t -__attribute ((always_inline)) -findidx (const unsigned char **cpp, size_t len) +static inline int32_t __attribute__ ((always_inline)) +findidx (const int32_t *table, + const int32_t *indirect, + const unsigned char *extra, + const unsigned char **cpp, size_t len) { int_fast32_t i = table[*(*cpp)++]; const unsigned char *cp; @@ -130,3 +135,5 @@ findidx (const unsigned char **cpp, size_t len) /* NOTREACHED */ return 0x43219876; } + +#endif /* weight.h */ diff --git a/locale/weightwc.h b/locale/weightwc.h index 8f047e3ba7..0f70b00658 100644 --- a/locale/weightwc.h +++ b/locale/weightwc.h @@ -16,10 +16,15 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#ifndef _WEIGHTWC_H_ +#define _WEIGHTWC_H_ 1 + /* Find index of weight. */ -auto inline int32_t -__attribute ((always_inline)) -findidx (const wint_t **cpp, size_t len) +static inline int32_t __attribute__ ((always_inline)) +findidx (const int32_t *table, + const int32_t *indirect, + const wint_t *extra, + const wint_t **cpp, size_t len) { wint_t ch = *(*cpp)++; int32_t i = __collidx_table_lookup ((const char *) table, ch); @@ -109,3 +114,5 @@ findidx (const wint_t **cpp, size_t len) /* NOTREACHED */ return 0x43219876; } + +#endif /* weightwc.h */ diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh index e37129a032..3cb57fb45c 100644 --- a/localedata/sort-test.sh +++ b/localedata/sort-test.sh @@ -53,11 +53,18 @@ for l in $lang; do ${common_objpfx}localedata/xfrm-test $id < $cns.in \ > ${common_objpfx}localedata/$cns.xout || here=1 cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1 + ${test_program_prefix_before_env} \ + ${run_program_env} \ + LC_ALL=$l ${test_program_prefix_after_env} \ + ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \ + > ${common_objpfx}localedata/$cns.nocache.xout || here=1 + cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout || here=1 if test $here -eq 0; then echo "$l xfrm-test OK" else echo "$l xfrm-test FAIL" diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/ /' + diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout | sed 's/^/ /' status=1 fi done diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c index d2aba7d26e..5cf29f60c9 100644 --- a/localedata/xfrm-test.c +++ b/localedata/xfrm-test.c @@ -23,7 +23,10 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <stdbool.h> +/* Keep in sync with string/strxfrm_l.c. */ +#define SMALL_STR_SIZE 4095 struct lines { @@ -37,6 +40,7 @@ int main (int argc, char *argv[]) { int result = 0; + bool nocache = false; size_t nstrings, nstrings_max; struct lines *strings; char *line = NULL; @@ -44,7 +48,18 @@ main (int argc, char *argv[]) size_t n; if (argc < 2) - error (1, 0, "usage: %s <random seed>", argv[0]); + error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]); + + if (argc == 3) + { + if (strcmp (argv[2], "-nocache") == 0) + nocache = true; + else + { + printf ("Unknown option %s!\n", argv[2]); + exit (1); + } + } setlocale (LC_ALL, ""); @@ -59,9 +74,9 @@ main (int argc, char *argv[]) while (1) { - char saved, *newp; - int needed; - int l; + char saved, *word, *newp; + size_t l, line_len, needed; + if (getline (&line, &len, stdin) < 0) break; @@ -83,10 +98,35 @@ main (int argc, char *argv[]) saved = line[l]; line[l] = '\0'; - needed = strxfrm (NULL, line, 0); + + if (nocache) + { + line_len = strlen (line); + word = malloc (line_len + SMALL_STR_SIZE + 1); + if (word == NULL) + { + printf ("malloc failed: %m\n"); + exit (1); + } + memset (word, ' ', SMALL_STR_SIZE); + memcpy (word + SMALL_STR_SIZE, line, line_len); + word[line_len + SMALL_STR_SIZE] = '\0'; + } + else + word = line; + + needed = strxfrm (NULL, word, 0); newp = malloc (needed + 1); - strxfrm (newp, line, needed + 1); + if (newp == NULL) + { + printf ("malloc failed: %m\n"); + exit (1); + } + strxfrm (newp, word, needed + 1); strings[nstrings].xfrm = newp; + + if (nocache) + free (word); line[l] = saved; ++nstrings; } diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h index 01e81ba9f1..711ac1d912 100644 --- a/misc/sys/cdefs.h +++ b/misc/sys/cdefs.h @@ -321,8 +321,14 @@ inline semantics, unless -fgnu89-inline is used. Using __GNUC_STDC_INLINE__ or __GNUC_GNU_INLINE is not a good enough check for gcc because gcc versions older than 4.3 may define these macros and still not guarantee GNU inlining - semantics. */ -#if !defined __cplusplus || __GNUC_PREREQ (4,3) + semantics. + + clang++ identifies itself as gcc-4.2, but has support for GNU inlining + semantics, that can be checked fot by using the __GNUC_STDC_INLINE_ and + __GNUC_GNU_INLINE__ macro definitions. */ +#if (!defined __cplusplus || __GNUC_PREREQ (4,3) \ + || (defined __clang__ && (defined __GNUC_STDC_INLINE__ \ + || defined __GNUC_GNU_INLINE__))) # if defined __GNUC_STDC_INLINE__ || defined __cplusplus # define __extern_inline extern __inline __attribute__ ((__gnu_inline__)) # define __extern_always_inline \ diff --git a/nscd/nscd.c b/nscd/nscd.c index 7131ead8cb..b7704b37f8 100644 --- a/nscd/nscd.c +++ b/nscd/nscd.c @@ -451,33 +451,36 @@ parse_opt (int key, char *arg, struct argp_state *state) static char * more_help (int key, const char *text, void *input) { - char *tables, *tp = NULL; - switch (key) { case ARGP_KEY_HELP_EXTRA: { - dbtype cnt; + /* We print some extra information. */ - tables = xmalloc (sizeof (dbnames) + 1); - for (cnt = 0; cnt < lastdb; cnt++) + char *tables = xstrdup (dbnames[0]); + for (dbtype i = 1; i < lastdb; ++i) { - strcat (tables, dbnames[cnt]); - strcat (tables, " "); + char *more_tables; + if (asprintf (&more_tables, "%s %s", tables, dbnames[i]) < 0) + more_tables = NULL; + free (tables); + if (more_tables == NULL) + return NULL; + tables = more_tables; } - } - /* We print some extra information. */ - if (asprintf (&tp, gettext ("\ + char *tp; + if (asprintf (&tp, gettext ("\ Supported tables:\n\ %s\n\ \n\ For bug reporting instructions, please see:\n\ %s.\n\ "), tables, REPORT_BUGS_TO) < 0) - tp = NULL; - free (tables); - return tp; + tp = NULL; + free (tables); + return tp; + } default: break; @@ -622,15 +625,15 @@ monitor_child (int fd) } if (WIFEXITED (status)) - { - child_ret = WEXITSTATUS (status); - fprintf (stderr, _("child exited with status %d\n"), child_ret); - } + { + child_ret = WEXITSTATUS (status); + fprintf (stderr, _("child exited with status %d\n"), child_ret); + } if (WIFSIGNALED (status)) - { - child_ret = WTERMSIG (status); - fprintf (stderr, _("child terminated by signal %d\n"), child_ret); - } + { + child_ret = WTERMSIG (status); + fprintf (stderr, _("child terminated by signal %d\n"), child_ret); + } } /* We have the child status, so exit with that code. */ diff --git a/nscd/nscd_conf.c b/nscd/nscd_conf.c index 7856ed9b5a..c8e194d3e2 100644 --- a/nscd/nscd_conf.c +++ b/nscd/nscd_conf.c @@ -32,9 +32,6 @@ #include "dbg_log.h" #include "nscd.h" -/* Wrapper functions with error checking for standard functions. */ -extern char *xstrdup (const char *s); - /* Names of the databases. */ const char *const dbnames[lastdb] = diff --git a/po/fr.po b/po/fr.po index fbf564a69d..bb7a9240c9 100644 --- a/po/fr.po +++ b/po/fr.po @@ -6673,11 +6673,11 @@ msgstr "Erreur d'entrée/sortie sur l'hôte cible" #: sysdeps/gnu/errlist.c:1399 msgid "No medium found" -msgstr "Aucun medium trouvé" +msgstr "Aucun médium trouvé" #: sysdeps/gnu/errlist.c:1407 msgid "Wrong medium type" -msgstr "Mauvais type de medium" +msgstr "Mauvais type de médium" #: sysdeps/gnu/errlist.c:1415 msgid "Required key not available" diff --git a/posix/fnmatch.c b/posix/fnmatch.c index c330a122ab..85a6ec2263 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -221,6 +221,8 @@ __wcschrnul (s, c) # define MEMCHR(S, C, N) memchr (S, C, N) # define STRCOLL(S1, S2) strcoll (S1, S2) # define WIDE_CHAR_VERSION 0 +# include <locale/weight.h> +# define FINDIDX findidx # include "fnmatch_loop.c" @@ -246,6 +248,12 @@ __wcschrnul (s, c) # define MEMCHR(S, C, N) wmemchr (S, C, N) # define STRCOLL(S1, S2) wcscoll (S1, S2) # define WIDE_CHAR_VERSION 1 +/* Change the name the header defines so it doesn't conflict with + the <locale/weight.h> version included above. */ +# define findidx findidxwc +# include <locale/weightwc.h> +# undef findidx +# define FINDIDX findidxwc # undef IS_CHAR_CLASS /* We have to convert the wide character string in a multibyte string. But diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c index 1957397d24..db6d9d7c56 100644 --- a/posix/fnmatch_loop.c +++ b/posix/fnmatch_loop.c @@ -376,7 +376,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used) const int32_t *table; # if WIDE_CHAR_VERSION const int32_t *weights; - const int32_t *extra; + const wint_t *extra; # else const unsigned char *weights; const unsigned char *extra; @@ -385,19 +385,12 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used) int32_t idx; const UCHAR *cp = (const UCHAR *) str; - /* This #include defines a local function! */ -# if WIDE_CHAR_VERSION -# include <locale/weightwc.h> -# else -# include <locale/weight.h> -# endif - # if WIDE_CHAR_VERSION table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); weights = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); - extra = (const int32_t *) + extra = (const wint_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); @@ -412,7 +405,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); # endif - idx = findidx (&cp, 1); + idx = FINDIDX (table, indirect, extra, &cp, 1); if (idx != 0) { /* We found a table entry. Now see whether the @@ -422,7 +415,8 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used) int32_t idx2; const UCHAR *np = (const UCHAR *) n; - idx2 = findidx (&np, string_end - n); + idx2 = FINDIDX (table, indirect, extra, + &np, string_end - n); if (idx2 != 0 && (idx >> 24) == (idx2 >> 24) && len == weights[idx2 & 0xffffff]) @@ -1277,3 +1271,4 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, #undef L #undef BTOWC #undef WIDE_CHAR_VERSION +#undef FINDIDX diff --git a/posix/regcomp.c b/posix/regcomp.c index 8f2747b3c9..897fe276a3 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -19,6 +19,10 @@ #include <stdint.h> +#ifdef _LIBC +# include <locale/weight.h> +#endif + static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, size_t length, reg_syntax_t syntax); static void re_compile_fastmap_iter (regex_t *bufp, @@ -3426,8 +3430,6 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) int32_t idx1, idx2; unsigned int ch; size_t len; - /* This #include defines a local function! */ -# include <locale/weight.h> /* Calculate the index for equivalence class. */ cp = name; table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); @@ -3437,7 +3439,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - idx1 = findidx (&cp, -1); + idx1 = findidx (table, indirect, extra, &cp, -1); if (BE (idx1 == 0 || *cp != '\0', 0)) /* This isn't a valid character. */ return REG_ECOLLATE; @@ -3448,7 +3450,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) { char_buf[0] = ch; cp = char_buf; - idx2 = findidx (&cp, 1); + idx2 = findidx (table, indirect, extra, &cp, 1); /* idx2 = table[ch]; */ diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 4bbf6a8276..d1ed3dc46a 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -733,6 +733,10 @@ re_string_wchar_at (const re_string_t *pstr, int idx) } # ifndef NOT_IN_libc +# ifdef _LIBC +# include <locale/weight.h> +# endif + static int internal_function __attribute__ ((pure, unused)) re_string_elem_size_at (const re_string_t *pstr, int idx) @@ -740,7 +744,6 @@ re_string_elem_size_at (const re_string_t *pstr, int idx) # ifdef _LIBC const unsigned char *p, *extra; const int32_t *table, *indirect; -# include <locale/weight.h> uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) @@ -751,7 +754,7 @@ re_string_elem_size_at (const re_string_t *pstr, int idx) indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); p = pstr->mbs + idx; - findidx (&p, pstr->len - idx); + findidx (table, indirect, extra, &p, pstr->len - idx); return p - pstr->mbs - idx; } else diff --git a/posix/regexec.c b/posix/regexec.c index 7032da75aa..c840b38fc3 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -3749,6 +3749,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, one collating element like '.', '[a-z]', opposite to the other nodes can only accept one byte. */ +# ifdef _LIBC +# include <locale/weight.h> +# endif + static int internal_function check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, @@ -3868,8 +3872,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, const int32_t *table, *indirect; const unsigned char *weights, *extra; const char *collseqwc; - /* This #include defines a local function! */ -# include <locale/weight.h> /* match with collating_symbol? */ if (cset->ncoll_syms) @@ -3925,7 +3927,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - int32_t idx = findidx (&cp, elem_len); + int32_t idx = findidx (table, indirect, extra, &cp, elem_len); if (idx > 0) for (i = 0; i < cset->nequiv_classes; ++i) { diff --git a/stdio-common/Makefile b/stdio-common/Makefile index 24e8496f75..e5e45b6135 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -57,7 +57,7 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \ bug19 bug19a tst-popen2 scanf13 scanf14 scanf15 bug20 bug21 bug22 \ scanf16 scanf17 tst-setvbuf1 tst-grouping bug23 bug24 \ bug-vfprintf-nargs tst-long-dbl-fphex tst-fphex-wide tst-sprintf3 \ - bug25 tst-printf-round bug23-2 bug23-3 bug23-4 + bug25 tst-printf-round bug23-2 bug23-3 bug23-4 bug26 test-srcs = tst-unbputc tst-printf diff --git a/string/strcoll_l.c b/string/strcoll_l.c index 10ce4a67ce..d4f42a32e5 100644 --- a/string/strcoll_l.c +++ b/string/strcoll_l.c @@ -41,6 +41,7 @@ #define CONCAT1(a,b) a##b #include "../locale/localeinfo.h" +#include WEIGHT_H /* Track status while looking for sequences in a string. */ typedef struct @@ -152,7 +153,6 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets, const USTRING_TYPE *weights, const int32_t *table, const USTRING_TYPE *extra, const int32_t *indirect) { -#include WEIGHT_H size_t val = seq->val = 0; int len = seq->len; size_t backw_stop = seq->backw_stop; @@ -194,7 +194,7 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets, while (*us != L('\0')) { - int32_t tmp = findidx (&us, -1); + int32_t tmp = findidx (table, indirect, extra, &us, -1); rulearr[idxmax] = tmp >> 24; idxarr[idxmax] = tmp & 0xffffff; idxcnt = idxmax++; @@ -242,7 +242,6 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, const USTRING_TYPE *extra, const int32_t *indirect, int pass) { -#include WEIGHT_H size_t val = seq->val = 0; int len = seq->len; size_t backw_stop = seq->backw_stop; @@ -285,7 +284,7 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, us = seq->back_us; while (i < backw) { - int32_t tmp = findidx (&us, -1); + int32_t tmp = findidx (table, indirect, extra, &us, -1); idx = tmp & 0xffffff; i++; } @@ -300,7 +299,7 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, while (*us != L('\0')) { - int32_t tmp = findidx (&us, -1); + int32_t tmp = findidx (table, indirect, extra, &us, -1); unsigned char rule = tmp >> 24; prev_idx = idx; idx = tmp & 0xffffff; diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c index 04b9338f05..95ffd6fdf1 100644 --- a/string/strxfrm_l.c +++ b/string/strxfrm_l.c @@ -40,8 +40,24 @@ #define CONCAT(a,b) CONCAT1(a,b) #define CONCAT1(a,b) a##b +/* Maximum string size that is calculated with cached indices. Right now this + is an arbitrary value open to optimizations. SMALL_STR_SIZE * 4 has to be + lower than __MAX_ALLOCA_CUTOFF. Keep localedata/xfrm-test.c in sync. */ +#define SMALL_STR_SIZE 4095 + #include "../locale/localeinfo.h" +#include WEIGHT_H +/* Group locale data for shorter parameter lists. */ +typedef struct +{ + uint_fast32_t nrules; + unsigned char *rulesets; + USTRING_TYPE *weights; + int32_t *table; + USTRING_TYPE *extra; + int32_t *indirect; +} locale_data_t; #ifndef WIDE_CHAR_VERSION @@ -80,115 +96,325 @@ utf8_encode (char *buf, int val) } #endif +/* Find next weight and rule index. Inlined since called for every char. */ +static __always_inline size_t +find_idx (const USTRING_TYPE **us, int32_t *weight_idx, + unsigned char *rule_idx, const locale_data_t *l_data, const int pass) +{ + int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us, + -1); + *rule_idx = tmp >> 24; + int32_t idx = tmp & 0xffffff; + size_t len = l_data->weights[idx++]; + + /* Skip over indices of previous levels. */ + for (int i = 0; i < pass; i++) + { + idx += len; + len = l_data->weights[idx++]; + } -size_t -STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) + *weight_idx = idx; + return len; +} + +static int +find_position (const USTRING_TYPE *us, const locale_data_t *l_data, + const int pass) { - struct __locale_data *current = l->__locales[LC_COLLATE]; - uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; - /* We don't assign the following values right away since it might be - unnecessary in case there are no rules. */ - const unsigned char *rulesets; - const int32_t *table; - const USTRING_TYPE *weights; - const USTRING_TYPE *extra; - const int32_t *indirect; + int32_t weight_idx; + unsigned char rule_idx; + const USTRING_TYPE *usrc = us; + + find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass); + return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position; +} + +/* Do the transformation. */ +static size_t +do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n, + const locale_data_t *l_data) +{ + int32_t weight_idx; + unsigned char rule_idx; uint_fast32_t pass; - size_t needed; + size_t needed = 0; size_t last_needed; - const USTRING_TYPE *usrc; - size_t srclen = STRLEN (src); - int32_t *idxarr; - unsigned char *rulearr; - size_t idxmax; - size_t idxcnt; - int use_malloc; - -#include WEIGHT_H - if (nrules == 0) + /* Now the passes over the weights. */ + for (pass = 0; pass < l_data->nrules; ++pass) { - if (n != 0) - STPNCPY (dest, src, MIN (srclen + 1, n)); + size_t backw_len = 0; + last_needed = needed; + const USTRING_TYPE *cur = usrc; + const USTRING_TYPE *backw_start = NULL; - return srclen; - } + /* We assume that if a rule has defined `position' in one section + this is true for all of them. */ + int position = find_position (cur, l_data, pass); - rulesets = (const unsigned char *) - current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; - table = (const int32_t *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; - weights = (const USTRING_TYPE *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; - extra = (const USTRING_TYPE *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; - indirect = (const int32_t *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; - use_malloc = 0; + if (position == 0) + { + while (*cur != L('\0')) + { + const USTRING_TYPE *pos = cur; + size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, + pass); + int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; - assert (((uintptr_t) table) % __alignof__ (table[0]) == 0); - assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0); - assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0); - assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0); + if ((rule & sort_forward) != 0) + { + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t i = backw_len; i > 0; ) + { + int32_t weight_idx; + unsigned char rule_idx; + size_t len = find_idx (&backw_start, &weight_idx, + &rule_idx, l_data, pass); + if (needed + i < n) + for (size_t j = len; j > 0; j--) + dest[needed + i - j] = + l_data->weights[weight_idx++]; + + i -= len; + } - /* Handle an empty string as a special case. */ - if (srclen == 0) - { - if (n != 0) - *dest = L('\0'); - return 0; - } + needed += backw_len; + backw_start = NULL; + backw_len = 0; + } - /* We need the elements of the string as unsigned values since they - are used as indeces. */ - usrc = (const USTRING_TYPE *) src; - - /* Perform the first pass over the string and while doing this find - and store the weights for each character. Since we want this to - be as fast as possible we are using `alloca' to store the temporary - values. But since there is no limit on the length of the string - we have to use `malloc' if the string is too long. We should be - very conservative here. */ - if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1))) - { - idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1)); - rulearr = (unsigned char *) &idxarr[srclen]; - - if (idxarr == NULL) - /* No memory. Well, go with the stack then. - - XXX Once this implementation is stable we will handle this - differently. Instead of precomputing the indeces we will - do this in time. This means, though, that this happens for - every pass again. */ - goto try_stack; - use_malloc = 1; - } - else - { - try_stack: - idxarr = (int32_t *) alloca (srclen * sizeof (int32_t)); - rulearr = (unsigned char *) alloca (srclen + 1); + /* Now handle the forward element. */ + if (needed + len < n) + while (len-- > 0) + dest[needed++] = l_data->weights[weight_idx++]; + else + /* No more characters fit into the buffer. */ + needed += len; + } + else + { + /* Remember start of the backward sequence & track length. */ + if (backw_start == NULL) + backw_start = pos; + backw_len += len; + } + } + + + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t i = backw_len; i > 0; ) + { + size_t len = find_idx (&backw_start, &weight_idx, &rule_idx, + l_data, pass); + if (needed + i < n) + for (size_t j = len; j > 0; j--) + dest[needed + i - j] = + l_data->weights[weight_idx++]; + + i -= len; + } + + needed += backw_len; + } + } + else + { + int val = 1; +#ifndef WIDE_CHAR_VERSION + char buf[7]; + size_t buflen; +#endif + size_t i; + + while (*cur != L('\0')) + { + const USTRING_TYPE *pos = cur; + size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, + pass); + int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; + + if ((rule & sort_forward) != 0) + { + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t p = backw_len; p > 0; p--) + { + size_t len; + int32_t weight_idx; + unsigned char rule_idx; + const USTRING_TYPE *backw_cur = backw_start; + + /* To prevent a warning init the used vars. */ + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + for (i = 1; i < p; i++) + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + if (len != 0) + { +#ifdef WIDE_CHAR_VERSION + if (needed + 1 + len < n) + { + dest[needed] = val; + for (i = 0; i < len; ++i) + dest[needed + 1 + i] = + l_data->weights[weight_idx + i]; + } + needed += 1 + len; +#else + buflen = utf8_encode (buf, val); + if (needed + buflen + len < n) + { + for (i = 0; i < buflen; ++i) + dest[needed + i] = buf[i]; + for (i = 0; i < len; ++i) + dest[needed + buflen + i] = + l_data->weights[weight_idx + i]; + } + needed += buflen + len; +#endif + val = 1; + } + else + ++val; + } + + backw_start = NULL; + backw_len = 0; + } + + /* Now handle the forward element. */ + if (len != 0) + { +#ifdef WIDE_CHAR_VERSION + if (needed + 1 + len < n) + { + dest[needed] = val; + for (i = 0; i < len; ++i) + dest[needed + 1 + i] = + l_data->weights[weight_idx + i]; + } + needed += 1 + len; +#else + buflen = utf8_encode (buf, val); + if (needed + buflen + len < n) + { + for (i = 0; i < buflen; ++i) + dest[needed + i] = buf[i]; + for (i = 0; i < len; ++i) + dest[needed + buflen + i] = + l_data->weights[weight_idx + i]; + } + needed += buflen + len; +#endif + val = 1; + } + else + ++val; + } + else + { + /* Remember start of the backward sequence & track length. */ + if (backw_start == NULL) + backw_start = pos; + backw_len++; + } + } + + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t p = backw_len; p > 0; p--) + { + size_t len; + int32_t weight_idx; + unsigned char rule_idx; + const USTRING_TYPE *backw_cur = backw_start; + + /* To prevent a warning init the used vars. */ + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + for (i = 1; i < p; i++) + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + if (len != 0) + { +#ifdef WIDE_CHAR_VERSION + if (needed + 1 + len < n) + { + dest[needed] = val; + for (i = 0; i < len; ++i) + dest[needed + 1 + i] = + l_data->weights[weight_idx + i]; + } + needed += 1 + len; +#else + buflen = utf8_encode (buf, val); + if (needed + buflen + len < n) + { + for (i = 0; i < buflen; ++i) + dest[needed + i] = buf[i]; + for (i = 0; i < len; ++i) + dest[needed + buflen + i] = + l_data->weights[weight_idx + i]; + } + needed += buflen + len; +#endif + val = 1; + } + else + ++val; + } + } + } + + /* Finally store the byte to separate the passes or terminate + the string. */ + if (needed < n) + dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0'); + ++needed; } - idxmax = 0; - do + /* This is a little optimization: many collation specifications have + a `position' rule at the end and if no non-ignored character + is found the last \1 byte is immediately followed by a \0 byte + signalling this. We can avoid the \1 byte(s). */ + if (needed > 2 && needed == last_needed + 1) { - int32_t tmp = findidx (&usrc, -1); - rulearr[idxmax] = tmp >> 24; - idxarr[idxmax] = tmp & 0xffffff; - - ++idxmax; + /* Remove the \1 byte. */ + if (--needed <= n) + dest[needed - 1] = L('\0'); } - while (*usrc != L('\0')); - /* This element is only read, the value never used but to determine - another value which then is ignored. */ - rulearr[idxmax] = '\0'; + /* Return the number of bytes/words we need, but don't count the NUL + byte/word at the end. */ + return needed - 1; +} + +/* Do the transformation using weight-index and rule cache. */ +static size_t +do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data, + size_t idxmax, int32_t *idxarr, const unsigned char *rulearr) +{ + uint_fast32_t nrules = l_data->nrules; + unsigned char *rulesets = l_data->rulesets; + USTRING_TYPE *weights = l_data->weights; + uint_fast32_t pass; + size_t needed = 0; + size_t last_needed; + size_t idxcnt; - /* Now the passes over the weights. We now use the indeces we found - before. */ - needed = 0; + /* Now the passes over the weights. */ for (pass = 0; pass < nrules; ++pass) { size_t backw_stop = ~0ul; @@ -434,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) dest[needed - 1] = L('\0'); } - /* Free the memory if needed. */ - if (use_malloc) - free (idxarr); - /* Return the number of bytes/words we need, but don't count the NUL byte/word at the end. */ return needed - 1; } + +size_t +STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) +{ + locale_data_t l_data; + struct __locale_data *current = l->__locales[LC_COLLATE]; + l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; + + /* Handle byte comparison case. */ + if (l_data.nrules == 0) + { + size_t srclen = STRLEN (src); + + if (n != 0) + STPNCPY (dest, src, MIN (srclen + 1, n)); + + return srclen; + } + + /* Handle an empty string, code hereafter relies on strlen (src) > 0. */ + if (*src == L('\0')) + { + if (n != 0) + *dest = L('\0'); + return 0; + } + + /* Get the locale data. */ + l_data.rulesets = (unsigned char *) + current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; + l_data.table = (int32_t *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; + l_data.weights = (USTRING_TYPE *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; + l_data.extra = (USTRING_TYPE *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; + l_data.indirect = (int32_t *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; + + assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0); + assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0); + assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0); + assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0); + + /* We need the elements of the string as unsigned values since they + are used as indeces. */ + const USTRING_TYPE *usrc = (const USTRING_TYPE *) src; + + /* Allocate cache for small strings on the stack and fill it with weight and + rule indices. If the cache size is not sufficient, continue with the + uncached xfrm version. */ + size_t idxmax = 0; + const USTRING_TYPE *cur = usrc; + int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t)); + unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1); + + do + { + int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur, + -1); + rulearr[idxmax] = tmp >> 24; + idxarr[idxmax] = tmp & 0xffffff; + + ++idxmax; + } + while (*cur != L('\0') && idxmax < SMALL_STR_SIZE); + + /* This element is only read, the value never used but to determine + another value which then is ignored. */ + rulearr[idxmax] = '\0'; + + /* Do the transformation. */ + if (*cur == L('\0')) + return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr); + else + return do_xfrm (usrc, dest, n, &l_data); +} libc_hidden_def (STRXFRM) #ifndef WIDE_CHAR_VERSION diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S index 35d603ae9e..69b45eac44 100644 --- a/sysdeps/aarch64/start.S +++ b/sysdeps/aarch64/start.S @@ -47,7 +47,6 @@ _start: /* Create an initial frame with 0 LR and FP */ mov x29, #0 mov x30, #0 - mov x29, sp /* Setup rtld_fini in argument register */ mov x5, x0 diff --git a/sysdeps/unix/sysv/linux/mips/vfork.S b/sysdeps/unix/sysv/linux/mips/vfork.S index 80c362d6eb..2c1a747db3 100644 --- a/sysdeps/unix/sysv/linux/mips/vfork.S +++ b/sysdeps/unix/sysv/linux/mips/vfork.S @@ -108,3 +108,4 @@ L(error): libc_hidden_def(__vfork) weak_alias (__vfork, vfork) +strong_alias (__vfork, __libc_vfork) diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index 2a6dcb78d8..f7c1bbe0db 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -167,9 +167,14 @@ __init_cpu_features (void) /* Determine if AVX is usable. */ if (CPUID_AVX) __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; - /* Determine if AVX2 is usable. */ +#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load +# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load +#endif + /* Determine if AVX2 is usable. Unaligned load with 256-bit + AVX registers are faster on processors with AVX2. */ if (CPUID_AVX2) - __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable; + __cpu_features.feature[index_AVX2_Usable] + |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; /* Determine if FMA is usable. */ if (CPUID_FMA) __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable; diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index ef0abbd226..2fc7c7ceec 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -25,6 +25,7 @@ #define bit_FMA4_Usable (1 << 8) #define bit_Slow_SSE4_2 (1 << 9) #define bit_AVX2_Usable (1 << 10) +#define bit_AVX_Fast_Unaligned_Load (1 << 11) /* CPUID Feature flags. */ @@ -74,6 +75,7 @@ # define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE # define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE # define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE #else /* __ASSEMBLER__ */ @@ -169,6 +171,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_FMA4_Usable FEATURE_INDEX_1 # define index_Slow_SSE4_2 FEATURE_INDEX_1 # define index_AVX2_Usable FEATURE_INDEX_1 +# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 # define HAS_ARCH_FEATURE(name) \ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) @@ -181,5 +184,6 @@ extern const struct cpu_features *__get_cpu_features (void) # define HAS_AVX2 HAS_ARCH_FEATURE (AVX2_Usable) # define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable) # define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable) +# define HAS_AVX_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S index e666695407..10bbd39631 100644 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ b/sysdeps/x86_64/multiarch/memcpy.S @@ -33,7 +33,7 @@ ENTRY(__new_memcpy) jne 1f call __init_cpu_features 1: leaq __memcpy_avx_unaligned(%rip), %rax - testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) + testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip) jz 1f ret 1: leaq __memcpy_sse2(%rip), %rax diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S index 076b19a9ea..30cca20330 100644 --- a/sysdeps/x86_64/multiarch/memcpy_chk.S +++ b/sysdeps/x86_64/multiarch/memcpy_chk.S @@ -39,7 +39,7 @@ ENTRY(__memcpy_chk) testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) jz 2f leaq __memcpy_chk_ssse3_back(%rip), %rax - testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) + testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip) jz 2f leaq __memcpy_chk_avx_unaligned(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c index 0c9af7e4df..2c86a4a476 100644 --- a/sysdeps/x86_64/multiarch/memmove.c +++ b/sysdeps/x86_64/multiarch/memmove.c @@ -49,7 +49,7 @@ extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden; ifunc symbol properly. */ extern __typeof (__redirect_memmove) __libc_memmove; libc_ifunc (__libc_memmove, - HAS_AVX + HAS_AVX_FAST_UNALIGNED_LOAD ? __memmove_avx_unaligned : (HAS_SSSE3 ? (HAS_FAST_COPY_BACKWARD diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c index 44344f2820..5ffcaecce4 100644 --- a/sysdeps/x86_64/multiarch/memmove_chk.c +++ b/sysdeps/x86_64/multiarch/memmove_chk.c @@ -30,7 +30,7 @@ extern __typeof (__memmove_chk) __memmove_chk_avx_unaligned attribute_hidden; #include "debug/memmove_chk.c" libc_ifunc (__memmove_chk, - HAS_AVX ? __memmove_chk_avx_unaligned : + HAS_AVX_FAST_UNALIGNED_LOAD ? __memmove_chk_avx_unaligned : (HAS_SSSE3 ? (HAS_FAST_COPY_BACKWARD ? __memmove_chk_ssse3_back : __memmove_chk_ssse3) diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S index 7589d8c1ec..e205ef5557 100644 --- a/sysdeps/x86_64/multiarch/mempcpy.S +++ b/sysdeps/x86_64/multiarch/mempcpy.S @@ -37,7 +37,7 @@ ENTRY(__mempcpy) testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) jz 2f leaq __mempcpy_ssse3_back(%rip), %rax - testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) + testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip) jz 2f leaq __mempcpy_avx_unaligned(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S index 88e0b74e83..dd777dfa48 100644 --- a/sysdeps/x86_64/multiarch/mempcpy_chk.S +++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S @@ -39,7 +39,7 @@ ENTRY(__mempcpy_chk) testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) jz 2f leaq __mempcpy_chk_ssse3_back(%rip), %rax - testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) + testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip) jz 2f leaq __mempcpy_chk_avx_unaligned(%rip), %rax 2: ret |