diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2017-06-15 08:33:25 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2017-06-15 08:33:35 -0700 |
commit | 93e46f87735e59c15eece22a086948519d129e1c (patch) | |
tree | 7c73a44054e32b5f7b1b3a31fa8246d014339919 /sysdeps/x86_64/multiarch | |
parent | 2c0b90ab443abc967cbf75add4f7fde84978cb95 (diff) | |
download | glibc-93e46f87735e59c15eece22a086948519d129e1c.tar.gz glibc-93e46f87735e59c15eece22a086948519d129e1c.tar.xz glibc-93e46f87735e59c15eece22a086948519d129e1c.zip |
x86-64: Implement memset family IFUNC selectors in C
Implement memset family IFUNC selectors in C. All internal calls within libc.so can use IFUNC on x86-64 since, unlike x86, x86-64 supports PC-relative addressing to access the GOT entry so that it can call via PLT without using an extra register. For libc.a, we can't use IFUNC for functions which are called before IFUNC has been initialized. Using IFUNC internally reduces the icache footprint since libc.so and other code in the process use the same implementations. This patch uses IFUNC for memset functions within libc. 2017-06-07 H.J. Lu <hongjiu.lu@intel.com> Erich Elsen <eriche@google.com> * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add memset-sse2-unaligned-erms, and memset_chk-nonshared. * sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add test for __memset_chk_erms. Update comments. * sysdeps/x86_64/multiarch/ifunc-memset.h: New file. * sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S: Likewise. * sysdeps/x86_64/multiarch/memset.c: Likewise. * sysdeps/x86_64/multiarch/memset_chk-nonshared.S: Likewise. * sysdeps/x86_64/multiarch/memset_chk.c: Likewise. * sysdeps/x86_64/multiarch/memset.S: Removed. * sysdeps/x86_64/multiarch/memset_chk.S: Likewise. * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S (__memset_chk_erms): New function.
Diffstat (limited to 'sysdeps/x86_64/multiarch')
-rw-r--r-- | sysdeps/x86_64/multiarch/Makefile | 3 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/ifunc-memset.h | 69 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S | 41 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 5 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset.S | 82 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset.c | 37 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset_chk-nonshared.S | 21 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset_chk.S | 61 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset_chk.c | 31 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/wmemset.c | 7 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S | 2 |
12 files changed, 218 insertions, 147 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 6e70993a45..f577175f3d 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -29,6 +29,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ memmove-sse2-unaligned-erms \ memmove-avx-unaligned-erms \ memmove-avx512-unaligned-erms \ + memset-sse2-unaligned-erms \ memset-avx2-unaligned-erms \ memset-avx512-unaligned-erms CFLAGS-varshift.c += -msse4 @@ -50,6 +51,6 @@ endif ifeq ($(subdir),debug) sysdep_routines += memcpy_chk-nonshared mempcpy_chk-nonshared \ - memmove_chk-nonshared \ + memmove_chk-nonshared memset_chk-nonshared \ wmemset_chk-nonshared endif diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 3462e67cc7..52ea44018e 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -121,9 +121,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memrchr_avx2) IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2)) - /* Support sysdeps/x86_64/multiarch/memset_chk.S. */ + /* Support sysdeps/x86_64/multiarch/memset_chk.c. */ IFUNC_IMPL (i, name, __memset_chk, IFUNC_IMPL_ADD (array, i, __memset_chk, 1, + __memset_chk_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2_unaligned_erms) @@ -144,7 +146,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memset_chk_avx512_no_vzeroupper) ) - /* Support sysdeps/x86_64/multiarch/memset.S. */ + /* Support sysdeps/x86_64/multiarch/memset.c. 
*/ IFUNC_IMPL (i, name, memset, IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2_unaligned) diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h new file mode 100644 index 0000000000..5c7eb7df9c --- /dev/null +++ b/sysdeps/x86_64/multiarch/ifunc-memset.h @@ -0,0 +1,69 @@ +/* Common definition for memset/memset_chk ifunc selections. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper) + attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS)) + return OPTIMIZE (erms); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + { + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx512_no_vzeroupper); + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx512_unaligned_erms); + + return OPTIMIZE (avx512_unaligned); + } + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx2_unaligned_erms); + else + return OPTIMIZE (avx2_unaligned); + } + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (sse2_unaligned_erms); + + return OPTIMIZE (sse2_unaligned); +} diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S new file mode 100644 index 0000000000..99f5a36e3c --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S @@ -0,0 +1,41 @@ +/* memset with SSE2. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2014-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <shlib-compat.h> +#include <init-arch.h> + +#if IS_IN (libc) +# define MEMSET_SYMBOL(p,s) p##_sse2_##s +# define WMEMSET_SYMBOL(p,s) p##_sse2_##s + +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +# endif + +# undef weak_alias +# define weak_alias(original, alias) \ + .weak bzero; bzero = __bzero + +# undef strong_alias +# define strong_alias(ignored1, ignored2) +#endif + +#include <sysdeps/x86_64/memset.S> diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S index 2eb9e3744e..8ed470283e 100644 --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S @@ -121,6 +121,11 @@ L(entry_from_bzero): END (MEMSET_SYMBOL (__memset, unaligned)) # if VEC_SIZE == 16 +ENTRY (__memset_chk_erms) + cmpq %rdx, %rcx + jb HIDDEN_JUMPTARGET (__chk_fail) +END (__memset_chk_erms) + /* Only used to measure performance of REP STOSB. 
*/ ENTRY (__memset_erms) # else diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S deleted file mode 100644 index 11f27378b0..0000000000 --- a/sysdeps/x86_64/multiarch/memset.S +++ /dev/null @@ -1,82 +0,0 @@ -/* Multiple versions of memset - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2014-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <shlib-compat.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. 
*/ -#if IS_IN (libc) -ENTRY(memset) - .type memset, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - lea __memset_erms(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_ERMS) - jnz 2f - lea __memset_sse2_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 1f - lea __memset_sse2_unaligned(%rip), %RAX_LP -1: - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - lea __memset_avx2_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz L(AVX512F) - lea __memset_avx2_unaligned(%rip), %RAX_LP -L(AVX512F): - HAS_ARCH_FEATURE (Prefer_No_AVX512) - jnz 2f - HAS_ARCH_FEATURE (AVX512F_Usable) - jz 2f - lea __memset_avx512_no_vzeroupper(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) - jnz 2f - lea __memset_avx512_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 2f - lea __memset_avx512_unaligned(%rip), %RAX_LP -2: ret -END(memset) -#endif - -#if IS_IN (libc) -# define MEMSET_SYMBOL(p,s) p##_sse2_##s -# define WMEMSET_SYMBOL(p,s) p##_sse2_##s - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* It doesn't make sense to send libc-internal memset calls through a PLT. - The speedup we get from using SSE2 instructions is likely eaten away - by the indirect call in the PLT. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memset; __GI_memset = __memset_sse2_unaligned; \ - .globl __GI_wmemset; __GI_wmemset = __wmemset_sse2_unaligned; \ - .globl __GI___wmemset; __GI___wmemset = __wmemset_sse2_unaligned -# endif - -# undef weak_alias -# define weak_alias(original, alias) \ - .weak bzero; bzero = __bzero - -# undef strong_alias -# define strong_alias(original, alias) -#endif - -#include "../memset.S" diff --git a/sysdeps/x86_64/multiarch/memset.c b/sysdeps/x86_64/multiarch/memset.c new file mode 100644 index 0000000000..e3f5eaa319 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset.c @@ -0,0 +1,37 @@ +/* Multiple versions of memset. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in lib and for + DSO. In static binaries we need memset before the initialization + happened. */ +#if IS_IN (libc) +# define memset __redirect_memset +# include <string.h> +# undef memset + +# define SYMBOL_NAME memset +# include "ifunc-memset.h" + +libc_ifunc_redirected (__redirect_memset, memset, IFUNC_SELECTOR ()); + +# ifdef SHARED +__hidden_ver1 (memset, __GI_memset, __redirect_memset) + __attribute__ ((visibility ("hidden"))); +# endif +#endif diff --git a/sysdeps/x86_64/multiarch/memset_chk-nonshared.S b/sysdeps/x86_64/multiarch/memset_chk-nonshared.S new file mode 100644 index 0000000000..9a5685c1cd --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset_chk-nonshared.S @@ -0,0 +1,21 @@ +/* Non-shared version of memset_chk for x86-64. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) && !defined SHARED +# include <sysdeps/x86_64/memset_chk.S> +#endif diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S deleted file mode 100644 index 7e08311cdf..0000000000 --- a/sysdeps/x86_64/multiarch/memset_chk.S +++ /dev/null @@ -1,61 +0,0 @@ -/* Multiple versions of memset_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2014-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. 
*/ -#if IS_IN (libc) -# ifdef SHARED -ENTRY(__memset_chk) - .type __memset_chk, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - lea __memset_chk_sse2_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 1f - lea __memset_chk_sse2_unaligned(%rip), %RAX_LP -1: - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - lea __memset_chk_avx2_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz L(AVX512F) - lea __memset_chk_avx2_unaligned(%rip), %RAX_LP -L(AVX512F): - HAS_ARCH_FEATURE (Prefer_No_AVX512) - jnz 2f - HAS_ARCH_FEATURE (AVX512F_Usable) - jz 2f - lea __memset_chk_avx512_no_vzeroupper(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) - jnz 2f - lea __memset_chk_avx512_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 2f - lea __memset_chk_avx512_unaligned(%rip), %RAX_LP -2: ret -END(__memset_chk) - -strong_alias (__memset_chk, __memset_zero_constant_len_parameter) - .section .gnu.warning.__memset_zero_constant_len_parameter - .string "memset used with constant zero length parameter; this could be due to transposed parameters" -# else -# include "../memset_chk.S" -# endif -#endif diff --git a/sysdeps/x86_64/multiarch/memset_chk.c b/sysdeps/x86_64/multiarch/memset_chk.c new file mode 100644 index 0000000000..f1a1c8a44d --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset_chk.c @@ -0,0 +1,31 @@ +/* Multiple versions of __memset_chk + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in libc.so. */ +#if IS_IN (libc) && defined SHARED +# define __memset_chk __redirect_memset_chk +# include <string.h> +# undef __memset_chk + +# define SYMBOL_NAME memset_chk +# include "ifunc-memset.h" + +libc_ifunc_redirected (__redirect_memset_chk, __memset_chk, + IFUNC_SELECTOR ()); +#endif diff --git a/sysdeps/x86_64/multiarch/wmemset.c b/sysdeps/x86_64/multiarch/wmemset.c index dd35be6e49..b5bc2f97a9 100644 --- a/sysdeps/x86_64/multiarch/wmemset.c +++ b/sysdeps/x86_64/multiarch/wmemset.c @@ -30,4 +30,11 @@ libc_ifunc_redirected (__redirect_wmemset, __wmemset, IFUNC_SELECTOR ()); weak_alias (__wmemset, wmemset) + +# ifdef SHARED +__hidden_ver1 (__wmemset, __GI___wmemset, __redirect___wmemset) + __attribute__ ((visibility ("hidden"))); +__hidden_ver1 (wmemset, __GI_wmemset, __redirect_wmemset) + __attribute__ ((visibility ("hidden"))); +# endif #endif diff --git a/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S b/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S index 0a537fe272..19cee720a6 100644 --- a/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S +++ b/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S @@ -17,5 +17,5 @@ <http://www.gnu.org/licenses/>. */ #if IS_IN (libc) && !defined SHARED -# include "../wmemset_chk.S" +# include <sysdeps/x86_64/wmemset_chk.S> #endif |