diff options
-rw-r--r-- | sysdeps/x86_64/multiarch/Makefile | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/ifunc-impl-list.c | 14 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/strrchr-avx2.S | 235 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/strrchr-sse2.S | 35 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/strrchr.c | 30 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/wcsrchr-avx2.S | 3 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/wcsrchr-sse2.S | 23 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/wcsrchr.c | 29 |
8 files changed, 371 insertions, 0 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 60c586ca76..6baf2dbc7c 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -15,6 +15,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ memmove-ssse3-back \ memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \ strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \ + strrchr-sse2 strrchr-avx2 \ strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \ strncase_l-ssse3 strcat-ssse3 strncat-ssse3\ strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \ @@ -40,6 +41,7 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \ wmemcmp-avx2-movbe \ wcscpy-ssse3 wcscpy-c \ wcschr-sse2 wcschr-avx2 \ + wcsrchr-sse2 wcsrchr-avx2 \ wcslen-sse2 wcsnlen-sse2 wcslen-avx2 wcsnlen-avx2 endif diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 86bd4ab445..c2578cb38c 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -251,6 +251,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strchrnul_avx2) IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2)) + /* Support sysdeps/x86_64/multiarch/strrchr.c. */ + IFUNC_IMPL (i, name, strrchr, + IFUNC_IMPL_ADD (array, i, strrchr, + HAS_ARCH_FEATURE (AVX2_Usable), + __strrchr_avx2) + IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) + /* Support sysdeps/x86_64/multiarch/strcmp.S. */ IFUNC_IMPL (i, name, strcmp, IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2), @@ -342,6 +349,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __wcschr_avx2) IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2)) + /* Support sysdeps/x86_64/multiarch/wcsrchr.c. 
*/ + IFUNC_IMPL (i, name, wcsrchr, + IFUNC_IMPL_ADD (array, i, wcsrchr, + HAS_ARCH_FEATURE (AVX2_Usable), + __wcsrchr_avx2) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2)) + /* Support sysdeps/x86_64/multiarch/wcscpy.S. */ IFUNC_IMPL (i, name, wcscpy, IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3), diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S new file mode 100644 index 0000000000..36ef660b2f --- /dev/null +++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S @@ -0,0 +1,235 @@ +/* strrchr/wcsrchr optimized with AVX2. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) + +# include <sysdep.h> + +# ifndef STRRCHR +# define STRRCHR __strrchr_avx2 +# endif + +# ifdef USE_AS_WCSRCHR +# define VPBROADCAST vpbroadcastd +# define VPCMPEQ vpcmpeqd +# else +# define VPBROADCAST vpbroadcastb +# define VPCMPEQ vpcmpeqb +# endif + +# ifndef VZEROUPPER +# define VZEROUPPER vzeroupper +# endif + +# define VEC_SIZE 32 + + .section .text.avx,"ax",@progbits +ENTRY (STRRCHR) + movd %esi, %xmm4 + movl %edi, %ecx + /* Broadcast CHAR to YMM4. */ + VPBROADCAST %xmm4, %ymm4 + vpxor %ymm0, %ymm0, %ymm0 + + /* Check if we may cross page boundary with one vector load. 
*/ + andl $(2 * VEC_SIZE - 1), %ecx + cmpl $VEC_SIZE, %ecx + ja L(cros_page_boundary) + + vmovdqu (%rdi), %ymm1 + VPCMPEQ %ymm1, %ymm0, %ymm2 + VPCMPEQ %ymm1, %ymm4, %ymm3 + vpmovmskb %ymm2, %ecx + vpmovmskb %ymm3, %eax + addq $VEC_SIZE, %rdi + + testl %eax, %eax + jnz L(first_vec) + + testl %ecx, %ecx + jnz L(return_null) + + andq $-VEC_SIZE, %rdi + xorl %edx, %edx + jmp L(aligned_loop) + + .p2align 4 +L(first_vec): + /* Check if there is a nul CHAR. */ + testl %ecx, %ecx + jnz L(char_and_nul_in_first_vec) + + /* Remember the match and keep searching. */ + movl %eax, %edx + movq %rdi, %rsi + andq $-VEC_SIZE, %rdi + jmp L(aligned_loop) + + .p2align 4 +L(cros_page_boundary): + andl $(VEC_SIZE - 1), %ecx + andq $-VEC_SIZE, %rdi + vmovdqa (%rdi), %ymm1 + VPCMPEQ %ymm1, %ymm0, %ymm2 + VPCMPEQ %ymm1, %ymm4, %ymm3 + vpmovmskb %ymm2, %edx + vpmovmskb %ymm3, %eax + shrl %cl, %edx + shrl %cl, %eax + addq $VEC_SIZE, %rdi + + /* Check if there is a CHAR. */ + testl %eax, %eax + jnz L(found_char) + + testl %edx, %edx + jnz L(return_null) + + jmp L(aligned_loop) + + .p2align 4 +L(found_char): + testl %edx, %edx + jnz L(char_and_nul) + + /* Remember the match and keep searching. 
*/ + movl %eax, %edx + leaq (%rdi, %rcx), %rsi + + .p2align 4 +L(aligned_loop): + vmovdqa (%rdi), %ymm1 + VPCMPEQ %ymm1, %ymm0, %ymm2 + addq $VEC_SIZE, %rdi + VPCMPEQ %ymm1, %ymm4, %ymm3 + vpmovmskb %ymm2, %ecx + vpmovmskb %ymm3, %eax + orl %eax, %ecx + jnz L(char_nor_null) + + vmovdqa (%rdi), %ymm1 + VPCMPEQ %ymm1, %ymm0, %ymm2 + add $VEC_SIZE, %rdi + VPCMPEQ %ymm1, %ymm4, %ymm3 + vpmovmskb %ymm2, %ecx + vpmovmskb %ymm3, %eax + orl %eax, %ecx + jnz L(char_nor_null) + + vmovdqa (%rdi), %ymm1 + VPCMPEQ %ymm1, %ymm0, %ymm2 + addq $VEC_SIZE, %rdi + VPCMPEQ %ymm1, %ymm4, %ymm3 + vpmovmskb %ymm2, %ecx + vpmovmskb %ymm3, %eax + orl %eax, %ecx + jnz L(char_nor_null) + + vmovdqa (%rdi), %ymm1 + VPCMPEQ %ymm1, %ymm0, %ymm2 + addq $VEC_SIZE, %rdi + VPCMPEQ %ymm1, %ymm4, %ymm3 + vpmovmskb %ymm2, %ecx + vpmovmskb %ymm3, %eax + orl %eax, %ecx + jz L(aligned_loop) + + .p2align 4 +L(char_nor_null): + /* Find a CHAR or a nul CHAR in a loop. */ + testl %eax, %eax + jnz L(match) +L(return_value): + testl %edx, %edx + jz L(return_null) + movl %edx, %eax + movq %rsi, %rdi + +# ifdef USE_AS_WCSRCHR + /* Keep the first bit for each matching CHAR for bsr. */ + andl $0x11111111, %eax +# endif + bsrl %eax, %eax + leaq -VEC_SIZE(%rdi, %rax), %rax + VZEROUPPER + ret + + .p2align 4 +L(match): + /* Find a CHAR. Check if there is a nul CHAR. */ + vpmovmskb %ymm2, %ecx + testl %ecx, %ecx + jnz L(find_nul) + + /* Remember the match and keep searching. */ + movl %eax, %edx + movq %rdi, %rsi + jmp L(aligned_loop) + + .p2align 4 +L(find_nul): +# ifdef USE_AS_WCSRCHR + /* Keep the first bit for each matching CHAR for bsr. */ + andl $0x11111111, %ecx + andl $0x11111111, %eax +# endif + /* Mask out any matching bits after the nul CHAR. */ + movl %ecx, %r8d + subl $1, %r8d + xorl %ecx, %r8d + andl %r8d, %eax + testl %eax, %eax + /* If there is no CHAR here, return the remembered one. 
*/ + jz L(return_value) + bsrl %eax, %eax + leaq -VEC_SIZE(%rdi, %rax), %rax + VZEROUPPER + ret + + .p2align 4 +L(char_and_nul): + /* Find both a CHAR and a nul CHAR. */ + addq %rcx, %rdi + movl %edx, %ecx +L(char_and_nul_in_first_vec): +# ifdef USE_AS_WCSRCHR + /* Keep the first bit for each matching CHAR for bsr. */ + andl $0x11111111, %ecx + andl $0x11111111, %eax +# endif + /* Mask out any matching bits after the nul CHAR. */ + movl %ecx, %r8d + subl $1, %r8d + xorl %ecx, %r8d + andl %r8d, %eax + testl %eax, %eax + /* Return null pointer if the nul CHAR comes first. */ + jz L(return_null) + bsrl %eax, %eax + leaq -VEC_SIZE(%rdi, %rax), %rax + VZEROUPPER + ret + + .p2align 4 +L(return_null): + xorl %eax, %eax + VZEROUPPER + ret + +END (STRRCHR) +#endif diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S new file mode 100644 index 0000000000..9e8dc2a417 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S @@ -0,0 +1,35 @@ +/* strrchr optimized with SSE2. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define strrchr __strrchr_sse2 + +# ifdef SHARED +# undef libc_hidden_builtin_def +/* It doesn't make sense to send libc-internal strrchr calls through a PLT. 
+ The speedup we get from using SSE2 instructions is likely eaten away + by the indirect call in the PLT. */ +# define libc_hidden_builtin_def(name) \ + .globl __GI_strrchr; __GI_strrchr = __strrchr_sse2 +# endif + +# undef weak_alias +# define weak_alias(strrchr, rindex) +#endif + +#include "../strrchr.S" diff --git a/sysdeps/x86_64/multiarch/strrchr.c b/sysdeps/x86_64/multiarch/strrchr.c new file mode 100644 index 0000000000..fbb9c8439a --- /dev/null +++ b/sysdeps/x86_64/multiarch/strrchr.c @@ -0,0 +1,30 @@ +/* Multiple versions of strrchr. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in libc. 
*/ +#if IS_IN (libc) +# define strrchr __redirect_strrchr +# include <string.h> +# undef strrchr + +# define SYMBOL_NAME strrchr +# include "ifunc-sse2-avx2.h" + +libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ()); +weak_alias (strrchr, rindex); +#endif diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S new file mode 100644 index 0000000000..cf8a239ab2 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S @@ -0,0 +1,3 @@ +#define STRRCHR __wcsrchr_avx2 +#define USE_AS_WCSRCHR 1 +#include "strrchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S new file mode 100644 index 0000000000..0ac1b1356c --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S @@ -0,0 +1,23 @@ +/* wcsrchr optimized with SSE2. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define wcsrchr __wcsrchr_sse2 +#endif + +#include "../wcsrchr.S" diff --git a/sysdeps/x86_64/multiarch/wcsrchr.c b/sysdeps/x86_64/multiarch/wcsrchr.c new file mode 100644 index 0000000000..0413bd2636 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsrchr.c @@ -0,0 +1,29 @@ +/* Multiple versions of wcsrchr. + Copyright (C) 2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in libc. */ +#if IS_IN (libc) +# define wcsrchr __redirect_wcsrchr +# include <wchar.h> +# undef wcsrchr + +# define SYMBOL_NAME wcsrchr +# include "ifunc-sse2-avx2.h" + +libc_ifunc_redirected (__redirect_wcsrchr, wcsrchr, IFUNC_SELECTOR ()); +#endif |