From e83fa71eb5ec74c4d4ac49679443985196b8dc56 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 25 Aug 2015 06:06:30 -0700 Subject: Add i386 wcschr/wcsrchr multiarch functions --- sysdeps/i386/i686/multiarch/Makefile | 4 +- sysdeps/i386/i686/multiarch/wcschr-c.c | 22 -- sysdeps/i386/i686/multiarch/wcschr-sse2.S | 219 ------------------ sysdeps/i386/i686/multiarch/wcschr.S | 36 --- sysdeps/i386/i686/multiarch/wcsrchr-c.c | 5 - sysdeps/i386/i686/multiarch/wcsrchr-sse2.S | 354 ----------------------------- sysdeps/i386/i686/multiarch/wcsrchr.S | 35 --- sysdeps/i386/multiarch/Makefile | 4 + sysdeps/i386/multiarch/ifunc-impl-list.c | 8 +- sysdeps/i386/multiarch/wcschr-i386.c | 22 ++ sysdeps/i386/multiarch/wcschr-sse2.S | 219 ++++++++++++++++++ sysdeps/i386/multiarch/wcschr.c | 50 ++++ sysdeps/i386/multiarch/wcsrchr-i386.c | 5 + sysdeps/i386/multiarch/wcsrchr-sse2.S | 354 +++++++++++++++++++++++++++++ sysdeps/i386/multiarch/wcsrchr.c | 48 ++++ 15 files changed, 709 insertions(+), 676 deletions(-) delete mode 100644 sysdeps/i386/i686/multiarch/wcschr-c.c delete mode 100644 sysdeps/i386/i686/multiarch/wcschr-sse2.S delete mode 100644 sysdeps/i386/i686/multiarch/wcschr.S delete mode 100644 sysdeps/i386/i686/multiarch/wcsrchr-c.c delete mode 100644 sysdeps/i386/i686/multiarch/wcsrchr-sse2.S delete mode 100644 sysdeps/i386/i686/multiarch/wcsrchr.S create mode 100644 sysdeps/i386/multiarch/wcschr-i386.c create mode 100644 sysdeps/i386/multiarch/wcschr-sse2.S create mode 100644 sysdeps/i386/multiarch/wcschr.c create mode 100644 sysdeps/i386/multiarch/wcsrchr-i386.c create mode 100644 sysdeps/i386/multiarch/wcsrchr-sse2.S create mode 100644 sysdeps/i386/multiarch/wcsrchr.c diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index ff54ee963b..289f0f9634 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -1,5 +1,5 @@ ifeq ($(subdir),wcsmbs) sysdep_routines += wcscmp-sse2 wcscmp-c wcslen-sse2 wcslen-c \ - wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcschr-sse2 \ - wcschr-c wcsrchr-sse2 wcsrchr-c wcscpy-ssse3 wcscpy-c + wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \ + wcscpy-ssse3 wcscpy-c endif diff --git a/sysdeps/i386/i686/multiarch/wcschr-c.c b/sysdeps/i386/i686/multiarch/wcschr-c.c deleted file mode 100644 index 38d41d04de..0000000000 --- a/sysdeps/i386/i686/multiarch/wcschr-c.c +++ /dev/null @@ -1,22 +0,0 @@ -#include - -#if IS_IN (libc) -# undef libc_hidden_weak -# define libc_hidden_weak(name) - -# undef weak_alias -# define weak_alias(name,alias) - -# ifdef SHARED -# undef libc_hidden_def -# define libc_hidden_def(name) \ - __hidden_ver1 (__wcschr_ia32, __GI_wcschr, __wcschr_ia32); \ - strong_alias (__wcschr_ia32, __wcschr_ia32_1); \ - __hidden_ver1 (__wcschr_ia32_1, __GI___wcschr, __wcschr_ia32_1); -# endif -#endif - -extern __typeof (wcschr) __wcschr_ia32; - -#define WCSCHR __wcschr_ia32 -#include diff --git a/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/sysdeps/i386/i686/multiarch/wcschr-sse2.S deleted file mode 100644 index 818eaa3014..0000000000 --- a/sysdeps/i386/i686/multiarch/wcschr-sse2.S +++ /dev/null @@ -1,219 +0,0 @@ -/* wcschr with SSE2, without using bsf instructions - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if IS_IN (libc) -# include - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__wcschr_sse2) - - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - mov %ecx, %eax - punpckldq %xmm1, %xmm1 - pxor %xmm2, %xmm2 - punpckldq %xmm1, %xmm1 - - and $63, %eax - cmp $48, %eax - ja L(cross_cache) - - movdqu (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - and $-16, %ecx - jmp L(loop) - - .p2align 4 -L(cross_cache): - PUSH (%edi) - mov %ecx, %edi - mov %eax, %ecx - and $-16, %edi - and $15, %ecx - movdqa (%edi), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - - sarl %cl, %edx - sarl %cl, %eax - test %eax, %eax - jz L(unaligned_no_match) - - add %edi, %ecx - POP (%edi) - - test %edx, %edx - jz L(match_case1) - test %al, %al - jz L(match_higth_case2) - test $15, %al - jnz L(match_case2_4) - test $15, %dl - jnz L(return_null) - lea 4(%ecx), %eax - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(unaligned_no_match): - mov %edi, %ecx - POP (%edi) - - test %edx, %edx - jnz L(return_null) - - pxor %xmm2, %xmm2 - -/* Loop start on aligned string. */ - .p2align 4 -L(loop): - add $16, %ecx - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - add $16, %ecx - - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - add $16, %ecx - - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - add $16, %ecx - - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jz L(loop) - - .p2align 4 -L(matches): - pmovmskb %xmm2, %edx - test %eax, %eax - jz L(return_null) - test %edx, %edx - jz L(match_case1) - - .p2align 4 -L(match_case2): - test %al, %al - jz L(match_higth_case2) - test $15, %al - jnz L(match_case2_4) - test $15, %dl - jnz L(return_null) - lea 4(%ecx), %eax - ret - - .p2align 4 -L(match_case2_4): - mov %ecx, %eax - ret - - .p2align 4 -L(match_higth_case2): - test %dl, %dl - jnz L(return_null) - test $15, %ah - jnz L(match_case2_12) - test $15, %dh - jnz L(return_null) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(match_case2_12): - lea 8(%ecx), %eax - ret - - .p2align 4 -L(match_case1): - test %al, %al - jz L(match_higth_case1) - - test $0x01, %al - jnz L(exit0) - lea 4(%ecx), %eax - ret - - .p2align 4 -L(match_higth_case1): - test $0x01, %ah - jnz L(exit3) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(exit0): - mov %ecx, %eax - ret - - .p2align 4 -L(exit3): - lea 8(%ecx), %eax - ret - - .p2align 4 -L(return_null): - xor %eax, %eax - ret - -END (__wcschr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcschr.S b/sysdeps/i386/i686/multiarch/wcschr.S deleted file mode 100644 index 5918b127a6..0000000000 --- a/sysdeps/i386/i686/multiarch/wcschr.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of wcschr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -#if IS_IN (libc) - .text -ENTRY(__wcschr) - .type wcschr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcschr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcschr_sse2) -2: ret -END(__wcschr) -weak_alias (__wcschr, wcschr) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-c.c b/sysdeps/i386/i686/multiarch/wcsrchr-c.c deleted file mode 100644 index 8d8a335b5b..0000000000 --- a/sysdeps/i386/i686/multiarch/wcsrchr-c.c +++ /dev/null @@ -1,5 +0,0 @@ -#if IS_IN (libc) -# define wcsrchr __wcsrchr_ia32 -#endif - -#include "wcsmbs/wcsrchr.c" diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S deleted file mode 100644 index f897618a3a..0000000000 --- a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S +++ /dev/null @@ -1,354 +0,0 @@ -/* wcsrchr with SSE2, without using bsf instructions. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if IS_IN (libc) -# include -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH (%edi); -# define RETURN POP (%edi); ret; CFI_PUSH (%edi); -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__wcsrchr_sse2) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - mov %ecx, %edi - punpckldq %xmm1, %xmm1 - pxor %xmm2, %xmm2 - punpckldq %xmm1, %xmm1 - -/* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - ja L(crosscache) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 -/* Find where NULL is. */ - pmovmskb %xmm2, %ecx -/* Check if there is a match. */ - pmovmskb %xmm0, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match1) - - test %ecx, %ecx - jnz L(return_null) - - and $-16, %edi - - PUSH (%esi) - - xor %edx, %edx - jmp L(loop) - - CFI_POP (%esi) - - .p2align 4 -L(unaligned_match1): - test %ecx, %ecx - jnz L(prolog_find_zero_1) - - PUSH (%esi) - -/* Save current match */ - mov %eax, %edx - mov %edi, %esi - and $-16, %edi - jmp L(loop) - - CFI_POP (%esi) - - .p2align 4 -L(crosscache): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqd %xmm0, %xmm3 - pcmpeqd %xmm1, %xmm0 -/* Find where NULL is. */ - pmovmskb %xmm3, %edx -/* Check if there is a match. */ - pmovmskb %xmm0, %eax -/* Remove the leading bytes. */ - shr %cl, %edx - shr %cl, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - PUSH (%esi) - - xor %edx, %edx - jmp L(loop) - - CFI_POP (%esi) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(prolog_find_zero) - - PUSH (%esi) - - mov %eax, %edx - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. */ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqd %xmm0, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm3 - pcmpeqd %xmm3, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm3 - pmovmskb %xmm2, %ecx - pmovmskb %xmm3, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm4 - pcmpeqd %xmm4, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm4 - pmovmskb %xmm2, %ecx - pmovmskb %xmm4, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm5 - pcmpeqd %xmm5, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm5 - pmovmskb %xmm2, %ecx - pmovmskb %xmm5, %eax - or %eax, %ecx - jz L(loop) - - .p2align 4 -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %edx, %edx - jz L(return_null_1) - mov %edx, %eax - mov %esi, %edi - - POP (%esi) - - test %ah, %ah - jnz L(match_third_or_fourth_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(return_null_1): - POP (%esi) - - xor %eax, %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(find_zero) -/* save match info */ - mov %eax, %edx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(find_zero): - test %cl, %cl - jz L(find_zero_in_third_or_fourth_wchar) - test $15, %cl - jz L(find_zero_in_second_wchar) - and $1, %eax - jz L(return_value) - - POP (%esi) - - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_in_second_wchar): - and $1 << 5 - 1, %eax - jz L(return_value) - - POP (%esi) - - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_in_third_or_fourth_wchar): - test $15, %ch - jz L(find_zero_in_fourth_wchar) - and $1 << 9 - 1, %eax - jz L(return_value) - - POP (%esi) - - test %ah, %ah - jnz L(match_third_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_in_fourth_wchar): - - POP (%esi) - - test %ah, %ah - jnz L(match_third_or_fourth_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(match_second_wchar): - lea -12(%edi), %eax - RETURN - - .p2align 4 -L(match_third_or_fourth_wchar): - test $15 << 4, %ah - jnz L(match_fourth_wchar) - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_third_wchar): - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_fourth_wchar): - lea -4(%edi), %eax - RETURN - - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero): - add %ecx, %edi - mov %edx, %ecx -L(prolog_find_zero_1): - test %cl, %cl - jz L(prolog_find_zero_in_third_or_fourth_wchar) - test $15, %cl - jz L(prolog_find_zero_in_second_wchar) - and $1, %eax - jz L(return_null) - - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(prolog_find_zero_in_second_wchar): - and $1 << 5 - 1, %eax - jz L(return_null) - - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(prolog_find_zero_in_third_or_fourth_wchar): - test $15, %ch - jz L(prolog_find_zero_in_fourth_wchar) - and $1 << 9 - 1, %eax - jz L(return_null) - - test %ah, %ah - jnz L(match_third_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(prolog_find_zero_in_fourth_wchar): - test %ah, %ah - jnz L(match_third_or_fourth_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - -END (__wcsrchr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcsrchr.S b/sysdeps/i386/i686/multiarch/wcsrchr.S deleted file mode 100644 index 9ed68101e1..0000000000 --- a/sysdeps/i386/i686/multiarch/wcsrchr.S +++ /dev/null @@ -1,35 +0,0 @@ -/* Multiple versions of wcsrchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -#if IS_IN (libc) - .text -ENTRY(wcsrchr) - .type wcsrchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcsrchr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcsrchr_sse2) -2: ret -END(wcsrchr) -#endif diff --git a/sysdeps/i386/multiarch/Makefile b/sysdeps/i386/multiarch/Makefile index 89c86b49c4..3515759304 100644 --- a/sysdeps/i386/multiarch/Makefile +++ b/sysdeps/i386/multiarch/Makefile @@ -49,6 +49,10 @@ CFLAGS-strspn-sse4.c += -msse4 endif endif +ifeq ($(subdir),wcsmbs) +sysdep_routines += wcschr-i386 wcschr-sse2 wcsrchr-i386 wcsrchr-sse2 +endif + ifeq (mathyes,$(subdir)$(config-cflags-avx)) libm-sysdep_routines += s_fma-fma s_fmaf-fma CFLAGS-s_fma-fma.c += -mavx -mfpmath=sse diff --git a/sysdeps/i386/multiarch/ifunc-impl-list.c b/sysdeps/i386/multiarch/ifunc-impl-list.c index 94a376d4e6..851d231862 100644 --- a/sysdeps/i386/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/multiarch/ifunc-impl-list.c @@ -316,13 +316,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strspn_sse42) IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_i386)) -#if 0 /* Support sysdeps/i386/i686/multiarch/wcschr.S. */ IFUNC_IMPL (i, name, wcschr, IFUNC_IMPL_ADD (array, i, wcschr, HAS_CPU_FEATURE (SSE2), __wcschr_sse2) - IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_ia32)) + IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_i386)) +#if 0 /* Support sysdeps/i386/i686/multiarch/wcscmp.S. */ IFUNC_IMPL (i, name, wcscmp, IFUNC_IMPL_ADD (array, i, wcscmp, HAS_CPU_FEATURE (SSE2), @@ -340,13 +340,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2), __wcslen_sse2) IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32)) +#endif /* Support sysdeps/i386/i686/multiarch/wcsrchr.S. */ IFUNC_IMPL (i, name, wcsrchr, IFUNC_IMPL_ADD (array, i, wcsrchr, HAS_CPU_FEATURE (SSE2), __wcsrchr_sse2) - IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_ia32)) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_i386)) +#if 0 /* Support sysdeps/i386/i686/multiarch/wmemcmp.S. */ IFUNC_IMPL (i, name, wmemcmp, IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_2), diff --git a/sysdeps/i386/multiarch/wcschr-i386.c b/sysdeps/i386/multiarch/wcschr-i386.c new file mode 100644 index 0000000000..de06c2409a --- /dev/null +++ b/sysdeps/i386/multiarch/wcschr-i386.c @@ -0,0 +1,22 @@ +#include + +#if IS_IN (libc) +# undef libc_hidden_weak +# define libc_hidden_weak(name) + +# undef weak_alias +# define weak_alias(name,alias) + +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcschr_i386, __GI_wcschr, __wcschr_i386); \ + strong_alias (__wcschr_i386, __wcschr_i386_1); \ + __hidden_ver1 (__wcschr_i386_1, __GI___wcschr, __wcschr_i386_1); +# endif +#endif + +extern __typeof (wcschr) __wcschr_i386; + +#define WCSCHR __wcschr_i386 +#include diff --git a/sysdeps/i386/multiarch/wcschr-sse2.S b/sysdeps/i386/multiarch/wcschr-sse2.S new file mode 100644 index 0000000000..818eaa3014 --- /dev/null +++ b/sysdeps/i386/multiarch/wcschr-sse2.S @@ -0,0 +1,219 @@ +/* wcschr with SSE2, without using bsf instructions + Copyright (C) 2011-2015 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) +# include + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# define PARMS 4 +# define STR1 PARMS +# define STR2 STR1+4 + + atom_text_section +ENTRY (__wcschr_sse2) + + mov STR1(%esp), %ecx + movd STR2(%esp), %xmm1 + + mov %ecx, %eax + punpckldq %xmm1, %xmm1 + pxor %xmm2, %xmm2 + punpckldq %xmm1, %xmm1 + + and $63, %eax + cmp $48, %eax + ja L(cross_cache) + + movdqu (%ecx), %xmm0 + pcmpeqd %xmm0, %xmm2 + pcmpeqd %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + or %eax, %edx + jnz L(matches) + and $-16, %ecx + jmp L(loop) + + .p2align 4 +L(cross_cache): + PUSH (%edi) + mov %ecx, %edi + mov %eax, %ecx + and $-16, %edi + and $15, %ecx + movdqa (%edi), %xmm0 + pcmpeqd %xmm0, %xmm2 + pcmpeqd %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + + sarl %cl, %edx + sarl %cl, %eax + test %eax, %eax + jz L(unaligned_no_match) + + add %edi, %ecx + POP (%edi) + + test %edx, %edx + jz L(match_case1) + test %al, %al + jz L(match_higth_case2) + test $15, %al + jnz L(match_case2_4) + test $15, %dl + jnz L(return_null) + lea 4(%ecx), %eax + ret + + CFI_PUSH (%edi) + + .p2align 4 +L(unaligned_no_match): + mov %edi, %ecx + POP (%edi) + + test %edx, %edx + jnz L(return_null) + + pxor %xmm2, %xmm2 + +/* Loop start on aligned string. */ + .p2align 4 +L(loop): + add $16, %ecx + movdqa (%ecx), %xmm0 + pcmpeqd %xmm0, %xmm2 + pcmpeqd %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + or %eax, %edx + jnz L(matches) + add $16, %ecx + + movdqa (%ecx), %xmm0 + pcmpeqd %xmm0, %xmm2 + pcmpeqd %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + or %eax, %edx + jnz L(matches) + add $16, %ecx + + movdqa (%ecx), %xmm0 + pcmpeqd %xmm0, %xmm2 + pcmpeqd %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + or %eax, %edx + jnz L(matches) + add $16, %ecx + + movdqa (%ecx), %xmm0 + pcmpeqd %xmm0, %xmm2 + pcmpeqd %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + or %eax, %edx + jz L(loop) + + .p2align 4 +L(matches): + pmovmskb %xmm2, %edx + test %eax, %eax + jz L(return_null) + test %edx, %edx + jz L(match_case1) + + .p2align 4 +L(match_case2): + test %al, %al + jz L(match_higth_case2) + test $15, %al + jnz L(match_case2_4) + test $15, %dl + jnz L(return_null) + lea 4(%ecx), %eax + ret + + .p2align 4 +L(match_case2_4): + mov %ecx, %eax + ret + + .p2align 4 +L(match_higth_case2): + test %dl, %dl + jnz L(return_null) + test $15, %ah + jnz L(match_case2_12) + test $15, %dh + jnz L(return_null) + lea 12(%ecx), %eax + ret + + .p2align 4 +L(match_case2_12): + lea 8(%ecx), %eax + ret + + .p2align 4 +L(match_case1): + test %al, %al + jz L(match_higth_case1) + + test $0x01, %al + jnz L(exit0) + lea 4(%ecx), %eax + ret + + .p2align 4 +L(match_higth_case1): + test $0x01, %ah + jnz L(exit3) + lea 12(%ecx), %eax + ret + + .p2align 4 +L(exit0): + mov %ecx, %eax + ret + + .p2align 4 +L(exit3): + lea 8(%ecx), %eax + ret + + .p2align 4 +L(return_null): + xor %eax, %eax + ret + +END (__wcschr_sse2) +#endif diff --git a/sysdeps/i386/multiarch/wcschr.c b/sysdeps/i386/multiarch/wcschr.c new file mode 100644 index 0000000000..25cc0db465 --- /dev/null +++ b/sysdeps/i386/multiarch/wcschr.c @@ -0,0 +1,50 @@ +/* Multiple versions of wcschr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in libc. */ +#if IS_IN (libc) +/* Redefine wcschr so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef wcschr +# define wcschr __redirect_wcschr +# include +# undef wcschr + +# include + +extern __typeof (__redirect_wcschr) __wcschr_i386 attribute_hidden; +extern __typeof (__redirect_wcschr) __wcschr_sse2 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__redirect_wcschr) __wcschr; +extern void *wcschr_ifunc (void) __asm__ ("__wcschr"); + +void * +wcschr_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE2)) + return __wcschr_sse2; + + return __wcschr_i386; +} +__asm__ (".type __wcschr, %gnu_indirect_function"); + +weak_alias (__wcschr, wcschr) +#endif diff --git a/sysdeps/i386/multiarch/wcsrchr-i386.c b/sysdeps/i386/multiarch/wcsrchr-i386.c new file mode 100644 index 0000000000..1f7583a319 --- /dev/null +++ b/sysdeps/i386/multiarch/wcsrchr-i386.c @@ -0,0 +1,5 @@ +#if IS_IN (libc) +# define wcsrchr __wcsrchr_i386 +#endif + +#include "wcsmbs/wcsrchr.c" diff --git a/sysdeps/i386/multiarch/wcsrchr-sse2.S b/sysdeps/i386/multiarch/wcsrchr-sse2.S new file mode 100644 index 0000000000..f897618a3a --- /dev/null +++ b/sysdeps/i386/multiarch/wcsrchr-sse2.S @@ -0,0 +1,354 @@ +/* wcsrchr with SSE2, without using bsf instructions. + Copyright (C) 2011-2015 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) +# include +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# define PARMS 8 +# define ENTRANCE PUSH (%edi); +# define RETURN POP (%edi); ret; CFI_PUSH (%edi); +# define STR1 PARMS +# define STR2 STR1+4 + + atom_text_section +ENTRY (__wcsrchr_sse2) + + ENTRANCE + mov STR1(%esp), %ecx + movd STR2(%esp), %xmm1 + + mov %ecx, %edi + punpckldq %xmm1, %xmm1 + pxor %xmm2, %xmm2 + punpckldq %xmm1, %xmm1 + +/* ECX has OFFSET. */ + and $63, %ecx + cmp $48, %ecx + ja L(crosscache) + +/* unaligned string. */ + movdqu (%edi), %xmm0 + pcmpeqd %xmm0, %xmm2 + pcmpeqd %xmm1, %xmm0 +/* Find where NULL is. */ + pmovmskb %xmm2, %ecx +/* Check if there is a match. */ + pmovmskb %xmm0, %eax + add $16, %edi + + test %eax, %eax + jnz L(unaligned_match1) + + test %ecx, %ecx + jnz L(return_null) + + and $-16, %edi + + PUSH (%esi) + + xor %edx, %edx + jmp L(loop) + + CFI_POP (%esi) + + .p2align 4 +L(unaligned_match1): + test %ecx, %ecx + jnz L(prolog_find_zero_1) + + PUSH (%esi) + +/* Save current match */ + mov %eax, %edx + mov %edi, %esi + and $-16, %edi + jmp L(loop) + + CFI_POP (%esi) + + .p2align 4 +L(crosscache): +/* Hancle unaligned string. */ + and $15, %ecx + and $-16, %edi + pxor %xmm3, %xmm3 + movdqa (%edi), %xmm0 + pcmpeqd %xmm0, %xmm3 + pcmpeqd %xmm1, %xmm0 +/* Find where NULL is. */ + pmovmskb %xmm3, %edx +/* Check if there is a match. */ + pmovmskb %xmm0, %eax +/* Remove the leading bytes. */ + shr %cl, %edx + shr %cl, %eax + add $16, %edi + + test %eax, %eax + jnz L(unaligned_match) + + test %edx, %edx + jnz L(return_null) + + PUSH (%esi) + + xor %edx, %edx + jmp L(loop) + + CFI_POP (%esi) + + .p2align 4 +L(unaligned_match): + test %edx, %edx + jnz L(prolog_find_zero) + + PUSH (%esi) + + mov %eax, %edx + lea (%edi, %ecx), %esi + +/* Loop start on aligned string. */ + .p2align 4 +L(loop): + movdqa (%edi), %xmm0 + pcmpeqd %xmm0, %xmm2 + add $16, %edi + pcmpeqd %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm3 + pcmpeqd %xmm3, %xmm2 + add $16, %edi + pcmpeqd %xmm1, %xmm3 + pmovmskb %xmm2, %ecx + pmovmskb %xmm3, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm4 + pcmpeqd %xmm4, %xmm2 + add $16, %edi + pcmpeqd %xmm1, %xmm4 + pmovmskb %xmm2, %ecx + pmovmskb %xmm4, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm5 + pcmpeqd %xmm5, %xmm2 + add $16, %edi + pcmpeqd %xmm1, %xmm5 + pmovmskb %xmm2, %ecx + pmovmskb %xmm5, %eax + or %eax, %ecx + jz L(loop) + + .p2align 4 +L(matches): + test %eax, %eax + jnz L(match) +L(return_value): + test %edx, %edx + jz L(return_null_1) + mov %edx, %eax + mov %esi, %edi + + POP (%esi) + + test %ah, %ah + jnz L(match_third_or_fourth_wchar) + test $15 << 4, %al + jnz L(match_second_wchar) + lea -16(%edi), %eax + RETURN + + CFI_PUSH (%esi) + + .p2align 4 +L(return_null_1): + POP (%esi) + + xor %eax, %eax + RETURN + + CFI_PUSH (%esi) + + .p2align 4 +L(match): + pmovmskb %xmm2, %ecx + test %ecx, %ecx + jnz L(find_zero) +/* save match info */ + mov %eax, %edx + mov %edi, %esi + jmp L(loop) + + .p2align 4 +L(find_zero): + test %cl, %cl + jz L(find_zero_in_third_or_fourth_wchar) + test $15, %cl + jz L(find_zero_in_second_wchar) + and $1, %eax + jz L(return_value) + + POP (%esi) + + lea -16(%edi), %eax + RETURN + + CFI_PUSH (%esi) + + .p2align 4 +L(find_zero_in_second_wchar): + and $1 << 5 - 1, %eax + jz L(return_value) + + POP (%esi) + + test $15 << 4, %al + jnz L(match_second_wchar) + lea -16(%edi), %eax + RETURN + + CFI_PUSH (%esi) + + .p2align 4 +L(find_zero_in_third_or_fourth_wchar): + test $15, %ch + jz L(find_zero_in_fourth_wchar) + and $1 << 9 - 1, %eax + jz L(return_value) + + POP (%esi) + + test %ah, %ah + jnz L(match_third_wchar) + test $15 << 4, %al + jnz L(match_second_wchar) + lea -16(%edi), %eax + RETURN + + CFI_PUSH (%esi) + + .p2align 4 +L(find_zero_in_fourth_wchar): + + POP (%esi) + + test %ah, %ah + jnz L(match_third_or_fourth_wchar) + test $15 << 4, %al + jnz L(match_second_wchar) + lea -16(%edi), %eax + RETURN + + CFI_PUSH (%esi) + + .p2align 4 +L(match_second_wchar): + lea -12(%edi), %eax + RETURN + + .p2align 4 +L(match_third_or_fourth_wchar): + test $15 << 4, %ah + jnz L(match_fourth_wchar) + lea -8(%edi), %eax + RETURN + + .p2align 4 +L(match_third_wchar): + lea -8(%edi), %eax + RETURN + + .p2align 4 +L(match_fourth_wchar): + lea -4(%edi), %eax + RETURN + + .p2align 4 +L(return_null): + xor %eax, %eax + RETURN + + .p2align 4 +L(prolog_find_zero): + add %ecx, %edi + mov %edx, %ecx +L(prolog_find_zero_1): + test %cl, %cl + jz L(prolog_find_zero_in_third_or_fourth_wchar) + test $15, %cl + jz L(prolog_find_zero_in_second_wchar) + and $1, %eax + jz L(return_null) + + lea -16(%edi), %eax + RETURN + + .p2align 4 +L(prolog_find_zero_in_second_wchar): + and $1 << 5 - 1, %eax + jz L(return_null) + + test $15 << 4, %al + jnz L(match_second_wchar) + lea -16(%edi), %eax + RETURN + + .p2align 4 +L(prolog_find_zero_in_third_or_fourth_wchar): + test $15, %ch + jz L(prolog_find_zero_in_fourth_wchar) + and $1 << 9 - 1, %eax + jz L(return_null) + + test %ah, %ah + jnz L(match_third_wchar) + test $15 << 4, %al + jnz L(match_second_wchar) + lea -16(%edi), %eax + RETURN + + .p2align 4 +L(prolog_find_zero_in_fourth_wchar): + test %ah, %ah + jnz L(match_third_or_fourth_wchar) + test $15 << 4, %al + jnz L(match_second_wchar) + lea -16(%edi), %eax + RETURN + +END (__wcsrchr_sse2) +#endif diff --git a/sysdeps/i386/multiarch/wcsrchr.c b/sysdeps/i386/multiarch/wcsrchr.c new file mode 100644 index 0000000000..14206907b8 --- /dev/null +++ b/sysdeps/i386/multiarch/wcsrchr.c @@ -0,0 +1,48 @@ +/* Multiple versions of wcsrchr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in libc. */ +#if IS_IN (libc) +/* Redefine wcsrchr so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef wcsrchr +# define wcsrchr __redirect_wcsrchr +# include +# undef wcsrchr + +# include + +extern __typeof (__redirect_wcsrchr) __wcsrchr_i386 attribute_hidden; +extern __typeof (__redirect_wcsrchr) __wcsrchr_sse2 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__redirect_wcsrchr) wcsrchr; +extern void *wcsrchr_ifunc (void) __asm__ ("wcsrchr"); + +void * +wcsrchr_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE2)) + return __wcsrchr_sse2; + + return __wcsrchr_i386; +} +__asm__ (".type wcsrchr, %gnu_indirect_function"); +#endif -- cgit 1.4.1