From 97b62ad034f12ea32bf5637a26b3830adfccd477 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Mon, 24 Aug 2015 11:41:57 -0700 Subject: Add i386 strrchr multiarch functions --- sysdeps/i386/i686/multiarch/Makefile | 1 - sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S | 282 ---------- sysdeps/i386/i686/multiarch/strrchr-sse2.S | 708 ------------------------- sysdeps/i386/i686/multiarch/strrchr.S | 57 -- sysdeps/i386/multiarch/Makefile | 3 +- sysdeps/i386/multiarch/ifunc-impl-list.c | 4 +- sysdeps/i386/multiarch/strrchr-i386.S | 14 + sysdeps/i386/multiarch/strrchr-sse2-bsf.S | 282 ++++++++++ sysdeps/i386/multiarch/strrchr-sse2.S | 708 +++++++++++++++++++++++++ sysdeps/i386/multiarch/strrchr.c | 55 ++ 10 files changed, 1063 insertions(+), 1051 deletions(-) delete mode 100644 sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S delete mode 100644 sysdeps/i386/i686/multiarch/strrchr-sse2.S delete mode 100644 sysdeps/i386/i686/multiarch/strrchr.S create mode 100644 sysdeps/i386/multiarch/strrchr-i386.S create mode 100644 sysdeps/i386/multiarch/strrchr-sse2-bsf.S create mode 100644 sysdeps/i386/multiarch/strrchr-sse2.S create mode 100644 sysdeps/i386/multiarch/strrchr.c diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 8f8e4509b8..2733c2a880 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -1,6 +1,5 @@ ifeq ($(subdir),string) sysdep_routines += strlen-sse2 strlen-sse2-bsf \ - strrchr-sse2 strrchr-sse2-bsf \ strnlen-sse2 strnlen-c ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strspn-c diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S deleted file mode 100644 index a3d42ddd4f..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S +++ /dev/null @@ -1,282 +0,0 @@ -/* strrchr with SSE2 with bsf and bsr - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if IS_IN (libc) - -# include - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - - .text -ENTRY (__strrchr_sse2_bsf) - - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - PUSH (%edi) - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - pshufd $0, %xmm1, %xmm1 - ja L(crosscashe) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %edx - /* Check if there is a match. 
*/ - pmovmskb %xmm0, %eax - - test %eax, %eax - jnz L(unaligned_match1) - - test %edx, %edx - jnz L(return_null) - - and $-16, %edi - add $16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_return_value1): - bsf %edx, %ecx - mov $2, %edx - shl %cl, %edx - sub $1, %edx - and %edx, %eax - jz L(return_null) - bsr %eax, %eax - add %edi, %eax - POP (%edi) - ret - CFI_PUSH (%edi) - - .p2align 4 -L(unaligned_match1): - test %edx, %edx - jnz L(unaligned_return_value1) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - lea 16(%edi), %esi - and $-16, %edi - add $16, %edi - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 - L(crosscashe): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm3 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm3, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - shr %cl, %edx - shr %cl, %eax - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - add $16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_return_value): - add %ecx, %edi - bsf %edx, %ecx - mov $2, %edx - shl %cl, %edx - sub $1, %edx - and %edx, %eax - jz L(return_null) - bsr %eax, %eax - add %edi, %eax - POP (%edi) - ret - CFI_PUSH (%edi) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(unaligned_return_value) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - add $16, %edi - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. 
*/ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jz L(loop) - -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %ebx, %ebx - jz L(return_null_1) - bsr %ebx, %eax - add %esi, %eax - - POP (%ebx) - POP (%esi) - - sub $16, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(return_value_1) - mov %eax, %ebx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(return_value_1): - bsf %ecx, %ecx - mov $2, %edx - shl %cl, %edx - sub $1, %edx - and %edx, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - - bsr %eax, %eax - add %edi, %eax - sub $16, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) -/* Return NULL. */ - .p2align 4 -L(return_null): - xor %eax, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - CFI_PUSH (%ebx) - CFI_PUSH (%esi) -/* Return NULL. */ - .p2align 4 -L(return_null_1): - POP (%ebx) - POP (%esi) - POP (%edi) - xor %eax, %eax - ret - -END (__strrchr_sse2_bsf) -#endif diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2.S b/sysdeps/i386/i686/multiarch/strrchr-sse2.S deleted file mode 100644 index df36ba0a93..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr-sse2.S +++ /dev/null @@ -1,708 +0,0 @@ -/* strrchr SSE2 without bsf and bsr - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if IS_IN (libc) - -# include - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH(%edi); -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); - -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__strrchr_sse2) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - pshufd $0, %xmm1, %xmm1 - ja L(crosscache) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %ecx - /* Check if there is a match. 
*/ - pmovmskb %xmm0, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match1) - - test %ecx, %ecx - jnz L(return_null) - - and $-16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_match1): - test %ecx, %ecx - jnz L(prolog_find_zero_1) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - mov %edi, %esi - and $-16, %edi - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(crosscache): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm3 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm3, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - shr %cl, %edx - shr %cl, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(prolog_find_zero) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. 
*/ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jz L(loop) - -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %ebx, %ebx - jz L(return_null_1) - mov %ebx, %eax - mov %esi, %edi - - POP (%ebx) - POP (%esi) - - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(return_null_1): - POP (%ebx) - POP (%esi) - - xor %eax, %eax - RETURN - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(find_zero) - mov %eax, %ebx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(find_zero): - test %cl, %cl - jz L(find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(find_zero_8) - test $0x01, %cl - jnz L(FindZeroExit1) - test $0x02, %cl - jnz L(FindZeroExit2) - test $0x04, %cl - jnz L(FindZeroExit3) - and $1 << 4 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_8): - test $0x10, %cl - jnz L(FindZeroExit5) - test $0x20, %cl - jnz L(FindZeroExit6) - test $0x40, %cl - jnz L(FindZeroExit7) - and $1 << 8 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(find_zero_high_8) - test $0x01, %ch - jnz L(FindZeroExit9) - test $0x02, %ch - jnz 
L(FindZeroExit10) - test $0x04, %ch - jnz L(FindZeroExit11) - and $1 << 12 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_high_8): - test $0x10, %ch - jnz L(FindZeroExit13) - test $0x20, %ch - jnz L(FindZeroExit14) - test $0x40, %ch - jnz L(FindZeroExit15) - and $1 << 16 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit1): - and $1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit2): - and $1 << 2 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit3): - and $1 << 3 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit5): - and $1 << 5 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit6): - and $1 << 6 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit7): - and $1 << 7 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit9): - and $1 << 9 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit10): - and $1 << 10 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit11): - and $1 << 11 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 
-L(FindZeroExit13): - and $1 << 13 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit14): - and $1 << 14 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit15): - and $1 << 15 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - - .p2align 4 -L(match_exit): - test %ah, %ah - jnz L(match_exit_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(match_exit_8) - test $0x08, %al - jnz L(Exit4) - test $0x04, %al - jnz L(Exit3) - test $0x02, %al - jnz L(Exit2) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_8): - test $0x80, %al - jnz L(Exit8) - test $0x40, %al - jnz L(Exit7) - test $0x20, %al - jnz L(Exit6) - lea -12(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_high): - mov %ah, %dh - and $15 << 4, %dh - jnz L(match_exit_high_8) - test $0x08, %ah - jnz L(Exit12) - test $0x04, %ah - jnz L(Exit11) - test $0x02, %ah - jnz L(Exit10) - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_high_8): - test $0x80, %ah - jnz L(Exit16) - test $0x40, %ah - jnz L(Exit15) - test $0x20, %ah - jnz L(Exit14) - lea -4(%edi), %eax - RETURN - - .p2align 4 -L(Exit2): - lea -15(%edi), %eax - RETURN - - .p2align 4 -L(Exit3): - lea -14(%edi), %eax - RETURN - - .p2align 4 -L(Exit4): - lea -13(%edi), %eax - RETURN - - .p2align 4 -L(Exit6): - lea -11(%edi), %eax - RETURN - - .p2align 4 -L(Exit7): - lea -10(%edi), %eax - RETURN - - .p2align 4 -L(Exit8): - lea -9(%edi), %eax - RETURN - - .p2align 4 -L(Exit10): - lea -7(%edi), %eax - RETURN - - .p2align 4 -L(Exit11): - lea -6(%edi), %eax - RETURN - - .p2align 4 -L(Exit12): - lea -5(%edi), %eax - RETURN - - .p2align 4 -L(Exit14): - lea -3(%edi), %eax - RETURN - - .p2align 4 -L(Exit15): - lea -2(%edi), %eax - RETURN - - .p2align 4 -L(Exit16): - lea -1(%edi), %eax - RETURN - -/* Return NULL. 
*/ - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero): - add %ecx, %edi - mov %edx, %ecx -L(prolog_find_zero_1): - test %cl, %cl - jz L(prolog_find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(prolog_find_zero_8) - test $0x01, %cl - jnz L(PrologFindZeroExit1) - test $0x02, %cl - jnz L(PrologFindZeroExit2) - test $0x04, %cl - jnz L(PrologFindZeroExit3) - and $1 << 4 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_8): - test $0x10, %cl - jnz L(PrologFindZeroExit5) - test $0x20, %cl - jnz L(PrologFindZeroExit6) - test $0x40, %cl - jnz L(PrologFindZeroExit7) - and $1 << 8 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(prolog_find_zero_high_8) - test $0x01, %ch - jnz L(PrologFindZeroExit9) - test $0x02, %ch - jnz L(PrologFindZeroExit10) - test $0x04, %ch - jnz L(PrologFindZeroExit11) - and $1 << 12 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_high_8): - test $0x10, %ch - jnz L(PrologFindZeroExit13) - test $0x20, %ch - jnz L(PrologFindZeroExit14) - test $0x40, %ch - jnz L(PrologFindZeroExit15) - and $1 << 16 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit1): - and $1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit2): - and $1 << 2 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit3): - and $1 << 3 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit5): - and $1 << 5 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit6): - and $1 << 6 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit7): - and $1 << 7 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit9): - and $1 << 9 
- 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit10): - and $1 << 10 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit11): - and $1 << 11 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit13): - and $1 << 13 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit14): - and $1 << 14 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit15): - and $1 << 15 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - -END (__strrchr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/strrchr.S b/sysdeps/i386/i686/multiarch/strrchr.S deleted file mode 100644 index 6aa3321907..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr.S +++ /dev/null @@ -1,57 +0,0 @@ -/* Multiple versions of strrchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#include -#include - -#if IS_IN (libc) - .text -ENTRY(strrchr) - .type strrchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strrchr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__strrchr_sse2_bsf) - HAS_ARCH_FEATURE (Slow_BSF) - jz 2f - LOAD_FUNC_GOT_EAX (__strrchr_sse2) -2: ret -END(strrchr) - -# undef ENTRY -# define ENTRY(name) \ - .type __strrchr_ia32, @function; \ - .globl __strrchr_ia32; \ - .p2align 4; \ - __strrchr_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strrchr_ia32, .-__strrchr_ia32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strrchr; __GI_strrchr = __strrchr_ia32 -#endif - -#include "../../strrchr.S" diff --git a/sysdeps/i386/multiarch/Makefile b/sysdeps/i386/multiarch/Makefile index 23c1f8b0de..a4161b0f33 100644 --- a/sysdeps/i386/multiarch/Makefile +++ b/sysdeps/i386/multiarch/Makefile @@ -35,7 +35,8 @@ sysdep_routines += bcopy-i386 bcopy-i686 bcopy-sse2-unaligned \ strcat-i386 strcat-sse2 strcat-ssse3 \ strncat-i386 strncat-sse2 strncat-ssse3 \ strchr-i386 strchr-i586 strchr-sse2-bsf strchr-sse2 \ - strcspn-i386 strpbrk-i386 + strcspn-i386 strpbrk-i386 \ + strrchr-i386 strrchr-sse2-bsf strrchr-sse2 ifeq (yes,$(config-cflags-sse4)) sysdep_routines += varshift strcspn-sse4 strpbrk-sse4 CFLAGS-varshift.c += -msse4 diff --git a/sysdeps/i386/multiarch/ifunc-impl-list.c b/sysdeps/i386/multiarch/ifunc-impl-list.c index 2fc97a31a4..f3bea9c9a5 100644 --- a/sysdeps/i386/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/multiarch/ifunc-impl-list.c @@ -304,15 +304,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strpbrk_sse42) IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_i386)) -#if 0 /* Support 
sysdeps/i386/i686/multiarch/strrchr.S. */ IFUNC_IMPL (i, name, strrchr, IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2), __strrchr_sse2_bsf) IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2), __strrchr_sse2) - IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_ia32)) + IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_i386)) +#if 0 /* Support sysdeps/i386/i686/multiarch/strspn.S. */ IFUNC_IMPL (i, name, strspn, IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2), diff --git a/sysdeps/i386/multiarch/strrchr-i386.S b/sysdeps/i386/multiarch/strrchr-i386.S new file mode 100644 index 0000000000..a7941e6f33 --- /dev/null +++ b/sysdeps/i386/multiarch/strrchr-i386.S @@ -0,0 +1,14 @@ +#define strrchr __strrchr_i386 +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef libc_hidden_def +#define libc_hidden_def(name) +#undef weak_alias +#define weak_alias(name, aliasname) +#include + +#ifdef SHARED + .globl __GI_strrchr + .hidden __GI_strrchr + __GI_strrchr = __strrchr_i386 +#endif diff --git a/sysdeps/i386/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/multiarch/strrchr-sse2-bsf.S new file mode 100644 index 0000000000..a3d42ddd4f --- /dev/null +++ b/sysdeps/i386/multiarch/strrchr-sse2-bsf.S @@ -0,0 +1,282 @@ +/* strrchr with SSE2 with bsf and bsr + Copyright (C) 2011-2015 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# define PARMS 4 +# define STR1 PARMS +# define STR2 STR1+4 + + .text +ENTRY (__strrchr_sse2_bsf) + + mov STR1(%esp), %ecx + movd STR2(%esp), %xmm1 + + PUSH (%edi) + pxor %xmm2, %xmm2 + mov %ecx, %edi + punpcklbw %xmm1, %xmm1 + punpcklbw %xmm1, %xmm1 + /* Reduce ECX to the offset of the string inside its 64-byte block. */ + and $63, %ecx + cmp $48, %ecx + pshufd $0, %xmm1, %xmm1 + ja L(crosscashe) + +/* Offset is at most 48: one unaligned 16-byte load stays inside the block. */ + movdqu (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + /* EDX = bit mask of the NUL bytes. */ + pmovmskb %xmm2, %edx + /* EAX = bit mask of the bytes equal to the search character. */ + pmovmskb %xmm0, %eax + + test %eax, %eax + jnz L(unaligned_match1) + + test %edx, %edx + jnz L(return_null) + + and $-16, %edi + add $16, %edi + + PUSH (%esi) + PUSH (%ebx) + + xor %ebx, %ebx + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(unaligned_return_value1): + bsf %edx, %ecx + mov $2, %edx + shl %cl, %edx + sub $1, %edx + and %edx, %eax + jz L(return_null) + bsr %eax, %eax + add %edi, %eax + POP (%edi) + ret + CFI_PUSH (%edi) + + .p2align 4 +L(unaligned_match1): + test %edx, %edx + jnz L(unaligned_return_value1) + + PUSH (%esi) + PUSH (%ebx) + + mov %eax, %ebx + lea 16(%edi), %esi + and $-16, %edi + add $16, %edi + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 + L(crosscashe): +/* Offset is above 48: load the aligned 16 bytes that contain the start of the string. */ + and $15, %ecx + and $-16, %edi + pxor %xmm3, %xmm3 + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm3 + pcmpeqb %xmm1, %xmm0 + /* EDX = bit mask of the NUL bytes. */ + pmovmskb %xmm3, %edx + /* EAX = bit mask of the bytes equal to the search character. */ + pmovmskb %xmm0, %eax + /* Remove the leading bytes.
*/ + shr %cl, %edx + shr %cl, %eax + + test %eax, %eax + jnz L(unaligned_match) + + test %edx, %edx + jnz L(return_null) + + add $16, %edi + + PUSH (%esi) + PUSH (%ebx) + + xor %ebx, %ebx + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(unaligned_return_value): + add %ecx, %edi + bsf %edx, %ecx + mov $2, %edx + shl %cl, %edx + sub $1, %edx + and %edx, %eax + jz L(return_null) + bsr %eax, %eax + add %edi, %eax + POP (%edi) + ret + CFI_PUSH (%edi) + + .p2align 4 +L(unaligned_match): + test %edx, %edx + jnz L(unaligned_return_value) + + PUSH (%esi) + PUSH (%ebx) + + mov %eax, %ebx + add $16, %edi + lea (%edi, %ecx), %esi + +/* Main loop over aligned 16-byte chunks, unrolled four times. EBX is the match mask of the last chunk that had a match (0 if none); ESI is 16 bytes past the address that bit 0 of EBX refers to. */ + .p2align 4 +L(loop): + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jz L(loop) + +L(matches): + test %eax, %eax + jnz L(match) +L(return_value): + test %ebx, %ebx + jz L(return_null_1) + bsr %ebx, %eax + add %esi, %eax + + POP (%ebx) + POP (%esi) + + sub $16, %eax + POP (%edi) + ret + + CFI_PUSH (%edi) + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(match): + pmovmskb %xmm2, %ecx + test %ecx, %ecx + jnz L(return_value_1) + mov %eax, %ebx + mov %edi, %esi + jmp L(loop) + + .p2align 4 +L(return_value_1): + bsf %ecx, %ecx + mov $2, %edx + shl %cl, %edx + sub $1, %edx + and %edx, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + + bsr %eax, %eax + add %edi, %eax + sub $16, %eax + POP (%edi) + ret
+ + CFI_PUSH (%edi) +/* Return NULL; this exit pops only EDI. */ + .p2align 4 +L(return_null): + xor %eax, %eax + POP (%edi) + ret + + CFI_PUSH (%edi) + CFI_PUSH (%ebx) + CFI_PUSH (%esi) +/* Return NULL after restoring EBX, ESI and EDI. */ + .p2align 4 +L(return_null_1): + POP (%ebx) + POP (%esi) + POP (%edi) + xor %eax, %eax + ret + +END (__strrchr_sse2_bsf) +#endif diff --git a/sysdeps/i386/multiarch/strrchr-sse2.S b/sysdeps/i386/multiarch/strrchr-sse2.S new file mode 100644 index 0000000000..df36ba0a93 --- /dev/null +++ b/sysdeps/i386/multiarch/strrchr-sse2.S @@ -0,0 +1,708 @@ +/* strrchr SSE2 without bsf and bsr + Copyright (C) 2011-2015 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# define PARMS 8 +# define ENTRANCE PUSH(%edi); +# define RETURN POP(%edi); ret; CFI_PUSH(%edi); + +# define STR1 PARMS +# define STR2 STR1+4 + + atom_text_section +ENTRY (__strrchr_sse2) + + ENTRANCE + mov STR1(%esp), %ecx + movd STR2(%esp), %xmm1 + + pxor %xmm2, %xmm2 + mov %ecx, %edi + punpcklbw %xmm1, %xmm1 + punpcklbw %xmm1, %xmm1 + /* ECX has OFFSET.
*/ + and $63, %ecx + cmp $48, %ecx + pshufd $0, %xmm1, %xmm1 + ja L(crosscache) + +/* Offset is at most 48: one unaligned 16-byte load stays inside the 64-byte block. */ + movdqu (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + /* ECX = bit mask of the NUL bytes. */ + pmovmskb %xmm2, %ecx + /* EAX = bit mask of the bytes equal to the search character. */ + pmovmskb %xmm0, %eax + add $16, %edi + + test %eax, %eax + jnz L(unaligned_match1) + + test %ecx, %ecx + jnz L(return_null) + + and $-16, %edi + + PUSH (%esi) + PUSH (%ebx) + + xor %ebx, %ebx + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(unaligned_match1): + test %ecx, %ecx + jnz L(prolog_find_zero_1) + + PUSH (%esi) + PUSH (%ebx) + + mov %eax, %ebx + mov %edi, %esi + and $-16, %edi + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(crosscache): +/* Offset is above 48: load the aligned 16 bytes that contain the start of the string. */ + and $15, %ecx + and $-16, %edi + pxor %xmm3, %xmm3 + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm3 + pcmpeqb %xmm1, %xmm0 + /* EDX = bit mask of the NUL bytes. */ + pmovmskb %xmm3, %edx + /* EAX = bit mask of the bytes equal to the search character. */ + pmovmskb %xmm0, %eax + /* Shift out the mask bits of the bytes that precede the string. */ + shr %cl, %edx + shr %cl, %eax + add $16, %edi + + test %eax, %eax + jnz L(unaligned_match) + + test %edx, %edx + jnz L(return_null) + + PUSH (%esi) + PUSH (%ebx) + + xor %ebx, %ebx + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(unaligned_match): + test %edx, %edx + jnz L(prolog_find_zero) + + PUSH (%esi) + PUSH (%ebx) + + mov %eax, %ebx + lea (%edi, %ecx), %esi + +/* Loop start on aligned string.
*/ + .p2align 4 +L(loop): + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jz L(loop) + +L(matches): + test %eax, %eax + jnz L(match) +L(return_value): + test %ebx, %ebx + jz L(return_null_1) + mov %ebx, %eax + mov %esi, %edi + + POP (%ebx) + POP (%esi) + + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(return_null_1): + POP (%ebx) + POP (%esi) + + xor %eax, %eax + RETURN + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(match): + pmovmskb %xmm2, %ecx + test %ecx, %ecx + jnz L(find_zero) + mov %eax, %ebx + mov %edi, %esi + jmp L(loop) + + .p2align 4 +L(find_zero): + test %cl, %cl + jz L(find_zero_high) + mov %cl, %dl + and $15, %dl + jz L(find_zero_8) + test $0x01, %cl + jnz L(FindZeroExit1) + test $0x02, %cl + jnz L(FindZeroExit2) + test $0x04, %cl + jnz L(FindZeroExit3) + and $1 << 4 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(find_zero_8): + test $0x10, %cl + jnz L(FindZeroExit5) + test $0x20, %cl + jnz L(FindZeroExit6) + test $0x40, %cl + jnz L(FindZeroExit7) + and $1 << 8 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(find_zero_high): + mov %ch, %dh + and $15, %dh + jz L(find_zero_high_8) + test $0x01, %ch + jnz L(FindZeroExit9) + test $0x02, %ch + jnz 
L(FindZeroExit10) + test $0x04, %ch + jnz L(FindZeroExit11) + and $1 << 12 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(find_zero_high_8): + test $0x10, %ch + jnz L(FindZeroExit13) + test $0x20, %ch + jnz L(FindZeroExit14) + test $0x40, %ch + jnz L(FindZeroExit15) + and $1 << 16 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit1): + and $1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit2): + and $1 << 2 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit3): + and $1 << 3 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit5): + and $1 << 5 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit6): + and $1 << 6 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit7): + and $1 << 7 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit9): + and $1 << 9 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit10): + and $1 << 10 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit11): + and $1 << 11 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 
+L(FindZeroExit13): + and $1 << 13 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit14): + and $1 << 14 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit15): + and $1 << 15 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + + .p2align 4 +L(match_exit): + test %ah, %ah + jnz L(match_exit_high) + mov %al, %dl + and $15 << 4, %dl + jnz L(match_exit_8) + test $0x08, %al + jnz L(Exit4) + test $0x04, %al + jnz L(Exit3) + test $0x02, %al + jnz L(Exit2) + lea -16(%edi), %eax + RETURN + + .p2align 4 +L(match_exit_8): + test $0x80, %al + jnz L(Exit8) + test $0x40, %al + jnz L(Exit7) + test $0x20, %al + jnz L(Exit6) + lea -12(%edi), %eax + RETURN + + .p2align 4 +L(match_exit_high): + mov %ah, %dh + and $15 << 4, %dh + jnz L(match_exit_high_8) + test $0x08, %ah + jnz L(Exit12) + test $0x04, %ah + jnz L(Exit11) + test $0x02, %ah + jnz L(Exit10) + lea -8(%edi), %eax + RETURN + + .p2align 4 +L(match_exit_high_8): + test $0x80, %ah + jnz L(Exit16) + test $0x40, %ah + jnz L(Exit15) + test $0x20, %ah + jnz L(Exit14) + lea -4(%edi), %eax + RETURN + + .p2align 4 +L(Exit2): + lea -15(%edi), %eax + RETURN + + .p2align 4 +L(Exit3): + lea -14(%edi), %eax + RETURN + + .p2align 4 +L(Exit4): + lea -13(%edi), %eax + RETURN + + .p2align 4 +L(Exit6): + lea -11(%edi), %eax + RETURN + + .p2align 4 +L(Exit7): + lea -10(%edi), %eax + RETURN + + .p2align 4 +L(Exit8): + lea -9(%edi), %eax + RETURN + + .p2align 4 +L(Exit10): + lea -7(%edi), %eax + RETURN + + .p2align 4 +L(Exit11): + lea -6(%edi), %eax + RETURN + + .p2align 4 +L(Exit12): + lea -5(%edi), %eax + RETURN + + .p2align 4 +L(Exit14): + lea -3(%edi), %eax + RETURN + + .p2align 4 +L(Exit15): + lea -2(%edi), %eax + RETURN + + .p2align 4 +L(Exit16): + lea -1(%edi), %eax + RETURN + +/* Return NULL. 
*/ + .p2align 4 +L(return_null): + xor %eax, %eax + RETURN + + .p2align 4 +L(prolog_find_zero): + add %ecx, %edi + mov %edx, %ecx +L(prolog_find_zero_1): + test %cl, %cl + jz L(prolog_find_zero_high) + mov %cl, %dl + and $15, %dl + jz L(prolog_find_zero_8) + test $0x01, %cl + jnz L(PrologFindZeroExit1) + test $0x02, %cl + jnz L(PrologFindZeroExit2) + test $0x04, %cl + jnz L(PrologFindZeroExit3) + and $1 << 4 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(prolog_find_zero_8): + test $0x10, %cl + jnz L(PrologFindZeroExit5) + test $0x20, %cl + jnz L(PrologFindZeroExit6) + test $0x40, %cl + jnz L(PrologFindZeroExit7) + and $1 << 8 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(prolog_find_zero_high): + mov %ch, %dh + and $15, %dh + jz L(prolog_find_zero_high_8) + test $0x01, %ch + jnz L(PrologFindZeroExit9) + test $0x02, %ch + jnz L(PrologFindZeroExit10) + test $0x04, %ch + jnz L(PrologFindZeroExit11) + and $1 << 12 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(prolog_find_zero_high_8): + test $0x10, %ch + jnz L(PrologFindZeroExit13) + test $0x20, %ch + jnz L(PrologFindZeroExit14) + test $0x40, %ch + jnz L(PrologFindZeroExit15) + and $1 << 16 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit1): + and $1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit2): + and $1 << 2 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit3): + and $1 << 3 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit5): + and $1 << 5 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit6): + and $1 << 6 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit7): + and $1 << 7 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit9): + and $1 << 9 
- 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit10): + and $1 << 10 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit11): + and $1 << 11 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit13): + and $1 << 13 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit14): + and $1 << 14 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit15): + and $1 << 15 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + +END (__strrchr_sse2) +#endif diff --git a/sysdeps/i386/multiarch/strrchr.c b/sysdeps/i386/multiarch/strrchr.c new file mode 100644 index 0000000000..995e00731c --- /dev/null +++ b/sysdeps/i386/multiarch/strrchr.c @@ -0,0 +1,55 @@ +/* Multiple versions of strrchr + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in libc. */ +#if IS_IN (libc) +/* Redefine strrchr so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. 
*/ +# undef strrchr +# define strrchr __redirect_strrchr +# include +# undef strrchr + +# include + +extern __typeof (__redirect_strrchr) __strrchr_i386 attribute_hidden; +extern __typeof (__redirect_strrchr) __strrchr_sse2 attribute_hidden; +extern __typeof (__redirect_strrchr) __strrchr_sse2_bsf attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__redirect_strrchr) strrchr; +extern void *strrchr_ifunc (void) __asm__ ("strrchr"); + +void * +strrchr_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE2)) + { + if (HAS_ARCH_FEATURE (Slow_BSF)) + return __strrchr_sse2; + else + return __strrchr_sse2_bsf; + } + + return __strrchr_i386; +} +__asm__ (".type strrchr, %gnu_indirect_function"); +weak_alias (strrchr, rindex) +#endif -- cgit 1.4.1