From 39ecf822e2a5be93b2132d21aacfb7466073879e Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 25 Aug 2015 06:51:45 -0700 Subject: Add i386 wcslen multiarch functions --- sysdeps/i386/i686/multiarch/Makefile | 3 +- sysdeps/i386/i686/multiarch/wcslen-c.c | 9 -- sysdeps/i386/i686/multiarch/wcslen-sse2.S | 193 ------------------------------ sysdeps/i386/i686/multiarch/wcslen.S | 37 ------ sysdeps/i386/multiarch/Makefile | 3 +- sysdeps/i386/multiarch/ifunc-impl-list.c | 4 +- sysdeps/i386/multiarch/wcslen-i386.c | 9 ++ sysdeps/i386/multiarch/wcslen-sse2.S | 193 ++++++++++++++++++++++++++++++ sysdeps/i386/multiarch/wcslen.c | 50 ++++++++ 9 files changed, 256 insertions(+), 245 deletions(-) delete mode 100644 sysdeps/i386/i686/multiarch/wcslen-c.c delete mode 100644 sysdeps/i386/i686/multiarch/wcslen-sse2.S delete mode 100644 sysdeps/i386/i686/multiarch/wcslen.S create mode 100644 sysdeps/i386/multiarch/wcslen-i386.c create mode 100644 sysdeps/i386/multiarch/wcslen-sse2.S create mode 100644 sysdeps/i386/multiarch/wcslen.c diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 57cd608c75..3e078b7ec4 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -1,4 +1,3 @@ ifeq ($(subdir),wcsmbs) -sysdep_routines += wcslen-sse2 wcslen-c \ - wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c +sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c endif diff --git a/sysdeps/i386/i686/multiarch/wcslen-c.c b/sysdeps/i386/i686/multiarch/wcslen-c.c deleted file mode 100644 index a335dc0f7e..0000000000 --- a/sysdeps/i386/i686/multiarch/wcslen-c.c +++ /dev/null @@ -1,9 +0,0 @@ -#include <wchar.h> - -#if IS_IN (libc) -# define WCSLEN __wcslen_ia32 -#endif - -extern __typeof (wcslen) __wcslen_ia32; - -#include "wcsmbs/wcslen.c" diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S deleted file mode 100644 index 1c54b1894c..0000000000 --- a/sysdeps/i386/i686/multiarch/wcslen-sse2.S +++
/dev/null @@ -1,193 +0,0 @@ -/* wcslen with SSE2 - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) -# include <sysdep.h> -# define STR 4 - - .text -ENTRY (__wcslen_sse2) - mov STR(%esp), %edx - - cmp $0, (%edx) - jz L(exit_tail0) - cmp $0, 4(%edx) - jz L(exit_tail1) - cmp $0, 8(%edx) - jz L(exit_tail2) - cmp $0, 12(%edx) - jz L(exit_tail3) - cmp $0, 16(%edx) - jz L(exit_tail4) - cmp $0, 20(%edx) - jz L(exit_tail5) - cmp $0, 24(%edx) - jz L(exit_tail6) - cmp $0, 28(%edx) - jz L(exit_tail7) - - pxor %xmm0, %xmm0 - - lea 32(%edx), %eax - lea 16(%edx), %ecx - and $-16, %eax - - pcmpeqd (%eax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqd (%eax), %xmm1 - pmovmskb %xmm1, %edx - pxor %xmm2, %xmm2 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqd (%eax), %xmm2 - pmovmskb %xmm2, %edx - pxor %xmm3, %xmm3 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqd (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - and $-0x40, %eax - - .p2align 4 -L(aligned_64_loop): - movaps (%eax), %xmm0 - movaps 16(%eax), %xmm1 - movaps 32(%eax), %xmm2 - movaps 48(%eax), %xmm6 - - pminub %xmm1, %xmm0 - pminub %xmm6, %xmm2 - pminub
%xmm0, %xmm2 - pcmpeqd %xmm3, %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 64(%eax), %eax - jz L(aligned_64_loop) - - pcmpeqd -64(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 48(%ecx), %ecx - jnz L(exit) - - pcmpeqd %xmm1, %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqd -32(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqd %xmm6, %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - jmp L(aligned_64_loop) - - .p2align 4 -L(exit): - sub %ecx, %eax - shr $2, %eax - test %dl, %dl - jz L(exit_high) - - mov %dl, %cl - and $15, %cl - jz L(exit_1) - ret - - .p2align 4 -L(exit_high): - mov %dh, %ch - and $15, %ch - jz L(exit_3) - add $2, %eax - ret - - .p2align 4 -L(exit_1): - add $1, %eax - ret - - .p2align 4 -L(exit_3): - add $3, %eax - ret - - .p2align 4 -L(exit_tail0): - xor %eax, %eax - ret - - .p2align 4 -L(exit_tail1): - mov $1, %eax - ret - - .p2align 4 -L(exit_tail2): - mov $2, %eax - ret - - .p2align 4 -L(exit_tail3): - mov $3, %eax - ret - - .p2align 4 -L(exit_tail4): - mov $4, %eax - ret - - .p2align 4 -L(exit_tail5): - mov $5, %eax - ret - - .p2align 4 -L(exit_tail6): - mov $6, %eax - ret - - .p2align 4 -L(exit_tail7): - mov $7, %eax - ret - -END (__wcslen_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcslen.S b/sysdeps/i386/i686/multiarch/wcslen.S deleted file mode 100644 index 7740404a82..0000000000 --- a/sysdeps/i386/i686/multiarch/wcslen.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of wcslen - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__wcslen) - .type __wcslen, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcslen_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcslen_sse2) -2: ret -END(__wcslen) - -weak_alias(__wcslen, wcslen) -#endif diff --git a/sysdeps/i386/multiarch/Makefile b/sysdeps/i386/multiarch/Makefile index d06b2ad9f9..6d594f8611 100644 --- a/sysdeps/i386/multiarch/Makefile +++ b/sysdeps/i386/multiarch/Makefile @@ -51,7 +51,8 @@ endif ifeq ($(subdir),wcsmbs) sysdep_routines += wcschr-i386 wcschr-sse2 wcsrchr-i386 wcsrchr-sse2 \ - wcscmp-i386 wcscmp-sse2 wcscpy-i386 wcscpy-ssse3 + wcscmp-i386 wcscmp-sse2 wcscpy-i386 wcscpy-ssse3 \ + wcslen-i386 wcslen-sse2 endif ifeq (mathyes,$(subdir)$(config-cflags-avx)) diff --git a/sysdeps/i386/multiarch/ifunc-impl-list.c b/sysdeps/i386/multiarch/ifunc-impl-list.c index c5cc501c75..a10e700649 100644 --- a/sysdeps/i386/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/multiarch/ifunc-impl-list.c @@ -334,13 +334,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __wcscpy_ssse3) IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_i386)) -#if 0 /* Support sysdeps/i386/i686/multiarch/wcslen.S.
*/ IFUNC_IMPL (i, name, wcslen, IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2), __wcslen_sse2) - IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32)) -#endif + IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_i386)) /* Support sysdeps/i386/i686/multiarch/wcsrchr.S. */ IFUNC_IMPL (i, name, wcsrchr, diff --git a/sysdeps/i386/multiarch/wcslen-i386.c b/sysdeps/i386/multiarch/wcslen-i386.c new file mode 100644 index 0000000000..9c7b94e09f --- /dev/null +++ b/sysdeps/i386/multiarch/wcslen-i386.c @@ -0,0 +1,9 @@ +#include <wchar.h> + +#if IS_IN (libc) +# define WCSLEN __wcslen_i386 +#endif + +extern __typeof (wcslen) __wcslen_i386; + +#include "wcsmbs/wcslen.c" diff --git a/sysdeps/i386/multiarch/wcslen-sse2.S b/sysdeps/i386/multiarch/wcslen-sse2.S new file mode 100644 index 0000000000..1c54b1894c --- /dev/null +++ b/sysdeps/i386/multiarch/wcslen-sse2.S @@ -0,0 +1,193 @@ +/* wcslen with SSE2 + Copyright (C) 2011-2015 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#if IS_IN (libc) +# include <sysdep.h> +# define STR 4 + + .text +ENTRY (__wcslen_sse2) + mov STR(%esp), %edx + + cmp $0, (%edx) + jz L(exit_tail0) + cmp $0, 4(%edx) + jz L(exit_tail1) + cmp $0, 8(%edx) + jz L(exit_tail2) + cmp $0, 12(%edx) + jz L(exit_tail3) + cmp $0, 16(%edx) + jz L(exit_tail4) + cmp $0, 20(%edx) + jz L(exit_tail5) + cmp $0, 24(%edx) + jz L(exit_tail6) + cmp $0, 28(%edx) + jz L(exit_tail7) + + pxor %xmm0, %xmm0 + + lea 32(%edx), %eax + lea 16(%edx), %ecx + and $-16, %eax + + pcmpeqd (%eax), %xmm0 + pmovmskb %xmm0, %edx + pxor %xmm1, %xmm1 + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqd (%eax), %xmm1 + pmovmskb %xmm1, %edx + pxor %xmm2, %xmm2 + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqd (%eax), %xmm2 + pmovmskb %xmm2, %edx + pxor %xmm3, %xmm3 + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqd (%eax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + and $-0x40, %eax + + .p2align 4 +L(aligned_64_loop): + movaps (%eax), %xmm0 + movaps 16(%eax), %xmm1 + movaps 32(%eax), %xmm2 + movaps 48(%eax), %xmm6 + + pminub %xmm1, %xmm0 + pminub %xmm6, %xmm2 + pminub %xmm0, %xmm2 + pcmpeqd %xmm3, %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + lea 64(%eax), %eax + jz L(aligned_64_loop) + + pcmpeqd -64(%eax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 48(%ecx), %ecx + jnz L(exit) + + pcmpeqd %xmm1, %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea -16(%ecx), %ecx + jnz L(exit) + + pcmpeqd -32(%eax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea -16(%ecx), %ecx + jnz L(exit) + + pcmpeqd %xmm6, %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea -16(%ecx), %ecx + jnz L(exit) + + jmp L(aligned_64_loop) + + .p2align 4 +L(exit): + sub %ecx, %eax + shr $2, %eax + test %dl, %dl + jz L(exit_high) + + mov %dl, %cl + and $15, %cl + jz L(exit_1) + ret + + .p2align 4 +L(exit_high): + mov %dh, %ch + and $15, %ch + jz L(exit_3) + add $2, %eax + ret + + .p2align 4 +L(exit_1):
+ add $1, %eax + ret + + .p2align 4 +L(exit_3): + add $3, %eax + ret + + .p2align 4 +L(exit_tail0): + xor %eax, %eax + ret + + .p2align 4 +L(exit_tail1): + mov $1, %eax + ret + + .p2align 4 +L(exit_tail2): + mov $2, %eax + ret + + .p2align 4 +L(exit_tail3): + mov $3, %eax + ret + + .p2align 4 +L(exit_tail4): + mov $4, %eax + ret + + .p2align 4 +L(exit_tail5): + mov $5, %eax + ret + + .p2align 4 +L(exit_tail6): + mov $6, %eax + ret + + .p2align 4 +L(exit_tail7): + mov $7, %eax + ret + +END (__wcslen_sse2) +#endif diff --git a/sysdeps/i386/multiarch/wcslen.c b/sysdeps/i386/multiarch/wcslen.c new file mode 100644 index 0000000000..4a7970eebc --- /dev/null +++ b/sysdeps/i386/multiarch/wcslen.c @@ -0,0 +1,50 @@ +/* Multiple versions of wcslen. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in libc. */ +#if IS_IN (libc) +/* Redefine wcslen so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below.
*/ +# undef wcslen +# define wcslen __redirect_wcslen +# include <wchar.h> +# undef wcslen + +# include <init-arch.h> + +extern __typeof (__redirect_wcslen) __wcslen_i386 attribute_hidden; +extern __typeof (__redirect_wcslen) __wcslen_sse2 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__redirect_wcslen) __wcslen; +extern void *wcslen_ifunc (void) __asm__ ("__wcslen"); + +void * +wcslen_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE2)) + return __wcslen_sse2; + + return __wcslen_i386; +} +__asm__ (".type __wcslen, %gnu_indirect_function"); + +weak_alias (__wcslen, wcslen) +#endif -- cgit 1.4.1