diff options
Diffstat (limited to 'sysdeps/i386/multiarch')
-rw-r--r-- | sysdeps/i386/multiarch/Makefile | 3 |
-rw-r--r-- | sysdeps/i386/multiarch/ifunc-impl-list.c | 4 |
-rw-r--r-- | sysdeps/i386/multiarch/wcslen-i386.c | 9 |
-rw-r--r-- | sysdeps/i386/multiarch/wcslen-sse2.S | 193 |
-rw-r--r-- | sysdeps/i386/multiarch/wcslen.c | 50 |
5 files changed, 255 insertions, 4 deletions
diff --git a/sysdeps/i386/multiarch/Makefile b/sysdeps/i386/multiarch/Makefile
index d06b2ad9f9..6d594f8611 100644
--- a/sysdeps/i386/multiarch/Makefile
+++ b/sysdeps/i386/multiarch/Makefile
@@ -51,7 +51,8 @@ endif
 
 ifeq ($(subdir),wcsmbs)
 sysdep_routines += wcschr-i386 wcschr-sse2 wcsrchr-i386 wcsrchr-sse2 \
-		   wcscmp-i386 wcscmp-sse2 wcscpy-i386 wcscpy-ssse3
+		   wcscmp-i386 wcscmp-sse2 wcscpy-i386 wcscpy-ssse3 \
+		   wcslen-i386 wcslen-sse2
 endif
 
 ifeq (mathyes,$(subdir)$(config-cflags-avx))
diff --git a/sysdeps/i386/multiarch/ifunc-impl-list.c b/sysdeps/i386/multiarch/ifunc-impl-list.c
index c5cc501c75..a10e700649 100644
--- a/sysdeps/i386/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/multiarch/ifunc-impl-list.c
@@ -334,13 +334,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                               __wcscpy_ssse3)
               IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_i386))
 
-#if 0
   /* Support sysdeps/i386/i686/multiarch/wcslen.S. */
   IFUNC_IMPL (i, name, wcslen,
               IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2),
                               __wcslen_sse2)
-              IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32))
-#endif
+              IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_i386))
 
   /* Support sysdeps/i386/i686/multiarch/wcsrchr.S. */
   IFUNC_IMPL (i, name, wcsrchr,
diff --git a/sysdeps/i386/multiarch/wcslen-i386.c b/sysdeps/i386/multiarch/wcslen-i386.c
new file mode 100644
index 0000000000..9c7b94e09f
--- /dev/null
+++ b/sysdeps/i386/multiarch/wcslen-i386.c
@@ -0,0 +1,9 @@
+#include <wchar.h>
+
+#if IS_IN (libc)
+# define WCSLEN __wcslen_i386
+#endif
+
+extern __typeof (wcslen) __wcslen_i386;
+
+#include "wcsmbs/wcslen.c"
diff --git a/sysdeps/i386/multiarch/wcslen-sse2.S b/sysdeps/i386/multiarch/wcslen-sse2.S
new file mode 100644
index 0000000000..1c54b1894c
--- /dev/null
+++ b/sysdeps/i386/multiarch/wcslen-sse2.S
@@ -0,0 +1,193 @@
+/* wcslen with SSE2
+   Copyright (C) 2011-2015 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# include <sysdep.h>
+# define STR 4
+
+	.text
+ENTRY (__wcslen_sse2)
+	mov	STR(%esp), %edx
+
+	cmp	$0, (%edx)
+	jz	L(exit_tail0)
+	cmp	$0, 4(%edx)
+	jz	L(exit_tail1)
+	cmp	$0, 8(%edx)
+	jz	L(exit_tail2)
+	cmp	$0, 12(%edx)
+	jz	L(exit_tail3)
+	cmp	$0, 16(%edx)
+	jz	L(exit_tail4)
+	cmp	$0, 20(%edx)
+	jz	L(exit_tail5)
+	cmp	$0, 24(%edx)
+	jz	L(exit_tail6)
+	cmp	$0, 28(%edx)
+	jz	L(exit_tail7)
+
+	pxor	%xmm0, %xmm0
+
+	lea	32(%edx), %eax
+	lea	16(%edx), %ecx
+	and	$-16, %eax
+
+	pcmpeqd	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	pxor	%xmm1, %xmm1
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	pxor	%xmm2, %xmm2
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	pxor	%xmm3, %xmm3
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%eax), %eax
+	jnz	L(exit)
+
+	and	$-0x40, %eax
+
+	.p2align 4
+L(aligned_64_loop):
+	movaps	(%eax), %xmm0
+	movaps	16(%eax), %xmm1
+	movaps	32(%eax), %xmm2
+	movaps	48(%eax), %xmm6
+
+	pminub	%xmm1, %xmm0
+	pminub	%xmm6, %xmm2
+	pminub	%xmm0, %xmm2
+	pcmpeqd	%xmm3, %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	64(%eax), %eax
+	jz	L(aligned_64_loop)
+
+	pcmpeqd	-64(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	48(%ecx), %ecx
+	jnz	L(exit)
+
+	pcmpeqd	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	-16(%ecx), %ecx
+	jnz	L(exit)
+
+	pcmpeqd	-32(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	-16(%ecx), %ecx
+	jnz	L(exit)
+
+	pcmpeqd	%xmm6, %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	-16(%ecx), %ecx
+	jnz	L(exit)
+
+	jmp	L(aligned_64_loop)
+
+	.p2align 4
+L(exit):
+	sub	%ecx, %eax
+	shr	$2, %eax
+	test	%dl, %dl
+	jz	L(exit_high)
+
+	mov	%dl, %cl
+	and	$15, %cl
+	jz	L(exit_1)
+	ret
+
+	.p2align 4
+L(exit_high):
+	mov	%dh, %ch
+	and	$15, %ch
+	jz	L(exit_3)
+	add	$2, %eax
+	ret
+
+	.p2align 4
+L(exit_1):
+	add	$1, %eax
+	ret
+
+	.p2align 4
+L(exit_3):
+	add	$3, %eax
+	ret
+
+	.p2align 4
+L(exit_tail0):
+	xor	%eax, %eax
+	ret
+
+	.p2align 4
+L(exit_tail1):
+	mov	$1, %eax
+	ret
+
+	.p2align 4
+L(exit_tail2):
+	mov	$2, %eax
+	ret
+
+	.p2align 4
+L(exit_tail3):
+	mov	$3, %eax
+	ret
+
+	.p2align 4
+L(exit_tail4):
+	mov	$4, %eax
+	ret
+
+	.p2align 4
+L(exit_tail5):
+	mov	$5, %eax
+	ret
+
+	.p2align 4
+L(exit_tail6):
+	mov	$6, %eax
+	ret
+
+	.p2align 4
+L(exit_tail7):
+	mov	$7, %eax
+	ret
+
+END (__wcslen_sse2)
+#endif
diff --git a/sysdeps/i386/multiarch/wcslen.c b/sysdeps/i386/multiarch/wcslen.c
new file mode 100644
index 0000000000..4a7970eebc
--- /dev/null
+++ b/sysdeps/i386/multiarch/wcslen.c
@@ -0,0 +1,50 @@
+/* Multiple versions of wcslen.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+/* Redefine wcslen so that the compiler won't complain about the type
+   mismatch with the IFUNC selector in strong_alias, below. */
+# undef wcslen
+# define wcslen __redirect_wcslen
+# include <wchar.h>
+# undef wcslen
+
+# include <init-arch.h>
+
+extern __typeof (__redirect_wcslen) __wcslen_i386 attribute_hidden;
+extern __typeof (__redirect_wcslen) __wcslen_sse2 attribute_hidden;
+
+/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
+   ifunc symbol properly. */
+extern __typeof (__redirect_wcslen) __wcslen;
+extern void *wcslen_ifunc (void) __asm__ ("__wcslen");
+
+void *
+wcslen_ifunc (void)
+{
+  if (HAS_CPU_FEATURE (SSE2))
+    return __wcslen_sse2;
+
+  return __wcslen_i386;
+}
+__asm__ (".type __wcslen, %gnu_indirect_function");
+
+weak_alias (__wcslen, wcslen)
+#endif