diff options
Diffstat (limited to 'sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S')
-rw-r--r-- | sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S new file mode 100644 index 0000000000..0dc651f017 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S @@ -0,0 +1,127 @@ +/* strlen with SSE2 and BSF + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#if defined SHARED && !defined NOT_IN_libc + +#include <sysdep.h> +#include "asm-syntax.h" + +#define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +#define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) +#define PARMS 4 + 8 /* Preserve ESI and EDI. */ +#define STR PARMS +#define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state +#define RETURN POP (%edi); POP (%esi); ret; \ + cfi_restore_state; cfi_remember_state + + .text +ENTRY ( __strlen_sse2_bsf) + ENTRANCE + mov STR(%esp), %edi + xor %eax, %eax + mov %edi, %ecx + and $0x3f, %ecx + pxor %xmm0, %xmm0 + cmp $0x30, %ecx + ja L(next) + movdqu (%edi), %xmm1 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit_less16) + mov %edi, %eax + and $-16, %eax + jmp L(align16_start) +L(next): + + mov %edi, %eax + and $-16, %eax + pcmpeqb (%eax), %xmm0 + mov $-1, %esi + sub %eax, %ecx + shl %cl, %esi + pmovmskb %xmm0, %edx + and %esi, %edx + jnz L(exit) +L(align16_start): + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + .p2align 4 +L(align16_loop): + pcmpeqb 16(%eax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16) + + pcmpeqb 32(%eax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32) + + pcmpeqb 48(%eax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48) + + pcmpeqb 64(%eax), %xmm3 + pmovmskb %xmm3, %edx + lea 64(%eax), %eax + test %edx, %edx + jz L(align16_loop) +L(exit): + sub %edi, %eax +L(exit_less16): + bsf %edx, %edx + add %edx, %eax + RETURN +L(exit16): + sub %edi, %eax + bsf %edx, %edx + add %edx, %eax + add $16, %eax + RETURN +L(exit32): + sub %edi, %eax + bsf %edx, %edx + add %edx, %eax + add $32, %eax + RETURN +L(exit48): + sub %edi, %eax + bsf %edx, %edx + add %edx, %eax + add $48, %eax + POP (%edi) + POP (%esi) + ret + +END ( __strlen_sse2_bsf) + +#endif |