/* strlen with SSE2 and BSF
Copyright (C) 2010-2014 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#if defined SHARED && IS_IN (libc)
#include <sysdep.h>
/* Unwind (CFI) bookkeeping that accompanies a 4-byte push of REG:
   the CFA offset grows by 4 and REG is recorded as saved at offset 0.
   The cfi_* names are not defined in this file; presumably they are
   wrappers for the assembler .cfi_* directives supplied by the
   included header -- confirm there.  */
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
/* Inverse of CFI_PUSH: the CFA offset shrinks by 4 and REG is marked
   as restored.  */
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
/* Push or pop one 32-bit register and emit the matching unwind
   annotation right after the instruction.  */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
/* Offset from %esp to the first stack argument once ENTRANCE has run:
   4 bytes for the return address plus 8 bytes for the two registers
   that ENTRANCE pushes.  The expansion is unparenthesized, so it is
   only meant to be used as a plain displacement, as in STR(%esp).  */
#define PARMS 4 + 8 /* Preserve ESI and EDI. */
/* Stack displacement of the string pointer argument.  */
#define STR PARMS
/* Prologue: save %esi and %edi (both are overwritten by the function
   body), then remember the resulting unwind state so that RETURN can
   reinstate it for code placed after a ret.  */
#define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state
/* Epilogue: pop the saved registers in reverse order and return.
   The trailing cfi_restore_state / cfi_remember_state pair follows
   the ret, so the instructions after this expansion are again
   described by the state saved in ENTRANCE, and that state is saved
   once more for the next RETURN.  */
#define RETURN POP (%edi); POP (%esi); ret; \
cfi_restore_state; cfi_remember_state
/* __strlen_sse2_bsf: strlen using SSE2 byte compares and BSF.

   In:   one stack argument, the string pointer, read from STR(%esp)
         after the prologue.
   Out:  %eax = number of bytes before the first zero byte.
   Uses: %ecx, %edx, %xmm0-%xmm3 and flags as scratch; %esi and %edi
         are saved by ENTRANCE and restored on every return path.

   Register roles in the body:
     %edi  original string pointer (never modified after the load)
     %eax  0 on the short-string path, otherwise the 16-byte-aligned
           scan cursor; turned into the length at the exit labels
     %ecx  s & 0x3f, later the shift count for the head mask
     %edx  pmovmskb result: one bit per byte, set where a zero byte
           was found
     %esi  mask that discards bytes located before s in the first
           aligned chunk  */
.text
ENTRY ( __strlen_sse2_bsf)
ENTRANCE
mov STR(%esp), %edi
/* %eax = 0 so that L(exit_less16) can add the bit index directly.  */
xor %eax, %eax
/* %ecx = offset of s inside its 64-byte block.  When that offset is
   at most 0x30, the 16 bytes starting at s end inside the same block,
   so the unaligned load below cannot cross a 64-byte boundary
   (presumably the point is to never touch a following page).  */
mov %edi, %ecx
and $0x3f, %ecx
pxor %xmm0, %xmm0
cmp $0x30, %ecx
ja L(next)
/* Head case 1: check the first 16 bytes at s with an unaligned load.  */
movdqu (%edi), %xmm1
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit_less16)
/* No zero byte in s[0..15]: continue with aligned chunks.  The loop
   starts at 16(%eax), which is at most s + 16, so no byte is skipped
   (some bytes may be examined twice).  */
mov %edi, %eax
and $-16, %eax
jmp L(align16_start)
L(next):
/* Head case 2: s is in the last 15 bytes of its 64-byte block.
   Compare the aligned 16-byte chunk that contains s and ignore the
   bytes in front of s.  */
mov %edi, %eax
and $-16, %eax
pcmpeqb (%eax), %xmm0
mov $-1, %esi
/* %ecx = (s & 0x3f) - (s & -16).  A 32-bit shl only uses the low
   5 bits of %cl, and those equal s & 15, so %esi becomes
   -1 << (s & 15): zero in the bit positions of bytes before s.  */
sub %eax, %ecx
shl %cl, %esi
pmovmskb %xmm0, %edx
and %esi, %edx
jnz L(exit)
L(align16_start):
/* Four zeroed comparands.  %xmm0 must be cleared again here because
   the L(next) compare may have matched bytes before s.  */
pxor %xmm0, %xmm0
pxor %xmm1, %xmm1
pxor %xmm2, %xmm2
pxor %xmm3, %xmm3
.p2align 4
/* Main loop: 64 bytes per iteration as four aligned 16-byte compares
   at %eax + 16, + 32, + 48 and + 64.  pcmpeqb overwrites its register
   with the match result; whenever the loop continues that result was
   all zero, so each register is a valid zero comparand again.  */
L(align16_loop):
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
/* Advance the cursor to the chunk just compared, so that falling
   into L(exit) needs no extra chunk offset.  */
lea 64(%eax), %eax
test %edx, %edx
jz L(align16_loop)
/* Exit paths.  %edx is non-zero on every one of them, so bsf yields
   the index of the first zero byte within the chunk.
   length = (chunk base - s) + index [+ chunk offset].  */
L(exit):
sub %edi, %eax
L(exit_less16):
/* Reached directly from the unaligned head check with %eax == 0.  */
bsf %edx, %edx
add %edx, %eax
RETURN
L(exit16):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $16, %eax
RETURN
L(exit32):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $32, %eax
RETURN
L(exit48):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $48, %eax
/* Last return path: the epilogue is written out by hand instead of
   using RETURN, so no cfi_restore_state / cfi_remember_state pair is
   emitted; no further code follows before END.  */
POP (%edi)
POP (%esi)
ret
END ( __strlen_sse2_bsf)
#endif