about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2009-04-05 18:49:28 +0000
committerUlrich Drepper <drepper@redhat.com>2009-04-05 18:49:28 +0000
commita152f366dc38314c891c523639706a20e353dd6b (patch)
treedb0a148c4f7f9afb39091baad0f1b764b966abce /sysdeps
parentc0bac8b05a52c6a5d39fc6ac50779773fbc28f20 (diff)
downloadglibc-a152f366dc38314c891c523639706a20e353dd6b.tar.gz
glibc-a152f366dc38314c891c523639706a20e353dd6b.tar.xz
glibc-a152f366dc38314c891c523639706a20e353dd6b.zip
* sysdeps/x86_64/strlen.S: Optimize by using SSE2 instructions.
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/x86_64/strlen.S146
1 files changed, 30 insertions, 116 deletions
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index fd950edaaa..b83332c90c 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,6 +1,6 @@
 /* strlen(str) -- determine the length of the string STR.
-   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
-   Based on i486 version contributed by Ulrich Drepper <drepper@redhat.com>.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Contributed by Ulrich Drepper <drepper@redhat.com>.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,121 +19,35 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
-#include "asm-syntax.h"
-#include "bp-sym.h"
-#include "bp-asm.h"
 
 
-	.text
-ENTRY (strlen)
-	movq %rdi, %rcx		/* Duplicate source pointer. */
-	andl $7, %ecx		/* mask alignment bits */
-	movq %rdi, %rax		/* duplicate destination.  */
-	jz 1f			/* aligned => start loop */
-
-	neg %ecx		/* We need to align to 8 bytes.  */
-	addl $8,%ecx
-	/* Search the first bytes directly.  */
-0:	cmpb $0x0,(%rax)	/* is byte NUL? */
-	je 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-	decl %ecx
-	jnz 0b
-
-1:	movq $0xfefefefefefefeff,%r8 /* Save magic.  */
-
-	.p2align 4		/* Align loop.  */
-4:	/* Main Loop is unrolled 4 times.  */
-	/* First unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Second unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Third unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Fourth unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jz 4b			/* no NUL found => continue loop */
-
-	.p2align 4		/* Align, it's a jump target.  */
-3:	subq $8,%rax		/* correct pointer increment.  */
-
-	testb %cl, %cl		/* is first byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testb %ch, %ch		/* is second byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testl $0x00ff0000, %ecx /* is third byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-	testl $0xff000000, %ecx /* is fourth byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-	shrq $32, %rcx		/* look at other half.  */
-
-	testb %cl, %cl		/* is first byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testb %ch, %ch		/* is second byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testl $0xff0000, %ecx	/* is third byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-2:
-	subq %rdi, %rax		/* compute difference to string start */
+ENTRY(strlen)
+	movq	%rdi, %rcx
+	movq	%rdi, %r8
+	andq	$~15, %rdi
+	pxor	%xmm1, %xmm1
+	orl	$0xffffffff, %esi
+	movdqa	(%rdi), %xmm0
+	subq	%rdi, %rcx
+	leaq	16(%rdi), %rdi
+	pcmpeqb	%xmm1, %xmm0
+	shl	%cl, %esi
+	pmovmskb %xmm0, %edx
+	xorl	%eax, %eax
+	negq	%r8
+	andl	%esi, %edx
+	jnz	1f
+
+2:	movdqa	(%rdi), %xmm0
+	leaq	16(%rdi), %rdi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %edx
+	testl	%edx, %edx
+	jz	2b
+
+1:	leaq	-16(%rdi,%r8), %rdi
+	bsfl	%edx, %eax
+	addq	%rdi, %rax
 	ret
-END (strlen)
+END(strlen)
 libc_hidden_builtin_def (strlen)