author    H.J. Lu <hjl.tools@gmail.com>	2017-05-30 12:39:14 -0700
committer H.J. Lu <hjl.tools@gmail.com>	2017-05-30 12:39:14 -0700
commit    4f26ef1b67287d1f2c32865f7d79c13abda81915 (patch)
tree      bc9cb0c0b5c581cee6aba4f2fc138b48783ab565
parent    542a34783ce1cfc63929ec50ab1f9f738711b815 (diff)
x86_64: Remove redundant REX bytes from memchr.S
Per the x86-64 specification, writes to 32-bit destination registers are
zero-extended to 64 bits.  There is no need to use 64-bit registers when
only the lower 32 bits are non-zero (a short encoding sketch follows the
ChangeLog entry below).

	* sysdeps/x86_64/memchr.S (memchr): Use 32-bit registers for
	the lower 32 bits.
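
A minimal sketch of the saving (an illustration, not part of the patch;
the byte sequences assume the standard x86-64 encoding, AT&T syntax):

	# A write to a 32-bit destination register clears bits 63:32 of the
	# corresponding 64-bit register, so the 32-bit forms below stay
	# correct wherever only the low 32 bits matter, while dropping the
	# one-byte REX.W prefix (0x48):

	mov	%rdi, %rcx		# 48 89 f9  (3 bytes)
	mov	%edi, %ecx		# 89 f9     (2 bytes); %rcx now holds the
					# low 32 bits of %rdi, zero-extended

	xor	%rax, %rax		# 48 31 c0  (3 bytes)
	xor	%eax, %eax		# 31 c0     (2 bytes); still clears all
					# 64 bits of %rax

In memchr.S the narrowed copies feed masks such as "and $63, %ecx", so
only the low bits are ever consumed.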
-rw-r--r--	ChangeLog	 5
-rw-r--r--	sysdeps/x86_64/memchr.S	28
2 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 93f0088ace..92d762e48c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2017-05-30  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/memchr.S (memchr): Use 32-bit registers for
+	the lower 32 bits.
+
 2017-05-29  Andreas Schwab  <schwab@linux-m68k.org>
 
 	* sysdeps/m68k/Makefile (ASFLAGS-.o) [$(subdir) = csu &&
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index f82e1c5bf7..d3be012424 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -22,18 +22,18 @@
 
 	.text
 ENTRY(memchr)
-	movd	%rsi, %xmm1
-	mov	%rdi, %rcx
+	movd	%esi, %xmm1
+	mov	%edi, %ecx
 
 	punpcklbw %xmm1, %xmm1
 	test	%rdx, %rdx
 	jz	L(return_null)
 	punpcklbw %xmm1, %xmm1
 
-	and	$63, %rcx
+	and	$63, %ecx
 	pshufd	$0, %xmm1, %xmm1
 
-	cmp	$48, %rcx
+	cmp	$48, %ecx
 	ja	L(crosscache)
 
 	movdqu	(%rdi), %xmm0
@@ -45,7 +45,7 @@ ENTRY(memchr)
 	sub	$16, %rdx
 	jbe	L(return_null)
 	add	$16, %rdi
-	and	$15, %rcx
+	and	$15, %ecx
 	and	$-16, %rdi
 	add	%rcx, %rdx
 	sub	$64, %rdx
@@ -54,7 +54,7 @@ ENTRY(memchr)
 
 	.p2align 4
 L(crosscache):
-	and	$15, %rcx
+	and	$15, %ecx
 	and	$-16, %rdi
 	movdqa	(%rdi), %xmm0
 
@@ -148,7 +148,7 @@ L(loop_prolog):
 
 	mov	%rdi, %rcx
 	and	$-64, %rdi
-	and	$63, %rcx
+	and	$63, %ecx
 	add	%rcx, %rdx
 
 	.p2align 4
@@ -200,7 +200,7 @@ L(align64_loop):
 
 	.p2align 4
 L(exit_loop):
-	add	$32, %rdx
+	add	$32, %edx
 	jle	L(exit_loop_32)
 
 	movdqa	(%rdi), %xmm0
@@ -220,32 +220,32 @@ L(exit_loop):
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32_1)
-	sub	$16, %rdx
+	sub	$16, %edx
 	jle	L(return_null)
 
 	pcmpeqb	48(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
 	jnz	L(matches48_1)
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret
 
 	.p2align 4
 L(exit_loop_32):
-	add	$32, %rdx
+	add	$32, %edx
 	movdqa	(%rdi), %xmm0
 	pcmpeqb	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches_1)
-	sub	$16, %rdx
+	sub	$16, %edx
 	jbe	L(return_null)
 
 	pcmpeqb	16(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
 	jnz	L(matches16_1)
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret
 
 	.p2align 4
@@ -306,7 +306,7 @@ L(matches48_1):
 
 	.p2align 4
 L(return_null):
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret
 END(memchr)
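
One caveat worth noting (a reader's note, not from the commit): in the
exit_loop hunks the flag-setting add/sub on the remaining count is also
narrowed, and the following jle/jbe consume flags computed on the 32-bit
value.  That appears safe here because every path into L(exit_loop) has
already reduced %rdx to at most 64 before the "sub $64, %rdx; jbe", but
it would not be safe for an arbitrary 64-bit count.  A hypothetical
standalone sketch (assumed file name narrow_demo.S, function name
narrow_sub_demo):

	.text
	.globl	narrow_sub_demo
	.type	narrow_sub_demo, @function
narrow_sub_demo:
	# 64-bit subtract: with a count of 2^32 the result is positive,
	# so this jle is not taken.
	movabs	$0x100000000, %rdx
	sub	$16, %rdx		# %rdx = 0xfffffff0, SF = 0, ZF = 0
	jle	.Ltaken64

	# Narrowed subtract: only the low 32 bits (zero) are seen; the
	# result wraps to -16 and the same jle is taken.
	movabs	$0x100000000, %rdx
	sub	$16, %edx		# %edx = -16, SF = 1
	jle	.Ltaken32

	xor	%eax, %eax		# return 0: neither branch taken
	ret
.Ltaken64:
	mov	$1, %eax		# not reached
	ret
.Ltaken32:
	mov	$2, %eax		# return 2: 32-bit form branched
	ret
	.size	narrow_sub_demo, .-narrow_sub_demo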