author	Florian Weimer <fweimer@redhat.com>	2017-06-23 17:23:44 +0200
committer	Florian Weimer <fweimer@redhat.com>	2017-06-23 17:24:40 +0200
commit	3ec7c02cc3e922b9364dc8cfd1d4546671b91003
tree	283d54448fe89359272093156316884e61992c9a
parent	7fa1d9462baabc5a1058efc13a48444af4678acf
x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
This code:

L(between_2_3):
	/* Load as big endian with overlapping loads and bswap to avoid
	   branches.  */
	movzwl	-2(%rdi, %rdx), %eax
	movzwl	-2(%rsi, %rdx), %ecx
	shll	$16, %eax
	shll	$16, %ecx
	movzwl	(%rdi), %edi
	movzwl	(%rsi), %esi
	orl	%edi, %eax
	orl	%esi, %ecx
	bswap	%eax
	bswap	%ecx
	subl	%ecx, %eax
	ret

needs a saturating subtract because the full 32-bit register is used:
the difference of two arbitrary 32-bit values can wrap around and come
back with the wrong sign.  With this commit, only the lower 24 bits of
the register are used, so the difference always fits in a signed 32-bit
integer and a regular subtraction suffices.
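
For illustration, here is a minimal C sketch of the old packing (the
pack32_old helper is hypothetical, not glibc code) showing how the
full-width subtraction can wrap and flip the sign:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Pack a 2- or 3-byte buffer the way the old asm did: a full 32-bit
   big-endian word built from overlapping two-byte loads.  */
static uint32_t
pack32_old (const unsigned char *s, size_t n)
{
  return ((uint32_t) s[0] << 24) | ((uint32_t) s[1] << 16)
	 | ((uint32_t) s[n - 2] << 8) | (uint32_t) s[n - 1];
}

int
main (void)
{
  const unsigned char a[3] = { 0xff, 0xff, 0xff };
  const unsigned char b[3] = { 0x00, 0x00, 0x00 };
  /* memcmp (a, b, 3) must be positive, but the plain 32-bit
     subtraction wraps: 0xffffffff - 0x00000000 interpreted as a
     signed int is -1, the wrong sign.  Prints -1.  */
  printf ("%d\n", (int) (pack32_old (a, 3) - pack32_old (b, 3)));
  return 0;
}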

The test case change adds coverage for these kinds of bugs.
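
A sketch of the kind of coverage this implies, assuming a byte-wise
reference implementation (ref_memcmp and check are illustrative names,
not the actual glibc test harness): only the sign of the result is
checked, since that is all memcmp guarantees, and extreme byte values
make any wraparound visible as a flipped sign.

#include <assert.h>
#include <stddef.h>
#include <string.h>

static int
sign (int x)
{
  return (x > 0) - (x < 0);
}

/* Byte-wise reference implementation.  */
static int
ref_memcmp (const unsigned char *a, const unsigned char *b, size_t n)
{
  for (size_t i = 0; i < n; i++)
    if (a[i] != b[i])
      return a[i] < b[i] ? -1 : 1;
  return 0;
}

static void
check (const unsigned char *a, const unsigned char *b, size_t n)
{
  assert (sign (memcmp (a, b, n)) == sign (ref_memcmp (a, b, n)));
}

int
main (void)
{
  const unsigned char hi[3] = { 0xff, 0xff, 0xff };
  const unsigned char lo[3] = { 0x00, 0x00, 0x00 };
  for (size_t n = 2; n <= 3; n++)
    {
      check (hi, lo, n);
      check (lo, hi, n);
    }
  return 0;
}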
Diffstat (limited to 'sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S')
-rw-r--r--	sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S	20
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index 47630dd97b..9d1921033e 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -137,18 +137,18 @@ L(exit):
 
 	.p2align 4
 L(between_2_3):
-	/* Load as big endian with overlapping loads and bswap to avoid
-	   branches.  */
-	movzwl	-2(%rdi, %rdx), %eax
-	movzwl	-2(%rsi, %rdx), %ecx
-	shll	$16, %eax
-	shll	$16, %ecx
-	movzwl	(%rdi), %edi
-	movzwl	(%rsi), %esi
-	orl	%edi, %eax
-	orl	%esi, %ecx
+	/* Load as big endian to avoid branches.  */
+	movzwl	(%rdi), %eax
+	movzwl	(%rsi), %ecx
+	shll	$8, %eax
+	shll	$8, %ecx
 	bswap	%eax
 	bswap	%ecx
+	movzbl	-1(%rdi, %rdx), %edi
+	movzbl	-1(%rsi, %rdx), %esi
+	orl	%edi, %eax
+	orl	%esi, %ecx
+	/* Subtraction is okay because the upper 8 bits are zero.  */
 	subl	%ecx, %eax
 	ret
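
In C terms, the fixed sequence computes the following (pack24 and
memcmp_2_3_new are illustrative names; this is a sketch of the
arithmetic, not the glibc implementation):

#include <stddef.h>
#include <stdint.h>

/* New packing: the significant bytes land in the low 24 bits,
   big-endian; the top byte is always zero.  */
static uint32_t
pack24 (const unsigned char *s, size_t n)
{
  return ((uint32_t) s[0] << 16) | ((uint32_t) s[1] << 8)
	 | (uint32_t) s[n - 1];
}

static int
memcmp_2_3_new (const unsigned char *a, const unsigned char *b, size_t n)
{
  /* Both operands are at most 0xffffff, so the mathematical
     difference lies in [-0xffffff, 0xffffff] and always fits in a
     signed 32-bit int; as in the asm, the unsigned wraparound maps
     to the correct negative value on two's-complement targets.  */
  return (int) (pack24 (a, n) - pack24 (b, n));
}

int
main (void)
{
  const unsigned char a[2] = { 0x00, 0xff };
  const unsigned char b[2] = { 0xff, 0x00 };
  return memcmp_2_3_new (a, b, 2) < 0 ? 0 : 1;
}

For n == 2 the byte at n - 1 is the same one the two-byte load already
fetched; duplicating it keeps the code branch-free without affecting
the sign of the comparison.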