From 402bf0695218bbe290418b9486b1dd5fe284d903 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 19 May 2017 10:46:29 -0700
Subject: x86: Optimize SSE2 memchr overflow calculation

SSE2 memchr computes "edx + ecx - 16" where ecx is less than 16.  Use
"edx - (16 - ecx)", instead of satured math, to avoid possible addition
overflow.  This replaces

	add	%ecx, %edx
	sbb	%eax, %eax
	or	%eax, %edx
	sub	$16, %edx

with

	neg	%ecx
	add	$16, %ecx
	sub	%ecx, %edx

It is the same for x86_64, except for rcx/rdx, instead of ecx/edx.

	* sysdeps/i386/i686/multiarch/memchr-sse2.S (MEMCHR): Use
	"edx + ecx - 16" to avoid possible addition overflow.
	* sysdeps/x86_64/memchr.S (memchr): Likewise.
---
 sysdeps/i386/i686/multiarch/memchr-sse2.S | 14 ++++++--------
 sysdeps/x86_64/memchr.S                   | 14 ++++++--------
 2 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'sysdeps')

diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2.S b/sysdeps/i386/i686/multiarch/memchr-sse2.S
index e41f324a77..172d70de13 100644
--- a/sysdeps/i386/i686/multiarch/memchr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/memchr-sse2.S
@@ -117,14 +117,12 @@ L(crosscache):
 
 # ifndef USE_AS_RAWMEMCHR
 	jnz	L(match_case2_prolog1)
-        /* Calculate the last acceptable address and check for possible
-           addition overflow by using satured math:
-           edx = ecx + edx
-           edx |= -(edx < ecx)  */
-	add	%ecx, %edx
-	sbb	%eax, %eax
-	or	%eax, %edx
-	sub	$16, %edx
+        /* "ecx" is less than 16.  Calculate "edx + ecx - 16" by using
+	   "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to void
+	   possible addition overflow.  */
+	neg	%ecx
+	add	$16, %ecx
+	sub	%ecx, %edx
 	jbe	L(return_null)
 	lea	16(%edi), %edi
 # else
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index a205a25998..f82e1c5bf7 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -76,14 +76,12 @@ L(crosscache):
 
 	.p2align 4
 L(unaligned_no_match):
-        /* Calculate the last acceptable address and check for possible
-           addition overflow by using satured math:
-           rdx = rcx + rdx
-           rdx |= -(rdx < rcx)  */
-	add	%rcx, %rdx
-	sbb	%rax, %rax
-	or	%rax, %rdx
-	sub	$16, %rdx
+        /* "rcx" is less than 16.  Calculate "rdx + rcx - 16" by using
+	   "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void
+	   possible addition overflow.  */
+	neg	%rcx
+	add	$16, %rcx
+	sub	%rcx, %rdx
 	jbe	L(return_null)
 	add	$16, %rdi
 	sub	$64, %rdx
-- 
cgit 1.4.1