about summary refs log tree commit diff
diff options
context:
space:
mode:
authorNoah Goldstein <goldstein.w.n@gmail.com>2022-02-15 08:18:15 -0600
committerSunil K Pandey <skpgkp2@gmail.com>2022-05-25 14:30:20 -0700
commitac87df8d750b0734e20ffb82d1d0d0efbe4b6212 (patch)
tree4795f998bdb7cbdcc44bb62b35601435a5201743
parent478cd506eaa5bbc81e3de6593a5064fa5f36fea9 (diff)
downloadglibc-ac87df8d750b0734e20ffb82d1d0d0efbe4b6212.tar.gz
glibc-ac87df8d750b0734e20ffb82d1d0d0efbe4b6212.tar.xz
glibc-ac87df8d750b0734e20ffb82d1d0d0efbe4b6212.zip
x86: Fallback {str|wcs}cmp RTM in the ncmp overflow case [BZ #29127]
Re-cherry-pick commit c627209832 for the strcmp-avx2.S change, which was
omitted in the initial cherry-pick because at the time this bug was not
present on the release branch.

Fixes BZ #29127.

In the overflow fallback, strncmp-avx2-rtm and wcsncmp-avx2-rtm would
call strcmp-avx2 and wcscmp-avx2 respectively. These would have
no checks around vzeroupper and would trigger spurious
aborts. This commit fixes that.

test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on
AVX2 machines with and without RTM.

Co-authored-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit c6272098323153db373f2986c67786ea8c85f1cf)
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-avx2.S8
1 files changed, 2 insertions, 6 deletions
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
index 3366d0b083..8da09bd86d 100644
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -345,10 +345,10 @@ L(one_or_less):
 	movq	%LOCALE_REG, %rdx
 #  endif
 	jb	L(ret_zero)
-#  ifdef USE_AS_WCSCMP
 	/* 'nbe' covers the case where length is negative (large
 	   unsigned).  */
-	jnbe	__wcscmp_avx2
+	jnbe	OVERFLOW_STRCMP
+#  ifdef USE_AS_WCSCMP
 	movl	(%rdi), %edx
 	xorl	%eax, %eax
 	cmpl	(%rsi), %edx
@@ -357,10 +357,6 @@ L(one_or_less):
 	negl	%eax
 	orl	$1, %eax
 #  else
-	/* 'nbe' covers the case where length is negative (large
-	   unsigned).  */
-
-	jnbe	__strcmp_avx2
 	movzbl	(%rdi), %eax
 	movzbl	(%rsi), %ecx
 	TOLOWER_gpr (%rax, %eax)