about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/strcmp.S
diff options
context:
space:
mode:
authorLiubov Dmitrieva <liubov.dmitrieva@intel.com>2013-06-28 15:28:50 -0700
committerH.J. Lu <hjl.tools@gmail.com>2013-06-28 15:31:40 -0700
commit6308fd9a46a2f4aa550886e6f58190fb209ef027 (patch)
tree668039e1091165b38354a1ffebf35b0058de3eed /sysdeps/x86_64/multiarch/strcmp.S
parent89cd956937f46e8f4a0374994965f991642dd408 (diff)
downloadglibc-6308fd9a46a2f4aa550886e6f58190fb209ef027.tar.gz
glibc-6308fd9a46a2f4aa550886e6f58190fb209ef027.tar.xz
glibc-6308fd9a46a2f4aa550886e6f58190fb209ef027.zip
Skip SSE4.2 versions on Intel Silvermont
SSE2/SSSE3 versions are faster than SSE4.2 versions on Intel Silvermont.
Diffstat (limited to 'sysdeps/x86_64/multiarch/strcmp.S')
-rw-r--r--sysdeps/x86_64/multiarch/strcmp.S34
1 files changed, 20 insertions, 14 deletions
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f69aaf42b3..1d4d711838 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -88,14 +88,16 @@ ENTRY(STRCMP)
 	jne	1f
 	call	__init_cpu_features
 1:
+	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz	2f
 	leaq	STRCMP_SSE42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jnz	2f
-	leaq	STRCMP_SSSE3(%rip), %rax
+	jnz	3f
+2:	leaq	STRCMP_SSSE3(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jnz	2f
+	jnz	3f
 	leaq	STRCMP_SSE2(%rip), %rax
-2:	ret
+3:	ret
 END(STRCMP)
 
 # ifdef USE_AS_STRCASECMP_L
@@ -109,16 +111,18 @@ ENTRY(__strcasecmp)
 #  ifdef HAVE_AVX_SUPPORT
 	leaq	__strcasecmp_avx(%rip), %rax
 	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
-	jnz	2f
+	jnz	3f
 #  endif
+	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz	2f
 	leaq	__strcasecmp_sse42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jnz	2f
-	leaq	__strcasecmp_ssse3(%rip), %rax
+	jnz	3f
+2:	leaq	__strcasecmp_ssse3(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jnz	2f
+	jnz	3f
 	leaq	__strcasecmp_sse2(%rip), %rax
-2:	ret
+3:	ret
 END(__strcasecmp)
 weak_alias (__strcasecmp, strcasecmp)
 # endif
@@ -133,16 +137,18 @@ ENTRY(__strncasecmp)
 #  ifdef HAVE_AVX_SUPPORT
 	leaq	__strncasecmp_avx(%rip), %rax
 	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
-	jnz	2f
+	jnz	3f
 #  endif
+	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz	2f
 	leaq	__strncasecmp_sse42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jnz	2f
-	leaq	__strncasecmp_ssse3(%rip), %rax
+	jnz	3f
+2:	leaq	__strncasecmp_ssse3(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jnz	2f
+	jnz	3f
 	leaq	__strncasecmp_sse2(%rip), %rax
-2:	ret
+3:	ret
 END(__strncasecmp)
 weak_alias (__strncasecmp, strncasecmp)
 # endif