about summary refs log tree commit diff
diff options
context:
space:
mode:
authorLiubov Dmitrieva <liubov.dmitrieva@intel.com>2013-06-28 15:28:50 -0700
committerH.J. Lu <hjl.tools@gmail.com>2013-06-28 15:31:40 -0700
commit6308fd9a46a2f4aa550886e6f58190fb209ef027 (patch)
tree668039e1091165b38354a1ffebf35b0058de3eed
parent89cd956937f46e8f4a0374994965f991642dd408 (diff)
downloadglibc-6308fd9a46a2f4aa550886e6f58190fb209ef027.tar.gz
glibc-6308fd9a46a2f4aa550886e6f58190fb209ef027.tar.xz
glibc-6308fd9a46a2f4aa550886e6f58190fb209ef027.zip
Skip SSE4.2 versions on Intel Silvermont
SSE2/SSSE3 versions are faster than SSE4.2 versions on Intel Silvermont.
-rw-r--r--ChangeLog14
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.c10
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.h4
-rw-r--r--sysdeps/x86_64/multiarch/strchr.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcmp.S34
-rw-r--r--sysdeps/x86_64/multiarch/strrchr.S2
6 files changed, 51 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog
index c5551b805e..406ca28bd2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2013-06-28  Liubov Dmitrieva  <liubov.dmitrieva@intel.com>
+
+	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Set
+	bit_Slow_SSE4_2 and bit_Prefer_PMINUB_for_stringop for Intel
+	Silvermont.
+	* sysdeps/x86_64/multiarch/init-arch.h (bit_Slow_SSE4_2): New
+	macro.
+	(index_Slow_SSE4_2): Likewise.
+	(index_Prefer_PMINUB_for_stringop): Likewise.
+	* sysdeps/x86_64/multiarch/strchr.S: Skip SSE4.2 version if
+	bit_Slow_SSE4_2 is set.
+	* sysdeps/x86_64/multiarch/strcmp.S: Likewise.
+	* sysdeps/x86_64/multiarch/strrchr.S: Likewise.
+
 2013-06-28  Ryan S. Arnold  <rsa@linux.vnet.ibm.com>
 
 	* sysdeps/powerpc/Makefile: Add comment about generating an offset to
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 9524aeea18..55839610e2 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -81,8 +81,16 @@ __init_cpu_features (void)
 	    case 0x37:
 	      /* Unaligned load versions are faster than SSSE3
 		 on Silvermont.  */
+#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
+# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
+#endif
+#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
+# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
+#endif
 	      __cpu_features.feature[index_Fast_Unaligned_Load]
-		|= bit_Fast_Unaligned_Load;
+		|= (bit_Fast_Unaligned_Load
+		    | bit_Prefer_PMINUB_for_stringop
+		    | bit_Slow_SSE4_2);
 	      break;
 
 	    default:
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 28edbf7d07..0cb5f5bc30 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -23,6 +23,7 @@
 #define bit_AVX_Usable			(1 << 6)
 #define bit_FMA_Usable			(1 << 7)
 #define bit_FMA4_Usable			(1 << 8)
+#define bit_Slow_SSE4_2			(1 << 9)
 
 /* CPUID Feature flags.  */
 
@@ -62,6 +63,7 @@
 # define index_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_FMA_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Slow_SSE4_2		FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -156,9 +158,11 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
 # define index_Slow_BSF			FEATURE_INDEX_1
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
 # define index_AVX_Usable		FEATURE_INDEX_1
 # define index_FMA_Usable		FEATURE_INDEX_1
 # define index_FMA4_Usable		FEATURE_INDEX_1
+# define index_Slow_SSE4_2		FEATURE_INDEX_1
 
 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index 6860329449..f170238b55 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -29,6 +29,8 @@ ENTRY(strchr)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__strchr_sse2(%rip), %rax
+	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz	2f
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz	2f
 	leaq	__strchr_sse42(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f69aaf42b3..1d4d711838 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -88,14 +88,16 @@ ENTRY(STRCMP)
 	jne	1f
 	call	__init_cpu_features
 1:
+	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz	2f
 	leaq	STRCMP_SSE42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jnz	2f
-	leaq	STRCMP_SSSE3(%rip), %rax
+	jnz	3f
+2:	leaq	STRCMP_SSSE3(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jnz	2f
+	jnz	3f
 	leaq	STRCMP_SSE2(%rip), %rax
-2:	ret
+3:	ret
 END(STRCMP)
 
 # ifdef USE_AS_STRCASECMP_L
@@ -109,16 +111,18 @@ ENTRY(__strcasecmp)
 #  ifdef HAVE_AVX_SUPPORT
 	leaq	__strcasecmp_avx(%rip), %rax
 	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
-	jnz	2f
+	jnz	3f
 #  endif
+	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz	2f
 	leaq	__strcasecmp_sse42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jnz	2f
-	leaq	__strcasecmp_ssse3(%rip), %rax
+	jnz	3f
+2:	leaq	__strcasecmp_ssse3(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jnz	2f
+	jnz	3f
 	leaq	__strcasecmp_sse2(%rip), %rax
-2:	ret
+3:	ret
 END(__strcasecmp)
 weak_alias (__strcasecmp, strcasecmp)
 # endif
@@ -133,16 +137,18 @@ ENTRY(__strncasecmp)
 #  ifdef HAVE_AVX_SUPPORT
 	leaq	__strncasecmp_avx(%rip), %rax
 	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
-	jnz	2f
+	jnz	3f
 #  endif
+	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz	2f
 	leaq	__strncasecmp_sse42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jnz	2f
-	leaq	__strncasecmp_ssse3(%rip), %rax
+	jnz	3f
+2:	leaq	__strncasecmp_ssse3(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jnz	2f
+	jnz	3f
 	leaq	__strncasecmp_sse2(%rip), %rax
-2:	ret
+3:	ret
 END(__strncasecmp)
 weak_alias (__strncasecmp, strncasecmp)
 # endif
diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S
index ee6af6e9dd..3f92a41ef9 100644
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -32,6 +32,8 @@ ENTRY(strrchr)
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__strrchr_sse2(%rip), %rax
+	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz	2f
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz	2f
 	leaq	__strrchr_sse42(%rip), %rax