about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch
diff options
context:
space:
mode:
author: H.J. Lu <hongjiu.lu@intel.com> 2011-06-24 14:15:32 -0400
committer: Ulrich Drepper <drepper@gmail.com> 2011-06-24 14:15:32 -0400
commit: 0b1cbaaef5ccc21baf2c35d4698fb28e82eab385 (patch)
tree: c1f6ad8a49ef79510355c765ad3e385067e7ade0 /sysdeps/x86_64/multiarch
parent: 07f494a027b3adea1f3cd0cd4ca7c10949cdc476 (diff)
download: glibc-0b1cbaaef5ccc21baf2c35d4698fb28e82eab385.tar.gz
glibc-0b1cbaaef5ccc21baf2c35d4698fb28e82eab385.tar.xz
glibc-0b1cbaaef5ccc21baf2c35d4698fb28e82eab385.zip
Optimized st{r,p}{,n}cpy for SSE2/SSSE3 on x86-32
Diffstat (limited to 'sysdeps/x86_64/multiarch')
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.c 11
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.h 6
2 files changed, 14 insertions, 3 deletions
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 809d105c77..81b2378467 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -97,13 +97,18 @@ __init_cpu_features (void)
 	    case 0x2c:
 	    case 0x2e:
 	    case 0x2f:
-	      /* Rep string instructions and copy backward are fast on
-		 Intel Core i3, i5 and i7.  */
+	      /* Rep string instructions, copy backward and unaligned loads
+		 are fast on Intel Core i3, i5 and i7.  */
 #if index_Fast_Rep_String != index_Fast_Copy_Backward
 # error index_Fast_Rep_String != index_Fast_Copy_Backward
 #endif
+#if index_Fast_Rep_String != index_Fast_Unaligned_Load
+# error index_Fast_Rep_String != index_Fast_Unaligned_Load
+#endif
 	      __cpu_features.feature[index_Fast_Rep_String]
-		|= bit_Fast_Rep_String | bit_Fast_Copy_Backward;
+		|= (bit_Fast_Rep_String
+		    | bit_Fast_Copy_Backward
+		    | bit_Fast_Unaligned_Load);
 	      break;
 	    }
 	}
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 6e409b8f17..addf5f3dde 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -20,6 +20,7 @@
 #define bit_Fast_Copy_Backward		(1 << 1)
 #define bit_Slow_BSF			(1 << 2)
 #define bit_Prefer_SSE_for_memop	(1 << 3)
+#define bit_Fast_Unaligned_Load		(1 << 4)
 
 #ifdef	__ASSEMBLER__
 
@@ -39,6 +40,7 @@
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Slow_BSF			FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -112,6 +114,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
 # define index_Slow_BSF			FEATURE_INDEX_1
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
+# define index_Fast_Unaligned_Load	FEATURE_INDEX_1
 
 #define HAS_ARCH_FEATURE(idx, bit) \
   ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
@@ -128,4 +131,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define HAS_PREFER_SSE_FOR_MEMOP \
   HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
 
+#define HAS_FAST_UNALIGNED_LOAD \
+  HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
+
 #endif	/* __ASSEMBLER__ */