about summary refs log tree commit diff
path: root/sysdeps/x86_64
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2010-01-12 11:22:03 -0800
committerUlrich Drepper <drepper@redhat.com>2010-01-12 11:22:03 -0800
commit3af48cbdfaeb8bc389de1caeb33bc29811da80e8 (patch)
tree94a209777ab8c7e24cff9e50660a4075e6338594 /sysdeps/x86_64
parent4bfc6ab9ae3b259caa8b12229f0c67b4b514e9cd (diff)
downloadglibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.tar.gz
glibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.tar.xz
glibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.zip
Optimize 32bit memset/memcpy with SSE2/SSSE3.
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r--sysdeps/x86_64/cacheinfo.c10
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-defines.sym3
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.c18
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.h14
4 files changed, 42 insertions, 3 deletions
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index 5b66c62eb3..54220379ec 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -74,6 +74,7 @@ static const struct intel_02_cache_info
     { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
     { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
     { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
+    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
     { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
     { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
     { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
@@ -113,6 +114,7 @@ static const struct intel_02_cache_info
     { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
     { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
     { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
+    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
     { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
     { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
     { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
@@ -452,9 +454,10 @@ __cache_sysconf (int name)
 }
 
 
-/* Half the data cache size for use in memory and string routines, typically
+/* Data cache size for use in memory and string routines, typically
    L1 size.  */
 long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
+long int __x86_64_data_cache_size attribute_hidden = 32 * 1024;
 /* Shared cache size for use in memory and string routines, typically
    L2 or L3 size.  */
 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
@@ -657,7 +660,10 @@ init_cacheinfo (void)
     }
 
   if (data > 0)
-    __x86_64_data_cache_size_half = data / 2;
+    {
+      __x86_64_data_cache_size_half = data / 2;
+      __x86_64_data_cache_size = data;
+    }
 
   if (shared > 0)
     {
diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
index e2021cdf87..eb1538abcc 100644
--- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
+++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym
@@ -13,5 +13,8 @@ CPUID_ECX_OFFSET	offsetof (struct cpuid_registers, ecx)
 CPUID_EDX_OFFSET	offsetof (struct cpuid_registers, edx)
 FAMILY_OFFSET		offsetof (struct cpu_features, family)
 MODEL_OFFSET		offsetof (struct cpu_features, model)
+FEATURE_OFFSET		offsetof (struct cpu_features, feature)
+FEATURE_SIZE		sizeof (unsigned int)
 
 COMMON_CPUID_INDEX_1
+FEATURE_INDEX_1
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 7823aceb9b..50b2a38fbd 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -64,7 +64,23 @@ __init_cpu_features (void)
 	  __cpu_features.model += extended_model;
 	}
       else if (__cpu_features.family == 0x06)
-	__cpu_features.model += extended_model;
+	{
+	  __cpu_features.model += extended_model;
+	  switch (__cpu_features.model)
+	    {
+	    case 0x1a:
+	    case 0x1e:
+	    case 0x1f:
+	    case 0x25:
+	    case 0x2e:
+	    case 0x2f:
+	      /* Rep string instructions are fast on Intel Core i3, i5
+		 and i7.  */
+	      __cpu_features.feature[index_Fast_Rep_String]
+		|= bit_Fast_Rep_String;
+	      break;
+	    }
+	}
     }
   /* This spells out "AuthenticAMD".  */
   else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 0f8f77a8a1..69492cb3bf 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -16,6 +16,8 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#define bit_Fast_Rep_String	(1 << 0)
+
 #ifdef	__ASSEMBLER__
 
 #include <ifunc-defines.h>
@@ -28,6 +30,8 @@
 #define index_SSSE3	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 #define index_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 
+#define index_Fast_Rep_String	FEATURE_INDEX_1*FEATURE_SIZE
+
 #else	/* __ASSEMBLER__ */
 
 #include <sys/param.h>
@@ -39,6 +43,13 @@ enum
     COMMON_CPUID_INDEX_MAX
   };
 
+enum
+  {
+    FEATURE_INDEX_1 = 0,
+    /* Keep the following line at the end.  */
+    FEATURE_INDEX_MAX
+  };
+
 extern struct cpu_features
 {
   enum
@@ -58,6 +69,7 @@ extern struct cpu_features
   } cpuid[COMMON_CPUID_INDEX_MAX];
   unsigned int family;
   unsigned int model;
+  unsigned int feature[FEATURE_INDEX_MAX];
 } __cpu_features attribute_hidden;
 
 
@@ -86,4 +98,6 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
 #define HAS_FMA		HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
 
+#define index_Fast_Rep_String	FEATURE_INDEX_1
+
 #endif	/* __ASSEMBLER__ */