summary refs log tree commit diff
path: root/sysdeps/x86_64/cacheinfo.c
diff options
context:
space:
mode:
author: H.J. Lu <hongjiu.lu@intel.com> 2009-08-07 09:39:36 -0700
committer: Ulrich Drepper <drepper@redhat.com> 2009-08-07 09:39:36 -0700
commit: a546baa9cd2e5176e9851811d5df6f23e35d3bb8 (patch)
tree: 1db7355ddca82021eef7f9f2d2baa11e3da78a2b /sysdeps/x86_64/cacheinfo.c
parent: 77c84aeb81808c3109665949448dba59965c391e (diff)
download: glibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.tar.gz
glibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.tar.xz
glibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.zip
Properly count number of logical processors on Intel CPUs.
On processors where cpuid supports EAX = 11, the meaning of bits 25-14
in EAX returned from cpuid with EAX = 4 has changed from "the maximum
number of threads sharing the cache" to "the maximum number of
addressable IDs for logical processors sharing the cache".  We need to
use the results from both EAX = 4 and EAX = 11 to get the number of
threads sharing the cache.

On Core i7, bits 25-14 in EAX hold the value 15 although the number of
logical processors is 8.  Here is a white paper on this:

http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/

This patch correctly counts the number of logical processors on Intel
CPUs whose cpuid supports EAX = 11.  Tested on Dunnington, Core i7 and
Nehalem EX/EP.

It also fixes the Pentium D workaround, since EBX may no longer hold the
value returned from cpuid with EAX = 1.
Diffstat (limited to 'sysdeps/x86_64/cacheinfo.c')
-rw-r--r-- sysdeps/x86_64/cacheinfo.c 42
1 files changed, 38 insertions, 4 deletions
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index f252fc2c6c..ddad63baee 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -516,13 +516,15 @@ init_cacheinfo (void)
           shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
 	}
 
+      unsigned int ebx_1;
+
 #ifdef USE_MULTIARCH
       eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
-      ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
+      ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
       ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
       edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
 #else
-      __cpuid (1, eax, ebx, ecx, edx);
+      __cpuid (1, eax, ebx_1, ecx, edx);
 #endif
 
 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
@@ -554,14 +556,46 @@ init_cacheinfo (void)
 	    }
           while (((eax >> 5) & 0x7) != level);
 
-	  threads = ((eax >> 14) & 0x3ff) + 1;
+	  threads = (eax >> 14) & 0x3ff;
+	
+	  /* If max_cpuid >= 11, THREADS is the maximum number of
+	      addressable IDs for logical processors sharing the
+	      cache, instead of the maximum number of threads
+	      sharing the cache.  */
+	  if (threads && max_cpuid >= 11)
+	    {
+	      /* Find the number of logical processors shipped in
+		 one core and apply count mask.  */
+	      i = 0;
+	      while (1)
+		{
+		  __cpuid_count (11, i++, eax, ebx, ecx, edx);
+
+		  int shipped = ebx & 0xff;
+		  int type = ecx & 0xff0;
+		  if (shipped == 0 || type == 0)
+		    break;
+		  else if (type == 0x200)
+		    {
+		      int count_mask;
+
+		      /* Compute count mask.  */
+		      asm ("bsr %1, %0"
+			   : "=r" (count_mask) : "g" (threads));
+		      count_mask = ~(-1 << (count_mask + 1));
+		      threads = (shipped - 1) & count_mask;
+		      break;
+		    }
+		}
+	    }
+	  threads += 1;
 	}
       else
         {
 	intel_bug_no_cache_info:
 	  /* Assume that all logical threads share the highest cache level.  */
 
-	  threads = (ebx >> 16) & 0xff;
+	  threads = (ebx_1 >> 16) & 0xff;
 	}
 
       /* Cap usage of highest cache level to the number of supported