about summary refs log tree commit diff
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2016-05-19 09:09:00 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2016-05-19 09:09:00 -0700
commit 7c08d791ee4fabf96d96b66dec803602e621057c (patch)
tree fdc67fe76da8482d407594c70260a64c27e31cb7
parent eb2c88c7c83901737db5c4de7dc4470c5681b2cb (diff)
download glibc-7c08d791ee4fabf96d96b66dec803602e621057c.tar.gz
glibc-7c08d791ee4fabf96d96b66dec803602e621057c.tar.xz
glibc-7c08d791ee4fabf96d96b66dec803602e621057c.zip
Check the HTT bit before counting logical threads
Skip counting logical threads for Intel processors if the HTT bit is 0,
which indicates there is only a single logical processor.

	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
	logical threads if the HTT bit is 0.
	* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
	(index_cpu_HTT): Likewise.
	(reg_HTT): Likewise.
-rw-r--r-- ChangeLog 8
-rw-r--r-- sysdeps/x86/cacheinfo.c 158
-rw-r--r-- sysdeps/x86/cpu-features.h 3
3 files changed, 93 insertions, 76 deletions
diff --git a/ChangeLog b/ChangeLog
index 8a4918cbc8..8adf828d1b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
 2016-05-19  H.J. Lu  <hongjiu.lu@intel.com>
 
+	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
+	logical threads if the HTT bit is 0.
+	* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
+	(index_cpu_HTT): Likewise.
+	(reg_HTT): Likewise.
+
+2016-05-19  H.J. Lu  <hongjiu.lu@intel.com>
+
 	[BZ #20115]
 	* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S (__memset):
 	Remove alignments on jump targets.
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 8408624ea4..1f46d9de20 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -506,99 +506,105 @@ init_cacheinfo (void)
 	  shared = core;
 	}
 
-      /* Figure out the number of logical threads that share the
-	 highest cache level.  */
-      if (max_cpuid >= 4)
+      /* A value of 0 for the HTT bit indicates there is only a single
+	 logical processor.  */
+      if (HAS_CPU_FEATURE (HTT))
 	{
-	  unsigned int family = GLRO(dl_x86_cpu_features).family;
-	  unsigned int model = GLRO(dl_x86_cpu_features).model;
+	  /* Figure out the number of logical threads that share the
+	     highest cache level.  */
+	  if (max_cpuid >= 4)
+	    {
+	      unsigned int family = GLRO(dl_x86_cpu_features).family;
+	      unsigned int model = GLRO(dl_x86_cpu_features).model;
 
-	  int i = 0;
+	      int i = 0;
 
-	  /* Query until desired cache level is enumerated.  */
-	  do
-	    {
-	      __cpuid_count (4, i++, eax, ebx, ecx, edx);
-
-	      /* There seems to be a bug in at least some Pentium Ds
-		 which sometimes fail to iterate all cache parameters.
-		 Do not loop indefinitely here, stop in this case and
-		 assume there is no such information.  */
-	      if ((eax & 0x1f) == 0)
-		goto intel_bug_no_cache_info;
-	    }
-	  while (((eax >> 5) & 0x7) != level);
+	      /* Query until desired cache level is enumerated.  */
+	      do
+		{
+		  __cpuid_count (4, i++, eax, ebx, ecx, edx);
+
+		  /* There seems to be a bug in at least some Pentium Ds
+		     which sometimes fail to iterate all cache parameters.
+		     Do not loop indefinitely here, stop in this case and
+		     assume there is no such information.  */
+		  if ((eax & 0x1f) == 0)
+		    goto intel_bug_no_cache_info;
+		}
+	      while (((eax >> 5) & 0x7) != level);
 
-	  /* Check if cache is inclusive of lower cache levels.  */
-	  inclusive_cache = (edx & 0x2) != 0;
+	      /* Check if cache is inclusive of lower cache levels.  */
+	      inclusive_cache = (edx & 0x2) != 0;
 
-	  threads = (eax >> 14) & 0x3ff;
+	      threads = (eax >> 14) & 0x3ff;
 
-	  /* If max_cpuid >= 11, THREADS is the maximum number of
-	      addressable IDs for logical processors sharing the
-	      cache, instead of the maximum number of threads
-	      sharing the cache.  */
-	  if (threads && max_cpuid >= 11)
-	    {
-	      /* Find the number of logical processors shipped in
-		 one core and apply count mask.  */
-	      i = 0;
-	      while (1)
+	      /* If max_cpuid >= 11, THREADS is the maximum number of
+		 addressable IDs for logical processors sharing the
+		 cache, instead of the maximum number of threads
+		 sharing the cache.  */
+	      if (threads && max_cpuid >= 11)
 		{
-		  __cpuid_count (11, i++, eax, ebx, ecx, edx);
-
-		  int shipped = ebx & 0xff;
-		  int type = ecx & 0xff0;
-		  if (shipped == 0 || type == 0)
-		    break;
-		  else if (type == 0x200)
+		  /* Find the number of logical processors shipped in
+		     one core and apply count mask.  */
+		  i = 0;
+		  while (1)
 		    {
-		      int count_mask;
-
-		      /* Compute count mask.  */
-		      asm ("bsr %1, %0"
-			   : "=r" (count_mask) : "g" (threads));
-		      count_mask = ~(-1 << (count_mask + 1));
-		      threads = (shipped - 1) & count_mask;
+		      __cpuid_count (11, i++, eax, ebx, ecx, edx);
+
+		      int shipped = ebx & 0xff;
+		      int type = ecx & 0xff0;
+		      if (shipped == 0 || type == 0)
+			break;
+		      else if (type == 0x200)
+			{
+			  int count_mask;
+
+			  /* Compute count mask.  */
+			  asm ("bsr %1, %0"
+			       : "=r" (count_mask) : "g" (threads));
+			  count_mask = ~(-1 << (count_mask + 1));
+			  threads = (shipped - 1) & count_mask;
+			  break;
+			}
+		    }
+		}
+	      threads += 1;
+	      if (threads > 2 && level == 2 && family == 6)
+		{
+		  switch (model)
+		    {
+		    case 0x57:
+		      /* Knights Landing has L2 cache shared by 2 cores.  */
+		    case 0x37:
+		    case 0x4a:
+		    case 0x4d:
+		    case 0x5a:
+		    case 0x5d:
+		      /* Silvermont has L2 cache shared by 2 cores.  */
+		      threads = 2;
+		      break;
+		    default:
 		      break;
 		    }
 		}
 	    }
-	  threads += 1;
-	  if (threads > 2 && level == 2 && family == 6)
+	  else
 	    {
-	      switch (model)
-		{
-		case 0x57:
-		  /* Knights Landing has L2 cache shared by 2 cores.  */
-		case 0x37:
-		case 0x4a:
-		case 0x4d:
-		case 0x5a:
-		case 0x5d:
-		  /* Silvermont has L2 cache shared by 2 cores.  */
-		  threads = 2;
-		  break;
-		default:
-		  break;
-		}
+intel_bug_no_cache_info:
+	      /* Assume that all logical threads share the highest cache
+		 level.  */
+
+	      threads
+		= ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
+		    >> 16) & 0xff);
 	    }
-	}
-      else
-	{
-	intel_bug_no_cache_info:
-	  /* Assume that all logical threads share the highest cache level.  */
 
-	  threads
-	    = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
-		>> 16) & 0xff);
+	  /* Cap usage of highest cache level to the number of supported
+	     threads.  */
+	  if (shared > 0 && threads > 0)
+	    shared /= threads;
 	}
 
-      /* Cap usage of highest cache level to the number of supported
-	 threads.  */
-      if (shared > 0 && threads > 0)
-	shared /= threads;
-
       /* Account for non-inclusive L2 and L3 caches.  */
       if (level == 3 && !inclusive_cache)
 	shared += core;
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 9529d61ff5..2bd93713a1 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -51,6 +51,7 @@
 #define bit_cpu_POPCOUNT	(1 << 23)
 #define bit_cpu_FMA		(1 << 12)
 #define bit_cpu_FMA4		(1 << 16)
+#define bit_cpu_HTT		(1 << 28)
 
 /* COMMON_CPUID_INDEX_7.  */
 #define bit_cpu_ERMS		(1 << 9)
@@ -235,6 +236,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_cpu_FMA4		COMMON_CPUID_INDEX_80000001
 # define index_cpu_POPCOUNT	COMMON_CPUID_INDEX_1
 # define index_cpu_OSXSAVE	COMMON_CPUID_INDEX_1
+# define index_cpu_HTT		COMMON_CPUID_INDEX_1
 
 # define reg_CX8		edx
 # define reg_CMOV		edx
@@ -252,6 +254,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define reg_FMA4		ecx
 # define reg_POPCOUNT		ecx
 # define reg_OSXSAVE		ecx
+# define reg_HTT		edx
 
 # define index_arch_Fast_Rep_String	FEATURE_INDEX_1
 # define index_arch_Fast_Copy_Backward	FEATURE_INDEX_1