about summary refs log tree commit diff
path: root/sysdeps/x86/cpu-features.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86/cpu-features.c')
-rw-r--r--sysdeps/x86/cpu-features.c15
1 files changed, 12 insertions, 3 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index e501e084ef..18ed008040 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -870,11 +870,19 @@ init_cpu_features (struct cpu_features *cpu_features)
 
 	      /* Newer Bigcore microarch (larger non-temporal store
 		 threshold).  */
-	    case INTEL_BIGCORE_SKYLAKE:
-	    case INTEL_BIGCORE_KABYLAKE:
-	    case INTEL_BIGCORE_COMETLAKE:
 	    case INTEL_BIGCORE_SKYLAKE_AVX512:
 	    case INTEL_BIGCORE_CANNONLAKE:
+	      /* Benchmarks indicate non-temporal memset is not
+		     necessarily profitable on SKX (and in some cases much
+		     worse). This is likely unique to SKX due its it unique
+		     mesh interconnect (not present on ICX or BWD). Disable
+		     non-temporal on all Skylake servers. */
+	      cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
+		  |= bit_arch_Avoid_Non_Temporal_Memset;
+	      /* fallthrough */
+	    case INTEL_BIGCORE_COMETLAKE:
+	    case INTEL_BIGCORE_SKYLAKE:
+	    case INTEL_BIGCORE_KABYLAKE:
 	    case INTEL_BIGCORE_ICELAKE:
 	    case INTEL_BIGCORE_TIGERLAKE:
 	    case INTEL_BIGCORE_ROCKETLAKE:
@@ -1066,6 +1074,7 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
 	      /* Yongfeng and Shijidadao mircoarch tuning.  */
 	    case 0x5b:
 	      cpu_features->cachesize_non_temporal_divisor = 2;
+	      /* fallthrough */
 	    case 0x6b:
 	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
 		  &= ~bit_arch_AVX_Fast_Unaligned_Load;