about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFeifei Wang <wangfeifei@hygon.cn>2024-08-19 14:57:55 +0800
committerH.J. Lu <hjl.tools@gmail.com>2024-08-26 10:01:58 -0700
commitca90758b2a2b1c21fcf1f04b3e5ddad238b3aefe (patch)
treef3ab4608b2b8c39faab2b4055018d5eeb1d84735
parentd14aecbffc032c97d86fdbfdcb7991d1a55e8399 (diff)
downloadglibc-ca90758b2a2b1c21fcf1f04b3e5ddad238b3aefe.tar.gz
glibc-ca90758b2a2b1c21fcf1f04b3e5ddad238b3aefe.tar.xz
glibc-ca90758b2a2b1c21fcf1f04b3e5ddad238b3aefe.zip
x86: Enable non-temporal memset for Hygon processors
This patch clears the 'Avoid_Non_Temporal_Memset' flag on Hygon
processors so that memset uses the non-temporal implementation.

Test Results:

hygon1 arch
x86_memset_non_temporal_threshold = 8MB
size                          new performance time / old performance time
1MB                           0.994
4MB                           0.996
8MB                           0.670
16MB                          0.343
32MB                          0.355

hygon2 arch
x86_memset_non_temporal_threshold = 8MB
size                          new performance time / old performance time
1MB                           1
4MB                           1
8MB                           1.312
16MB                          0.822
32MB                          0.830

hygon3 arch
x86_memset_non_temporal_threshold = 8MB
size                          new performance time / old performance time
1MB                           1
4MB                           0.990
8MB                           0.737
16MB                          0.390
32MB                          0.401

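On Hygon architectures with this patch, non-temporal stores improve
memset performance by roughly 20% - 65% for sizes of 16MB and above.
Sizes below the 8MB threshold are unchanged; at exactly 8MB, hygon1
and hygon3 improve while hygon2 is slower (1.312).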

Signed-off-by: Feifei Wang <wangfeifei@hygon.cn>
Reviewed-by: Jing Li <lijing@hygon.cn>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
-rw-r--r--sysdeps/x86/cpu-features.c9
-rw-r--r--sysdeps/x86/dl-cacheinfo.h2
2 files changed, 8 insertions, 3 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index e6139e2837..1f30e237f5 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -756,9 +756,9 @@ init_cpu_features (struct cpu_features *cpu_features)
   unsigned int stepping = 0;
   enum cpu_features_kind kind;
 
-  /* Default is avoid non-temporal memset for non Intel/AMD hardware. This is,
+  /* Default is avoid non-temporal memset for non Intel/AMD/Hygon hardware. This is,
      as of writing this, we only have benchmarks indicatings it profitability
-     on Intel/AMD.  */
+     on Intel/AMD/Hygon.  */
   cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
       |= bit_arch_Avoid_Non_Temporal_Memset;
 
@@ -1116,6 +1116,11 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
       get_extended_indices (cpu_features);
 
       update_active (cpu_features);
+
+      /* Benchmarks indicate non-temporal memset can be profitable on Hygon
+       hardware.  */
+      cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
+	    &= ~bit_arch_Avoid_Non_Temporal_Memset;
     }
   else
     {
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index 8f4fe98d88..e9579505a3 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -1071,7 +1071,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
 
   /* Non-temporal stores are more performant on some hardware above
      non_temporal_threshold. Currently Prefer_Non_Temporal is set for for both
-     Intel and AMD hardware. */
+     Intel, AMD and Hygon hardware. */
   unsigned long int memset_non_temporal_threshold = SIZE_MAX;
   if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset))
     memset_non_temporal_threshold = non_temporal_threshold;