about summary refs log tree commit diff
path: root/sysdeps/x86/include/cpu-features.h
diff options
context:
space:
mode:
authorNoah Goldstein <goldstein.w.n@gmail.com>2023-06-07 13:18:03 -0500
committerNoah Goldstein <goldstein.w.n@gmail.com>2023-06-12 11:33:39 -0500
commit180897c161a171d8ef0faee1c6c9fd6b57d8b13b (patch)
tree89e71e02a6e1edc57bb13f311228816dcbc92bd6 /sysdeps/x86/include/cpu-features.h
parentf193ea20eddc6cef84cba54cf1a647204ee6a86b (diff)
downloadglibc-180897c161a171d8ef0faee1c6c9fd6b57d8b13b.tar.gz
glibc-180897c161a171d8ef0faee1c6c9fd6b57d8b13b.tar.xz
glibc-180897c161a171d8ef0faee1c6c9fd6b57d8b13b.zip
x86: Make the divisor in setting `non_temporal_threshold` cpu specific
Different systems prefer different divisors.

From benchmarks[1] so far the following divisors have been found:
    ICX     : 2
    SKX     : 2
    BWD     : 8

For Intel, we are generalizing that BWD and older prefer 8 as a
divisor, and SKL and newer prefer 2. This number can be further tuned
as benchmarks are run.

[1]: https://github.com/goldsteinn/memcpy-nt-benchmarks
Reviewed-by: DJ Delorie <dj@redhat.com>
Diffstat (limited to 'sysdeps/x86/include/cpu-features.h')
-rw-r--r--sysdeps/x86/include/cpu-features.h3
1 files changed, 3 insertions, 0 deletions
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index 40b8129d6a..c740e1a5fc 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -945,6 +945,9 @@ struct cpu_features
   unsigned long int level3_cache_linesize;
   /* /_SC_LEVEL4_CACHE_SIZE.  */
   unsigned long int level4_cache_size;
+  /* When no user non_temporal_threshold is specified, we default to
+     cachesize / cachesize_non_temporal_divisor.  */
+  unsigned long int cachesize_non_temporal_divisor;
 };
 
 /* Get a pointer to the CPU features structure.  */