diff options
author | Noah Goldstein <goldstein.w.n@gmail.com> | 2022-06-14 13:50:11 -0700 |
---|---|---|
committer | Noah Goldstein <goldstein.w.n@gmail.com> | 2022-06-14 20:58:07 -0700 |
commit | 035591551400cfc810b07244a015c9411e8bff7c (patch) | |
tree | 0bc9dfed85e4beb9b2735df6366b83f75dd1aa94 | |
parent | 7374c02b683b7110b853a32496a619410364d70b (diff) | |
download | glibc-035591551400cfc810b07244a015c9411e8bff7c.tar.gz glibc-035591551400cfc810b07244a015c9411e8bff7c.tar.xz glibc-035591551400cfc810b07244a015c9411e8bff7c.zip |
x86: Fix misordered logic for setting `rep_movsb_stop_threshold`
Move the setting of `rep_movsb_stop_threshold` to after the tunables have been collected so that the `rep_movsb_stop_threshold` (which is used to redirect control flow to the non_temporal case) will use any user value for `non_temporal_threshold` (set using glibc.cpu.x86_non_temporal_threshold)
-rw-r--r-- | sysdeps/x86/dl-cacheinfo.h | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index f64a2fb0ba..cc3b840f9c 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -898,18 +898,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (CPU_FEATURE_USABLE_P (cpu_features, FSRM)) rep_movsb_threshold = 2112; - unsigned long int rep_movsb_stop_threshold; - /* ERMS feature is implemented from AMD Zen3 architecture and it is - performing poorly for data above L2 cache size. Henceforth, adding - an upper bound threshold parameter to limit the usage of Enhanced - REP MOVSB operations and setting its value to L2 cache size. */ - if (cpu_features->basic.kind == arch_kind_amd) - rep_movsb_stop_threshold = core; - /* Setting the upper bound of ERMS to the computed value of - non-temporal threshold for architectures other than AMD. */ - else - rep_movsb_stop_threshold = non_temporal_threshold; - /* The default threshold to use Enhanced REP STOSB. */ unsigned long int rep_stosb_threshold = 2048; @@ -951,6 +939,18 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) SIZE_MAX); #endif + unsigned long int rep_movsb_stop_threshold; + /* ERMS feature is implemented from AMD Zen3 architecture and it is + performing poorly for data above L2 cache size. Henceforth, adding + an upper bound threshold parameter to limit the usage of Enhanced + REP MOVSB operations and setting its value to L2 cache size. */ + if (cpu_features->basic.kind == arch_kind_amd) + rep_movsb_stop_threshold = core; + /* Setting the upper bound of ERMS to the computed value of + non-temporal threshold for architectures other than AMD. */ + else + rep_movsb_stop_threshold = non_temporal_threshold; + cpu_features->data_cache_size = data; cpu_features->shared_cache_size = shared; cpu_features->non_temporal_threshold = non_temporal_threshold; |