diff options
author | Noah Goldstein <goldstein.w.n@gmail.com> | 2021-11-01 00:49:52 -0500 |
---|---|---|
committer | Noah Goldstein <goldstein.w.n@gmail.com> | 2021-11-06 16:18:08 -0500 |
commit | 475b63702ef38b69558fc3d31a0b66776a70f1d3 (patch) | |
tree | f2d35f60a3526f2d52f2ec4ab18ab77584673bba /sysdeps/x86/dl-cacheinfo.h | |
parent | a6b7502ec0c2da89a7437f43171f160d713e39c6 (diff) | |
download | glibc-475b63702ef38b69558fc3d31a0b66776a70f1d3.tar.gz glibc-475b63702ef38b69558fc3d31a0b66776a70f1d3.tar.xz glibc-475b63702ef38b69558fc3d31a0b66776a70f1d3.zip |
x86: Double size of ERMS rep_movsb_threshold in dl-cacheinfo.h
No bug. This patch doubles the rep_movsb_threshold when using ERMS. Based on benchmarks the vector copy loop, especially now that it handles 4k aliasing, is better for these medium ranged. On Skylake with ERMS: Size, Align1, Align2, dst>src,(rep movsb) / (vec copy) 4096, 0, 0, 0, 0.975 4096, 0, 0, 1, 0.953 4096, 12, 0, 0, 0.969 4096, 12, 0, 1, 0.872 4096, 44, 0, 0, 0.979 4096, 44, 0, 1, 0.83 4096, 0, 12, 0, 1.006 4096, 0, 12, 1, 0.989 4096, 0, 44, 0, 0.739 4096, 0, 44, 1, 0.942 4096, 12, 12, 0, 1.009 4096, 12, 12, 1, 0.973 4096, 44, 44, 0, 0.791 4096, 44, 44, 1, 0.961 4096, 2048, 0, 0, 0.978 4096, 2048, 0, 1, 0.951 4096, 2060, 0, 0, 0.986 4096, 2060, 0, 1, 0.963 4096, 2048, 12, 0, 0.971 4096, 2048, 12, 1, 0.941 4096, 2060, 12, 0, 0.977 4096, 2060, 12, 1, 0.949 8192, 0, 0, 0, 0.85 8192, 0, 0, 1, 0.845 8192, 13, 0, 0, 0.937 8192, 13, 0, 1, 0.939 8192, 45, 0, 0, 0.932 8192, 45, 0, 1, 0.927 8192, 0, 13, 0, 0.621 8192, 0, 13, 1, 0.62 8192, 0, 45, 0, 0.53 8192, 0, 45, 1, 0.516 8192, 13, 13, 0, 0.664 8192, 13, 13, 1, 0.659 8192, 45, 45, 0, 0.593 8192, 45, 45, 1, 0.575 8192, 2048, 0, 0, 0.854 8192, 2048, 0, 1, 0.834 8192, 2061, 0, 0, 0.863 8192, 2061, 0, 1, 0.857 8192, 2048, 13, 0, 0.63 8192, 2048, 13, 1, 0.629 8192, 2061, 13, 0, 0.627 8192, 2061, 13, 1, 0.62 Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Diffstat (limited to 'sysdeps/x86/dl-cacheinfo.h')
-rw-r--r-- | sysdeps/x86/dl-cacheinfo.h | 8 |
1 files changed, 5 insertions, 3 deletions
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index e6c94dfd02..2e43e67e4f 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -866,12 +866,14 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */ unsigned int minimum_rep_movsb_threshold; #endif - /* NB: The default REP MOVSB threshold is 2048 * (VEC_SIZE / 16). */ + /* NB: The default REP MOVSB threshold is 4096 * (VEC_SIZE / 16) for + VEC_SIZE == 64 or 32. For VEC_SIZE == 16, the default REP MOVSB + threshold is 2048 * (VEC_SIZE / 16). */ unsigned int rep_movsb_threshold; if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512)) { - rep_movsb_threshold = 2048 * (64 / 16); + rep_movsb_threshold = 4096 * (64 / 16); #if HAVE_TUNABLES minimum_rep_movsb_threshold = 64 * 8; #endif @@ -879,7 +881,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) else if (CPU_FEATURE_PREFERRED_P (cpu_features, AVX_Fast_Unaligned_Load)) { - rep_movsb_threshold = 2048 * (32 / 16); + rep_movsb_threshold = 4096 * (32 / 16); #if HAVE_TUNABLES minimum_rep_movsb_threshold = 32 * 8; #endif |