 ChangeLog                  |  7 +++++++
 sysdeps/x86/cpu-features.c | 18 +++++++++++++-----
 2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 5a1f291b85..7fe8e2463e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2018-07-06  Amit Pawar  <amit.pawar@amd.com>
+
+	* sysdeps/x86/cpu-features.c (get_common_indeces):
+	AVX_Fast_Unaligned_Load is enabled when AVX2 is detected.
+	* sysdeps/x86/cpu-features.c (init_cpu_features):
+	AVX_Fast_Unaligned_Load is disabled for Excavator core.
+
 2018-07-05  Florian Weimer  <fweimer@redhat.com>
 
 	* csu/Makefile (CFLAGS-static-reloc.os): Build with stack
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 0fc3674c4b..d41ebde823 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -78,8 +78,15 @@ get_common_indeces (struct cpu_features *cpu_features,
       /* The following features depend on AVX being usable.  */
       /* Determine if AVX2 is usable.  */
       if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
+	{
 	  cpu_features->feature[index_arch_AVX2_Usable]
 	    |= bit_arch_AVX2_Usable;
+
+	  /* Unaligned load with 256-bit AVX registers are faster on
+	     Intel/AMD processors with AVX2.  */
+	  cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+	    |= bit_arch_AVX_Fast_Unaligned_Load;
+	}
       /* Determine if FMA is usable.  */
       if (CPU_FEATURES_CPU_P (cpu_features, FMA))
 	cpu_features->feature[index_arch_FMA_Usable]
@@ -298,11 +305,6 @@ init_cpu_features (struct cpu_features *cpu_features)
 	    }
 	}
 
-      /* Unaligned load with 256-bit AVX registers are faster on
-	 Intel processors with AVX2.  */
-      if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
-	cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
-	  |= bit_arch_AVX_Fast_Unaligned_Load;
 
       /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
          if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
@@ -351,9 +353,15 @@ init_cpu_features (struct cpu_features *cpu_features)
 #endif
 	  /* "Excavator"   */
 	  if (model >= 0x60 && model <= 0x7f)
+	    {
 	      cpu_features->feature[index_arch_Fast_Unaligned_Load]
 		|= (bit_arch_Fast_Unaligned_Load
 		    | bit_arch_Fast_Copy_Backward);
+
+	      /* Unaligned AVX loads are slower.  */
+	      cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+		&= ~bit_arch_AVX_Fast_Unaligned_Load;
+	    }
 	}
     }
   else
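Note on the logic above: before this patch, the set lived in an Intel-only path of init_cpu_features, so AMD parts with AVX2 never got the bit. The patch moves the set into the common path (get_common_indeces), which runs before the vendor-specific model checks, so the Excavator branch can then mask the bit back off. Below is a minimal, self-contained sketch of that set-then-clear pattern; the single feature word, the bit values, and the driver are illustrative assumptions, not glibc's real definitions (glibc keeps these bits in a feature array indexed by index_arch_* macros).

/* Sketch of the set-then-clear feature-bit pattern used in the patch.
   Bit values and the single feature word are simplified assumptions;
   glibc stores these bits in an array indexed by index_arch_* macros.  */

#include <stdio.h>

#define bit_arch_AVX2_Usable              (1u << 0)
#define bit_arch_AVX_Fast_Unaligned_Load  (1u << 1)
#define bit_arch_Fast_Unaligned_Load      (1u << 2)
#define bit_arch_Fast_Copy_Backward       (1u << 3)

struct cpu_features
{
  unsigned int feature;
};

/* Common path (get_common_indeces in the patch): AVX2 usable implies
   fast 256-bit unaligned loads on both Intel and AMD...  */
static void
set_common_bits (struct cpu_features *cf, int avx2_usable)
{
  if (avx2_usable)
    {
      cf->feature |= bit_arch_AVX2_Usable;
      cf->feature |= bit_arch_AVX_Fast_Unaligned_Load;
    }
}

/* ...except that the AMD-specific path (init_cpu_features) masks the
   bit back off for Excavator, models 0x60-0x7f.  */
static void
apply_excavator_quirk (struct cpu_features *cf, unsigned int model)
{
  if (model >= 0x60 && model <= 0x7f)
    {
      cf->feature |= bit_arch_Fast_Unaligned_Load
                     | bit_arch_Fast_Copy_Backward;
      cf->feature &= ~bit_arch_AVX_Fast_Unaligned_Load;
    }
}

int
main (void)
{
  struct cpu_features excavator = { 0 };
  set_common_bits (&excavator, 1);          /* Excavator supports AVX2.  */
  apply_excavator_quirk (&excavator, 0x60);
  printf ("AVX_Fast_Unaligned_Load: %s\n",
          (excavator.feature & bit_arch_AVX_Fast_Unaligned_Load)
          ? "set" : "clear");               /* prints "clear" */
  return 0;
}

The ordering is what makes the patch safe: get_common_indeces runs first, so the Excavator quirk always sees the bit already set and clears it, which is why the old unconditional set later in init_cpu_features had to be removed rather than merely guarded.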