summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog7
-rw-r--r--sysdeps/x86/cpu-features.c18
2 files changed, 20 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index 5a1f291b85..7fe8e2463e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2018-07-06  Amit Pawar  <amit.pawar@amd.com>
+
+	* sysdeps/x86/cpu-features.c (get_common_indeces):
+	AVX_Fast_Unaligned_Load is enabled when AVX2 is detected.
+	* sysdeps/x86/cpu-features.c (init_cpu_features):
+	AVX_Fast_Unaligned_Load is disabled for Excavator core.
+
 2018-07-05  Florian Weimer  <fweimer@redhat.com>
 
 	* csu/Makefile (CFLAGS-static-reloc.os): Build with stack
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 0fc3674c4b..d41ebde823 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -78,8 +78,15 @@ get_common_indeces (struct cpu_features *cpu_features,
 	      /* The following features depend on AVX being usable.  */
 	      /* Determine if AVX2 is usable.  */
 	      if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
+	      {
 		cpu_features->feature[index_arch_AVX2_Usable]
 		  |= bit_arch_AVX2_Usable;
+
+	        /* Unaligned loads with 256-bit AVX registers are faster on
+	           Intel/AMD processors with AVX2.  */
+	        cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+		  |= bit_arch_AVX_Fast_Unaligned_Load;
+	      }
 	      /* Determine if FMA is usable.  */
 	      if (CPU_FEATURES_CPU_P (cpu_features, FMA))
 		cpu_features->feature[index_arch_FMA_Usable]
@@ -298,11 +305,6 @@ init_cpu_features (struct cpu_features *cpu_features)
 	    }
 	}
 
-      /* Unaligned load with 256-bit AVX registers are faster on
-	 Intel processors with AVX2.  */
-      if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
-	cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
-	  |= bit_arch_AVX_Fast_Unaligned_Load;
 
       /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
          if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
@@ -351,9 +353,15 @@ init_cpu_features (struct cpu_features *cpu_features)
 #endif
 	  /* "Excavator"   */
 	  if (model >= 0x60 && model <= 0x7f)
+	  {
 	    cpu_features->feature[index_arch_Fast_Unaligned_Load]
 	      |= (bit_arch_Fast_Unaligned_Load
 		  | bit_arch_Fast_Copy_Backward);
+
+	    /* Unaligned AVX loads are slower on Excavator.  */
+	    cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+		  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+	  }
 	}
     }
   else