about summary refs log tree commit diff
path: root/sysdeps/x86
diff options
context:
space:
mode:
author Noah Goldstein <goldstein.w.n@gmail.com> 2023-09-20 15:44:50 -0500
committer Noah Goldstein <goldstein.w.n@gmail.com> 2023-09-29 14:18:42 -0500
commit d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff (patch)
tree 0015cc47ecb9affe00f1d8afbebf4a5c16d20e25 /sysdeps/x86
parent 5f913506f4bf4785f9cf2c2ac8d17dc9f877ff17 (diff)
download glibc-d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff.tar.gz
glibc-d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff.tar.xz
glibc-d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff.zip
x86: Add support for AVX10 preset and vec size in cpu-features
This commit adds support for the new AVX10 CPU features:
https://cdrdv2-public.intel.com/784267/355989-intel-avx10-spec.pdf

We add checks for:
    - `AVX10`: Check if AVX10 is present.
    - `AVX10_{X,Y,Z}MM`: Check if a given vec class has AVX10 support.

`make check` passes and cpuid output was checked against GNR/DMR on an
emulator.
Diffstat (limited to 'sysdeps/x86')
-rw-r--r-- sysdeps/x86/bits/platform/x86.h 14
-rw-r--r-- sysdeps/x86/cpu-features.c 25
-rw-r--r-- sysdeps/x86/include/cpu-features.h 27
-rw-r--r-- sysdeps/x86/tst-get-cpu-features.c 8
4 files changed, 71 insertions, 3 deletions
diff --git a/sysdeps/x86/bits/platform/x86.h b/sysdeps/x86/bits/platform/x86.h
index 88ca071aa7..1e23d53ba2 100644
--- a/sysdeps/x86/bits/platform/x86.h
+++ b/sysdeps/x86/bits/platform/x86.h
@@ -30,7 +30,8 @@ enum
   CPUID_INDEX_80000008,
   CPUID_INDEX_7_ECX_1,
   CPUID_INDEX_19,
-  CPUID_INDEX_14_ECX_0
+  CPUID_INDEX_14_ECX_0,
+  CPUID_INDEX_24_ECX_0
 };
 
 struct cpuid_feature
@@ -312,6 +313,7 @@ enum
   x86_cpu_AVX_NE_CONVERT	= x86_cpu_index_7_ecx_1_edx + 5,
   x86_cpu_AMX_COMPLEX		= x86_cpu_index_7_ecx_1_edx + 8,
   x86_cpu_PREFETCHI		= x86_cpu_index_7_ecx_1_edx + 14,
+  x86_cpu_AVX10			= x86_cpu_index_7_ecx_1_edx + 19,
   x86_cpu_APX_F			= x86_cpu_index_7_ecx_1_edx + 21,
 
   x86_cpu_index_19_ebx
@@ -325,5 +327,13 @@ enum
     = (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
        + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
 
-  x86_cpu_PTWRITE		= x86_cpu_index_14_ecx_0_ebx + 4
+  x86_cpu_PTWRITE		= x86_cpu_index_14_ecx_0_ebx + 4,
+
+  x86_cpu_index_24_ecx_0_ebx
+    = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int)
+       + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
+
+  x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16,
+  x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17,
+  x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18,
 };
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index badf088874..0bf923d48b 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -115,11 +115,18 @@ update_active (struct cpu_features *cpu_features)
   CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
 #endif
 
+  enum
+  {
+    os_xmm = 1,
+    os_ymm = 2,
+    os_zmm = 4
+  } os_vector_size = os_xmm;
   /* Can we call xgetbv?  */
   if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
     {
       unsigned int xcrlow;
       unsigned int xcrhigh;
+      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
       asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
       /* Is YMM and XMM state usable?  */
       if ((xcrlow & (bit_YMM_state | bit_XMM_state))
@@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features)
 	  /* Determine if AVX is usable.  */
 	  if (CPU_FEATURES_CPU_P (cpu_features, AVX))
 	    {
+	      os_vector_size |= os_ymm;
 	      CPU_FEATURE_SET (cpu_features, AVX);
 	      /* The following features depend on AVX being usable.  */
 	      /* Determine if AVX2 is usable.  */
@@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features)
 			 | bit_ZMM16_31_state))
 	      == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
 	    {
+	      os_vector_size |= os_zmm;
 	      /* Determine if AVX512F is usable.  */
 	      if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
 		{
@@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features)
 	    }
 	}
 
+      if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
+	  && cpu_features->basic.max_cpuid >= 0x24)
+	{
+	  __cpuid_count (
+	      0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
+	      cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
+	      cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
+	      cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
+	  if (os_vector_size & os_xmm)
+	    CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
+	  if (os_vector_size & os_ymm)
+	    CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
+	  if (os_vector_size & os_zmm)
+	    CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
+	}
+
       /* Are XTILECFG and XTILEDATA states usable?  */
       if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
 	  == (bit_XTILECFG_state | bit_XTILEDATA_state))
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index eb30d342a6..2d7427a6c0 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -29,7 +29,7 @@
 
 enum
 {
-  CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1
+  CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1
 };
 
 enum
@@ -319,6 +319,7 @@ enum
 #define bit_cpu_AVX_NE_CONVERT	(1u << 5)
 #define bit_cpu_AMX_COMPLEX	(1u << 8)
 #define bit_cpu_PREFETCHI	(1u << 14)
+#define bit_cpu_AVX10		(1u << 19)
 #define bit_cpu_APX_F		(1u << 21)
 
 /* CPUID_INDEX_19.  */
@@ -332,6 +333,13 @@ enum
 /* EBX.  */
 #define bit_cpu_PTWRITE		(1u << 4)
 
+/* CPUID_INDEX_24_ECX_0.  */
+
+/* EBX.  */
+#define bit_cpu_AVX10_XMM		(1u << 16)
+#define bit_cpu_AVX10_YMM		(1u << 17)
+#define bit_cpu_AVX10_ZMM		(1u << 18)
+
 /* CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -563,6 +571,7 @@ enum
 #define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
 #define index_cpu_AMX_COMPLEX	CPUID_INDEX_7_ECX_1
 #define index_cpu_PREFETCHI	CPUID_INDEX_7_ECX_1
+#define index_cpu_AVX10		CPUID_INDEX_7_ECX_1
 #define index_cpu_APX_F		CPUID_INDEX_7_ECX_1
 
 /* CPUID_INDEX_19.  */
@@ -576,6 +585,13 @@ enum
 /* EBX.  */
 #define index_cpu_PTWRITE	CPUID_INDEX_14_ECX_0
 
+/* CPUID_INDEX_24_ECX_0.  */
+
+/* EBX.  */
+#define index_cpu_AVX10_XMM	CPUID_INDEX_24_ECX_0
+#define index_cpu_AVX10_YMM	CPUID_INDEX_24_ECX_0
+#define index_cpu_AVX10_ZMM	CPUID_INDEX_24_ECX_0
+
 /* CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -809,6 +825,7 @@ enum
 #define reg_AVX_NE_CONVERT	edx
 #define reg_AMX_COMPLEX		edx
 #define reg_PREFETCHI		edx
+#define reg_AVX10		edx
 #define reg_APX_F		edx
 
 /* CPUID_INDEX_19.  */
@@ -822,6 +839,14 @@ enum
 /* EBX.  */
 #define reg_PTWRITE		ebx
 
+/* CPUID_INDEX_24_ECX_0.  */
+
+/* EBX.  */
+#define reg_AVX10_XMM		ebx
+#define reg_AVX10_YMM		ebx
+#define reg_AVX10_ZMM		ebx
+
+
 /* PREFERRED_FEATURE_INDEX_1.  First define the bitindex values
    sequentially, then define the bit_arch* and index_arch_* lookup
    constants.  */
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
index b27fa7324a..44edd18df2 100644
--- a/sysdeps/x86/tst-get-cpu-features.c
+++ b/sysdeps/x86/tst-get-cpu-features.c
@@ -219,6 +219,7 @@ do_test (void)
   CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
   CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
   CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
+  CHECK_CPU_FEATURE_PRESENT (AVX10);
   CHECK_CPU_FEATURE_PRESENT (APX_F);
   CHECK_CPU_FEATURE_PRESENT (AESKLE);
   CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
@@ -391,11 +392,18 @@ do_test (void)
   CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
   CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
   CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
+  CHECK_CPU_FEATURE_ACTIVE (AVX10);
   CHECK_CPU_FEATURE_ACTIVE (APX_F);
   CHECK_CPU_FEATURE_ACTIVE (AESKLE);
   CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
   CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
 
+  if (CPU_FEATURE_ACTIVE (AVX10))
+    {
+      CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM);
+      CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM);
+      CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM);
+    }
   return 0;
 }