diff options
Diffstat (limited to 'sysdeps/x86')
-rw-r--r-- | sysdeps/x86/cpu-features-offsets.sym | 1 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features.c | 80 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features.h | 23 |
3 files changed, 85 insertions, 19 deletions
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym index f6739fae81..33dd094e37 100644 --- a/sysdeps/x86/cpu-features-offsets.sym +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -15,6 +15,7 @@ CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx) CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx) FAMILY_OFFSET offsetof (struct cpu_features, family) MODEL_OFFSET offsetof (struct cpu_features, model) +XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size) FEATURE_OFFSET offsetof (struct cpu_features, feature) FEATURE_SIZE sizeof (unsigned int) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index d1ee922290..9eca98817d 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -18,6 +18,7 @@ #include <cpuid.h> #include <cpu-features.h> +#include <libc-internal.h> static void get_common_indeces (struct cpu_features *cpu_features, @@ -88,6 +89,71 @@ get_common_indeces (struct cpu_features *cpu_features, cpu_features->feature[index_arch_FMA_Usable] |= bit_arch_FMA_Usable; } + + /* For _dl_runtime_resolve, set xsave_state_size to xsave area + size + integer register save size and align it to 64 bytes. */ + if (cpu_features->max_cpuid >= 0xd) + { + unsigned int eax, ebx, ecx, edx; + + __cpuid_count (0xd, 0, eax, ebx, ecx, edx); + if (ebx != 0) + { + cpu_features->xsave_state_size + = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); + + __cpuid_count (0xd, 1, eax, ebx, ecx, edx); + + /* Check if XSAVEC is available. */ + if ((eax & (1 << 1)) != 0) + { + unsigned int xstate_comp_offsets[32]; + unsigned int xstate_comp_sizes[32]; + unsigned int i; + + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = 160; + xstate_comp_offsets[2] = 576; + xstate_comp_sizes[0] = 160; + xstate_comp_sizes[1] = 256; + + for (i = 2; i < 32; i++) + { + if ((STATE_SAVE_MASK & (1 << i)) != 0) + { + __cpuid_count (0xd, i, eax, ebx, ecx, edx); + xstate_comp_sizes[i] = eax; + } + else + { + ecx = 0; + xstate_comp_sizes[i] = 0; + } + + if (i > 2) + { + xstate_comp_offsets[i] + = (xstate_comp_offsets[i - 1] + + xstate_comp_sizes[i -1]); + if ((ecx & (1 << 1)) != 0) + xstate_comp_offsets[i] + = ALIGN_UP (xstate_comp_offsets[i], 64); + } + } + + /* Use XSAVEC. */ + unsigned int size + = xstate_comp_offsets[31] + xstate_comp_sizes[31]; + if (size) + { + cpu_features->xsave_state_size + = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); + cpu_features->feature[index_arch_XSAVEC_Usable] + |= bit_arch_XSAVEC_Usable; + } + } + } + } } } @@ -213,20 +279,6 @@ init_cpu_features (struct cpu_features *cpu_features) else cpu_features->feature[index_arch_Prefer_No_AVX512] |= bit_arch_Prefer_No_AVX512; - - /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow. - If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt. */ - cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow] - |= bit_arch_Use_dl_runtime_resolve_slow; - if (cpu_features->max_cpuid >= 0xd) - { - unsigned int eax; - - __cpuid_count (0xd, 1, eax, ebx, ecx, edx); - if ((eax & (1 << 2)) != 0) - cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt] - |= bit_arch_Use_dl_runtime_resolve_opt; - } } /* This spells out "AuthenticAMD". */ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h index 2609ac0999..507a141414 100644 --- a/sysdeps/x86/cpu-features.h +++ b/sysdeps/x86/cpu-features.h @@ -37,9 +37,8 @@ #define bit_arch_Prefer_No_VZEROUPPER (1 << 17) #define bit_arch_Fast_Unaligned_Copy (1 << 18) #define bit_arch_Prefer_ERMS (1 << 19) -#define bit_arch_Use_dl_runtime_resolve_opt (1 << 20) -#define bit_arch_Use_dl_runtime_resolve_slow (1 << 21) -#define bit_arch_Prefer_No_AVX512 (1 << 22) +#define bit_arch_Prefer_No_AVX512 (1 << 20) +#define bit_arch_XSAVEC_Usable (1 << 21) /* CPUID Feature flags. */ @@ -82,6 +81,15 @@ /* The current maximum size of the feature integer bit array. */ #define FEATURE_INDEX_MAX 1 +/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need + space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be + aligned to 16 bytes for fxsave and 64 bytes for xsave. */ +#define STATE_SAVE_OFFSET (8 * 7 + 8) + +/* Save SSE, AVX, AVX512, mask and bound registers. */ +#define STATE_SAVE_MASK \ + ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)) + #ifdef __ASSEMBLER__ # include <cpu-features-offsets.h> @@ -206,6 +214,12 @@ struct cpu_features } cpuid[COMMON_CPUID_INDEX_MAX]; unsigned int family; unsigned int model; + /* The type must be unsigned long int so that we use + + sub xsave_state_size_offset(%rip) %RSP_LP + + in _dl_runtime_resolve. */ + unsigned long int xsave_state_size; unsigned int feature[FEATURE_INDEX_MAX]; }; @@ -298,9 +312,8 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1 # define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1 # define index_arch_Prefer_ERMS FEATURE_INDEX_1 -# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1 -# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1 # define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1 +# define index_arch_XSAVEC_Usable FEATURE_INDEX_1 #endif /* !__ASSEMBLER__ */ |