/* Source: sysdeps/x86_64/multiarch/init-arch.h
   (blob cfc6e7049e8f2d0ccfe6a201798c2af20aae9ce3).  */
/* This file is part of the GNU C Library.
   Copyright (C) 2008-2015 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

/* Internal "arch feature" masks.  HAS_ARCH_FEATURE (name) tests
   bit_<name> against cpu_features.feature[index_<name>]; every name
   below currently uses FEATURE_INDEX_1, so all masks live in the same
   word.  Bit 3 is not assigned to any name in this file.  The values
   are also used from assembly, so keep them plain integer
   expressions.  */
#define bit_Fast_Rep_String		(1 << 0)
#define bit_Fast_Copy_Backward		(1 << 1)
#define bit_Slow_BSF			(1 << 2)
#define bit_Fast_Unaligned_Load		(1 << 4)
#define bit_Prefer_PMINUB_for_stringop	(1 << 5)
#define bit_AVX_Usable			(1 << 6)
#define bit_FMA_Usable			(1 << 7)
#define bit_FMA4_Usable			(1 << 8)
#define bit_Slow_SSE4_2			(1 << 9)
#define bit_AVX2_Usable			(1 << 10)
#define bit_AVX_Fast_Unaligned_Load	(1 << 11)
#define bit_AVX512F_Usable		(1 << 12)
#define bit_AVX512DQ_Usable		(1 << 13)

/* CPUID Feature flags.  */

/* COMMON_CPUID_INDEX_1.  The feature tests in this file read bit_SSE2
   from EDX and the other masks of this group from ECX.  */
#define bit_SSE2	(1 << 26)
#define bit_SSSE3	(1 << 9)
#define bit_SSE4_1	(1 << 19)
#define bit_SSE4_2	(1 << 20)
#define bit_OSXSAVE	(1 << 27)
#define bit_AVX		(1 << 28)
#define bit_POPCOUNT	(1 << 23)
#define bit_FMA		(1 << 12)
/* COMMON_CPUID_INDEX_80000001 (not index 1): CPUID_FMA4 tests this
   mask against ECX of that slot.  */
#define bit_FMA4	(1 << 16)

/* COMMON_CPUID_INDEX_7.  All of these are tested against EBX.  */
#define bit_RTM		(1 << 11)
#define bit_AVX2	(1 << 5)
#define bit_AVX512F	(1 << 16)
#define bit_AVX512DQ	(1 << 17)

/* XCR0 Feature flags.  Each macro is the mask of exactly one XCR0 bit
   (bits 1, 2, 5, 6 and 7 respectively).  bit_YMM_state used to be
   spelled (2 << 1); that is the same value, 4, but read like a typo
   for bit 1, so it now follows the (1 << n) form used everywhere
   else.  */
#define bit_XMM_state		(1 << 1)
#define bit_YMM_state		(1 << 2)
#define bit_Opmask_state	(1 << 5)
#define bit_ZMM0_15_state	(1 << 6)
#define bit_ZMM16_31_state	(1 << 7)

/* Subscript of the feature[] word that holds the first (and so far
   only) group of internal feature bits.  */
#define FEATURE_INDEX_1 0

/* Number of words in the feature[] array; raise it when another
   FEATURE_INDEX_* is introduced.  */
#define FEATURE_INDEX_MAX 1

#ifdef	__ASSEMBLER__

# include <ifunc-defines.h>

/* Assembler-side location of the CPUID register that carries each
   feature bit.  COMMON_CPUID_INDEX_*, CPUID_SIZE and CPUID_*_OFFSET are
   not defined in this file for the assembler; presumably they come from
   <ifunc-defines.h> -- confirm there.  The register selected for each
   name matches the C-side tests (EDX for SSE2, ECX for the other
   index-1 bits, EBX for AVX2).
   NOTE(review): the expansions are unparenthesized sums, so they are
   only safe where they are added to something, not multiplied.  */
# define index_SSE2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_SSSE3	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_AVX	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_AVX2	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET

/* Assembler-side location of the feature[] word that holds each
   internal feature bit (pair each with the bit_<name> mask of the same
   name).  FEATURE_SIZE is not defined in this file; presumably it comes
   from <ifunc-defines.h> -- confirm there.  Every name uses
   FEATURE_INDEX_1, so all expansions are currently identical.  */
# define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
# define index_Slow_BSF			FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_FMA_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_Slow_SSE4_2		FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX2_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX512F_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX512DQ_Usable		FEATURE_INDEX_1*FEATURE_SIZE

#else	/* __ASSEMBLER__ */

# include <sys/param.h>

/* Subscripts of the cpu_features.cpuid[] array.  The names suggest one
   slot per cached CPUID leaf (1, 7 and 0x80000001); the code that fills
   the slots is not in this file.  COMMON_CPUID_INDEX_MAX is the array
   length, so new slots must be added above it.
   NOTE(review): the assembler branch of this header uses the same
   COMMON_CPUID_INDEX_* names without seeing this enum, so the values
   presumably have to stay in sync with their definition there.  */
enum
  {
    COMMON_CPUID_INDEX_1 = 0,
    COMMON_CPUID_INDEX_7,
    COMMON_CPUID_INDEX_80000001,	/* for AMD */
    /* Keep the following line at the end.  */
    COMMON_CPUID_INDEX_MAX
  };

/* CPU identification data read by the feature-test macros in this
   header.  Only the declaration of the single hidden instance,
   __cpu_features, is here; its definition is elsewhere.
   NOTE(review): the assembler-side index_* macros address this object
   through size/offset constants defined outside this file, so changing
   member order or types presumably requires updating those too.  */
extern struct cpu_features
{
  /* INIT_ARCH treats arch_kind_unknown (0) as "not initialized yet".  */
  enum cpu_features_kind
    {
      arch_kind_unknown = 0,
      arch_kind_intel,
      arch_kind_amd,
      arch_kind_other
    } kind;
  int max_cpuid;
  /* Register values, one entry per COMMON_CPUID_INDEX_* slot; tested by
     HAS_CPU_FEATURE and HAS_CPUID_FLAG.  */
  struct cpuid_registers
  {
    unsigned int eax;
    unsigned int ebx;
    unsigned int ecx;
    unsigned int edx;
  } cpuid[COMMON_CPUID_INDEX_MAX];
  unsigned int family;
  unsigned int model;
  /* Words of internal feature bits tested by HAS_ARCH_FEATURE.  */
  unsigned int feature[FEATURE_INDEX_MAX];
} __cpu_features attribute_hidden;


/* Initializer for __cpu_features (presumably fills it in; the
   definition is not in this file).  */
extern void __init_cpu_features (void) attribute_hidden;

/* Call __init_cpu_features unless __cpu_features.kind has already left
   arch_kind_unknown.  Expands to one statement; the caller supplies the
   trailing semicolon.  */
# define INIT_ARCH() \
  do							\
    {							\
      if (__cpu_features.kind == arch_kind_unknown)	\
	__init_cpu_features ();				\
    }							\
  while (0)

/* Used from outside libc.so to get access to the CPU features structure.
   Inside libc itself (IS_IN (libc)) the function-like macro below
   replaces every call with the address of the hidden __cpu_features
   object, so no function call is made.  The prototype has to stay above
   the macro definition; otherwise the declaration itself would be
   macro-expanded.  */
extern const struct cpu_features *__get_cpu_features (void)
     __attribute__ ((const));

# if IS_IN (libc)
#  define __get_cpu_features()	(&__cpu_features)
# endif

/* Evaluates to 1 if any bit of mask BIT is set in member REG (eax, ebx,
   ecx or edx) of cpuid[IDX], read through __get_cpu_features (), and to
   0 otherwise.  IDX is a COMMON_CPUID_INDEX_* value.  */
# define HAS_CPU_FEATURE(idx, reg, bit) \
  ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0)

/* Following are the feature tests used throughout libc.  */

/* CPUID_* evaluates to true if the feature flag is enabled.
   HAS_CPUID_FLAG, on which the CPUID_* macros are built, reads
   __cpu_features directly rather than going through
   __get_cpu_features: the CPUID_* macros are called only within
   __init_cpu_features, where calling __get_cpu_features would
   recurse infinitely.  */
# define HAS_CPUID_FLAG(idx, reg, bit) \
  ((__cpu_features.cpuid[idx].reg & (bit)) != 0)

/* Flag tests built on HAS_CPUID_FLAG, i.e. read straight from
   __cpu_features.  Grouped by cpuid[] slot and register, ascending by
   bit within each group.  */

/* COMMON_CPUID_INDEX_1, ECX.  */
# define CPUID_FMA	HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
# define CPUID_OSXSAVE	HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
# define CPUID_AVX	HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX)

/* COMMON_CPUID_INDEX_80000001, ECX.  */
# define CPUID_FMA4	HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)

/* COMMON_CPUID_INDEX_7, EBX.  */
# define CPUID_AVX2	HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
# define CPUID_RTM	HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
# define CPUID_AVX512F	HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512F)
# define CPUID_AVX512DQ	HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512DQ)

/* HAS_* evaluates to true if we may use the feature at runtime.
   The tests below check the CPUID bit through HAS_CPU_FEATURE; they
   are listed by register, ascending by bit.  */

/* COMMON_CPUID_INDEX_1, EDX.  */
# define HAS_SSE2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)

/* COMMON_CPUID_INDEX_1, ECX.  */
# define HAS_SSSE3	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
# define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
# define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
# define HAS_POPCOUNT	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)

/* COMMON_CPUID_INDEX_7, EBX.  */
# define HAS_RTM	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM)

/* C-side subscript into cpu_features.feature[] for each internal
   feature bit; HAS_ARCH_FEATURE pastes the name onto "index_" to find
   it.  Unlike the assembler definitions of the same names, these are
   element indices and are not scaled by FEATURE_SIZE.  Every name
   currently lives in word FEATURE_INDEX_1.  */
# define index_Fast_Rep_String		FEATURE_INDEX_1
# define index_Fast_Copy_Backward	FEATURE_INDEX_1
# define index_Slow_BSF			FEATURE_INDEX_1
# define index_Fast_Unaligned_Load	FEATURE_INDEX_1
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
# define index_AVX_Usable		FEATURE_INDEX_1
# define index_FMA_Usable		FEATURE_INDEX_1
# define index_FMA4_Usable		FEATURE_INDEX_1
# define index_Slow_SSE4_2		FEATURE_INDEX_1
# define index_AVX2_Usable		FEATURE_INDEX_1
# define index_AVX_Fast_Unaligned_Load	FEATURE_INDEX_1
# define index_AVX512F_Usable		FEATURE_INDEX_1
# define index_AVX512DQ_Usable		FEATURE_INDEX_1

/* Evaluates to 1 if internal feature NAME is set, 0 otherwise.  NAME is
   the common suffix of an index_<NAME> / bit_<NAME> pair: the macro
   reads feature[index_<NAME>] through __get_cpu_features () and masks
   it with bit_<NAME>.  */
# define HAS_ARCH_FEATURE(name) \
  ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)

/* Convenience wrappers around HAS_ARCH_FEATURE.  */

/* Fast_* and Slow_* flags.  */
# define HAS_FAST_REP_STRING		HAS_ARCH_FEATURE (Fast_Rep_String)
# define HAS_FAST_COPY_BACKWARD		HAS_ARCH_FEATURE (Fast_Copy_Backward)
# define HAS_SLOW_BSF			HAS_ARCH_FEATURE (Slow_BSF)
# define HAS_FAST_UNALIGNED_LOAD	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
# define HAS_AVX_FAST_UNALIGNED_LOAD	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)

/* *_Usable flags.  Note that HAS_AVX, HAS_FMA and friends test these
   internal bits, not the raw CPUID_* flags.  */
# define HAS_AVX			HAS_ARCH_FEATURE (AVX_Usable)
# define HAS_FMA			HAS_ARCH_FEATURE (FMA_Usable)
# define HAS_FMA4			HAS_ARCH_FEATURE (FMA4_Usable)
# define HAS_AVX2			HAS_ARCH_FEATURE (AVX2_Usable)
# define HAS_AVX512F			HAS_ARCH_FEATURE (AVX512F_Usable)
# define HAS_AVX512DQ			HAS_ARCH_FEATURE (AVX512DQ_Usable)

#endif	/* __ASSEMBLER__ */