# blob: 92e91d79af36da732c7142c8ccd5ebf4698ec162
ifeq ($(subdir),math)
# Additional libm objects, one group per variant suffix in the object name.
# Entries are appended to libm-sysdep_routines in the same order as before.

# Compiler flag sets shared by the per-file CFLAGS assignments below.
libm-fma-cflags := -mfma -mavx2
libm-fma4-cflags := -mfma4
libm-sse2avx-cflags := -msse2avx -DSSE2AVX

# "-c" and "-sse4_1" variants of floor/ceil/rint/nearbyint/trunc and their
# "f"-suffixed counterparts.  No per-file CFLAGS are set for them here.
libm-sysdep_routines += \
  s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
  s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c \
  s_trunc-c s_truncf-c
libm-sysdep_routines += \
  s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 s_floorf-sse4_1 \
  s_nearbyint-sse4_1 s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1 \
  s_trunc-sse4_1 s_truncf-sse4_1

# "-fma" variants: each source file is compiled with -mfma -mavx2.
libm-sysdep_routines += \
  e_exp-fma e_log-fma e_pow-fma s_atan-fma \
  e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \
  mplog-fma mpa-fma slowexp-fma slowpow-fma \
  sincos32-fma doasin-fma dosincos-fma \
  halfulp-fma mpexp-fma \
  mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma

CFLAGS-doasin-fma.c = $(libm-fma-cflags)
CFLAGS-dosincos-fma.c = $(libm-fma-cflags)
CFLAGS-e_asin-fma.c = $(libm-fma-cflags)
CFLAGS-e_atan2-fma.c = $(libm-fma-cflags)
CFLAGS-e_exp-fma.c = $(libm-fma-cflags)
CFLAGS-e_log-fma.c = $(libm-fma-cflags)
# e_pow additionally receives whatever $(config-cflags-nofma) expands to.
CFLAGS-e_pow-fma.c = $(libm-fma-cflags) $(config-cflags-nofma)
CFLAGS-halfulp-fma.c = $(libm-fma-cflags)
CFLAGS-mpa-fma.c = $(libm-fma-cflags)
CFLAGS-mpatan-fma.c = $(libm-fma-cflags)
CFLAGS-mpatan2-fma.c = $(libm-fma-cflags)
CFLAGS-mpexp-fma.c = $(libm-fma-cflags)
CFLAGS-mplog-fma.c = $(libm-fma-cflags)
CFLAGS-mpsqrt-fma.c = $(libm-fma-cflags)
CFLAGS-mptan-fma.c = $(libm-fma-cflags)
CFLAGS-s_atan-fma.c = $(libm-fma-cflags)
CFLAGS-sincos32-fma.c = $(libm-fma-cflags)
CFLAGS-slowexp-fma.c = $(libm-fma-cflags)
CFLAGS-slowpow-fma.c = $(libm-fma-cflags)
CFLAGS-s_sin-fma.c = $(libm-fma-cflags)
CFLAGS-s_tan-fma.c = $(libm-fma-cflags)

# e_expf is the only "f"-suffixed routine with an "-fma" variant in this list.
libm-sysdep_routines += e_expf-fma
CFLAGS-e_expf-fma.c = $(libm-fma-cflags)

# "-fma4" variants: same base routines as the "-fma" list above, each
# compiled with -mfma4.
libm-sysdep_routines += \
  e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
  e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
  mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
  sincos32-fma4 doasin-fma4 dosincos-fma4 \
  halfulp-fma4 mpexp-fma4 \
  mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4

CFLAGS-doasin-fma4.c = $(libm-fma4-cflags)
CFLAGS-dosincos-fma4.c = $(libm-fma4-cflags)
CFLAGS-e_asin-fma4.c = $(libm-fma4-cflags)
CFLAGS-e_atan2-fma4.c = $(libm-fma4-cflags)
CFLAGS-e_exp-fma4.c = $(libm-fma4-cflags)
CFLAGS-e_log-fma4.c = $(libm-fma4-cflags)
# As with the -fma build, e_pow also gets $(config-cflags-nofma).
CFLAGS-e_pow-fma4.c = $(libm-fma4-cflags) $(config-cflags-nofma)
CFLAGS-halfulp-fma4.c = $(libm-fma4-cflags)
CFLAGS-mpa-fma4.c = $(libm-fma4-cflags)
CFLAGS-mpatan-fma4.c = $(libm-fma4-cflags)
CFLAGS-mpatan2-fma4.c = $(libm-fma4-cflags)
CFLAGS-mpexp-fma4.c = $(libm-fma4-cflags)
CFLAGS-mplog-fma4.c = $(libm-fma4-cflags)
CFLAGS-mpsqrt-fma4.c = $(libm-fma4-cflags)
CFLAGS-mptan-fma4.c = $(libm-fma4-cflags)
CFLAGS-s_atan-fma4.c = $(libm-fma4-cflags)
CFLAGS-sincos32-fma4.c = $(libm-fma4-cflags)
CFLAGS-slowexp-fma4.c = $(libm-fma4-cflags)
CFLAGS-slowpow-fma4.c = $(libm-fma4-cflags)
CFLAGS-s_sin-fma4.c = $(libm-fma4-cflags)
CFLAGS-s_tan-fma4.c = $(libm-fma4-cflags)

# "-avx" variants: each source file is compiled with -msse2avx and with
# SSE2AVX defined.
libm-sysdep_routines += \
  e_exp-avx e_log-avx s_atan-avx \
  e_atan2-avx s_sin-avx s_tan-avx \
  mplog-avx mpa-avx slowexp-avx \
  mpexp-avx

CFLAGS-e_atan2-avx.c = $(libm-sse2avx-cflags)
CFLAGS-e_exp-avx.c = $(libm-sse2avx-cflags)
CFLAGS-e_log-avx.c = $(libm-sse2avx-cflags)
CFLAGS-mpa-avx.c = $(libm-sse2avx-cflags)
CFLAGS-mpexp-avx.c = $(libm-sse2avx-cflags)
CFLAGS-mplog-avx.c = $(libm-sse2avx-cflags)
CFLAGS-s_atan-avx.c = $(libm-sse2avx-cflags)
CFLAGS-s_sin-avx.c = $(libm-sse2avx-cflags)
CFLAGS-slowexp-avx.c = $(libm-sse2avx-cflags)
CFLAGS-s_tan-avx.c = $(libm-sse2avx-cflags)
endif
ifeq ($(subdir),mathvec)
# Additional libmvec objects.  The list is split into two appends purely for
# readability; the resulting word order is the same as a single list.

# Objects named svml_*_core_<isa>: one function per line, with its sse4,
# avx2 and avx512 entries side by side.
libmvec-sysdep_routines += \
  svml_d_cos2_core_sse4 svml_d_cos4_core_avx2 svml_d_cos8_core_avx512 \
  svml_d_sin2_core_sse4 svml_d_sin4_core_avx2 svml_d_sin8_core_avx512 \
  svml_d_log2_core_sse4 svml_d_log4_core_avx2 svml_d_log8_core_avx512 \
  svml_d_sincos2_core_sse4 svml_d_sincos4_core_avx2 svml_d_sincos8_core_avx512 \
  svml_s_cosf4_core_sse4 svml_s_cosf8_core_avx2 svml_s_cosf16_core_avx512 \
  svml_s_sinf4_core_sse4 svml_s_sinf8_core_avx2 svml_s_sinf16_core_avx512 \
  svml_s_logf4_core_sse4 svml_s_logf8_core_avx2 svml_s_logf16_core_avx512 \
  svml_d_exp2_core_sse4 svml_d_exp4_core_avx2 svml_d_exp8_core_avx512 \
  svml_s_expf4_core_sse4 svml_s_expf8_core_avx2 svml_s_expf16_core_avx512 \
  svml_d_pow2_core_sse4 svml_d_pow4_core_avx2 svml_d_pow8_core_avx512 \
  svml_s_powf4_core_sse4 svml_s_powf8_core_avx2 svml_s_powf16_core_avx512 \
  svml_s_sincosf4_core_sse4 svml_s_sincosf8_core_avx2 svml_s_sincosf16_core_avx512

# Objects named svml_*_core-<isa> (dash before the ISA tag): again one
# function per line.
# NOTE(review): presumably the counterparts paired with the kernels above for
# run-time selection -- confirm against the sources.
libmvec-sysdep_routines += \
  svml_d_cos2_core-sse2 svml_d_cos4_core-sse svml_d_cos8_core-avx2 \
  svml_d_exp2_core-sse2 svml_d_exp4_core-sse svml_d_exp8_core-avx2 \
  svml_d_log2_core-sse2 svml_d_log4_core-sse svml_d_log8_core-avx2 \
  svml_d_pow2_core-sse2 svml_d_pow4_core-sse svml_d_pow8_core-avx2 \
  svml_d_sin2_core-sse2 svml_d_sin4_core-sse svml_d_sin8_core-avx2 \
  svml_d_sincos2_core-sse2 svml_d_sincos4_core-sse svml_d_sincos8_core-avx2 \
  svml_s_cosf16_core-avx2 svml_s_cosf4_core-sse2 svml_s_cosf8_core-sse \
  svml_s_expf16_core-avx2 svml_s_expf4_core-sse2 svml_s_expf8_core-sse \
  svml_s_logf16_core-avx2 svml_s_logf4_core-sse2 svml_s_logf8_core-sse \
  svml_s_powf16_core-avx2 svml_s_powf4_core-sse2 svml_s_powf8_core-sse \
  svml_s_sincosf16_core-avx2 svml_s_sincosf4_core-sse2 svml_s_sincosf8_core-sse \
  svml_s_sinf16_core-avx2 svml_s_sinf4_core-sse2 svml_s_sinf8_core-sse
endif
|