about summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu/math_private.h
blob: 8e79718262d8b15b3acaf2803a012c900af2152d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#ifndef _MATH_PRIVATE_H

#define math_opt_barrier(x) \
  ({ __typeof(x) __x;							      \
     if (sizeof (x) <= sizeof (double))					      \
       __asm ("" : "=x" (__x) : "0" (x));				      \
     else								      \
       __asm ("" : "=t" (__x) : "0" (x));				      \
     __x; })
#define math_force_eval(x) \
  do {									      \
    if (sizeof (x) <= sizeof (double))					      \
      __asm __volatile ("" : : "x" (x));				      \
    else								      \
      __asm __volatile ("" : : "f" (x));				      \
  } while (0)

#include <math/math_private.h>

/* We can do a few things better on x86-64.  */

#if defined __AVX__ || defined SSE2AVX
# define MOVD "vmovd"
# define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr"
#else
# define MOVD "movd"
# define STMXCSR "stmxcsr"
# define LDMXCSR "ldmxcsr"
#endif

/* Direct movement of float into integer register.  */
#undef EXTRACT_WORDS64
#define EXTRACT_WORDS64(i, d)						      \
  do {									      \
    long int i_;							      \
    asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((double) (d)));		      \
    (i) = i_;								      \
  } while (0)

/* And the reverse.  */
#undef INSERT_WORDS64
#define INSERT_WORDS64(d, i) \
  do {									      \
    long int i_ = i;							      \
    double d__;								      \
    asm (MOVD " %1, %0" : "=x" (d__) : "rm" (i_));			      \
    d = d__;								      \
  } while (0)

/* Direct movement of float into integer register.  */
#undef GET_FLOAT_WORD
#define GET_FLOAT_WORD(i, d) \
  do {									      \
    int i_;								      \
    asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((float) (d)));		      \
    (i) = i_;								      \
  } while (0)

/* And the reverse.  */
#undef SET_FLOAT_WORD
#define SET_FLOAT_WORD(f, i) \
  do {									      \
    int i_ = i;								      \
    float f__;								      \
    asm (MOVD " %1, %0" : "=x" (f__) : "rm" (i_));			      \
    f = f__;								      \
  } while (0)

#endif

#define __isnan(d) \
  ({ long int __di; EXTRACT_WORDS64 (__di, (double) (d));		      \
     (__di & 0x7fffffffffffffffl) > 0x7ff0000000000000l; })
#define __isnanf(d) \
  ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
     (__di & 0x7fffffff) > 0x7f800000; })

#define __isinf_ns(d) \
  ({ long int __di; EXTRACT_WORDS64 (__di, (double) (d));		      \
     (__di & 0x7fffffffffffffffl) == 0x7ff0000000000000l; })
#define __isinf_nsf(d) \
  ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
     (__di & 0x7fffffff) == 0x7f800000; })

#define __finite(d) \
  ({ long int __di; EXTRACT_WORDS64 (__di, (double) (d));		      \
     (__di & 0x7fffffffffffffffl) < 0x7ff0000000000000l; })
#define __finitef(d) \
  ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
     (__di & 0x7fffffff) < 0x7f800000; })

#if defined __AVX__ || defined SSE2AVX
# define __ieee754_sqrt(d) \
  ({ double __res;							      \
    asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d)));	      \
     __res; })
# define __ieee754_sqrtf(d) \
  ({ float __res;							      \
    asm ("vsqrtss %1, %0, %0" : "=x" (__res) : "xm" ((float) (d)));	      \
     __res; })
#else
# define __ieee754_sqrt(d) \
  ({ double __res;							      \
    asm ("sqrtsd %1, %0" : "=x" (__res) : "xm" ((double) (d)));		      \
     __res; })
# define __ieee754_sqrtf(d) \
  ({ float __res;							      \
    asm ("sqrtss %1, %0" : "=x" (__res) : "xm" ((float) (d)));		      \
     __res; })
#endif
#define __ieee754_sqrtl(d) \
  ({ long double __res;							      \
    asm ("fsqrt" : "=t" (__res) : "0" ((long double) (d)));		      \
     __res; })

#ifdef __SSE4_1__
# ifndef __rint
#  if defined __AVX__ || defined SSE2AVX
#   define __rint(d) \
  ({ double __res; \
    asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d)));      \
     __res; })
#  else
#   define __rint(d) \
  ({ double __res; \
    asm ("roundsd $4, %1, %0" : "=x" (__res) : "xm" ((double) (d)));	      \
     __res; })
#  endif
# endif
# ifndef __rintf
#  if defined __AVX__ || defined SSE2AVX
#   define __rintf(d) \
  ({ float __res; \
    asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d)));      \
     __res; })
#  else
#   define __rintf(d) \
  ({ float __res; \
    asm ("roundss $4, %1, %0" : "=x" (__res) : "xm" ((float) (d)));	      \
     __res; })
#  endif
# endif

# ifndef __floor
#  if defined __AVX__ || defined SSE2AVX
#   define __floor(d) \
  ({ double __res; \
    asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d)));      \
     __res; })
#  else
#   define __floor(d) \
  ({ double __res; \
    asm ("roundsd $1, %1, %0" : "=x" (__res) : "xm" ((double) (d)));	      \
     __res; })
#  endif
# endif
# ifndef __floorf
#  if defined __AVX__ || defined SSE2AVX
#   define __floorf(d) \
  ({ float __res; \
    asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d)));      \
     __res; })
#  else
#   define __floorf(d) \
  ({ float __res; \
    asm ("roundss $1, %1, %0" : "=x" (__res) : "xm" ((float) (d)));	      \
     __res; })
#  endif
# endif
#endif


/* Specialized variants of the <fenv.h> interfaces which only handle
   either the FPU or the SSE unit.  */
#undef libc_feholdexcept
#define libc_feholdexcept(e) \
  do {									      \
     unsigned int mxcsr;						      \
     asm (STMXCSR " %0" : "=m" (*&mxcsr));				      \
     (e)->__mxcsr = mxcsr;						      \
     mxcsr = (mxcsr | 0x1f80) & ~0x3f;					      \
     asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));			      \
  } while (0)
#undef libc_feholdexceptf
#define libc_feholdexceptf(e) libc_feholdexcept (e)
// #define libc_feholdexceptl(e) (void) feholdexcept (e)

#undef libc_feholdexcept_setround
#define libc_feholdexcept_setround(e, r) \
  do {									      \
     unsigned int mxcsr;						      \
     asm (STMXCSR " %0" : "=m" (*&mxcsr));				      \
     (e)->__mxcsr = mxcsr;						      \
     mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3);			      \
     asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));			      \
  } while (0)
#undef libc_feholdexcept_setroundf
#define libc_feholdexcept_setroundf(e, r) libc_feholdexcept_setround (e, r)
// #define libc_feholdexcept_setroundl(e, r) ...

#undef libc_fetestexcept
#define libc_fetestexcept(e) \
  ({ unsigned int mxcsr;						      \
     asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));			      \
     mxcsr & (e) & FE_ALL_EXCEPT; })
#undef libc_fetestexceptf
#define libc_fetestexceptf(e) libc_fetestexcept (e)
// #define libc_fetestexceptl(e) fetestexcept (e)

#undef libc_fesetenv
#define libc_fesetenv(e) \
  asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr))
#undef libc_fesetenvf
#define libc_fesetenvf(e) libc_fesetenv (e)
// #define libc_fesetenvl(e) (void) fesetenv (e)

#undef libc_feupdateenv
#define libc_feupdateenv(e) \
  do {									      \
    unsigned int mxcsr;							      \
    asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));			      \
    asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr));		      \
    __feraiseexcept (mxcsr & FE_ALL_EXCEPT);				      \
  } while (0)
#undef libc_feupdateenvf
#define libc_feupdateenvf(e) libc_feupdateenv (e)
// #define libc_feupdateenvl(e) (void) feupdateenv (e)