about summary refs log tree commit diff
path: root/sysdeps/x86/fpu/sfp-machine.h
blob: 5892f4f5fe58d1a01b45232c1bc234db45af7ba4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/* Configure soft-fp for building sqrtf128.  Based on sfp-machine.h in
   libgcc, with soft-float and other irrelevant parts removed.  */

/* The type of the result of a floating point comparison.  This must
   match `__libgcc_cmp_return__' in GCC for the target, so its width is
   taken from that machine mode through the `mode' attribute rather
   than being spelled out here.  */
typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
/* Short alias for the typedef above.  */
#define CMPtype __gcc_CMPtype

/* x86-64 configuration; the 32-bit configuration is in the #else
   branch.  */
#ifdef __x86_64__
/* Word configuration: a 64-bit word size together with the
   `long long' based unsigned, signed and plain integer types.  How
   each macro is consumed is defined by the soft-fp headers, not by
   this file.  */
# define _FP_W_TYPE_SIZE	64
# define _FP_W_TYPE		unsigned long long
# define _FP_WS_TYPE		signed long long
# define _FP_I_TYPE		long long

/* Signed and unsigned integer types in GCC's TI machine mode; the
   width comes from the mode attribute, not from the base type.  */
typedef signed int TItype __attribute__ ((__mode__ (__TI__)));
typedef unsigned UTItype __attribute__ ((__mode__ (__TI__)));

/* Size of TItype in bits, as an `int'.  */
# define TI_BITS ((int) sizeof (TItype) * __CHAR_BIT__)

/* Quad multiply: forwards to _FP_MUL_MEAT_2_wide with the quad
   working-fraction width (_FP_WFRACBITS_Q) and umul_ppmm as the last
   argument.  Both names are expected to come from the soft-fp
   headers; they are not defined in this file.  */
# define _FP_MUL_MEAT_Q(R,X,Y)				\
  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)

/* Quad divide: forwards to _FP_DIV_MEAT_2_udiv with format tag Q.  */
# define _FP_DIV_MEAT_Q(R,X,Y)   _FP_DIV_MEAT_2_udiv(Q,R,X,Y)

/* NaN fraction initialisers, one per format (S, D, E, Q).  Each is a
   comma-separated list starting with the format's _FP_QNANBIT_*
   value: S and D have a single entry, E and Q have a second entry of
   0.  NOTE(review): presumably one entry per 64-bit fraction word --
   confirm against the soft-fp users of these macros.  */
# define _FP_NANFRAC_S		_FP_QNANBIT_S
# define _FP_NANFRAC_D		_FP_QNANBIT_D
# define _FP_NANFRAC_E		_FP_QNANBIT_E, 0
# define _FP_NANFRAC_Q		_FP_QNANBIT_Q, 0

/* Right-shift applied to ~_fcw by FP_TRAPPING_EXCEPTIONS before
   masking with FP_EX_ALL.  On this branch _fcw is loaded from MXCSR
   (see FP_INIT_ROUNDMODE), whose exception mask bits start at bit 7
   according to the Intel SDM.  */
# define FP_EX_SHIFT 7

/* Declares the local `_fcw' that holds the control word, initialised
   to round-to-nearest.  The expansion already ends in a semicolon.  */
# define _FP_DECL_EX \
  unsigned int _fcw __attribute__ ((unused)) = FP_RND_NEAREST;

/* Rounding-mode values as they appear in _fcw after masking with
   FP_RND_MASK (bits 13-14): nearest, toward zero, toward +infinity,
   toward -infinity.  */
# define FP_RND_NEAREST		0
# define FP_RND_ZERO		0x6000
# define FP_RND_PINF		0x4000
# define FP_RND_MINF		0x2000

/* Mask selecting the rounding-mode bits of _fcw; used by
   FP_ROUNDMODE.  */
# define FP_RND_MASK		0x6000

/* Store the current MXCSR value into `_fcw' (memory operand) with the
   stmxcsr instruction.  The asm is volatile, so it is not removed or
   merged by the optimiser.  NOTE(review): the leading `%v' is expanded
   by the compiler, presumably to select the VEX-encoded mnemonic when
   AVX is enabled -- confirm against GCC's x86 operand modifiers.  */
# define FP_INIT_ROUNDMODE					\
  do {								\
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (_fcw));	\
  } while (0)
/* Configuration used when __x86_64__ is not defined.  */
#else
/* Word configuration: a 32-bit word size together with the `int'
   based unsigned, signed and plain integer types.  How each macro is
   consumed is defined by the soft-fp headers, not by this file.  */
# define _FP_W_TYPE_SIZE	32
# define _FP_W_TYPE		unsigned int
# define _FP_WS_TYPE		signed int
# define _FP_I_TYPE		int

/* Four-word addition with carry propagation:
     (r3,r2,r1,r0) = (x3,x2,x1,x0) + (y3,y2,y1,y0).
   The `add' works on the x0/y0 pair and each following `adc' on the
   next pair up, so word 0 is the least significant.

   Operands 0-3 are the outputs r3..r0 in registers; r2, r1 and r0 are
   earlyclobber ("=&r").  Each x word is tied to the register of the
   matching output ("%0".."%3"; the `%' also declares it commutative
   with the y operand that follows), and the y words may be any
   general operand ("g").  The {att|intel} groups in the template pick
   the assembler dialect.  Every operand is cast to USItype, which is
   not declared in this header and must be visible at the point of
   use.  */
# define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
  __asm__ ("add{l} {%11,%3|%3,%11}\n\t"				\
	   "adc{l} {%9,%2|%2,%9}\n\t"				\
	   "adc{l} {%7,%1|%1,%7}\n\t"				\
	   "adc{l} {%5,%0|%0,%5}"				\
	   : "=r" ((USItype) (r3)),				\
	     "=&r" ((USItype) (r2)),				\
	     "=&r" ((USItype) (r1)),				\
	     "=&r" ((USItype) (r0))				\
	   : "%0" ((USItype) (x3)),				\
	     "g" ((USItype) (y3)),				\
	     "%1" ((USItype) (x2)),				\
	     "g" ((USItype) (y2)),				\
	     "%2" ((USItype) (x1)),				\
	     "g" ((USItype) (y1)),				\
	     "%3" ((USItype) (x0)),				\
	     "g" ((USItype) (y0)))
/* Three-word addition with carry propagation:
     (r2,r1,r0) = (x2,x1,x0) + (y2,y1,y0),
   word 0 least significant (`add' on x0/y0, then `adc' upwards).

   Operands 0-2 are the outputs r2..r0 in registers; r1 and r0 are
   earlyclobber.  Each x word is tied to the matching output register
   and marked commutative with the following y operand ("%0".."%2");
   y words may be any general operand ("g").  Operands are cast to
   USItype, which must be declared elsewhere.  */
# define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
  __asm__ ("add{l} {%8,%2|%2,%8}\n\t"				\
	   "adc{l} {%6,%1|%1,%6}\n\t"				\
	   "adc{l} {%4,%0|%0,%4}"				\
	   : "=r" ((USItype) (r2)),				\
	     "=&r" ((USItype) (r1)),				\
	     "=&r" ((USItype) (r0))				\
	   : "%0" ((USItype) (x2)),				\
	     "g" ((USItype) (y2)),				\
	     "%1" ((USItype) (x1)),				\
	     "g" ((USItype) (y1)),				\
	     "%2" ((USItype) (x0)),				\
	     "g" ((USItype) (y0)))
/* Four-word subtraction with borrow propagation:
     (r3,r2,r1,r0) = (x3,x2,x1,x0) - (y3,y2,y1,y0),
   word 0 least significant (`sub' on x0/y0, then `sbb' upwards).

   Operands 0-3 are the outputs r3..r0 in registers; r2, r1 and r0 are
   earlyclobber.  Each x word is tied to the matching output register
   ("0".."3"); unlike the addition macros there is no `%' modifier, so
   the x and y operands are never swapped.  y words may be any general
   operand ("g").  Operands are cast to USItype, which must be declared
   elsewhere.  */
# define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
  __asm__ ("sub{l} {%11,%3|%3,%11}\n\t"				\
	   "sbb{l} {%9,%2|%2,%9}\n\t"				\
	   "sbb{l} {%7,%1|%1,%7}\n\t"				\
	   "sbb{l} {%5,%0|%0,%5}"				\
	   : "=r" ((USItype) (r3)),				\
	     "=&r" ((USItype) (r2)),				\
	     "=&r" ((USItype) (r1)),				\
	     "=&r" ((USItype) (r0))				\
	   : "0" ((USItype) (x3)),				\
	     "g" ((USItype) (y3)),				\
	     "1" ((USItype) (x2)),				\
	     "g" ((USItype) (y2)),				\
	     "2" ((USItype) (x1)),				\
	     "g" ((USItype) (y1)),				\
	     "3" ((USItype) (x0)),				\
	     "g" ((USItype) (y0)))
/* Three-word subtraction with borrow propagation:
     (r2,r1,r0) = (x2,x1,x0) - (y2,y1,y0),
   word 0 least significant (`sub' on x0/y0, then `sbb' upwards).

   Operands 0-2 are the outputs r2..r0 in registers; r1 and r0 are
   earlyclobber.  Each x word is tied to the matching output register
   ("0".."2", not commutative); y words may be any general operand
   ("g").  Operands are cast to USItype, which must be declared
   elsewhere.  */
# define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
  __asm__ ("sub{l} {%8,%2|%2,%8}\n\t"				\
	   "sbb{l} {%6,%1|%1,%6}\n\t"				\
	   "sbb{l} {%4,%0|%0,%4}"				\
	   : "=r" ((USItype) (r2)),				\
	     "=&r" ((USItype) (r1)),				\
	     "=&r" ((USItype) (r0))				\
	   : "0" ((USItype) (x2)),				\
	     "g" ((USItype) (y2)),				\
	     "1" ((USItype) (x1)),				\
	     "g" ((USItype) (y1)),				\
	     "2" ((USItype) (x0)),				\
	     "g" ((USItype) (y0)))
/* Add the single word `i' in place to the four-word value
   (x3,x2,x1,x0): `add' puts i into x0, then `adc' with an immediate 0
   carries into x1, x2 and x3 in turn.

   x3..x0 are read-write register operands 0-3 ("+r"; x2, x1 and x0
   are also earlyclobber), and i is any general operand ("g").
   Operands are cast to USItype, which must be declared elsewhere.  */
# define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)			\
  __asm__ ("add{l} {%4,%3|%3,%4}\n\t"				\
	   "adc{l} {$0,%2|%2,0}\n\t"				\
	   "adc{l} {$0,%1|%1,0}\n\t"				\
	   "adc{l} {$0,%0|%0,0}"				\
	   : "+r" ((USItype) (x3)),				\
	     "+&r" ((USItype) (x2)),				\
	     "+&r" ((USItype) (x1)),				\
	     "+&r" ((USItype) (x0))				\
	   : "g" ((USItype) (i)))


/* Multiplication for the S, D and Q formats: forwards to the 1-, 2-
   and 4-word "wide" soft-fp multiply macros respectively, passing the
   format's working-fraction width and umul_ppmm.  The forwarded-to
   macros and umul_ppmm are not defined in this file.  */
# define _FP_MUL_MEAT_S(R,X,Y)				\
  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
# define _FP_MUL_MEAT_D(R,X,Y)				\
  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
# define _FP_MUL_MEAT_Q(R,X,Y)				\
  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)

/* Division for the S, D and Q formats: S forwards to the 1-word
   `loop' variant, D and Q to the 2- and 4-word `udiv' variants.  */
# define _FP_DIV_MEAT_S(R,X,Y)   _FP_DIV_MEAT_1_loop(S,R,X,Y)
# define _FP_DIV_MEAT_D(R,X,Y)   _FP_DIV_MEAT_2_udiv(D,R,X,Y)
# define _FP_DIV_MEAT_Q(R,X,Y)   _FP_DIV_MEAT_4_udiv(Q,R,X,Y)

/* NaN fraction initialisers, one per format.  Each is a
   comma-separated list starting with the format's _FP_QNANBIT_*
   value: one entry for S, two for D, four for E and Q.
   NOTE(review): presumably one entry per 32-bit fraction word --
   confirm against the soft-fp users of these macros.  */
# define _FP_NANFRAC_S		_FP_QNANBIT_S
# define _FP_NANFRAC_D		_FP_QNANBIT_D, 0
/* Even though XFmode is 12 bytes, it has to be padded to 16 bytes
   because soft-fp emulation is done on 16 bytes.  */
# define _FP_NANFRAC_E		_FP_QNANBIT_E, 0, 0, 0
# define _FP_NANFRAC_Q		_FP_QNANBIT_Q, 0, 0, 0

/* Right-shift applied to ~_fcw by FP_TRAPPING_EXCEPTIONS before
   masking with FP_EX_ALL.  On this branch _fcw is loaded from the x87
   control word (see FP_INIT_ROUNDMODE), whose exception mask bits
   start at bit 0 according to the Intel SDM, so no shift is needed.  */
# define FP_EX_SHIFT 0

/* Declares the local 16-bit `_fcw' that holds the control word,
   initialised to round-to-nearest.  The expansion already ends in a
   semicolon.  */
# define _FP_DECL_EX \
  unsigned short _fcw __attribute__ ((unused)) = FP_RND_NEAREST;

/* Rounding-mode values as they appear in _fcw after masking with
   FP_RND_MASK (bits 10-11): nearest, toward zero, toward +infinity,
   toward -infinity.  */
# define FP_RND_NEAREST		0
# define FP_RND_ZERO		0xc00
# define FP_RND_PINF		0x800
# define FP_RND_MINF		0x400

/* Mask selecting the rounding-mode bits of _fcw; used by
   FP_ROUNDMODE.  */
# define FP_RND_MASK		0xc00

/* Store the current x87 control word into `_fcw' (memory operand)
   with the fnstcw instruction.  The asm is volatile, so it is not
   removed or merged by the optimiser.  */
# define FP_INIT_ROUNDMODE				\
  do {							\
    __asm__ __volatile__ ("fnstcw\t%0" : "=m" (_fcw));	\
  } while (0)
/* End of the __x86_64__ / 32-bit specific configuration.  */
#endif

/* NaN handling switches read by soft-fp.  NOTE(review): going by the
   names, the first asks for NaN fraction bits to be preserved and the
   second says the quiet-NaN bit is not negated on this target --
   confirm against the soft-fp documentation.  */
#define _FP_KEEPNANFRACP	1
#define _FP_QNANNEGATEDP 0

/* Sign value configured for NaNs in each format (S, D, E, Q); all are
   1 here.  */
#define _FP_NANSIGN_S		1
#define _FP_NANSIGN_D		1
#define _FP_NANSIGN_E		1
#define _FP_NANSIGN_Q		1

/* Choose which NaN operand becomes the result R of a two-operand
   operation.

   Here is something Intel misdesigned: the specs don't define the
   case where we have two NaNs with the same mantissa but different
   signs, and different operations pick up different NaNs.  The rule
   implemented here is: take X when its fraction is greater than Y's,
   or when the fractions are equal and OP is '+' or '*'; otherwise
   take Y.  The sign and fraction of the chosen operand are copied to
   R, and R's class is set to FP_CLS_NAN.

   fs is unused.  wc selects the _FP_FRAC_GT_<wc>, _FP_FRAC_EQ_<wc>
   and _FP_FRAC_COPY_<wc> helpers.  R, X and Y are operand name
   prefixes; they are token-pasted (R##_s, X##_s, ...), so they must
   be plain identifiers.  OP is the operation character; it is
   parenthesized in the expansion so that an arbitrary expression can
   be passed without its operators binding to the `==' comparisons.  */
#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
  do {								\
    if (_FP_FRAC_GT_##wc(X, Y)					\
	|| (_FP_FRAC_EQ_##wc(X,Y)				\
	    && ((OP) == '+' || (OP) == '*')))			\
      {								\
	R##_s = X##_s;						\
	_FP_FRAC_COPY_##wc(R,X);				\
      }								\
    else							\
      {								\
	R##_s = Y##_s;						\
	_FP_FRAC_COPY_##wc(R,Y);				\
      }								\
    R##_c = FP_CLS_NAN;						\
  } while (0)

/* Exception flags, one bit each, in bit positions 0 to 5.  */
#define FP_EX_INVALID		(1 << 0)
#define FP_EX_DENORM		(1 << 1)
#define FP_EX_DIVZERO		(1 << 2)
#define FP_EX_OVERFLOW		(1 << 3)
#define FP_EX_UNDERFLOW		(1 << 4)
#define FP_EX_INEXACT		(1 << 5)

/* Union of every exception flag defined above.  */
#define FP_EX_ALL						\
  (FP_EX_INEXACT | FP_EX_UNDERFLOW | FP_EX_OVERFLOW		\
   | FP_EX_DIVZERO | FP_EX_DENORM | FP_EX_INVALID)

/* Exception handler called by FP_HANDLE_EXCEPTIONS with the value of
   `_fex'.  It is defined outside this header.  */
void __sfp_handle_exceptions (int);

/* If any exception has been recorded in the local `_fex', pass it to
   __sfp_handle_exceptions.  __builtin_expect marks the non-zero case
   as unlikely.

   The expansion is a single do/while(0) statement with no trailing
   semicolon: the caller writes `FP_HANDLE_EXCEPTIONS;', which keeps
   the macro usable as the body of an unbraced if/else.  */
#define FP_HANDLE_EXCEPTIONS			\
  do {						\
    if (__builtin_expect (_fex, 0))		\
      __sfp_handle_exceptions (_fex);		\
  } while (0)

/* FP_EX_* flags whose bit is clear in the control word `_fcw' once it
   has been shifted right by FP_EX_SHIFT.  */
#define FP_TRAPPING_EXCEPTIONS \
  (((~_fcw) >> FP_EX_SHIFT) & (FP_EX_ALL))

/* Rounding-mode bits of `_fcw'; compare against the FP_RND_* values.  */
#define FP_ROUNDMODE \
  ((_fcw) & (FP_RND_MASK))

/* Tininess is configured as detected after rounding.  */
#define _FP_TININESS_AFTER_ROUNDING	1