about summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu/e_expl.S
blob: 338527b3de26cf7cdb4378f320954045c440a102 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
/*
 * Written by J.T. Conklin <jtc@netbsd.org>.
 * Public domain.
 *
 * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
 */

/*
 * The 8087 method for the exponential function is to calculate
 *   exp(x) = 2^(x log2(e))
 * after separating integer and fractional parts
 *   x log2(e) = i + f, |f| <= .5
 * 2^i is immediate but f needs to be precise for long double accuracy.
 * Suppress range reduction error in computing f by the following.
 * Separate x into integer and fractional parts
 *   x = xi + xf, |xf| <= .5
 * Separate log2(e) into the sum of an exact number c0 and small part c1.
 *   c0 + c1 = log2(e) to extra precision
 * Then
 *   f = (c0 xi - i) + c0 xf + c1 x
 * where c0 xi is exact and so also is (c0 xi - i).
 * -- moshier@na-net.ornl.gov
 */

#include <machine/asm.h>

#ifdef USE_AS_EXP10L
# define IEEE754_EXPL __ieee754_exp10l
# define EXPL_FINITE __exp10l_finite
# define FLDLOG fldl2t
#elif defined USE_AS_EXPM1L
# define IEEE754_EXPL __expm1l
# undef EXPL_FINITE
# define FLDLOG fldl2e
#else
# define IEEE754_EXPL __ieee754_expl
# define EXPL_FINITE __expl_finite
# define FLDLOG fldl2e
#endif

	.section .rodata.cst16,"aM",@progbits,16

	.p2align 4
#ifdef USE_AS_EXP10L
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#else
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#endif
#ifndef USE_AS_EXPM1L
	.type csat,@object
csat:	.byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(csat)
#endif

#ifdef PIC
# define MO(op) op##(%rip)
#else
# define MO(op) op
#endif

	.text
ENTRY(IEEE754_EXPL)
#ifdef USE_AS_EXPM1L
	movzwl	8+8(%rsp), %eax
	xorb	$0x80, %ah	// invert sign bit (now 1 is "positive")
	cmpl	$0xc006, %eax	// is num positive and exp >= 6 (number is >= 128.0)?
	jae	HIDDEN_JUMPTARGET (__expl) // (if num is denormal, it is at least >= 64.0)
#endif
	fldt	8(%rsp)
/* I added the following ugly construct because expl(+-Inf) resulted
   in NaN.  The ugliness results from the bright minds at Intel.
   For the i686 the code can be written better.
   -- drepper@cygnus.com.  */
	fxam			/* Is NaN or +-Inf?  */
#ifdef USE_AS_EXPM1L
	xorb	$0x80, %ah
	cmpl	$0xc006, %eax
	fstsw	%ax
	movb	$0x45, %dh
	jb	4f

	/* Below -64.0 (may be -NaN or -Inf). */
	andb	%ah, %dh
	cmpb	$0x01, %dh
	je	2f		/* Is +-NaN, jump.  */
	jmp	1f		/* -large, possibly -Inf.  */

4:	/* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf).  */
	/* Test for +-0 as argument.  */
	andb	%ah, %dh
	cmpb	$0x40, %dh
	je	2f
#else
	movzwl	8+8(%rsp), %eax
	andl	$0x7fff, %eax
	cmpl	$0x400d, %eax
	jg	5f
	cmpl	$0x3fbc, %eax
	jge	3f
	/* Argument's exponent below -67, result rounds to 1.  */
	fld1
	faddp
	jmp	2f
5:	/* Overflow, underflow or infinity or NaN as argument.  */
	fstsw	%ax
	movb	$0x45, %dh
	andb	%ah, %dh
	cmpb	$0x05, %dh
	je	1f		/* Is +-Inf, jump.    */
	cmpb	$0x01, %dh
	je	2f		/* Is +-NaN, jump.    */
	/* Overflow or underflow; saturate.  */
	fstp	%st
	fldt	MO(csat)
	andb	$2, %ah
	jz	3f
	fchs
#endif
3:	FLDLOG			/* 1  log2(base)      */
	fmul	%st(1), %st	/* 1  x log2(base)    */
	/* Set round-to-nearest temporarily.  */
	fstcw	-4(%rsp)
	movl	$0xf3ff, %edx
	andl	-4(%rsp), %edx
	movl	%edx, -8(%rsp)
	fldcw	-8(%rsp)
	frndint			/* 1  i               */
	fld	%st(1)		/* 2  x               */
	frndint			/* 2  xi              */
	fldcw	-4(%rsp)
	fld	%st(1)		/* 3  i               */
	fldt	MO(c0)		/* 4  c0              */
	fld	%st(2)		/* 5  xi              */
	fmul	%st(1), %st	/* 5  c0 xi           */
	fsubp	%st, %st(2)	/* 4  f = c0 xi  - i  */
	fld	%st(4)		/* 5  x               */
	fsub	%st(3), %st	/* 5  xf = x - xi     */
	fmulp	%st, %st(1)	/* 4  c0 xf           */
	faddp	%st, %st(1)	/* 3  f = f + c0 xf   */
	fldt	MO(c1)		/* 4                  */
	fmul	%st(4), %st	/* 4  c1 * x          */
	faddp	%st, %st(1)	/* 3  f = f + c1 * x  */
	f2xm1			/* 3 2^(fract(x * log2(base))) - 1 */
#ifdef USE_AS_EXPM1L
	fstp	%st(1)		/* 2                  */
	fscale			/* 2 scale factor is st(1); base^x - 2^i */
	fxch			/* 2 i                */
	fld1			/* 3 1.0              */
	fscale			/* 3 2^i              */
	fld1			/* 4 1.0              */
	fsubrp	%st, %st(1)	/* 3 2^i - 1.0        */
	fstp	%st(1)		/* 2                  */
	faddp	%st, %st(1)	/* 1 base^x - 1.0     */
#else
	fld1			/* 4 1.0              */
	faddp			/* 3 2^(fract(x * log2(base))) */
	fstp	%st(1)		/* 2  */
	fscale			/* 2 scale factor is st(1); base^x */
	fstp	%st(1)		/* 1  */
#endif
	fstp	%st(1)		/* 0  */
	jmp	2f
1:
#ifdef USE_AS_EXPM1L
	/* For expm1l, only negative sign gets here.  */
	fstp	%st
	fld1
	fchs
#else
	testl	$0x200, %eax	/* Test sign.  */
	jz	2f		/* If positive, jump.  */
	fstp	%st
	fldz			/* Set result to 0.  */
#endif
2:	ret
END(IEEE754_EXPL)
#ifdef USE_AS_EXPM1L
libm_hidden_def (__expm1l)
weak_alias (__expm1l, expm1l)
#else
strong_alias (IEEE754_EXPL, EXPL_FINITE)
#endif