From 1bead169c32a3a688de863709b863207b7aafddd Mon Sep 17 00:00:00 2001
From: Joseph Myers <joseph@codesourcery.com>
Date: Wed, 28 Nov 2012 13:40:54 +0000
Subject: Fix powl inaccuracy for x86_64 and x86 (bug 13881).

---
 sysdeps/x86_64/fpu/e_powl.S       | 52 +++++++++++++++++----------------------
 sysdeps/x86_64/fpu/libm-test-ulps |  4 +++
 2 files changed, 26 insertions(+), 30 deletions(-)

(limited to 'sysdeps/x86_64')
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index 1b3718522d..ff96cec68a 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -26,9 +26,9 @@
 	.type one,@object
 one:	.double 1.0
 	ASM_SIZE_DIRECTIVE(one)
-	.type limit,@object
-limit:	.double 0.29
-	ASM_SIZE_DIRECTIVE(limit)
+	.type p3,@object
+p3:	.byte 0, 0, 0, 0, 0, 0, 0x20, 0x40
+	ASM_SIZE_DIRECTIVE(p3)
 	.type p63,@object
 p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
 	ASM_SIZE_DIRECTIVE(p63)
@@ -131,7 +131,15 @@ ENTRY(__ieee754_powl)
 	fchs			// -0x1p-79 : x
 	jmp	3f
 
-9:	/* OK, we have an integer value for y.  */
+9:	/* OK, we have an integer value for y.  Unless very small
+	   (we use < 8), use the algorithm for real exponent to avoid
+	   accumulation of errors.  */
+	fldl	MO(p3)		// 8 : y : x
+	fld	%st(1)		// y : 8 : y : x
+	fabs			// |y| : 8 : y : x
+	fcomip	%st(1), %st	// 8 : y : x
+	fstp	%st(0)		// y : x
+	jnc	2f
 	mov	-8(%rsp),%eax
 	mov	-4(%rsp),%edx
 	orl	$0, %edx
@@ -167,7 +175,7 @@ ENTRY(__ieee754_powl)
 	ret
 
 	.align ALIGNARG(4)
-2:	// y is a large integer (absolute value at least 1L<<63), but
+2:	// y is a large integer (absolute value at least 8), but
 	// may be odd unless at least 1L<<64.  So it may be necessary
 	// to adjust the sign of a negative result afterwards.
 	fxch			// x : y
@@ -190,31 +198,15 @@ ENTRY(__ieee754_powl)
 	fchs			// -(1L<<78) : |x|
 	.align ALIGNARG(4)
 3:	/* y is a real number.  */
-	fxch			// x : y
-	fldl	MO(one)		// 1.0 : x : y
-	fldl	MO(limit)	// 0.29 : 1.0 : x : y
-	fld	%st(2)		// x : 0.29 : 1.0 : x : y
-	fsub	%st(2)		// x-1 : 0.29 : 1.0 : x : y
-	fabs			// |x-1| : 0.29 : 1.0 : x : y
-	fucompp			// 1.0 : x : y
-	fnstsw
-	fxch			// x : 1.0 : y
-	test	$0x4500,%eax
-	jz	7f
-	fsub	%st(1)		// x-1 : 1.0 : y
-	fyl2xp1			// log2(x) : y
-	jmp	8f
-
-7:	fyl2x			// log2(x) : y
-8:	fmul	%st(1)		// y*log2(x) : y
-	fst	%st(1)		// y*log2(x) : y*log2(x)
-	frndint			// int(y*log2(x)) : y*log2(x)
-	fsubr	%st, %st(1)	// int(y*log2(x)) : fract(y*log2(x))
-	fxch			// fract(y*log2(x)) : int(y*log2(x))
-	f2xm1			// 2^fract(y*log2(x))-1 : int(y*log2(x))
-	faddl	MO(one)		// 2^fract(y*log2(x)) : int(y*log2(x))
-	fscale			// 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
-	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
+	subq	$40, %rsp
+	cfi_adjust_cfa_offset (40)
+	fstpt	16(%rsp)	// x
+	fstpt	(%rsp)		// <empty>
+	mov	%edx, 32(%rsp)
+	call	HIDDEN_JUMPTARGET (__powl_helper)	// <result>
+	mov	32(%rsp), %edx
+	addq	$40, %rsp
+	cfi_adjust_cfa_offset (-40)
 	testb	$2, %dh
 	jz	292f
 	// x is negative.  If y is an odd integer, negate the result.
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index f33dfed3df..9e7a8adac3 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -2398,6 +2398,8 @@ ifloat: 1
 Test "pow (0x0.ffffffp0, -0x1p24) == 2.7182819094701610539628664526874952929416":
 float: 1
 ifloat: 1
+ildouble: 1
+ldouble: 1
 Test "pow (0x0.ffffffp0, 0x1p24) == 0.3678794302077803437135155590023422899744":
 float: 1
 ifloat: 1
@@ -3575,6 +3577,8 @@ ifloat: 1
 Function: "pow":
 float: 1
 ifloat: 1
+ildouble: 1
+ldouble: 1
 
 Function: "pow_downward":
 float: 1
-- 
cgit 1.4.1