Fix pow overflow in non-default rounding modes (bug 16315).

This patch fixes bug 16315, bad pow handling of overflow/underflow in non-default rounding modes. Tests of pow are duly converted to ALL_RM_TEST to run all tests in all rounding modes.

There are two main issues here. First, various implementations compute a negative result by negating a positive result, but this yields inappropriate overflow / underflow values for directed rounding, so either overflow / underflow results need recomputing in the correct sign, or the relevant overflowing / underflowing operation needs to be made to have a result of the correct sign. Second, the dbl-64 implementation sets FE_TONEAREST internally; in the overflow / underflow case, the result needs recomputing in the original rounding mode.

Tested x86_64 and x86 and ulps updated accordingly.

[BZ #16315]
* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly overflowing or underflowing operations take place with sign of result.
* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise.
* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
* sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>.
(__ieee754_pow): Recompute overflowing and underflowing results in original rounding mode.
* sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>.
(__powl_helper): Allow negative argument X and scale negated value as needed. Avoid passing value outside [-1, 1] to f2xm1.
* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly overflowing or underflowing operations take place with sign of result.
* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]: Include <math.h>.
* math/auto-libm-test-in: Add more tests of pow.
* math/auto-libm-test-out: Regenerated.
* math/libm-test.inc (pow_test): Use ALL_RM_TEST.
(pow_tonearest_test_data): Remove.
(pow_test_tonearest): Likewise.
(pow_towardzero_test_data): Likewise.
(pow_test_towardzero): Likewise.
(pow_downward_test_data): Likewise.
(pow_test_downward): Likewise.
(pow_upward_test_data): Likewise.
(pow_test_upward): Likewise.
(main): Don't call removed functions.
* sysdeps/i386/fpu/libm-test-ulps: Update.
* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
author: Joseph Myers <joseph@codesourcery.com> 2014-06-23 20:12:33 +0000
committer: Joseph Myers <joseph@codesourcery.com> 2014-06-23 20:12:33 +0000
commit: 4da6db51880289f0bf41b39e05cf9bb1c4769c47 (patch)
tree: 9f57512f4561f512366c93cf7430d5b75d4852fd /sysdeps/x86_64/fpu
parent: 5686b236cccdc8c72788b7996537ed92ac3a3c8c (diff)
download: glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.tar.gz
glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.tar.xz
glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.zip
3 files changed, 34 insertions, 52 deletions
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index 47f129f34d..a960a1f8b4 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -141,7 +141,7 @@ ENTRY(__ieee754_powl)
 	fabs			// |y| : 8 : y : x
 	fcomip	%st(1), %st	// 8 : y : x
 	fstp	%st(0)		// y : x
-	jnc	2f
+	jnc	3f
 	mov	-8(%rsp),%eax
 	mov	-4(%rsp),%edx
 	orl	$0, %edx
@@ -154,12 +154,22 @@ ENTRY(__ieee754_powl)
 4:	fldl	MO(one)		// 1 : x
 	fxch
 
+	/* If y is even, take the absolute value of x.  Otherwise,
+	   ensure all intermediate values that might overflow have the
+	   sign of x.  */
+	testb	$1, %al
+	jnz	6f
+	fabs
+
 6:	shrdl	$1, %edx, %eax
 	jnc	5f
 	fxch
+	fabs
 	fmul	%st(1)		// x : ST*x
 	fxch
-5:	fmul	%st(0), %st	// x*x : ST*x
+5:	fld	%st		// x : x : ST*x
+	fabs			// |x| : x : ST*x
+	fmulp			// |x|*x : ST*x
 	shrl	$1, %edx
 	movl	%eax, %ecx
 	orl	%edx, %ecx
@@ -177,71 +187,32 @@ ENTRY(__ieee754_powl)
 	ret
 
 	.align ALIGNARG(4)
-2:	// y is a large integer (absolute value at least 8), but
-	// may be odd unless at least 1L<<64.  So it may be necessary
-	// to adjust the sign of a negative result afterwards.
-	fxch			// x : y
-	fabs			// |x| : y
-	fxch			// y : |x|
+2:	// y is a large integer (absolute value at least 1L<<63).
 	// If y has absolute value at least 1L<<78, then any finite
 	// nonzero x will result in 0 (underflow), 1 or infinity (overflow).
 	// Saturate y to those bounds to avoid overflow in the calculation
 	// of y*log2(x).
-	fldl	MO(p78)		// 1L<<78 : y : |x|
-	fld	%st(1)		// y : 1L<<78 : y : |x|
-	fabs			// |y| : 1L<<78 : y : |x|
-	fcomip	%st(1), %st	// 1L<<78 : y : |x|
-	fstp	%st(0)		// y : |x|
+	fldl	MO(p78)		// 1L<<78 : y : x
+	fld	%st(1)		// y : 1L<<78 : y : x
+	fabs			// |y| : 1L<<78 : y : x
+	fcomip	%st(1), %st	// 1L<<78 : y : x
+	fstp	%st(0)		// y : x
 	jc	3f
 	fstp	%st(0)		// pop y
-	fldl	MO(p78)		// 1L<<78 : |x|
+	fldl	MO(p78)		// 1L<<78 : x
 	testb	$2, %dl
 	jz	3f		// y > 0
-	fchs			// -(1L<<78) : |x|
+	fchs			// -(1L<<78) : x
 	.align ALIGNARG(4)
 3:	/* y is a real number.  */
 	subq	$40, %rsp
 	cfi_adjust_cfa_offset (40)
 	fstpt	16(%rsp)	// x
 	fstpt	(%rsp)		// <empty>
-	mov	%edx, 32(%rsp)
 	call	HIDDEN_JUMPTARGET (__powl_helper)	// <result>
-	mov	32(%rsp), %edx
 	addq	$40, %rsp
 	cfi_adjust_cfa_offset (-40)
-	testb	$2, %dh
-	jz	292f
-	// x is negative.  If y is an odd integer, negate the result.
-	fldt	24(%rsp)	// y : abs(result)
-	fldl	MO(p64)		// 1L<<64 : y : abs(result)
-	fld	%st(1)		// y : 1L<<64 : y : abs(result)
-	fabs			// |y| : 1L<<64 : y : abs(result)
-	fcomip	%st(1), %st	// 1L<<64 : y : abs(result)
-	fstp	%st(0)		// y : abs(result)
-	jnc	291f
-	fldl	MO(p63)		// p63 : y : abs(result)
-	fxch			// y : p63 : abs(result)
-	fprem			// y%p63 : p63 : abs(result)
-	fstp	%st(1)		// y%p63 : abs(result)
-
-	// We must find out whether y is an odd integer.
-	fld	%st		// y : y : abs(result)
-	fistpll	-8(%rsp)	// y : abs(result)
-	fildll	-8(%rsp)	// int(y) : y : abs(result)
-	fucomip	%st(1),%st	// y : abs(result)
-	ffreep	%st		// abs(result)
-	jne	292f
-
-	// OK, the value is an integer, but is it odd?
-	mov	-8(%rsp), %eax
-	mov	-4(%rsp), %edx
-	andb	$1, %al
-	jz	290f		// jump if not odd
-	// It's an odd integer.
-	fchs
-290:	ret
-291:	fstp	%st(0)		// abs(result)
-292:	ret
+	ret
 
 	// pow(x,±0) = 1
 	.align ALIGNARG(4)
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 92fef5a134..8edd40b640 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1736,8 +1736,12 @@ ildouble: 1
 ldouble: 1
 
 Function: "pow_downward":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
+ildouble: 4
+ldouble: 4
 
 Function: "pow_tonearest":
 float: 1
@@ -1746,14 +1750,20 @@ ildouble: 1
 ldouble: 1
 
 Function: "pow_towardzero":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
+ildouble: 1
+ldouble: 1
 
 Function: "pow_upward":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
-ildouble: 1
-ldouble: 1
+ildouble: 2
+ldouble: 2
 
 Function: "sin":
 ildouble: 1
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
index a740b6c447..433cce0de6 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
@@ -1,5 +1,6 @@
 #ifdef HAVE_FMA4_SUPPORT
 # include <init-arch.h>
+# include <math.h>
 # include <math_private.h>
 
 extern double __ieee754_pow_sse2 (double, double);
author	Joseph Myers <joseph@codesourcery.com>	2014-06-23 20:12:33 +0000
committer	Joseph Myers <joseph@codesourcery.com>	2014-06-23 20:12:33 +0000
commit	4da6db51880289f0bf41b39e05cf9bb1c4769c47 (patch)
tree	9f57512f4561f512366c93cf7430d5b75d4852fd /sysdeps/x86_64/fpu
parent	5686b236cccdc8c72788b7996537ed92ac3a3c8c (diff)
download	glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.tar.gz glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.tar.xz glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.zip