about summary refs log tree commit diff
diff options
context:
space:
mode:
authorJoseph Myers <joseph@codesourcery.com>2012-10-30 13:54:50 +0000
committerJoseph Myers <joseph@codesourcery.com>2012-10-30 13:54:50 +0000
commit8627a2329c5e6fc09bf9f0f070a21aca41f2b122 (patch)
treef1e7aa19f3fe1a9b3f9e403ced782c6f7d612f66
parent2a27fd6dae3edec949deda9a55928a0e22c8a8ae (diff)
downloadglibc-8627a2329c5e6fc09bf9f0f070a21aca41f2b122.tar.gz
glibc-8627a2329c5e6fc09bf9f0f070a21aca41f2b122.tar.xz
glibc-8627a2329c5e6fc09bf9f0f070a21aca41f2b122.zip
Fix fma missing underflows and bad results for some subnormal results (bugs 14152, 14783).
-rw-r--r--ChangeLog13
-rw-r--r--NEWS14
-rw-r--r--math/libm-test.inc65
-rw-r--r--sysdeps/ieee754/dbl-64/s_fma.c22
-rw-r--r--sysdeps/ieee754/ldbl-128/s_fmal.c22
-rw-r--r--sysdeps/ieee754/ldbl-96/s_fmal.c22
6 files changed, 105 insertions, 53 deletions
diff --git a/ChangeLog b/ChangeLog
index 067337a44b..912167b7bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
 2012-10-30  Joseph Myers  <joseph@codesourcery.com>
 
+	[BZ #14152]
+	[BZ #14783]
+	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Extract low bits of
+	result and shift together with sticky bit instead of replicating
+	round-to-nearest rounding.
+	* sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise.
+	* sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise.
+	* math/libm-test.inc (fma_test): Add more tests.  Do not permit
+	missing underflow exceptions.
+	(fma_test_towardzero): Add more tests.
+	(fma_test_downward): Likewise.
+	(fma_test_upward): Likewise.
+
 	[BZ #14047]
 	* sysdeps/generic/tininess.h: New file.
 	* sysdeps/i386/tininess.h: Likewise.
diff --git a/NEWS b/NEWS
index e4e672680a..b54365f1e7 100644
--- a/NEWS
+++ b/NEWS
@@ -11,13 +11,13 @@ Version 2.17
 
   1349, 3479, 5044, 5298, 5400, 6530, 6778, 6808, 9685, 9914, 10014, 10038,
   10631, 11438, 11607, 12140, 13412, 13542, 13601, 13629, 13679, 13696,
-  13717, 13741, 13939, 13966, 14042, 14047, 14090, 14150, 14151, 14154,
-  14157, 14166, 14173, 14195, 14237, 14251, 14252, 14283, 14298, 14303,
-  14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417, 14459,
-  14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532, 14538,
-  14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583, 14587,
-  14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683, 14694,
-  14716, 14743, 14767.
+  13717, 13741, 13939, 13966, 14042, 14047, 14090, 14150, 14151, 14152,
+  14154, 14157, 14166, 14173, 14195, 14237, 14251, 14252, 14283, 14298,
+  14303, 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417,
+  14459, 14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532,
+  14538, 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583,
+  14587, 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683,
+  14694, 14716, 14743, 14767, 14783.
 
 * Support for STT_GNU_IFUNC symbols added for s390 and s390x.
   Optimized versions of memcpy, memset, and memcmp added for System z10 and
diff --git a/math/libm-test.inc b/math/libm-test.inc
index e828fc2ec9..1e067fea64 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -4616,6 +4616,8 @@ fma_test (void)
   TEST_fff_f (fma, 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115);
   TEST_fff_f (fma, -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127);
   TEST_fff_f (fma, 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127);
+  TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
 #endif
 #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
   TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13);
@@ -4635,10 +4637,11 @@ fma_test (void)
   TEST_fff_f (fma, 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022, UNDERFLOW_EXCEPTION);
   TEST_fff_f (fma, 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022, UNDERFLOW_EXCEPTION);
   TEST_fff_f (fma, -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022, UNDERFLOW_EXCEPTION);
-  /* Sometimes the FE_UNDERFLOW is not set, so be prepared.  See Bug 14152.  */
-  TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION_OK);
+  TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION);
   TEST_fff_f (fma, 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022, UNDERFLOW_EXCEPTION);
   TEST_fff_f (fma, -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983);
+  TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
 #endif
 #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
   TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L);
@@ -4646,8 +4649,9 @@ fma_test (void)
   TEST_fff_f (fma, 0xc.7fc000003ffffffp-1194L, 0x8.1e0003fffffffffp+15327L, -0x8.fffep+14072L, 0xc.ae9f164020effffp+14136L);
   TEST_fff_f (fma, -0x8.0001fc000000003p+1798L, 0xcp-2230L, 0x8.f7e000000000007p-468L, -0xc.0002f9ffee10404p-429L);
   TEST_fff_f (fma, 0xc.0000000000007ffp+10130L, -0x8.000000000000001p+4430L, 0xc.07000000001ffffp+14513L, -0xb.fffffffffffd7e4p+14563L);
-  /* Bug 14152: underflow exception may be missing.  */
-  TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION_OK);
+  TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
 #endif
 #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
   TEST_fff_f (fma, 0x1.bb2de33e02ccbbfa6e245a7c1f71p-2584L, -0x1.6b500daf0580d987f1bc0cadfcddp-13777L, 0x1.613cd91d9fed34b33820e5ab9d8dp-16378L, -0x1.3a79fb50eb9ce887cffa0f09bd9fp-16360L);
@@ -4663,6 +4667,8 @@ fma_test (void)
   TEST_fff_f (fma, 0x1.00000000000007ffffffffffffffp-9045L, -0x1.ffffffffffff80000001ffffffffp+4773L, -0x1.f8p-4316L, -0x1.00000000000f88000000fffffdffp-4271L);
   TEST_fff_f (fma, 0x1.4e922764c90701d4a2f21d01893dp-8683L, -0x1.955a12e2d7c9447c27fa022fc865p+212L, -0x1.e9634462eaef96528b90b6944578p-8521L, -0x1.08e1783184a371943d3598e10865p-8470L);
   TEST_fff_f (fma, 0x1.801181509c03bdbef10d6165588cp-15131L, 0x1.ad86f8e57d3d40bfa8007780af63p-368L, -0x1.6e9df0dab1c9f1d7a6043c390741p-15507L, 0x1.417c9b2b15e2ad57dc9e0e920844p-15498L);
+  TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
 #endif
 
   END (fma);
@@ -4717,6 +4723,23 @@ fma_test_towardzero (void)
       TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
+
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
+      TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+      TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+#endif
     }
 
   fesetround (save_round_mode);
@@ -4773,6 +4796,23 @@ fma_test_downward (void)
       TEST_fff_f (fma, -min_value, min_value, minus_zero, -min_subnorm_value, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
+
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
+      TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00008p-127, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+      TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+#endif
     }
 
   fesetround (save_round_mode);
@@ -4829,6 +4869,23 @@ fma_test_upward (void)
       TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, plus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, minus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
+
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
+      TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00008p-127, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+      TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+#endif
     }
 
   fesetround (save_round_mode);
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index 5e21461a4b..108fd66192 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -210,20 +210,14 @@ __fma (double x, double y, double z)
 	{
 	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
 	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
-	     bit.  In round-to-nearest 001 rounds down like 00,
-	     011 rounds up, even though 01 rounds down (thus we need
-	     to adjust), 101 rounds down like 10 and 111 rounds up
-	     like 11.  */
-	  if ((v.ieee.mantissa1 & 3) == 1)
-	    {
-	      v.d *= 0x1p-106;
-	      if (v.ieee.negative)
-		return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
-	      else
-		return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
-	    }
-	  else
-	    return v.d * 0x1p-106;
+	     bit.  */
+	  w.d = 0.0;
+	  w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
+	  w.ieee.negative = v.ieee.negative;
+	  v.ieee.mantissa1 &= ~3U;
+	  v.d *= 0x1p-106;
+	  w.d *= 0x1p-2;
+	  return v.d + w.d;
 	}
       v.ieee.mantissa1 |= j;
       return v.d * 0x1p-106;
diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
index 46b3d81ce5..7fb9856ef1 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z)
 	{
 	  /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
 	     v.ieee.mantissa3 & 1 is the round bit and j is our sticky
-	     bit.  In round-to-nearest 001 rounds down like 00,
-	     011 rounds up, even though 01 rounds down (thus we need
-	     to adjust), 101 rounds down like 10 and 111 rounds up
-	     like 11.  */
-	  if ((v.ieee.mantissa3 & 3) == 1)
-	    {
-	      v.d *= 0x1p-226L;
-	      if (v.ieee.negative)
-		return v.d - 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
-	      else
-		return v.d + 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
-	    }
-	  else
-	    return v.d * 0x1p-226L;
+	     bit.  */
+	  w.d = 0.0L;
+	  w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
+	  w.ieee.negative = v.ieee.negative;
+	  v.ieee.mantissa3 &= ~3U;
+	  v.d *= 0x1p-226L;
+	  w.d *= 0x1p-2L;
+	  return v.d + w.d;
 	}
       v.ieee.mantissa3 |= j;
       return v.d * 0x1p-226L;
diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c
index d125124286..73104914b3 100644
--- a/sysdeps/ieee754/ldbl-96/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmal.c
@@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z)
 	{
 	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
 	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
-	     bit.  In round-to-nearest 001 rounds down like 00,
-	     011 rounds up, even though 01 rounds down (thus we need
-	     to adjust), 101 rounds down like 10 and 111 rounds up
-	     like 11.  */
-	  if ((v.ieee.mantissa1 & 3) == 1)
-	    {
-	      v.d *= 0x1p-128L;
-	      if (v.ieee.negative)
-		return v.d - 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
-	      else
-		return v.d + 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
-	    }
-	  else
-	    return v.d * 0x1p-128L;
+	     bit.  */
+	  w.d = 0.0L;
+	  w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
+	  w.ieee.negative = v.ieee.negative;
+	  v.ieee.mantissa1 &= ~3U;
+	  v.d *= 0x1p-128L;
+	  w.d *= 0x1p-2L;
+	  return v.d + w.d;
 	}
       v.ieee.mantissa1 |= j;
       return v.d * 0x1p-128L;