diff options
author | Joseph Myers <joseph@codesourcery.com> | 2012-11-06 14:12:54 +0000 |
---|---|---|
committer | Joseph Myers <joseph@codesourcery.com> | 2012-11-06 14:12:54 +0000 |
commit | 82477c28f46c579a149a8333c07233e9f4e43408 (patch) | |
tree | 718fb7196e880de54d4d61a79f4a8dbab9c2601e | |
parent | d7fcee3a58bd62c3b1b004f303ec345c11e44fa1 (diff) | |
download | glibc-82477c28f46c579a149a8333c07233e9f4e43408.tar.gz glibc-82477c28f46c579a149a8333c07233e9f4e43408.tar.xz glibc-82477c28f46c579a149a8333c07233e9f4e43408.zip |
Fix fma underflows with small x * y (bug 14793).
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | math/libm-test.inc | 128 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/s_fma.c | 45 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/s_fmal.c | 45 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/s_fmal.c | 45 |
6 files changed, 223 insertions, 55 deletions
diff --git a/ChangeLog b/ChangeLog index 3ff693729d..04ccde0f2f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2012-11-06 Joseph Myers <joseph@codesourcery.com> + + [BZ #14793] + * sysdeps/ieee754/dbl-64/s_fma.c (__fma): In case of large z + exponent and small x and y exponents, scale x or y up. Increase + by 2 the exponent used in scaling up. + * sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise. + * sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise. + * math/libm-test.inc (fma_test): Add more tests. + (fma_test_towardzero): Likewise. + (fma_test_downward): Likewise. + (fma_test_upward): Likewise. + 2012-11-05 Joseph Myers <joseph@codesourcery.com> [BZ #14805] diff --git a/NEWS b/NEWS index fec122b636..331d21263f 100644 --- a/NEWS +++ b/NEWS @@ -18,7 +18,7 @@ Version 2.17 14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14669, 14683, 14694, 14716, 14743, - 14767, 14783, 14784, 14785, 14796, 14797, 14801, 14805. + 14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805. * Support for STT_GNU_IFUNC symbols added for s390 and s390x. Optimized versions of memcpy, memset, and memcmp added for System z10 and diff --git a/math/libm-test.inc b/math/libm-test.inc index 55892c3459..a52ce6aa2d 100644 --- a/math/libm-test.inc +++ b/math/libm-test.inc @@ -4662,6 +4662,14 @@ fma_test (void) TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103); #endif #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13); @@ -4712,6 +4720,14 @@ fma_test (void) TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L); @@ -4748,6 +4764,14 @@ fma_test (void) TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 TEST_fff_f (fma, 0x1.bb2de33e02ccbbfa6e245a7c1f71p-2584L, -0x1.6b500daf0580d987f1bc0cadfcddp-13777L, 0x1.613cd91d9fed34b33820e5ab9d8dp-16378L, -0x1.3a79fb50eb9ce887cffa0f09bd9fp-16360L); @@ -4791,6 +4815,14 @@ fma_test (void) TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); #endif END (fma); @@ -4884,6 +4916,14 @@ fma_test_towardzero (void) TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x0.ffffffp127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x0.ffffffp127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x0.ffffffp103); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x0.ffffffp103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103); #endif #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); @@ -4914,6 +4954,14 @@ fma_test_towardzero (void) TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x0.fffffffffffff8p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x0.fffffffffffff8p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x0.fffffffffffff8p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x0.fffffffffffff8p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); @@ -4944,6 +4992,14 @@ fma_test_towardzero (void) TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x0.ffffffffffffffffp16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x0.ffffffffffffffffp16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x0.ffffffffffffffffp16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x0.ffffffffffffffffp16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); @@ -4974,6 +5030,14 @@ fma_test_towardzero (void) TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x0.ffffffffffffffffffffffffffff8p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x0.ffffffffffffffffffffffffffff8p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x0.ffffffffffffffffffffffffffff8p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x0.ffffffffffffffffffffffffffff8p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); #endif } @@ -5070,6 +5134,14 @@ fma_test_downward (void) TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x0.ffffffp127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1.000002p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x0.ffffffp103); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1.000002p103); #endif #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); @@ -5100,6 +5172,14 @@ fma_test_downward (void) TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x0.fffffffffffff8p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1.0000000000001p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x0.fffffffffffff8p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1.0000000000001p970); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); @@ -5130,6 +5210,14 @@ fma_test_downward (void) TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x0.ffffffffffffffffp16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1.0000000000000002p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x0.ffffffffffffffffp16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1.0000000000000002p16319L); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); @@ -5160,6 +5248,14 @@ fma_test_downward (void) TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x0.ffffffffffffffffffffffffffff8p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1.0000000000000000000000000001p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x0.ffffffffffffffffffffffffffff8p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1.0000000000000000000000000001p16319L); #endif } @@ -5256,6 +5352,14 @@ fma_test_upward (void) TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1.000002p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x0.ffffffp127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1.000002p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x0.ffffffp103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103); #endif #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION); @@ -5286,6 +5390,14 @@ fma_test_upward (void) TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1.0000000000001p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x0.fffffffffffff8p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1.0000000000001p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x0.fffffffffffff8p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION); @@ -5316,6 +5428,14 @@ fma_test_upward (void) TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1.0000000000000002p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x0.ffffffffffffffffp16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1.0000000000000002p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x0.ffffffffffffffffp16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION); @@ -5346,6 +5466,14 @@ fma_test_upward (void) TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1.0000000000000000000000000001p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x0.ffffffffffffffffffffffffffff8p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1.0000000000000000000000000001p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x0.ffffffffffffffffffffffffffff8p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); #endif } diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index cd28830709..8c69b987e2 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -114,8 +114,17 @@ __fma (double x, double y, double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * DBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * DBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > DBL_MANT_DIG) u.ieee.exponent -= DBL_MANT_DIG; @@ -145,15 +154,15 @@ __fma (double x, double y, double z) <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * DBL_MANT_DIG; + u.ieee.exponent += 2 * DBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * DBL_MANT_DIG; - if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4) + v.ieee.exponent += 2 * DBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * DBL_MANT_DIG; + w.ieee.exponent += 2 * DBL_MANT_DIG + 2; else - w.d *= 0x1p106; + w.d *= 0x1p108; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -238,19 +247,19 @@ __fma (double x, double y, double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-106; + return v.d * 0x1p-108; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 106) - return (a1 + u.d) * 0x1p-106; - /* If v.d * 0x1p-106 with round to zero is a subnormal above - or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa + if (v.ieee.exponent > 108) + return (a1 + u.d) * 0x1p-108; + /* If v.d * 0x1p-108 with round to zero is a subnormal above + or equal to DBL_MIN / 2, then v.d * 0x1p-108 shifts mantissa down just by 1 bit, which means v.ieee.mantissa1 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-106 never normalizes by shifting up, + v.d * 0x1p-108 never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 106) + if (v.ieee.exponent == 108) { /* If the exponent would be in the normal range when rounding to normal precision with unbounded exponent @@ -260,8 +269,8 @@ __fma (double x, double y, double z) if (TININESS_AFTER_ROUNDING) { w.d = a1 + u.d; - if (w.ieee.exponent == 107) - return w.d * 0x1p-106; + if (w.ieee.exponent == 109) + return w.d * 0x1p-108; } /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, v.ieee.mantissa1 & 1 is the round bit and j is our sticky @@ -270,12 +279,12 @@ __fma (double x, double y, double z) w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; w.ieee.negative = v.ieee.negative; v.ieee.mantissa1 &= ~3U; - v.d *= 0x1p-106; + v.d *= 0x1p-108; w.d *= 0x1p-2; return v.d + w.d; } v.ieee.mantissa1 |= j; - return v.d * 0x1p-106; + return v.d * 0x1p-108; } } #ifndef __fma diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c index 6fa663a6c0..c9accad8a3 100644 --- a/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -118,8 +118,17 @@ __fmal (long double x, long double y, long double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > LDBL_MANT_DIG) u.ieee.exponent -= LDBL_MANT_DIG; @@ -149,15 +158,15 @@ __fmal (long double x, long double y, long double z) <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * LDBL_MANT_DIG; + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * LDBL_MANT_DIG; - if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4) + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * LDBL_MANT_DIG; + w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - w.d *= 0x1p226L; + w.d *= 0x1p228L; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -242,19 +251,19 @@ __fmal (long double x, long double y, long double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-226L; + return v.d * 0x1p-228L; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 226) - return (a1 + u.d) * 0x1p-226L; - /* If v.d * 0x1p-226L with round to zero is a subnormal above - or equal to LDBL_MIN / 2, then v.d * 0x1p-226L shifts mantissa + if (v.ieee.exponent > 228) + return (a1 + u.d) * 0x1p-228L; + /* If v.d * 0x1p-228L with round to zero is a subnormal above + or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa down just by 1 bit, which means v.ieee.mantissa3 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-226L never normalizes by shifting up, + v.d * 0x1p-228L never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 226) + if (v.ieee.exponent == 228) { /* If the exponent would be in the normal range when rounding to normal precision with unbounded exponent @@ -264,8 +273,8 @@ __fmal (long double x, long double y, long double z) if (TININESS_AFTER_ROUNDING) { w.d = a1 + u.d; - if (w.ieee.exponent == 227) - return w.d * 0x1p-226L; + if (w.ieee.exponent == 229) + return w.d * 0x1p-228L; } /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding, v.ieee.mantissa3 & 1 is the round bit and j is our sticky @@ -274,12 +283,12 @@ __fmal (long double x, long double y, long double z) w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j; w.ieee.negative = v.ieee.negative; v.ieee.mantissa3 &= ~3U; - v.d *= 0x1p-226L; + v.d *= 0x1p-228L; w.d *= 0x1p-2L; return v.d + w.d; } v.ieee.mantissa3 |= j; - return v.d * 0x1p-226L; + return v.d * 0x1p-228L; } } weak_alias (__fmal, fmal) diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c index 53098b6d4e..c86dff6f8c 100644 --- a/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -116,8 +116,17 @@ __fmal (long double x, long double y, long double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > LDBL_MANT_DIG) u.ieee.exponent -= LDBL_MANT_DIG; @@ -147,15 +156,15 @@ __fmal (long double x, long double y, long double z) <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * LDBL_MANT_DIG; + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * LDBL_MANT_DIG; - if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4) + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * LDBL_MANT_DIG; + w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - w.d *= 0x1p128L; + w.d *= 0x1p130L; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -240,19 +249,19 @@ __fmal (long double x, long double y, long double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-128L; + return v.d * 0x1p-130L; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 128) - return (a1 + u.d) * 0x1p-128L; - /* If v.d * 0x1p-128L with round to zero is a subnormal above - or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa + if (v.ieee.exponent > 130) + return (a1 + u.d) * 0x1p-130L; + /* If v.d * 0x1p-130L with round to zero is a subnormal above + or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa down just by 1 bit, which means v.ieee.mantissa1 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-128L never normalizes by shifting up, + v.d * 0x1p-130L never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 128) + if (v.ieee.exponent == 130) { /* If the exponent would be in the normal range when rounding to normal precision with unbounded exponent @@ -262,8 +271,8 @@ __fmal (long double x, long double y, long double z) if (TININESS_AFTER_ROUNDING) { w.d = a1 + u.d; - if (w.ieee.exponent == 129) - return w.d * 0x1p-128L; + if (w.ieee.exponent == 131) + return w.d * 0x1p-130L; } /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, v.ieee.mantissa1 & 1 is the round bit and j is our sticky @@ -272,12 +281,12 @@ __fmal (long double x, long double y, long double z) w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; w.ieee.negative = v.ieee.negative; v.ieee.mantissa1 &= ~3U; - v.d *= 0x1p-128L; + v.d *= 0x1p-130L; w.d *= 0x1p-2L; return v.d + w.d; } v.ieee.mantissa1 |= j; - return v.d * 0x1p-128L; + return v.d * 0x1p-130L; } } weak_alias (__fmal, fmal) |