diff options
author | Joseph Myers <joseph@codesourcery.com> | 2012-11-03 19:48:53 +0000 |
---|---|---|
committer | Joseph Myers <joseph@codesourcery.com> | 2012-11-03 19:48:53 +0000 |
commit | 5b5b04d6282df0364424c6f2c0462e5c1a4394b0 (patch) | |
tree | bef8cb91fffbf78e56f18479234abb47ce3054b2 /sysdeps/ieee754 | |
parent | fbeafedeea37e0af1984a6511018d159f5ceed6a (diff) | |
download | glibc-5b5b04d6282df0364424c6f2c0462e5c1a4394b0.tar.gz glibc-5b5b04d6282df0364424c6f2c0462e5c1a4394b0.tar.xz glibc-5b5b04d6282df0364424c6f2c0462e5c1a4394b0.zip |
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796).
Diffstat (limited to 'sysdeps/ieee754')
-rw-r--r-- | sysdeps/ieee754/dbl-64/s_fma.c | 18 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/s_fmal.c | 18 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/s_fma.c | 18 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/s_fmal.c | 18 |
4 files changed, 64 insertions, 8 deletions
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index 68c63a1987..07fe715617 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -167,6 +167,9 @@ __fma (double x, double y, double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + libc_feholdexcept_setround (&env, FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1) double x1 = x * C; @@ -185,9 +188,20 @@ __fma (double x, double y, double z) t1 = m1 - t1; t2 = z - t2; double a2 = t1 + t2; + feclearexcept (FE_INEXACT); - fenv_t env; - libc_feholdexcept_setround (&env, FE_TOWARDZERO); + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + libc_feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } + + libc_fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c index c6a3d71f1c..d576403b25 100644 --- a/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -170,6 +170,10 @@ __fmal (long double x, long double y, long double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; @@ -188,9 +192,19 @@ __fmal (long double x, long double y, long double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; diff --git a/sysdeps/ieee754/ldbl-96/s_fma.c b/sysdeps/ieee754/ldbl-96/s_fma.c index 001d8063d4..bf2d794c9b 100644 --- a/sysdeps/ieee754/ldbl-96/s_fma.c +++ b/sysdeps/ieee754/ldbl-96/s_fma.c @@ -42,6 +42,10 @@ __fma (double x, double y, double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = (long double) x * C; @@ -60,9 +64,19 @@ __fma (double x, double y, double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ a2 = a2 + m2; diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c index b5fdcba6bc..32e71a18ba 100644 --- a/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -168,6 +168,10 @@ __fmal (long double x, long double y, long double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; @@ -186,9 +190,19 @@ __fmal (long double x, long double y, long double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; |