diff options
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/mpa.c | 56 |
2 files changed, 53 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog index c633a60c0b..99afa0963c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2013-02-26 Siddhesh Poyarekar <siddhesh@redhat.com> + * sysdeps/ieee754/dbl-64/mpa.c: Include alloca.h. + (__mul): Reduce iterations for calculating mantissa. + * sysdeps/ieee754/dbl-64/sincos32.c (__c32): Use MPONE and MPTWO. (__mpranred): Likewise. diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c index 7a6f01854b..8fc2626f76 100644 --- a/sysdeps/ieee754/dbl-64/mpa.c +++ b/sysdeps/ieee754/dbl-64/mpa.c @@ -43,6 +43,7 @@ #include "endian.h" #include "mpa.h" #include <sys/param.h> +#include <alloca.h> #ifndef SECTION # define SECTION @@ -621,6 +622,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) long p2 = p; double u, zk; const mp_no *a; + double *diag; /* Is z=0? */ if (__glibc_unlikely (X[0] * Y[0] == ZERO)) @@ -673,12 +675,33 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) while (k > ip + ip2 + 1) Z[k--] = ZERO; - zk = Z[k] = ZERO; + zk = ZERO; + + /* Precompute sums of diagonal elements so that we can directly use them + later. See the next comment to know we why need them. */ + diag = alloca (k * sizeof (double)); + double d = ZERO; + for (i = 1; i <= ip; i++) + { + d += X[i] * Y[i]; + diag[i] = d; + } + while (i < k) + diag[i++] = d; while (k > p2) { - for (i = k - p2, j = p2; i < p2 + 1; i++, j--) - zk += X[i] * Y[j]; + long lim = k / 2; + + if (k % 2 == 0) + /* We want to add this only once, but since we subtract it in the sum + of products above, we add twice. */ + zk += 2 * X[lim] * Y[lim]; + + for (i = k - p2, j = p2; i < j; i++, j--) + zk += (X[i] + X[j]) * (Y[i] + Y[j]); + + zk -= diag[k - 1]; u = (zk + CUTTER) - CUTTER; if (u > zk) @@ -687,11 +710,32 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) zk = u * RADIXI; } - /* The real deal. */ + /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i + goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the + number of multiplications, we halve the range and if k is an even number, + add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute + X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j]. + + This reduction tells us that we're summing two things, the first term + through the half range and the negative of the sum of the product of all + terms of X and Y in the full range. i.e. + + SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in + a single loop so that it completes in O(n) time and can hence be directly + used in the loop below. */ while (k > 1) { - for (i = 1, j = k - 1; i < k; i++, j--) - zk += X[i] * Y[j]; + long lim = k / 2; + + if (k % 2 == 0) + /* We want to add this only once, but since we subtract it in the sum + of products above, we add twice. */ + zk += 2 * X[lim] * Y[lim]; + + for (i = 1, j = k - 1; i < j; i++, j--) + zk += (X[i] + X[j]) * (Y[i] + Y[j]); + + zk -= diag[k - 1]; u = (zk + CUTTER) - CUTTER; if (u > zk) |