Better exp polynomial

The lesser the __mul calls, the better it is for performance.
author: Siddhesh Poyarekar <siddhesh@redhat.com> 2013-02-13 14:49:50 +0530
committer: Siddhesh Poyarekar <siddhesh@redhat.com> 2013-02-13 14:49:50 +0530
commit: 4e92d59e26eb33c3ae523783e918a6e839e83ffd (patch)
tree: 1d10f92b166a89352df8701c6775ceb3b1795e44 /sysdeps/ieee754/dbl-64/mpexp.c
parent: 909279a5cfa938c989c9b01c8f48a0276291ec45 (diff)
download: glibc-4e92d59e26eb33c3ae523783e918a6e839e83ffd.tar.gz
glibc-4e92d59e26eb33c3ae523783e918a6e839e83ffd.tar.xz
glibc-4e92d59e26eb33c3ae523783e918a6e839e83ffd.zip
1 files changed, 37 insertions, 23 deletions
diff --git a/sysdeps/ieee754/dbl-64/mpexp.c b/sysdeps/ieee754/dbl-64/mpexp.c
index 8d288ff9a1..35c4e80b83 100644
--- a/sysdeps/ieee754/dbl-64/mpexp.c
+++ b/sysdeps/ieee754/dbl-64/mpexp.c
@@ -49,6 +49,15 @@ __mpexp (mp_no *x, mp_no *y, int p)
       0, 0, 0, 0, 3, 3, 4, 4, 5, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8
     };
+
+  /* Factorials for the values of np above.  */
+  static const double nfa[33] =
+    {
+      1.0, 1.0, 1.0, 1.0, 6.0, 6.0, 24.0, 24.0, 120.0, 24.0, 24.0,
+      120.0, 120.0, 120.0, 720.0, 720.0, 720.0, 720.0, 720.0, 720.0,
+      720.0, 720.0, 720.0, 720.0, 5040.0, 5040.0, 5040.0, 5040.0,
+      40320.0, 40320.0, 40320.0, 40320.0, 40320.0
+    };
   static const int m1p[33] =
     {
       0, 0, 0, 0,
@@ -71,16 +80,7 @@ __mpexp (mp_no *x, mp_no *y, int p)
       {0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 39, 43, 47, 51, 55, 59, 63},
       {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 47, 50, 54}
     };
-  mp_no mpk =
-    {
-      0,
-      {
-	0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-	0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-	0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-      }
-    };
-  mp_no mps, mpak, mpt1, mpt2;
+  mp_no mps, mpk, mpt1, mpt2;
 
   /* Choose m,n and compute a=2**(-m).  */
   n = np[p];
@@ -115,24 +115,38 @@ __mpexp (mp_no *x, mp_no *y, int p)
 	  break;
     }
 
-  /* Compute s=x*2**(-m). Put result in mps.  */
+  /* Compute s=x*2**(-m). Put result in mps.  This is the range-reduced input
+     that we will use to compute e^s.  For the final result, simply raise it
+     to 2^m.  */
   __pow_mp (-m, &mpt1, p);
   __mul (x, &mpt1, &mps, p);
 
-  /* Evaluate the polynomial. Put result in mpt2.  */
-  mpk.e = 1;
-  mpk.d[0] = ONE;
-  mpk.d[1] = n;
-  __dvd (&mps, &mpk, &mpt1, p);
-  __add (&mpone, &mpt1, &mpak, p);
-  for (k = n - 1; k > 1; k--)
+  /* Compute the Taylor series for e^s:
+
+         1 + x/1! + x^2/2! + x^3/3! ...
+
+     for N iterations.  We compute this as:
+
+         e^x = 1 + (x * n!/1! + x^2 * n!/2! + x^3 * n!/3!) / n!
+             = 1 + (x * (n!/1! + x * (n!/2! + x * (n!/3! + x ...)))) / n!
+
+     n! is pre-computed and saved while k! is computed on the fly.  */
+  __cpy (&mps, &mpt2, p);
+
+  double kf = 1.0;
+
+  /* Evaluate the rest.  The result will be in mpt2.  */
+  for (k = n - 1; k > 0; k--)
     {
-      __mul (&mps, &mpak, &mpt1, p);
-      mpk.d[1] = k;
-      __dvd (&mpt1, &mpk, &mpt2, p);
-      __add (&mpone, &mpt2, &mpak, p);
+      /* n! / k! = n * (n - 1) ... * (n - k + 1) */
+      kf *= k + 1;
+
+      __dbl_mp (kf, &mpk, p);
+      __add (&mpt2, &mpk, &mpt1, p);
+      __mul (&mps, &mpt1, &mpt2, p);
     }
-  __mul (&mps, &mpak, &mpt1, p);
+  __dbl_mp (nfa[p], &mpk, p);
+  __dvd (&mpt2, &mpk, &mpt1, p);
   __add (&mpone, &mpt1, &mpt2, p);
 
   /* Raise polynomial value to the power of 2**m. Put result in y.  */
author	Siddhesh Poyarekar <siddhesh@redhat.com>	2013-02-13 14:49:50 +0530
committer	Siddhesh Poyarekar <siddhesh@redhat.com>	2013-02-13 14:49:50 +0530
commit	4e92d59e26eb33c3ae523783e918a6e839e83ffd (patch)
tree	1d10f92b166a89352df8701c6775ceb3b1795e44 /sysdeps/ieee754/dbl-64/mpexp.c
parent	909279a5cfa938c989c9b01c8f48a0276291ec45 (diff)
download	glibc-4e92d59e26eb33c3ae523783e918a6e839e83ffd.tar.gz glibc-4e92d59e26eb33c3ae523783e918a6e839e83ffd.tar.xz glibc-4e92d59e26eb33c3ae523783e918a6e839e83ffd.zip