about summary refs log tree commit diff
path: root/sysdeps/ieee754
diff options
context:
space:
mode:
author Wilco Dijkstra <wdijkstr@arm.com> 2018-08-15 16:01:21 +0100
committer Wilco Dijkstra <wdijkstr@arm.com> 2018-08-15 16:01:21 +0100
commit 126c4e3f804881f9fbc4eb71787f41793d2d7be5 (patch)
tree a8925aeea4de10fc9fdf788cabc89118e06cd892 /sysdeps/ieee754
parent 49acec179ca9fb2da961b78f56ef5ce501bbb50b (diff)
download glibc-126c4e3f804881f9fbc4eb71787f41793d2d7be5.tar.gz
glibc-126c4e3f804881f9fbc4eb71787f41793d2d7be5.tar.xz
glibc-126c4e3f804881f9fbc4eb71787f41793d2d7be5.zip
Use generic sinf/cosf in lgammaf_r
The internal functions __kernel_sinf and __kernel_cosf are used only by
lgammaf_r.  Removing the internal functions and using the generic sinf
and cosf is better overall.  Benchmarking on Cortex-A72 shows the generic
sinf and cosf are 1.4x and 2.3x faster, respectively, in the range
|x| < PI/4, and 0.66x and 1.1x, respectively, for |x| < PI/2, so it should
make lgammaf_r faster on average.

GLIBC regression tests pass on AArch64.

	* sysdeps/ieee754/flt-32/e_lgammaf_r.c (sin_pif): Use __sinf/__cosf.
	* sysdeps/ieee754/flt-32/k_cosf.c (__kernel_cosf): Remove all code.
	* sysdeps/ieee754/flt-32/k_sinf.c (__kernel_sinf): Likewise.
Diffstat (limited to 'sysdeps/ieee754')
-rw-r--r-- sysdeps/ieee754/flt-32/e_lgammaf_r.c 12
-rw-r--r-- sysdeps/ieee754/flt-32/k_cosf.c 56
-rw-r--r-- sysdeps/ieee754/flt-32/k_sinf.c 52
3 files changed, 8 insertions, 112 deletions
diff --git a/sysdeps/ieee754/flt-32/e_lgammaf_r.c b/sysdeps/ieee754/flt-32/e_lgammaf_r.c
index 8fdf9bb8bc..ebe2d3462f 100644
--- a/sysdeps/ieee754/flt-32/e_lgammaf_r.c
+++ b/sysdeps/ieee754/flt-32/e_lgammaf_r.c
@@ -97,7 +97,7 @@ sin_pif(float x)
 	GET_FLOAT_WORD(ix,x);
 	ix &= 0x7fffffff;
 
-	if(ix<0x3e800000) return __kernel_sinf(pi*x,zero,0);
+	if(ix<0x3e800000) return __sinf (pi*x);
 	y = -x;		/* x is assume negative */
 
     /*
@@ -121,14 +121,14 @@ sin_pif(float x)
 	    }
 	}
 	switch (n) {
-	    case 0:   y =  __kernel_sinf(pi*y,zero,0); break;
+	    case 0:   y =  __sinf (pi*y); break;
 	    case 1:
-	    case 2:   y =  __kernel_cosf(pi*((float)0.5-y),zero); break;
+	    case 2:   y =  __cosf (pi*((float)0.5-y)); break;
 	    case 3:
-	    case 4:   y =  __kernel_sinf(pi*(one-y),zero,0); break;
+	    case 4:   y =  __sinf (pi*(one-y)); break;
 	    case 5:
-	    case 6:   y = -__kernel_cosf(pi*(y-(float)1.5),zero); break;
-	    default:  y =  __kernel_sinf(pi*(y-(float)2.0),zero,0); break;
+	    case 6:   y = -__cosf (pi*(y-(float)1.5)); break;
+	    default:  y =  __sinf (pi*(y-(float)2.0)); break;
 	    }
 	return -y;
 }
diff --git a/sysdeps/ieee754/flt-32/k_cosf.c b/sysdeps/ieee754/flt-32/k_cosf.c
index 63ca822f8c..1cc8931700 100644
--- a/sysdeps/ieee754/flt-32/k_cosf.c
+++ b/sysdeps/ieee754/flt-32/k_cosf.c
@@ -1,55 +1 @@
-/* k_cosf.c -- float version of k_cos.c
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: k_cosf.c,v 1.4 1995/05/10 20:46:23 jtc Exp $";
-#endif
-
-#include <math.h>
-#include <math_private.h>
-
-static const float
-one =  1.0000000000e+00, /* 0x3f800000 */
-C1  =  4.1666667908e-02, /* 0x3d2aaaab */
-C2  = -1.3888889225e-03, /* 0xbab60b61 */
-C3  =  2.4801587642e-05, /* 0x37d00d01 */
-C4  = -2.7557314297e-07, /* 0xb493f27c */
-C5  =  2.0875723372e-09, /* 0x310f74f6 */
-C6  = -1.1359647598e-11; /* 0xad47d74e */
-
-float __kernel_cosf(float x, float y)
-{
-	float a,hz,z,r,qx;
-	int32_t ix;
-	GET_FLOAT_WORD(ix,x);
-	ix &= 0x7fffffff;			/* ix = |x|'s high word*/
-	if(ix<0x32000000) {			/* if x < 2**27 */
-	    if(((int)x)==0) return one;		/* generate inexact */
-	}
-	z  = x*x;
-	r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
-	if(ix < 0x3e99999a) 			/* if |x| < 0.3 */
-	    return one - ((float)0.5*z - (z*r - x*y));
-	else {
-	    if(ix > 0x3f480000) {		/* x > 0.78125 */
-		qx = (float)0.28125;
-	    } else {
-	        SET_FLOAT_WORD(qx,ix-0x01000000);	/* x/4 */
-	    }
-	    hz = (float)0.5*z-qx;
-	    a  = one-qx;
-	    return a - (hz - (z*r-x*y));
-	}
-}
+/* Not needed.  */
diff --git a/sysdeps/ieee754/flt-32/k_sinf.c b/sysdeps/ieee754/flt-32/k_sinf.c
index dcf3c35358..1cc8931700 100644
--- a/sysdeps/ieee754/flt-32/k_sinf.c
+++ b/sysdeps/ieee754/flt-32/k_sinf.c
@@ -1,51 +1 @@
-/* k_sinf.c -- float version of k_sin.c
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: k_sinf.c,v 1.4 1995/05/10 20:46:33 jtc Exp $";
-#endif
-
-#include <float.h>
-#include <math.h>
-#include <math_private.h>
-#include <math-underflow.h>
-
-static const float
-half =  5.0000000000e-01,/* 0x3f000000 */
-S1  = -1.6666667163e-01, /* 0xbe2aaaab */
-S2  =  8.3333337680e-03, /* 0x3c088889 */
-S3  = -1.9841270114e-04, /* 0xb9500d01 */
-S4  =  2.7557314297e-06, /* 0x3638ef1b */
-S5  = -2.5050759689e-08, /* 0xb2d72f34 */
-S6  =  1.5896910177e-10; /* 0x2f2ec9d3 */
-
-float __kernel_sinf(float x, float y, int iy)
-{
-	float z,r,v;
-	int32_t ix;
-	GET_FLOAT_WORD(ix,x);
-	ix &= 0x7fffffff;			/* high word of x */
-	if(ix<0x32000000)			/* |x| < 2**-27 */
-	  {
-	    math_check_force_underflow (x);
-	    if ((int) x == 0)
-	      return x;		/* generate inexact */
-	  }
-	z	=  x*x;
-	v	=  z*x;
-	r	=  S2+z*(S3+z*(S4+z*(S5+z*S6)));
-	if(iy==0) return x+v*(S1+z*r);
-	else      return x-((z*(half*y-v*r)-y)-v*S1);
-}
+/* Not needed.  */