about summary refs log tree commit diff
path: root/sysdeps/aarch64/fpu/expm1f_advsimd.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/aarch64/fpu/expm1f_advsimd.c')
-rw-r--r--sysdeps/aarch64/fpu/expm1f_advsimd.c17
1 files changed, 9 insertions, 8 deletions
diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c
index 8d4c9a2193..93db200f61 100644
--- a/sysdeps/aarch64/fpu/expm1f_advsimd.c
+++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c
@@ -23,7 +23,8 @@
 static const struct data
 {
   float32x4_t poly[5];
-  float32x4_t invln2, ln2_lo, ln2_hi, shift;
+  float32x4_t invln2_and_ln2;
+  float32x4_t shift;
   int32x4_t exponent_bias;
 #if WANT_SIMD_EXCEPT
   uint32x4_t thresh;
@@ -34,9 +35,8 @@ static const struct data
   /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2].  */
   .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),
 	    V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },
-  .invln2 = V4 (0x1.715476p+0f),
-  .ln2_hi = V4 (0x1.62e4p-1f),
-  .ln2_lo = V4 (0x1.7f7d1cp-20f),
+  /* Stores constants: invln2, ln2_hi, ln2_lo, 0.  */
+  .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },
   .shift = V4 (0x1.8p23f),
   .exponent_bias = V4 (0x3f800000),
 #if !WANT_SIMD_EXCEPT
@@ -80,7 +80,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
     x = v_zerofy_f32 (x, special);
 #else
   /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf.  */
-  uint32x4_t special = vceqzq_u32 (vcaltq_f32 (x, d->oflow_bound));
+  uint32x4_t special = vcagtq_f32 (x, d->oflow_bound);
 #endif
 
   /* Reduce argument to smaller range:
@@ -88,10 +88,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
      and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
      exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
      where 2^i is exact because i is an integer.  */
-  float32x4_t j = vsubq_f32 (vfmaq_f32 (d->shift, d->invln2, x), d->shift);
+  float32x4_t j = vsubq_f32 (
+      vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
   int32x4_t i = vcvtq_s32_f32 (j);
-  float32x4_t f = vfmsq_f32 (x, j, d->ln2_hi);
-  f = vfmsq_f32 (f, j, d->ln2_lo);
+  float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
+  f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
 
   /* Approximate expm1(f) using polynomial.
      Taylor expansion for expm1(x) has the form: