diff options
Diffstat (limited to 'sysdeps/aarch64/fpu/expf_advsimd.c')
-rw-r--r-- | sysdeps/aarch64/fpu/expf_advsimd.c | 10 |
1 files changed, 4 insertions, 6 deletions
diff --git a/sysdeps/aarch64/fpu/expf_advsimd.c b/sysdeps/aarch64/fpu/expf_advsimd.c index 99d2e647aa..5c9cb72620 100644 --- a/sysdeps/aarch64/fpu/expf_advsimd.c +++ b/sysdeps/aarch64/fpu/expf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[5]; - float32x4_t shift, inv_ln2, ln2_hi, ln2_lo; + float32x4_t inv_ln2, ln2_hi, ln2_lo; uint32x4_t exponent_bias; #if !WANT_SIMD_EXCEPT float32x4_t special_bound, scale_thresh; @@ -31,7 +31,6 @@ static const struct data /* maxerr: 1.45358 +0.5 ulp. */ .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, - .shift = V4 (0x1.8p23f), .inv_ln2 = V4 (0x1.715476p+0f), .ln2_hi = V4 (0x1.62e4p-1f), .ln2_lo = V4 (0x1.7f7d1cp-20f), @@ -85,7 +84,7 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly, z; + float32x4_t n, r, r2, scale, p, q, poly; uint32x4_t cmp, e; #if WANT_SIMD_EXCEPT @@ -104,11 +103,10 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - z = vfmaq_f32 (d->shift, x, d->inv_ln2); - n = vsubq_f32 (z, d->shift); + n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2)); r = vfmsq_f32 (x, n, d->ln2_hi); r = vfmsq_f32 (r, n, d->ln2_lo); - e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT |