diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2024-02-20 16:44:13 +0000 |
---|---|---|
committer | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2024-04-09 19:58:04 +0100 |
commit | b0e0a07018098c2c5927796be5681a298c312626 (patch) | |
tree | b63fa0f5c87aeb3336c9ad7c00b0546d12e95b2e /sysdeps/aarch64/fpu/exp2_advsimd.c | |
parent | 31c7d69af59da0da80caa74b2ec6ae149013384d (diff) | |
download | glibc-b0e0a07018098c2c5927796be5681a298c312626.tar.gz glibc-b0e0a07018098c2c5927796be5681a298c312626.tar.xz glibc-b0e0a07018098c2c5927796be5681a298c312626.zip |
aarch64/fpu: Sync libmvec routines from 2.39 and before with AOR
This includes a fix for big-endian in AdvSIMD log, some cosmetic changes, and numerous small optimisations mainly around inlining and using indexed variants of MLA intrinsics. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> (cherry picked from commit e302e1021391d13a9611ba3a910df128830bd19e)
Diffstat (limited to 'sysdeps/aarch64/fpu/exp2_advsimd.c')
-rw-r--r-- | sysdeps/aarch64/fpu/exp2_advsimd.c | 20 |
1 file changed, 11 insertions, 9 deletions
diff --git a/sysdeps/aarch64/fpu/exp2_advsimd.c b/sysdeps/aarch64/fpu/exp2_advsimd.c index 391a93180c..ae1e63d503 100644 --- a/sysdeps/aarch64/fpu/exp2_advsimd.c +++ b/sysdeps/aarch64/fpu/exp2_advsimd.c @@ -24,6 +24,7 @@ #define IndexMask (N - 1) #define BigBound 1022.0 #define UOFlowBound 1280.0 +#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ static const struct data { @@ -48,14 +49,13 @@ lookup_sbits (uint64x2_t i) #if WANT_SIMD_EXCEPT -# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ # define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */ /* Call scalar exp2 as a fallback. */ static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x) +special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special) { - return v_call_f64 (exp2, x, x, v_u64 (0xffffffffffffffff)); + return v_call_f64 (exp2, x, y, is_special); } #else @@ -65,7 +65,7 @@ special_case (float64x2_t x) # define SpecialBias1 0x7000000000000000 /* 0x1p769. */ # define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ -static float64x2_t VPCS_ATTR +static inline float64x2_t VPCS_ATTR special_case (float64x2_t s, float64x2_t y, float64x2_t n, const struct data *d) { @@ -94,10 +94,10 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) #if WANT_SIMD_EXCEPT uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres)); - /* If any special case (inf, nan, small and large x) is detected, - fall back to scalar for all lanes. */ - if (__glibc_unlikely (v_any_u64 (cmp))) - return special_case (x); + /* Mask special lanes and retain a copy of x for passing to special-case + handler. 
*/ + float64x2_t xc = x; + x = v_zerofy_f64 (x, cmp); #else cmp = vcagtq_f64 (x, d->scale_big_bound); #endif @@ -120,9 +120,11 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly); y = vmulq_f64 (r, y); -#if !WANT_SIMD_EXCEPT if (__glibc_unlikely (v_any_u64 (cmp))) +#if !WANT_SIMD_EXCEPT return special_case (s, y, n, d); +#else + return special_case (xc, vfmaq_f64 (s, s, y), cmp); #endif return vfmaq_f64 (s, s, y); } |