diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2024-02-20 16:44:13 +0000 |
---|---|---|
committer | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2024-02-26 09:45:50 -0300 |
commit | e302e1021391d13a9611ba3a910df128830bd19e (patch) | |
tree | 6b672bb0065aa687d1e5e35c6c7fde07c5345961 /sysdeps/aarch64/fpu/expm1_advsimd.c | |
parent | 02782fd12849b6673cb5c2728cb750e8ec295aa3 (diff) | |
download | glibc-e302e1021391d13a9611ba3a910df128830bd19e.tar.gz glibc-e302e1021391d13a9611ba3a910df128830bd19e.tar.xz glibc-e302e1021391d13a9611ba3a910df128830bd19e.zip |
aarch64/fpu: Sync libmvec routines from 2.39 and before with AOR
This includes a fix for big-endian in AdvSIMD log, some cosmetic changes, and numerous small optimisations mainly around inlining and using indexed variants of MLA intrinsics.

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Diffstat (limited to 'sysdeps/aarch64/fpu/expm1_advsimd.c')
-rw-r--r-- | sysdeps/aarch64/fpu/expm1_advsimd.c | 11 |
1 file changed, 5 insertions, 6 deletions
diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c
index 0b85bd06f3..3628398674 100644
--- a/sysdeps/aarch64/fpu/expm1_advsimd.c
+++ b/sysdeps/aarch64/fpu/expm1_advsimd.c
@@ -23,7 +23,7 @@
 static const struct data
 {
   float64x2_t poly[11];
-  float64x2_t invln2, ln2_lo, ln2_hi, shift;
+  float64x2_t invln2, ln2, shift;
   int64x2_t exponent_bias;
 #if WANT_SIMD_EXCEPT
   uint64x2_t thresh, tiny_bound;
@@ -38,8 +38,7 @@ static const struct data
             V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
             V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) },
   .invln2 = V2 (0x1.71547652b82fep0),
-  .ln2_hi = V2 (0x1.62e42fefa39efp-1),
-  .ln2_lo = V2 (0x1.abc9e3b39803fp-56),
+  .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 },
   .shift = V2 (0x1.8p52),
   .exponent_bias = V2 (0x3ff0000000000000),
 #if WANT_SIMD_EXCEPT
@@ -83,7 +82,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
   x = v_zerofy_f64 (x, special);
 #else
   /* Large input, NaNs and Infs. */
-  uint64x2_t special = vceqzq_u64 (vcaltq_f64 (x, d->oflow_bound));
+  uint64x2_t special = vcageq_f64 (x, d->oflow_bound);
 #endif
 
   /* Reduce argument to smaller range:
@@ -93,8 +92,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
      where 2^i is exact because i is an integer. */
   float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift);
   int64x2_t i = vcvtq_s64_f64 (n);
-  float64x2_t f = vfmsq_f64 (x, n, d->ln2_hi);
-  f = vfmsq_f64 (f, n, d->ln2_lo);
+  float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0);
+  f = vfmsq_laneq_f64 (f, n, d->ln2, 1);
 
   /* Approximate expm1(f) using polynomial.
      Taylor expansion for expm1(x) has the form: