aarch64/fpu: Sync libmvec routines from 2.39 and before with AOR

This includes a fix for big-endian in AdvSIMD log, some cosmetic changes, and numerous small optimisations, mainly around inlining and using indexed variants of MLA intrinsics.

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
author: Joe Ramsay <Joe.Ramsay@arm.com> 2024-02-20 16:44:13 +0000
committer: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2024-02-26 09:45:50 -0300
commit: e302e1021391d13a9611ba3a910df128830bd19e (patch)
tree: 6b672bb0065aa687d1e5e35c6c7fde07c5345961 /sysdeps/aarch64/fpu/expm1_advsimd.c
parent: 02782fd12849b6673cb5c2728cb750e8ec295aa3 (diff)
download: glibc-e302e1021391d13a9611ba3a910df128830bd19e.tar.gz
glibc-e302e1021391d13a9611ba3a910df128830bd19e.tar.xz
glibc-e302e1021391d13a9611ba3a910df128830bd19e.zip
1 file changed, 5 insertions, 6 deletions
diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c
index 0b85bd06f3..3628398674 100644
--- a/sysdeps/aarch64/fpu/expm1_advsimd.c
+++ b/sysdeps/aarch64/fpu/expm1_advsimd.c
@@ -23,7 +23,7 @@
 static const struct data
 {
   float64x2_t poly[11];
-  float64x2_t invln2, ln2_lo, ln2_hi, shift;
+  float64x2_t invln2, ln2, shift;
   int64x2_t exponent_bias;
 #if WANT_SIMD_EXCEPT
   uint64x2_t thresh, tiny_bound;
@@ -38,8 +38,7 @@ static const struct data
 	    V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
 	    V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) },
   .invln2 = V2 (0x1.71547652b82fep0),
-  .ln2_hi = V2 (0x1.62e42fefa39efp-1),
-  .ln2_lo = V2 (0x1.abc9e3b39803fp-56),
+  .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 },
   .shift = V2 (0x1.8p52),
   .exponent_bias = V2 (0x3ff0000000000000),
 #if WANT_SIMD_EXCEPT
@@ -83,7 +82,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
     x = v_zerofy_f64 (x, special);
 #else
   /* Large input, NaNs and Infs.  */
-  uint64x2_t special = vceqzq_u64 (vcaltq_f64 (x, d->oflow_bound));
+  uint64x2_t special = vcageq_f64 (x, d->oflow_bound);
 #endif
 
   /* Reduce argument to smaller range:
@@ -93,8 +92,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
      where 2^i is exact because i is an integer.  */
   float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift);
   int64x2_t i = vcvtq_s64_f64 (n);
-  float64x2_t f = vfmsq_f64 (x, n, d->ln2_hi);
-  f = vfmsq_f64 (f, n, d->ln2_lo);
+  float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0);
+  f = vfmsq_laneq_f64 (f, n, d->ln2, 1);
 
   /* Approximate expm1(f) using polynomial.
      Taylor expansion for expm1(x) has the form:
author	Joe Ramsay <Joe.Ramsay@arm.com>	2024-02-20 16:44:13 +0000
committer	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2024-02-26 09:45:50 -0300
commit	e302e1021391d13a9611ba3a910df128830bd19e (patch)
tree	6b672bb0065aa687d1e5e35c6c7fde07c5345961 /sysdeps/aarch64/fpu/expm1_advsimd.c
parent	02782fd12849b6673cb5c2728cb750e8ec295aa3 (diff)
download	glibc-e302e1021391d13a9611ba3a910df128830bd19e.tar.gz glibc-e302e1021391d13a9611ba3a910df128830bd19e.tar.xz glibc-e302e1021391d13a9611ba3a910df128830bd19e.zip