diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2024-02-20 16:44:13 +0000 |
---|---|---|
committer | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2024-04-09 19:58:04 +0100 |
commit | b0e0a07018098c2c5927796be5681a298c312626 (patch) | |
tree | b63fa0f5c87aeb3336c9ad7c00b0546d12e95b2e /sysdeps/aarch64/fpu/exp2_advsimd.c | |
parent | 31c7d69af59da0da80caa74b2ec6ae149013384d (diff) | |
download | glibc-b0e0a07018098c2c5927796be5681a298c312626.tar.gz glibc-b0e0a07018098c2c5927796be5681a298c312626.tar.xz glibc-b0e0a07018098c2c5927796be5681a298c312626.zip |
aarch64/fpu: Sync libmvec routines from 2.39 and before with AOR
This includes a fix for big-endian in AdvSIMD log, some cosmetic changes, and numerous small optimisations mainly around inlining and using indexed variants of MLA intrinsics. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> (cherry picked from commit e302e1021391d13a9611ba3a910df128830bd19e)
Diffstat (limited to 'sysdeps/aarch64/fpu/exp2_advsimd.c')
-rw-r--r-- | sysdeps/aarch64/fpu/exp2_advsimd.c | 20 |
1 file changed, 11 insertions, 9 deletions
diff --git a/sysdeps/aarch64/fpu/exp2_advsimd.c b/sysdeps/aarch64/fpu/exp2_advsimd.c index 391a93180c..ae1e63d503 100644 --- a/sysdeps/aarch64/fpu/exp2_advsimd.c +++ b/sysdeps/aarch64/fpu/exp2_advsimd.c @@ -24,6 +24,7 @@ #define IndexMask (N - 1) #define BigBound 1022.0 #define UOFlowBound 1280.0 +#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ static const struct data { @@ -48,14 +49,13 @@ lookup_sbits (uint64x2_t i) #if WANT_SIMD_EXCEPT -# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ # define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */ /* Call scalar exp2 as a fallback. */ static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x) +special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special) { - return v_call_f64 (exp2, x, x, v_u64 (0xffffffffffffffff)); + return v_call_f64 (exp2, x, y, is_special); } #else @@ -65,7 +65,7 @@ special_case (float64x2_t x) # define SpecialBias1 0x7000000000000000 /* 0x1p769. */ # define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ -static float64x2_t VPCS_ATTR +static inline float64x2_t VPCS_ATTR special_case (float64x2_t s, float64x2_t y, float64x2_t n, const struct data *d) { @@ -94,10 +94,10 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) #if WANT_SIMD_EXCEPT uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres)); - /* If any special case (inf, nan, small and large x) is detected, - fall back to scalar for all lanes. */ - if (__glibc_unlikely (v_any_u64 (cmp))) - return special_case (x); + /* Mask special lanes and retain a copy of x for passing to special-case + handler. 
*/ + float64x2_t xc = x; + x = v_zerofy_f64 (x, cmp); #else cmp = vcagtq_f64 (x, d->scale_big_bound); #endif @@ -120,9 +120,11 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly); y = vmulq_f64 (r, y); -#if !WANT_SIMD_EXCEPT if (__glibc_unlikely (v_any_u64 (cmp))) +#if !WANT_SIMD_EXCEPT return special_case (s, y, n, d); +#else + return special_case (xc, vfmaq_f64 (s, s, y), cmp); #endif return vfmaq_f64 (s, s, y); } |