about summary refs log tree commit diff
path: root/sysdeps/aarch64/fpu/tanf_advsimd.c
diff options
context:
space:
mode:
authorJoe Ramsay <Joe.Ramsay@arm.com>2024-05-02 16:43:13 +0100
committerSzabolcs Nagy <szabolcs.nagy@arm.com>2024-05-14 13:10:33 +0100
commit90a6ca8b28bf34e361e577e526e1b0f4c39a32a5 (patch)
tree69830b0b2204a585bcca976208ae412543c19dc1 /sysdeps/aarch64/fpu/tanf_advsimd.c
parentec6ed525f1aa24fd38ea5153e88d14d92d0d2f82 (diff)
downloadglibc-90a6ca8b28bf34e361e577e526e1b0f4c39a32a5.tar.gz
glibc-90a6ca8b28bf34e361e577e526e1b0f4c39a32a5.tar.xz
glibc-90a6ca8b28bf34e361e577e526e1b0f4c39a32a5.zip
aarch64: Fix AdvSIMD libmvec routines for big-endian
Previously many routines used * to load from vector types stored
in the data table. This is emitted as ldr, which byte-swaps the
entire vector register, and causes bugs for big-endian when not
all lanes contain the same value. When a vector is to be used
this way, it has been replaced with an array and the load with an
explicit ld1 intrinsic, which byte-swaps only within lanes.

As well, many routines previously used non-standard GCC syntax
for vector operations such as indexing into vectors types with []
and assembling vectors using {}. This syntax should not be mixed
with ACLE, as the former does not respect endianness whereas the
latter does. Such examples have been replaced with, for instance,
vcombine_* and vgetq_lane* intrinsics. Helpers which only use the
GCC syntax, such as the v_call helpers, do not need changing as
they do not use intrinsics.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Diffstat (limited to 'sysdeps/aarch64/fpu/tanf_advsimd.c')
-rw-r--r--sysdeps/aarch64/fpu/tanf_advsimd.c11
1 files changed, 6 insertions, 5 deletions
diff --git a/sysdeps/aarch64/fpu/tanf_advsimd.c b/sysdeps/aarch64/fpu/tanf_advsimd.c
index 5a7489390a..705586f0c0 100644
--- a/sysdeps/aarch64/fpu/tanf_advsimd.c
+++ b/sysdeps/aarch64/fpu/tanf_advsimd.c
@@ -23,7 +23,7 @@
 static const struct data
 {
   float32x4_t poly[6];
-  float32x4_t pi_consts;
+  float pi_consts[4];
   float32x4_t shift;
 #if !WANT_SIMD_EXCEPT
   float32x4_t range_val;
@@ -95,16 +95,17 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x)
 #endif
 
   /* n = rint(x/(pi/2)).  */
-  float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3);
+  float32x4_t pi_consts = vld1q_f32 (d->pi_consts);
+  float32x4_t q = vfmaq_laneq_f32 (d->shift, x, pi_consts, 3);
   float32x4_t n = vsubq_f32 (q, d->shift);
   /* Determine if x lives in an interval, where |tan(x)| grows to infinity.  */
   uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1));
 
   /* r = x - n * (pi/2)  (range reduction into -pi./4 .. pi/4).  */
   float32x4_t r;
-  r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0);
-  r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1);
-  r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2);
+  r = vfmaq_laneq_f32 (x, n, pi_consts, 0);
+  r = vfmaq_laneq_f32 (r, n, pi_consts, 1);
+  r = vfmaq_laneq_f32 (r, n, pi_consts, 2);
 
   /* If x lives in an interval, where |tan(x)|
      - is finite, then use a polynomial approximation of the form