about summary refs log tree commit diff
path: root/sysdeps/aarch64/fpu/v_expf_inline.h
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/aarch64/fpu/v_expf_inline.h')
-rw-r--r--sysdeps/aarch64/fpu/v_expf_inline.h71
1 files changed, 71 insertions, 0 deletions
diff --git a/sysdeps/aarch64/fpu/v_expf_inline.h b/sysdeps/aarch64/fpu/v_expf_inline.h
new file mode 100644
index 0000000000..a3b0e32f9e
--- /dev/null
+++ b/sysdeps/aarch64/fpu/v_expf_inline.h
@@ -0,0 +1,71 @@
+/* Helper for single-precision AdvSIMD routines which depend on exp
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef AARCH64_FPU_V_EXPF_INLINE_H
+#define AARCH64_FPU_V_EXPF_INLINE_H
+
+#include "v_math.h"
+
+struct v_expf_data
+{
+  float32x4_t poly[5];
+  float32x4_t shift, invln2_and_ln2;
+};
+
+/* maxerr: 1.45358 +0.5 ulp.  */
+#define V_EXPF_DATA                                                           \
+  {                                                                           \
+    .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f),  \
+	      V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) },                     \
+    .shift = V4 (0x1.8p23f),                                                  \
+    .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },   \
+  }
+
+#define ExponentBias v_u32 (0x3f800000) /* asuint(1.0f).  */
+#define C(i) d->poly[i]
+
+static inline float32x4_t
+v_expf_inline (float32x4_t x, const struct v_expf_data *d)
+{
+  /* Helper routine for calculating exp(x).
+     Copied from v_expf.c, with all special-case handling removed - the
+     calling routine should handle special values if required.  */
+
+  /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+     x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
+  float32x4_t n, r, z;
+  z = vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0);
+  n = vsubq_f32 (z, d->shift);
+  r = vfmsq_laneq_f32 (x, n, d->invln2_and_ln2, 1);
+  r = vfmsq_laneq_f32 (r, n, d->invln2_and_ln2, 2);
+  uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
+  float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
+
+  /* Custom order-4 Estrin avoids building high order monomial.  */
+  float32x4_t r2 = vmulq_f32 (r, r);
+  float32x4_t p, q, poly;
+  p = vfmaq_f32 (C (1), C (0), r);
+  q = vfmaq_f32 (C (3), C (2), r);
+  q = vfmaq_f32 (q, p, r2);
+  p = vmulq_f32 (C (4), r);
+  poly = vfmaq_f32 (p, q, r2);
+  return vfmaq_f32 (scale, poly, scale);
+}
+
+#endif