From 43cfdf8f4811c63bca8a2185d82a6d92977a125b Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed, 4 Jul 2018 12:29:29 +0100
Subject: Clean up converttoint handling and document the semantics

This patch currently only affects aarch64.

The roundtoint and converttoint internal functions are only called with small
values, so 32 bit result is enough for converttoint and it is a signed int
conversion so the return type is changed to int32_t.

The original idea was to help the compiler keeping the result in uint64_t,
then it's clear that no sign extension is needed and there is no accidental
undefined or implementation defined signed int arithmetics.

But it turns out gcc does a good job with inlining so changing the type has
no overhead and the semantics of the conversion is less surprising this way.
Since we want to allow the asuint64 (x + 0x1.8p52) style conversion, the top
bits were never usable and the existing code ensures that only the bottom
32 bits of the conversion result are used.

On aarch64 the neon intrinsics (which round ties to even) are changed to
round and lround (which round ties away from zero) this does not affect the
results in a significant way, but more portable (relies on round and lround
being inlined which works with -fno-math-errno).

The TOINT_SHIFT and TOINT_RINT macros were removed, only keep separate code
paths for TOINT_INTRINSICS and !TOINT_INTRINSICS.

	* sysdeps/aarch64/fpu/math_private.h (roundtoint): Use round.
	(converttoint): Use lround.
	* sysdeps/ieee754/flt-32/math_config.h (roundtoint): Declare and
	document the semantics when TOINT_INTRINSICS is set.
	(converttoint): Likewise.
	(TOINT_RINT): Remove.
	(TOINT_SHIFT): Remove.
	* sysdeps/ieee754/flt-32/e_expf.c (__expf): Remove the TOINT_RINT code
	path.
---
 sysdeps/aarch64/fpu/math_private.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'sysdeps/aarch64/fpu')

diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h
index fcd02c0654..d2e0abc0b2 100644
--- a/sysdeps/aarch64/fpu/math_private.h
+++ b/sysdeps/aarch64/fpu/math_private.h
@@ -21,6 +21,8 @@
 
 #include <fenv.h>
 #include <fpu_control.h>
+#include <stdint.h>
+#include <math.h>
 
 static __always_inline void
 libc_feholdexcept_aarch64 (fenv_t *envp)
@@ -298,25 +300,20 @@ libc_feresetround_noex_aarch64_ctx (struct rm_ctx *ctx)
 #define libc_feresetround_noexf_ctx	libc_feresetround_noex_aarch64_ctx
 #define libc_feresetround_noexl_ctx	libc_feresetround_noex_aarch64_ctx
 
-/* Hack: only include the large arm_neon.h when needed.  */
-#ifdef _MATH_CONFIG_H
-# include <arm_neon.h>
-
-/* ACLE intrinsics for frintn and fcvtns instructions.  */
-# define TOINT_INTRINSICS 1
+/* Use inline round and lround instructions.  */
+#define TOINT_INTRINSICS 1
 
 static inline double_t
 roundtoint (double_t x)
 {
-  return vget_lane_f64 (vrndn_f64 (vld1_f64 (&x)), 0);
+  return round (x);
 }
 
-static inline uint64_t
+static inline int32_t
 converttoint (double_t x)
 {
-  return vcvtnd_s64_f64 (x);
+  return lround (x);
 }
-#endif
 
 #include_next <math_private.h>
 
-- 
cgit 1.4.1