From 5062680c602c27c9128ae2e38d199df22a8c2d38 Mon Sep 17 00:00:00 2001
From: Michael Collison <michael.collison@arm.com>
Date: Mon, 23 Oct 2017 10:28:01 +0100
Subject: aarch64: Implement math acceleration via builtins

This patch converts asm statements into builtins for AArch64.  As an
example for the file sysdeps/aarch64/fpu/s_ceil.c, we convert the
function from

double
__ceil (double x)
{
  double result;
  asm ("frintp\t%d0, %d1" :
       "=w" (result) : "w" (x) );
  return result;
}

into

double
__ceil (double x)
{
  return __builtin_ceil (x);
}

Tested on aarch64-linux-gnu with gcc-4.9.4 and gcc-6.

	* sysdeps/aarch64/fpu/e_sqrt.c (ieee754_sqrt): Replace asm statements
	with __builtin_sqrt.
	* sysdeps/aarch64/fpu/e_sqrtf.c (ieee754_sqrtf): Replace asm statements
	with __builtin_sqrtf.
	* sysdeps/aarch64/fpu/s_ceil.c (__ceil): Replace asm statements
	with __builtin_ceil.
	* sysdeps/aarch64/fpu/s_ceilf.c (__ceilf): Replace asm statements
	with __builtin_ceilf.
	* sysdeps/aarch64/fpu/s_floor.c (__floor): Replace asm statements
	with __builtin_floor.
	* sysdeps/aarch64/fpu/s_floorf.c (__floorf): Replace asm statements
	with __builtin_floorf.
	* sysdeps/aarch64/fpu/s_fma.c (__fma): Replace asm statements
	with __builtin_fma.
	* sysdeps/aarch64/fpu/s_fmaf.c (__fmaf): Replace asm statements
	with __builtin_fmaf.
	* sysdeps/aarch64/fpu/s_fmax.c (__fmax): Replace asm statements
	with __builtin_fmax.
	* sysdeps/aarch64/fpu/s_fmaxf.c (__fmaxf): Replace asm statements
	with __builtin_fmaxf.
	* sysdeps/aarch64/fpu/s_fmin.c (__fmin): Replace asm statements
	with __builtin_fmin.
	* sysdeps/aarch64/fpu/s_fminf.c (__fminf): Replace asm statements
	with __builtin_fminf.
	* sysdeps/aarch64/fpu/s_frint.c: Delete file.
	* sysdeps/aarch64/fpu/s_frintf.c: Delete file.
	* sysdeps/aarch64/fpu/s_llrint.c (__llrint): Replace asm statements
	with builtin_rint and conversion to int.
	* sysdeps/aarch64/fpu/s_llrintf.c (__llrintf): Likewise.
	* sysdeps/aarch64/fpu/s_llround.c (__llround): Replace asm statements
	with builtin_llround.
	* sysdeps/aarch64/fpu/s_llroundf.c (__llroundf): Likewise.
	* sysdeps/aarch64/fpu/s_lrint.c (__lrint): Replace asm statements
	with builtin_rint and conversion to long int.
	* sysdeps/aarch64/fpu/s_lrintf.c (__lrintf): Likewise.
	* sysdeps/aarch64/fpu/s_lround.c (__lround): Replace asm statements
	with builtin_lround.
	* sysdeps/aarch64/fpu/s_lroundf.c (__lroundf): Replace asm statements
	with builtin_lroundf.
	* sysdeps/aarch64/fpu/s_nearbyint.c (__nearbyint): Replace asm
	statements with __builtin_nearbyint.
	* sysdeps/aarch64/fpu/s_nearbyintf.c (__nearbyintf): Replace asm
	statements with __builtin_nearbyintf.
	* sysdeps/aarch64/fpu/s_rint.c (__rint): Replace asm statements
	with __builtin_rint.
	* sysdeps/aarch64/fpu/s_rintf.c (__rintf): Replace asm statements
	with __builtin_rintf.
	* sysdeps/aarch64/fpu/s_round.c (__round): Replace asm statements
	with __builtin_round.
	* sysdeps/aarch64/fpu/s_roundf.c (__roundf): Replace asm statements
	with __builtin_roundf.
	* sysdeps/aarch64/fpu/s_trunc.c (__trunc): Replace asm statements
	with __builtin_trunc.
	* sysdeps/aarch64/fpu/s_truncf.c (__truncf): Replace asm statements
	with __builtin_truncf.
	* sysdeps/aarch64/fpu/Makefile: Build e_sqrt[f].c with -fno-math-errno.
---
 sysdeps/aarch64/fpu/s_lrint.c | 46 +++++++++++++------------------------------
 1 file changed, 14 insertions(+), 32 deletions(-)

(limited to 'sysdeps/aarch64/fpu/s_lrint.c')

diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 6ef64e22bf..ee645ad29e 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -19,38 +19,17 @@
 #include <math.h>
 #include <get-rounding-mode.h>
 #include <stdint.h>
+#include <math_private.h>
 
-#ifndef FUNC
-# define FUNC lrint
-#endif
-
-#ifndef ITYPE
-# define ITYPE double
 # define IREG_SIZE 64
-#else
-# ifndef IREG_SIZE
-#  error IREG_SIZE not defined
-# endif
-#endif
 
-#ifndef OTYPE
-# define OTYPE long int
 # ifdef __ILP32__
 #  define OREG_SIZE 32
 # else
 #  define OREG_SIZE 64
 # endif
-#else
-# ifndef OREG_SIZE
-#  error OREG_SIZE not defined
-# endif
-#endif
 
-#if IREG_SIZE == 32
-# define IREGS "s"
-#else
 # define IREGS "d"
-#endif
 
 #if OREG_SIZE == 32
 # define OREGS "w"
@@ -58,15 +37,14 @@
 # define OREGS "x"
 #endif
 
-#define __CONCATX(a,b) __CONCAT(a,b)
 
-OTYPE
-__CONCATX(__,FUNC) (ITYPE x)
+long int
+__lrint (double x)
 {
-  OTYPE result;
-  ITYPE temp;
 
 #if IREG_SIZE == 64 && OREG_SIZE == 32
+  long int result;
+
   if (__builtin_fabs (x) > INT32_MAX)
     {
       /* Converting large values to a 32 bit int may cause the frintx/fcvtza
@@ -96,10 +74,14 @@ __CONCATX(__,FUNC) (ITYPE x)
       return result;
     }
 #endif
-  asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
-        "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
-        : "=r" (result), "=w" (temp) : "w" (x) );
-  return result;
+
+  double r =  __builtin_rint (x);
+
+  /* Prevent gcc from calling lrint directly when compiled with
+     -fno-math-errno by inserting a barrier.  */
+
+  math_opt_barrier (r);
+  return r;
 }
 
-weak_alias (__CONCATX(__,FUNC), FUNC)
+weak_alias (__lrint, lrint)
-- 
cgit 1.4.1