From 5062680c602c27c9128ae2e38d199df22a8c2d38 Mon Sep 17 00:00:00 2001 From: Michael Collison Date: Mon, 23 Oct 2017 10:28:01 +0100 Subject: aarch64: Implement math acceleration via builtins This patch converts asm statements into builtins for AArch64. For example, in the file sysdeps/aarch64/fpu/s_ceil.c we convert the function from double __ceil (double x) { double result; asm ("frintp\t%d0, %d1" : "=w" (result) : "w" (x) ); return result; } into double __ceil (double x) { return __builtin_ceil (x); } Tested on aarch64-linux-gnu with gcc-4.9.4 and gcc-6. * sysdeps/aarch64/fpu/e_sqrt.c (__ieee754_sqrt): Replace asm statements with __builtin_sqrt. * sysdeps/aarch64/fpu/e_sqrtf.c (__ieee754_sqrtf): Replace asm statements with __builtin_sqrtf. * sysdeps/aarch64/fpu/s_ceil.c (__ceil): Replace asm statements with __builtin_ceil. * sysdeps/aarch64/fpu/s_ceilf.c (__ceilf): Replace asm statements with __builtin_ceilf. * sysdeps/aarch64/fpu/s_floor.c (__floor): Replace asm statements with __builtin_floor. * sysdeps/aarch64/fpu/s_floorf.c (__floorf): Replace asm statements with __builtin_floorf. * sysdeps/aarch64/fpu/s_fma.c (__fma): Replace asm statements with __builtin_fma. * sysdeps/aarch64/fpu/s_fmaf.c (__fmaf): Replace asm statements with __builtin_fmaf. * sysdeps/aarch64/fpu/s_fmax.c (__fmax): Replace asm statements with __builtin_fmax. * sysdeps/aarch64/fpu/s_fmaxf.c (__fmaxf): Replace asm statements with __builtin_fmaxf. * sysdeps/aarch64/fpu/s_fmin.c (__fmin): Replace asm statements with __builtin_fmin. * sysdeps/aarch64/fpu/s_fminf.c (__fminf): Replace asm statements with __builtin_fminf. * sysdeps/aarch64/fpu/s_frint.c: Delete file. * sysdeps/aarch64/fpu/s_frintf.c: Delete file. * sysdeps/aarch64/fpu/s_llrint.c (__llrint): Replace asm statements with __builtin_rint and conversion to long long int. * sysdeps/aarch64/fpu/s_llrintf.c (__llrintf): Likewise. * sysdeps/aarch64/fpu/s_llround.c (__llround): Replace asm statements with __builtin_llround.
* sysdeps/aarch64/fpu/s_llroundf.c (__llroundf): Likewise. * sysdeps/aarch64/fpu/s_lrint.c (__lrint): Replace asm statements with __builtin_rint and conversion to long int. * sysdeps/aarch64/fpu/s_lrintf.c (__lrintf): Likewise. * sysdeps/aarch64/fpu/s_lround.c (__lround): Replace asm statements with __builtin_lround. * sysdeps/aarch64/fpu/s_lroundf.c (__lroundf): Replace asm statements with __builtin_lroundf. * sysdeps/aarch64/fpu/s_nearbyint.c (__nearbyint): Replace asm statements with __builtin_nearbyint. * sysdeps/aarch64/fpu/s_nearbyintf.c (__nearbyintf): Replace asm statements with __builtin_nearbyintf. * sysdeps/aarch64/fpu/s_rint.c (__rint): Replace asm statements with __builtin_rint. * sysdeps/aarch64/fpu/s_rintf.c (__rintf): Replace asm statements with __builtin_rintf. * sysdeps/aarch64/fpu/s_round.c (__round): Replace asm statements with __builtin_round. * sysdeps/aarch64/fpu/s_roundf.c (__roundf): Replace asm statements with __builtin_roundf. * sysdeps/aarch64/fpu/s_trunc.c (__trunc): Replace asm statements with __builtin_trunc. * sysdeps/aarch64/fpu/s_truncf.c (__truncf): Replace asm statements with __builtin_truncf. * sysdeps/aarch64/fpu/Makefile: Build e_sqrt[f].c with -fno-math-errno.
--- sysdeps/aarch64/fpu/s_lrint.c | 46 +++++++++++++------------------------------ 1 file changed, 14 insertions(+), 32 deletions(-) (limited to 'sysdeps/aarch64/fpu/s_lrint.c') diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c index 6ef64e22bf..ee645ad29e 100644 --- a/sysdeps/aarch64/fpu/s_lrint.c +++ b/sysdeps/aarch64/fpu/s_lrint.c @@ -19,38 +19,17 @@ #include <math.h> #include <get-rounding-mode.h> #include <stdint.h> +#include <math_private.h> -#ifndef FUNC -# define FUNC lrint -#endif - -#ifndef ITYPE -# define ITYPE double # define IREG_SIZE 64 -#else -# ifndef IREG_SIZE -# error IREG_SIZE not defined -# endif -#endif -#ifndef OTYPE -# define OTYPE long int # ifdef __ILP32__ # define OREG_SIZE 32 # else # define OREG_SIZE 64 # endif -#else -# ifndef OREG_SIZE -# error OREG_SIZE not defined -# endif -#endif -#if IREG_SIZE == 32 -# define IREGS "s" -#else # define IREGS "d" -#endif #if OREG_SIZE == 32 # define OREGS "w" @@ -58,15 +37,14 @@ # define OREGS "x" #endif -#define __CONCATX(a,b) __CONCAT(a,b) -OTYPE -__CONCATX(__,FUNC) (ITYPE x) +long int +__lrint (double x) { - OTYPE result; - ITYPE temp; #if IREG_SIZE == 64 && OREG_SIZE == 32 + long int result; + if (__builtin_fabs (x) > INT32_MAX) { /* Converting large values to a 32 bit int may cause the frintx/fcvtza @@ -96,10 +74,14 @@ __CONCATX(__,FUNC) (ITYPE x) return result; } #endif - asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t" - "fcvtzs" "\t%" OREGS "0, %" IREGS "1" - : "=r" (result), "=w" (temp) : "w" (x) ); - return result; + + double r = __builtin_rint (x); + + /* Prevent gcc from calling lrint directly when compiled with + -fno-math-errno by inserting a barrier. */ + + math_opt_barrier (r); + return r; } -weak_alias (__CONCATX(__,FUNC), FUNC) +weak_alias (__lrint, lrint) -- cgit 1.4.1