From fe13a20c37578f08ce393ccaeb45caeb48815ca5 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Mon, 17 Feb 2014 10:44:08 -0600 Subject: PowerPC: llround/llroundf POWER8 optimization This patch adds an optimized llround/llroundf implementation for POWER8 using the new Move From VSR Doubleword instruction to gain some cycles on the FP to GPR register move. --- ChangeLog | 11 +++++ sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile | 2 +- .../powerpc64/fpu/multiarch/s_llround-power8.S | 31 ++++++++++++++ .../powerpc/powerpc64/fpu/multiarch/s_llround.c | 11 +++-- sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S | 47 ++++++++++++++++++++++ 5 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S create mode 100644 sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S diff --git a/ChangeLog b/ChangeLog index 8912bb7512..ee8b36395b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2014-02-27 Adhemerval Zanella + + * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile: Add llround power8 + implementation. + * sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S: New file: + POWER8 llround ifunc implementation. + * sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c (__llround): Add + POWER8 implementation. + * sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S: New file: + POWER8 llround implementation. 
+ 2014-02-27 Adhemerval Zanella * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile: Add llrint power8 diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile index f88247892b..0e3eac7190 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile @@ -24,7 +24,7 @@ libm-sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ s_modff-power5+ s_modff-ppc64 e_hypot-ppc64 \ e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \ s_isnan-power8 s_isinf-power8 s_finite-power8 \ - s_llrint-power8 + s_llrint-power8 s_llround-power8 CFLAGS-s_logbf-power7.c = -mcpu=power7 CFLAGS-s_logbl-power7.c = -mcpu=power7 diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S new file mode 100644 index 0000000000..41c61a1bc8 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S @@ -0,0 +1,31 @@ +/* llround(). PowerPC64/POWER8 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __llround __llround_power8 /* Assemble the included body under the ifunc candidate's name. */ + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S> diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c index a4d1bf3a2a..7dba17e96e 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c @@ -27,12 +27,15 @@ extern __typeof (__llround) __llround_ppc64 attribute_hidden; extern __typeof (__llround) __llround_power5plus attribute_hidden; extern __typeof (__llround) __llround_power6x attribute_hidden; +extern __typeof (__llround) __llround_power8 attribute_hidden; libc_ifunc (__llround, - (hwcap & PPC_FEATURE_POWER6_EXT) - ? __llround_power6x : - (hwcap & PPC_FEATURE_POWER5_PLUS) - ? __llround_power5plus + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __llround_power8 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __llround_power6x : + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __llround_power5plus : __llround_ppc64); weak_alias (__llround, llround) diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S new file mode 100644 index 0000000000..b00d4d616f --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S @@ -0,0 +1,47 @@ +/* llround function. POWER8 PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1; .long emits the word in the target's byte order, so the encoding is also correct on little-endian. */ + +/* long long [r3] llround (double x [fp1]) */ + +ENTRY (__llround) + CALL_MCOUNT 0 + frin fp1,fp1 /* Round to nearest integer, ties away from zero. */ + fctidz fp1,fp1 /* Convert To Integer DW round toward 0. */ + MFVSRD_R3_V1 /* Move the integer result from fp1 (vs1) into r3. */ + blr +END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif -- cgit 1.4.1