about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc32/power7
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2019-03-08 19:32:15 +0000
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2019-06-12 14:32:36 -0300
commit2666f96390b8283cf8dacd0d0f268bf06ff10eb8 (patch)
tree11df0f40297f4a20ccd776b227d20899a00f8374 /sysdeps/powerpc/powerpc32/power7
parent197dbda1a155324bfc52ae1bc3cc26e57955c1cf (diff)
downloadglibc-2666f96390b8283cf8dacd0d0f268bf06ff10eb8.tar.gz
glibc-2666f96390b8283cf8dacd0d0f268bf06ff10eb8.tar.xz
glibc-2666f96390b8283cf8dacd0d0f268bf06ff10eb8.zip
powerpc: Remove optimized isnan
The powerpc isnan optimizations are not really a gain:

  - GCC will call libm iff -fsignaling-nans is used. This usage pattern
    is usually not performance oriented and for such calls PLT overhead
    should dominate execution time.

  - The power5, power6, and power6x versions are just micro-optimizations
    to reduce the Load-Hit-Store hazards from floating-point to general
    register transfer, and current GCC already has support to minimize
    it by inserting either extra nops or group dispatch instructions.

  - The power7 uses ftdiv to optimize for some input patterns, but at
    the cost of others.  Comparing against generic C implementation built
    for powerpc-linux-gnu-power4 (which uses the hp-timing support on
    benchtests):

    - Generic sysdeps/ieee754 implementation:
      "isnan": {
       "": {
        "duration": 4.98415e+09,
        "iterations": 2.34516e+09,
        "max": 45.925,
        "min": 2.052,
        "mean": 2.12529
       },
       "INF": {
        "duration": 4.74057e+09,
        "iterations": 1.69761e+09,
        "max": 91.01,
        "min": 2.052,
        "mean": 2.79249
       },
       "NAN": {
        "duration": 4.74071e+09,
        "iterations": 1.68768e+09,
        "max": 282.343,
        "min": 2.052,
        "mean": 2.809
       }
      }

    - power7 optimized one:
    $ ./testrun.sh benchtests/bench-isnan
      "isnan": {
       "": {
        "duration": 4.96842e+09,
        "iterations": 2.56297e+09,
        "max": 50.048,
        "min": 1.872,
        "mean": 1.93854
       },
       "INF": {
        "duration": 4.76648e+09,
        "iterations": 1.54213e+09,
        "max": 373.408,
        "min": 2.661,
        "mean": 3.09084
       },
       "NAN": {
        "duration": 4.76845e+09,
        "iterations": 1.54515e+09,
        "max": 51.016,
        "min": 2.736,
        "mean": 3.08607
       }
      }

    So it basically optimizes marginally for normal numbers while
    increasing the latency for other kinds of FP values.

  - The generic implementation requires getting the floating point
    status, disabling the invalid operation bit, and restoring the
    floating-point status.  Each operation is costly and requires
    flushing the FP pipeline.

    Using the same scenario as in the previous analysis:

      "isnan": {
       "": {
        "duration": 5.08284e+09,
        "iterations": 6.2898e+08,
        "max": 41.844,
        "min": 8.057,
        "mean": 8.08108
       },
       "INF": {
        "duration": 4.97904e+09,
        "iterations": 6.16176e+08,
        "max": 39.661,
        "min": 8.057,
        "mean": 8.08055
       },
       "NAN": {
        "duration": 4.98695e+09,
        "iterations": 5.95866e+08,
        "max": 29.728,
        "min": 8.345,
        "mean": 8.36925
       }
      }

  - The power8 implementation is just the generic implementation using
    ISA 2.07 mfvsrd instruction (which GCC uses for generic implementation).
    So generic implementation is the best option for powerpc64le.

Checked on powerpc-linux-gnu (built without --with-cpu, with
--with-cpu=power4 and with --with-cpu=power5+ and --disable-multi-arch),
powerpc64-linux-gnu (built without --with-cpu and with --with-cpu=power5+
and --disable-multi-arch).

	* sysdeps/powerpc/fpu/s_isnan.c: Remove file.
	* sysdeps/powerpc/fpu/s_isnanf.S: Likewise.
	* sysdeps/powerpc/powerpc32/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile
	(sysdeps_routines, libm-sysdep_routines): Remove s_isnan-* and
	s_isnanf-* objects.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power5.S:
	Remove file.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power6.S:
	Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power7.S:
	Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-ppc32.S:
	Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan.c: Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power5.S:
	Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power6.S:
	Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf.c: Likewise.
	* sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S: Likewise.
	* sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S: Likewise.
	* sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile (sysdep_calls):
	Remove s_isnan-* and s_isnanf-* objects.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S:
	Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S: Likewise.
	* sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S: Likewise.
	* sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S: Likewise.

Reviewed-by: Gabriel F. T. Gomes <gabrielftg@linux.ibm.com>
Diffstat (limited to 'sysdeps/powerpc/powerpc32/power7')
-rw-r--r--sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S90
-rw-r--r--sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S1
2 files changed, 0 insertions, 91 deletions
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S b/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
deleted file mode 100644
index 524a4a6a32..0000000000
--- a/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
+++ /dev/null
@@ -1,90 +0,0 @@
-/* isnan().  PowerPC32/POWER7 version.
-   Copyright (C) 2010-2019 Free Software Foundation, Inc.
-   Contributed by Luis Machado <luisgpm@br.ibm.com>.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-/* int __isnan(x)  */
-	.section    .rodata.cst8,"aM",@progbits,8
-	.align 3
-.LC0:   /* 1.0 */
-	.quad	    0x3ff0000000000000
-
-	.section    ".text"
-	.type	    __isnan, @function
-	.machine    power7
-ENTRY (__isnan)
-#ifdef SHARED
-	mflr	r11
-	cfi_register(lr,r11)
-
-	SETUP_GOT_ACCESS(r9,got_label)
-	addis	r9,r9,.LC0-got_label@ha
-	lfd	fp0,.LC0-got_label@l(r9)
-
-	mtlr	r11
-	cfi_same_value (lr)
-#else
-	lis	r9,.LC0@ha
-	lfd	fp0,.LC0@l(r9)
-#endif
-	ftdiv	cr7,fp1,fp0
-	li	r3,0
-	bflr	30	      /* If not NaN or Inf, finish. */
-
-	/* We have -INF/+INF/NaN or a denormal.  */
-
-	stwu	r1,-16(r1)    /* Allocate stack space.  */
-	stfd	fp1,8(r1)     /* Transfer FP to GPR's.  */
-	ori	2,2,0	      /* Force a new dispatch group.  */
-	lwz     r4,8+HIWORD(r1) /* Load the upper half of the FP value.  */
-	lwz     r5,8+LOWORD(r1) /* Load the lower half of the FP value.  */
-	addi	r1,r1,16      /* Reset the stack pointer.  */
-	lis     r0,0x7ff0     /* Load the upper portion for an INF/NaN.  */
-	clrlwi  r4,r4,1	      /* r4 = abs(r4).  */
-	cmpw    cr7,r4,r0     /* if (abs(r4) <= inf).  */
-	cmpwi   cr6,r5,0      /* r5 == 0x00000000?  */
-	bltlr	cr7	      /* LT means we have a denormal.  */
-	bgt	cr7,L(NaN)    /* GT means we have a NaN.  */
-	beqlr	cr6	      /* EQ means we have +/-INF.  */
-L(NaN):
-	li      r3,1	      /* x == NaN?  */
-	blr
-	END (__isnan)
-
-hidden_def (__isnan)
-weak_alias (__isnan, isnan)
-
-/* It turns out that the 'double' version will also always work for
-   single-precision.  */
-strong_alias (__isnan, __isnanf)
-hidden_def (__isnanf)
-weak_alias (__isnanf, isnanf)
-
-#ifdef NO_LONG_DOUBLE
-strong_alias (__isnan, __isnanl)
-weak_alias (__isnan, isnanl)
-#endif
-
-#if !IS_IN (libm)
-# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
-compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
-compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
-# endif
-#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S b/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S
deleted file mode 100644
index b48c85e0d3..0000000000
--- a/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S
+++ /dev/null
@@ -1 +0,0 @@
-/* This function uses the same code as s_isnan.S.  */