about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2019-03-11 03:29:37 +0000
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2019-06-12 14:32:39 -0300
commit119269606996258ee0a53ff9e5828d474ee0f698 (patch)
treea8b24ad011f24ce14d08aea07f6eeab4735dcbbf /sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S
parenta72186761b2ed913fd5ea3df2de2aa446456aa98 (diff)
downloadglibc-119269606996258ee0a53ff9e5828d474ee0f698.tar.gz
glibc-119269606996258ee0a53ff9e5828d474ee0f698.tar.xz
glibc-119269606996258ee0a53ff9e5828d474ee0f698.zip
powerpc: Remove optimized finite
The powerpc finite optimization do not show much gain:

  - GCC will call libm iff -fsignaling-nans is used. This usage pattern
    is usually not performance oriented and for such calls PLT overhead
    should dominate execution time.

  - The power7 uses ftdiv to optimize for some input patterns, but at
    cost of others.  Comparing against generic C implementation built
    for powerpc64-linux-gnu-power7 (--with-cpu=power7):

    - Generic sysdeps/ieee754 implementation:
       "isfinite": {
        "": {
         "duration": 5.0082e+09,
         "iterations": 2.45299e+09,
         "max": 43.824,
         "min": 2.008,
         "mean": 2.04167
        },
        "INF": {
         "duration": 4.66554e+09,
         "iterations": 2.28288e+09,
         "max": 35.73,
         "min": 2.008,
         "mean": 2.04371
        },
        "NAN": {
         "duration": 4.66274e+09,
         "iterations": 2.28716e+09,
         "max": 34.161,
         "min": 2.009,
         "mean": 2.03866
        }
       }

    - power7 optimized one:
       "isfinite": {
        "": {
         "duration": 4.99111e+09,
         "iterations": 2.65566e+09,
         "max": 25.015,
         "min": 1.716,
         "mean": 1.87942
        },
        "INF": {
         "duration": 4.6783e+09,
         "iterations": 2.0999e+09,
         "max": 35.264,
         "min": 1.868,
         "mean": 2.22787
        },
        "NAN": {
         "duration": 4.67915e+09,
         "iterations": 2.08678e+09,
         "max": 38.099,
         "min": 1.869,
         "mean": 2.24228
        }
       }

     So it basically optimizes marginally for normal numbers while
     increasing the latency for other kind of FP.

  - The power8 implementation is just the generic implementation using
    ISA 2.07 mfvsrd instruction (which GCC uses for generic implementation).
    So generic implementation is the best option for powerpc64le.

Checked on powerpc-linux-gnu (built without --with-cpu, with
--with-cpu=power4 and with --with-cpu=power5+ and --disable-multi-arch),
powerpc64-linux-gnu (built without --with-cp and with --with-cpu=power5+
and --disable-multi-arch).

	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile
	(sysdeps_routines, libm-sysdep_routines): Remove s_finite*
	objects.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-power7.S:
	Remove file.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-ppc32.c:
	Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite.c: Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef-ppc32.c:
	Likewise.
	* sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef.c: Likewise.
	* sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S: Likewise.
	* sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile (sysdep_call):
	Remove s_finite* objects.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S: Remove file.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c: Likewise.
	* sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S: Likewise.
	* sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S: Likewise.
	* sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S: Likewise.
	* sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S: Likewise.

Reviewed-by: Gabriel F. T. Gomes <gabrielftg@linux.ibm.com>
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S')
-rw-r--r--sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S56
1 files changed, 0 insertions, 56 deletions
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S
deleted file mode 100644
index f5316e898f..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* isfinite().  PowerPC64/POWER8 version.
-   Copyright (C) 2014-2019 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
-
-/* int [r3] __finite ([fp1] x)  */
-
-ENTRY_TOCLESS (__finite, 4)
-	CALL_MCOUNT 0
-	MFVSRD_R3_V1
-	lis     r9,0x8010
-	clrldi  r3,r3,1       /* r3 = r3 & 0x8000000000000000  */
-	rldicr  r9,r9,32,31   /* r9 = (r9 << 32) & 0xffffffff  */
-	add     r3,r3,r9
-	rldicl  r3,r3,1,63
-	blr
-END (__finite)
-
-hidden_def (__finite)
-weak_alias (__finite, finite)
-
-/* It turns out that the 'double' version will also always work for
-   single-precision.  */
-strong_alias (__finite, __finitef)
-hidden_def (__finitef)
-weak_alias (__finitef, finitef)
-
-#if IS_IN (libm)
-# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0)
-compat_symbol (libm, __finite, __finitel, GLIBC_2_0)
-compat_symbol (libm, finite, finitel, GLIBC_2_0)
-# endif
-#else
-# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
-compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
-compat_symbol (libc, finite, finitel, GLIBC_2_0);
-# endif
-#endif