diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2017-10-22 08:11:15 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2017-10-22 08:12:41 -0700 |
commit | 5313581cb52fd5d3d2cf222ddb6f8f86f090974f (patch) | |
tree | e2ec5d944c1089cec1de29c5c72c3fb600a8c3fb /sysdeps | |
parent | 6089a3ee24cede17e9443aef0aa72fa1a0ba1548 (diff) | |
download | glibc-5313581cb52fd5d3d2cf222ddb6f8f86f090974f.tar.gz glibc-5313581cb52fd5d3d2cf222ddb6f8f86f090974f.tar.xz glibc-5313581cb52fd5d3d2cf222ddb6f8f86f090974f.zip |
i386: Replace assembly versions of e_powf with generic e_powf.c
This patch replaces i386 assembly versions of e_powf with generic e_powf.c.

For workload-spec2017.wrf, on Nehalem, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   230.855   78.3358   194%
latency                 231.685   94.1259   146%

On Skylake, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   239.858   47.4713   405%
latency                 247.57    93.8798   163%

On IvyBridge with --disable-multi-arch, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   269.078   63.3758   324%
latency                 271.473   102.091   165%

	* sysdeps/i386/fpu/e_powf.S: Removed.
	* sysdeps/i386/fpu/e_powf_log2_data.c: Likewise.
	* sysdeps/i386/fpu/w_powf.c: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_powf.c.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_powf-sse2.
	(CFLAGS-e_powf-sse2.c): New.
	* sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c: New file.
	* sysdeps/i386/i686/fpu/multiarch/e_powf.c: Likewise.
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/i386/fpu/e_powf.S | 392 | ||||
-rw-r--r-- | sysdeps/i386/fpu/e_powf_log2_data.c | 1 | ||||
-rw-r--r-- | sysdeps/i386/fpu/libm-test-ulps | 6 | ||||
-rw-r--r-- | sysdeps/i386/fpu/w_powf.c | 1 | ||||
-rw-r--r-- | sysdeps/i386/i686/fpu/multiarch/Makefile | 3 | ||||
-rw-r--r-- | sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c | 3 | ||||
-rw-r--r-- | sysdeps/i386/i686/fpu/multiarch/e_powf.c | 43 | ||||
-rw-r--r-- | sysdeps/i386/i686/fpu/multiarch/libm-test-ulps | 18 |
8 files changed, 66 insertions, 401 deletions
diff --git a/sysdeps/i386/fpu/e_powf.S b/sysdeps/i386/fpu/e_powf.S deleted file mode 100644 index 467ef2380b..0000000000 --- a/sysdeps/i386/fpu/e_powf.S +++ /dev/null @@ -1,392 +0,0 @@ -/* ix87 specific implementation of pow function. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - .type p31,@object -p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41 - ASM_SIZE_DIRECTIVE(p31) - - .section .rodata.cst16,"aM",@progbits,16 - - .p2align 3 - .type infinity,@object -inf_zero: -infinity: - .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f - ASM_SIZE_DIRECTIVE(infinity) - .type zero,@object -zero: .double 0.0 - ASM_SIZE_DIRECTIVE(zero) - .type minf_mzero,@object -minf_mzero: -minfinity: - .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff -mzero: - .byte 0, 0, 0, 0, 0, 0, 0, 0x80 - ASM_SIZE_DIRECTIVE(minf_mzero) -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) -#else -# define MO(op) op -# define MOX(op,x,f) op(,x,f) -#endif - - .text -ENTRY(__ieee754_powf) - flds 8(%esp) // y - fxam - -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - - fnstsw - movb %ah, %dl - andb $0x45, %ah - cmpb $0x40, %ah // is y == 0 ? - je 11f - - cmpb $0x05, %ah // is y == ±inf ? - je 12f - - cmpb $0x01, %ah // is y == NaN ? - je 30f - - flds 4(%esp) // x : y - - subl $4, %esp - cfi_adjust_cfa_offset (4) - - fxam - fnstsw - movb %ah, %dh - andb $0x45, %ah - cmpb $0x40, %ah - je 20f // x is ±0 - - cmpb $0x05, %ah - je 15f // x is ±inf - - cmpb $0x01, %ah - je 33f // x is NaN - - fxch // y : x - - /* fistpl raises invalid exception for |y| >= 1L<<31. */ - fld %st // y : y : x - fabs // |y| : y : x - fcompl MO(p31) // y : x - fnstsw - sahf - jnc 2f - - /* First see whether `y' is a natural number. In this case we - can use a more precise algorithm. */ - fld %st // y : y : x - fistpl (%esp) // y : x - fildl (%esp) // int(y) : y : x - fucomp %st(1) // y : x - fnstsw - sahf - jne 3f - - /* OK, we have an integer value for y. 
*/ - popl %edx - cfi_adjust_cfa_offset (-4) - orl $0, %edx - fstp %st(0) // x - jns 4f // y >= 0, jump - fdivrl MO(one) // 1/x (now referred to as x) - negl %edx -4: fldl MO(one) // 1 : x - fxch - - /* If y is even, take the absolute value of x. Otherwise, - ensure all intermediate values that might overflow have the - sign of x. */ - testb $1, %dl - jnz 6f - fabs - -6: shrl $1, %edx - jnc 5f - fxch - fabs - fmul %st(1) // x : ST*x - fxch -5: fld %st // x : x : ST*x - fabs // |x| : x : ST*x - fmulp // |x|*x : ST*x - testl %edx, %edx - jnz 6b - fstp %st(0) // ST*x - FLT_NARROW_EVAL_UFLOW_NONNAN - ret - - /* y is ±NAN */ -30: flds 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y - fucomp %st(1) // x : y - fnstsw - sahf - je 31f - fxch // y : x -31: fstp %st(1) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) -2: /* y is a large integer (so even). */ - fxch // x : y - fabs // |x| : y - fxch // y : x - .align ALIGNARG(4) -3: /* y is a real number. */ - fxch // x : y - fldl MO(one) // 1.0 : x : y - fldl MO(limit) // 0.29 : 1.0 : x : y - fld %st(2) // x : 0.29 : 1.0 : x : y - fsub %st(2) // x-1 : 0.29 : 1.0 : x : y - fabs // |x-1| : 0.29 : 1.0 : x : y - fucompp // 1.0 : x : y - fnstsw - fxch // x : 1.0 : y - sahf - ja 7f - fsub %st(1) // x-1 : 1.0 : y - fyl2xp1 // log2(x) : y - jmp 8f - -7: fyl2x // log2(x) : y -8: fmul %st(1) // y*log2(x) : y - fst %st(1) // y*log2(x) : y*log2(x) - frndint // int(y*log2(x)) : y*log2(x) - fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) - fxch // fract(y*log2(x)) : int(y*log2(x)) - f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x)) - faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x)) - fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x)) -32: addl $4, %esp - cfi_adjust_cfa_offset (-4) - fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) - FLT_NARROW_EVAL_UFLOW_NONNAN - ret - - /* x is NaN. 
*/ - cfi_adjust_cfa_offset (4) -33: addl $4, %esp - cfi_adjust_cfa_offset (-4) - fstp %st(1) - ret - - // pow(x,±0) = 1 - .align ALIGNARG(4) -11: fstp %st(0) // pop y - fldl MO(one) - ret - - // y == ±inf - .align ALIGNARG(4) -12: fstp %st(0) // pop y - fldl MO(one) // 1 - flds 4(%esp) // x : 1 - fabs // abs(x) : 1 - fucompp // < 1, == 1, or > 1 - fnstsw - andb $0x45, %ah - cmpb $0x45, %ah - je 13f // jump if x is NaN - - cmpb $0x40, %ah - je 14f // jump if |x| == 1 - - shlb $1, %ah - xorb %ah, %dl - andl $2, %edx - fldl MOX(inf_zero, %edx, 4) - ret - - .align ALIGNARG(4) -14: fldl MO(one) - ret - - .align ALIGNARG(4) -13: flds 4(%esp) // load x == NaN - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) - // x is ±inf -15: fstp %st(0) // y - testb $2, %dh - jz 16f // jump if x == +inf - - // fistpl raises invalid exception for |y| >= 1L<<31, so test - // that (in which case y is certainly even) before testing - // whether y is odd. - fld %st // y : y - fabs // |y| : y - fcompl MO(p31) // y - fnstsw - sahf - jnc 16f - - // We must find out whether y is an odd integer. - fld %st // y : y - fistpl (%esp) // y - fildl (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 17f - - // OK, the value is an integer. - popl %edx - cfi_adjust_cfa_offset (-4) - testb $1, %dl - jz 18f // jump if not odd - // It's an odd integer. - shrl $31, %edx - fldl MOX(minf_mzero, %edx, 8) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) -16: fcompl MO(zero) - addl $4, %esp - cfi_adjust_cfa_offset (-4) - fnstsw - shrl $5, %eax - andl $8, %eax - fldl MOX(inf_zero, %eax, 1) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) -17: shll $30, %edx // sign bit for y in right position - addl $4, %esp - cfi_adjust_cfa_offset (-4) -18: shrl $31, %edx - fldl MOX(inf_zero, %edx, 8) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) - // x is ±0 -20: fstp %st(0) // y - testb $2, %dl - jz 21f // y > 0 - - // x is ±0 and y is < 0. 
We must find out whether y is an odd integer. - testb $2, %dh - jz 25f - - // fistpl raises invalid exception for |y| >= 1L<<31, so test - // that (in which case y is certainly even) before testing - // whether y is odd. - fld %st // y : y - fabs // |y| : y - fcompl MO(p31) // y - fnstsw - sahf - jnc 25f - - fld %st // y : y - fistpl (%esp) // y - fildl (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 26f - - // OK, the value is an integer. - popl %edx - cfi_adjust_cfa_offset (-4) - testb $1, %dl - jz 27f // jump if not odd - // It's an odd integer. - // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) - fdivl MO(zero) - fchs - ret - - cfi_adjust_cfa_offset (4) -25: fstp %st(0) -26: addl $4, %esp - cfi_adjust_cfa_offset (-4) -27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) - fdivl MO(zero) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) - // x is ±0 and y is > 0. We must find out whether y is an odd integer. -21: testb $2, %dh - jz 22f - - // fistpl raises invalid exception for |y| >= 1L<<31, so test - // that (in which case y is certainly even) before testing - // whether y is odd. - fcoml MO(p31) // y - fnstsw - sahf - jnc 22f - - fld %st // y : y - fistpl (%esp) // y - fildl (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 23f - - // OK, the value is an integer. - popl %edx - cfi_adjust_cfa_offset (-4) - testb $1, %dl - jz 24f // jump if not odd - // It's an odd integer. - fldl MO(mzero) - ret - - cfi_adjust_cfa_offset (4) -22: fstp %st(0) -23: addl $4, %esp // Don't use pop. - cfi_adjust_cfa_offset (-4) -24: fldl MO(zero) - ret - -END(__ieee754_powf) -strong_alias (__ieee754_powf, __powf_finite) diff --git a/sysdeps/i386/fpu/e_powf_log2_data.c b/sysdeps/i386/fpu/e_powf_log2_data.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/e_powf_log2_data.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. 
*/ diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps index 64cac565f2..3ab3fd8d2c 100644 --- a/sysdeps/i386/fpu/libm-test-ulps +++ b/sysdeps/i386/fpu/libm-test-ulps @@ -2370,24 +2370,30 @@ ldouble: 1 Function: "pow_downward": double: 1 +float: 1 float128: 2 idouble: 1 +ifloat: 1 ifloat128: 2 ildouble: 4 ldouble: 4 Function: "pow_towardzero": double: 1 +float: 1 float128: 2 idouble: 1 +ifloat: 1 ifloat128: 2 ildouble: 4 ldouble: 4 Function: "pow_upward": double: 1 +float: 1 float128: 2 idouble: 1 +ifloat: 1 ifloat128: 2 ildouble: 4 ldouble: 4 diff --git a/sysdeps/i386/fpu/w_powf.c b/sysdeps/i386/fpu/w_powf.c deleted file mode 100644 index d133216f5b..0000000000 --- a/sysdeps/i386/fpu/w_powf.c +++ /dev/null @@ -1 +0,0 @@ -#include <sysdeps/../math/w_powf.c> diff --git a/sysdeps/i386/i686/fpu/multiarch/Makefile b/sysdeps/i386/i686/fpu/multiarch/Makefile index eee3b8b1fd..c0fa9761d3 100644 --- a/sysdeps/i386/i686/fpu/multiarch/Makefile +++ b/sysdeps/i386/i686/fpu/multiarch/Makefile @@ -1,9 +1,10 @@ ifeq ($(subdir),math) libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 e_log2f-sse2 \ - s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2 + e_powf-sse2 s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2 CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse CFLAGS-e_log2f-sse2.c = -msse2 -mfpmath=sse CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse +CFLAGS-e_powf-sse2.c = -msse2 -mfpmath=sse endif diff --git a/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c b/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c new file mode 100644 index 0000000000..c56f6ee89f --- /dev/null +++ b/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c @@ -0,0 +1,3 @@ +#define __powf __powf_sse2 + +#include <sysdeps/ieee754/flt-32/e_powf.c> diff --git a/sysdeps/i386/i686/fpu/multiarch/e_powf.c b/sysdeps/i386/i686/fpu/multiarch/e_powf.c new file mode 100644 index 0000000000..4dc4c87326 --- /dev/null +++ b/sysdeps/i386/i686/fpu/multiarch/e_powf.c @@ -0,0 +1,43 @@ +/* 
Multiple versions of powf. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define powf __redirect_powf +#define __DECL_SIMD___redirect_powf +#include <math.h> +#undef powf + +#define SYMBOL_NAME powf +#include "ifunc-sse2.h" + +libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (__powf_ia32, __GI___powf, __redirect_powf) + __attribute__ ((visibility ("hidden"))); + +# include <shlib-compat.h> +versioned_symbol (libm, __powf, powf, GLIBC_2_27); +#else +weak_alias (__powf, powf) +#endif + +strong_alias (__powf, __ieee754_powf) +strong_alias (__powf, __powf_finite) + +#define __powf __powf_ia32 +#include <sysdeps/ieee754/flt-32/e_powf.c> diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps index b5d74df580..26d90ec636 100644 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps @@ -2370,24 +2370,30 @@ ldouble: 1 Function: "pow_downward": double: 1 +float: 1 float128: 2 idouble: 1 +ifloat: 1 ifloat128: 2 ildouble: 4 ldouble: 4 Function: "pow_towardzero": double: 1 +float: 1 float128: 2 idouble: 1 +ifloat: 1 ifloat128: 2 ildouble: 4 ldouble: 4 Function: "pow_upward": double: 1 +float: 1 float128: 2 
idouble: 1 +ifloat: 1 ifloat128: 2 ildouble: 4 ldouble: 4 @@ -2577,30 +2583,30 @@ ldouble: 5 Function: "tgamma_downward": double: 3 -float: 4 +float: 5 float128: 5 idouble: 3 -ifloat: 4 +ifloat: 5 ifloat128: 5 ildouble: 5 ldouble: 5 Function: "tgamma_towardzero": double: 4 -float: 4 +float: 5 float128: 5 idouble: 4 -ifloat: 4 +ifloat: 5 ifloat128: 5 ildouble: 5 ldouble: 5 Function: "tgamma_upward": double: 4 -float: 4 +float: 6 float128: 4 idouble: 4 -ifloat: 4 +ifloat: 6 ifloat128: 4 ildouble: 5 ldouble: 5 |