diff options
author | Ulrich Drepper <drepper@gmail.com> | 2011-10-14 23:41:47 -0400 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2011-10-14 23:41:47 -0400 |
commit | 38ad40ceca8ba35761e79cfce4aaef0d0f7583e6 (patch) | |
tree | 336de2d8f4b91a770418446fe2c322662d321bcd /sysdeps/i386 | |
parent | 396a21b1d016c52e011f8921e0728aa62a1e9df0 (diff) | |
download | glibc-38ad40ceca8ba35761e79cfce4aaef0d0f7583e6.tar.gz glibc-38ad40ceca8ba35761e79cfce4aaef0d0f7583e6.tar.xz glibc-38ad40ceca8ba35761e79cfce4aaef0d0f7583e6.zip |
Optimize x86-32 log
Diffstat (limited to 'sysdeps/i386')
-rw-r--r-- | sysdeps/i386/fpu/e_log.S | 20 | ||||
-rw-r--r-- | sysdeps/i386/fpu/e_logf.S | 20 | ||||
-rw-r--r-- | sysdeps/i386/fpu/e_logl.S | 20 | ||||
-rw-r--r-- | sysdeps/i386/i686/fpu/e_log.S | 80 | ||||
-rw-r--r-- | sysdeps/i386/i686/fpu/e_logf.S | 81 | ||||
-rw-r--r-- | sysdeps/i386/i686/fpu/e_logl.S | 81 |
6 files changed, 299 insertions, 3 deletions
diff --git a/sysdeps/i386/fpu/e_log.S b/sysdeps/i386/fpu/e_log.S
index 8110a84535..a2e4d89a40 100644
--- a/sysdeps/i386/fpu/e_log.S
+++ b/sysdeps/i386/fpu/e_log.S
@@ -62,4 +62,22 @@ ENTRY(__ieee754_log)
 	fstp	%st(1)
 	ret
 END (__ieee754_log)
-strong_alias (__ieee754_log, __log_finite)
+
+ENTRY(__log_finite)
+	fldln2			// log(2)
+	fldl	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah	// keep only C3, C2, C0 of the status word
+	jz	2b		// all clear: |x-1| > limit; 2 is the nearest earlier local label (outside this hunk)
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__log_finite)
diff --git a/sysdeps/i386/fpu/e_logf.S b/sysdeps/i386/fpu/e_logf.S
index b683e13853..1992cc2f82 100644
--- a/sysdeps/i386/fpu/e_logf.S
+++ b/sysdeps/i386/fpu/e_logf.S
@@ -63,4 +63,22 @@ ENTRY(__ieee754_logf)
 	fstp	%st(1)
 	ret
 END (__ieee754_logf)
-strong_alias (__ieee754_logf, __logf_finite)
+
+ENTRY(__logf_finite)
+	fldln2			// log(2)
+	flds	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah	// keep only C3, C2, C0 of the status word
+	jz	2b		// all clear: |x-1| > limit; 2 is the nearest earlier local label (outside this hunk)
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logf_finite)
diff --git a/sysdeps/i386/fpu/e_logl.S b/sysdeps/i386/fpu/e_logl.S
index ee1fb16bc4..bfb72a30e9 100644
--- a/sysdeps/i386/fpu/e_logl.S
+++ b/sysdeps/i386/fpu/e_logl.S
@@ -63,4 +63,22 @@ ENTRY(__ieee754_logl)
 	fstp	%st(1)
 	ret
 END (__ieee754_logl)
-strong_alias (__ieee754_logl, __logl_finite)
+
+ENTRY(__logl_finite)
+	fldln2			// log(2)
+	fldt	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah	// keep only C3, C2, C0 of the status word
+	jz	2b		// all clear: |x-1| > limit; 2 is the nearest earlier local label (outside this hunk)
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logl_finite)
diff --git a/sysdeps/i386/i686/fpu/e_log.S b/sysdeps/i386/i686/fpu/e_log.S
new file mode 100644
index 0000000000..c6524b1854
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_log.S
@@ -0,0 +1,80 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+#ifdef __ELF__
+	.section .rodata.cst8,"aM",@progbits,8
+#else
+	.text
+#endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	/* It is not important that this constant is precise.  It is only
+	   a value which is known to be on the safe side for using the
+	   fyl2xp1 instruction.  */
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29
+	ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_log)
+	fldln2			// log(2)
+	fldl	4(%esp)		// x : log(2)
+	fucomi	%st		// x compared with itself: PF set only if unordered (NaN)
+	jp	3f		// NaN: hand x back unchanged
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2f		// CF from fcomip: 0.29 < |x-1| (or unordered), use fyl2x
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+
+2:	fstp	%st(0)		// x : log(2)
+	fyl2x			// log(x)
+	ret
+
+3:	fstp	%st(1)		// x
+	ret
+END (__ieee754_log)
+
+ENTRY(__log_finite)		/* as __ieee754_log, minus the fucomi/jp NaN check */
+	fldln2			// log(2)
+	fldl	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2b		// 0.29 < |x-1|: reuse the fyl2x tail at label 2 in __ieee754_log
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__log_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logf.S b/sysdeps/i386/i686/fpu/e_logf.S
new file mode 100644
index 0000000000..64f8807f35
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_logf.S
@@ -0,0 +1,81 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+#ifdef __ELF__
+	.section .rodata.cst8,"aM",@progbits,8
+#else
+	.text
+#endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	/* It is not important that this constant is precise.  It is only
+	   a value which is known to be on the safe side for using the
+	   fyl2xp1 instruction.  */
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29
+	ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_logf)
+	fldln2			// log(2)
+	flds	4(%esp)		// x : log(2)
+	fucomi	%st		// x compared with itself: PF set only if unordered (NaN)
+	jp	3f		// NaN: hand x back unchanged
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2f		// CF from fcomip: 0.29 < |x-1| (or unordered), use fyl2x
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+
+2:	fstp	%st(0)		// x : log(2)
+	fyl2x			// log(x)
+	ret
+
+3:	fstp	%st(1)		// x
+	ret
+END (__ieee754_logf)
+
+ENTRY(__logf_finite)		/* as __ieee754_logf, minus the fucomi/jp NaN check */
+	fldln2			// log(2)
+	flds	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2b		// 0.29 < |x-1|: reuse the fyl2x tail at label 2 in __ieee754_logf
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logf_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logl.S b/sysdeps/i386/i686/fpu/e_logl.S
new file mode 100644
index 0000000000..4e79a5a4b5
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_logl.S
@@ -0,0 +1,81 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+#ifdef __ELF__
+	.section .rodata.cst8,"aM",@progbits,8
+#else
+	.text
+#endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	/* It is not important that this constant is precise.  It is only
+	   a value which is known to be on the safe side for using the
+	   fyl2xp1 instruction.  */
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29
+	ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_logl)
+	fldln2			// log(2)
+	fldt	4(%esp)		// x : log(2)
+	fucomi	%st		// x compared with itself: PF set only if unordered (NaN)
+	jp	3f		// NaN: hand x back unchanged
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2f		// CF from fcomip: 0.29 < |x-1| (or unordered), use fyl2x
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+
+2:	fstp	%st(0)		// x : log(2)
+	fyl2x			// log(x)
+	ret
+
+3:	fstp	%st(1)		// x
+	ret
+END (__ieee754_logl)
+
+ENTRY(__logl_finite)		/* as __ieee754_logl, minus the fucomi/jp NaN check */
+	fldln2			// log(2)
+	fldt	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2b		// 0.29 < |x-1|: reuse the fyl2x tail at label 2 in __ieee754_logl
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logl_finite)