diff options
author | Rich Felker <dalias@aerifal.cx> | 2012-03-19 09:00:30 -0400 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2012-03-19 09:00:30 -0400 |
commit | 02db27d9deaee71b244c91e720ec819c74dab150 (patch) | |
tree | ef2543fd54a6fdbca8839cb14c71bb10ffdaa8f1 /src/math/i386/exp.s | |
parent | da7458a602a6f0bdea25d6b9b613372048a974e6 (diff) | |
download | musl-02db27d9deaee71b244c91e720ec819c74dab150.tar.gz musl-02db27d9deaee71b244c91e720ec819c74dab150.tar.xz musl-02db27d9deaee71b244c91e720ec819c74dab150.zip |
optimize exponential asm for i386
up to 30% faster exp2 by avoiding the slow frndint and fscale instructions. expm1 also takes a much more direct path for small arguments (the expected use case).
Diffstat (limited to 'src/math/i386/exp.s')
-rw-r--r-- | src/math/i386/exp.s | 87 |
1 files changed, 76 insertions, 11 deletions
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index f4769d59..76ab4d64 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -1,3 +1,37 @@ +.global expm1f +.type expm1f,@function +expm1f: + flds 4(%esp) + jmp 1f + +.global expm1l +.type expm1l,@function +expm1l: + fldt 4(%esp) + jmp 1f + +.global expm1 +.type expm1,@function +expm1: + fldl 4(%esp) +1: fldl2e + fmulp + fld1 + fld %st(1) + fabs + fucom %st(1) + fnstsw %ax + fstp %st(0) + fstp %st(0) + sahf + ja 1f + f2xm1 + ret +1: call 1f + fld1 + fsubrp + ret + .global exp2f .type exp2f,@function exp2f: @@ -34,22 +68,53 @@ exp: .type exp2,@function exp2: fldl 4(%esp) -1: fxam - fnstsw %ax +1: mov $0x47000000,%eax + push %eax + flds (%esp) + shl $7,%eax + push %eax + add %eax,%eax + push %eax + fld %st(1) + fabs + fucom %st(1) + fnstsw sahf - jnp 1f - jnc 1f - fstps 4(%esp) - mov $0xfe,%al - and %al,7(%esp) - flds 4(%esp) -1: fld %st(0) - frndint + ja 2f + fstp %st(0) + fstp %st(0) + fld %st(0) + fistpl 8(%esp) + fildl 8(%esp) fxch %st(1) fsub %st(1) + mov $0x3fff,%eax + add %eax,8(%esp) f2xm1 fld1 faddp - fscale + fldt (%esp) + fmulp fstp %st(1) + add $12,%esp + ret + +2: fstp %st(0) + fstp %st(0) + fsts 8(%esp) + mov 8(%esp),%eax + lea (%eax,%eax),%ecx + cmp $0xff000000,%ecx + ja 2f + fstp %st(0) + xor %ecx,%ecx + inc %ecx + add %eax,%eax + jc 1f + mov $0x7ffe,%ecx +1: mov %ecx,8(%esp) + fldt (%esp) + fld %st(0) + fmulp +2: add $12,%esp ret |