diff options
author | Szabolcs Nagy <nsz@port70.net> | 2013-09-05 10:58:48 +0000 |
---|---|---|
committer | Szabolcs Nagy <nsz@port70.net> | 2013-09-05 11:30:09 +0000 |
commit | 07039ed8563b850624146c938ae201a1099d2f75 (patch) | |
tree | 2a751e8684596943be0bca4391289759b718f2b4 /src/math/i386/exp.s | |
parent | 8dba5486288e719ed290cccefcd932ed32756d7c (diff) | |
download | musl-07039ed8563b850624146c938ae201a1099d2f75.tar.gz musl-07039ed8563b850624146c938ae201a1099d2f75.tar.xz musl-07039ed8563b850624146c938ae201a1099d2f75.zip |
math: fix exp2l asm on x86 (raise underflow correctly)
There were two problems:
* Omitted underflow on subnormal results: exp2l(-16383.5) was calculated as sqrt(2)*2^-16384; the last bits of sqrt(2) are zero, so the down-scaling does not underflow even though the result is in the subnormal range.
* Spurious underflow for subnormal inputs: exp2l(0x1p-16400) was evaluated as f2xm1(x)+1, and f2xm1 raised underflow (because of the inexact subnormal result).
The first issue is fixed by raising underflow manually if x is in (-32768,-16382] and is not an integer (x-0x1p63+0x1p63 != x). The second issue is fixed by treating x in (-0x1p-64,0x1p-64) specially. For these fixes the special-case handling was completely rewritten.
Diffstat (limited to 'src/math/i386/exp.s')
-rw-r--r-- | src/math/i386/exp.s | 70 |
1 files changed, 38 insertions, 32 deletions
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index e5f54588..abb90369 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -95,42 +95,32 @@ exp: .type exp2,@function exp2: fldl 4(%esp) -1: pushl $0x467ff000 - flds (%esp) # 16380 - xorl %eax,%eax - pushl $0x80000000 - push %eax - fld %st(1) - fabs - fucomp %st(1) - fnstsw - fstp %st(0) - sahf - ja 3f # |x| > 16380 - jp 2f # x is nan (avoid invalid except in fistp) +1: sub $12,%esp fld %st(0) - fistpl 8(%esp) - fildl 8(%esp) - fxch %st(1) - fsub %st(1) - mov $0x3fff,%eax - add %eax,8(%esp) - f2xm1 - fld1 - faddp # 2^(x-rint(x)) - fldt (%esp) # 2^rint(x) - fmulp - fstp %st(1) -2: add $12,%esp - ret - -3: fld %st(0) fstpt (%esp) - fld1 mov 8(%esp),%ax and $0x7fff,%ax - cmp $0x7fff,%ax - je 1f # x = +-inf + cmp $0x3fff+13,%ax + jb 4f # |x| < 8192 + cmp $0x3fff+15,%ax + jae 3f # |x| >= 32768 + fsts (%esp) + cmpl $0xc67ff800,(%esp) + jb 2f # x > -16382 + movl $0x5f000000,(%esp) + flds (%esp) # 0x1p63 + fld %st(1) + fsub %st(1) + faddp + fucomp %st(1) + fnstsw + sahf + je 2f # x - 0x1p63 + 0x1p63 == x + movl $1,(%esp) + flds (%esp) # 0x1p-149 + fdiv %st(1) + fstps (%esp) # raise underflow +2: fld1 fld %st(1) frndint fxch %st(2) @@ -141,3 +131,19 @@ exp2: fstp %st(1) add $12,%esp ret +3: xor %eax,%eax +4: cmp $0x3fff-64,%ax + fld1 + jb 1b # |x| < 0x1p-64 + fstpt (%esp) + fistl 8(%esp) + fildl 8(%esp) + fsubrp %st(1) + addl $0x3fff,8(%esp) + f2xm1 + fld1 + faddp # 2^(x-rint(x)) + fldt (%esp) # 2^rint(x) + fmulp + add $12,%esp + ret |