diff options
-rw-r--r--  src/fenv/i386/fenv.s    | 39
-rw-r--r--  src/fenv/x86_64/fenv.s  | 26
2 files changed, 38 insertions, 27 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 9bba40a5..a8540add 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -4,26 +4,41 @@
 .type feclearexcept,@function
 feclearexcept:
 	mov 4(%esp),%ecx
-	not %ecx
+	fnstsw %ax
 	# consider sse fenv as well if the cpu has XMM capability
 	call 1f
 1:	addl $__hwcap-1b,(%esp)
 	pop %edx
 	testl $0x02000000,(%edx)
+	jz 2f
+	# maintain exceptions in the sse mxcsr, clear x87 exceptions
+	test %eax,%ecx
 	jz 1f
-	stmxcsr 4(%esp)
-	and %ecx,4(%esp)
-	ldmxcsr 4(%esp)
-1:	test $0x3f,%ecx
-	jnz 2f
-1:	fnclex
-	xor %eax,%eax
+	fnclex
+1:	push %edx
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%eax
+	or %eax,%edx
+	test %edx,%ecx
+	jz 1f
+	not %ecx
+	and %ecx,%edx
+	push %edx
+	ldmxcsr (%esp)
+	pop %edx
+1:	xor %eax,%eax
 	ret
-2:	fnstsw %ax
-	# TODO: only load/store fenv if exceptions arent clear yet
-	and %ecx,%eax
+	# only do the expensive x87 fenv load/store when needed
+2:	test %eax,%ecx
 	jz 1b
-	sub $32,%esp
+	not %ecx
+	and %ecx,%eax
+	test $0x3f,%eax
+	jz 1f
+	fnclex
+	jmp 1b
+1:	sub $32,%esp
 	fnstenv (%esp)
 	mov %al,4(%esp)
 	fldenv (%esp)
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index c48dade3..dda6b61a 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -1,25 +1,21 @@
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:
+	# maintain exceptions in the sse mxcsr, clear x87 exceptions
 	mov %edi,%ecx
+	fnstsw %ax
+	test %eax,%ecx
+	jz 1f
+	fnclex
+1:	stmxcsr -8(%rsp)
+	and $0x3f,%eax
+	or %eax,-8(%rsp)
+	test %ecx,-8(%rsp)
+	jz 1f
 	not %ecx
-	stmxcsr -8(%rsp)
 	and %ecx,-8(%rsp)
 	ldmxcsr -8(%rsp)
-	test $0x3f,%ecx
-	jnz 2f
-1:	fnclex
-	xor %eax,%eax
-	ret
-2:	fnstsw %ax
-	and %ecx,%eax
-	jz 1b
-	sub $32,%rsp
-	fnstenv (%rsp)
-	mov %al,4(%rsp)
-	fldenv (%rsp)
-	add $32,%rsp
-	xor %eax,%eax
+1:	xor %eax,%eax
 	ret
 
 .global feraiseexcept