about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author nsz <nsz@port70.net> 2012-03-17 13:46:15 +0100
committer nsz <nsz@port70.net> 2012-03-17 13:46:15 +0100
commit 88cfaf8a142a8c57beccb89398a6421c4bbf121a (patch)
tree e31d28a5aac16410d4a0b5d59c871f507cf2d78a /src
parent d3fc724759ef08a8032e76e14f8cd96bfb837e17 (diff)
download musl-88cfaf8a142a8c57beccb89398a6421c4bbf121a.tar.gz
musl-88cfaf8a142a8c57beccb89398a6421c4bbf121a.tar.xz
musl-88cfaf8a142a8c57beccb89398a6421c4bbf121a.zip
fix i386 fegetround and make fesetround faster
Note that the new fesetround has slightly different semantics:

Storing the floating-point environment with fnstenv makes the
next fldenv (or fldcw) "non-signaling", so unmasked and pending
exceptions do not invoke the exception handler.
(These are rare since exceptions are handled immediately and by
default all exceptions are masked anyway. But it may happen if one
manually unmasks an exception in the control word and then either
sets the corresponding exception flag in the status word, or the
execution of an exception-raising floating-point operation gets
interrupted.)
So the old implementation did not trap in some rare cases
where the new implementation traps.

However, POSIX does not specify anything like the x87 exception
handling traps, and the fnstenv/fldenv pair is significantly slower
than the fnstcw/fldcw pair (the new code is about 5x faster here, and
its cost is dominated by the function call overhead).
Diffstat (limited to 'src')
-rw-r--r-- src/fenv/i386/fenv.s 20
1 files changed, 10 insertions, 10 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 8148a30d..ee2eae6e 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -32,22 +32,22 @@ feraiseexcept:
 fesetround:
 	mov 4(%esp),%ecx
 	xor %eax,%eax
-	sub $32,%esp
-	fnstenv (%esp)
+	sub $4,%esp
+	fnstcw (%esp)
 	andb $0xf3,1(%esp)
-	or %ecx,(%esp)
-	fldenv (%esp)
-	add $32,%esp
+	or %cx,(%esp)
+	fldcw (%esp)
+	add $4,%esp
 	ret
 
 .global fegetround
 .type fegetround,@function
 fegetround:
-	sub $28,%esp
-	fnstenv (%esp)
-	mov 4(%esp),%eax
-	add $28,%esp
-	and $0xc,%ah
+	sub $4,%esp
+	fnstcw (%esp)
+	mov (%esp),%ax
+	add $4,%esp
+	and $0xc00,%eax
 	ret
 
 .global fegetenv