about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/fenv/i386/fenv.s 67
-rw-r--r-- src/fenv/x86_64/fenv.s 3
2 files changed, 61 insertions, 9 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 471d2af8..9bba40a5 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -1,14 +1,26 @@
+.hidden __hwcap
+
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:	
 	mov 4(%esp),%ecx
 	not %ecx
-	test $0x3f,%ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	and %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	test $0x3f,%ecx
 	jnz 2f
 1:	fnclex
 	xor %eax,%eax
 	ret
 2:	fnstsw %ax
+		# TODO: only load/store fenv if exceptions aren't clear yet
 	and %ecx,%eax
 	jz 1b
 	sub $32,%esp
@@ -41,7 +53,18 @@ fesetround:
 	andb $0xf3,1(%esp)
 	or %ch,1(%esp)
 	fldcw (%esp)
-	pop %ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f		# pushes the address of the following 1: label
+1:	addl $__hwcap-1b,(%esp)	# pc-relative: pushed address becomes the address of __hwcap
+	pop %edx		# edx = address of __hwcap
+	testl $0x02000000,(%edx)
+	jz 1f		# capability bit clear: skip the mxcsr update (must be conditional)
+	stmxcsr (%esp)
+	shl $3,%ch		# shift the rounding bits from the x87 control-word position (mask 0x0c in byte 1) to the mxcsr position (mask 0x60)
+	andb $0x9f,1(%esp)	# clear the old mxcsr rounding bits
+	or %ch,1(%esp)
+	ldmxcsr (%esp)
+1:	pop %ecx
 	ret
 
 .global fegetround
@@ -59,7 +82,18 @@ fegetenv:
 	mov 4(%esp),%ecx
 	xor %eax,%eax
 	fnstenv (%ecx)
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	push %eax
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%edx
+	or %edx,4(%ecx)
+1:	ret
 
 .global fesetenv
 .type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
 	inc %ecx
 	jz 1f
 	fldenv -1(%ecx)
-	ret
+	movl -1(%ecx),%ecx
+	jmp 2f
 1:	push %eax
 	push %eax
 	push %eax
@@ -79,12 +114,32 @@ fesetenv:
 	pushl $0x37f
 	fldenv (%esp)
 	add $28,%esp
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+2:	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+		# mxcsr := same rounding mode, cleared exceptions, default mask
+	and $0xc00,%ecx
+	shl $3,%ecx
+	or $0x1f80,%ecx
+	mov %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	ret
 
 .global fetestexcept
 .type fetestexcept,@function
 fetestexcept:
 	mov 4(%esp),%ecx
 	fnstsw %ax
-	and %ecx,%eax
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f		# pushes the address of the following 1: label
+1:	addl $__hwcap-1b,(%esp)	# pc-relative: pushed address becomes the address of __hwcap
+	pop %edx		# edx = address of __hwcap
+	testl $0x02000000,(%edx)
+	jz 1f		# capability bit clear: use the x87 status word only
+	stmxcsr 4(%esp)		# overwrites the argument slot; the argument was already loaded into ecx
+	or 4(%esp),%eax		# OR the stored mxcsr value into the x87 status bits in eax
+1:	and %ecx,%eax		# return only the bits selected by the argument
 	ret
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index 443e35a2..c48dade3 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -28,9 +28,6 @@ feraiseexcept:
 	stmxcsr -8(%rsp)
 	or %edi,-8(%rsp)
 	ldmxcsr -8(%rsp)
-	fnstenv -32(%rsp)
-	or %edi,-28(%rsp)
-	fldenv -32(%rsp)
 	xor %eax,%eax
 	ret