about summary refs log tree commit diff
path: root/src/fenv
diff options
context:
space:
mode:
authorSzabolcs Nagy <nsz@port70.net>2013-08-17 02:40:44 +0000
committerSzabolcs Nagy <nsz@port70.net>2013-08-18 16:27:20 +0000
commitebc10fa17634a6ddb87a3aedd71b7d9617d12c19 (patch)
tree8e3f1ffffe8c0ed8d768ac7bbdb156038d12556e /src/fenv
parentd6841499109fc397cd3a57a726304fec9b08f510 (diff)
downloadmusl-ebc10fa17634a6ddb87a3aedd71b7d9617d12c19.tar.gz
musl-ebc10fa17634a6ddb87a3aedd71b7d9617d12c19.tar.xz
musl-ebc10fa17634a6ddb87a3aedd71b7d9617d12c19.zip
add sse fenv support on i386 through hwcap
the sse and x87 rounding modes should always be the same, and
the visible exception flags are the bitwise or of the two
fenv states (so it is enough to query the rounding mode or
raise exceptions on one fenv)
Diffstat (limited to 'src/fenv')
-rw-r--r--src/fenv/i386/fenv.s67
-rw-r--r--src/fenv/x86_64/fenv.s3
2 files changed, 61 insertions, 9 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 471d2af8..9bba40a5 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -1,14 +1,26 @@
+.hidden __hwcap
+
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:	
 	mov 4(%esp),%ecx
 	not %ecx
-	test $0x3f,%ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	and %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	test $0x3f,%ecx
 	jnz 2f
 1:	fnclex
 	xor %eax,%eax
 	ret
 2:	fnstsw %ax
+		# TODO: only load/store fenv if exceptions are not clear yet
 	and %ecx,%eax
 	jz 1b
 	sub $32,%esp
@@ -41,7 +53,18 @@ fesetround:
 	andb $0xf3,1(%esp)
 	or %ch,1(%esp)
 	fldcw (%esp)
-	pop %ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f		# XMM bit clear: skip the mxcsr update
+	stmxcsr (%esp)
+	shl $3,%ch	# x87 rounding bits 10-11 -> mxcsr rounding bits 13-14
+	andb $0x9f,1(%esp)
+	or %ch,1(%esp)
+	ldmxcsr (%esp)
+1:	pop %ecx
 	ret
 
 .global fegetround
@@ -59,7 +82,18 @@ fegetenv:
 	mov 4(%esp),%ecx
 	xor %eax,%eax
 	fnstenv (%ecx)
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	push %eax
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%edx
+	or %edx,4(%ecx)
+1:	ret
 
 .global fesetenv
 .type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
 	inc %ecx
 	jz 1f
 	fldenv -1(%ecx)
-	ret
+	movl -1(%ecx),%ecx
+	jmp 2f
 1:	push %eax
 	push %eax
 	push %eax
@@ -79,12 +114,32 @@ fesetenv:
 	pushl $0x37f
 	fldenv (%esp)
 	add $28,%esp
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+2:	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+		# mxcsr := same rounding mode, cleared exceptions, default mask
+	and $0xc00,%ecx
+	shl $3,%ecx
+	or $0x1f80,%ecx
+	mov %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	ret
 
 .global fetestexcept
 .type fetestexcept,@function
 fetestexcept:
 	mov 4(%esp),%ecx
 	fnstsw %ax
-	and %ecx,%eax
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f		# pushes the address of the following label 1
+1:	addl $__hwcap-1b,(%esp)	# adjust the pushed address to &__hwcap
+	pop %edx	# edx = &__hwcap
+	testl $0x02000000,(%edx)
+	jz 1f		# XMM bit clear: use the x87 status word only
+	stmxcsr 4(%esp)	# overwrites the argument slot; its value is already in ecx
+	or 4(%esp),%eax	# merge the mxcsr bits into the x87 status bits
+1:	and %ecx,%eax	# return only the bits selected by the argument
 	ret
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index 443e35a2..c48dade3 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -28,9 +28,6 @@ feraiseexcept:
 	stmxcsr -8(%rsp)
 	or %edi,-8(%rsp)
 	ldmxcsr -8(%rsp)
-	fnstenv -32(%rsp)
-	or %edi,-28(%rsp)
-	fldenv -32(%rsp)
 	xor %eax,%eax
 	ret