asm for hypot and hypotf

special care is taken to avoid any inexact computations when either arg is zero (in which case the exact absolute value of the other arg should be returned) and to support the special condition that hypot(±inf,nan) yields inf. hypotl is not yet implemented since avoiding overflow is nontrivial.
author: Rich Felker <dalias@aerifal.cx> 2012-03-23 01:52:49 -0400
committer: Rich Felker <dalias@aerifal.cx> 2012-03-23 01:52:49 -0400
commit: ad2d2b963a4bf9e2631b345c898e8715b36b459e (patch)
tree: a99362612a0a46e2601aa081b80a9ca5cb1a5d83 /src/math
parent: a9e85c0a5c690086c652f3a8ae9a109389f671a3 (diff)
download: musl-ad2d2b963a4bf9e2631b345c898e8715b36b459e.tar.gz
musl-ad2d2b963a4bf9e2631b345c898e8715b36b459e.tar.xz
musl-ad2d2b963a4bf9e2631b345c898e8715b36b459e.zip
2 files changed, 87 insertions, 0 deletions
diff --git a/src/math/i386/hypot.s b/src/math/i386/hypot.s
new file mode 100644
index 00000000..299c2e18
--- /dev/null
+++ b/src/math/i386/hypot.s
@@ -0,0 +1,45 @@
+.global hypot
+.type hypot,@function
+hypot:	# double hypot(double x, double y): x is read from 4(%esp), y from 12(%esp); result is left in st(0)
+	mov 8(%esp),%eax	# eax = high 32 bits of x (sign, exponent, top of mantissa)
+	mov 16(%esp),%ecx	# ecx = high 32 bits of y
+	add %eax,%eax	# eax = hx<<1, shifting the sign bit out
+	add %ecx,%ecx	# ecx = hy<<1, shifting the sign bit out
+	and %eax,%ecx	# keep only the bits set in both high words
+	cmp $0xffe00000,%ecx	# 0xffe00000 = all 11 exponent bits set, after the <<1
+	jae 2f	# both exponents are all-ones: each arg is inf or NaN
+	or 4(%esp),%eax	# merge in the low word of x; zero iff x is +-0
+	jnz 1f	# x is nonzero: go check y
+	fldl 12(%esp)	# x is zero: the exact answer is |y|
+	fabs
+	ret
+1:	mov 16(%esp),%eax	# reload the high word of y (ecx was overwritten by the and)
+	add %eax,%eax	# drop the sign bit
+	or 12(%esp),%eax	# merge in the low word of y; zero iff y is +-0
+	jnz 1f	# y is nonzero too: do the general computation
+	fldl 4(%esp)	# y is zero: the exact answer is |x|
+	fabs
+	ret
+1:	fldl 4(%esp)	# general case: st(0) = x
+	fld %st(0)	# duplicate x on the x87 stack
+	fmulp	# st(0) = x*x
+	fldl 12(%esp)	# st(0) = y, st(1) = x*x
+	fld %st(0)	# duplicate y
+	fmulp	# st(0) = y*y, st(1) = x*x
+	faddp	# st(0) = x*x + y*y, held in an x87 register rather than stored as a double
+	fsqrt	# st(0) = sqrt(x*x + y*y)
+	ret
+2:	sub $0xffe00000,%eax	# both args inf/NaN; eax is still hx<<1, so this leaves x's high mantissa bits
+	or 4(%esp),%eax	# merge in the low mantissa word; zero iff x is +-inf
+	jnz 1f	# x is NaN: the result depends on y
+	fldl 4(%esp)	# x is +-inf: return |x| = +inf even though y may be NaN
+	fabs
+	ret
+1:	mov 16(%esp),%eax	# x is NaN; examine y the same way
+	add %eax,%eax	# drop the sign bit
+	sub $0xffe00000,%eax	# remove the all-ones exponent, leaving y's high mantissa bits
+	or 12(%esp),%eax	# merge in the low mantissa word; zero iff y is +-inf
+	fldl 12(%esp)	# load y as the return value; fldl leaves EFLAGS alone, so the jnz below still tests the or
+	jnz 1f	# y is NaN as well: return it unchanged
+	fabs	# y is +-inf: return +inf
+1:	ret
diff --git a/src/math/i386/hypotf.s b/src/math/i386/hypotf.s
new file mode 100644
index 00000000..068935e2
--- /dev/null
+++ b/src/math/i386/hypotf.s
@@ -0,0 +1,42 @@
+.global hypotf
+.type hypotf,@function
+hypotf:	# float hypotf(float x, float y): x is read from 4(%esp), y from 8(%esp); result is left in st(0)
+	mov 4(%esp),%eax	# eax = raw bits of x
+	mov 8(%esp),%ecx	# ecx = raw bits of y
+	add %eax,%eax	# eax = x bits <<1, shifting the sign bit out
+	add %ecx,%ecx	# ecx = y bits <<1, shifting the sign bit out
+	and %eax,%ecx	# keep only the bits set in both
+	cmp $0xff000000,%ecx	# 0xff000000 = all 8 exponent bits set, after the <<1
+	jae 2f	# both exponents are all-ones: each arg is inf or NaN
+	test %eax,%eax	# zero iff x is +-0 (sign already shifted out)
+	jnz 1f	# x is nonzero: go check y
+	flds 8(%esp)	# x is zero: the exact answer is |y|
+	fabs
+	ret
+1:	mov 8(%esp),%eax	# reload the bits of y (ecx was overwritten by the and)
+	add %eax,%eax	# drop the sign bit; ZF is set iff y is +-0
+	jnz 1f	# y is nonzero too: do the general computation
+	flds 4(%esp)	# y is zero: the exact answer is |x|
+	fabs
+	ret
+1:	flds 4(%esp)	# general case: st(0) = x
+	fld %st(0)	# duplicate x on the x87 stack
+	fmulp	# st(0) = x*x
+	flds 8(%esp)	# st(0) = y, st(1) = x*x
+	fld %st(0)	# duplicate y
+	fmulp	# st(0) = y*y, st(1) = x*x
+	faddp	# st(0) = x*x + y*y, held in an x87 register rather than stored as a float
+	fsqrt	# st(0) = sqrt(x*x + y*y)
+	ret
+2:	cmp $0xff000000,%eax	# both args inf/NaN; eax is still x bits <<1, equal iff the mantissa is zero (x is +-inf)
+	jnz 1f	# x is NaN: the result depends on y
+	flds 4(%esp)	# x is +-inf: return |x| = +inf even though y may be NaN
+	fabs
+	ret
+1:	mov 8(%esp),%eax	# x is NaN; examine y the same way
+	add %eax,%eax	# drop the sign bit
+	cmp $0xff000000,%eax	# equal iff y is +-inf
+	flds 8(%esp)	# load y as the return value; flds leaves EFLAGS alone, so the jnz below still tests the cmp
+	jnz 1f	# y is NaN as well: return it unchanged
+	fabs	# y is +-inf: return +inf
+1:	ret
author	Rich Felker <dalias@aerifal.cx>	2012-03-23 01:52:49 -0400
committer	Rich Felker <dalias@aerifal.cx>	2012-03-23 01:52:49 -0400
commit	ad2d2b963a4bf9e2631b345c898e8715b36b459e (patch)
tree	a99362612a0a46e2601aa081b80a9ca5cb1a5d83 /src/math
parent	a9e85c0a5c690086c652f3a8ae9a109389f671a3 (diff)
download	musl-ad2d2b963a4bf9e2631b345c898e8715b36b459e.tar.gz musl-ad2d2b963a4bf9e2631b345c898e8715b36b459e.tar.xz musl-ad2d2b963a4bf9e2631b345c898e8715b36b459e.zip