x86_64: add single instruction fma

fma is only available on recent x86_64 cpus, where it is much faster than a software fma, so it should be selected with a runtime check. However, that requires more changes; this patch just adds the code so it can be tested when musl is compiled with -mfma or -mfma4.
author: Szabolcs Nagy <nsz@port70.net> 2018-09-22 21:43:42 +0000
committer: Rich Felker <dalias@aerifal.cx> 2018-10-15 14:45:28 -0400
commit: e9016138886527a739804634048aeac16092dc1e (patch)
tree: b7581245a4ff1da3f27bfdc5826ff362a8ab8430 /src/math
parent: 7396ef0a05b834bf92c4f268a3336c0bc10c3593 (diff)
download: musl-e9016138886527a739804634048aeac16092dc1e.tar.gz
musl-e9016138886527a739804634048aeac16092dc1e.tar.xz
musl-e9016138886527a739804634048aeac16092dc1e.zip
4 files changed, 92 insertions, 0 deletions
diff --git a/src/math/x32/fma.c b/src/math/x32/fma.c
new file mode 100644
index 00000000..4dd53f2a
--- /dev/null
+++ b/src/math/x32/fma.c
@@ -0,0 +1,23 @@
+#include <math.h>
+
+#if __FMA__ /* three-operand FMA instructions enabled at compile time (e.g. -mfma) */
+
+double fma(double x, double y, double z) /* returns x*y + z using a single vfmadd132sd */
+{
+	__asm__ ("vfmadd132sd %1, %2, %0" : "+x" (x) : "x" (y), "x" (z)); /* %0 = %0*%1 + %2; "+x" makes x both input and output */
+	return x;
+}
+
+#elif __FMA4__ /* four-operand FMA4 instructions enabled at compile time (e.g. -mfma4) */
+
+double fma(double x, double y, double z) /* returns x*y + z using a single vfmaddsd */
+{
+	__asm__ ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z)); /* %0 = %1*%2 + %3; the result is a separate output-only operand */
+	return x;
+}
+
+#else /* neither macro is set: build the parent directory's fma.c instead */
+
+#include "../fma.c"
+
+#endif
diff --git a/src/math/x32/fmaf.c b/src/math/x32/fmaf.c
new file mode 100644
index 00000000..30b971ff
--- /dev/null
+++ b/src/math/x32/fmaf.c
@@ -0,0 +1,23 @@
+#include <math.h>
+
+#if __FMA__ /* three-operand FMA instructions enabled at compile time (e.g. -mfma) */
+
+float fmaf(float x, float y, float z) /* returns x*y + z using a single vfmadd132ss */
+{
+	__asm__ ("vfmadd132ss %1, %2, %0" : "+x" (x) : "x" (y), "x" (z)); /* %0 = %0*%1 + %2; "+x" makes x both input and output */
+	return x;
+}
+
+#elif __FMA4__ /* four-operand FMA4 instructions enabled at compile time (e.g. -mfma4) */
+
+float fmaf(float x, float y, float z) /* returns x*y + z using a single vfmaddss */
+{
+	__asm__ ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z)); /* %0 = %1*%2 + %3; the result is a separate output-only operand */
+	return x;
+}
+
+#else /* neither macro is set: build the parent directory's fmaf.c instead */
+
+#include "../fmaf.c"
+
+#endif
diff --git a/src/math/x86_64/fma.c b/src/math/x86_64/fma.c
new file mode 100644
index 00000000..4dd53f2a
--- /dev/null
+++ b/src/math/x86_64/fma.c
@@ -0,0 +1,23 @@
+#include <math.h>
+
+#if __FMA__ /* three-operand FMA instructions enabled at compile time (e.g. -mfma) */
+
+double fma(double x, double y, double z) /* returns x*y + z using a single vfmadd132sd */
+{
+	__asm__ ("vfmadd132sd %1, %2, %0" : "+x" (x) : "x" (y), "x" (z)); /* %0 = %0*%1 + %2; "+x" makes x both input and output */
+	return x;
+}
+
+#elif __FMA4__ /* four-operand FMA4 instructions enabled at compile time (e.g. -mfma4) */
+
+double fma(double x, double y, double z) /* returns x*y + z using a single vfmaddsd */
+{
+	__asm__ ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z)); /* %0 = %1*%2 + %3; the result is a separate output-only operand */
+	return x;
+}
+
+#else /* neither macro is set: build the parent directory's fma.c instead */
+
+#include "../fma.c"
+
+#endif
diff --git a/src/math/x86_64/fmaf.c b/src/math/x86_64/fmaf.c
new file mode 100644
index 00000000..30b971ff
--- /dev/null
+++ b/src/math/x86_64/fmaf.c
@@ -0,0 +1,23 @@
+#include <math.h>
+
+#if __FMA__ /* three-operand FMA instructions enabled at compile time (e.g. -mfma) */
+
+float fmaf(float x, float y, float z) /* returns x*y + z using a single vfmadd132ss */
+{
+	__asm__ ("vfmadd132ss %1, %2, %0" : "+x" (x) : "x" (y), "x" (z)); /* %0 = %0*%1 + %2; "+x" makes x both input and output */
+	return x;
+}
+
+#elif __FMA4__ /* four-operand FMA4 instructions enabled at compile time (e.g. -mfma4) */
+
+float fmaf(float x, float y, float z) /* returns x*y + z using a single vfmaddss */
+{
+	__asm__ ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z)); /* %0 = %1*%2 + %3; the result is a separate output-only operand */
+	return x;
+}
+
+#else /* neither macro is set: build the parent directory's fmaf.c instead */
+
+#include "../fmaf.c"
+
+#endif
author	Szabolcs Nagy <nsz@port70.net>	2018-09-22 21:43:42 +0000
committer	Rich Felker <dalias@aerifal.cx>	2018-10-15 14:45:28 -0400
commit	e9016138886527a739804634048aeac16092dc1e (patch)
tree	b7581245a4ff1da3f27bfdc5826ff362a8ab8430 /src/math
parent	7396ef0a05b834bf92c4f268a3336c0bc10c3593 (diff)
download	musl-e9016138886527a739804634048aeac16092dc1e.tar.gz musl-e9016138886527a739804634048aeac16092dc1e.tar.xz musl-e9016138886527a739804634048aeac16092dc1e.zip