about summary refs log tree commit diff
diff options
context:
space:
mode:
author Joseph Myers <joseph@codesourcery.com> 2017-09-20 16:54:05 +0000
committer Joseph Myers <joseph@codesourcery.com> 2017-09-20 16:54:05 +0000
commit ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5 (patch)
tree 83340587a4086402e9f1686c278aa1a264ef77e7
parent a856d4d4a8a56eaefdddb58884bfa2bfe922ee4c (diff)
download glibc-ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5.tar.gz
glibc-ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5.tar.xz
glibc-ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5.zip
Add SSE4.1 trunc, truncf (bug 20142).
This patch adds SSE4.1 versions of trunc and truncf, using the roundsd
/ roundss instructions, similar to the versions of ceil, floor, rint
and nearbyint functions we already have.  In my testing with the glibc
benchtests these are about 30% faster than the C versions for double,
20% faster for float.

Tested for x86_64.

	[BZ #20142]
	* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add s_trunc-c, s_truncf-c, s_trunc-sse4_1 and s_truncf-sse4_1.
	* sysdeps/x86_64/fpu/multiarch/s_trunc-c.c: New file.
	* sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S: Likewise.
	* sysdeps/x86_64/fpu/multiarch/s_trunc.c: Likewise.
	* sysdeps/x86_64/fpu/multiarch/s_truncf-c.c: Likewise.
	* sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S: Likewise.
	* sysdeps/x86_64/fpu/multiarch/s_truncf.c: Likewise.
-rw-r--r-- ChangeLog 12
-rw-r--r-- NEWS 2
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/Makefile 6
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_trunc-c.c 2
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S 25
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_trunc.c 29
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_truncf-c.c 2
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S 25
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_truncf.c 29
9 files changed, 130 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index a07c903731..d85bb873a5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2017-09-20  Joseph Myers  <joseph@codesourcery.com>
+
+	[BZ #20142]
+	* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):
+	Add s_trunc-c, s_truncf-c, s_trunc-sse4_1 and s_truncf-sse4_1.
+	* sysdeps/x86_64/fpu/multiarch/s_trunc-c.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/s_trunc.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/s_truncf-c.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/s_truncf.c: Likewise.
+
 2017-09-20  Paul Pluzhnikov  <ppluzhnikov@google.com>
             Ricky Zhou  <rickyz@google.com>
             Anoop V Chakkalakkal  <anoop.vijayan@in.ibm.com>
diff --git a/NEWS b/NEWS
index dd381f8930..a3aa94cb3b 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,8 @@ Major new features:
 * Optimized x86-64 asin, atan2, exp, expf, log, pow, atan, sin and tan
   with FMA, contributed by Arjan van de Ven and H.J. Lu from Intel.
 
+* Optimized x86-64 trunc and truncf for processors with SSE4.1.
+
 * In order to support faster and safer process termination the malloc API
   family of functions will no longer print a failure address and stack
   backtrace after detecting heap corruption.  The goal is to minimize the
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 1b61795aff..d66055224a 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -1,10 +1,12 @@
 ifeq ($(subdir),math)
 libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
-			s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c
+			s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c \
+			s_trunc-c s_truncf-c
 
 libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \
 			s_floorf-sse4_1 s_nearbyint-sse4_1 \
-			s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1
+			s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1 \
+			s_trunc-sse4_1 s_truncf-sse4_1
 
 libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \
 			e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c
new file mode 100644
index 0000000000..6204ae3c77
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c
@@ -0,0 +1,2 @@
+#define __trunc __trunc_c /* Every __trunc token in the file included below is renamed to __trunc_c.  */
+#include <sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
new file mode 100644
index 0000000000..ff3ed9c947
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
@@ -0,0 +1,25 @@
+/* trunc for SSE4.1.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__trunc_sse41)
+	roundsd	$11, %xmm0, %xmm0	/* imm8 0b1011: bits 1:0 = 11 round toward zero, bit 2 = 0 take the mode from imm8 (not MXCSR), bit 3 = 1 suppress the precision (inexact) exception.  */
+	ret	/* The rounded value is left in %xmm0, the same register it was read from.  */
+END(__trunc_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
new file mode 100644
index 0000000000..fb27fc3ec9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
@@ -0,0 +1,29 @@
+/* Multiple versions of __trunc.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define trunc __redirect_trunc
+#define __trunc __redirect___trunc
+#include <math.h>
+#undef trunc
+#undef __trunc
+/* <math.h> was read above with trunc and __trunc renamed to __redirect_trunc and __redirect___trunc, so the real names are free to be defined below.  */
+#define SYMBOL_NAME trunc
+#include "ifunc-sse4_1.h"
+/* __trunc is defined through libc_ifunc_redirected using the result of IFUNC_SELECTOR (presumably supplied by ifunc-sse4_1.h for SYMBOL_NAME -- confirm there); trunc is then aliased to it via weak_alias.  */
+libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
+weak_alias (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c
new file mode 100644
index 0000000000..7a5ac7da1f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c
@@ -0,0 +1,2 @@
+#define __truncf __truncf_c /* Every __truncf token in the file included below is renamed to __truncf_c.  */
+#include <sysdeps/ieee754/flt-32/s_truncf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
new file mode 100644
index 0000000000..ccfe0d7031
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
@@ -0,0 +1,25 @@
+/* truncf for SSE4.1.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__truncf_sse41)
+	roundss	$11, %xmm0, %xmm0	/* imm8 0b1011: bits 1:0 = 11 round toward zero, bit 2 = 0 take the mode from imm8 (not MXCSR), bit 3 = 1 suppress the precision (inexact) exception.  */
+	ret	/* The rounded value is left in %xmm0, the same register it was read from.  */
+END(__truncf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
new file mode 100644
index 0000000000..6190c2ec78
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
@@ -0,0 +1,29 @@
+/* Multiple versions of __truncf.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define truncf __redirect_truncf
+#define __truncf __redirect___truncf
+#include <math.h>
+#undef truncf
+#undef __truncf
+/* <math.h> was read above with truncf and __truncf renamed to __redirect_truncf and __redirect___truncf, so the real names are free to be defined below.  */
+#define SYMBOL_NAME truncf
+#include "ifunc-sse4_1.h"
+/* __truncf is defined through libc_ifunc_redirected using the result of IFUNC_SELECTOR (presumably supplied by ifunc-sse4_1.h for SYMBOL_NAME -- confirm there); truncf is then aliased to it via weak_alias.  */
+libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
+weak_alias (__truncf, truncf)