about summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu/multiarch/Makefile
diff options
context:
space:
mode:
author Joseph Myers <joseph@codesourcery.com> 2017-09-20 16:54:05 +0000
committer Joseph Myers <joseph@codesourcery.com> 2017-09-20 16:54:05 +0000
commit ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5 (patch)
tree 83340587a4086402e9f1686c278aa1a264ef77e7 /sysdeps/x86_64/fpu/multiarch/Makefile
parent a856d4d4a8a56eaefdddb58884bfa2bfe922ee4c (diff)
download glibc-ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5.tar.gz
glibc-ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5.tar.xz
glibc-ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5.zip
Add SSE4.1 trunc, truncf (bug 20142).
This patch adds SSE4.1 versions of trunc and truncf, using the roundsd
/ roundss instructions, similar to the versions of ceil, floor, rint
and nearbyint functions we already have.  In my testing with the glibc
benchtests these are about 30% faster than the C versions for double,
20% faster for float.

Tested for x86_64.

	[BZ #20142]
	* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add s_trunc-c, s_truncf-c, s_trunc-sse4_1 and s_truncf-sse4_1.
	* sysdeps/x86_64/fpu/multiarch/s_trunc-c.c: New file.
	* sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S: Likewise.
	* sysdeps/x86_64/fpu/multiarch/s_trunc.c: Likewise.
	* sysdeps/x86_64/fpu/multiarch/s_truncf-c.c: Likewise.
	* sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S: Likewise.
	* sysdeps/x86_64/fpu/multiarch/s_truncf.c: Likewise.
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch/Makefile')
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/Makefile 6
1 files changed, 4 insertions, 2 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 1b61795aff..d66055224a 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -1,10 +1,12 @@
 ifeq ($(subdir),math)
 libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
-			s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c
+			s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c \
+			s_trunc-c s_truncf-c
 
 libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \
 			s_floorf-sse4_1 s_nearbyint-sse4_1 \
-			s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1
+			s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1 \
+			s_trunc-sse4_1 s_truncf-sse4_1
 
 libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \
 			e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \