diff options
author | Joseph Myers <joseph@codesourcery.com> | 2017-09-20 16:54:05 +0000 |
---|---|---|
committer | Joseph Myers <joseph@codesourcery.com> | 2017-09-20 16:54:05 +0000 |
commit | ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5 (patch) | |
tree | 83340587a4086402e9f1686c278aa1a264ef77e7 | |
parent | a856d4d4a8a56eaefdddb58884bfa2bfe922ee4c (diff) | |
download | glibc-ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5.tar.gz glibc-ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5.tar.xz glibc-ae8372d7e4c44f6839aa3d851d4d0cb486b81cd5.zip |
Add SSE4.1 trunc, truncf (bug 20142).
This patch adds SSE4.1 versions of trunc and truncf, using the roundsd / roundss instructions, similar to the SSE4.1 versions of the ceil, floor, rint and nearbyint functions we already have. In my testing with the glibc benchtests these are about 30% faster than the C versions for double, and about 20% faster for float. Tested for x86_64. [BZ #20142] * sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines): Add s_trunc-c, s_truncf-c, s_trunc-sse4_1 and s_truncf-sse4_1. * sysdeps/x86_64/fpu/multiarch/s_trunc-c.c: New file. * sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S: Likewise. * sysdeps/x86_64/fpu/multiarch/s_trunc.c: Likewise. * sysdeps/x86_64/fpu/multiarch/s_truncf-c.c: Likewise. * sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S: Likewise. * sysdeps/x86_64/fpu/multiarch/s_truncf.c: Likewise.
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/Makefile | 6 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_trunc-c.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S | 25 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_trunc.c | 29 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_truncf-c.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S | 25 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_truncf.c | 29 |
9 files changed, 130 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog index a07c903731..d85bb873a5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2017-09-20 Joseph Myers <joseph@codesourcery.com> + + [BZ #20142] + * sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines): + Add s_trunc-c, s_truncf-c, s_trunc-sse4_1 and s_truncf-sse4_1. + * sysdeps/x86_64/fpu/multiarch/s_trunc-c.c: New file. + * sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_trunc.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_truncf-c.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_truncf.c: Likewise. + 2017-09-20 Paul Pluzhnikov <ppluzhnikov@google.com> Ricky Zhou <rickyz@google.com> Anoop V Chakkalakkal <anoop.vijayan@in.ibm.com> diff --git a/NEWS b/NEWS index dd381f8930..a3aa94cb3b 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,8 @@ Major new features: * Optimized x86-64 asin, atan2, exp, expf, log, pow, atan, sin and tan with FMA, contributed by Arjan van de Ven and H.J. Lu from Intel. +* Optimized x86-64 trunc and truncf for processors with SSE4.1. + * In order to support faster and safer process termination the malloc API family of functions will no longer print a failure address and stack backtrace after detecting heap corruption. 
The goal is to minimize the diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 1b61795aff..d66055224a 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -1,10 +1,12 @@ ifeq ($(subdir),math) libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \ - s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c + s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c \ + s_trunc-c s_truncf-c libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \ s_floorf-sse4_1 s_nearbyint-sse4_1 \ - s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1 + s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1 \ + s_trunc-sse4_1 s_truncf-sse4_1 libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \ e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \ diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c new file mode 100644 index 0000000000..6204ae3c77 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c @@ -0,0 +1,2 @@ +#define __trunc __trunc_c +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S new file mode 100644 index 0000000000..ff3ed9c947 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S @@ -0,0 +1,25 @@ +/* trunc for SSE4.1. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .text.sse4.1,"ax",@progbits +ENTRY(__trunc_sse41) + roundsd $11, %xmm0, %xmm0 + ret +END(__trunc_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c new file mode 100644 index 0000000000..fb27fc3ec9 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c @@ -0,0 +1,29 @@ +/* Multiple versions of __trunc. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define trunc __redirect_trunc +#define __trunc __redirect___trunc +#include <math.h> +#undef trunc +#undef __trunc + +#define SYMBOL_NAME trunc +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ()); +weak_alias (__trunc, trunc) diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c new file mode 100644 index 0000000000..7a5ac7da1f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c @@ -0,0 +1,2 @@ +#define __truncf __truncf_c +#include <sysdeps/ieee754/flt-32/s_truncf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S new file mode 100644 index 0000000000..ccfe0d7031 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S @@ -0,0 +1,25 @@ +/* truncf for SSE4.1. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .section .text.sse4.1,"ax",@progbits +ENTRY(__truncf_sse41) + roundss $11, %xmm0, %xmm0 + ret +END(__truncf_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c new file mode 100644 index 0000000000..6190c2ec78 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c @@ -0,0 +1,29 @@ +/* Multiple versions of __truncf. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define truncf __redirect_truncf +#define __truncf __redirect___truncf +#include <math.h> +#undef truncf +#undef __truncf + +#define SYMBOL_NAME truncf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ()); +weak_alias (__truncf, truncf) |