diff options
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r-- | sysdeps/x86_64/fpu/libm-test-ulps | 33 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/Makefile | 4 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c | 4 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c | 33 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c | 4 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c | 33 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_fma.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_fmaf.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/strcmp-evex.S | 26 | ||||
-rw-r--r-- | sysdeps/x86_64/nptl/tcb-offsets.sym | 3 |
10 files changed, 94 insertions, 50 deletions
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index c2e36dcbdf..e3c811549c 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1062,6 +1062,7 @@ float128: 1 ldouble: 1 Function: Imaginary part of "csin": +float: 1 float128: 1 Function: Real part of "csin_downward": @@ -1431,25 +1432,21 @@ float: 1 Function: "exp10m1": double: 4 -float: 2 float128: 3 ldouble: 4 Function: "exp10m1_downward": double: 3 -float: 3 float128: 6 ldouble: 6 Function: "exp10m1_towardzero": double: 2 -float: 3 float128: 6 ldouble: 5 Function: "exp10m1_upward": double: 5 -float: 4 float128: 6 ldouble: 6 @@ -1499,25 +1496,21 @@ float: 1 Function: "exp2m1": double: 2 -float: 2 float128: 2 ldouble: 3 Function: "exp2m1_downward": double: 3 -float: 3 float128: 3 ldouble: 6 Function: "exp2m1_towardzero": double: 3 -float: 2 float128: 4 ldouble: 5 Function: "exp2m1_upward": double: 3 -float: 3 float128: 5 ldouble: 6 @@ -1558,25 +1551,21 @@ float: 1 Function: "expm1": double: 1 -float: 1 float128: 2 ldouble: 3 Function: "expm1_downward": double: 1 -float: 1 float128: 2 ldouble: 4 Function: "expm1_towardzero": double: 1 -float: 2 float128: 4 ldouble: 4 Function: "expm1_upward": double: 1 -float: 1 float128: 3 ldouble: 4 @@ -1809,49 +1798,41 @@ float: 1 Function: "log10p1": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log10p1_downward": double: 2 -float: 3 float128: 4 ldouble: 8 Function: "log10p1_towardzero": double: 3 -float: 2 float128: 3 ldouble: 8 Function: "log10p1_upward": double: 2 -float: 3 float128: 4 ldouble: 6 Function: "log1p": double: 1 -float: 1 float128: 3 ldouble: 2 Function: "log1p_downward": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log1p_towardzero": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log1p_upward": double: 2 -float: 2 float128: 2 ldouble: 3 @@ -1921,25 +1902,21 @@ float: 1 Function: "log2p1": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log2p1_downward": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "log2p1_towardzero": double: 2 -float: 2 float128: 2 ldouble: 4 Function: "log2p1_upward": double: 2 -float: 2 float128: 3 ldouble: 5 @@ -1981,25 +1958,21 @@ float: 3 Function: "logp1": double: 1 -float: 1 float128: 3 ldouble: 2 Function: "logp1_downward": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "logp1_towardzero": double: 2 -float: 2 float128: 3 ldouble: 4 Function: "logp1_upward": double: 2 -float: 2 float128: 2 ldouble: 3 @@ -2263,25 +2236,21 @@ double: 1 Function: "tgamma": double: 9 -float: 8 float128: 4 ldouble: 5 Function: "tgamma_downward": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_towardzero": double: 9 -float: 7 float128: 5 ldouble: 6 Function: "tgamma_upward": double: 9 -float: 8 float128: 4 ldouble: 5 diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index cbe09d49f4..e823d2fcc6 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -11,6 +11,8 @@ CFLAGS-s_log1p-fma.c = -mfma -mavx2 CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 CFLAGS-s_sincos-fma.c = -mfma -mavx2 +CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2 +CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2 CFLAGS-e_exp2f-fma.c = -mfma -mavx2 CFLAGS-e_expf-fma.c = -mfma -mavx2 @@ -72,6 +74,8 @@ libm-sysdep_routines += \ s_ceilf-sse4_1 \ s_cosf-fma \ s_cosf-sse2 \ + s_exp10m1f-fma \ + s_exp2m1f-fma \ s_expm1-fma \ s_floor-sse4_1 \ s_floorf-sse4_1 \ diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c new file mode 100644 index 0000000000..3dda04e2dd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c @@ -0,0 +1,4 @@ +#define __exp10m1f __exp10m1f_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c new file mode 100644 index 0000000000..8040b7ed79 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c @@ -0,0 +1,33 @@ +/* Multiple versions of exp10m1. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include <libm-alias-float.h> + +extern float __redirect_exp10m1f (float); + +# define SYMBOL_NAME exp10m1f +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_exp10m1f, __exp10m1f, IFUNC_SELECTOR ()); +libm_alias_float (__exp10m1, exp10m1) + +# define __exp10m1f __exp10m1f_sse2 +#endif +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c new file mode 100644 index 0000000000..bfa00eae4e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c @@ -0,0 +1,4 @@ +#define __exp2m1f __exp2m1f_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/flt-32/s_exp2m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c new file mode 100644 index 0000000000..8e2d7ec384 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c @@ -0,0 +1,33 @@ +/* Multiple versions of exp2m1. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include <libm-alias-float.h> + +extern float __redirect_exp2m1f (float); + +# define SYMBOL_NAME exp2m1f +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_exp2m1f, __exp2m1f, IFUNC_SELECTOR ()); +libm_alias_float (__exp2m1, exp2m1) + +# define __exp2m1f __exp2m1f_sse2 +#endif +#include <sysdeps/ieee754/flt-32/s_exp2m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c index 10e6e625b3..92409a2779 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fma.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c index 85725e29c0..b5b63fd125 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c @@ -9,7 +9,7 @@ The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S index 06730ab2a1..cea034f394 100644 --- a/sysdeps/x86_64/multiarch/strcmp-evex.S +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S @@ -209,7 +209,9 @@ returned. */ .section SECTION(.text), "ax", @progbits - .align 16 + /* Align 64 bytes here. This is to get the L(loop) block ideally + aligned for the DSB. */ + .align 64 .type STRCMP, @function .globl STRCMP # ifdef USE_AS_STRCASECMP_L @@ -509,9 +511,7 @@ L(ret4): ret # endif - /* 32 byte align here ensures the main loop is ideally aligned - for DSB. */ - .p2align 5 + .p2align 4,, 4 L(more_3x_vec): /* Safe to compare 4x vectors. */ VMOVU (VEC_SIZE)(%rdi), %VMM(0) @@ -1426,10 +1426,9 @@ L(less_32_till_page): L(ret_zero_page_cross_slow_case0): xorl %eax, %eax ret -# endif - - +# else .p2align 4,, 10 +# endif L(less_16_till_page): cmpl $((VEC_SIZE - 8) / SIZE_OF_CHAR), %eax ja L(less_8_till_page) @@ -1482,8 +1481,12 @@ L(less_16_till_page): # endif jmp L(prepare_loop_aligned) - - +# ifndef USE_AS_STRNCMP + /* Fits in aligning bytes. */ +L(ret_zero_4_loop): + xorl %eax, %eax + ret +# endif .p2align 4,, 10 L(less_8_till_page): @@ -1554,6 +1557,7 @@ L(ret_less_8_wcs): # ifdef USE_AS_STRNCMP .p2align 4,, 2 +L(ret_zero_4_loop): L(ret_zero_page_cross_slow_case1): xorl %eax, %eax ret @@ -1586,10 +1590,6 @@ L(less_4_loop): subq $-(CHAR_PER_VEC * 4), %rdx # endif jmp L(prepare_loop_aligned) - -L(ret_zero_4_loop): - xorl %eax, %eax - ret L(ret_less_4_loop): xorl %r8d, %eax subl %r8d, %eax diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym index 2bbd563a6c..988a4b8593 100644 --- a/sysdeps/x86_64/nptl/tcb-offsets.sym +++ b/sysdeps/x86_64/nptl/tcb-offsets.sym @@ -13,6 +13,3 @@ MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads) POINTER_GUARD offsetof (tcbhead_t, pointer_guard) FEATURE_1_OFFSET offsetof (tcbhead_t, feature_1) SSP_BASE_OFFSET offsetof (tcbhead_t, ssp_base) - --- Not strictly offsets, but these values are also used in the TCB. -TCB_CANCELED_BITMASK CANCELED_BITMASK |