From f20f980c71651e2b2c49e717f547d958cbe29a55 Mon Sep 17 00:00:00 2001 From: Sunil K Pandey Date: Wed, 22 Dec 2021 06:20:41 -0800 Subject: x86-64: Add vector acos/acosf implementation to libmvec Implement vectorized acos/acosf containing SSE, AVX, AVX2 and AVX512 versions for libmvec as per vector ABI. It also contains accuracy and ABI tests for vector acos/acosf with regenerated ulps. Reviewed-by: H.J. Lu --- sysdeps/x86_64/fpu/Makeconfig | 1 + sysdeps/x86_64/fpu/Versions | 4 + sysdeps/x86_64/fpu/libm-test-ulps | 20 ++ .../fpu/multiarch/ifunc-mathvec-avx512-skx.h | 39 +++ .../x86_64/fpu/multiarch/svml_d_acos2_core-sse2.S | 20 ++ sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core.c | 27 ++ .../x86_64/fpu/multiarch/svml_d_acos2_core_sse4.S | 303 ++++++++++++++++++++ .../x86_64/fpu/multiarch/svml_d_acos4_core-sse.S | 20 ++ sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core.c | 27 ++ .../x86_64/fpu/multiarch/svml_d_acos4_core_avx2.S | 285 +++++++++++++++++++ .../x86_64/fpu/multiarch/svml_d_acos8_core-avx2.S | 20 ++ sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core.c | 27 ++ .../fpu/multiarch/svml_d_acos8_core_avx512.S | 307 +++++++++++++++++++++ .../fpu/multiarch/svml_s_acosf16_core-avx2.S | 20 ++ sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core.c | 28 ++ .../fpu/multiarch/svml_s_acosf16_core_avx512.S | 271 ++++++++++++++++++ .../x86_64/fpu/multiarch/svml_s_acosf4_core-sse2.S | 20 ++ sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core.c | 28 ++ .../x86_64/fpu/multiarch/svml_s_acosf4_core_sse4.S | 270 ++++++++++++++++++ .../x86_64/fpu/multiarch/svml_s_acosf8_core-sse.S | 20 ++ sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core.c | 28 ++ .../x86_64/fpu/multiarch/svml_s_acosf8_core_avx2.S | 264 ++++++++++++++++++ sysdeps/x86_64/fpu/svml_d_acos2_core.S | 29 ++ sysdeps/x86_64/fpu/svml_d_acos4_core.S | 29 ++ sysdeps/x86_64/fpu/svml_d_acos4_core_avx.S | 25 ++ sysdeps/x86_64/fpu/svml_d_acos8_core.S | 25 ++ sysdeps/x86_64/fpu/svml_s_acosf16_core.S | 25 ++ sysdeps/x86_64/fpu/svml_s_acosf4_core.S 
| 29 ++ sysdeps/x86_64/fpu/svml_s_acosf8_core.S | 29 ++ sysdeps/x86_64/fpu/svml_s_acosf8_core_avx.S | 25 ++ sysdeps/x86_64/fpu/test-double-libmvec-acos-avx.c | 1 + sysdeps/x86_64/fpu/test-double-libmvec-acos-avx2.c | 1 + .../x86_64/fpu/test-double-libmvec-acos-avx512f.c | 1 + sysdeps/x86_64/fpu/test-double-libmvec-acos.c | 3 + sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c | 1 + .../x86_64/fpu/test-double-vlen4-avx2-wrappers.c | 1 + sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c | 1 + sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c | 1 + sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx.c | 1 + sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx2.c | 1 + .../x86_64/fpu/test-float-libmvec-acosf-avx512f.c | 1 + sysdeps/x86_64/fpu/test-float-libmvec-acosf.c | 3 + sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c | 1 + sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c | 1 + .../x86_64/fpu/test-float-vlen8-avx2-wrappers.c | 1 + sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c | 1 + 46 files changed, 2285 insertions(+) create mode 100644 sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512-skx.h create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core-sse2.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core.c create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core_sse4.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core-sse.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core.c create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core_avx2.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core-avx2.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core.c create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core_avx512.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core-avx2.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core.c create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core_avx512.S create mode 100644 
sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core-sse2.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core.c create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core_sse4.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core-sse.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core.c create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core_avx2.S create mode 100644 sysdeps/x86_64/fpu/svml_d_acos2_core.S create mode 100644 sysdeps/x86_64/fpu/svml_d_acos4_core.S create mode 100644 sysdeps/x86_64/fpu/svml_d_acos4_core_avx.S create mode 100644 sysdeps/x86_64/fpu/svml_d_acos8_core.S create mode 100644 sysdeps/x86_64/fpu/svml_s_acosf16_core.S create mode 100644 sysdeps/x86_64/fpu/svml_s_acosf4_core.S create mode 100644 sysdeps/x86_64/fpu/svml_s_acosf8_core.S create mode 100644 sysdeps/x86_64/fpu/svml_s_acosf8_core_avx.S create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-acos-avx.c create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-acos-avx2.c create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-acos-avx512f.c create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-acos.c create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx.c create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx2.c create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx512f.c create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-acosf.c (limited to 'sysdeps/x86_64') diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig index b0e3bf7887..7acf1f306c 100644 --- a/sysdeps/x86_64/fpu/Makeconfig +++ b/sysdeps/x86_64/fpu/Makeconfig @@ -22,6 +22,7 @@ postclean-generated += libmvec.mk # Define for both math and mathvec directories. 
libmvec-funcs = \ + acos \ cos \ exp \ log \ diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions index 08132045d6..2985fe7ca7 100644 --- a/sysdeps/x86_64/fpu/Versions +++ b/sysdeps/x86_64/fpu/Versions @@ -13,4 +13,8 @@ libmvec { _ZGVbN4vv_powf; _ZGVcN8vv_powf; _ZGVdN8vv_powf; _ZGVeN16vv_powf; _ZGVbN4vvv_sincosf; _ZGVcN8vvv_sincosf; _ZGVdN8vvv_sincosf; _ZGVeN16vvv_sincosf; } + GLIBC_2.35 { + _ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos; + _ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf; + } } diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 1c75f0ead4..6c12976c82 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -25,6 +25,26 @@ float: 1 float128: 1 ldouble: 2 +Function: "acos_vlen16": +float: 1 + +Function: "acos_vlen2": +double: 1 + +Function: "acos_vlen4": +double: 1 +float: 2 + +Function: "acos_vlen4_avx2": +double: 1 + +Function: "acos_vlen8": +double: 1 +float: 2 + +Function: "acos_vlen8_avx2": +float: 1 + Function: "acosh": double: 2 float: 2 diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512-skx.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512-skx.h new file mode 100644 index 0000000000..3aed563dde --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512-skx.h @@ -0,0 +1,39 @@ +/* Common definition for libmathvec ifunc selections optimized with + AVX512. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <init-arch.h> + +#undef PASTER2 +#define PASTER2(x,y) x##_##y + +extern void REDIRECT_NAME (void); +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_wrapper) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (skx) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + /* Use skx unless MathVec_Prefer_No_AVX512 is set or AVX512DQ is unusable. */ + if (!CPU_FEATURES_ARCH_P (cpu_features, MathVec_Prefer_No_AVX512) + && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)) + return OPTIMIZE (skx); + + return OPTIMIZE (avx2_wrapper); +} diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core-sse2.S new file mode 100644 index 0000000000..25fb8d0cac --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized acos, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define _ZGVbN2v_acos _ZGVbN2v_acos_sse2 +#include "../svml_d_acos2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core.c new file mode 100644 index 0000000000..5ba5d6fac2 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized acos, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_acos +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_acos, __GI__ZGVbN2v_acos, __redirect__ZGVbN2v_acos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core_sse4.S new file mode 100644 index 0000000000..aea45279ce --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_acos2_core_sse4.S @@ -0,0 +1,303 @@ +/* Function acos vectorized with SSE4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. */ + +/* + * ALGORITHM DESCRIPTION: + * + * SelMask = (|x| >= 0.5) ? 1 : 0; + * R = SelMask ? sqrt(0.5 - 0.5*|x|) : |x| + * acos(|x|) = SelMask ? 2*Poly(R) : (Pi/2 - Poly(R)) + * acos(x) = sign(x) ? (Pi - acos(|x|)) : acos(|x|) + * + */ + +/* Offsets for data table __svml_dacos_data_internal + */ +#define SgnBit 0 +#define OneHalf 16 +#define SmallNorm 32 +#define MOne 48 +#define Two 64 +#define sqrt_coeff 80 +#define poly_coeff 144 +#define PiH 336 +#define Pi2H 352 + +#include + + .text + .section .text.sse4,"ax",@progbits +ENTRY(_ZGVbN2v_acos_sse4) + subq $72, %rsp + cfi_def_cfa_offset(80) + movaps %xmm0, %xmm5 + movups __svml_dacos_data_internal(%rip), %xmm3 + movups OneHalf+__svml_dacos_data_internal(%rip), %xmm6 + +/* x = -|arg| */ + movaps %xmm3, %xmm4 + orps %xmm5, %xmm4 + +/* Y = 0.5 + 0.5*(-x) */ + movaps %xmm6, %xmm7 + mulpd %xmm4, %xmm7 + addpd %xmm7, %xmm6 + +/* S ~ 2*sqrt(Y) */ + cvtpd2ps %xmm6, %xmm9 + movlhps %xmm9, %xmm9 + +/* x^2 */ + movaps %xmm4, %xmm0 + rsqrtps %xmm9, %xmm10 + mulpd %xmm4, %xmm0 + cvtps2pd %xmm10, %xmm11 + minpd %xmm6, %xmm0 + movaps %xmm6, %xmm1 + movaps %xmm0, %xmm2 + cmpltpd SmallNorm+__svml_dacos_data_internal(%rip), %xmm1 + cmpnltpd %xmm6, %xmm2 + addpd %xmm6, %xmm6 + andnps %xmm11, %xmm1 + movaps %xmm0, %xmm11 + movaps %xmm1, %xmm12 + andps %xmm5, %xmm3 + mulpd 
%xmm1, %xmm12 + mulpd %xmm6, %xmm1 + mulpd %xmm12, %xmm6 + mulpd %xmm0, %xmm11 + subpd Two+__svml_dacos_data_internal(%rip), %xmm6 + movups sqrt_coeff+__svml_dacos_data_internal(%rip), %xmm13 + movaps %xmm6, %xmm14 + mulpd %xmm6, %xmm13 + mulpd %xmm1, %xmm14 + addpd sqrt_coeff+16+__svml_dacos_data_internal(%rip), %xmm13 + mulpd %xmm6, %xmm13 + addpd sqrt_coeff+32+__svml_dacos_data_internal(%rip), %xmm13 + mulpd %xmm13, %xmm6 + +/* polynomial */ + movups poly_coeff+__svml_dacos_data_internal(%rip), %xmm15 + movaps %xmm11, %xmm7 + mulpd %xmm0, %xmm15 + addpd sqrt_coeff+48+__svml_dacos_data_internal(%rip), %xmm6 + addpd poly_coeff+16+__svml_dacos_data_internal(%rip), %xmm15 + mulpd %xmm11, %xmm7 + mulpd %xmm6, %xmm14 + mulpd %xmm11, %xmm15 + subpd %xmm14, %xmm1 + movups MOne+__svml_dacos_data_internal(%rip), %xmm8 + andps %xmm2, %xmm1 + +/* NaN processed in special branch (so wind test passed) */ + cmpnlepd %xmm4, %xmm8 + movmskpd %xmm8, %edx + +/* X. */ + +#define _ZGVdN4v_acos _ZGVdN4v_acos_sse_wrapper +#include "../svml_d_acos4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core.c new file mode 100644 index 0000000000..6453e7ebe2 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized acos, vector length is 4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVdN4v_acos +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_acos, __GI__ZGVdN4v_acos, __redirect__ZGVdN4v_acos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core_avx2.S new file mode 100644 index 0000000000..bf85bdbd37 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_acos4_core_avx2.S @@ -0,0 +1,285 @@ +/* Function acos vectorized with AVX2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* + * ALGORITHM DESCRIPTION: + * + * SelMask = (|x| >= 0.5) ? 1 : 0; + * R = SelMask ? sqrt(0.5 - 0.5*|x|) : |x| + * acos(|x|) = SelMask ? 2*Poly(R) : (Pi/2 - Poly(R)) + * acos(x) = sign(x) ? 
(Pi - acos(|x|)) : acos(|x|) + * + */ + +/* Offsets for data table __svml_dacos_data_internal + */ +#define SgnBit 0 +#define OneHalf 32 +#define SmallNorm 64 +#define MOne 96 +#define Two 128 +#define sqrt_coeff 160 +#define poly_coeff 288 +#define PiH 672 +#define Pi2H 704 + +#include + + .text + .section .text.avx2,"ax",@progbits +ENTRY(_ZGVdN4v_acos_avx2) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-32, %rsp + subq $96, %rsp + vmovupd __svml_dacos_data_internal(%rip), %ymm6 + vmovupd OneHalf+__svml_dacos_data_internal(%rip), %ymm7 + vmovapd %ymm0, %ymm5 + +/* x = -|arg| */ + vorpd %ymm5, %ymm6, %ymm4 + +/* Y = 0.5 + 0.5*(-x) */ + vfmadd231pd %ymm4, %ymm7, %ymm7 + +/* x^2 */ + vmulpd %ymm4, %ymm4, %ymm8 + +/* S ~ 2*sqrt(Y) */ + vmovupd sqrt_coeff+__svml_dacos_data_internal(%rip), %ymm0 + vcmplt_oqpd SmallNorm+__svml_dacos_data_internal(%rip), %ymm7, %ymm12 + vminpd %ymm7, %ymm8, %ymm2 + +/* NaN processed in special branch (so wind test passed) */ + vcmpnge_uqpd MOne+__svml_dacos_data_internal(%rip), %ymm4, %ymm9 + vcvtpd2ps %ymm7, %xmm10 + vmovupd poly_coeff+64+__svml_dacos_data_internal(%rip), %ymm8 + vcmpnlt_uqpd %ymm7, %ymm2, %ymm1 + vrsqrtps %xmm10, %xmm11 + vfmadd213pd poly_coeff+96+__svml_dacos_data_internal(%rip), %ymm2, %ymm8 + vcvtps2pd %xmm11, %ymm13 + vmovupd poly_coeff+128+__svml_dacos_data_internal(%rip), %ymm11 + vandnpd %ymm13, %ymm12, %ymm14 + vmulpd %ymm14, %ymm14, %ymm15 + vfmadd213pd poly_coeff+160+__svml_dacos_data_internal(%rip), %ymm2, %ymm11 + vmulpd %ymm2, %ymm2, %ymm13 + vmovupd poly_coeff+256+__svml_dacos_data_internal(%rip), %ymm12 + vmulpd %ymm13, %ymm13, %ymm10 + vfmadd213pd poly_coeff+288+__svml_dacos_data_internal(%rip), %ymm2, %ymm12 + vandpd %ymm5, %ymm6, %ymm3 + vaddpd %ymm7, %ymm7, %ymm6 + vmulpd %ymm6, %ymm14, %ymm7 + vfmsub213pd Two+__svml_dacos_data_internal(%rip), %ymm15, %ymm6 + vmovupd poly_coeff+320+__svml_dacos_data_internal(%rip), %ymm14 + vfmadd213pd 
sqrt_coeff+32+__svml_dacos_data_internal(%rip), %ymm6, %ymm0 + vmulpd %ymm6, %ymm7, %ymm15 + vfmadd213pd poly_coeff+352+__svml_dacos_data_internal(%rip), %ymm2, %ymm14 + vfmadd213pd sqrt_coeff+64+__svml_dacos_data_internal(%rip), %ymm6, %ymm0 + vfmadd213pd sqrt_coeff+96+__svml_dacos_data_internal(%rip), %ymm6, %ymm0 + +/* polynomial */ + vmovupd poly_coeff+__svml_dacos_data_internal(%rip), %ymm6 + vfnmadd213pd %ymm7, %ymm15, %ymm0 + vfmadd213pd poly_coeff+32+__svml_dacos_data_internal(%rip), %ymm2, %ymm6 + vblendvpd %ymm1, %ymm0, %ymm4, %ymm0 + vfmadd213pd %ymm8, %ymm13, %ymm6 + vmovmskpd %ymm9, %edx + vmovupd poly_coeff+192+__svml_dacos_data_internal(%rip), %ymm9 + vfmadd213pd poly_coeff+224+__svml_dacos_data_internal(%rip), %ymm2, %ymm9 + vfmadd213pd %ymm9, %ymm13, %ymm11 + vfmadd213pd %ymm11, %ymm10, %ymm6 + vfmadd213pd %ymm12, %ymm13, %ymm6 + vfmadd213pd %ymm14, %ymm13, %ymm6 + vmulpd %ymm6, %ymm2, %ymm9 + +/* X. */ + +#define _ZGVeN8v_acos _ZGVeN8v_acos_avx2_wrapper +#include "../svml_d_acos8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core.c new file mode 100644 index 0000000000..1e7d1865fb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized acos, vector length is 8. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVeN8v_acos +#include "ifunc-mathvec-avx512-skx.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_acos, __GI__ZGVeN8v_acos, __redirect__ZGVeN8v_acos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core_avx512.S new file mode 100644 index 0000000000..521ff739c5 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_acos8_core_avx512.S @@ -0,0 +1,307 @@ +/* Function acos vectorized with AVX-512. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* + * ALGORITHM DESCRIPTION: + * + * SelMask = (|x| >= 0.5) ? 1 : 0; + * R = SelMask ? sqrt(0.5 - 0.5*|x|) : |x| + * acos(|x|) = SelMask ? 2*Poly(R) : (Pi/2 - Poly(R)) + * acos(x) = sign(x) ? 
(Pi - acos(|x|)) : acos(|x|) + * + */ + +/* Offsets for data table __svml_dacos_data_internal + */ +#define SgnBit 0 +#define OneHalf 64 +#define SmallNorm 128 +#define MOne 192 +#define Two 256 +#define sqrt_coeff_1 320 +#define sqrt_coeff_2 384 +#define sqrt_coeff_3 448 +#define sqrt_coeff_4 512 +#define poly_coeff_1 576 +#define poly_coeff_2 640 +#define poly_coeff_3 704 +#define poly_coeff_4 768 +#define poly_coeff_5 832 +#define poly_coeff_6 896 +#define poly_coeff_7 960 +#define poly_coeff_8 1024 +#define poly_coeff_9 1088 +#define poly_coeff_10 1152 +#define poly_coeff_11 1216 +#define poly_coeff_12 1280 +#define PiH 1344 +#define Pi2H 1408 + +#include + + .text + .section .text.evex512,"ax",@progbits +ENTRY(_ZGVeN8v_acos_skx) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-64, %rsp + subq $192, %rsp + vmovups __svml_dacos_data_internal(%rip), %zmm7 + vmovups OneHalf+__svml_dacos_data_internal(%rip), %zmm8 + +/* S ~ 2*sqrt(Y) */ + vmovups SmallNorm+__svml_dacos_data_internal(%rip), %zmm11 + vmovups Two+__svml_dacos_data_internal(%rip), %zmm14 + vmovups sqrt_coeff_1+__svml_dacos_data_internal(%rip), %zmm15 + vmovups sqrt_coeff_2+__svml_dacos_data_internal(%rip), %zmm2 + vmovups sqrt_coeff_3+__svml_dacos_data_internal(%rip), %zmm1 + vmovups MOne+__svml_dacos_data_internal(%rip), %zmm10 + vmovaps %zmm0, %zmm6 + +/* x = -|arg| */ + vorpd %zmm6, %zmm7, %zmm5 + vandpd %zmm6, %zmm7, %zmm4 + +/* Y = 0.5 + 0.5*(-x) */ + vfmadd231pd {rn-sae}, %zmm5, %zmm8, %zmm8 + +/* x^2 */ + vmulpd {rn-sae}, %zmm5, %zmm5, %zmm9 + vrsqrt14pd %zmm8, %zmm12 + vcmppd $17, {sae}, %zmm11, %zmm8, %k1 + vcmppd $17, {sae}, %zmm10, %zmm5, %k0 + vmovups poly_coeff_5+__svml_dacos_data_internal(%rip), %zmm10 + vmovups poly_coeff_7+__svml_dacos_data_internal(%rip), %zmm11 + vminpd {sae}, %zmm8, %zmm9, %zmm3 + vmovups poly_coeff_3+__svml_dacos_data_internal(%rip), %zmm9 + vxorpd %zmm12, %zmm12, %zmm12{%k1} + vaddpd {rn-sae}, %zmm8, %zmm8, 
%zmm0 + vcmppd $21, {sae}, %zmm8, %zmm3, %k4 + +/* X. */ + +#define _ZGVeN16v_acosf _ZGVeN16v_acosf_avx2_wrapper +#include "../svml_s_acosf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core.c new file mode 100644 index 0000000000..fcf05782c5 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized acosf, vector length is 16. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define SYMBOL_NAME _ZGVeN16v_acosf +#include "ifunc-mathvec-avx512-skx.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_acosf, __GI__ZGVeN16v_acosf, + __redirect__ZGVeN16v_acosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core_avx512.S new file mode 100644 index 0000000000..36f08c492e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf16_core_avx512.S @@ -0,0 +1,271 @@ +/* Function acosf vectorized with AVX-512. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* + * ALGORITHM DESCRIPTION: + * + * SelMask = (|x| >= 0.5) ? 1 : 0; + * R = SelMask ? sqrt(0.5 - 0.5*|x|) : |x| + * acos(|x|) = SelMask ? 2*Poly(R) : (Pi/2 - Poly(R)) + * acos(x) = sign(x) ? (Pi - acos(|x|)) : acos(|x|) + * + * + */ + +/* Offsets for data table __svml_sacos_data_internal + */ +#define SgnBit 0 +#define OneHalf 64 +#define SmallNorm 128 +#define MOne 192 +#define Two 256 +#define sqrt_coeff_1 320 +#define sqrt_coeff_2 384 +#define poly_coeff_1 448 +#define poly_coeff_2 512 +#define poly_coeff_3 576 +#define poly_coeff_4 640 +#define poly_coeff_5 704 +#define Pi2H 768 +#define PiH 832 + +#include <sysdep.h> + + .text + .section .text.evex512,"ax",@progbits +ENTRY(_ZGVeN16v_acosf_skx) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-64, %rsp + subq $192, %rsp + vmovups __svml_sacos_data_internal(%rip), %zmm5 + vmovups OneHalf+__svml_sacos_data_internal(%rip), %zmm6 + +/* SQ ~ 2*sqrt(Y) */ + vmovups SmallNorm+__svml_sacos_data_internal(%rip), %zmm9 + vmovups MOne+__svml_sacos_data_internal(%rip), %zmm8 + vmovups Two+__svml_sacos_data_internal(%rip), %zmm12 + vmovups sqrt_coeff_1+__svml_sacos_data_internal(%rip), %zmm13 + vmovaps %zmm0, %zmm4 + +/* x = -|arg| */ + vorps %zmm4, %zmm5, %zmm3 + vandps %zmm4, %zmm5, %zmm2 + vmovups 
sqrt_coeff_2+__svml_sacos_data_internal(%rip), %zmm0 + +/* Y = 0.5 + 0.5*(-x) */ + vfmadd231ps {rn-sae}, %zmm3, %zmm6, %zmm6 + +/* x^2 */ + vmulps {rn-sae}, %zmm3, %zmm3, %zmm7 + vrsqrt14ps %zmm6, %zmm10 + vcmpps $17, {sae}, %zmm9, %zmm6, %k1 + vcmpps $22, {sae}, %zmm3, %zmm8, %k0 + vmovups poly_coeff_4+__svml_sacos_data_internal(%rip), %zmm9 + vminps {sae}, %zmm6, %zmm7, %zmm1 + vmovups poly_coeff_3+__svml_sacos_data_internal(%rip), %zmm7 + vxorps %zmm10, %zmm10, %zmm10{%k1} + vaddps {rn-sae}, %zmm6, %zmm6, %zmm14 + vmulps {rn-sae}, %zmm1, %zmm1, %zmm8 + vmulps {rn-sae}, %zmm10, %zmm10, %zmm11 + vmulps {rn-sae}, %zmm10, %zmm14, %zmm5 + vcmpps $21, {sae}, %zmm6, %zmm1, %k4 + +/* X. */ + +#define _ZGVbN4v_acosf _ZGVbN4v_acosf_sse2 +#include "../svml_s_acosf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core.c new file mode 100644 index 0000000000..6f9a5c1082 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized acosf, vector length is 4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#define SYMBOL_NAME _ZGVbN4v_acosf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_acosf, __GI__ZGVbN4v_acosf, + __redirect__ZGVbN4v_acosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core_sse4.S new file mode 100644 index 0000000000..3b7c25a961 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf4_core_sse4.S @@ -0,0 +1,270 @@ +/* Function acosf vectorized with SSE4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. */ + +/* + * ALGORITHM DESCRIPTION: + * + * SelMask = (|x| >= 0.5) ? 1 : 0; + * R = SelMask ? sqrt(0.5 - 0.5*|x|) : |x| + * acos(|x|) = SelMask ? 2*Poly(R) : (Pi/2 - Poly(R)) + * acos(x) = sign(x) ? (Pi - acos(|x|)) : acos(|x|) + * + * + */ + +/* Offsets for data table __svml_sacos_data_internal + */ +#define SgnBit 0 +#define OneHalf 16 +#define SmallNorm 32 +#define MOne 48 +#define Two 64 +#define sqrt_coeff 80 +#define poly_coeff 112 +#define Pi2H 192 +#define PiH 208 + +#include + + .text + .section .text.sse4,"ax",@progbits +ENTRY(_ZGVbN4v_acosf_sse4) + subq $72, %rsp + cfi_def_cfa_offset(80) + +/* X. 
*/ + +#define _ZGVdN8v_acosf _ZGVdN8v_acosf_sse_wrapper +#include "../svml_s_acosf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core.c new file mode 100644 index 0000000000..dd360a9479 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized acosf, vector length is 8. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVdN8v_acosf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_acosf, __GI__ZGVdN8v_acosf, + __redirect__ZGVdN8v_acosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core_avx2.S new file mode 100644 index 0000000000..bc783e5825 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_acosf8_core_avx2.S @@ -0,0 +1,264 @@ +/* Function acosf vectorized with AVX2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. */ + +/* + * ALGORITHM DESCRIPTION: + * + * SelMask = (|x| >= 0.5) ? 1 : 0; + * R = SelMask ? sqrt(0.5 - 0.5*|x|) : |x| + * acos(|x|) = SelMask ? 2*Poly(R) : (Pi/2 - Poly(R)) + * acos(x) = sign(x) ? (Pi - acos(|x|)) : acos(|x|) + * + * + */ + +/* Offsets for data table __svml_sacos_data_internal + */ +#define SgnBit 0 +#define OneHalf 32 +#define SmallNorm 64 +#define MOne 96 +#define Two 128 +#define sqrt_coeff 160 +#define poly_coeff 224 +#define Pi2H 384 +#define PiH 416 + +#include <sysdep.h> + + .text + .section .text.avx2,"ax",@progbits +ENTRY(_ZGVdN8v_acosf_avx2) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-32, %rsp + subq $96, %rsp + +/* + * 2*sqrt(X) ~ Sh - Sl (to 24+ bits) + * SQ ~ 2*sqrt(X) + */ + vmovups __svml_sacos_data_internal(%rip), %ymm6 + vmovups OneHalf+__svml_sacos_data_internal(%rip), %ymm7 + vmovaps %ymm0, %ymm5 + +/* x = -|arg| */ + vorps %ymm5, %ymm6, %ymm4 + +/* Y = 0.5 + 0.5*(-x) */ + vfmadd231ps %ymm4, %ymm7, %ymm7 + +/* x^2 */ + vmulps %ymm4, %ymm4, %ymm8 + +/* SQ ~ 2*sqrt(Y) */ + vmovups sqrt_coeff+__svml_sacos_data_internal(%rip), %ymm0 + vcmpnge_uqps MOne+__svml_sacos_data_internal(%rip), %ymm4, %ymm9 + vcmplt_oqps SmallNorm+__svml_sacos_data_internal(%rip), %ymm7, %ymm10 + vminps %ymm7, %ymm8, %ymm2 + vaddps %ymm7, %ymm7, 
%ymm14 + vrsqrtps %ymm7, %ymm11 + vmovups poly_coeff+64+__svml_sacos_data_internal(%rip), %ymm8 + vcmpnlt_uqps %ymm7, %ymm2, %ymm1 + vmulps %ymm2, %ymm2, %ymm7 + vfmadd213ps poly_coeff+96+__svml_sacos_data_internal(%rip), %ymm2, %ymm8 + vmovmskps %ymm9, %edx + +/* polynomial */ + vmovups poly_coeff+__svml_sacos_data_internal(%rip), %ymm9 + vandnps %ymm11, %ymm10, %ymm12 + vmulps %ymm12, %ymm12, %ymm13 + vfmadd213ps poly_coeff+32+__svml_sacos_data_internal(%rip), %ymm2, %ymm9 + +/* X. */ + +#include <sysdep.h> +#include "svml_d_wrapper_impl.h" + + .text +ENTRY (_ZGVbN2v_acos) +WRAPPER_IMPL_SSE2 acos +END (_ZGVbN2v_acos) + +#ifndef USE_MULTIARCH + libmvec_hidden_def (_ZGVbN2v_acos) +#endif diff --git a/sysdeps/x86_64/fpu/svml_d_acos4_core.S b/sysdeps/x86_64/fpu/svml_d_acos4_core.S new file mode 100644 index 0000000000..e99cb4ae78 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_acos4_core.S @@ -0,0 +1,29 @@ +/* Function acos vectorized with AVX2, wrapper version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include "svml_d_wrapper_impl.h" + + .text +ENTRY (_ZGVdN4v_acos) +WRAPPER_IMPL_AVX _ZGVbN2v_acos +END (_ZGVdN4v_acos) + +#ifndef USE_MULTIARCH + libmvec_hidden_def (_ZGVdN4v_acos) +#endif diff --git a/sysdeps/x86_64/fpu/svml_d_acos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_acos4_core_avx.S new file mode 100644 index 0000000000..7cbcbc965c --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_acos4_core_avx.S @@ -0,0 +1,25 @@ +/* Function acos vectorized in AVX ISA as wrapper to SSE4 ISA version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "svml_d_wrapper_impl.h" + + .text +ENTRY (_ZGVcN4v_acos) +WRAPPER_IMPL_AVX _ZGVbN2v_acos +END (_ZGVcN4v_acos) diff --git a/sysdeps/x86_64/fpu/svml_d_acos8_core.S b/sysdeps/x86_64/fpu/svml_d_acos8_core.S new file mode 100644 index 0000000000..e26b30d81a --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_acos8_core.S @@ -0,0 +1,25 @@ +/* Function acos vectorized with AVX-512, wrapper to AVX2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "svml_d_wrapper_impl.h" + + .text +ENTRY (_ZGVeN8v_acos) +WRAPPER_IMPL_AVX512 _ZGVdN4v_acos +END (_ZGVeN8v_acos) diff --git a/sysdeps/x86_64/fpu/svml_s_acosf16_core.S b/sysdeps/x86_64/fpu/svml_s_acosf16_core.S new file mode 100644 index 0000000000..70e046d492 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_s_acosf16_core.S @@ -0,0 +1,25 @@ +/* Function acosf vectorized with AVX-512. Wrapper to AVX2 version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include "svml_s_wrapper_impl.h" + + .text +ENTRY (_ZGVeN16v_acosf) +WRAPPER_IMPL_AVX512 _ZGVdN8v_acosf +END (_ZGVeN16v_acosf) diff --git a/sysdeps/x86_64/fpu/svml_s_acosf4_core.S b/sysdeps/x86_64/fpu/svml_s_acosf4_core.S new file mode 100644 index 0000000000..36354b32b5 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_s_acosf4_core.S @@ -0,0 +1,29 @@ +/* Function acosf vectorized with SSE2, wrapper version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "svml_s_wrapper_impl.h" + + .text +ENTRY (_ZGVbN4v_acosf) +WRAPPER_IMPL_SSE2 acosf +END (_ZGVbN4v_acosf) + +#ifndef USE_MULTIARCH + libmvec_hidden_def (_ZGVbN4v_acosf) +#endif diff --git a/sysdeps/x86_64/fpu/svml_s_acosf8_core.S b/sysdeps/x86_64/fpu/svml_s_acosf8_core.S new file mode 100644 index 0000000000..f08864a511 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_s_acosf8_core.S @@ -0,0 +1,29 @@ +/* Function acosf vectorized with AVX2, wrapper version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "svml_s_wrapper_impl.h" + + .text +ENTRY (_ZGVdN8v_acosf) +WRAPPER_IMPL_AVX _ZGVbN4v_acosf +END (_ZGVdN8v_acosf) + +#ifndef USE_MULTIARCH + libmvec_hidden_def (_ZGVdN8v_acosf) +#endif diff --git a/sysdeps/x86_64/fpu/svml_s_acosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_acosf8_core_avx.S new file mode 100644 index 0000000000..f3ed4d8e78 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_s_acosf8_core_avx.S @@ -0,0 +1,25 @@ +/* Function acosf vectorized in AVX ISA as wrapper to SSE4 ISA version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include "svml_s_wrapper_impl.h" + + .text +ENTRY (_ZGVcN8v_acosf) +WRAPPER_IMPL_AVX _ZGVbN4v_acosf +END (_ZGVcN8v_acosf) diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-acos-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-acos-avx.c new file mode 100644 index 0000000000..4f74b4260a --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-acos-avx.c @@ -0,0 +1 @@ +#include "test-double-libmvec-acos.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-acos-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-acos-avx2.c new file mode 100644 index 0000000000..4f74b4260a --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-acos-avx2.c @@ -0,0 +1 @@ +#include "test-double-libmvec-acos.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-acos-avx512f.c b/sysdeps/x86_64/fpu/test-double-libmvec-acos-avx512f.c new file mode 100644 index 0000000000..4f74b4260a --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-acos-avx512f.c @@ -0,0 +1 @@ +#include "test-double-libmvec-acos.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-acos.c b/sysdeps/x86_64/fpu/test-double-libmvec-acos.c new file mode 100644 index 0000000000..e38b8ce821 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-acos.c @@ -0,0 +1,3 @@ +#define LIBMVEC_TYPE double +#define LIBMVEC_FUNC acos +#include "test-vector-abi-arg1.h" diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c index ed932fc98d..0abc7d2021 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c @@ -27,6 +27,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin) VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVbN2v_log) VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVbN2v_exp) VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVbN2vv_pow) +VECTOR_WRAPPER (WRAPPER_NAME (acos), _ZGVbN2v_acos) #define VEC_INT_TYPE __m128i diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c 
b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c index 3a6e37044f..dda093b914 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c @@ -30,6 +30,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVdN4v_sin) VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVdN4v_log) VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVdN4v_exp) VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVdN4vv_pow) +VECTOR_WRAPPER (WRAPPER_NAME (acos), _ZGVdN4v_acos) #ifndef __ILP32__ # define VEC_INT_TYPE __m256i diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c index 99db4e7616..f3230463bb 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c @@ -27,6 +27,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVcN4v_sin) VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVcN4v_log) VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVcN4v_exp) VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVcN4vv_pow) +VECTOR_WRAPPER (WRAPPER_NAME (acos), _ZGVcN4v_acos) #define VEC_INT_TYPE __m128i diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c index 251d429ac0..cf9f52faf0 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c @@ -27,6 +27,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVeN8v_sin) VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVeN8v_log) VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVeN8v_exp) VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVeN8vv_pow) +VECTOR_WRAPPER (WRAPPER_NAME (acos), _ZGVeN8v_acos) #ifndef __ILP32__ # define VEC_INT_TYPE __m512i diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx.c new file mode 100644 index 0000000000..1e6474dfa2 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx.c @@ -0,0 +1 @@ +#include "test-float-libmvec-acosf.c" diff --git 
a/sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx2.c new file mode 100644 index 0000000000..1e6474dfa2 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx2.c @@ -0,0 +1 @@ +#include "test-float-libmvec-acosf.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx512f.c b/sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx512f.c new file mode 100644 index 0000000000..1e6474dfa2 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-acosf-avx512f.c @@ -0,0 +1 @@ +#include "test-float-libmvec-acosf.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-acosf.c b/sysdeps/x86_64/fpu/test-float-libmvec-acosf.c new file mode 100644 index 0000000000..fb47f974fd --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-acosf.c @@ -0,0 +1,3 @@ +#define LIBMVEC_TYPE float +#define LIBMVEC_FUNC acosf +#include "test-vector-abi-arg1.h" diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c index c1d14cd79e..abbd3ed870 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c @@ -27,6 +27,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVeN16v_sinf) VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVeN16v_logf) VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVeN16v_expf) VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVeN16vv_powf) +VECTOR_WRAPPER (WRAPPER_NAME (acosf), _ZGVeN16v_acosf) #define VEC_INT_TYPE __m512i diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c index d23c372060..8a24027952 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c @@ -27,6 +27,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf) VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVbN4v_logf) VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVbN4v_expf) VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVbN4vv_powf) +VECTOR_WRAPPER (WRAPPER_NAME 
(acosf), _ZGVbN4v_acosf) #define VEC_INT_TYPE __m128i diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c index 3152cffb0c..aff0442606 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c @@ -30,6 +30,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVdN8v_sinf) VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVdN8v_logf) VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVdN8v_expf) VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVdN8vv_powf) +VECTOR_WRAPPER (WRAPPER_NAME (acosf), _ZGVdN8v_acosf) /* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */ #undef VECTOR_WRAPPER_fFF diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c index a8492abfef..913584d111 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c @@ -27,6 +27,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVcN8v_sinf) VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVcN8v_logf) VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVcN8v_expf) VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVcN8vv_powf) +VECTOR_WRAPPER (WRAPPER_NAME (acosf), _ZGVcN8v_acosf) #define VEC_INT_TYPE __m128i -- cgit 1.4.1