diff options
author | Andrew Senkevich <andrew.senkevich@intel.com> | 2015-06-23 19:21:50 +0300 |
---|---|---|
committer | Andrew Senkevich <andrew.senkevich@intel.com> | 2015-06-23 19:21:50 +0300 |
commit | 5872b8352a8b6c0aa49c4e9f82bbda32becc5f02 (patch) | |
tree | 87f9545391602848e21061e6d3d7a808672beef7 /sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S | |
parent | 718d34a309493f8697ff9a8fefcbacbba12a2ccd (diff) | |
download | glibc-5872b8352a8b6c0aa49c4e9f82bbda32becc5f02.tar.gz glibc-5872b8352a8b6c0aa49c4e9f82bbda32becc5f02.tar.xz glibc-5872b8352a8b6c0aa49c4e9f82bbda32becc5f02.zip |
Combination of data tables for x86_64 vector functions sin, cos and sincos.
* sysdeps/x86_64/fpu/Makefile (libmvec-support): Fixed files list. * sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S: Renamed variable and included header. * sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise. * sysdeps/x86_64/fpu/svml_d_trig_data.S: New file. * sysdeps/x86_64/fpu/svml_d_trig_data.h: Likewise. * sysdeps/x86_64/fpu/svml_d_cos2_core.S: Removed unneeded include. * sysdeps/x86_64/fpu/svml_d_cos4_core.S: Likewise. * sysdeps/x86_64/fpu/svml_d_cos8_core.S: Likewise. * sysdeps/x86_64/fpu/svml_d_cos_data.S: Removed file. * sysdeps/x86_64/fpu/svml_d_cos_data.h: Likewise. * sysdeps/x86_64/fpu/svml_d_sin_data.S: Likewise. * sysdeps/x86_64/fpu/svml_d_sin_data.h: Likewise. * sysdeps/x86_64/fpu/svml_d_sincos_data.S: Likewise. * sysdeps/x86_64/fpu/svml_d_sincos_data.h: Likewise.
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S')
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S | 46 |
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
index c01ad1f8e0..422f6e8b0f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */

 #include <sysdep.h>
-#include "svml_d_sin_data.h"
+#include "svml_d_trig_data.h"
 #include "svml_d_wrapper_impl.h"

 	.text
@@ -45,18 +45,18 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
 	cfi_def_cfa_register (%rbp)
 	andq	$-64, %rsp
 	subq	$1280, %rsp
-	movq	__svml_dsin_data@GOTPCREL(%rip), %rax
+	movq	__svml_d_trig_data@GOTPCREL(%rip), %rax
 	movq	$-1, %rdx
 	vmovups	__dAbsMask(%rax), %zmm6
 	vmovups	__dInvPI(%rax), %zmm1

 /*
- * ARGUMENT RANGE REDUCTION:
- * X' = |X|
+   ARGUMENT RANGE REDUCTION:
+   X' = |X|
  */
 	vpandq	%zmm6, %zmm0, %zmm12
 	vmovups	__dPI1_FMA(%rax), %zmm2
-	vmovups	__dC7(%rax), %zmm7
+	vmovups	__dC7_sin(%rax), %zmm7

 /* SignX - sign bit of X */
 	vpandnq	%zmm0, %zmm6, %zmm11
@@ -86,31 +86,31 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
 	vfnmadd132pd	__dPI3_FMA(%rax), %zmm3, %zmm4

 /*
- * POLYNOMIAL APPROXIMATION:
- * R2 = R*R
+   POLYNOMIAL APPROXIMATION:
+   R2 = R*R
  */
 	vmulpd	%zmm4, %zmm4, %zmm8

 /* R = R^SignRes : update sign of reduced argument */
 	vpxorq	%zmm5, %zmm4, %zmm9
-	vfmadd213pd	__dC6(%rax), %zmm8, %zmm7
-	vfmadd213pd	__dC5(%rax), %zmm8, %zmm7
-	vfmadd213pd	__dC4(%rax), %zmm8, %zmm7
+	vfmadd213pd	__dC6_sin(%rax), %zmm8, %zmm7
+	vfmadd213pd	__dC5_sin(%rax), %zmm8, %zmm7
+	vfmadd213pd	__dC4_sin(%rax), %zmm8, %zmm7

 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
-	vfmadd213pd	__dC3(%rax), %zmm8, %zmm7
+	vfmadd213pd	__dC3_sin(%rax), %zmm8, %zmm7

 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */
-	vfmadd213pd	__dC2(%rax), %zmm8, %zmm7
-	vfmadd213pd	__dC1(%rax), %zmm8, %zmm7
+	vfmadd213pd	__dC2_sin(%rax), %zmm8, %zmm7
+	vfmadd213pd	__dC1_sin(%rax), %zmm8, %zmm7
 	vmulpd	%zmm8, %zmm7, %zmm10

 /* Poly = Poly*R + R */
 	vfmadd213pd	%zmm9, %zmm9, %zmm10

 /*
- * RECONSTRUCTION:
- * Final sign setting: Res = Poly^SignX
+   RECONSTRUCTION:
+   Final sign setting: Res = Poly^SignX
  */
 	vpxorq	%zmm11, %zmm10, %zmm1
 	testl	%ecx, %ecx
@@ -260,13 +260,13 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
 	cfi_def_cfa_register (%rbp)
 	andq	$-64, %rsp
 	subq	$1280, %rsp
-	movq	__svml_dsin_data@GOTPCREL(%rip), %rax
+	movq	__svml_d_trig_data@GOTPCREL(%rip), %rax
 	vpbroadcastq	.L_2il0floatpacket.14(%rip), %zmm14
 	vmovups	__dAbsMask(%rax), %zmm7
 	vmovups	__dInvPI(%rax), %zmm2
 	vmovups	__dRShifter(%rax), %zmm1
 	vmovups	__dPI1_FMA(%rax), %zmm3
-	vmovups	__dC7(%rax), %zmm8
+	vmovups	__dC7_sin(%rax), %zmm8

 /*
   ARGUMENT RANGE REDUCTION:
@@ -305,16 +305,16 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin

 /* R = R^SignRes : update sign of reduced argument */
 	vxorpd	%zmm6, %zmm5, %zmm10
-	vfmadd213pd	__dC6(%rax), %zmm9, %zmm8
-	vfmadd213pd	__dC5(%rax), %zmm9, %zmm8
-	vfmadd213pd	__dC4(%rax), %zmm9, %zmm8
+	vfmadd213pd	__dC6_sin(%rax), %zmm9, %zmm8
+	vfmadd213pd	__dC5_sin(%rax), %zmm9, %zmm8
+	vfmadd213pd	__dC4_sin(%rax), %zmm9, %zmm8

 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
-	vfmadd213pd	__dC3(%rax), %zmm9, %zmm8
+	vfmadd213pd	__dC3_sin(%rax), %zmm9, %zmm8

 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */
-	vfmadd213pd	__dC2(%rax), %zmm9, %zmm8
-	vfmadd213pd	__dC1(%rax), %zmm9, %zmm8
+	vfmadd213pd	__dC2_sin(%rax), %zmm9, %zmm8
+	vfmadd213pd	__dC1_sin(%rax), %zmm9, %zmm8
 	vmulpd	%zmm9, %zmm8, %zmm11

 /* Poly = Poly*R + R */