From 5872b8352a8b6c0aa49c4e9f82bbda32becc5f02 Mon Sep 17 00:00:00 2001
From: Andrew Senkevich
Date: Tue, 23 Jun 2015 19:21:50 +0300
Subject: Combination of data tables for x86_64 vector functions sin, cos and sincos.

        * sysdeps/x86_64/fpu/Makefile (libmvec-support): Fixed files list.
        * sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S: Renamed variable
        and included header.
        * sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S: Likewise.
        * sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S: Likewise.
        * sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S: Likewise.
        * sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S: Likewise.
        * sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S: Likewise.
        * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Likewise.
        * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise.
        * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise.
        * sysdeps/x86_64/fpu/svml_d_trig_data.S: New file.
        * sysdeps/x86_64/fpu/svml_d_trig_data.h: Likewise.
        * sysdeps/x86_64/fpu/svml_d_cos2_core.S: Removed unneeded include.
        * sysdeps/x86_64/fpu/svml_d_cos4_core.S: Likewise.
        * sysdeps/x86_64/fpu/svml_d_cos8_core.S: Likewise.
        * sysdeps/x86_64/fpu/svml_d_cos_data.S: Removed file.
        * sysdeps/x86_64/fpu/svml_d_cos_data.h: Likewise.
        * sysdeps/x86_64/fpu/svml_d_sin_data.S: Likewise.
        * sysdeps/x86_64/fpu/svml_d_sin_data.h: Likewise.
        * sysdeps/x86_64/fpu/svml_d_sincos_data.S: Likewise.
        * sysdeps/x86_64/fpu/svml_d_sincos_data.h: Likewise.
---
 .../x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S   |  4 +-
 .../x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S   |  4 +-
 .../x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S |  6 +--
 .../x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S   | 30 +++++++-------
 .../x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S   | 18 ++++-----
 .../x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S | 46 +++++++++++-----------
 .../fpu/multiarch/svml_d_sincos2_core_sse4.S       |  4 +-
 .../fpu/multiarch/svml_d_sincos4_core_avx2.S       |  4 +-
 .../fpu/multiarch/svml_d_sincos8_core_avx512.S     |  6 +--
 9 files changed, 61 insertions(+), 61 deletions(-)

(limited to 'sysdeps/x86_64/fpu/multiarch')

diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
index 11348a37c5..4420edcae0 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_cos_data.h"
+#include "svml_d_trig_data.h"
 
        .text
 ENTRY (_ZGVbN2v_cos_sse4)
@@ -41,7 +41,7 @@ ENTRY (_ZGVbN2v_cos_sse4)
        andq    $-64, %rsp
        subq    $320, %rsp
        movaps  %xmm0, %xmm3
-       movq    __svml_dcos_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        movups  __dHalfPI(%rax), %xmm2
 
 /* ARGUMENT RANGE REDUCTION:
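Every hunk in this patch makes the same substitution as the one above: the per-function headers (svml_d_cos_data.h, svml_d_sin_data.h, svml_d_sincos_data.h) give way to the shared svml_d_trig_data.h, and the per-function table symbols (__svml_dcos_data, __svml_dsin_data, __svml_dsincos_data) to the single __svml_d_trig_data table. The new svml_d_trig_data.S/.h files themselves are not shown here because the view is limited to the multiarch subdirectory. The following is only a hypothetical C-level sketch of such a combined table: the field names are the offsets the kernels in this diff actually use, but the field order, mask types and replication width are assumptions, not the real layout of the new header.

/* Illustrative sketch only - not the actual svml_d_trig_data.h layout.  */
#include <stdint.h>

#define VEC 8                       /* doubles per widest vector - assumed */

struct d_trig_data_sketch
{
  uint64_t dAbsMask[VEC];           /* __dAbsMask: clears the sign bit      */
  uint64_t dSignMask[VEC];          /* __dSignMask: isolates the sign bit   */
  double dInvPI[VEC];               /* __dInvPI                             */
  double dRShifter[VEC];            /* __dRShifter                          */
  double dHalfPI[VEC];              /* __dHalfPI (used by the cos kernels)  */
  double dPI1_FMA[VEC];             /* __dPI1_FMA ... __dPI3_FMA: split Pi  */
  double dPI3_FMA[VEC];
  double dC1_sin[VEC];              /* __dC1_sin ... __dC7_sin: sin poly    */
  double dC7_sin[VEC];              /* (intermediate coefficients elided)   */
  /* ... cosine coefficients and any other shared constants ...            */
};

In the assembly the consolidation shows up as a single GOT load of __svml_d_trig_data followed by fixed-offset accesses such as __dHalfPI(%rax) or __dC7_sin(%rax).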
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
index f192ba022e..9a776e7df7 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_cos_data.h"
+#include "svml_d_trig_data.h"
 
        .text
 ENTRY (_ZGVdN4v_cos_avx2)
@@ -41,7 +41,7 @@ ENTRY (_ZGVdN4v_cos_avx2)
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $448, %rsp
-       movq    __svml_dcos_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        vmovapd %ymm0, %ymm1
        vmovupd __dInvPI(%rax), %ymm4
        vmovupd __dRShifter(%rax), %ymm5
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
index 14695ec3c9..b376155210 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_cos_data.h"
+#include "svml_d_trig_data.h"
 #include "svml_d_wrapper_impl.h"
 
        .text
@@ -45,7 +45,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $1280, %rsp
-       movq    __svml_dcos_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
 
 /* R = X - N*Pi1 */
        vmovaps %zmm0, %zmm7
@@ -259,7 +259,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $1280, %rsp
-       movq    __svml_dcos_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
 
 /* R = X - N*Pi1 */
        vmovaps %zmm0, %zmm8
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
index 4b4d8be272..3a1ccbf139 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_sin_data.h"
+#include "svml_d_trig_data.h"
 
        .text
 ENTRY (_ZGVbN2v_sin_sse4)
@@ -41,11 +41,11 @@ ENTRY (_ZGVbN2v_sin_sse4)
        andq    $-64, %rsp
        subq    $320, %rsp
        movaps  %xmm0, %xmm5
-       movq    __svml_dsin_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        movups  __dAbsMask(%rax), %xmm3
 
 /*
-  * ARGUMENT RANGE REDUCTION:
-  * X' = |X|
+  ARGUMENT RANGE REDUCTION:
+  X' = |X|
  */
        movaps  %xmm3, %xmm4
@@ -90,31 +90,31 @@ ENTRY (_ZGVbN2v_sin_sse4)
        subpd   %xmm1, %xmm0
 
 /*
-  * POLYNOMIAL APPROXIMATION:
-  * R2 = R*R
+  POLYNOMIAL APPROXIMATION:
+  R2 = R*R
  */
        movaps  %xmm0, %xmm1
        mulpd   %xmm0, %xmm1
 
 /* R = R^SignRes : update sign of reduced argument */
        xorps   %xmm2, %xmm0
-       movups  __dC7(%rax), %xmm2
+       movups  __dC7_sin(%rax), %xmm2
        mulpd   %xmm1, %xmm2
-       addpd   __dC6(%rax), %xmm2
+       addpd   __dC6_sin(%rax), %xmm2
        mulpd   %xmm1, %xmm2
-       addpd   __dC5(%rax), %xmm2
+       addpd   __dC5_sin(%rax), %xmm2
        mulpd   %xmm1, %xmm2
-       addpd   __dC4(%rax), %xmm2
+       addpd   __dC4_sin(%rax), %xmm2
 
 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
        mulpd   %xmm1, %xmm2
-       addpd   __dC3(%rax), %xmm2
+       addpd   __dC3_sin(%rax), %xmm2
 
 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */
        mulpd   %xmm1, %xmm2
-       addpd   __dC2(%rax), %xmm2
+       addpd   __dC2_sin(%rax), %xmm2
        mulpd   %xmm1, %xmm2
-       addpd   __dC1(%rax), %xmm2
+       addpd   __dC1_sin(%rax), %xmm2
        mulpd   %xmm2, %xmm1
 
 /* Poly = Poly*R + R */
@@ -122,8 +122,8 @@ ENTRY (_ZGVbN2v_sin_sse4)
        addpd   %xmm1, %xmm0
 
 /*
-  * RECONSTRUCTION:
-  * Final sign setting: Res = Poly^SignX
+  RECONSTRUCTION:
+  Final sign setting: Res = Poly^SignX
  */
        xorps   %xmm3, %xmm0
        testl   %ecx, %ecx
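The sin2/sin4/sin8 hunks rename the sine coefficients from __dC1..__dC7 to __dC1_sin..__dC7_sin but leave the evaluation order untouched; the comments spell it out as Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))), Poly = R2*(C1+R2*(C2+R2*Poly)) and Poly = Poly*R + R. Below is a scalar C model of that Horner scheme, under the assumption that r is the sign-adjusted reduced argument and c[1]..c[7] stand in for the table coefficients (whose values are not reproduced here).

/* Scalar model of the polynomial step in the vector sin kernels.
   c[1]..c[7] play the role of __dC1_sin..__dC7_sin; their values live
   in __svml_d_trig_data.  r is the reduced argument after the sign
   fix-up (R = R^SignRes in the assembly comments).  */
static double
sin_poly (double r, const double c[8])
{
  double r2 = r * r;                /* R2 = R*R */
  double p = c[7];
  p = p * r2 + c[6];
  p = p * r2 + c[5];
  p = p * r2 + c[4];
  p = p * r2 + c[3];                /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
  p = p * r2 + c[2];
  p = p * r2 + c[1];
  p = p * r2;                       /* Poly = R2*(C1+R2*(C2+R2*Poly)) */
  return p * r + r;                 /* Poly = Poly*R + R */
}

The AVX2 and AVX-512 variants fold each multiply/add pair into one vfmadd213pd, which is why the renamed coefficients appear directly as memory operands of those instructions in the hunks that follow.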
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
index e7e60d48c5..6bf8b32b4f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_sin_data.h"
+#include "svml_d_trig_data.h"
 
        .text
 ENTRY (_ZGVdN4v_sin_avx2)
@@ -40,7 +40,7 @@ ENTRY (_ZGVdN4v_sin_avx2)
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $448, %rsp
-       movq    __svml_dsin_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        vmovdqa %ymm0, %ymm4
        vmovupd __dAbsMask(%rax), %ymm2
        vmovupd __dInvPI(%rax), %ymm6
@@ -80,17 +80,17 @@ ENTRY (_ZGVdN4v_sin_avx2)
 
 /* R = R^SignRes : update sign of reduced argument */
        vxorpd  %ymm5, %ymm1, %ymm6
-       vmovupd __dC7(%rax), %ymm1
-       vfmadd213pd __dC6(%rax), %ymm0, %ymm1
-       vfmadd213pd __dC5(%rax), %ymm0, %ymm1
-       vfmadd213pd __dC4(%rax), %ymm0, %ymm1
+       vmovupd __dC7_sin(%rax), %ymm1
+       vfmadd213pd __dC6_sin(%rax), %ymm0, %ymm1
+       vfmadd213pd __dC5_sin(%rax), %ymm0, %ymm1
+       vfmadd213pd __dC4_sin(%rax), %ymm0, %ymm1
 
 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
-       vfmadd213pd __dC3(%rax), %ymm0, %ymm1
+       vfmadd213pd __dC3_sin(%rax), %ymm0, %ymm1
 
 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */
-       vfmadd213pd __dC2(%rax), %ymm0, %ymm1
-       vfmadd213pd __dC1(%rax), %ymm0, %ymm1
+       vfmadd213pd __dC2_sin(%rax), %ymm0, %ymm1
+       vfmadd213pd __dC1_sin(%rax), %ymm0, %ymm1
 
 /* SignX - sign bit of X */
        vandnpd %ymm4, %ymm2, %ymm7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
index c01ad1f8e0..422f6e8b0f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_sin_data.h"
+#include "svml_d_trig_data.h"
 #include "svml_d_wrapper_impl.h"
 
        .text
@@ -45,18 +45,18 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $1280, %rsp
-       movq    __svml_dsin_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        movq    $-1, %rdx
        vmovups __dAbsMask(%rax), %zmm6
        vmovups __dInvPI(%rax), %zmm1
 
 /*
-  * ARGUMENT RANGE REDUCTION:
-  * X' = |X|
+  ARGUMENT RANGE REDUCTION:
+  X' = |X|
  */
        vpandq  %zmm6, %zmm0, %zmm12
        vmovups __dPI1_FMA(%rax), %zmm2
-       vmovups __dC7(%rax), %zmm7
+       vmovups __dC7_sin(%rax), %zmm7
 
 /* SignX - sign bit of X */
        vpandnq %zmm0, %zmm6, %zmm11
@@ -86,31 +86,31 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
        vfnmadd132pd __dPI3_FMA(%rax), %zmm3, %zmm4
 
 /*
-  * POLYNOMIAL APPROXIMATION:
-  * R2 = R*R
+  POLYNOMIAL APPROXIMATION:
+  R2 = R*R
  */
        vmulpd  %zmm4, %zmm4, %zmm8
 
 /* R = R^SignRes : update sign of reduced argument */
        vpxorq  %zmm5, %zmm4, %zmm9
-       vfmadd213pd __dC6(%rax), %zmm8, %zmm7
-       vfmadd213pd __dC5(%rax), %zmm8, %zmm7
-       vfmadd213pd __dC4(%rax), %zmm8, %zmm7
+       vfmadd213pd __dC6_sin(%rax), %zmm8, %zmm7
+       vfmadd213pd __dC5_sin(%rax), %zmm8, %zmm7
+       vfmadd213pd __dC4_sin(%rax), %zmm8, %zmm7
 
 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
-       vfmadd213pd __dC3(%rax), %zmm8, %zmm7
+       vfmadd213pd __dC3_sin(%rax), %zmm8, %zmm7
 
 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */
-       vfmadd213pd __dC2(%rax), %zmm8, %zmm7
-       vfmadd213pd __dC1(%rax), %zmm8, %zmm7
+       vfmadd213pd __dC2_sin(%rax), %zmm8, %zmm7
+       vfmadd213pd __dC1_sin(%rax), %zmm8, %zmm7
        vmulpd  %zmm8, %zmm7, %zmm10
 
 /* Poly = Poly*R + R */
        vfmadd213pd %zmm9, %zmm9, %zmm10
 
 /*
-  * RECONSTRUCTION:
-  * Final sign setting: Res = Poly^SignX
+  RECONSTRUCTION:
+  Final sign setting: Res = Poly^SignX
  */
        vpxorq  %zmm11, %zmm10, %zmm1
        testl   %ecx, %ecx
@@ -260,13 +260,13 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $1280, %rsp
-       movq    __svml_dsin_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        vpbroadcastq .L_2il0floatpacket.14(%rip), %zmm14
        vmovups __dAbsMask(%rax), %zmm7
        vmovups __dInvPI(%rax), %zmm2
        vmovups __dRShifter(%rax), %zmm1
        vmovups __dPI1_FMA(%rax), %zmm3
-       vmovups __dC7(%rax), %zmm8
+       vmovups __dC7_sin(%rax), %zmm8
 
 /*
   ARGUMENT RANGE REDUCTION:
@@ -305,16 +305,16 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
 
 /* R = R^SignRes : update sign of reduced argument */
        vxorpd  %zmm6, %zmm5, %zmm10
-       vfmadd213pd __dC6(%rax), %zmm9, %zmm8
-       vfmadd213pd __dC5(%rax), %zmm9, %zmm8
-       vfmadd213pd __dC4(%rax), %zmm9, %zmm8
+       vfmadd213pd __dC6_sin(%rax), %zmm9, %zmm8
+       vfmadd213pd __dC5_sin(%rax), %zmm9, %zmm8
+       vfmadd213pd __dC4_sin(%rax), %zmm9, %zmm8
 
 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
-       vfmadd213pd __dC3(%rax), %zmm9, %zmm8
+       vfmadd213pd __dC3_sin(%rax), %zmm9, %zmm8
 
 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */
-       vfmadd213pd __dC2(%rax), %zmm9, %zmm8
-       vfmadd213pd __dC1(%rax), %zmm9, %zmm8
+       vfmadd213pd __dC2_sin(%rax), %zmm9, %zmm8
+       vfmadd213pd __dC1_sin(%rax), %zmm9, %zmm8
        vmulpd  %zmm9, %zmm8, %zmm11
 
 /* Poly = Poly*R + R */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
index 0b37c7cdf0..b504d1d732 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_sincos_data.h"
+#include "svml_d_trig_data.h"
 
        .text
 ENTRY (_ZGVbN2vvv_sincos_sse4)
@@ -43,7 +43,7 @@ ENTRY (_ZGVbN2vvv_sincos_sse4)
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $320, %rsp
-       movq    __svml_dsincos_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        movups  %xmm11, 160(%rsp)
        movups  %xmm12, 144(%rsp)
        movups  __dSignMask(%rax), %xmm11
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
index ec1ccc6357..dca5604111 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_sincos_data.h"
+#include "svml_d_trig_data.h"
 
        .text
 ENTRY (_ZGVdN4vvv_sincos_avx2)
@@ -43,7 +43,7 @@ ENTRY (_ZGVdN4vvv_sincos_avx2)
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $448, %rsp
-       movq    __svml_dsincos_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        vmovups %ymm14, 288(%rsp)
        vmovups %ymm8, 352(%rsp)
        vmovupd __dSignMask(%rax), %ymm6
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
index fcbf3935f1..e8388325f7 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_sincos_data.h"
+#include "svml_d_trig_data.h"
 #include "svml_d_wrapper_impl.h"
 
 /*
@@ -47,7 +47,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $1344, %rsp
-       movq    __svml_dsincos_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        vmovaps %zmm0, %zmm4
        movq    $-1, %rdx
        vmovups __dSignMask(%rax), %zmm12
@@ -317,7 +317,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
        cfi_def_cfa_register (%rbp)
        andq    $-64, %rsp
        subq    $1344, %rsp
-       movq    __svml_dsincos_data@GOTPCREL(%rip), %rax
+       movq    __svml_d_trig_data@GOTPCREL(%rip), %rax
        vmovaps %zmm0, %zmm8
        vmovups __dSignMask(%rax), %zmm4
        vmovups __dInvPI(%rax), %zmm9
--
cgit 1.4.1
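All of the kernels touched above index the shared table for the same reduction step before the polynomial: the "R = X - N*Pi1" comments and the __dInvPI, __dRShifter, __dPI1_FMA and __dPI3_FMA fields point to a Cody-and-Waite style reduction against a split Pi, with N obtained through the usual multiply-by-1/Pi plus right-shifter rounding trick. The sketch below is a scalar C rendering of that skeleton; the constant values and the exact shifter behaviour are assumptions for illustration, not the contents of the new svml_d_trig_data.S.

/* Scalar sketch of the argument range reduction suggested by the table
   fields above.  Constants are standard double splits of Pi and 1/Pi,
   assumed here; the real values live in svml_d_trig_data.S.  */
#include <math.h>

static const double INV_PI   = 0x1.45f306dc9c883p-2;   /* ~1/Pi, assumed      */
static const double RSHIFTER = 0x1.8p52;                /* 2^52 + 2^51, assumed */
static const double PI1      = 0x1.921fb54442d18p+1;    /* Pi split, assumed    */
static const double PI2      = 0x1.1a62633145c06p-53;
static const double PI3      = 0x1.c1cd129024e09p-106;

static double
trig_reduce (double x, double *r)
{
  /* N = round(X/Pi): adding and subtracting the large shifter pushes the
     fractional bits out in round-to-nearest mode.  */
  double s = x * INV_PI + RSHIFTER;
  double n = s - RSHIFTER;

  /* R = X - N*Pi1 - N*Pi2 - N*Pi3; the vector code does this with FMAs
     (vfnmadd against the __dPI*_FMA fields).  */
  double red = fma (-n, PI1, x);
  red = fma (-n, PI2, red);
  red = fma (-n, PI3, red);

  *r = red;
  return n;
}

The real kernels additionally derive SignRes from N to flip the sign of the reduced argument, and the cos variants add __dHalfPI up front; those details are omitted from the sketch.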