about summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
diff options
context:
space:
mode:
author	Andrew Senkevich <andrew.senkevich@intel.com>	2015-06-23 19:21:50 +0300
committer	Andrew Senkevich <andrew.senkevich@intel.com>	2015-06-23 19:21:50 +0300
commit	5872b8352a8b6c0aa49c4e9f82bbda32becc5f02 (patch)
tree	87f9545391602848e21061e6d3d7a808672beef7 /sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
parent	718d34a309493f8697ff9a8fefcbacbba12a2ccd (diff)
downloadglibc-5872b8352a8b6c0aa49c4e9f82bbda32becc5f02.tar.gz
glibc-5872b8352a8b6c0aa49c4e9f82bbda32becc5f02.tar.xz
glibc-5872b8352a8b6c0aa49c4e9f82bbda32becc5f02.zip
Combination of data tables for x86_64 vector functions sin, cos and sincos.
    * sysdeps/x86_64/fpu/Makefile (libmvec-support): Fixed files list.
    * sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S: Renamed variable
    and included header.
    * sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_trig_data.S: New file.
    * sysdeps/x86_64/fpu/svml_d_trig_data.h: Likewise.
    * sysdeps/x86_64/fpu/svml_d_cos2_core.S: Removed unneeded include.
    * sysdeps/x86_64/fpu/svml_d_cos4_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_cos8_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_cos_data.S: Removed file.
    * sysdeps/x86_64/fpu/svml_d_cos_data.h: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sin_data.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sin_data.h: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos_data.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos_data.h: Likewise.
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S')
-rw-r--r--	sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S | 46
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
index c01ad1f8e0..422f6e8b0f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "svml_d_sin_data.h"
+#include "svml_d_trig_data.h"
 #include "svml_d_wrapper_impl.h"
 
 	.text
@@ -45,18 +45,18 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $1280, %rsp
-        movq      __svml_dsin_data@GOTPCREL(%rip), %rax
+        movq      __svml_d_trig_data@GOTPCREL(%rip), %rax
         movq      $-1, %rdx
         vmovups __dAbsMask(%rax), %zmm6
         vmovups __dInvPI(%rax), %zmm1
 
 /*
- * ARGUMENT RANGE REDUCTION:
- * X' = |X|
+   ARGUMENT RANGE REDUCTION:
+   X' = |X|
  */
         vpandq    %zmm6, %zmm0, %zmm12
         vmovups __dPI1_FMA(%rax), %zmm2
-        vmovups __dC7(%rax), %zmm7
+        vmovups __dC7_sin(%rax), %zmm7
 
 /* SignX - sign bit of X */
         vpandnq   %zmm0, %zmm6, %zmm11
@@ -86,31 +86,31 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
         vfnmadd132pd __dPI3_FMA(%rax), %zmm3, %zmm4
 
 /*
- * POLYNOMIAL APPROXIMATION:
- * R2 = R*R
+  POLYNOMIAL APPROXIMATION:
+  R2 = R*R
  */
         vmulpd    %zmm4, %zmm4, %zmm8
 
 /* R = R^SignRes : update sign of reduced argument */
         vpxorq    %zmm5, %zmm4, %zmm9
-        vfmadd213pd __dC6(%rax), %zmm8, %zmm7
-        vfmadd213pd __dC5(%rax), %zmm8, %zmm7
-        vfmadd213pd __dC4(%rax), %zmm8, %zmm7
+        vfmadd213pd __dC6_sin(%rax), %zmm8, %zmm7
+        vfmadd213pd __dC5_sin(%rax), %zmm8, %zmm7
+        vfmadd213pd __dC4_sin(%rax), %zmm8, %zmm7
 
 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
-        vfmadd213pd __dC3(%rax), %zmm8, %zmm7
+        vfmadd213pd __dC3_sin(%rax), %zmm8, %zmm7
 
 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */
-        vfmadd213pd __dC2(%rax), %zmm8, %zmm7
-        vfmadd213pd __dC1(%rax), %zmm8, %zmm7
+        vfmadd213pd __dC2_sin(%rax), %zmm8, %zmm7
+        vfmadd213pd __dC1_sin(%rax), %zmm8, %zmm7
         vmulpd    %zmm8, %zmm7, %zmm10
 
 /* Poly = Poly*R + R */
         vfmadd213pd %zmm9, %zmm9, %zmm10
 
 /*
- * RECONSTRUCTION:
- * Final sign setting: Res = Poly^SignX
+   RECONSTRUCTION:
+   Final sign setting: Res = Poly^SignX
  */
         vpxorq    %zmm11, %zmm10, %zmm1
         testl     %ecx, %ecx
@@ -260,13 +260,13 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $1280, %rsp
-        movq      __svml_dsin_data@GOTPCREL(%rip), %rax
+        movq      __svml_d_trig_data@GOTPCREL(%rip), %rax
         vpbroadcastq .L_2il0floatpacket.14(%rip), %zmm14
         vmovups __dAbsMask(%rax), %zmm7
         vmovups __dInvPI(%rax), %zmm2
         vmovups __dRShifter(%rax), %zmm1
         vmovups __dPI1_FMA(%rax), %zmm3
-        vmovups __dC7(%rax), %zmm8
+        vmovups __dC7_sin(%rax), %zmm8
 
 /*
   ARGUMENT RANGE REDUCTION:
@@ -305,16 +305,16 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
 
 /* R = R^SignRes : update sign of reduced argument */
         vxorpd    %zmm6, %zmm5, %zmm10
-        vfmadd213pd __dC6(%rax), %zmm9, %zmm8
-        vfmadd213pd __dC5(%rax), %zmm9, %zmm8
-        vfmadd213pd __dC4(%rax), %zmm9, %zmm8
+        vfmadd213pd __dC6_sin(%rax), %zmm9, %zmm8
+        vfmadd213pd __dC5_sin(%rax), %zmm9, %zmm8
+        vfmadd213pd __dC4_sin(%rax), %zmm9, %zmm8
 
 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
-        vfmadd213pd __dC3(%rax), %zmm9, %zmm8
+        vfmadd213pd __dC3_sin(%rax), %zmm9, %zmm8
 
 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */
-        vfmadd213pd __dC2(%rax), %zmm9, %zmm8
-        vfmadd213pd __dC1(%rax), %zmm9, %zmm8
+        vfmadd213pd __dC2_sin(%rax), %zmm9, %zmm8
+        vfmadd213pd __dC1_sin(%rax), %zmm9, %zmm8
         vmulpd    %zmm9, %zmm8, %zmm11
 
 /* Poly = Poly*R + R */