author    Andrew Senkevich <andrew.senkevich@intel.com>  2016-07-01 14:15:38 +0300
committer Andrew Senkevich <andrew.senkevich@intel.com>  2016-07-01 14:15:38 +0300
commit    ee2196bb6766ca7e63a1ba22ebb7619a3266776a (patch)
tree      a99accc0d97a405f535249efd7657de270726850 /sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
parent    fd1cf1dc3b2d90c2a61332363feb1043f6916564 (diff)
Fixed the wrong vector sincos/sincosf ABI to make it compatible with the
current vector function declaration "#pragma omp declare simd notinbranch",
according to which vector sincos should take vectors of pointers as its second
and third parameters.  The fix implements the vvv variant as a wrapper around
the version that takes the second and third parameters as plain pointers.
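Illustratively, the two symbol signatures can be sketched with AVX intrinsic
types as follows (a clarifying sketch, not part of the patch; the real symbols
are implemented in assembly below):

#include <immintrin.h>

/* vl8l8 variant: second and third arguments are plain pointers; the
   callee stores four consecutive sin results at *s and four cos
   results at *c.  */
extern void _ZGVdN4vl8l8_sincos (__m256d x, double *s, double *c);

/* vvv variant: second and third arguments are vectors of four
   pointers, one destination per lane, as required by
   "#pragma omp declare simd notinbranch" for
   void sincos (double, double *, double *).  */
extern void _ZGVdN4vvv_sincos (__m256d x, __m256i s_ptrs, __m256i c_ptrs);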

    [BZ #20024]
    * sysdeps/x86/fpu/test-math-vector-sincos.h: New.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Fixed ABI
    of this vector function implementation.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
    Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos2_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos4_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_s_sincosf4_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_s_sincosf8_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Use another wrapper
    for testing vector sincos with the fixed ABI.
    * sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c: New test.
    * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-libmvec-sincos.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c: Likewise.
    * sysdeps/x86_64/fpu/Makefile: Added new tests.
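
For context, a hedged caller sketch: built with GCC vectorization enabled (for
example -O2 -ffast-math -fopenmp-simd -mavx2; flags and function name here are
illustrative), a loop like the one below may be vectorized into calls to
_ZGVdN4vvv_sincos, with each lane receiving its own destination pointers.

#define _GNU_SOURCE
#include <math.h>

/* Hypothetical caller: the compiler may replace the scalar sincos
   calls with _ZGVdN4vvv_sincos, passing a vector of &s[i] pointers
   and a vector of &c[i] pointers.  */
void
compute (const double *x, double *s, double *c, int n)
{
  for (int i = 0; i < n; i++)
    sincos (x[i], &s[i], &c[i]);
}
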
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S')
-rw-r--r--  sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S  98
1 file changed, 97 insertions, 1 deletion
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
index 24b57f4e8c..12f60100fa 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
@@ -20,7 +20,7 @@
 #include "svml_d_trig_data.h"
 
 	.text
-ENTRY (_ZGVdN4vvv_sincos_avx2)
+ENTRY (_ZGVdN4vl8l8_sincos_avx2)
 /*
    ALGORITHM DESCRIPTION:
 
@@ -274,4 +274,100 @@ ENTRY (_ZGVdN4vvv_sincos_avx2)
         vmovsd    %xmm0, 384(%rsp,%r15)
         jmp       .LBL_1_7
 
+END (_ZGVdN4vl8l8_sincos_avx2)
+libmvec_hidden_def(_ZGVdN4vl8l8_sincos_avx2)
+
+/* vvv version implemented with wrapper to vl8l8 variant.  */
+ENTRY (_ZGVdN4vvv_sincos_avx2)
+#ifndef __ILP32__
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-32, %rsp
+        subq      $128, %rsp
+        vmovdqu   %ymm1, 64(%rsp)
+        lea       (%rsp), %rdi
+        vmovdqu   %ymm2, 96(%rdi)
+        lea       32(%rsp), %rsi
+        call      HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2)
+        movq      64(%rsp), %rdx
+        movq      96(%rsp), %rsi
+        movq      72(%rsp), %r8
+        movq      104(%rsp), %r10
+        movq      (%rsp), %rax
+        movq      32(%rsp), %rcx
+        movq      8(%rsp), %rdi
+        movq      40(%rsp), %r9
+        movq      %rax, (%rdx)
+        movq      %rcx, (%rsi)
+        movq      80(%rsp), %rax
+        movq      112(%rsp), %rcx
+        movq      %rdi, (%r8)
+        movq      %r9, (%r10)
+        movq      88(%rsp), %rdi
+        movq      120(%rsp), %r9
+        movq      16(%rsp), %r11
+        movq      48(%rsp), %rdx
+        movq      24(%rsp), %rsi
+        movq      56(%rsp), %r8
+        movq      %r11, (%rax)
+        movq      %rdx, (%rcx)
+        movq      %rsi, (%rdi)
+        movq      %r8, (%r9)
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else
+        leal    8(%rsp), %r10d
+        .cfi_def_cfa 10, 0
+        andl    $-32, %esp
+        pushq   -8(%r10d)
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x78,0x6
+        leal    -48(%rbp), %esi
+        leal    -80(%rbp), %edi
+        subl    $104, %esp
+        vmovaps %xmm1, -96(%ebp)
+        vmovaps %xmm2, -112(%ebp)
+        call    HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2)
+        movl    -96(%ebp), %eax
+        vmovsd  -80(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -92(%ebp), %eax
+        vmovsd  -72(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -88(%ebp), %eax
+        vmovsd  -64(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -84(%ebp), %eax
+        vmovsd  -56(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -112(%ebp), %eax
+        vmovsd  -48(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -108(%ebp), %eax
+        vmovsd  -40(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -104(%ebp), %eax
+        vmovsd  -32(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -100(%ebp), %eax
+        vmovsd  -24(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        addl    $104, %esp
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %rbp
+        leal    -8(%r10), %esp
+        .cfi_def_cfa 7, 8
+        ret
+#endif
 END (_ZGVdN4vvv_sincos_avx2)
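
The LP64 branch of the new wrapper above can be read, roughly, as the following
C-level sketch (hypothetical, for illustration only; function and parameter
names are not taken from the patch): the vl8l8 variant is run into stack
buffers, and each lane is then scattered to the per-lane destination pointer
supplied by the vvv ABI.

#include <immintrin.h>

extern void _ZGVdN4vl8l8_sincos_avx2 (__m256d x, double *s, double *c);

static void
vvv_sincos_sketch (__m256d x, double *sin_ptrs[4], double *cos_ptrs[4])
{
  double s_buf[4] __attribute__ ((aligned (32)));
  double c_buf[4] __attribute__ ((aligned (32)));

  /* One call computes all four sin and cos lanes into contiguous
     stack buffers.  */
  _ZGVdN4vl8l8_sincos_avx2 (x, s_buf, c_buf);

  /* Scatter: each lane goes to its own caller-supplied pointer.  */
  for (int i = 0; i < 4; i++)
    {
      *sin_ptrs[i] = s_buf[i];
      *cos_ptrs[i] = c_buf[i];
    }
}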