about summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
diff options
context:
space:
mode:
authorAndrew Senkevich <andrew.senkevich@intel.com>2016-07-01 14:15:38 +0300
committerAndrew Senkevich <andrew.senkevich@intel.com>2016-07-01 14:15:38 +0300
commitee2196bb6766ca7e63a1ba22ebb7619a3266776a (patch)
treea99accc0d97a405f535249efd7657de270726850 /sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
parentfd1cf1dc3b2d90c2a61332363feb1043f6916564 (diff)
downloadglibc-ee2196bb6766ca7e63a1ba22ebb7619a3266776a.tar.gz
glibc-ee2196bb6766ca7e63a1ba22ebb7619a3266776a.tar.xz
glibc-ee2196bb6766ca7e63a1ba22ebb7619a3266776a.zip
Fixed wrong vector sincos/sincosf ABI to have it compatible with
current vector function declaration "#pragma omp declare simd notinbranch",
according to which vector sincos should have vector of pointers for second and
third parameters. It is fixed with implementation as wrapper to version
having second and third parameters as pointers.

    [BZ #20024]
    * sysdeps/x86/fpu/test-math-vector-sincos.h: New.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Fixed ABI
    of this implementation of vector function.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
    Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S: Likewise.
    * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos2_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos4_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S: Likewise.
    * sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_s_sincosf4_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_s_sincosf8_core.S: Likewise.
    * sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Use another wrapper
    for testing vector sincos with fixed ABI.
    * sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c: New test.
    * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-libmvec-sincos.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c: Likewise.
    * sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c: Likewise.
    * sysdeps/x86_64/fpu/Makefile: Added new tests.
Diffstat (limited to 'sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S')
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S120
1 files changed, 119 insertions, 1 deletions
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
index e4320a97c7..a60a524eeb 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
@@ -20,6 +20,124 @@
 #include "svml_d_wrapper_impl.h"
 
 	.text
+ENTRY (_ZGVcN4vl8l8_sincos)
+WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
+END (_ZGVcN4vl8l8_sincos)
+
+/* AVX ISA version as wrapper to SSE ISA version (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_AVX_fFF_vvv callee
+#ifndef __ILP32__
+        pushq     %rbp
+        movq      %rsp, %rbp
+        andq      $-32, %rsp
+        subq      $160, %rsp
+        vmovupd   %ymm0, 64(%rsp)
+        lea       (%rsp), %rdi
+        vmovdqu   %xmm1, 96(%rdi)
+        vmovdqu   %xmm2, 112(%rdi)
+        vmovdqu   %xmm3, 128(%rdi)
+        vmovdqu   %xmm4, 144(%rdi)
+        lea       32(%rsp), %rsi
+	vzeroupper
+        call      HIDDEN_JUMPTARGET(\callee)
+        vmovdqu   80(%rsp), %xmm0
+        lea       16(%rsp), %rdi
+        lea       48(%rsp), %rsi
+        call      HIDDEN_JUMPTARGET(\callee)
+        movq      96(%rsp), %rdx
+        movq      104(%rsp), %rsi
+        movq      112(%rsp), %r8
+        movq      120(%rsp), %r10
+        movq      (%rsp), %rax
+        movq      8(%rsp), %rcx
+        movq      16(%rsp), %rdi
+        movq      24(%rsp), %r9
+        movq      %rax, (%rdx)
+        movq      %rcx, (%rsi)
+        movq      128(%rsp), %rax
+        movq      136(%rsp), %rcx
+        movq      %rdi, (%r8)
+        movq      %r9, (%r10)
+        movq      144(%rsp), %rdi
+        movq      152(%rsp), %r9
+        movq      32(%rsp), %r11
+        movq      40(%rsp), %rdx
+        movq      48(%rsp), %rsi
+        movq      56(%rsp), %r8
+        movq      %r11, (%rax)
+        movq      %rdx, (%rcx)
+        movq      %rsi, (%rdi)
+        movq      %r8, (%r9)
+        movq      %rbp, %rsp
+        popq      %rbp
+        ret
+#else
+        leal    8(%rsp), %r10d
+        .cfi_def_cfa 10, 0
+        andl    $-32, %esp
+        pushq   -8(%r10d)
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r12
+        leal    -80(%rbp), %esi
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x70,0x6
+        .cfi_escape 0x10,0xc,0x2,0x76,0x78
+        leal    -112(%rbp), %edi
+        movq    %rsi, %r12
+        pushq   %rbx
+        .cfi_escape 0x10,0x3,0x2,0x76,0x68
+        movq    %rdi, %rbx
+        subl    $152, %esp
+        vmovaps %xmm1, -128(%ebp)
+        vmovaps %xmm2, -144(%ebp)
+        vmovapd %ymm0, -176(%ebp)
+        vzeroupper
+        call    HIDDEN_JUMPTARGET(\callee)
+        leal    16(%r12), %esi
+        vmovupd -160(%ebp), %xmm0
+        leal    16(%rbx), %edi
+        call    HIDDEN_JUMPTARGET(\callee)
+        movq    -128(%ebp), %rax
+        vmovsd  -112(%ebp), %xmm0
+        vmovdqa -128(%ebp), %xmm5
+        vmovdqa -144(%ebp), %xmm1
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -104(%ebp), %xmm0
+        vpextrd $1, %xmm5, %eax
+        vmovsd  %xmm0, (%eax)
+        movq    -120(%ebp), %rax
+        vmovsd  -96(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -88(%ebp), %xmm0
+        vpextrd $3, %xmm5, %eax
+        vmovsd  %xmm0, (%eax)
+        movq    -144(%ebp), %rax
+        vmovsd  -80(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -72(%ebp), %xmm0
+        vpextrd $1, %xmm1, %eax
+        vmovsd  %xmm0, (%eax)
+        movq    -136(%ebp), %rax
+        vmovsd  -64(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -56(%ebp), %xmm0
+        vpextrd $3, %xmm1, %eax
+        vmovsd  %xmm0, (%eax)
+        addl    $152, %esp
+        popq    %rbx
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %r12
+        popq    %rbp
+        leal    -8(%r10), %esp
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
 ENTRY (_ZGVcN4vvv_sincos)
-WRAPPER_IMPL_AVX_fFF _ZGVbN2vvv_sincos
+WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN2vl8l8_sincos
 END (_ZGVcN4vvv_sincos)