diff options
author | Andrew Senkevich <andrew.senkevich@intel.com> | 2016-07-01 14:15:38 +0300 |
---|---|---|
committer | Andrew Senkevich <andrew.senkevich@intel.com> | 2016-07-01 14:15:38 +0300 |
commit | ee2196bb6766ca7e63a1ba22ebb7619a3266776a (patch) | |
tree | a99accc0d97a405f535249efd7657de270726850 /sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S | |
parent | fd1cf1dc3b2d90c2a61332363feb1043f6916564 (diff) | |
download | glibc-ee2196bb6766ca7e63a1ba22ebb7619a3266776a.tar.gz glibc-ee2196bb6766ca7e63a1ba22ebb7619a3266776a.tar.xz glibc-ee2196bb6766ca7e63a1ba22ebb7619a3266776a.zip |
Fixed wrong vector sincos/sincosf ABI to have it compatible with
current vector function declaration "#pragma omp declare simd notinbranch", according to which vector sincos should have vector of pointers for second and third parameters. It is fixed with implementation as wrapper to version having second and third parameters as pointers. [BZ #20024] * sysdeps/x86/fpu/test-math-vector-sincos.h: New. * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Fixed ABI of this implementation of vector function. * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S: Likewise. * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S: Likewise. * sysdeps/x86_64/fpu/svml_d_sincos2_core.S: Likewise. * sysdeps/x86_64/fpu/svml_d_sincos4_core.S: Likewise. * sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S: Likewise. * sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Likewise. * sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise. * sysdeps/x86_64/fpu/svml_s_sincosf4_core.S: Likewise. * sysdeps/x86_64/fpu/svml_s_sincosf8_core.S: Likewise. * sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S: Likewise. * sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Use another wrapper for testing vector sincos with fixed ABI. * sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c: New test. * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c: Likewise. * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c: Likewise. * sysdeps/x86_64/fpu/test-double-libmvec-sincos.c: Likewise. * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c: Likewise. * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c: Likewise. * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c: Likewise. * sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c: Likewise. * sysdeps/x86_64/fpu/Makefile: Added new tests.
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S')
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S | 56 |
1 files changed, 55 insertions, 1 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S index d37275d7ab..6dfc61ee93 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S @@ -20,7 +20,7 @@ #include "svml_d_trig_data.h" .text -ENTRY (_ZGVbN2vvv_sincos_sse4) +ENTRY (_ZGVbN2vl8l8_sincos_sse4) /* ALGORITHM DESCRIPTION: @@ -311,4 +311,58 @@ ENTRY (_ZGVbN2vvv_sincos_sse4) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 +END (_ZGVbN2vl8l8_sincos_sse4) +libmvec_hidden_def(_ZGVbN2vl8l8_sincos_sse4) + +/* vvv version implemented with wrapper to vl8l8 variant. */ +ENTRY (_ZGVbN2vvv_sincos_sse4) +#ifndef __ILP32__ + subq $72, %rsp + .cfi_def_cfa_offset 80 + movdqu %xmm1, 32(%rsp) + lea (%rsp), %rdi + movdqu %xmm2, 48(%rdi) + lea 16(%rsp), %rsi + call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4) + movq 32(%rsp), %rdx + movq 48(%rsp), %rsi + movq 40(%rsp), %r8 + movq 56(%rsp), %r10 + movq (%rsp), %rax + movq 16(%rsp), %rcx + movq 8(%rsp), %rdi + movq 24(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq %rdi, (%r8) + movq %r9, (%r10) + addq $72, %rsp + .cfi_def_cfa_offset 8 + ret +#else + subl $72, %esp + .cfi_def_cfa_offset 80 + leal 48(%rsp), %esi + movaps %xmm1, 16(%esp) + leal 32(%rsp), %edi + movaps %xmm2, (%esp) + call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4) + movdqa 16(%esp), %xmm1 + movsd 32(%esp), %xmm0 + movq %xmm1, %rax + movdqa (%esp), %xmm2 + movsd %xmm0, (%eax) + movsd 40(%esp), %xmm0 + pextrd $1, %xmm1, %eax + movsd %xmm0, (%eax) + movsd 48(%esp), %xmm0 + movq %xmm2, %rax + movsd %xmm0, (%eax) + movsd 56(%esp), %xmm0 + pextrd $1, %xmm2, %eax + movsd %xmm0, (%eax) + addl $72, %esp + .cfi_def_cfa_offset 8 + ret +#endif END (_ZGVbN2vvv_sincos_sse4) |