author     Zack Weinberg <zackw@panix.com>    2017-06-08 15:39:03 -0400
committer  Zack Weinberg <zackw@panix.com>    2017-06-08 15:39:03 -0400
commit     5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch)
tree       4470480d904b65cf14ca524f96f79eca818c3eaf /REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
parent     199fc19d3aaaf57944ef036e15904febe877fc93 (diff)
Prepare for radical source tree reorganization.  [branch: zack/build-layout-experiment]
All top-level files and directories are moved into a temporary storage directory, REORG.TODO, except for files that will certainly still exist in their current form at top level when we're done (COPYING, COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which are moved to the new directory OldChangeLogs, instead), and the generated file INSTALL (which is just deleted; in the new order, there will be no generated files checked into version control).
Diffstat (limited to 'REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S')
-rw-r--r--  REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S | 152
1 file changed, 152 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S b/REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
new file mode 100644
index 0000000000..2ab33b59a7
--- /dev/null
+++ b/REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
@@ -0,0 +1,152 @@
+/* Function sincosf vectorized with SSE2.
+   Copyright (C) 2014-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVbN4vl4l4_sincosf)
+WRAPPER_IMPL_SSE2_fFF sincosf
+END (_ZGVbN4vl4l4_sincosf)
+libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)
+
+/* SSE2 ISA version as wrapper to scalar (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
+#ifndef __ILP32__
+	subq	$120, %rsp
+	cfi_adjust_cfa_offset(120)
+	movaps	%xmm0, 96(%rsp)
+	lea	(%rsp), %rdi
+	movdqa	%xmm1, 32(%rdi)
+	lea	16(%rsp), %rsi
+	movdqa	%xmm2, 32(%rsi)
+	movdqa	%xmm3, 48(%rsi)
+	movdqa	%xmm4, 64(%rsi)
+	call	JUMPTARGET(\callee)
+	movss	100(%rsp), %xmm0
+	lea	4(%rsp), %rdi
+	lea	20(%rsp), %rsi
+	call	JUMPTARGET(\callee)
+	movss	104(%rsp), %xmm0
+	lea	8(%rsp), %rdi
+	lea	24(%rsp), %rsi
+	call	JUMPTARGET(\callee)
+	movss	108(%rsp), %xmm0
+	lea	12(%rsp), %rdi
+	lea	28(%rsp), %rsi
+	call	JUMPTARGET(\callee)
+	movq	32(%rsp), %rdx
+	movq	40(%rsp), %rsi
+	movq	48(%rsp), %r8
+	movq	56(%rsp), %r10
+	movl	(%rsp), %eax
+	movl	4(%rsp), %ecx
+	movl	8(%rsp), %edi
+	movl	12(%rsp), %r9d
+	movl	%eax, (%rdx)
+	movl	%ecx, (%rsi)
+	movq	64(%rsp), %rax
+	movq	72(%rsp), %rcx
+	movl	%edi, (%r8)
+	movl	%r9d, (%r10)
+	movq	80(%rsp), %rdi
+	movq	88(%rsp), %r9
+	movl	16(%rsp), %r11d
+	movl	20(%rsp), %edx
+	movl	24(%rsp), %esi
+	movl	28(%rsp), %r8d
+	movl	%r11d, (%rax)
+	movl	%edx, (%rcx)
+	movl	%esi, (%rdi)
+	movl	%r8d, (%r9)
+	addq	$120, %rsp
+	cfi_adjust_cfa_offset(-120)
+	ret
+#else
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset 6, -16
+	pushq	%rbx
+	.cfi_def_cfa_offset 24
+	.cfi_offset 3, -24
+	subl	$88, %esp
+	.cfi_def_cfa_offset 112
+	leal	64(%rsp), %esi
+	movaps	%xmm1, (%esp)
+	leal	48(%rsp), %edi
+	movaps	%xmm2, 16(%esp)
+	movq	%rsi, %rbp
+	movq	%rdi, %rbx
+	movaps	%xmm0, 32(%esp)
+	call	JUMPTARGET(\callee)
+	movups	36(%esp), %xmm0
+	leal	4(%rbp), %esi
+	leal	4(%rbx), %edi
+	call	JUMPTARGET(\callee)
+	movups	40(%esp), %xmm0
+	leal	8(%rbp), %esi
+	leal	8(%rbx), %edi
+	call	JUMPTARGET(\callee)
+	movups	44(%esp), %xmm0
+	leal	12(%rbp), %esi
+	leal	12(%rbx), %edi
+	call	JUMPTARGET(\callee)
+	movq	(%esp), %rax
+	movss	48(%esp), %xmm0
+	movdqa	(%esp), %xmm4
+	movdqa	16(%esp), %xmm7
+	movss	%xmm0, (%eax)
+	movss	52(%esp), %xmm0
+	pextrd	$1, %xmm4, %eax
+	movss	%xmm0, (%eax)
+	movq	8(%esp), %rax
+	movss	56(%esp), %xmm0
+	movss	%xmm0, (%eax)
+	movss	60(%esp), %xmm0
+	pextrd	$3, %xmm4, %eax
+	movss	%xmm0, (%eax)
+	movq	16(%esp), %rax
+	movss	64(%esp), %xmm0
+	movss	%xmm0, (%eax)
+	movss	68(%esp), %xmm0
+	pextrd	$1, %xmm7, %eax
+	movss	%xmm0, (%eax)
+	movq	24(%esp), %rax
+	movss	72(%esp), %xmm0
+	movss	%xmm0, (%eax)
+	movss	76(%esp), %xmm0
+	pextrd	$3, %xmm7, %eax
+	movss	%xmm0, (%eax)
+	addl	$88, %esp
+	.cfi_def_cfa_offset 24
+	popq	%rbx
+	.cfi_def_cfa_offset 16
+	popq	%rbp
+	.cfi_def_cfa_offset 8
+	ret
+#endif
+.endm
+
+ENTRY (_ZGVbN4vvv_sincosf)
+WRAPPER_IMPL_SSE2_fFF_vvv sincosf
+END (_ZGVbN4vvv_sincosf)
+
+#ifndef USE_MULTIARCH
+libmvec_hidden_def (_ZGVbN4vvv_sincosf)
+#endif
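For readers who do not follow x86-64 assembly, the behavior of the _ZGVbN4vvv_sincosf wrapper added above can be paraphrased in C. The sketch below is illustrative only, not glibc's actual implementation: the function name sincosf_vvv_sketch is invented for this example, while sincosf itself is the GNU extension declared in <math.h> under _GNU_SOURCE. The wrapper spills the vector of four inputs and the two sets of four per-lane destination pointers to the stack, calls the scalar callee once per lane, and scatters each lane's results through that lane's pointers.

        #define _GNU_SOURCE
        #include <math.h>

        /* Sketch of what the WRAPPER_IMPL_SSE2_fFF_vvv macro does for
           sincosf: one scalar libm call per SIMD lane (the four
           `call JUMPTARGET(\callee)` sites in the assembly), then a
           scatter through per-lane destination pointers.  */
        void
        sincosf_vvv_sketch (const float x[4],
                            float *sin_out[4],  /* one sine pointer per lane */
                            float *cos_out[4])  /* one cosine pointer per lane */
        {
          for (int lane = 0; lane < 4; lane++)
            {
              float s, c;
              sincosf (x[lane], &s, &c);  /* scalar call */
              *sin_out[lane] = s;         /* scatter, lane by lane */
              *cos_out[lane] = c;
            }
        }

The assembly must do the same work within the vector ABI's register passing: the four inputs arrive in %xmm0 and the destination pointers in further xmm registers. On LP64 (the #ifndef __ILP32__ path) each 128-bit register holds two 64-bit pointers, which are reloaded with movq; on x32 the pointers are 32 bits wide, so they are extracted with pextrd instead. These _ZGVbN4* symbols are the libmvec vector-ABI mangled variants of sincosf, which a vectorizing compiler (e.g. GCC with -ffast-math against glibc's math-vector.h declarations) can emit in place of the scalar call.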