/* sysdeps/x86_64/fpu/svml_s_sincosf4_core.S */

/* Function sincosf vectorized with SSE2.
   Copyright (C) 2014-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_s_wrapper_impl.h"

	.text
/* Entry point _ZGVbN4vl4l4_sincosf.

   The whole body is produced by the WRAPPER_IMPL_SSE2_fFF macro,
   instantiated with sincosf as the callee.  That macro is not defined
   in this file (presumably it comes from svml_s_wrapper_impl.h, which
   is included above -- confirm there for the register and stack
   contract).

   NOTE(review): the "vl4l4" suffix looks like the vector-function ABI
   encoding for one vector argument followed by two linear (stride 4)
   pointer arguments -- confirm against the ABI document.

   libmvec_hidden_def is likewise defined elsewhere; it is applied to
   this symbol unconditionally.  */
ENTRY (_ZGVbN4vl4l4_sincosf)
WRAPPER_IMPL_SSE2_fFF sincosf
END (_ZGVbN4vl4l4_sincosf)
libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)

/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).

   Parameter:
     callee - scalar routine invoked once per lane.  It is called with
              the lane value in the low element of %xmm0 and two output
              pointers in %rdi and %rsi (for sincosf these are the sine
              and cosine destinations respectively).

   Inputs on entry:
     %xmm0          four packed single-precision arguments
     LP64:  %xmm1:%xmm2  four 64-bit pointers for the first output
            %xmm3:%xmm4  four 64-bit pointers for the second output
     ILP32: %xmm1        four 32-bit pointers for the first output
            %xmm2        four 32-bit pointers for the second output

   Effect: for each lane i the callee is run on element i and its two
   results are stored through the i-th pointer of each pointer vector.
   No value is returned in a register.

   Only SSE/SSE2 and baseline integer instructions are used so that the
   expansion is valid for the SSE2 variant; in particular pextrd
   (SSE4.1) is deliberately avoided in the ILP32 path.  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
        /* Frame (120 bytes; with the return address this keeps %rsp
           16-byte aligned at each call, assuming the usual psABI
           alignment on entry):
              0(%rsp) .. 15   first-output results, one float per lane
             16(%rsp) .. 31   second-output results, one float per lane
             32(%rsp) .. 63   spilled %xmm1,%xmm2 (first-output pointers)
             64(%rsp) .. 95   spilled %xmm3,%xmm4 (second-output pointers)
             96(%rsp) .. 111  spilled %xmm0 (the four inputs)  */
        subq      $120, %rsp
        cfi_adjust_cfa_offset(120)
        movaps    %xmm0, 96(%rsp)
        lea       (%rsp), %rdi
        movdqa    %xmm1, 32(%rdi)
        lea       16(%rsp), %rsi
        movdqa    %xmm2, 32(%rsi)
        movdqa    %xmm3, 48(%rsi)
        movdqa    %xmm4, 64(%rsi)
        /* Lane 0: %xmm0 still holds the input vector, so its low
           element is lane 0.  */
        call      JUMPTARGET(\callee)
        /* Lane 1.  */
        movss     100(%rsp), %xmm0
        lea       4(%rsp), %rdi
        lea       20(%rsp), %rsi
        call      JUMPTARGET(\callee)
        /* Lane 2.  */
        movss     104(%rsp), %xmm0
        lea       8(%rsp), %rdi
        lea       24(%rsp), %rsi
        call      JUMPTARGET(\callee)
        /* Lane 3.  */
        movss     108(%rsp), %xmm0
        lea       12(%rsp), %rdi
        lea       28(%rsp), %rsi
        call      JUMPTARGET(\callee)
        /* Scatter the first-output results through the pointers that
           were spilled from %xmm1,%xmm2.  */
        movq      32(%rsp), %rdx
        movq      40(%rsp), %rsi
        movq      48(%rsp), %r8
        movq      56(%rsp), %r10
        movl      (%rsp), %eax
        movl      4(%rsp), %ecx
        movl      8(%rsp), %edi
        movl      12(%rsp), %r9d
        movl      %eax, (%rdx)
        movl      %ecx, (%rsi)
        movq      64(%rsp), %rax
        movq      72(%rsp), %rcx
        movl      %edi, (%r8)
        movl      %r9d, (%r10)
        /* Scatter the second-output results through the pointers that
           were spilled from %xmm3,%xmm4.  */
        movq      80(%rsp), %rdi
        movq      88(%rsp), %r9
        movl      16(%rsp), %r11d
        movl      20(%rsp), %edx
        movl      24(%rsp), %esi
        movl      28(%rsp), %r8d
        movl      %r11d, (%rax)
        movl      %edx, (%rcx)
        movl      %esi, (%rdi)
        movl      %r8d, (%r9)
        addq      $120, %rsp
        cfi_adjust_cfa_offset(-120)
        ret
#else
        /* ILP32: pointers are 32 bits wide, so each pointer vector
           fits in one XMM register.  %rbx and %rbp are callee-saved
           and keep the result-buffer addresses live across the calls.
           Frame below the two pushes (88 bytes):
              0(%esp) .. 15   spilled %xmm1 (first-output pointers)
             16(%esp) .. 31   spilled %xmm2 (second-output pointers)
             32(%esp) .. 47   spilled %xmm0 (the four inputs)
             48(%esp) .. 63   first-output results
             64(%esp) .. 79   second-output results  */
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        subl    $88, %esp
        .cfi_def_cfa_offset 112
        leal    64(%rsp), %esi
        movaps  %xmm1, (%esp)
        leal    48(%rsp), %edi
        movaps  %xmm2, 16(%esp)
        movq    %rsi, %rbp
        movq    %rdi, %rbx
        movaps  %xmm0, 32(%esp)
        /* Lane 0: %xmm0 still holds the input vector.  */
        call    JUMPTARGET(\callee)
        /* Lanes 1-3: the unaligned 16-byte load puts the wanted lane in
           the low element; the remaining elements are presumably
           ignored by the scalar callee.  */
        movups  36(%esp), %xmm0
        leal    4(%rbp), %esi
        leal    4(%rbx), %edi
        call    JUMPTARGET(\callee)
        movups  40(%esp), %xmm0
        leal    8(%rbp), %esi
        leal    8(%rbx), %edi
        call    JUMPTARGET(\callee)
        movups  44(%esp), %xmm0
        leal    12(%rbp), %esi
        leal    12(%rbx), %edi
        call    JUMPTARGET(\callee)
        /* Scatter the first-output results.  Each 32-bit destination
           pointer is reloaded directly from the spilled vector with a
           plain movl; extracting it with pextrd would require
           SSE4.1, which an SSE2 wrapper must not assume.  */
        movl    (%esp), %eax
        movss   48(%esp), %xmm0
        movss   %xmm0, (%eax)
        movl    4(%esp), %eax
        movss   52(%esp), %xmm0
        movss   %xmm0, (%eax)
        movl    8(%esp), %eax
        movss   56(%esp), %xmm0
        movss   %xmm0, (%eax)
        movl    12(%esp), %eax
        movss   60(%esp), %xmm0
        movss   %xmm0, (%eax)
        /* Scatter the second-output results the same way.  */
        movl    16(%esp), %eax
        movss   64(%esp), %xmm0
        movss   %xmm0, (%eax)
        movl    20(%esp), %eax
        movss   68(%esp), %xmm0
        movss   %xmm0, (%eax)
        movl    24(%esp), %eax
        movss   72(%esp), %xmm0
        movss   %xmm0, (%eax)
        movl    28(%esp), %eax
        movss   76(%esp), %xmm0
        movss   %xmm0, (%eax)
        addl    $88, %esp
        .cfi_def_cfa_offset 24
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
#endif
.endm

/* Entry point _ZGVbN4vvv_sincosf.

   The body is the expansion of WRAPPER_IMPL_SSE2_fFF_vvv (defined
   above in this file) with sincosf as the per-lane scalar callee:
   the input vector arrives in %xmm0 and the output pointers arrive
   packed in further XMM registers.  */
ENTRY (_ZGVbN4vvv_sincosf)
WRAPPER_IMPL_SSE2_fFF_vvv sincosf
END (_ZGVbN4vvv_sincosf)

/* The hidden definition is emitted only when USE_MULTIARCH is not
   defined.  NOTE(review): presumably a multiarch build supplies it
   from another file -- confirm against the multiarch sources.  */
#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN4vvv_sincosf)
#endif