diff options
author | Ulrich Drepper <drepper@redhat.com> | 2009-07-29 08:33:03 -0700 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2009-07-29 08:33:03 -0700 |
commit | b48a267b8fbb885191a04cffdb4050a4d4c8a20b (patch) | |
tree | 1a517e3273ee5785b44a9bd5b9aec9ae62b95ac3 /sysdeps/x86_64/dl-trampoline.S | |
parent | 9655389317c92e5935c47d90c0ba48ca54bd245e (diff) | |
download | glibc-b48a267b8fbb885191a04cffdb4050a4d4c8a20b.tar.gz glibc-b48a267b8fbb885191a04cffdb4050a4d4c8a20b.tar.xz glibc-b48a267b8fbb885191a04cffdb4050a4d4c8a20b.zip |
Preserve SSE registers in runtime relocations on x86-64.
SSE registers are used for passing parameters and must be preserved during runtime relocations. Inside ld.so this is enforced through the tests in tst-xmmymm.sh. But the malloc routines used after startup come from libc.so and can be arbitrarily complex. Because these calls are rare, saving the SSE registers all the time would be overkill; instead we save them on demand. The new infrastructure put in place in this patch makes this possible and efficient.
Diffstat (limited to 'sysdeps/x86_64/dl-trampoline.S')
-rw-r--r-- | sysdeps/x86_64/dl-trampoline.S | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 49d239f075..7ecf1b0c64 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -390,3 +390,112 @@ L(no_avx4):
 	cfi_endproc
 	.size _dl_runtime_profile, .-_dl_runtime_profile
 #endif
+
+
+#ifdef SHARED
+/* Save the vector registers used for parameter passing into the
+   %fs-relative save area at RTLD_SAVESPACE_SSE: %xmm0-%xmm7, or the
+   full %ymm0-%ymm7 when built with HAVE_AVX_SUPPORT and CPUID reports
+   AVX.
+
+   L(have_avx) caches the CPUID probe: 0 = not probed yet, 1 = AVX
+   available, -1 = no AVX.
+
+   Clobbers at most the flags and, on the call that runs CPUID, %eax,
+   %ecx, %edx and %r11 (%rbx is saved in %r11 and restored).
+
+   NOTE(review): movdqa/vmovdqa fault on misaligned addresses, so the
+   save area is presumably 16-byte (32-byte for AVX) aligned -- confirm
+   against the definition of RTLD_SAVESPACE_SSE.
+   NOTE(review): only CPUID.1:ECX bit 28 is tested; OS support for the
+   YMM state (OSXSAVE, XGETBV) is not checked here -- confirm that is
+   acceptable.  */
+	.globl _dl_x86_64_save_sse
+	.type _dl_x86_64_save_sse, @function
+	.align 16
+	cfi_startproc
+_dl_x86_64_save_sse:
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)
+	jne	1f			# Already probed, SF selects the path
+	movq	%rbx, %r11		# Save rbx
+	movl	$1, %eax
+	cpuid
+	movq	%r11,%rbx		# Restore rbx
+	movl	$1, %eax
+	testl	$(1 << 28), %ecx	# AVX feature bit
+	jne	2f
+	negl	%eax			# No AVX: cache -1
+2:	movl	%eax, L(have_avx)(%rip)
+	cmpl	$0, %eax
+
+1:	js	L(no_avx5)
+
+# define YMM_SIZE 32
+	vmovdqa	%ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
+	vmovdqa	%ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
+	vmovdqa	%ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
+	vmovdqa	%ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
+	vmovdqa	%ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
+	vmovdqa	%ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
+	vmovdqa	%ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
+	vmovdqa	%ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
+	ret
+L(no_avx5):
+# endif
+	/* Slot stride for the %xmm stores.  This must not redefine YMM_SIZE:
+	   _dl_x86_64_restore_sse below reloads the %ymm registers with the
+	   same 32-byte stride used above.  */
+# ifndef XMM_SIZE
+#  define XMM_SIZE 16
+# endif
+	movdqa	%xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
+	movdqa	%xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
+	movdqa	%xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
+	movdqa	%xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
+	movdqa	%xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
+	movdqa	%xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
+	movdqa	%xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
+	movdqa	%xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
+	ret
+	cfi_endproc
+	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
+
+
+/* Reload %xmm0-%xmm7 (or %ymm0-%ymm7) from the save area written by
+   _dl_x86_64_save_sse.  The register width is chosen from L(have_avx),
+   which is only read here, never probed: a value of 0 takes the AVX
+   path, so _dl_x86_64_save_sse presumably has to run first.  Clobbers
+   the flags when built with HAVE_AVX_SUPPORT.  */
+	.globl _dl_x86_64_restore_sse
+	.type _dl_x86_64_restore_sse, @function
+	.align 16
+	cfi_startproc
+_dl_x86_64_restore_sse:
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)
+	js	L(no_avx6)
+
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
+	ret
+L(no_avx6):
+# endif
+	movdqa	%fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
+	movdqa	%fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
+	movdqa	%fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
+	movdqa	%fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
+	movdqa	%fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
+	movdqa	%fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
+	movdqa	%fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
+	movdqa	%fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
+	ret
+	cfi_endproc
+	.size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
+#endif