From 717ebfa85c8240d32d0d19d86a484c31c55c9617 Mon Sep 17 00:00:00 2001
From: "H.J. Lu"
Date: Mon, 18 Mar 2024 06:40:16 -0700
Subject: x86-64: Allocate state buffer space for RDI, RSI and RBX

_dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack.
After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11.  Define
TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX
to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to
STATE_SAVE_OFFSET(%rsp).

   +==================+<- stack frame start aligned at 8 or 16 bytes
   |                  |<- RDI saved in the red zone
   |                  |<- RSI saved in the red zone
   |                  |<- RBX saved in the red zone
   |                  |<- paddings for stack realignment of 64 bytes
   |------------------|<- xsave buffer end aligned at 64 bytes
   |                  |<-
   |                  |<-
   |                  |<-
   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
   |                  |<- 8-byte padding for 64-byte alignment
   |                  |<- 8-byte padding for 64-byte alignment
   |                  |<- R11
   |                  |<- R10
   |                  |<- R9
   |                  |<- R8
   |                  |<- RDX
   |                  |<- RCX
   +==================+<- RSP aligned at 64 bytes

Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size
for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI
and RBX are saved onto stack without adjusting stack pointer first, using
the red-zone.

This fixes BZ #31501.
Reviewed-by: Sunil K Pandey
---
 sysdeps/x86_64/tst-gnu2-tls2mod1.S | 87 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2mod1.S

(limited to 'sysdeps/x86_64/tst-gnu2-tls2mod1.S')

diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
new file mode 100644
index 0000000000..1d636669ba
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
@@ -0,0 +1,87 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>	/* cfi_*, _CET_ENDBR, L() and the *_LP register names.  */
+
+/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec
+   to clobber %rdi, %rsi and %rbx.  On Intel AVX CPUs, the state size
+   is 960 bytes and this test didn't fail.  It may be due to the unused
+   last 128 bytes.  On AMD AVX CPUs, the state size is 832 bytes and
+   this test might fail without the fix.  */
+#ifndef OFFSET
+# define OFFSET 40
+#endif
+
+	.text
+	.p2align 4
+	.globl	apply_tls
+	.type	apply_tls, @function
+apply_tls:	/* In: %rdi -> 32 source bytes.  Out: %rax = tls_var1 + 32.  */
+	cfi_startproc
+	_CET_ENDBR
+	pushq	%rbp
+	cfi_def_cfa_offset (16)
+	cfi_offset (6, -16)
+	movdqu	(%RDI_LP), %xmm0	/* Source bytes 0-15; still live after the call.  */
+	lea	tls_var1@TLSDESC(%rip), %RAX_LP	/* TLS descriptor of tls_var1.  */
+	mov	%RSP_LP, %RBP_LP	/* Keep old %rsp so leave can undo the realignment.  */
+	cfi_def_cfa_register (6)
+	/* Align stack to 64 bytes, then move it down by OFFSET + 8 bytes.  */
+	and	$-64, %RSP_LP
+	sub	$OFFSET, %RSP_LP
+	pushq	%rbx	/* %rbx is callee-saved; restored by the pop below.  */
+	/* Put the marker 0xbadbeef in both %ebx and %esi.  */
+	movl	$0xbadbeef, %ebx
+	movl	$0xbadbeef, %esi
+	movq	%rdi, saved_rdi(%rip)	/* Record the pre-call %rdi and %rsi.  */
+	movq	%rsi, saved_rsi(%rip)
+	call	*tls_var1@TLSCALL(%RAX_LP)	/* Resolver result comes back in %rax.  */
+	/* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
+	cmpq	saved_rdi(%rip), %rdi
+	jne	L(hlt)
+	cmpq	saved_rsi(%rip), %rsi
+	jne	L(hlt)
+	cmpl	$0xbadbeef, %ebx
+	jne	L(hlt)
+	add	%fs:0, %RAX_LP	/* Thread pointer + returned offset = &tls_var1.  */
+	movups	%xmm0, 32(%RAX_LP)	/* Store source bytes 0-15 at tls_var1 + 32.  */
+	movdqu	16(%RDI_LP), %xmm1	/* Source bytes 16-31, read via the preserved %rdi.  */
+	mov	%RAX_LP, %RBX_LP
+	movups	%xmm1, 48(%RAX_LP)	/* Store source bytes 16-31 at tls_var1 + 48.  */
+	lea	32(%RBX_LP), %RAX_LP	/* Return a pointer to the 32 bytes just stored.  */
+	pop	%rbx
+	leave
+	cfi_def_cfa (7, 8)
+	ret
+L(hlt):
+	hlt	/* Privileged instruction: faults in user mode, so the test fails.  */
+	cfi_endproc
+	.size	apply_tls, .-apply_tls
+	.hidden	tls_var1
+	.globl	tls_var1
+	.section	.tbss,"awT",@nobits	/* Zero-initialized thread-local storage.  */
+	.align 16
+	.type	tls_var1, @object
+	.size	tls_var1, 3200
+tls_var1:
+	.zero	3200
+	.local	saved_rdi	/* 8-byte, 8-aligned slot for the pre-call %rdi.  */
+	.comm	saved_rdi,8,8
+	.local	saved_rsi	/* 8-byte, 8-aligned slot for the pre-call %rsi.  */
+	.comm	saved_rsi,8,8
+	.section	.note.GNU-stack,"",@progbits
--
cgit 1.4.1