about summary refs log tree commit diff
path: root/sysdeps/x86
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86')
-rw-r--r--sysdeps/x86/cpu-features.c11
-rw-r--r--sysdeps/x86/sysdep.h60
2 files changed, 60 insertions, 11 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 4ea373dffa..3d7c2819d7 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features)
 	      /* NB: On AMX capable processors, ebx always includes AMX
 		 states.  */
 	      unsigned int xsave_state_full_size
-		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+		= ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
 
 	      cpu_features->xsave_state_size
 		= xsave_state_full_size;
@@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features)
 		      unsigned int amx_size
 			= (xstate_amx_comp_offsets[31]
 			   + xstate_amx_comp_sizes[31]);
-		      amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
-					   64);
+		      amx_size
+			= ALIGN_UP ((amx_size
+				     + TLSDESC_CALL_REGISTER_SAVE_AREA),
+				    64);
 		      /* Set xsave_state_full_size to the compact AMX
 			 state size for XSAVEC.  NB: xsave_state_full_size
 			 is only used in _dl_tlsdesc_dynamic_xsave and
@@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features)
 		      cpu_features->xsave_state_full_size = amx_size;
 #endif
 		      cpu_features->xsave_state_size
-			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+			= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
+				    64);
 		      CPU_FEATURE_SET (cpu_features, XSAVEC);
 		    }
 		}
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index db8e576e91..7359149e17 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -38,14 +38,59 @@
 #ifdef __x86_64__
 /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
    space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
-   aligned to 16 bytes for fxsave and 64 bytes for xsave.
-
-   NB: Is is non-zero because of the 128-byte red-zone.  Some registers
-   are saved on stack without adjusting stack pointer first.  When we
-   update stack pointer to allocate more space, we need to take the
-   red-zone into account.  */
+   aligned to 16 bytes for fxsave and 64 bytes for xsave.  It is non-zero
+   because MOV, instead of PUSH, is used to save registers onto stack.
+
+   +==================+<- stack frame start aligned at 8 or 16 bytes
+   |                  |<- paddings for stack realignment of 64 bytes
+   |------------------|<- xsave buffer end aligned at 64 bytes
+   |                  |<-
+   |                  |<-
+   |                  |<-
+   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+   |                  |<- 8-byte padding for 64-byte alignment
+   |                  |<- R9
+   |                  |<- R8
+   |                  |<- RDI
+   |                  |<- RSI
+   |                  |<- RDX
+   |                  |<- RCX
+   |                  |<- RAX
+   +==================+<- RSP aligned at 64 bytes
+
+ */
 # define STATE_SAVE_OFFSET (8 * 7 + 8)
 
+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
+   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
+   R11.  Allocate space for RDI, RSI and RBX to avoid clobbering saved
+   RDI, RSI and RBX values on stack by xsave.
+
+   +==================+<- stack frame start aligned at 8 or 16 bytes
+   |                  |<- RDI saved in the red zone
+   |                  |<- RSI saved in the red zone
+   |                  |<- RBX saved in the red zone
+   |                  |<- paddings for stack realignment of 64 bytes
+   |------------------|<- xsave buffer end aligned at 64 bytes
+   |                  |<-
+   |                  |<-
+   |                  |<-
+   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+   |                  |<- 8-byte padding for 64-byte alignment
+   |                  |<- 8-byte padding for 64-byte alignment
+   |                  |<- R11
+   |                  |<- R10
+   |                  |<- R9
+   |                  |<- R8
+   |                  |<- RDX
+   |                  |<- RCX
+   +==================+<- RSP aligned at 64 bytes
+
+   Define the total register save area size for all integer registers by
+   adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
+   stack without adjusting stack pointer first, using the red-zone.  */
+# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
+
 /* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
    registers are mutually exclusive.  */
 # define STATE_SAVE_MASK		\
@@ -66,8 +111,9 @@
   (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
 #else
 /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
-   doesn't have red-zone, use 0 here.  */
+   uses PUSH to save registers onto stack, use 0 here.  */
 # define STATE_SAVE_OFFSET 0
+# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
 
 /* Save SSE, AVX, AXV512, mask and bound registers.   */
 # define STATE_SAVE_MASK		\