diff options
-rw-r--r-- | sysdeps/x86_64/dl-trampoline.S | 20 |
-rw-r--r-- | sysdeps/x86_64/dl-trampoline.h | 6 |
2 files changed, 15 insertions, 11 deletions
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index bfc27a1a2a..1815193727 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -33,15 +33,19 @@ # define DL_STACK_ALIGNMENT 8 #endif -#ifndef DL_RUNIME_UNALIGNED_VEC_SIZE -/* The maximum size of unaligned vector load and store. */ -# define DL_RUNIME_UNALIGNED_VEC_SIZE 16 +#ifndef DL_RUNTIME_UNALIGNED_VEC_SIZE +/* The maximum size in bytes of unaligned vector load and store in the + dynamic linker. Since SSE optimized memory/string functions with + aligned SSE register load and store are used in the dynamic linker, + we must set this to 8 so that _dl_runtime_resolve_sse will align the + stack before calling _dl_fixup. */ +# define DL_RUNTIME_UNALIGNED_VEC_SIZE 8 #endif /* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */ -#define DL_RUNIME_RESOLVE_REALIGN_STACK \ +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ (VEC_SIZE > DL_STACK_ALIGNMENT \ - && VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE) + && VEC_SIZE > DL_RUNTIME_UNALIGNED_VEC_SIZE) /* Align vector register save area to 16 bytes. */ #define REGISTER_SAVE_VEC_OFF 0 @@ -76,7 +80,7 @@ #ifdef HAVE_AVX512_ASM_SUPPORT # define VEC_SIZE 64 # define VMOVA vmovdqa64 -# if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +# if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT # define VMOV vmovdqa64 # else # define VMOV vmovdqu64 @@ -100,7 +104,7 @@ strong_alias (_dl_runtime_profile_avx, _dl_runtime_profile_avx512) #define VEC_SIZE 32 #define VMOVA vmovdqa -#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT # define VMOV vmovdqa #else # define VMOV vmovdqu @@ -119,7 +123,7 @@ strong_alias (_dl_runtime_profile_avx, _dl_runtime_profile_avx512) /* movaps/movups is 1-byte shorter. 
*/ #define VEC_SIZE 16 #define VMOVA movaps -#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT # define VMOV movaps #else # define VMOV movups diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h index dd6d7c7835..fda28a286a 100644 --- a/sysdeps/x86_64/dl-trampoline.h +++ b/sysdeps/x86_64/dl-trampoline.h @@ -30,7 +30,7 @@ #undef REGISTER_SAVE_AREA #undef LOCAL_STORAGE_AREA #undef BASE -#if DL_RUNIME_RESOLVE_REALIGN_STACK +#if DL_RUNTIME_RESOLVE_REALIGN_STACK # define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8) /* Local stack area before jumping to function address: RBX. */ # define LOCAL_STORAGE_AREA 8 @@ -57,7 +57,7 @@ cfi_startproc _dl_runtime_resolve: cfi_adjust_cfa_offset(16) # Incorporate PLT -#if DL_RUNIME_RESOLVE_REALIGN_STACK +#if DL_RUNTIME_RESOLVE_REALIGN_STACK # if LOCAL_STORAGE_AREA != 8 # error LOCAL_STORAGE_AREA must be 8 # endif @@ -146,7 +146,7 @@ _dl_runtime_resolve: VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5) VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6) VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7) -#if DL_RUNIME_RESOLVE_REALIGN_STACK +#if DL_RUNTIME_RESOLVE_REALIGN_STACK mov %RBX_LP, %RSP_LP cfi_def_cfa_register(%rsp) movq (%rsp), %rbx |