summary refs log tree commit diff
path: root/sysdeps/ia64/dl-trampoline.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/ia64/dl-trampoline.S')
-rw-r--r--sysdeps/ia64/dl-trampoline.S409
1 files changed, 356 insertions, 53 deletions
diff --git a/sysdeps/ia64/dl-trampoline.S b/sysdeps/ia64/dl-trampoline.S
index 12d7652e44..b7969a6cf0 100644
--- a/sysdeps/ia64/dl-trampoline.S
+++ b/sysdeps/ia64/dl-trampoline.S
@@ -18,37 +18,38 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#undef ret
 
 /*
-   This code is used in dl-runtime.c to call the `fixup' function
-   and then redirect to the address it returns. `fixup()' takes two
-   arguments, however profile_fixup() takes three.
+   This code is used in dl-runtime.c to call the `_dl_fixup' function
+   and then redirect to the address it returns. `_dl_fixup()' takes two
+   arguments, however _dl_profile_fixup() takes five.
 
    The ABI specifies that we will never see more than 8 input
    registers to a function call, thus it is safe to simply allocate
    those, and simpler than playing stack games.  */
 
+/* Used to save and restore 8 incoming fp registers */
+#define RESOLVE_FRAME_SIZE (16*8)
+
 ENTRY(_dl_runtime_resolve)
 	{ .mmi
 	  .prologue
 	  .save ar.pfs, r40
-	  alloc loc0 = ar.pfs, 8, 6, 3, 0
-	  adds r2 = -144, r12
-	  adds r3 = -128, r12
+	  alloc loc0 = ar.pfs, 8, 6, 2, 0
+	  /* Use the 16 byte scratch area. r2 will start at f8 and
+	     r3 will start at f9.  */
+	  adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
+	  adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
 	}
 	{ .mii
-	  .fframe 160
-	  adds r12 = -160, r12
-	  .save rp, r41
+	  .fframe RESOLVE_FRAME_SIZE
+	  adds r12 = -RESOLVE_FRAME_SIZE, r12
+	  .save rp, loc1
 	  mov loc1 = b0
 	  .body
-	  mov out2 = b0		/* needed by fixup_profile */
-	  ;;
-	}
-	{ .mfb
 	  mov loc2 = r8		/* preserve struct value register */
-	  nop.f 0
-	  nop.b 0
+	  ;;
 	}
 	{ .mii
 	  mov loc3 = r9		/* preserve language specific register */
@@ -70,18 +71,20 @@ ENTRY(_dl_runtime_resolve)
 	{ .mmi
 	  stf.spill [r2] = f12, 32
 	  stf.spill [r3] = f13, 32
+	  /* Relocation record is 24 bytes. */
 	  shladd out1 = r15, 3, out1
 	  ;;
 	}
 	{ .mmb
 	  stf.spill [r2] = f14
 	  stf.spill [r3] = f15
-	  br.call.sptk.many b0 = fixup
+	  br.call.sptk.many b0 = _dl_fixup
 	}
 	{ .mii
-	  ld8 r9 = [ret0], 8
+	  /* Skip the 16byte scratch area.  */
 	  adds r2 = 16, r12
 	  adds r3 = 32, r12
+	  mov b6 = ret0
 	  ;;
 	}
 	{ .mmi
@@ -93,7 +96,7 @@ ENTRY(_dl_runtime_resolve)
 	{ .mmi
 	  ldf.fill f10 = [r2], 32
 	  ldf.fill f11 = [r3], 32
-	  mov b6 = r9
+	  mov gp = ret1
 	  ;;
 	}
 	{ .mmi
@@ -106,7 +109,7 @@ ENTRY(_dl_runtime_resolve)
 	  ldf.fill f14 = [r2], 32
 	  ldf.fill f15 = [r3], 32
 	  .restore sp		/* pop the unwind frame state */
-	  adds r12 = 160, r12
+	  adds r12 = RESOLVE_FRAME_SIZE, r12
 	  ;;
 	}
 	{ .mii
@@ -115,7 +118,6 @@ ENTRY(_dl_runtime_resolve)
 	  mov r11 = loc5	/* restore language specific register */
 	}
 	{ .mii
-	  ld8 gp = [ret0]
 	  mov r8 = loc2		/* restore struct value register */
 	  ;;
 	}
@@ -128,42 +130,151 @@ ENTRY(_dl_runtime_resolve)
 	  br.sptk.many b6
 	  ;;
 	}
-END (_dl_runtime_resolve)
+END(_dl_runtime_resolve)
+
+
+/* The fourth argument to _dl_profile_fixup and the third one to
+   _dl_call_pltexit are both pointers to La_ia64_regs:
+
+   8byte r8
+   8byte r9
+   8byte r10
+   8byte r11
+   8byte in0
+   8byte in1
+   8byte in2
+   8byte in3
+   8byte in4
+   8byte in5
+   8byte in6
+   8byte in7
+   16byte f8
+   16byte f9
+   16byte f10
+   16byte f11
+   16byte f12
+   16byte f13
+   16byte f14
+   16byte f15
+   8byte ar.unat
+   8byte sp
+
+   The fifth argument to _dl_profile_fixup is a pointer to long int.
+   The fourth argument to _dl_call_pltexit is a pointer to
+   La_ia64_retval:
+
+   8byte r8
+   8byte r9
+   8byte r10
+   8byte r11
+   16byte f8
+   16byte f9
+   16byte f10
+   16byte f11
+   16byte f12
+   16byte f13
+   16byte f14
+   16byte f15
+  
+  Since the stack has to be 16-byte aligned, stack space is allocated
+  in 16-byte increments. Before calling _dl_profile_fixup, the stack
+  will look like
+
+  psp	new frame_size
+  +16	La_ia64_regs
+  sp	scratch
 
+ */
+
+#define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
+#define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
 
 ENTRY(_dl_runtime_profile)
-	{ .mmi
+	{ .mii
 	  .prologue
 	  .save ar.pfs, r40
-	  alloc loc0 = ar.pfs, 8, 6, 3, 0
-	  adds r2 = -144, r12
-	  adds r3 = -128, r12
+	  alloc loc0 = ar.pfs, 8, 12, 8, 0
+	  .vframe loc10
+	  mov loc10 = r12
+	  .save rp, loc1
+	  mov loc1 = b0
+	}
+	{ .mii
+	  .save ar.unat, r17
+	  mov r17 = ar.unat
+	  .save ar.lc, loc6
+	  mov loc6 = ar.lc
+	  mov loc11 = gp
 	}
 	{ .mii
-	  .fframe 160
-	  adds r12 = -160, r12
-	  .save rp, r41
-	  mov loc1 = b0
 	  .body
-	  mov out2 = b0		/* needed by fixup_profile */
+	  /* There is a 16 byte scratch area. r2 will start at r8 and
+	     r3 will start at r9 for La_ia64_regs.  */
+	  adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
+	  adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
+	  adds r12 = -PLTENTER_FRAME_SIZE, r12
+	  ;;
+	}
+	{ .mmi
+	  st8 [r2] = r8, 16;
+	  st8 [r3] = r9, 16;
+	  mov out2 = b0		/* needed by _dl_profile_fixup */
+	  ;;
+	}
+	{ .mmi
+	  st8 [r2] = r10, 16;
+	  st8 [r3] = r11, 16;
+	  adds out3 = 16, r12	/* pointer to La_ia64_regs */
+	  ;;
+	}
+	{ .mmi
+	  .mem.offset 0, 0
+	  st8.spill [r2] = in0, 16
+	  .mem.offset 8, 0
+	  st8.spill [r3] = in1, 16
+	  mov out4 = loc10	/* pointer to new frame size  */
 	  ;;
 	}
-	{ .mfb
+	{ .mmi
+	  .mem.offset 0, 0
+	  st8.spill [r2] = in2, 16
+	  .mem.offset 8, 0
+	  st8.spill [r3] = in3, 16
 	  mov loc2 = r8		/* preserve struct value register */
-	  nop.f 0
-	  nop.b 0
+	  ;;
 	}
-	{ .mii
+	{ .mmi
+	  .mem.offset 0, 0
+	  st8.spill [r2] = in4, 16
+	  .mem.offset 8, 0
+	  st8.spill [r3] = in5, 16
 	  mov loc3 = r9		/* preserve language specific register */
+	  ;;
+	}
+	{ .mmi
+	  .mem.offset 0, 0
+	  st8 [r2] = in6, 16
+	  .mem.offset 8, 0
+	  st8 [r3] = in7, 24	/* adjust for f9 */
 	  mov loc4 = r10	/* preserve language specific register */
+	  ;;
+	}
+	{ .mii
+	  mov r18 = ar.unat	/* save it in La_ia64_regs */
+	  mov loc7 = out3	/* save it for _dl_call_pltexit */
 	  mov loc5 = r11	/* preserve language specific register */
 	}
 	{ .mmi
 	  stf.spill [r2] = f8, 32
 	  stf.spill [r3] = f9, 32
-	  mov out0 = r16
+	  mov out0 = r16	/* needed by _dl_profile_fixup */
 	  ;;
 	}
+	{ .mii
+	  mov ar.unat = r17	/* restore it for function call */
+	  mov loc8 = r16	/* save it for _dl_call_pltexit */
+	  nop.i 0x0
+	}
 	{ .mmi
 	  stf.spill [r2] = f10, 32
 	  stf.spill [r3] = f11, 32
@@ -173,57 +284,249 @@ ENTRY(_dl_runtime_profile)
 	{ .mmi
 	  stf.spill [r2] = f12, 32
 	  stf.spill [r3] = f13, 32
+	  /* Relocation record is 24 bytes. */
 	  shladd out1 = r15, 3, out1
 	  ;;
 	}
+	{ .mmi
+	  stf.spill [r2] = f14, 32
+	  stf.spill [r3] = f15, 24
+	  mov loc9 = out1	/* save it for _dl_call_pltexit */
+	  ;;
+	}
 	{ .mmb
-	  stf.spill [r2] = f14
-	  stf.spill [r3] = f15
-	  br.call.sptk.many b0 = profile_fixup
+	  st8 [r2] = r18	/* store ar.unat */
+	  st8 [r3] = loc10	/* store sp */
+	  br.call.sptk.many b0 = _dl_profile_fixup
 	}
 	{ .mii
-	  ld8 r9 = [ret0], 8
-	  adds r2 = 16, r12
-	  adds r3 = 32, r12
+	  /* Skip the 16byte scratch area, 4 language specific GRs and
+	     8 incoming GRs to restore incoming fp registers.  */
+	  adds r2 = (4*8 + 8*8 + 16), r12
+	  adds r3 = (4*8 + 8*8 + 32), r12
+	  mov b6 = ret0
 	  ;;
 	}
 	{ .mmi
 	  ldf.fill f8 = [r2], 32
 	  ldf.fill f9 = [r3], 32
-	  mov b0 = loc1
+	  mov gp = ret1
 	  ;;
 	}
 	{ .mmi
 	  ldf.fill f10 = [r2], 32
 	  ldf.fill f11 = [r3], 32
-	  mov b6 = r9
+	  mov r8 = loc2		/* restore struct value register */
 	  ;;
 	}
 	{ .mmi
 	  ldf.fill f12 = [r2], 32
 	  ldf.fill f13 = [r3], 32
-	  mov ar.pfs = loc0
+	  mov r9 = loc3		/* restore language specific register */
 	  ;;
 	}
 	{ .mmi
 	  ldf.fill f14 = [r2], 32
 	  ldf.fill f15 = [r3], 32
-	  .restore sp		/* pop the unwind frame state */
-	  adds r12 = 160, r12
+	  mov r10 = loc4	/* restore language specific register */
 	  ;;
 	}
 	{ .mii
-	  mov r9 = loc3		/* restore language specific register */
-	  mov r10 = loc4	/* restore language specific register */
+	  ld8 r15 = [loc10]	/* load the new frame size */
 	  mov r11 = loc5	/* restore language specific register */
+	  ;;
+	  cmp.eq p6, p7 = -1, r15
+	  ;;
 	}
 	{ .mii
-	  ld8 gp = [ret0]
-	  mov r8 = loc2		/* restore struct value register */
+(p7)	  cmp.eq p8, p9 = 0, r15
+(p6)	  mov b0 = loc1
+(p6)	  mov ar.lc = loc6
+	}
+	{ .mib
+	  nop.m 0x0
+(p6)	  mov ar.pfs = loc0
+(p6)	  br.cond.dptk.many .Lresolved
 	  ;;
 	}
-	/* An alloc is needed for the break system call to work.
-	   We don't care about the old value of the pfs register.  */
+
+	/* At this point, the stack looks like
+
+	  +psp	free
+	  +16	La_ia64_regs
+	  sp	scratch
+
+	  We need to keep the current stack and call the resolved
+	  function by copying r15 bytes from sp + PLTENTER_FRAME_SIZE
+	  + 16 (scratch area) to sp + 16 (scratch area). Since the stack
+	  has to be 16-byte aligned, we round r15 up to a multiple of 16.  */
+
+	{ .mbb
+(p9)	  adds r15 = 15, r15
+(p8)	  br.cond.dptk.many .Lno_new_frame
+	  nop.b 0x0
+	  ;;
+	}
+	{ .mmi
+	  and r15 = -16, r15
+	  ;;
+	  /* We don't copy the 16byte scratch area. Prepare r16/r17 as
+	     destination.  */
+	  sub r16 = r12, r15
+	  sub r17 = r12, r15
+	  ;;
+	}
+	{ .mii
+	  adds r16 = 16, r16
+	  adds r17 = 24, r17
+	  sub r12 = r12, r15		/* Adjust stack  */
+	  ;;
+	}
+	{ .mii
+	  nop.m 0x0
+	  shr r15 = r15, 4
+	  ;;
+	  adds r15 = -1, r15
+	  ;;
+	}
+	{ .mii
+	  /* Skip the 16byte scratch area. Prepare r2/r3 as source.  */
+	  adds r2 = 16, loc10
+	  adds r3 = 24, loc10
+	  mov ar.lc = r15
+	  ;;
+	}
+.Lcopy:
+	{ .mmi
+	  ld8 r18 = [r2], 16
+	  ld8 r19 = [r3], 16
+	  nop.i 0x0
+	  ;;
+	}
+	{ .mmb
+	  st8 [r16] = r18, 16
+	  st8 [r17] = r19, 16
+	  br.cloop.sptk.few .Lcopy
+	}
+.Lno_new_frame:
+	{ .mii
+	  mov out0 = in0
+	  mov out1 = in1
+	  mov out2 = in2
+	}
+	{ .mii
+	  mov out3 = in3
+	  mov out4 = in4
+	  mov out5 = in5
+	}
+	{ .mib
+	  mov out6 = in6
+	  mov out7 = in7
+	  /* Call the resolved function  */
+	  br.call.sptk.many b0 = b6
+	}
+	{ .mii
+	  /* Prepare stack for _dl_call_pltexit. Loc10 has the original
+	     stack pointer.  */
+	  adds r12 = -PLTEXIT_FRAME_SIZE, loc10
+	  adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
+	  adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
+	  ;;
+	}
+	{ .mmi
+	  /* Store all possible return values into the buffer.  */
+	  st8 [r2] = r8, 16
+	  st8 [r3] = r9, 16
+	  mov out0 = loc8
+	  ;;
+	}
+	{ .mmi
+	  st8 [r2] = r10, 16
+	  st8 [r3] = r11, 24
+	  mov out1 = loc9
+	  ;;
+	}
+	{ .mmi
+	  stf.spill [r2] = f8, 32
+	  stf.spill [r3] = f9, 32
+	  mov out2 = loc7		/* Pointer to La_ia64_regs */
+	  ;;
+	}
+	{ .mmi
+	  stf.spill [r2] = f10, 32
+	  stf.spill [r3] = f11, 32
+	  adds out3 = 16, r12		/* Pointer to La_ia64_retval */
+	  ;;
+	}
+	{ .mmi
+	  stf.spill [r2] = f12, 32
+	  stf.spill [r3] = f13, 32
+	  /* We need to restore gp for _dl_call_pltexit. */
+	  mov gp = loc11
+	  ;;
+	}
+	{ .mmb
+	  stf.spill [r2] = f14
+	  stf.spill [r3] = f15
+	  br.call.sptk.many b0 = _dl_call_pltexit
+	}
+	{ .mmi
+	  /* Load all the non-floating and floating return values. Skip
+	     the 16byte scratch area.  */
+	  adds r2 = 16, r12
+	  adds r3 = 24, r12
+	  nop.i 0x0
+	  ;;
+	}
+	{ .mmi
+	  ld8 r8 = [r2], 16
+	  ld8 r9 = [r3], 16
+	  nop.i 0x0
+	  ;;
+	}
+	{ .mmi
+	  ld8 r10 = [r2], 16
+	  ld8 r11 = [r3], 24
+	  nop.i 0x0
+	  ;;
+	}
+	{ .mmi
+	  ldf.fill f8 = [r2], 32
+	  ldf.fill f9 = [r3], 32
+	  mov ar.lc = loc6
+	  ;;
+	}
+	{ .mmi
+	  ldf.fill f10 = [r2], 32
+	  ldf.fill f11 = [r3], 32
+	  mov ar.pfs = loc0
+	  ;;
+	}
+	{ .mmi
+	  ldf.fill f12 = [r2], 32
+	  ldf.fill f13 = [r3], 32
+	  mov b0 = loc1
+	  ;;
+	}
+	{ .mmi
+	  ldf.fill f14 = [r2]
+	  ldf.fill f15 = [r3]
+	  /* We know that the previous stack pointer, loc10, isn't 0.
+	     We use it to reload p7.  */
+	  cmp.ne p7, p0 = 0, loc10
+	  ;;
+	}
+.Lresolved:
+	{ .mmb
+	  .restore sp
+	  mov r12 = loc10
+(p7)	  br.ret.sptk.many b0
+	  ;;
+	}
+	/* An alloc is needed for the break system call to work. We
+	   don't care about the old value of the pfs register. After
+	   this alloc, we can't use any rotating registers. Otherwise
+	   assembler won't be happy. This has to be at the end.  */
 	{ .mmb
 	  .prologue
 	  .body
@@ -231,4 +534,4 @@ ENTRY(_dl_runtime_profile)
 	  br.sptk.many b6
 	  ;;
 	}
-END (_dl_runtime_profile)
+END(_dl_runtime_profile)