about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--sysdeps/hppa/dl-fptr.c26
-rw-r--r--sysdeps/hppa/dl-machine.h36
-rw-r--r--sysdeps/hppa/dl-trampoline.S74
-rw-r--r--sysdeps/unix/sysv/linux/hppa/atomic-machine.h28
4 files changed, 142 insertions, 22 deletions
diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c
index 0a37397284..25ca8f8463 100644
--- a/sysdeps/hppa/dl-fptr.c
+++ b/sysdeps/hppa/dl-fptr.c
@@ -172,8 +172,8 @@ make_fdesc (ElfW(Addr) ip, ElfW(Addr) gp)
     }
 
  install:
-  fdesc->ip = ip;
   fdesc->gp = gp;
+  fdesc->ip = ip;
 
   return (ElfW(Addr)) fdesc;
 }
@@ -350,7 +350,9 @@ ElfW(Addr)
 _dl_lookup_address (const void *address)
 {
   ElfW(Addr) addr = (ElfW(Addr)) address;
-  unsigned int *desc, *gptr;
+  ElfW(Word) reloc_arg;
+  volatile unsigned int *desc;
+  unsigned int *gptr;
 
   /* Return ADDR if the least-significant two bits of ADDR are not consistent
      with ADDR being a linker defined function pointer.  The normal value for
@@ -367,7 +369,11 @@ _dl_lookup_address (const void *address)
   if (!_dl_read_access_allowed (desc))
     return addr;
 
-  /* Load first word of candidate descriptor.  It should be a pointer
+  /* First load the relocation offset.  */
+  reloc_arg = (ElfW(Word)) desc[1];
+  atomic_full_barrier();
+
+  /* Then load first word of candidate descriptor.  It should be a pointer
      with word alignment and point to memory that can be read.  */
   gptr = (unsigned int *) desc[0];
   if (((unsigned int) gptr & 3) != 0
@@ -377,8 +383,8 @@ _dl_lookup_address (const void *address)
   /* See if descriptor requires resolution.  The following trampoline is
      used in each global offset table for function resolution:
 
-		ldw 0(r20),r22
-		bv r0(r22)
+		ldw 0(r20),r21
+		bv r0(r21)
 		ldw 4(r20),r21
      tramp:	b,l .-12,r20
 		depwi 0,31,2,r20
@@ -389,7 +395,15 @@ _dl_lookup_address (const void *address)
   if (gptr[0] == 0xea9f1fdd			/* b,l .-12,r20     */
       && gptr[1] == 0xd6801c1e			/* depwi 0,31,2,r20 */
       && (ElfW(Addr)) gptr[2] == elf_machine_resolve ())
-    _dl_fixup ((struct link_map *) gptr[5], (ElfW(Word)) desc[1]);
+    {
+      struct link_map *l = (struct link_map *) gptr[5];
+
+      /* If gp has been resolved, we need to hunt for relocation offset.  */
+      if (!(reloc_arg & PA_GP_RELOC))
+	reloc_arg = _dl_fix_reloc_arg (addr, l);
+
+      _dl_fixup (l, reloc_arg);
+    }
 
   return (ElfW(Addr)) desc[0];
 }
diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h
index 9e98366ea3..8ecff97706 100644
--- a/sysdeps/hppa/dl-machine.h
+++ b/sysdeps/hppa/dl-machine.h
@@ -48,6 +48,14 @@
 #define GOT_FROM_PLT_STUB (4*4)
 #define PLT_ENTRY_SIZE (2*4)
 
+/* The gp slot in the function descriptor contains the relocation offset
+   before resolution.  To distinguish between a resolved gp value and an
+   unresolved relocation offset we set an unused bit in the relocation
+   offset.  This would allow us to do a synchronzied two word update
+   using this bit (interlocked update), but instead of waiting for the
+   update we simply recompute the gp value given that we know the ip.  */
+#define PA_GP_RELOC 1
+
 /* Initialize the function descriptor table before relocations */
 static inline void
 __hppa_init_bootstrap_fdesc_table (struct link_map *map)
@@ -117,10 +125,28 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t,
   volatile Elf32_Addr *rfdesc = reloc_addr;
   /* map is the link_map for the caller, t is the link_map for the object
      being called */
-  rfdesc[1] = value.gp;
-  /* Need to ensure that the gp is visible before the code
-     entry point is updated */
-  rfdesc[0] = value.ip;
+
+  /* We would like the function descriptor to be double word aligned.  This
+     helps performance (ip and gp then reside on the same cache line) and
+     we can update the pair atomically with a single store.  The linker
+     now ensures this alignment but we still have to handle old code.  */
+  if ((unsigned int)reloc_addr & 7)
+    {
+      /* Need to ensure that the gp is visible before the code
+         entry point is updated */
+      rfdesc[1] = value.gp;
+      atomic_full_barrier();
+      rfdesc[0] = value.ip;
+    }
+  else
+    {
+      /* Update pair atomically with floating point store.  */
+      union { ElfW(Word) v[2]; double d; } u;
+
+      u.v[0] = value.ip;
+      u.v[1] = value.gp;
+      *(volatile double *)rfdesc = u.d;
+    }
   return value;
 }
 
@@ -265,7 +291,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 		     here.  The trampoline code will load the proper
 		     LTP and pass the reloc offset to the fixup
 		     function.  */
-		  fptr->gp = iplt - jmprel;
+		  fptr->gp = (iplt - jmprel) | PA_GP_RELOC;
 		} /* r_sym != 0 */
 	      else
 		{
diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S
index 0114ca8b19..d0804b30c0 100644
--- a/sysdeps/hppa/dl-trampoline.S
+++ b/sysdeps/hppa/dl-trampoline.S
@@ -31,7 +31,7 @@
    slow down __cffc when it attempts to call fixup to resolve function
    descriptor references. Please refer to gcc/gcc/config/pa/fptr.c
 
-   Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp.  */
+   Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp, r22 = fp.  */
 
 	/* RELOCATION MARKER: bl to provide gcc's __cffc with fixup loc. */
 	.text
@@ -61,17 +61,20 @@ _dl_runtime_resolve:
 	copy	%sp, %r1	/* Copy previous sp */
 	/* Save function result address (on entry) */
 	stwm	%r28,128(%sp)
-	/* Fillin some frame info to follow ABI */
+	/* Fill in some frame info to follow ABI */
 	stw	%r1,-4(%sp)	/* Previous sp */
 	stw	%r21,-32(%sp)	/* PIC register value */
 
 	/* Save input floating point registers. This must be done
 	   in the new frame since the previous frame doesn't have
 	   enough space */
-	ldo	-56(%sp),%r1
+	ldo	-64(%sp),%r1
 	fstd,ma	%fr4,-8(%r1)
 	fstd,ma	%fr5,-8(%r1)
 	fstd,ma	%fr6,-8(%r1)
+
+	/* Test PA_GP_RELOC bit.  */
+	bb,>=	%r19,31,2f		/* branch if not reloc offset */
 	fstd,ma	%fr7,-8(%r1)
 
 	/* Set up args to fixup func, needs only two arguments  */
@@ -79,7 +82,7 @@ _dl_runtime_resolve:
 	copy	%r19,%r25		/* (2) reloc offset  */
 
 	/* Call the real address resolver. */
-	bl	_dl_fixup,%rp
+3:	bl	_dl_fixup,%rp
 	copy	%r21,%r19		/* set fixup func ltp */
 
 	/* While the linker will set a function pointer to NULL when it
@@ -102,7 +105,7 @@ _dl_runtime_resolve:
 	copy	%r29, %r19
 
 	/* Reload arguments fp args */
-	ldo	-56(%sp),%r1
+	ldo	-64(%sp),%r1
 	fldd,ma	-8(%r1),%fr4
 	fldd,ma	-8(%r1),%fr5
 	fldd,ma	-8(%r1),%fr6
@@ -129,6 +132,25 @@ _dl_runtime_resolve:
 	bv	%r0(%rp)
 	ldo	-128(%sp),%sp
 
+2:
+	/* Set up args for _dl_fix_reloc_arg.  */
+	copy	%r22,%r26		/* (1) function pointer */
+	depi	0,31,2,%r26		/* clear least significant bits */
+	ldw	8+4(%r20),%r25		/* (2) got[1] == struct link_map */
+
+	/* Save ltp and link map arg for _dl_fixup.  */
+	stw	%r21,-56(%sp)		/* ltp */
+	stw	%r25,-60(%sp)		/* struct link map */
+
+	/* Find reloc offset. */
+	bl	_dl_fix_reloc_arg,%rp
+	copy	%r21,%r19		/* set func ltp */
+
+	/* Set up args for _dl_fixup.  */
+	ldw	-56(%sp),%r21		/* ltp */
+	ldw	-60(%sp),%r26		/* (1) struct link map */
+	b	3b
+	copy	%ret0,%r25		/* (2) reloc offset */
         .EXIT
         .PROCEND
 	cfi_endproc
@@ -153,7 +175,7 @@ _dl_runtime_profile:
 	copy	%sp, %r1	/* Copy previous sp */
 	/* Save function result address (on entry) */
 	stwm	%r28,192(%sp)
-	/* Fillin some frame info to follow ABI */
+	/* Fill in some frame info to follow ABI */
 	stw	%r1,-4(%sp)	/* Previous sp */
 	stw	%r21,-32(%sp)	/* PIC register value */
 
@@ -181,10 +203,11 @@ _dl_runtime_profile:
 	fstd,ma	%fr5,8(%r1)
 	fstd,ma	%fr6,8(%r1)
 	fstd,ma	%fr7,8(%r1)
-	/* 32-bit stack pointer and return register */
-	stw	%sp,-56(%sp)
-	stw	%r2,-52(%sp)
 
+	/* Test PA_GP_RELOC bit.  */
+	bb,>=	%r19,31,2f		/* branch if not reloc offset */
+	/* 32-bit stack pointer */
+	stw	%sp,-56(%sp)
 
 	/* Set up args to fixup func, needs five arguments  */
 	ldw	8+4(%r20),%r26		/* (1) got[1] == struct link_map */
@@ -197,7 +220,7 @@ _dl_runtime_profile:
 	stw	%r1, -52(%sp)		/* (5) long int *framesizep */
 
 	/* Call the real address resolver. */
-	bl	_dl_profile_fixup,%rp
+3:	bl	_dl_profile_fixup,%rp
 	copy	%r21,%r19		/* set fixup func ltp */
 
 	/* Load up the returned function descriptor */
@@ -215,7 +238,9 @@ _dl_runtime_profile:
 	fldd,ma	8(%r1),%fr5
 	fldd,ma	8(%r1),%fr6
 	fldd,ma	8(%r1),%fr7
-	ldw	-52(%sp),%rp
+
+	/* Reload rp register -(192+20) without adjusting stack */
+	ldw	-212(%sp),%rp
 
 	/* Reload static link register -(192+16) without adjusting stack */
 	ldw	-208(%sp),%r29
@@ -303,6 +328,33 @@ L(cont):
         ldw -20(%sp),%rp
 	/* Return */
 	bv,n	0(%r2)
+
+2:
+	/* Set up args for _dl_fix_reloc_arg.  */
+	copy	%r22,%r26		/* (1) function pointer */
+	depi	0,31,2,%r26		/* clear least significant bits */
+	ldw	8+4(%r20),%r25		/* (2) got[1] == struct link_map */
+
+	/* Save ltp and link map arg for _dl_fixup.  */
+	stw	%r21,-92(%sp)		/* ltp */
+	stw	%r25,-116(%sp)		/* struct link map */
+
+	/* Find reloc offset. */
+	bl	_dl_fix_reloc_arg,%rp
+	copy	%r21,%r19		/* set func ltp */
+
+	 /* Restore fixup ltp.  */
+	ldw	-92(%sp),%r21		/* ltp */
+
+	/* Set up args to fixup func, needs five arguments  */
+	ldw	-116(%sp),%r26		/* (1) struct link map */
+	copy	%ret0,%r25		/* (2) reloc offset  */
+	stw	%r25,-120(%sp)		/* Save reloc offset */
+	ldw	-212(%sp),%r24		/* (3) profile_fixup needs rp */
+	ldo	-56(%sp),%r23		/* (4) La_hppa_regs */
+	ldo	-112(%sp), %r1
+	b	3b
+	stw	%r1, -52(%sp)		/* (5) long int *framesizep */
         .EXIT
         .PROCEND
 	cfi_endproc
diff --git a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
index 9d8ffbe860..bf61b66b70 100644
--- a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
+++ b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
@@ -36,9 +36,37 @@ typedef uintptr_t uatomicptr_t;
 typedef intmax_t atomic_max_t;
 typedef uintmax_t uatomic_max_t;
 
+#define atomic_full_barrier() __sync_synchronize ()
+
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
+/* We use the compiler atomic load and store builtins as the generic
+   defines are not atomic.  In particular, we need to use compare and
+   exchange for stores as the implementation is synthesized.  */
+void __atomic_link_error (void);
+#define __atomic_check_size_ls(mem) \
+ if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4)    \
+   __atomic_link_error ();
+
+#define atomic_load_relaxed(mem) \
+ ({ __atomic_check_size_ls((mem));                                           \
+    __atomic_load_n ((mem), __ATOMIC_RELAXED); })
+#define atomic_load_acquire(mem) \
+ ({ __atomic_check_size_ls((mem));                                           \
+    __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
+
+#define atomic_store_relaxed(mem, val) \
+ do {                                                                        \
+   __atomic_check_size_ls((mem));                                            \
+   __atomic_store_n ((mem), (val), __ATOMIC_RELAXED);                        \
+ } while (0)
+#define atomic_store_release(mem, val) \
+ do {                                                                        \
+   __atomic_check_size_ls((mem));                                            \
+   __atomic_store_n ((mem), (val), __ATOMIC_RELEASE);                        \
+ } while (0)
+
 /* XXX Is this actually correct?  */
 #define ATOMIC_EXCHANGE_USES_CAS 1