about summary refs log tree commit diff
diff options
context:
space:
mode:
author Paul Murphy <murphyp@linux.vnet.ibm.com> 2015-10-29 15:48:47 -0500
committer Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> 2015-11-19 18:04:30 -0200
commit 9695cb3e6528d9f6f5c7c7f96bd6c94e5ba0e2c4 (patch)
tree 63241a5efeb17ec0eda3d79b9cdaffc4cbc032bf
parent 86b4939846caf2bb072bba6057e1dc3ad187c2c2 (diff)
download glibc-9695cb3e6528d9f6f5c7c7f96bd6c94e5ba0e2c4.tar.gz
glibc-9695cb3e6528d9f6f5c7c7f96bd6c94e5ba0e2c4.tar.xz
glibc-9695cb3e6528d9f6f5c7c7f96bd6c94e5ba0e2c4.zip
powerpc: Spinlock optimization and cleanup
This patch optimizes powerpc spinlock implementation by:

* Use the correct EH hint bit on the larx for supported ISA.  For lock
  acquisition, the thread that acquired the lock with a successful stcx
  does not want to give away the write ownership on the cacheline.  The
  idea is to make the load reservation "sticky" about retaining write
  authority to the line.  That way, the store that must inevitably come
  to release the lock can succeed quickly and not contend with other
  threads issuing lwarx.  If another thread does a store to the line
  (false sharing), the winning thread must give up write authority.
  The proper value of EH for the larx for a lock acquisition is 1.

* Increase contended lock performance by up to 40%, with no measurable
  impact on uncontended locks on P8.

Thanks to Adhemerval Zanella who did most of the work.  I've run some
tests, and addressed some minor feedback.

	* sysdeps/powerpc/nptl/pthread_spin_lock.c (pthread_spin_lock):
	Add lwarx hint, and use macro for acquire instruction.
	* sysdeps/powerpc/nptl/pthread_spin_trylock.c (pthread_spin_trylock):
	Likewise.
	* sysdeps/unix/sysv/linux/powerpc/pthread_spin_unlock.c: Move to ...
	* sysdeps/powerpc/nptl/pthread_spin_unlock.c: ... here, and
	update to use new atomic macros.
-rw-r--r-- ChangeLog 11
-rw-r--r-- sysdeps/powerpc/nptl/pthread_spin_lock.c 4
-rw-r--r-- sysdeps/powerpc/nptl/pthread_spin_trylock.c 4
-rw-r--r-- sysdeps/powerpc/nptl/pthread_spin_unlock.c (renamed from sysdeps/unix/sysv/linux/powerpc/pthread_spin_unlock.c) 3
4 files changed, 16 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index a5440e2a74..d822095ed5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2015-11-19  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+	    Paul E. Murphy  <murphyp@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/nptl/pthread_spin_lock.c (pthread_spin_lock):
+	Add lwarx hint, and use macro for acquire instruction.
+	* sysdeps/powerpc/nptl/pthread_spin_trylock.c (pthread_spin_trylock):
+	Likewise.
+	* sysdep/unix/sysv/linux/powerpc/pthread_spin_unlock.c: Move to ...
+	* sysdeps/powerpc/nptl/pthread_spin_unlock.c: ... here, and
+	update to use new atomic macros.
+
 2015-11-19  Paul E. Murphy  <murphyp@linux.vnet.ibm.com>
 
 	* sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
diff --git a/sysdeps/powerpc/nptl/pthread_spin_lock.c b/sysdeps/powerpc/nptl/pthread_spin_lock.c
index d7d4cae6fa..fae7f7e0b9 100644
--- a/sysdeps/powerpc/nptl/pthread_spin_lock.c
+++ b/sysdeps/powerpc/nptl/pthread_spin_lock.c
@@ -24,12 +24,12 @@ pthread_spin_lock (pthread_spinlock_t *lock)
   unsigned int __tmp;
 
   asm volatile (
-       "1:	lwarx	%0,0,%1\n"
+       "1:	lwarx	%0,0,%1" MUTEX_HINT_ACQ "\n"
        "	cmpwi	0,%0,0\n"
        "	bne-	2f\n"
        "	stwcx.	%2,0,%1\n"
        "	bne-	2f\n"
-       "	isync\n"
+                __ARCH_ACQ_INSTR "\n"
        "	.subsection 1\n"
        "2:	lwzx	%0,0,%1\n"
        "	cmpwi	0,%0,0\n"
diff --git a/sysdeps/powerpc/nptl/pthread_spin_trylock.c b/sysdeps/powerpc/nptl/pthread_spin_trylock.c
index c485aa4bf9..09791c36a8 100644
--- a/sysdeps/powerpc/nptl/pthread_spin_trylock.c
+++ b/sysdeps/powerpc/nptl/pthread_spin_trylock.c
@@ -25,13 +25,13 @@ pthread_spin_trylock (pthread_spinlock_t *lock)
   unsigned int old;
   int err = EBUSY;
 
-  asm ("1:	lwarx	%0,0,%2\n"
+  asm ("1:	lwarx	%0,0,%2" MUTEX_HINT_ACQ "\n"
        "	cmpwi	0,%0,0\n"
        "	bne	2f\n"
        "	stwcx.	%3,0,%2\n"
        "	bne-	1b\n"
        "	li	%1,0\n"
-       "	isync\n"
+                __ARCH_ACQ_INSTR "\n"
        "2:	"
        : "=&r" (old), "=&r" (err)
        : "r" (lock), "r" (1), "1" (err)
diff --git a/sysdeps/unix/sysv/linux/powerpc/pthread_spin_unlock.c b/sysdeps/powerpc/nptl/pthread_spin_unlock.c
index 7af694f4ab..f830ad2880 100644
--- a/sysdeps/unix/sysv/linux/powerpc/pthread_spin_unlock.c
+++ b/sysdeps/powerpc/nptl/pthread_spin_unlock.c
@@ -22,7 +22,6 @@
 int
 pthread_spin_unlock (pthread_spinlock_t *lock)
 {
-  __asm __volatile (__ARCH_REL_INSTR ::: "memory");
-  *lock = 0;
+  atomic_store_release (lock, 0);
   return 0;
 }