summary refs log tree commit diff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2009-08-07 20:36:53 -0700
committer Ulrich Drepper <drepper@redhat.com> 2009-08-07 20:36:53 -0700
commit 9083bcc5dc747eeec24c2bb8bf5b7055b3bd4dbf (patch)
tree e5078f60592dcf86fcd638a7db7a5fa530ec668e
parent 57b378ac895c346d74758b54f9e388ed3ca30751 (diff)
download glibc-9083bcc5dc747eeec24c2bb8bf5b7055b3bd4dbf.tar.gz
glibc-9083bcc5dc747eeec24c2bb8bf5b7055b3bd4dbf.tar.xz
glibc-9083bcc5dc747eeec24c2bb8bf5b7055b3bd4dbf.zip
Small optimizations to cancellable x86-64 code.
-rw-r--r-- nptl/ChangeLog 9
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S 1
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S 2
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S 40
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S 58
5 files changed, 49 insertions, 61 deletions
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 3a74b5fecd..0f5c231765 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,12 @@
+2009-08-07  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Little optimizations
+	enabled by the special *_asynccancel functions.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
+
+	* sysdeps/unix/sysv/linux/x86_64/cancellation.S: Include lowlevellock.h.
+
 2009-08-04  Ulrich Drepper  <drepper@redhat.com>
 
 	* sysdeps/unix/sysv/linux/x86_64/cancellation.S: New file.
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
index a51df3eacf..0d48ec6fcd 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
@@ -20,6 +20,7 @@
 #include <sysdep.h>
 #include <tcb-offsets.h>
 #include <kernel-features.h>
+#include "lowlevellock.h"
 
 #ifdef IS_IN_libpthread
 # ifdef SHARED
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index 4913beb8af..86bdac1b1b 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -157,7 +157,6 @@ __pthread_cond_timedwait:
 .LcleanupSTART1:
 34:	callq	__pthread_enable_asynccancel
 	movl	%eax, (%rsp)
-	movq	8(%rsp), %rdi
 
 	movq	%r13, %r10
 	movl	$FUTEX_WAIT_BITSET, %esi
@@ -511,7 +510,6 @@ __pthread_cond_timedwait:
 .LcleanupSTART2:
 4:	callq	__pthread_enable_asynccancel
 	movl	%eax, (%rsp)
-	movq	8(%rsp), %rdi
 
 	leaq	32(%rsp), %r10
 	cmpq	$-1, dep_mutex(%rdi)
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index a66523eab6..45116b1ab0 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -45,14 +45,11 @@ __pthread_cond_wait:
 	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
 #endif
 
-	pushq	%r12
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r12, 0)
 	pushq	%r13
 	cfi_adjust_cfa_offset(8)
 	cfi_rel_offset(%r13, 0)
 #define FRAME_SIZE 32
-	subq	$FRAME_SIZE, %rsp
+	leaq	-FRAME_SIZE(%rsp), %rsp
 	cfi_adjust_cfa_offset(FRAME_SIZE)
 
 	/* Stack frame:
@@ -112,7 +109,7 @@ __pthread_cond_wait:
 	movl	%edx, 4(%rsp)
 
 	/* Unlock.  */
-8:	movl	cond_futex(%rdi), %r12d
+8:	movl	cond_futex(%rdi), %edx
 	LOCK
 #if cond_lock == 0
 	decl	(%rdi)
@@ -125,9 +122,7 @@ __pthread_cond_wait:
 4:	callq	__pthread_enable_asynccancel
 	movl	%eax, (%rsp)
 
-	movq	8(%rsp), %rdi
 	xorq	%r10, %r10
-	movq	%r12, %rdx
 	cmpq	$-1, dep_mutex(%rdi)
 	leaq	cond_futex(%rdi), %rdi
 	movl	$FUTEX_WAIT, %esi
@@ -243,21 +238,14 @@ __pthread_cond_wait:
 
 	callq	__pthread_mutex_cond_lock
 
-14:	addq	$FRAME_SIZE, %rsp
-	cfi_adjust_cfa_offset(-FRAME_SIZE)
-
-	popq	%r13
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r13)
-	popq	%r12
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r12)
+14:	movq	FRAME_SIZE(%rsp), %r13
+	leaq	FRAME_SIZE+8(%rsp), %rsp
+	cfi_adjust_cfa_offset(-(FRAME_SIZE + 8))
 
 	/* We return the result of the mutex_lock operation.  */
 	retq
 
-	cfi_adjust_cfa_offset(16 + FRAME_SIZE)
-	cfi_rel_offset(%r12, FRAME_SIZE + 8)
+	cfi_adjust_cfa_offset(8 + FRAME_SIZE)
 	cfi_rel_offset(%r13, FRAME_SIZE)
 
 18:	callq	__pthread_mutex_cond_lock_adjust
@@ -285,7 +273,11 @@ __pthread_cond_wait:
 	movl	$LLL_PRIVATE, %eax
 	movl	$LLL_SHARED, %esi
 	cmovne	%eax, %esi
+	/* The call preserves %rdx.  */
 	callq	__lll_unlock_wake
+#if cond_lock != 0
+	subq	$cond_lock, %rdi
+#endif
 	jmp	4b
 
 	/* Locking in loop failed.  */
@@ -349,9 +341,7 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
 __condvar_cleanup1:
 	/* Stack frame:
 
-	   rsp + 48
-		    +--------------------------+
-	   rsp + 40 | %r12                     |
+	   rsp + 40
 		    +--------------------------+
 	   rsp + 32 | %r13                     |
 		    +--------------------------+
@@ -410,7 +400,7 @@ __condvar_cleanup1:
 3:	subl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
 
 	/* Wake up a thread which wants to destroy the condvar object.  */
-	xorq	%r12, %r12
+	xorl	%ecx, %ecx
 	cmpq	$0xffffffffffffffff, total_seq(%rdi)
 	jne	4f
 	movl	cond_nwaiters(%rdi), %eax
@@ -433,7 +423,7 @@ __condvar_cleanup1:
 	movl	$SYS_futex, %eax
 	syscall
 	subq	$cond_nwaiters, %rdi
-	movl	$1, %r12d
+	movl	$1, %ecx
 
 4:	LOCK
 #if cond_lock == 0
@@ -449,10 +439,11 @@ __condvar_cleanup1:
 	movl	$LLL_PRIVATE, %eax
 	movl	$LLL_SHARED, %esi
 	cmovne	%eax, %esi
+	/* The call preserves %rcx.  */
 	callq	__lll_unlock_wake
 
 	/* Wake up all waiters to make sure no signal gets lost.  */
-2:	testq	%r12, %r12
+2:	testl	%ecx, %ecx
 	jnz	5f
 	addq	$cond_futex, %rdi
 	cmpq	$-1, dep_mutex-cond_futex(%rdi)
@@ -474,7 +465,6 @@ __condvar_cleanup1:
 	callq	__pthread_mutex_cond_lock
 
 	movq	24(%rsp), %rdi
-	movq	40(%rsp), %r12
 	movq	32(%rsp), %r13
 .LcallUR:
 	call	_Unwind_Resume@PLT
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
index a01d745a17..2cf6ec10a4 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
@@ -61,16 +61,13 @@ sem_wait:
 	xorl	%eax, %eax
 	retq
 
-1:	pushq	%r12
+	/* This push is only needed to store the sem_t pointer for the
+	   exception handler.  */
+1:	pushq	%rdi
 	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r12, 0)
-	pushq	%r13
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r13, 0)
-	movq	%rdi, %r13
 
 	LOCK
-	addq	$1, NWAITERS(%r13)
+	addq	$1, NWAITERS(%rdi)
 
 .LcleanupSTART:
 6:	call	__pthread_enable_asynccancel
@@ -78,7 +75,6 @@ sem_wait:
 
 	xorq	%r10, %r10
 	movl	$SYS_futex, %eax
-	movq	%r13, %rdi
 #if FUTEX_WAIT == 0
 	movl	PRIVATE(%rdi), %esi
 #else
@@ -87,22 +83,23 @@ sem_wait:
 #endif
 	xorl	%edx, %edx
 	syscall
-	movq	%rax, %r12
+	movq	%rax, %rcx
 
-	movl	%r8d, %edi
+	xchgq	%r8, %rdi
 	call	__pthread_disable_asynccancel
 .LcleanupEND:
+	movq	%r8, %rdi
 
-	testq	%r12, %r12
+	testq	%rcx, %rcx
 	je	3f
-	cmpq	$-EWOULDBLOCK, %r12
+	cmpq	$-EWOULDBLOCK, %rcx
 	jne	4f
 
 3:
 #if VALUE == 0
-	movl	(%r13), %eax
+	movl	(%rdi), %eax
 #else
-	movl	VALUE(%r13), %eax
+	movl	VALUE(%rdi), %eax
 #endif
 5:	testl	%eax, %eax
 	je	6b
@@ -110,50 +107,43 @@ sem_wait:
 	leal	-1(%rax), %edx
 	LOCK
 #if VALUE == 0
-	cmpxchgl %edx, (%r13)
+	cmpxchgl %edx, (%rdi)
 #else
-	cmpxchgl %edx, VALUE(%r13)
+	cmpxchgl %edx, VALUE(%rdi)
 #endif
 	jne	5b
 
-	LOCK
-	subq	$1, NWAITERS(%r13)
-
 	xorl	%eax, %eax
 
-9:	popq	%r13
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r13)
-	popq	%r12
+9:	LOCK
+	subq	$1, NWAITERS(%rdi)
+
+	leaq	8(%rsp), %rsp
 	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r12)
 
 	retq
 
-	cfi_adjust_cfa_offset(2 * 8)
-	cfi_rel_offset(%r12, 8)
-	cfi_rel_offset(%r13, 0)
-4:	negq	%r12
+	cfi_adjust_cfa_offset(8)
+4:	negq	%rcx
 #if USE___THREAD
 	movq	errno@gottpoff(%rip), %rdx
-	movl	%r12d, %fs:(%rdx)
+	movl	%ecx, %fs:(%rdx)
 #else
+# error "not supported.  %rcx and %rdi must be preserved"
 	callq	__errno_location@plt
-	movl	%r12d, (%rax)
+	movl	%ecx, (%rax)
 #endif
 	orl	$-1, %eax
 
-	LOCK
-	subq	$1, NWAITERS(%r13)
-
 	jmp 9b
 	.size	sem_wait,.-sem_wait
 
 
 	.type	sem_wait_cleanup,@function
 sem_wait_cleanup:
+	movq	(%rsp), %rdi
 	LOCK
-	subq	$1, NWAITERS(%r13)
+	subq	$1, NWAITERS(%rdi)
 	movq	%rax, %rdi
 .LcallUR:
 	call	_Unwind_Resume@PLT