about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2009-08-08 10:21:46 -0700
committer Ulrich Drepper <drepper@redhat.com> 2009-08-08 10:21:46 -0700
commit 49eea97b00d367aa3eac3dfd047c259e7bd04732 (patch)
tree 689337823f6e88b1c410dfcd1dbc12f6d4a5f63f
parent 8e436522e108229eeb96d8921c659c5a19eaf455 (diff)
download glibc-49eea97b00d367aa3eac3dfd047c259e7bd04732.tar.gz
glibc-49eea97b00d367aa3eac3dfd047c259e7bd04732.tar.xz
glibc-49eea97b00d367aa3eac3dfd047c259e7bd04732.zip
Another minor optimization of x86-64 pthread_cond_wait.
-rw-r--r-- nptl/ChangeLog 6
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S 22
2 files changed, 13 insertions, 15 deletions
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 0f5c231765..48fcc0fe09 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,9 @@
+2009-08-08  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+	(__pthread_cond_wait): Optimize by avoiding use of callee-saved
+	register.
+
 2009-08-07  Ulrich Drepper  <drepper@redhat.com>
 
 	* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Little optimizations
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index 45116b1ab0..f5b929ea71 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -45,9 +45,6 @@ __pthread_cond_wait:
 	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
 #endif
 
-	pushq	%r13
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r13, 0)
 #define FRAME_SIZE 32
 	leaq	-FRAME_SIZE(%rsp), %rsp
 	cfi_adjust_cfa_offset(FRAME_SIZE)
@@ -140,7 +137,7 @@ __pthread_cond_wait:
 	movl	$SYS_futex, %eax
 	syscall
 
-	movl	$1, %r13d
+	movl	$1, %r8d
 #ifdef __ASSUME_REQUEUE_PI
 	jmp	62f
 #else
@@ -158,7 +155,7 @@ __pthread_cond_wait:
 #else
 	orl	%fs:PRIVATE_FUTEX, %esi
 #endif
-60:	xorl	%r13d, %r13d
+60:	xorl	%r8d, %r8d
 	movl	$SYS_futex, %eax
 	syscall
 
@@ -233,20 +230,18 @@ __pthread_cond_wait:
 	/* If requeue_pi is used the kernel performs the locking of the
 	   mutex. */
 11:	movq	16(%rsp), %rdi
-	testl	%r13d, %r13d
+	testl	%r8d, %r8d
 	jnz	18f
 
 	callq	__pthread_mutex_cond_lock
 
-14:	movq	FRAME_SIZE(%rsp), %r13
-	leaq	FRAME_SIZE+8(%rsp), %rsp
-	cfi_adjust_cfa_offset(-(FRAME_SIZE + 8))
+14:	leaq	FRAME_SIZE(%rsp), %rsp
+	cfi_adjust_cfa_offset(-FRAME_SIZE)
 
 	/* We return the result of the mutex_lock operation.  */
 	retq
 
-	cfi_adjust_cfa_offset(8 + FRAME_SIZE)
-	cfi_rel_offset(%r13, FRAME_SIZE)
+	cfi_adjust_cfa_offset(FRAME_SIZE)
 
 18:	callq	__pthread_mutex_cond_lock_adjust
 	xorl	%eax, %eax
@@ -341,9 +336,7 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
 __condvar_cleanup1:
 	/* Stack frame:
 
-	   rsp + 40
-		    +--------------------------+
-	   rsp + 32 | %r13                     |
+	   rsp + 32
 		    +--------------------------+
 	   rsp + 24 | unused                   |
 	            +--------------------------+
@@ -465,7 +458,6 @@ __condvar_cleanup1:
 	callq	__pthread_mutex_cond_lock
 
 	movq	24(%rsp), %rdi
-	movq	32(%rsp), %r13
 .LcallUR:
 	call	_Unwind_Resume@PLT
 	hlt