about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2009-08-08 17:48:09 -0700
committer Ulrich Drepper <drepper@redhat.com> 2009-08-08 17:48:09 -0700
commit efa0569d2bfdbb7367fce42b1c99821b85d2d3ba (patch)
tree 73e7beba2f7ee1b08281621bc288ac47c3e55934
parent 5d368296eafdf38a81228d118e772134734d6bb3 (diff)
download glibc-efa0569d2bfdbb7367fce42b1c99821b85d2d3ba.tar.gz
glibc-efa0569d2bfdbb7367fce42b1c99821b85d2d3ba.tar.xz
glibc-efa0569d2bfdbb7367fce42b1c99821b85d2d3ba.zip
Optimize x86-64 version of sem_timedwait.
-rw-r--r-- nptl/ChangeLog 3
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S 210
2 files changed, 148 insertions, 65 deletions
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 48fcc0fe09..5be464e2f2 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,5 +1,8 @@
 2009-08-08  Ulrich Drepper  <drepper@redhat.com>
 
+	* sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (sem_timedwait):
+	Optimize code path used when FUTEX_CLOCK_REALTIME is supported.
+
 	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
 	(__pthread_cond_wait): Optimize by avoiding use of callee-safe
 	register.
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
index 95762834d3..0291beb169 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
@@ -65,34 +65,9 @@ sem_timedwait:
 	retq
 
 	/* Check whether the timeout value is valid.  */
-1:	pushq	%r12
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r12, 0)
-	pushq	%r13
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r13, 0)
-	pushq	%r14
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r14, 0)
-#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
-# define STACKFRAME 8
-#else
-# define STACKFRAME 24
-#endif
-	subq	$STACKFRAME, %rsp
-	cfi_adjust_cfa_offset(STACKFRAME)
-
-	movq	%rdi, %r12
-	movq	%rsi, %r13
-
-	/* Check for invalid nanosecond field.  */
-	cmpq	$1000000000, 8(%r13)
-	movl	$EINVAL, %r14d
+1:	cmpq	$1000000000, 8(%rsi)
 	jae	6f
 
-	LOCK
-	addq	$1, NWAITERS(%r12)
-
 #ifndef __ASSUME_FUTEX_CLOCK_REALTIME
 #  ifdef PIC
 	cmpl	$0, __have_futex_clock_realtime(%rip)
@@ -102,15 +77,22 @@ sem_timedwait:
 	je	.Lreltmo
 #endif
 
+	/* This push is only needed to store the sem_t pointer for the
+	   exception handler.  */
+	pushq	%rdi
+	cfi_adjust_cfa_offset(8)
+
+	movq	%rsi, %r10
+
+	LOCK
+	addq	$1, NWAITERS(%rdi)
+
 .LcleanupSTART:
 13:	call	__pthread_enable_asynccancel
-	movl	%eax, (%rsp)
+	movl	%eax, %r8d
 
-	movq	%r13, %r10
-#if VALUE == 0
-	movq	%r12, %rdi
-#else
-	leaq	VALUE(%r12), %rdi
+#if VALUE != 0
+	leaq	VALUE(%rdi), %rdi
 #endif
 	movl	$0xffffffff, %r9d
 	movl	$FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi
@@ -118,22 +100,26 @@ sem_timedwait:
 	movl	$SYS_futex, %eax
 	xorl	%edx, %edx
 	syscall
-	movq	%rax, %r14
+	movq	%rax, %r9
+#if VALUE != 0
+	leaq	-VALUE(%rdi), %rdi
+#endif
 
-	movl	(%rsp), %edi
+	xchgq	%r8, %rdi
 	call	__pthread_disable_asynccancel
 .LcleanupEND:
+	movq	%r8, %rdi
 
-	testq	%r14, %r14
+	testq	%r9, %r9
 	je	11f
-	cmpq	$-EWOULDBLOCK, %r14
+	cmpq	$-EWOULDBLOCK, %r9
 	jne	3f
 
 11:
 #if VALUE == 0
-	movl	(%r12), %eax
+	movl	(%rdi), %eax
 #else
-	movl	VALUE(%r12), %eax
+	movl	VALUE(%rdi), %eax
 #endif
 14:	testl	%eax, %eax
 	je	13b
@@ -141,49 +127,74 @@ sem_timedwait:
 	leaq	-1(%rax), %rcx
 	LOCK
 #if VALUE == 0
-	cmpxchgl %ecx, (%r12)
+	cmpxchgl %ecx, (%rdi)
 #else
-	cmpxchgl %ecx, VALUE(%r12)
+	cmpxchgl %ecx, VALUE(%rdi)
 #endif
 	jne	14b
 
-10:	xorl	%eax, %eax
+	xorl	%eax, %eax
 
 15:	LOCK
-	subq	$1, NWAITERS(%r12)
+	subq	$1, NWAITERS(%rdi)
 
-	addq	$STACKFRAME, %rsp
-	cfi_adjust_cfa_offset(-STACKFRAME)
-	popq	%r14
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r14)
-	popq	%r13
+	leaq	8(%rsp), %rsp
 	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r13)
-	popq	%r12
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r12)
 	retq
 
-	cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
-	cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
-	cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
-	cfi_rel_offset(%r14, STACKFRAME)
-3:	negq	%r14
-6:
+	cfi_adjust_cfa_offset(8)
+3:	negq	%r9
 #if USE___THREAD
 	movq	errno@gottpoff(%rip), %rdx
-	movl	%r14d, %fs:(%rdx)
+	movl	%r9d, %fs:(%rdx)
 #else
 	callq	__errno_location@plt
-	movl	%r14d, (%rax)
+	movl	%r9d, (%rax)
 #endif
 
 	orl	$-1, %eax
 	jmp	15b
 
+	cfi_adjust_cfa_offset(-8)
+6:
+#if USE___THREAD
+	movq	errno@gottpoff(%rip), %rdx
+	movl	$EINVAL, %fs:(%rdx)
+#else
+	callq	__errno_location@plt
+	movl	$EINVAL, (%rax)
+#endif
+
+	orl	$-1, %eax
+
+	retq
+
 #ifndef __ASSUME_FUTEX_CLOCK_REALTIME
 .Lreltmo:
+	pushq	%r12
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(%r12, 0)
+	pushq	%r13
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(%r13, 0)
+	pushq	%r14
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(%r14, 0)
+
+#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
+# define STACKFRAME 8
+#else
+# define STACKFRAME 24
+#endif
+	subq	$STACKFRAME, %rsp
+	cfi_adjust_cfa_offset(STACKFRAME)
+
+	movq	%rdi, %r12
+	movq	%rsi, %r13
+
+	LOCK
+	addq	$1, NWAITERS(%r12)
+
 7:	xorl	%esi, %esi
 	movq	%rsp, %rdi
 	movq	$VSYSCALL_ADDR_vgettimeofday, %rax
@@ -202,7 +213,7 @@ sem_timedwait:
 	decq	%rdi
 5:	testq	%rdi, %rdi
 	movl	$ETIMEDOUT, %r14d
-	js	6b		/* Time is already up.  */
+	js	36f		/* Time is already up.  */
 
 	movq	%rdi, (%rsp)	/* Store relative timeout.  */
 	movq	%rsi, 8(%rsp)
@@ -235,7 +246,7 @@ sem_timedwait:
 	testq	%r14, %r14
 	je	9f
 	cmpq	$-EWOULDBLOCK, %r14
-	jne	3b
+	jne	33f
 
 9:
 # if VALUE == 0
@@ -254,15 +265,54 @@ sem_timedwait:
 	cmpxchgl %ecx, VALUE(%r12)
 # endif
 	jne	8b
-	jmp	10b
+
+	xorl	%eax, %eax
+
+45:	LOCK
+	subq	$1, NWAITERS(%r12)
+
+	addq	$STACKFRAME, %rsp
+	cfi_adjust_cfa_offset(-STACKFRAME)
+	popq	%r14
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%r14)
+	popq	%r13
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%r13)
+	popq	%r12
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%r12)
+	retq
+
+	cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+	cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+	cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+	cfi_rel_offset(%r14, STACKFRAME)
+33:	negq	%r14
+36:
+#if USE___THREAD
+	movq	errno@gottpoff(%rip), %rdx
+	movl	%r14d, %fs:(%rdx)
+#else
+	callq	__errno_location@plt
+	movl	%r14d, (%rax)
 #endif
+
+	orl	$-1, %eax
+	jmp	45b
+#endif
+	cfi_endproc
 	.size	sem_timedwait,.-sem_timedwait
 
 
 	.type	sem_timedwait_cleanup,@function
 sem_timedwait_cleanup:
+	cfi_startproc
+	cfi_adjust_cfa_offset(8)
+
+	movq	(%rsp), %rdi
 	LOCK
-	subq	$1, NWAITERS(%r12)
+	subq	$1, NWAITERS(%rdi)
 	movq	%rax, %rdi
 .LcallUR:
 	call	_Unwind_Resume@PLT
@@ -272,6 +322,30 @@ sem_timedwait_cleanup:
 	.size	sem_timedwait_cleanup,.-sem_timedwait_cleanup
 
 
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+	.type	sem_timedwait_cleanup2,@function
+sem_timedwait_cleanup2:
+	cfi_startproc
+	cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+	cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+	cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+	cfi_rel_offset(%r14, STACKFRAME)
+
+	LOCK
+	subq	$1, NWAITERS(%r12)
+	movq	%rax, %rdi
+	movq	STACKFRAME(%rsp), %r14
+	movq	STACKFRAME+8(%rsp), %r13
+	movq	STACKFRAME+16(%rsp), %r12
+.LcallUR2:
+	call	_Unwind_Resume@PLT
+	hlt
+.LENDCODE2:
+	cfi_endproc
+	.size	sem_timedwait_cleanup2,.-sem_timedwait_cleanup2
+#endif
+
+
 	.section .gcc_except_table,"a",@progbits
 .LexceptSTART:
 	.byte	DW_EH_PE_omit			# @LPStart format
@@ -286,13 +360,19 @@ sem_timedwait_cleanup:
 #ifndef __ASSUME_FUTEX_CLOCK_REALTIME
 	.uleb128 .LcleanupSTART2-.LSTARTCODE
 	.uleb128 .LcleanupEND2-.LcleanupSTART2
-	.uleb128 sem_timedwait_cleanup-.LSTARTCODE
+	.uleb128 sem_timedwait_cleanup2-.LSTARTCODE
 	.uleb128  0
 #endif
 	.uleb128 .LcallUR-.LSTARTCODE
 	.uleb128 .LENDCODE-.LcallUR
 	.uleb128 0
 	.uleb128  0
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+	.uleb128 .LcallUR2-.LSTARTCODE
+	.uleb128 .LENDCODE2-.LcallUR2
+	.uleb128 0
+	.uleb128  0
+#endif
 .Lcstend: