From 9083bcc5dc747eeec24c2bb8bf5b7055b3bd4dbf Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 7 Aug 2009 20:36:53 -0700 Subject: Small optimizations to cancellable x86-64 code. --- nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S | 1 + .../sysv/linux/x86_64/pthread_cond_timedwait.S | 2 - .../unix/sysv/linux/x86_64/pthread_cond_wait.S | 40 ++++++--------- nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S | 58 +++++++++------------- 4 files changed, 40 insertions(+), 61 deletions(-) (limited to 'nptl/sysdeps/unix') diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S index a51df3eacf..0d48ec6fcd 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S @@ -20,6 +20,7 @@ #include #include #include +#include "lowlevellock.h" #ifdef IS_IN_libpthread # ifdef SHARED diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S index 4913beb8af..86bdac1b1b 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S @@ -157,7 +157,6 @@ __pthread_cond_timedwait: .LcleanupSTART1: 34: callq __pthread_enable_asynccancel movl %eax, (%rsp) - movq 8(%rsp), %rdi movq %r13, %r10 movl $FUTEX_WAIT_BITSET, %esi @@ -511,7 +510,6 @@ __pthread_cond_timedwait: .LcleanupSTART2: 4: callq __pthread_enable_asynccancel movl %eax, (%rsp) - movq 8(%rsp), %rdi leaq 32(%rsp), %r10 cmpq $-1, dep_mutex(%rdi) diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S index a66523eab6..45116b1ab0 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S @@ -45,14 +45,11 @@ __pthread_cond_wait: cfi_lsda(DW_EH_PE_udata4, .LexceptSTART) #endif - pushq %r12 - cfi_adjust_cfa_offset(8) - cfi_rel_offset(%r12, 0) pushq %r13 cfi_adjust_cfa_offset(8) cfi_rel_offset(%r13, 0) #define FRAME_SIZE 32 - subq $FRAME_SIZE, %rsp + leaq -FRAME_SIZE(%rsp), %rsp cfi_adjust_cfa_offset(FRAME_SIZE) /* Stack frame: @@ -112,7 +109,7 @@ __pthread_cond_wait: movl %edx, 4(%rsp) /* Unlock. */ -8: movl cond_futex(%rdi), %r12d +8: movl cond_futex(%rdi), %edx LOCK #if cond_lock == 0 decl (%rdi) @@ -125,9 +122,7 @@ __pthread_cond_wait: 4: callq __pthread_enable_asynccancel movl %eax, (%rsp) - movq 8(%rsp), %rdi xorq %r10, %r10 - movq %r12, %rdx cmpq $-1, dep_mutex(%rdi) leaq cond_futex(%rdi), %rdi movl $FUTEX_WAIT, %esi @@ -243,21 +238,14 @@ __pthread_cond_wait: callq __pthread_mutex_cond_lock -14: addq $FRAME_SIZE, %rsp - cfi_adjust_cfa_offset(-FRAME_SIZE) - - popq %r13 - cfi_adjust_cfa_offset(-8) - cfi_restore(%r13) - popq %r12 - cfi_adjust_cfa_offset(-8) - cfi_restore(%r12) +14: movq FRAME_SIZE(%rsp), %r13 + leaq FRAME_SIZE+8(%rsp), %rsp + cfi_adjust_cfa_offset(-(FRAME_SIZE + 8)) /* We return the result of the mutex_lock operation. */ retq - cfi_adjust_cfa_offset(16 + FRAME_SIZE) - cfi_rel_offset(%r12, FRAME_SIZE + 8) + cfi_adjust_cfa_offset(8 + FRAME_SIZE) cfi_rel_offset(%r13, FRAME_SIZE) 18: callq __pthread_mutex_cond_lock_adjust @@ -285,7 +273,11 @@ __pthread_cond_wait: movl $LLL_PRIVATE, %eax movl $LLL_SHARED, %esi cmovne %eax, %esi + /* The call preserves %rdx. */ callq __lll_unlock_wake +#if cond_lock != 0 + subq $cond_lock, %rdi +#endif jmp 4b /* Locking in loop failed. */ @@ -349,9 +341,7 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait, __condvar_cleanup1: /* Stack frame: - rsp + 48 - +--------------------------+ - rsp + 40 | %r12 | + rsp + 40 +--------------------------+ rsp + 32 | %r13 | +--------------------------+ @@ -410,7 +400,7 @@ __condvar_cleanup1: 3: subl $(1 << nwaiters_shift), cond_nwaiters(%rdi) /* Wake up a thread which wants to destroy the condvar object. */ - xorq %r12, %r12 + xorl %ecx, %ecx cmpq $0xffffffffffffffff, total_seq(%rdi) jne 4f movl cond_nwaiters(%rdi), %eax @@ -433,7 +423,7 @@ __condvar_cleanup1: movl $SYS_futex, %eax syscall subq $cond_nwaiters, %rdi - movl $1, %r12d + movl $1, %ecx 4: LOCK #if cond_lock == 0 @@ -449,10 +439,11 @@ __condvar_cleanup1: movl $LLL_PRIVATE, %eax movl $LLL_SHARED, %esi cmovne %eax, %esi + /* The call preserves %rcx. */ callq __lll_unlock_wake /* Wake up all waiters to make sure no signal gets lost. */ -2: testq %r12, %r12 +2: testl %ecx, %ecx jnz 5f addq $cond_futex, %rdi cmpq $-1, dep_mutex-cond_futex(%rdi) @@ -474,7 +465,6 @@ __condvar_cleanup1: callq __pthread_mutex_cond_lock movq 24(%rsp), %rdi - movq 40(%rsp), %r12 movq 32(%rsp), %r13 .LcallUR: call _Unwind_Resume@PLT diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S index a01d745a17..2cf6ec10a4 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S @@ -61,16 +61,13 @@ sem_wait: xorl %eax, %eax retq -1: pushq %r12 + /* This push is only needed to store the sem_t pointer for the + exception handler. */ +1: pushq %rdi cfi_adjust_cfa_offset(8) - cfi_rel_offset(%r12, 0) - pushq %r13 - cfi_adjust_cfa_offset(8) - cfi_rel_offset(%r13, 0) - movq %rdi, %r13 LOCK - addq $1, NWAITERS(%r13) + addq $1, NWAITERS(%rdi) .LcleanupSTART: 6: call __pthread_enable_asynccancel @@ -78,7 +75,6 @@ sem_wait: xorq %r10, %r10 movl $SYS_futex, %eax - movq %r13, %rdi #if FUTEX_WAIT == 0 movl PRIVATE(%rdi), %esi #else @@ -87,22 +83,23 @@ sem_wait: #endif xorl %edx, %edx syscall - movq %rax, %r12 + movq %rax, %rcx - movl %r8d, %edi + xchgq %r8, %rdi call __pthread_disable_asynccancel .LcleanupEND: + movq %r8, %rdi - testq %r12, %r12 + testq %rcx, %rcx je 3f - cmpq $-EWOULDBLOCK, %r12 + cmpq $-EWOULDBLOCK, %rcx jne 4f 3: #if VALUE == 0 - movl (%r13), %eax + movl (%rdi), %eax #else - movl VALUE(%r13), %eax + movl VALUE(%rdi), %eax #endif 5: testl %eax, %eax je 6b @@ -110,50 +107,43 @@ sem_wait: leal -1(%rax), %edx LOCK #if VALUE == 0 - cmpxchgl %edx, (%r13) + cmpxchgl %edx, (%rdi) #else - cmpxchgl %edx, VALUE(%r13) + cmpxchgl %edx, VALUE(%rdi) #endif jne 5b - LOCK - subq $1, NWAITERS(%r13) - xorl %eax, %eax -9: popq %r13 - cfi_adjust_cfa_offset(-8) - cfi_restore(%r13) - popq %r12 +9: LOCK + subq $1, NWAITERS(%rdi) + + leaq 8(%rsp), %rsp cfi_adjust_cfa_offset(-8) - cfi_restore(%r12) retq - cfi_adjust_cfa_offset(2 * 8) - cfi_rel_offset(%r12, 8) - cfi_rel_offset(%r13, 0) -4: negq %r12 + cfi_adjust_cfa_offset(8) +4: negq %rcx #if USE___THREAD movq errno@gottpoff(%rip), %rdx - movl %r12d, %fs:(%rdx) + movl %ecx, %fs:(%rdx) #else +# error "not supported. %rcx and %rdi must be preserved" callq __errno_location@plt - movl %r12d, (%rax) + movl %ecx, (%rax) #endif orl $-1, %eax - LOCK - subq $1, NWAITERS(%r13) - jmp 9b .size sem_wait,.-sem_wait .type sem_wait_cleanup,@function sem_wait_cleanup: + movq (%rsp), %rdi LOCK - subq $1, NWAITERS(%r13) + subq $1, NWAITERS(%rdi) movq %rax, %rdi .LcallUR: call _Unwind_Resume@PLT -- cgit 1.4.1