-rw-r--r--  ChangeLog                                              |  9
-rw-r--r--  sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S  | 54
2 files changed, 39 insertions, 24 deletions
diff --git a/ChangeLog b/ChangeLog
index d22231b95d..e93b7bf3b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2016-04-03  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:
+	(__mempcpy_erms, __memmove_erms): Moved before __mempcpy_chk
+	with unaligned_erms.
+	(__memmove_erms): Skip if source == destination.
+	(__memmove_unaligned_erms): Don't check source == destination
+	first.
+
 2016-04-01  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86/cpu-features.c (init_cpu_features): Don't set
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index cf645dd7ff..66779a3bec 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -95,46 +95,30 @@ L(start):
 	ret
 END (MEMMOVE_SYMBOL (__memmove, unaligned_2))
 
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
-	cmpq	%rdx, %rcx
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
-# endif
-
-ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
-	movq	%rdi, %rax
-	addq	%rdx, %rax
-	jmp	L(start_erms)
-END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
-
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
-	cmpq	%rdx, %rcx
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
-# endif
-
 # if VEC_SIZE == 16
 /* Only used to measure performance of REP MOVSB.  */
 #  ifdef SHARED
 ENTRY (__mempcpy_erms)
 	movq	%rdi, %rax
 	addq	%rdx, %rax
-	jmp	L(movsb)
+	jmp	L(start_movsb)
 END (__mempcpy_erms)
 #  endif
 
 ENTRY (__memmove_erms)
 	movq	%rdi, %rax
+L(start_movsb):
 	movq	%rdx, %rcx
 	cmpq	%rsi, %rdi
-	jbe	1f
+	jb	1f
+	/* Source == destination is less common.  */
+	je	2f
 	leaq	(%rsi,%rcx), %rdx
 	cmpq	%rdx, %rdi
 	jb	L(movsb_backward)
1:
 	rep movsb
+2:
 	ret
L(movsb_backward):
 	leaq	-1(%rdi,%rcx), %rdi
@@ -147,6 +131,26 @@ END (__memmove_erms)
 strong_alias (__memmove_erms, __memcpy_erms)
 # endif
 
+# ifdef SHARED
+ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
+# endif
+
+ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+	movq	%rdi, %rax
+	addq	%rdx, %rax
+	jmp	L(start_erms)
+END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+
+# ifdef SHARED
+ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+# endif
+
 ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
 	movq	%rdi, %rax
L(start_erms):
@@ -166,8 +170,9 @@ L(return):
 
 L(movsb):
 	cmpq	%rsi, %rdi
-	je	L(nop)
 	jb	1f
+	/* Source == destination is less common.  */
+	je	L(nop)
 	leaq	(%rsi,%rdx), %r9
 	cmpq	%r9, %rdi
 	/* Avoid slow backward REP MOVSB.  */
@@ -191,8 +196,9 @@ L(movsb_more_2x_vec):
 L(more_2x_vec):
 	/* More than 2 * VEC.  */
 	cmpq	%rsi, %rdi
-	je	L(nop)
 	jb	L(copy_forward)
+	/* Source == destination is less common.  */
+	je	L(nop)
 	leaq	(%rsi,%rdx), %rcx
 	cmpq	%rcx, %rdi
 	jb	L(more_2x_vec_overlap)
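Why the branch reordering matters: __memmove_erms used to decide the source == destination case before the copy direction, putting a rarely-taken test on every call. After this patch the common cases come first: jb takes the forward copy when the destination is below the source, je then handles the uncommon equal case (now an early return that skips the copy entirely), and only afterwards does the overlap test force a backward copy. The C sketch below models that branch sequence; it is illustrative only, not glibc code. The name memmove_sketch is hypothetical, the byte loops stand in for REP MOVSB, and the raw pointer comparisons mirror what the assembly does with cmpq.

#include <stddef.h>
#include <stdio.h>

/* Minimal sketch of the post-patch branch order in __memmove_erms.
   Not glibc code: plain loops stand in for REP MOVSB.  */
static void *
memmove_sketch (void *dst, const void *src, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  if (d < s)
    goto forward;		/* jb 1f: forward copy is safe.  */
  if (d == s)
    return dst;			/* je 2f: rare case, checked second.  */
  if (d >= s + len)
    goto forward;		/* Disjoint ranges: forward copy is safe.  */

  /* dst > src with overlap: copy backward, as the assembly does by
     running REP MOVSB downward from the last byte.  */
  while (len-- > 0)
    d[len] = s[len];
  return dst;

forward:
  for (size_t i = 0; i < len; i++)
    d[i] = s[i];
  return dst;
}

int
main (void)
{
  char buf[] = "abcdef";
  /* Overlapping move with dst > src takes the backward path.  */
  memmove_sketch (buf + 2, buf, 4);
  printf ("%s\n", buf);		/* Prints "ababcd".  */
  return 0;
}

For completeness, the __mempcpy_chk/__memmove_chk entries that the second hunk relocates sit in front of this logic and implement the _FORTIFY_SOURCE check visible in the diff: cmpq %rdx, %rcx followed by jb HIDDEN_JUMPTARGET (__chk_fail) aborts via __chk_fail whenever the known size of the destination object (%rcx) is smaller than the requested copy length (%rdx).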