diff options
Diffstat (limited to 'REORG.TODO/sysdeps/arm/memmove.S')
-rw-r--r-- | REORG.TODO/sysdeps/arm/memmove.S | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/arm/memmove.S b/REORG.TODO/sysdeps/arm/memmove.S
new file mode 100644
index 0000000000..b18aa329d3
--- /dev/null
+++ b/REORG.TODO/sysdeps/arm/memmove.S
@@ -0,0 +1,336 @@
+/* Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Thumb requires excessive IT insns here.  */
+#define NO_THUMB
+#include <sysdep.h>
+#include <arm-features.h>
+
+/*
+ * Data preload for architectures that support it (ARM V5TE and above).
+ * PLD(code) expands to code there and to nothing on the older ones.  */
+#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
+     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
+     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
+     && !defined (__ARM_ARCH_5T__))
+#define PLD(code...)    code
+#else
+#define PLD(code...)
+#endif
+
+/*
+ * This can be used to enable code to cacheline align the source pointer.
+ * Experiments on tested architectures (StrongARM and XScale) didn't show
+ * this to be a worthwhile thing to do.  That might be different in the future.
+ * As defined below, CALGN(code) expands to nothing.  */
+//#define CALGN(code...)        code
+#define CALGN(code...)
+
+/*
+ * Endian independent macros for shifting bytes within registers: PULL is
+ * lsr and PUSH is lsl, swapped when __ARMEB__ is defined.  */
+#ifndef __ARMEB__
+#define PULL            lsr
+#define PUSH            lsl
+#else
+#define PULL            lsl
+#define PUSH            lsr
+#endif
+
+                .text
+                .syntax unified
+
+/*
+ * Prototype: void *memmove(void *dest, const void *src, size_t n);
+ * The code below uses r0 = dest, r1 = src, r2 = n and returns dest in r0.
+ * Note:
+ * If dest <= src (unsigned) or n <= dest - src, we simply branch to memcpy,
+ * which is normally a bit faster; that covers every non-overlapping case.
+ * Otherwise (src < dest < src + n) the copy is done going downwards.
+ */
+/* NOTE(review): dest <= src may still overlap; presumably memcpy copies upwards -- confirm.  */
+ENTRY(memmove)
+
+                subs    ip, r0, r1              /* ip = dest - src; HI when dest > src */
+                cmphi   r2, ip                  /* only when dest > src: compare n with dest - src */
+#if !IS_IN (libc)
+                bls     memcpy                  /* dest <= src, or n <= dest - src */
+#else
+                bls     HIDDEN_JUMPTARGET(memcpy)
+#endif
+                /* Downward copy.  r0 is saved so the original dest can be returned.  */
+                push    {r0, r4, lr}
+                cfi_adjust_cfa_offset (12)
+                cfi_rel_offset (r4, 4)
+                cfi_rel_offset (lr, 8)
+
+                cfi_remember_state
+                /* Point r1/r0 just past the end of src/dest; all copies below pre-decrement.  */
+                add     r1, r1, r2
+                add     r0, r0, r2
+                subs    r2, r2, #4              /* r2 = n - 4 */
+                blt     8f                      /* fewer than 4 bytes: byte tail only */
+                ands    ip, r0, #3              /* ip = low two bits of the dest end */
+        PLD(    pld     [r1, #-4]               )
+                bne     9f                      /* dest end is not word aligned */
+                ands    ip, r1, #3              /* ip = low two bits of the src end */
+                bne     10f                     /* src end is not word aligned */
+                /* Both pointers are word aligned here: copy 32 bytes per iteration.  */
+1:              subs    r2, r2, #(28)           /* r2 = bytes remaining - 32 */
+                push    {r5 - r8}
+                cfi_adjust_cfa_offset (16)
+                cfi_rel_offset (r5, 0)
+                cfi_rel_offset (r6, 4)
+                cfi_rel_offset (r7, 8)
+                cfi_rel_offset (r8, 12)
+                blt     5f                      /* fewer than 32 bytes remain */
+
+        CALGN(  ands    ip, r1, #31             )
+        CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
+        CALGN(  bcs     2f                      )
+        CALGN(  adr     r4, 6f                  )
+        CALGN(  subs    r2, r2, ip              )  @ C is set here
+#ifndef ARM_ALWAYS_BX
+        CALGN(  add     pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
+#else
+        CALGN(  add     r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
+        CALGN(  bx      r4                      )
+#endif
+                /* With PLD enabled, r2 is biased by a further 96: the loop at 3: runs while >= 128 bytes remain.  */
+        PLD(    pld     [r1, #-4]               )
+2:      PLD(    subs    r2, r2, #96             )
+        PLD(    pld     [r1, #-32]              )
+        PLD(    blt     4f                      )
+        PLD(    pld     [r1, #-64]              )
+        PLD(    pld     [r1, #-96]              )
+
+3:      PLD(    pld     [r1, #-128]             )
+4:              ldmdb   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}   /* load the 32 bytes below r1 */
+                subs    r2, r2, #32
+                stmdb   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}   /* store them below r0 */
+                bge     3b
+        PLD(    cmn     r2, #96                 )       /* flags from r2 + 96, i.e. without the bias */
+        PLD(    bge     4b                      )       /* another 32-byte block remains; copy it without the preload */
+                /* 0-7 whole words remain.  Only bits 2-4 of r2 are used here, which the multiples of 32 subtracted above leave intact.  */
+5:              ands    ip, r2, #28             /* ip = bytes left in whole words (0..28); Z set when none */
+                rsb     ip, ip, #32             /* ip = 32 - that count; flags are not updated */
+#ifndef ARM_ALWAYS_BX
+                /* C is always clear here.  */
+                addne   pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)        /* skip ip / 4 slots of the ladder at 6: */
+                b       7f                      /* no whole words left */
+#else
+                beq     7f
+                push    {r10}
+                cfi_adjust_cfa_offset (4)
+                cfi_rel_offset (r10, 0)
+0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+                /* If alignment is not perfect, then there will be some
+                   padding (nop) instructions between this BX and label 6.
+                   The computation above assumed that two instructions
+                   later is exactly the right spot.  */
+                add     r10, #(6f - (0b + PC_OFS))
+                bx      r10
+#endif
+                .p2align ARM_BX_ALIGN_LOG2      /* every ladder slot is 2^ARM_BX_ALIGN_LOG2 bytes */
+6:              nop
+                .p2align ARM_BX_ALIGN_LOG2
+                ldr     r3, [r1, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                ldr     r4, [r1, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                ldr     r5, [r1, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                ldr     r6, [r1, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                ldr     r7, [r1, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                ldr     r8, [r1, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                ldr     lr, [r1, #-4]!
+                /* Same computed jump with the same ip, now into the matching store ladder at 66:.  */
+#ifndef ARM_ALWAYS_BX
+                add     pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+                nop
+#else
+0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+                /* If alignment is not perfect, then there will be some
+                   padding (nop) instructions between this BX and label 66.
+                   The computation above assumed that two instructions
+                   later is exactly the right spot.  */
+                add     r10, #(66f - (0b + PC_OFS))
+                bx      r10
+#endif
+                .p2align ARM_BX_ALIGN_LOG2
+66:             nop
+                .p2align ARM_BX_ALIGN_LOG2
+                str     r3, [r0, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                str     r4, [r0, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                str     r5, [r0, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                str     r6, [r0, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                str     r7, [r0, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                str     r8, [r0, #-4]!
+                .p2align ARM_BX_ALIGN_LOG2
+                str     lr, [r0, #-4]!
+
+#ifdef ARM_ALWAYS_BX
+                pop     {r10}
+                cfi_adjust_cfa_offset (-4)
+                cfi_restore (r10)
+#endif
+
+        CALGN(  bcs     2b                      )
+                /* Restore the registers pushed at 1:.  */
+7:              pop     {r5 - r8}
+                cfi_adjust_cfa_offset (-16)
+                cfi_restore (r5)
+                cfi_restore (r6)
+                cfi_restore (r7)
+                cfi_restore (r8)
+                /* Copy the final 0-3 bytes, selected by the low two bits of r2.  */
+8:              movs    r2, r2, lsl #31         /* NE if bit 0 was set, CS if bit 1 was set */
+                ldrbne  r3, [r1, #-1]!          /* bit 0: one byte */
+                ldrbcs  r4, [r1, #-1]!          /* bit 1: two more bytes */
+                ldrbcs  ip, [r1, #-1]
+                strbne  r3, [r0, #-1]!
+                strbcs  r4, [r0, #-1]!
+                strbcs  ip, [r0, #-1]
+                /* Return; r0 is reloaded with the dest value pushed on entry.  */
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
+                pop     {r0, r4, lr}
+                cfi_adjust_cfa_offset (-12)
+                cfi_restore (r4)
+                cfi_restore (lr)
+                bx      lr
+#else
+                pop     {r0, r4, pc}
+#endif
+
+                cfi_restore_state
+                /* dest end is not word aligned: copy ip (1..3) bytes so that r0 becomes word aligned.  */
+9:              cmp     ip, #2
+                ldrbgt  r3, [r1, #-1]!          /* only when ip == 3 */
+                ldrbge  r4, [r1, #-1]!          /* only when ip >= 2 */
+                ldrb    lr, [r1, #-1]!          /* always */
+                strbgt  r3, [r0, #-1]!
+                strbge  r4, [r0, #-1]!
+                subs    r2, r2, ip              /* account for the bytes just copied */
+                strb    lr, [r0, #-1]!
+                blt     8b                      /* fewer than 4 bytes left: byte tail */
+                ands    ip, r1, #3
+                beq     1b                      /* src is now word aligned as well */
+                /* dest is word aligned but src is off by ip (1..3) bytes: copy with shifts.  */
+10:             bic     r1, r1, #3              /* round src down to a word boundary */
+                cmp     ip, #2
+                ldr     r3, [r1, #0]            /* r3 = aligned word holding the next ip source bytes */
+                beq     17f                     /* ip == 2 */
+                blt     18f                     /* ip == 1 */
+                /* ip == 3 falls through into the first expansion of the macro below.  */
+                /* backward_copy_shift: downward copy with aligned word loads; push and pull are bit counts (push + pull = 32 in all uses).  */
+                .macro  backward_copy_shift push pull
+                /* Each stored word is (higher word PUSH push) | (lower word PULL pull); r3 carries the last loaded word.  */
+                subs    r2, r2, #28             /* r2 = bytes remaining - 32 */
+                blt     14f                     /* fewer than 32 bytes remain */
+
+        CALGN(  ands    ip, r1, #31             )
+        CALGN(  rsb     ip, ip, #32             )
+        CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
+        CALGN(  subcc   r2, r2, ip              )
+        CALGN(  bcc     15f                     )
+
+11:             push    {r5 - r8, r10}
+                cfi_adjust_cfa_offset (20)
+                cfi_rel_offset (r5, 0)
+                cfi_rel_offset (r6, 4)
+                cfi_rel_offset (r7, 8)
+                cfi_rel_offset (r8, 12)
+                cfi_rel_offset (r10, 16)
+                /* Same preload scheme and 96-byte bias of r2 as in the aligned loop at 3:/4:.  */
+        PLD(    pld     [r1, #-4]               )
+        PLD(    subs    r2, r2, #96             )
+        PLD(    pld     [r1, #-32]              )
+        PLD(    blt     13f                     )
+        PLD(    pld     [r1, #-64]              )
+        PLD(    pld     [r1, #-96]              )
+
+12:     PLD(    pld     [r1, #-128]             )
+13:             ldmdb   r1!, {r7, r8, r10, ip}  /* upper four source words of this 32-byte step */
+                mov     lr, r3, PUSH #\push
+                subs    r2, r2, #32
+                ldmdb   r1!, {r3, r4, r5, r6}   /* lower four; r3 is carried into the next step */
+                orr     lr, lr, ip, PULL #\pull
+                mov     ip, ip, PUSH #\push
+                orr     ip, ip, r10, PULL #\pull
+                mov     r10, r10, PUSH #\push
+                orr     r10, r10, r8, PULL #\pull
+                mov     r8, r8, PUSH #\push
+                orr     r8, r8, r7, PULL #\pull
+                mov     r7, r7, PUSH #\push
+                orr     r7, r7, r6, PULL #\pull
+                mov     r6, r6, PUSH #\push
+                orr     r6, r6, r5, PULL #\pull
+                mov     r5, r5, PUSH #\push
+                orr     r5, r5, r4, PULL #\pull
+                mov     r4, r4, PUSH #\push
+                orr     r4, r4, r3, PULL #\pull
+                stmdb   r0!, {r4 - r8, r10, ip, lr}     /* store the eight combined words below r0 */
+                bge     12b
+        PLD(    cmn     r2, #96                 )
+        PLD(    bge     13b                     )
+
+                pop     {r5 - r8, r10}
+                cfi_adjust_cfa_offset (-20)
+                cfi_restore (r5)
+                cfi_restore (r6)
+                cfi_restore (r7)
+                cfi_restore (r8)
+                cfi_restore (r10)
+                /* Copy the remaining whole words one at a time.  */
+14:             ands    ip, r2, #28             /* ip = bytes left in whole words (0..28) */
+                beq     16f
+
+15:             mov     lr, r3, PUSH #\push
+                ldr     r3, [r1, #-4]!
+                subs    ip, ip, #4
+                orr     lr, lr, r3, PULL #\pull
+                str     lr, [r0, #-4]!
+                bgt     15b
+        CALGN(  cmp     r2, #0                  )
+        CALGN(  bge     11b                     )
+                /* r1 was rounded down by pull / 8 bytes before the macro; restore the byte pointer for the tail at 8:.  */
+16:             add     r1, r1, #(\pull / 8)
+                b       8b
+
+                .endm
+
+                /* One expansion per source misalignment: 3 bytes (fall-through), 2 bytes (17:), 1 byte (18:).  */
+                backward_copy_shift     push=8  pull=24
+
+17:             backward_copy_shift     push=16 pull=16
+
+18:             backward_copy_shift     push=24 pull=8
+
+
+END(memmove)
+libc_hidden_builtin_def (memmove) |