path: root/REORG.TODO/sysdeps/arm/memmove.S
author     Zack Weinberg <zackw@panix.com>  2017-06-08 15:39:03 -0400
committer  Zack Weinberg <zackw@panix.com>  2017-06-08 15:39:03 -0400
commit     5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch)
tree       4470480d904b65cf14ca524f96f79eca818c3eaf /REORG.TODO/sysdeps/arm/memmove.S
parent     199fc19d3aaaf57944ef036e15904febe877fc93 (diff)
Prepare for radical source tree reorganization. (branch: zack/build-layout-experiment)
All top-level files and directories are moved into a temporary storage
directory, REORG.TODO, except for files that will certainly still
exist in their current form at top level when we're done (COPYING,
COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which
are moved to the new directory OldChangeLogs, instead), and the
generated file INSTALL (which is just deleted; in the new order, there
will be no generated files checked into version control).
Diffstat (limited to 'REORG.TODO/sysdeps/arm/memmove.S')
-rw-r--r--  REORG.TODO/sysdeps/arm/memmove.S  336
1 file changed, 336 insertions(+), 0 deletions(-)
diff --git a/REORG.TODO/sysdeps/arm/memmove.S b/REORG.TODO/sysdeps/arm/memmove.S
new file mode 100644
index 0000000000..b18aa329d3
--- /dev/null
+++ b/REORG.TODO/sysdeps/arm/memmove.S
@@ -0,0 +1,336 @@
+/* Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Thumb requires excessive IT insns here.  */
+#define NO_THUMB
+#include <sysdep.h>
+#include <arm-features.h>
+
+/*
+ * Data preload for architectures that support it (ARM V5TE and above)
+ */
+#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
+     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
+     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
+     && !defined (__ARM_ARCH_5T__))
+#define PLD(code...)    code
+#else
+#define PLD(code...)
+#endif
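+
+/* Usage: a prefetch written as "PLD( pld [r1, #-4] )" is emitted only on
+   ARM v5TE and later; on older architectures the line compiles away.  */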
+
+/*
+ * This can be used to enable code that cache-line aligns the source
+ * pointer.  Experiments on the architectures tested (StrongARM and
+ * XScale) did not show this to be worthwhile; that might change on
+ * future hardware.
+ */
+//#define CALGN(code...)        code
+#define CALGN(code...)
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define PULL            lsr
+#define PUSH            lsl
+#else
+#define PULL            lsl
+#define PUSH            lsr
+#endif
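+
+/* In the misaligned-copy loop below, each destination word is spliced
+   together from two adjacent source words: PUSH shifts the bytes already
+   in hand into position and PULL extracts the rest from the neighbouring
+   word.  Swapping lsl/lsr on big-endian keeps the byte order correct.  */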
+
+		.text
+		.syntax unified
+
+/*
+ * Prototype: void *memmove(void *dest, const void *src, size_t n);
+ *
+ * Note:
+ *
+ * If the memory regions don't overlap, we simply branch to memcpy, which
+ * is normally a bit faster.  Otherwise the copy is done backwards, from
+ * the ends of the buffers downwards.
+ */
+
+ENTRY(memmove)
+
+		subs	ip, r0, r1
+		cmphi	r2, ip
+#if !IS_IN (libc)
+		bls	memcpy
+#else
+		bls	HIDDEN_JUMPTARGET(memcpy)
+#endif
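+		/* ip = dest - src as an unsigned value.  The bls above takes
+		   the memcpy path when dest <= src or when the buffers are at
+		   least n bytes apart; we fall through only when a forward
+		   copy would overwrite source bytes not yet read.  */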
+
+		push	{r0, r4, lr}
+		cfi_adjust_cfa_offset (12)
+		cfi_rel_offset (r4, 4)
+		cfi_rel_offset (lr, 8)
+
+		cfi_remember_state
+
+		add	r1, r1, r2
+		add	r0, r0, r2
+		subs	r2, r2, #4
+		blt	8f
+		ands	ip, r0, #3
+	PLD(	pld	[r1, #-4]		)
+		bne	9f
+		ands	ip, r1, #3
+		bne	10f
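+		/* Both pointers now sit just past the end of their buffers.
+		   Counts below four bytes go straight to the byte loop at 8;
+		   an unaligned destination is fixed up at 9 and an unaligned
+		   source at 10.  */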
+
+1:		subs	r2, r2, #(28)
+		push	{r5 - r8}
+		cfi_adjust_cfa_offset (16)
+		cfi_rel_offset (r5, 0)
+		cfi_rel_offset (r6, 4)
+		cfi_rel_offset (r7, 8)
+		cfi_rel_offset (r8, 12)
+		blt	5f
+
+	CALGN(	ands	ip, r1, #31		)
+	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
+	CALGN(	bcs	2f			)
+	CALGN(	adr	r4, 6f			)
+	CALGN(	subs	r2, r2, ip		)  @ C is set here
+#ifndef ARM_ALWAYS_BX
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
+#else
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
+	CALGN(	bx	r4			)
+#endif
+
+	PLD(	pld	[r1, #-4]		)
+2:	PLD(	subs	r2, r2, #96		)
+	PLD(	pld	[r1, #-32]		)
+	PLD(	blt	4f			)
+	PLD(	pld	[r1, #-64]		)
+	PLD(	pld	[r1, #-96]		)
+
+3:	PLD(	pld	[r1, #-128]		)
+4:		ldmdb	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+		subs	r2, r2, #32
+		stmdb	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+		bge	3b
+	PLD(	cmn	r2, #96			)
+	PLD(	bge	4b			)
+
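+		/* Copy the remaining 4-28 bytes (if any) by jumping part-way
+		   into the aligned ladders of single-word loads (label 6) and
+		   stores (label 66), so that exactly (r2 & 28) / 4 words are
+		   transferred.  */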
+5:		ands	ip, r2, #28
+		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		cfi_rel_offset (r10, 0)
+0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+		/* If alignment is not perfect, then there will be some
+		   padding (nop) instructions between this BX and label 6.
+		   The computation above assumed that two instructions
+		   later is exactly the right spot.  */
+		add	r10, #(6f - (0b + PC_OFS))
+		bx	r10
+#endif
+		.p2align ARM_BX_ALIGN_LOG2
+6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
+		ldr	r3, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		ldr	r4, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		ldr	r5, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		ldr	r6, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		ldr	r7, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		ldr	r8, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		ldr	lr, [r1, #-4]!
+
+#ifndef ARM_ALWAYS_BX
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+		nop
+#else
+0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+		/* If alignment is not perfect, then there will be some
+		   padding (nop) instructions between this BX and label 66.
+		   The computation above assumed that two instructions
+		   later is exactly the right spot.  */
+		add	r10, #(66f - (0b + PC_OFS))
+		bx	r10
+#endif
+		.p2align ARM_BX_ALIGN_LOG2
+66:		nop
+		.p2align ARM_BX_ALIGN_LOG2
+		str	r3, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		str	r4, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		str	r5, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		str	r6, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		str	r7, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		str	r8, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
+		str	lr, [r0, #-4]!
+
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+		cfi_restore (r10)
+#endif
+
+	CALGN(	bcs	2b			)
+
+7:		pop	{r5 - r8}
+		cfi_adjust_cfa_offset (-16)
+		cfi_restore (r5)
+		cfi_restore (r6)
+		cfi_restore (r7)
+		cfi_restore (r8)
+
+8:		movs	r2, r2, lsl #31
+		ldrbne	r3, [r1, #-1]!
+		ldrbcs	r4, [r1, #-1]!
+		ldrbcs	ip, [r1, #-1]
+		strbne	r3, [r0, #-1]!
+		strbcs	r4, [r0, #-1]!
+		strbcs	ip, [r0, #-1]
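+		/* movs ... lsl #31 leaves Z clear exactly when bit 0 of the
+		   remaining count is set and puts bit 1 into C, so the
+		   predicated byte copies above handle the last 1-3 bytes
+		   without a loop.  */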
+
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
+		pop	{r0, r4, lr}
+		cfi_adjust_cfa_offset (-12)
+		cfi_restore (r4)
+		cfi_restore (lr)
+		bx      lr
+#else
+		pop	{r0, r4, pc}
+#endif
+
+		cfi_restore_state
+
+9:		cmp	ip, #2
+		ldrbgt	r3, [r1, #-1]!
+		ldrbge	r4, [r1, #-1]!
+		ldrb	lr, [r1, #-1]!
+		strbgt	r3, [r0, #-1]!
+		strbge	r4, [r0, #-1]!
+		subs	r2, r2, ip
+		strb	lr, [r0, #-1]!
+		blt	8b
+		ands	ip, r1, #3
+		beq	1b
+
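+		/* The source is 1, 2 or 3 bytes past a word boundary: round
+		   r1 down, preload the first aligned word into r3, then pick
+		   the shift variant for the offset (1 byte -> 18, 2 -> 17,
+		   3 falls through to the first expansion below).  */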
+10:		bic	r1, r1, #3
+		cmp	ip, #2
+		ldr	r3, [r1, #0]
+		beq	17f
+		blt	18f
+
+
+		.macro	backward_copy_shift push pull
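+		/* Backward copy for a word-misaligned source: \push and \pull
+		   are the shift amounts (in bits) used to splice each output
+		   word from two aligned source words.  */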
+
+		subs	r2, r2, #28
+		blt	14f
+
+	CALGN(	ands	ip, r1, #31		)
+	CALGN(	rsb	ip, ip, #32		)
+	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
+	CALGN(	subcc	r2, r2, ip		)
+	CALGN(	bcc	15f			)
+
+11:		push	{r5 - r8, r10}
+		cfi_adjust_cfa_offset (20)
+		cfi_rel_offset (r5, 0)
+		cfi_rel_offset (r6, 4)
+		cfi_rel_offset (r7, 8)
+		cfi_rel_offset (r8, 12)
+		cfi_rel_offset (r10, 16)
+
+	PLD(	pld	[r1, #-4]		)
+	PLD(	subs	r2, r2, #96		)
+	PLD(	pld	[r1, #-32]		)
+	PLD(	blt	13f			)
+	PLD(	pld	[r1, #-64]		)
+	PLD(	pld	[r1, #-96]		)
+
+12:	PLD(	pld	[r1, #-128]		)
+13:		ldmdb   r1!, {r7, r8, r10, ip}
+		mov     lr, r3, PUSH #\push
+		subs    r2, r2, #32
+		ldmdb   r1!, {r3, r4, r5, r6}
+		orr     lr, lr, ip, PULL #\pull
+		mov     ip, ip, PUSH #\push
+		orr     ip, ip, r10, PULL #\pull
+		mov     r10, r10, PUSH #\push
+		orr     r10, r10, r8, PULL #\pull
+		mov     r8, r8, PUSH #\push
+		orr     r8, r8, r7, PULL #\pull
+		mov     r7, r7, PUSH #\push
+		orr     r7, r7, r6, PULL #\pull
+		mov     r6, r6, PUSH #\push
+		orr     r6, r6, r5, PULL #\pull
+		mov     r5, r5, PUSH #\push
+		orr     r5, r5, r4, PULL #\pull
+		mov     r4, r4, PUSH #\push
+		orr     r4, r4, r3, PULL #\pull
+		stmdb   r0!, {r4 - r8, r10, ip, lr}
+		bge	12b
+	PLD(	cmn	r2, #96			)
+	PLD(	bge	13b			)
+
+		pop	{r5 - r8, r10}
+		cfi_adjust_cfa_offset (-20)
+		cfi_restore (r5)
+		cfi_restore (r6)
+		cfi_restore (r7)
+		cfi_restore (r8)
+		cfi_restore (r10)
+
+14:		ands	ip, r2, #28
+		beq	16f
+
+15:		mov     lr, r3, PUSH #\push
+		ldr	r3, [r1, #-4]!
+		subs	ip, ip, #4
+		orr	lr, lr, r3, PULL #\pull
+		str	lr, [r0, #-4]!
+		bgt	15b
+	CALGN(	cmp	r2, #0			)
+	CALGN(	bge	11b			)
+
+16:		add	r1, r1, #(\pull / 8)
+		b	8b
+
+		.endm
+
+
+		backward_copy_shift	push=8	pull=24
+
+17:		backward_copy_shift	push=16	pull=16
+
+18:		backward_copy_shift	push=24	pull=8
+
+
+END(memmove)
+libc_hidden_builtin_def (memmove)
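
For reference, the dispatch at the top of ENTRY(memmove) corresponds to the
C sketch below. This is an illustration only, under stated assumptions: the
function name memmove_sketch is hypothetical and this is not the code glibc
compiles; it merely restates the unsigned-difference trick from
"subs ip, r0, r1; cmphi r2, ip; bls memcpy" in C.

#include <stddef.h>

/* Hypothetical sketch, not glibc's implementation.  The unsigned
   difference dest - src is >= n exactly when a forward copy cannot
   clobber unread source bytes; when dest < src it wraps to a huge
   value, so that case is also safe to copy forwards.  */
static void *memmove_sketch (void *dest, const void *src, size_t n)
{
  unsigned char *d = dest;
  const unsigned char *s = src;

  if (d <= s || (size_t) (d - s) >= n)
    {
      /* Forward copy is safe.  The assembly branches to glibc's own
         forward-copying memcpy here; portable C spells out the loop.  */
      for (size_t i = 0; i < n; i++)
        d[i] = s[i];
      return dest;
    }

  while (n--)                   /* overlap with dest above src: */
    d[n] = s[n];                /* copy from the end, downwards  */
  return dest;
}

Note that in strict C the pointer subtraction is only well defined when both
pointers address the same object; the assembly version is free of that
constraint.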