about summary refs log tree commit diff
path: root/sysdeps/arm/memcpy.S
diff options
context:
space:
mode:
authorJoseph Myers <joseph@codesourcery.com>2014-02-08 01:49:39 +0000
committerJoseph Myers <joseph@codesourcery.com>2014-02-08 01:49:39 +0000
commitc6bfe5c4d756913297db03f55e42016d1c48918c (patch)
tree898d2910ef903f46b596ad54e519ed4495357766 /sysdeps/arm/memcpy.S
parent852fa2dd3aa1a687780e27300df9eb0bdc433863 (diff)
downloadglibc-c6bfe5c4d756913297db03f55e42016d1c48918c.tar.gz
glibc-c6bfe5c4d756913297db03f55e42016d1c48918c.tar.xz
glibc-c6bfe5c4d756913297db03f55e42016d1c48918c.zip
Move arm from ports to libc.
I've moved the ARM port from ports to the main sysdeps hierarchy.
Beyond the README update, the move of the files was simply

git mv ports/sysdeps/arm sysdeps/arm
git mv ports/sysdeps/unix/arm sysdeps/unix/arm
git mv ports/sysdeps/unix/sysv/linux/arm sysdeps/unix/sysv/linux/arm

and in addition to the ChangeLog entries here, I put a note at the top
of ports/ChangeLog.arm similar to that at the top of
ChangeLog.powerpc.  There is deliberately no NEWS change, as I think
it makes the most sense to put in a general note above all ports
having moved if we can achieve that for 2.20.

Tested that disassembly of installed shared libraries for arm is the
same before and after this patch, except for data (not instructions)
in ld.so (there are assertions in sysdeps/arm/dl-machine.h, and the
path by which that file is found, and so by which it appears in the
assertion message, changes as a result of the move).

	* sysdeps/arm: Move directory from ports/sysdeps/arm.
	* sysdeps/unix/arm: Move directory from ports/sysdeps/unix/arm.
	* sysdeps/unix/sysv/linux/arm: Move directory from
	ports/sysdeps/unix/sysv/linux/arm.
	* README: Update listing for arm-*-linux-gnueabi.

ports/ChangeLog.arm:
	* sysdeps/arm: Move directory to ../sysdeps/arm.
	* sysdeps/unix/arm: Move directory to ../sysdeps.arm.
	* sysdeps/unix/sysv/linux/arm: Move directory to
	../sysdeps/unix/sysv/linux/arm.
Diffstat (limited to 'sysdeps/arm/memcpy.S')
-rw-r--r--sysdeps/arm/memcpy.S344
1 files changed, 344 insertions, 0 deletions
diff --git a/sysdeps/arm/memcpy.S b/sysdeps/arm/memcpy.S
new file mode 100644
index 0000000000..3e985dad68
--- /dev/null
+++ b/sysdeps/arm/memcpy.S
@@ -0,0 +1,344 @@
+/* Copyright (C) 2006-2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Thumb requires excessive IT insns here.  */
+#define NO_THUMB
+#include <sysdep.h>
+#include <arm-features.h>
+
+/*
+ * Data preload for architectures that support it (ARM V5TE and above)
+ */
+#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
+     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
+     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
+     && !defined (__ARM_ARCH_5T__))
+#define PLD(code...)    code
+#else
+#define PLD(code...)
+#endif
+
+/*
+ * This can be used to enable code to cacheline align the source pointer.
+ * Experiments on tested architectures (StrongARM and XScale) didn't show
+ * this a worthwhile thing to do.  That might be different in the future.
+ */
+//#define CALGN(code...)        code
+#define CALGN(code...)
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define PULL            lsr
+#define PUSH            lsl
+#else
+#define PULL            lsl
+#define PUSH            lsr
+#endif
+
+		.text
+		.syntax unified
+
+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
+
+ENTRY(memcpy)
+
+		push	{r0, r4, lr}
+		cfi_adjust_cfa_offset (12)
+		cfi_rel_offset (r4, 4)
+		cfi_rel_offset (lr, 8)
+
+		cfi_remember_state
+
+		subs	r2, r2, #4
+		blt	8f
+		ands	ip, r0, #3
+	PLD(	sfi_pld	r1, #0			)
+		bne	9f
+		ands	ip, r1, #3
+		bne	10f
+
+1:		subs	r2, r2, #(28)
+		push	{r5 - r8}
+		cfi_adjust_cfa_offset (16)
+		cfi_rel_offset (r5, 0)
+		cfi_rel_offset (r6, 4)
+		cfi_rel_offset (r7, 8)
+		cfi_rel_offset (r8, 12)
+		blt	5f
+
+	CALGN(	ands	ip, r1, #31		)
+	CALGN(	rsb	r3, ip, #32		)
+	CALGN(	sbcsne	r4, r3, r2		)  @ C is always set here
+	CALGN(	bcs	2f			)
+	CALGN(	adr	r4, 6f			)
+	CALGN(	subs	r2, r2, r3		)  @ C gets set
+#ifndef ARM_ALWAYS_BX
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
+#else
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
+	CALGN(	bx	r4			)
+#endif
+
+	PLD(	sfi_pld	r1, #0			)
+2:	PLD(	subs	r2, r2, #96		)
+	PLD(	sfi_pld	r1, #28			)
+	PLD(	blt	4f			)
+	PLD(	sfi_pld	r1, #60			)
+	PLD(	sfi_pld	r1, #92			)
+
+3:	PLD(	sfi_pld	r1, #124		)
+4:		sfi_breg r1, \
+		ldmia	\B!, {r3, r4, r5, r6, r7, r8, ip, lr}
+		subs	r2, r2, #32
+		sfi_breg r0, \
+		stmia	\B!, {r3, r4, r5, r6, r7, r8, ip, lr}
+		bge	3b
+	PLD(	cmn	r2, #96			)
+	PLD(	bge	4b			)
+
+5:		ands	ip, r2, #28
+		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		cfi_rel_offset (r10, 0)
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+		bx	r10
+#endif
+		.p2align ARM_BX_ALIGN_LOG2
+6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r1, \
+		ldr	r3, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r1, \
+		ldr	r4, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r1, \
+		ldr	r5, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r1, \
+		ldr	r6, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r1, \
+		ldr	r7, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r1, \
+		ldr	r8, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r1, \
+		ldr	lr, [\B], #4
+
+#ifndef ARM_ALWAYS_BX
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+		nop
+#else
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
+		bx	r10
+#endif
+		.p2align ARM_BX_ALIGN_LOG2
+		nop
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r0, \
+		str	r3, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r0, \
+		str	r4, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r0, \
+		str	r5, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r0, \
+		str	r6, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r0, \
+		str	r7, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r0, \
+		str	r8, [\B], #4
+		.p2align ARM_BX_ALIGN_LOG2
+		sfi_breg r0, \
+		str	lr, [\B], #4
+
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+		cfi_restore (r10)
+#endif
+
+	CALGN(	bcs	2b			)
+
+7:		pop	{r5 - r8}
+		cfi_adjust_cfa_offset (-16)
+		cfi_restore (r5)
+		cfi_restore (r6)
+		cfi_restore (r7)
+		cfi_restore (r8)
+
+8:		movs	r2, r2, lsl #31
+		sfi_breg r1, \
+		ldrbne	r3, [\B], #1
+		sfi_breg r1, \
+		ldrbcs	r4, [\B], #1
+		sfi_breg r1, \
+		ldrbcs	ip, [\B]
+		sfi_breg r0, \
+		strbne	r3, [\B], #1
+		sfi_breg r0, \
+		strbcs	r4, [\B], #1
+		sfi_breg r0, \
+		strbcs	ip, [\B]
+
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
+		pop	{r0, r4, lr}
+		cfi_adjust_cfa_offset (-12)
+		cfi_restore (r4)
+		cfi_restore (lr)
+		bx      lr
+#else
+		pop	{r0, r4, pc}
+#endif
+
+		cfi_restore_state
+
+9:		rsb	ip, ip, #4
+		cmp	ip, #2
+		sfi_breg r1, \
+		ldrbgt	r3, [\B], #1
+		sfi_breg r1, \
+		ldrbge	r4, [\B], #1
+		sfi_breg r1, \
+		ldrb	lr, [\B], #1
+		sfi_breg r0, \
+		strbgt	r3, [\B], #1
+		sfi_breg r0, \
+		strbge	r4, [\B], #1
+		subs	r2, r2, ip
+		sfi_breg r0, \
+		strb	lr, [\B], #1
+		blt	8b
+		ands	ip, r1, #3
+		beq	1b
+
+10:		bic	r1, r1, #3
+		cmp	ip, #2
+		sfi_breg r1, \
+		ldr	lr, [\B], #4
+		beq	17f
+		bgt	18f
+
+
+		.macro	forward_copy_shift pull push
+
+		subs	r2, r2, #28
+		blt	14f
+
+	CALGN(	ands	ip, r1, #31		)
+	CALGN(	rsb	ip, ip, #32		)
+	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
+	CALGN(	subcc	r2, r2, ip		)
+	CALGN(	bcc	15f			)
+
+11:		push	{r5 - r8, r10}
+		cfi_adjust_cfa_offset (20)
+		cfi_rel_offset (r5, 0)
+		cfi_rel_offset (r6, 4)
+		cfi_rel_offset (r7, 8)
+		cfi_rel_offset (r8, 12)
+		cfi_rel_offset (r10, 16)
+
+	PLD(	sfi_pld	r1, #0			)
+	PLD(	subs	r2, r2, #96		)
+	PLD(	sfi_pld	r1, #28			)
+	PLD(	blt	13f			)
+	PLD(	sfi_pld	r1, #60			)
+	PLD(	sfi_pld	r1, #92			)
+
+12:	PLD(	sfi_pld	r1, #124		)
+13:		sfi_breg r1, \
+		ldmia	\B!, {r4, r5, r6, r7}
+		mov	r3, lr, PULL #\pull
+		subs	r2, r2, #32
+		sfi_breg r1, \
+		ldmia	\B!, {r8, r10, ip, lr}
+		orr	r3, r3, r4, PUSH #\push
+		mov	r4, r4, PULL #\pull
+		orr	r4, r4, r5, PUSH #\push
+		mov	r5, r5, PULL #\pull
+		orr	r5, r5, r6, PUSH #\push
+		mov	r6, r6, PULL #\pull
+		orr	r6, r6, r7, PUSH #\push
+		mov	r7, r7, PULL #\pull
+		orr	r7, r7, r8, PUSH #\push
+		mov	r8, r8, PULL #\pull
+		orr	r8, r8, r10, PUSH #\push
+		mov	r10, r10, PULL #\pull
+		orr	r10, r10, ip, PUSH #\push
+		mov	ip, ip, PULL #\pull
+		orr	ip, ip, lr, PUSH #\push
+		sfi_breg r0, \
+		stmia	\B!, {r3, r4, r5, r6, r7, r8, r10, ip}
+		bge	12b
+	PLD(	cmn	r2, #96			)
+	PLD(	bge	13b			)
+
+		pop	{r5 - r8, r10}
+		cfi_adjust_cfa_offset (-20)
+		cfi_restore (r5)
+		cfi_restore (r6)
+		cfi_restore (r7)
+		cfi_restore (r8)
+		cfi_restore (r10)
+
+14:		ands	ip, r2, #28
+		beq	16f
+
+15:		mov	r3, lr, PULL #\pull
+		sfi_breg r1, \
+		ldr	lr, [\B], #4
+		subs	ip, ip, #4
+		orr	r3, r3, lr, PUSH #\push
+		sfi_breg r0, \
+		str	r3, [\B], #4
+		bgt	15b
+	CALGN(	cmp	r2, #0			)
+	CALGN(	bge	11b			)
+
+16:		sub	r1, r1, #(\push / 8)
+		b	8b
+
+		.endm
+
+
+		forward_copy_shift	pull=8	push=24
+
+17:		forward_copy_shift	pull=16	push=16
+
+18:		forward_copy_shift	pull=24	push=8
+
+END(memcpy)
+libc_hidden_builtin_def (memcpy)