about summary refs log tree commit diff
path: root/sysdeps/arm/armv7
diff options
context:
space:
mode:
author Zack Weinberg <zackw@panix.com> 2017-05-11 20:36:15 -0400
committer Zack Weinberg <zackw@panix.com> 2017-05-20 08:12:11 -0400
commit 81cb7a0b2b6b905a504b8b56fe3c1634adf8fb71 (patch)
tree ed0a4b6153ee727aca95aadfb8b9061cd873ca7e /sysdeps/arm/armv7
parent 42a844c6a213f9219a4baa013c7305679d5dcaaa (diff)
download glibc-81cb7a0b2b6b905a504b8b56fe3c1634adf8fb71.tar.gz
glibc-81cb7a0b2b6b905a504b8b56fe3c1634adf8fb71.tar.xz
glibc-81cb7a0b2b6b905a504b8b56fe3c1634adf8fb71.zip
Remove sfi_* annotations from ARM assembly files.
This semi-mechanical patch removes all uses and definitions of the
sfi_breg, sfi_pld, and sfi_sp macros from various ARM-specific
assembly files.  These were only used by NaCl.

	* sysdeps/arm/sysdep.h
        (ARM_SFI_MACROS, sfi_breg, sfi_pld, sfi_sp): Delete definitions.

	* sysdeps/arm/__longjmp.S, sysdeps/arm/add_n.S
	* sysdeps/arm/addmul_1.S, sysdeps/arm/arm-mcount.S
	* sysdeps/arm/armv6/rawmemchr.S, sysdeps/arm/armv6/strchr.S
	* sysdeps/arm/armv6/strcpy.S, sysdeps/arm/armv6/strlen.S
	* sysdeps/arm/armv6/strrchr.S, sysdeps/arm/armv6t2/memchr.S
	* sysdeps/arm/armv6t2/strlen.S
	* sysdeps/arm/armv7/multiarch/memcpy_impl.S
	* sysdeps/arm/armv7/strcmp.S, sysdeps/arm/dl-tlsdesc.S
	* sysdeps/arm/memcpy.S, sysdeps/arm/memmove.S
	* sysdeps/arm/memset.S, sysdeps/arm/setjmp.S
	* sysdeps/arm/strlen.S, sysdeps/arm/submul_1.S:
        Remove all uses of sfi_breg, sfi_pld, and sfi_sp.
Diffstat (limited to 'sysdeps/arm/armv7')
-rw-r--r-- sysdeps/arm/armv7/multiarch/memcpy_impl.S 581
-rw-r--r-- sysdeps/arm/armv7/strcmp.S 93
2 files changed, 227 insertions, 447 deletions
diff --git a/sysdeps/arm/armv7/multiarch/memcpy_impl.S b/sysdeps/arm/armv7/multiarch/memcpy_impl.S
index 5d5a3cefaa..c1b9fb0ab5 100644
--- a/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+++ b/sysdeps/arm/armv7/multiarch/memcpy_impl.S
@@ -226,71 +226,40 @@
 
 #ifdef USE_VFP
 	.macro	cpy_line_vfp vreg, base
-	sfi_breg dst, \
-	vstr	\vreg, [\B, #\base]
-	sfi_breg src, \
-	vldr	\vreg, [\B, #\base]
-	sfi_breg dst, \
-	vstr	d0, [\B, #\base + 8]
-	sfi_breg src, \
-	vldr	d0, [\B, #\base + 8]
-	sfi_breg dst, \
-	vstr	d1, [\B, #\base + 16]
-	sfi_breg src, \
-	vldr	d1, [\B, #\base + 16]
-	sfi_breg dst, \
-	vstr	d2, [\B, #\base + 24]
-	sfi_breg src, \
-	vldr	d2, [\B, #\base + 24]
-	sfi_breg dst, \
-	vstr	\vreg, [\B, #\base + 32]
-	sfi_breg src, \
-	vldr	\vreg, [\B, #\base + prefetch_lines * 64 - 32]
-	sfi_breg dst, \
-	vstr	d0, [\B, #\base + 40]
-	sfi_breg src, \
-	vldr	d0, [\B, #\base + 40]
-	sfi_breg dst, \
-	vstr	d1, [\B, #\base + 48]
-	sfi_breg src, \
-	vldr	d1, [\B, #\base + 48]
-	sfi_breg dst, \
-	vstr	d2, [\B, #\base + 56]
-	sfi_breg src, \
-	vldr	d2, [\B, #\base + 56]
+	vstr	\vreg, [dst, #\base]
+	vldr	\vreg, [src, #\base]
+	vstr	d0, [dst, #\base + 8]
+	vldr	d0, [src, #\base + 8]
+	vstr	d1, [dst, #\base + 16]
+	vldr	d1, [src, #\base + 16]
+	vstr	d2, [dst, #\base + 24]
+	vldr	d2, [src, #\base + 24]
+	vstr	\vreg, [dst, #\base + 32]
+	vldr	\vreg, [src, #\base + prefetch_lines * 64 - 32]
+	vstr	d0, [dst, #\base + 40]
+	vldr	d0, [src, #\base + 40]
+	vstr	d1, [dst, #\base + 48]
+	vldr	d1, [src, #\base + 48]
+	vstr	d2, [dst, #\base + 56]
+	vldr	d2, [src, #\base + 56]
 	.endm
 
 	.macro	cpy_tail_vfp vreg, base
-	sfi_breg dst, \
-	vstr	\vreg, [\B, #\base]
-	sfi_breg src, \
-	vldr	\vreg, [\B, #\base]
-	sfi_breg dst, \
-	vstr	d0, [\B, #\base + 8]
-	sfi_breg src, \
-	vldr	d0, [\B, #\base + 8]
-	sfi_breg dst, \
-	vstr	d1, [\B, #\base + 16]
-	sfi_breg src, \
-	vldr	d1, [\B, #\base + 16]
-	sfi_breg dst, \
-	vstr	d2, [\B, #\base + 24]
-	sfi_breg src, \
-	vldr	d2, [\B, #\base + 24]
-	sfi_breg dst, \
-	vstr	\vreg, [\B, #\base + 32]
-	sfi_breg dst, \
-	vstr	d0, [\B, #\base + 40]
-	sfi_breg src, \
-	vldr	d0, [\B, #\base + 40]
-	sfi_breg dst, \
-	vstr	d1, [\B, #\base + 48]
-	sfi_breg src, \
-	vldr	d1, [\B, #\base + 48]
-	sfi_breg dst, \
-	vstr	d2, [\B, #\base + 56]
-	sfi_breg src, \
-	vldr	d2, [\B, #\base + 56]
+	vstr	\vreg, [dst, #\base]
+	vldr	\vreg, [src, #\base]
+	vstr	d0, [dst, #\base + 8]
+	vldr	d0, [src, #\base + 8]
+	vstr	d1, [dst, #\base + 16]
+	vldr	d1, [src, #\base + 16]
+	vstr	d2, [dst, #\base + 24]
+	vldr	d2, [src, #\base + 24]
+	vstr	\vreg, [dst, #\base + 32]
+	vstr	d0, [dst, #\base + 40]
+	vldr	d0, [src, #\base + 40]
+	vstr	d1, [dst, #\base + 48]
+	vldr	d1, [src, #\base + 48]
+	vstr	d2, [dst, #\base + 56]
+	vldr	d2, [src, #\base + 56]
 	.endm
 #endif
 
@@ -316,26 +285,16 @@ ENTRY(memcpy)
 	vst1.8	{d0}, [\reg]!
 	.endm
 
-	/* These are used by the NaCl sfi_breg macro.  */
-	.macro _sfi_breg_dmask_neon_load_d0 reg
-	_sfi_dmask \reg
-	.endm
-	.macro _sfi_breg_dmask_neon_store_d0 reg
-	_sfi_dmask \reg
-	.endm
-
 	and	tmp1, count, #0x38
 	.macro dispatch_step i
-	sfi_breg src, neon_load_d0 \B
-	sfi_breg dst, neon_store_d0 \B
+	neon_load_d0 src
+	neon_store_d0 dst
 	.endm
 	dispatch_7_dword
 
 	tst	count, #4
-	sfi_breg src, \
-	ldrne	tmp1, [\B], #4
-	sfi_breg dst, \
-	strne	tmp1, [\B], #4
+	ldrne	tmp1, [src], #4
+	strne	tmp1, [dst], #4
 #else
 	/* Copy up to 15 full words of data.  May not be aligned.  */
 	/* Cannot use VFP for unaligned data.  */
@@ -344,23 +303,17 @@ ENTRY(memcpy)
 	add	src, src, tmp1
 	/* Jump directly into the sequence below at the correct offset.  */
 	.macro dispatch_step i
-	sfi_breg src, \
-	ldr	tmp1, [\B, #-(\i * 4)]
-	sfi_breg dst, \
-	str	tmp1, [\B, #-(\i * 4)]
+	ldr	tmp1, [src, #-(\i * 4)]
+	str	tmp1, [dst, #-(\i * 4)]
 	.endm
 	dispatch_15_word
 #endif
 
 	lsls	count, count, #31
-	sfi_breg src, \
-	ldrhcs	tmp1, [\B], #2
-	sfi_breg src, \
-	ldrbne	src, [\B]		/* Src is dead, use as a scratch.  */
-	sfi_breg dst, \
-	strhcs	tmp1, [\B], #2
-	sfi_breg dst, \
-	strbne	src, [\B]
+	ldrhcs	tmp1, [src], #2
+	ldrbne	src, [src]		/* Src is dead, use as a scratch.  */
+	strhcs	tmp1, [dst], #2
+	strbne	src, [dst]
 	bx	lr
 
 .Lcpy_not_short:
@@ -388,19 +341,13 @@ ENTRY(memcpy)
 	beq	1f
 	rsbs	tmp2, tmp2, #0
 	sub	count, count, tmp2, lsr #29
-	sfi_breg src, \
-	ldrmi	tmp1, [\B], #4
-	sfi_breg dst, \
-	strmi	tmp1, [\B], #4
+	ldrmi	tmp1, [src], #4
+	strmi	tmp1, [dst], #4
 	lsls	tmp2, tmp2, #2
-	sfi_breg src, \
-	ldrhcs	tmp1, [\B], #2
-	sfi_breg src, \
-	ldrbne	tmp2, [\B], #1
-	sfi_breg dst, \
-	strhcs	tmp1, [\B], #2
-	sfi_breg dst, \
-	strbne	tmp2, [\B], #1
+	ldrhcs	tmp1, [src], #2
+	ldrbne	tmp2, [src], #1
+	strhcs	tmp1, [dst], #2
+	strbne	tmp2, [dst], #1
 
 1:
 	subs	tmp2, count, #64	/* Use tmp2 for count.  */
@@ -412,40 +359,24 @@ ENTRY(memcpy)
 .Lcpy_body_medium:			/* Count in tmp2.  */
 #ifdef USE_VFP
 1:
-	sfi_breg src, \
-	vldr	d0, [\B, #0]
+	vldr	d0, [src, #0]
 	subs	tmp2, tmp2, #64
-	sfi_breg src, \
-	vldr	d1, [\B, #8]
-	sfi_breg dst, \
-	vstr	d0, [\B, #0]
-	sfi_breg src, \
-	vldr	d0, [\B, #16]
-	sfi_breg dst, \
-	vstr	d1, [\B, #8]
-	sfi_breg src, \
-	vldr	d1, [\B, #24]
-	sfi_breg dst, \
-	vstr	d0, [\B, #16]
-	sfi_breg src, \
-	vldr	d0, [\B, #32]
-	sfi_breg dst, \
-	vstr	d1, [\B, #24]
-	sfi_breg src, \
-	vldr	d1, [\B, #40]
-	sfi_breg dst, \
-	vstr	d0, [\B, #32]
-	sfi_breg src, \
-	vldr	d0, [\B, #48]
-	sfi_breg dst, \
-	vstr	d1, [\B, #40]
-	sfi_breg src, \
-	vldr	d1, [\B, #56]
-	sfi_breg dst, \
-	vstr	d0, [\B, #48]
+	vldr	d1, [src, #8]
+	vstr	d0, [dst, #0]
+	vldr	d0, [src, #16]
+	vstr	d1, [dst, #8]
+	vldr	d1, [src, #24]
+	vstr	d0, [dst, #16]
+	vldr	d0, [src, #32]
+	vstr	d1, [dst, #24]
+	vldr	d1, [src, #40]
+	vstr	d0, [dst, #32]
+	vldr	d0, [src, #48]
+	vstr	d1, [dst, #40]
+	vldr	d1, [src, #56]
+	vstr	d0, [dst, #48]
 	add	src, src, #64
-	sfi_breg dst, \
-	vstr	d1, [\B, #56]
+	vstr	d1, [dst, #56]
 	add	dst, dst, #64
 	bge	1b
 	tst	tmp2, #0x3f
@@ -456,48 +387,30 @@ ENTRY(memcpy)
 	add	dst, dst, tmp1
 	add	src, src, tmp1
 	.macro dispatch_step i
-	sfi_breg src, \
-	vldr	d0, [\B, #-(\i * 8)]
-	sfi_breg dst, \
-	vstr	d0, [\B, #-(\i * 8)]
+	vldr	d0, [src, #-(\i * 8)]
+	vstr	d0, [dst, #-(\i * 8)]
 	.endm
 	dispatch_7_dword
 #else
 	sub	src, src, #8
 	sub	dst, dst, #8
 1:
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #8]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #8]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #16]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #16]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #24]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #24]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #32]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #32]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #40]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #48]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #48]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #56]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #56]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #64]!
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #64]!
+	ldrd	A_l, A_h, [src, #8]
+	strd	A_l, A_h, [dst, #8]
+	ldrd	A_l, A_h, [src, #16]
+	strd	A_l, A_h, [dst, #16]
+	ldrd	A_l, A_h, [src, #24]
+	strd	A_l, A_h, [dst, #24]
+	ldrd	A_l, A_h, [src, #32]
+	strd	A_l, A_h, [dst, #32]
+	ldrd	A_l, A_h, [src, #40]
+	strd	A_l, A_h, [dst, #40]
+	ldrd	A_l, A_h, [src, #48]
+	strd	A_l, A_h, [dst, #48]
+	ldrd	A_l, A_h, [src, #56]
+	strd	A_l, A_h, [dst, #56]
+	ldrd	A_l, A_h, [src, #64]!
+	strd	A_l, A_h, [dst, #64]!
 	subs	tmp2, tmp2, #64
 	bge	1b
 	tst	tmp2, #0x3f
@@ -524,28 +437,20 @@ ENTRY(memcpy)
 	add	dst, dst, tmp1
 	add	src, src, tmp1
 	.macro dispatch_step i
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #-(\i * 8)]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #-(\i * 8)]
+	ldrd	A_l, A_h, [src, #-(\i * 8)]
+	strd	A_l, A_h, [dst, #-(\i * 8)]
 	.endm
 	dispatch_7_dword
 #endif
 
 	tst	tmp2, #4
-	sfi_breg src, \
-	ldrne	tmp1, [\B], #4
-	sfi_breg dst, \
-	strne	tmp1, [\B], #4
+	ldrne	tmp1, [src], #4
+	strne	tmp1, [dst], #4
 	lsls	tmp2, tmp2, #31		/* Count (tmp2) now dead. */
-	sfi_breg src, \
-	ldrhcs	tmp1, [\B], #2
-	sfi_breg src, \
-	ldrbne	tmp2, [\B]
-	sfi_breg dst, \
-	strhcs	tmp1, [\B], #2
-	sfi_breg dst, \
-	strbne	tmp2, [\B]
+	ldrhcs	tmp1, [src], #2
+	ldrbne	tmp2, [src]
+	strhcs	tmp1, [dst], #2
+	strbne	tmp2, [dst]
 
 .Ldone:
 	ldr	tmp2, [sp], #FRAME_SIZE
@@ -565,23 +470,15 @@ ENTRY(memcpy)
 	   copy position into a register.  This should act like a PLD
 	   operation but we won't have to repeat the transfer.  */
 
-	sfi_breg src, \
-	vldr	d3, [\B, #0]
-	sfi_breg src, \
-	vldr	d4, [\B, #64]
-	sfi_breg src, \
-	vldr	d5, [\B, #128]
-	sfi_breg src, \
-	vldr	d6, [\B, #192]
-	sfi_breg src, \
-	vldr	d7, [\B, #256]
-
-	sfi_breg src, \
-	vldr	d0, [\B, #8]
-	sfi_breg src, \
-	vldr	d1, [\B, #16]
-	sfi_breg src, \
-	vldr	d2, [\B, #24]
+	vldr	d3, [src, #0]
+	vldr	d4, [src, #64]
+	vldr	d5, [src, #128]
+	vldr	d6, [src, #192]
+	vldr	d7, [src, #256]
+
+	vldr	d0, [src, #8]
+	vldr	d1, [src, #16]
+	vldr	d2, [src, #24]
 	add	src, src, #32
 
 	subs	tmp2, tmp2, #prefetch_lines * 64 * 2
@@ -606,31 +503,19 @@ ENTRY(memcpy)
 	add	src, src, #3 * 64
 	add	dst, dst, #3 * 64
 	cpy_tail_vfp	d6, 0
-	sfi_breg dst, \
-	vstr	d7, [\B, #64]
-	sfi_breg src, \
-	vldr	d7, [\B, #64]
-	sfi_breg dst, \
-	vstr	d0, [\B, #64 + 8]
-	sfi_breg src, \
-	vldr	d0, [\B, #64 + 8]
-	sfi_breg dst, \
-	vstr	d1, [\B, #64 + 16]
-	sfi_breg src, \
-	vldr	d1, [\B, #64 + 16]
-	sfi_breg dst, \
-	vstr	d2, [\B, #64 + 24]
-	sfi_breg src, \
-	vldr	d2, [\B, #64 + 24]
-	sfi_breg dst, \
-	vstr	d7, [\B, #64 + 32]
+	vstr	d7, [dst, #64]
+	vldr	d7, [src, #64]
+	vstr	d0, [dst, #64 + 8]
+	vldr	d0, [src, #64 + 8]
+	vstr	d1, [dst, #64 + 16]
+	vldr	d1, [src, #64 + 16]
+	vstr	d2, [dst, #64 + 24]
+	vldr	d2, [src, #64 + 24]
+	vstr	d7, [dst, #64 + 32]
 	add	src, src, #96
-	sfi_breg dst, \
-	vstr	d0, [\B, #64 + 40]
-	sfi_breg dst, \
-	vstr	d1, [\B, #64 + 48]
-	sfi_breg dst, \
-	vstr	d2, [\B, #64 + 56]
+	vstr	d0, [dst, #64 + 40]
+	vstr	d1, [dst, #64 + 48]
+	vstr	d2, [dst, #64 + 56]
 	add	dst, dst, #128
 	add	tmp2, tmp2, #prefetch_lines * 64
 	b	.Lcpy_body_medium
@@ -641,83 +526,59 @@ ENTRY(memcpy)
 	/* Pre-bias src and dst.  */
 	sub	src, src, #8
 	sub	dst, dst, #8
-	sfi_pld	src, #8
-	sfi_pld	src, #72
+	pld	[src, #8]
+	pld	[src, #72]
 	subs	tmp2, tmp2, #64
-	sfi_pld	src, #136
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #8]
+	pld	[src, #136]
+	ldrd	A_l, A_h, [src, #8]
 	strd	B_l, B_h, [sp, #8]
 	cfi_rel_offset (B_l, 8)
 	cfi_rel_offset (B_h, 12)
-	sfi_breg src, \
-	ldrd	B_l, B_h, [\B, #16]
+	ldrd	B_l, B_h, [src, #16]
 	strd	C_l, C_h, [sp, #16]
 	cfi_rel_offset (C_l, 16)
 	cfi_rel_offset (C_h, 20)
-	sfi_breg src, \
-	ldrd	C_l, C_h, [\B, #24]
+	ldrd	C_l, C_h, [src, #24]
 	strd	D_l, D_h, [sp, #24]
 	cfi_rel_offset (D_l, 24)
 	cfi_rel_offset (D_h, 28)
-	sfi_pld	src, #200
-	sfi_breg src, \
-	ldrd	D_l, D_h, [\B, #32]!
+	pld	[src, #200]
+	ldrd	D_l, D_h, [src, #32]!
 	b	1f
 	.p2align	6
 2:
-	sfi_pld	src, #232
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #40]
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #48]
-	sfi_breg src, \
-	ldrd	B_l, B_h, [\B, #48]
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #56]
-	sfi_breg src, \
-	ldrd	C_l, C_h, [\B, #56]
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #64]!
-	sfi_breg src, \
-	ldrd	D_l, D_h, [\B, #64]!
+	pld	[src, #232]
+	strd	A_l, A_h, [dst, #40]
+	ldrd	A_l, A_h, [src, #40]
+	strd	B_l, B_h, [dst, #48]
+	ldrd	B_l, B_h, [src, #48]
+	strd	C_l, C_h, [dst, #56]
+	ldrd	C_l, C_h, [src, #56]
+	strd	D_l, D_h, [dst, #64]!
+	ldrd	D_l, D_h, [src, #64]!
 	subs	tmp2, tmp2, #64
 1:
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #8]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #8]
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #16]
-	sfi_breg src, \
-	ldrd	B_l, B_h, [\B, #16]
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #24]
-	sfi_breg src, \
-	ldrd	C_l, C_h, [\B, #24]
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #32]
-	sfi_breg src, \
-	ldrd	D_l, D_h, [\B, #32]
+	strd	A_l, A_h, [dst, #8]
+	ldrd	A_l, A_h, [src, #8]
+	strd	B_l, B_h, [dst, #16]
+	ldrd	B_l, B_h, [src, #16]
+	strd	C_l, C_h, [dst, #24]
+	ldrd	C_l, C_h, [src, #24]
+	strd	D_l, D_h, [dst, #32]
+	ldrd	D_l, D_h, [src, #32]
 	bcs	2b
 	/* Save the remaining bytes and restore the callee-saved regs.  */
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
+	strd	A_l, A_h, [dst, #40]
 	add	src, src, #40
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #48]
+	strd	B_l, B_h, [dst, #48]
 	ldrd	B_l, B_h, [sp, #8]
 	cfi_restore (B_l)
 	cfi_restore (B_h)
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #56]
+	strd	C_l, C_h, [dst, #56]
 	ldrd	C_l, C_h, [sp, #16]
 	cfi_restore (C_l)
 	cfi_restore (C_h)
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #64]
+	strd	D_l, D_h, [dst, #64]
 	ldrd	D_l, D_h, [sp, #24]
 	cfi_restore (D_l)
 	cfi_restore (D_h)
@@ -734,35 +595,29 @@ ENTRY(memcpy)
 	cfi_remember_state
 
 .Lcpy_notaligned:
-	sfi_pld	src
-	sfi_pld	src, #64
+	pld	[src, #0]
+	pld	[src, #64]
 	/* There's at least 64 bytes to copy, but there is no mutual
 	   alignment.  */
 	/* Bring DST to 64-bit alignment.  */
 	lsls	tmp2, dst, #29
-	sfi_pld	src, #(2 * 64)
+	pld	[src, #(2 * 64)]
 	beq	1f
 	rsbs	tmp2, tmp2, #0
 	sub	count, count, tmp2, lsr #29
-	sfi_breg src, \
-	ldrmi	tmp1, [\B], #4
-	sfi_breg dst, \
-	strmi	tmp1, [\B], #4
+	ldrmi	tmp1, [src], #4
+	strmi	tmp1, [dst], #4
 	lsls	tmp2, tmp2, #2
-	sfi_breg src, \
-	ldrbne	tmp1, [\B], #1
-	sfi_breg src, \
-	ldrhcs	tmp2, [\B], #2
-	sfi_breg dst, \
-	strbne	tmp1, [\B], #1
-	sfi_breg dst, \
-	strhcs	tmp2, [\B], #2
+	ldrbne	tmp1, [src], #1
+	ldrhcs	tmp2, [src], #2
+	strbne	tmp1, [dst], #1
+	strhcs	tmp2, [dst], #2
 1:
-	sfi_pld	src, #(3 * 64)
+	pld	[src, #(3 * 64)]
 	subs	count, count, #64
 	ldrmi	tmp2, [sp], #FRAME_SIZE
 	bmi	.Ltail63unaligned
-	sfi_pld	src, #(4 * 64)
+	pld	[src, #(4 * 64)]
 
 #ifdef USE_NEON
 	/* These need an extra layer of macro just to work around a
@@ -775,132 +630,88 @@ ENTRY(memcpy)
 	vst1.8	{\reglist}, [ALIGN (\basereg, 64)]!
 	.endm
 
-	/* These are used by the NaCl sfi_breg macro.  */
-	.macro _sfi_breg_dmask_neon_load_multi reg
-	_sfi_dmask \reg
-	.endm
-	.macro _sfi_breg_dmask_neon_store_multi reg
-	_sfi_dmask \reg
-	.endm
-
-	sfi_breg src, neon_load_multi d0-d3, \B
-	sfi_breg src, neon_load_multi d4-d7, \B
+	neon_load_multi d0-d3, src
+	neon_load_multi d4-d7, src
 	subs	count, count, #64
 	bmi	2f
 1:
-	sfi_pld	src, #(4 * 64)
-	sfi_breg dst, neon_store_multi d0-d3, \B
-	sfi_breg src, neon_load_multi d0-d3, \B
-	sfi_breg dst, neon_store_multi d4-d7, \B
-	sfi_breg src, neon_load_multi d4-d7, \B
+	pld	[src, #(4 * 64)]
+	neon_store_multi d0-d3, dst
+	neon_load_multi d0-d3, src
+	neon_store_multi d4-d7, dst
+	neon_load_multi d4-d7, src
 	subs	count, count, #64
 	bpl	1b
 2:
-	sfi_breg dst, neon_store_multi d0-d3, \B
-	sfi_breg dst, neon_store_multi d4-d7, \B
+	neon_store_multi d0-d3, dst
+	neon_store_multi d4-d7, dst
 	ands	count, count, #0x3f
 #else
 	/* Use an SMS style loop to maximize the I/O bandwidth.  */
 	sub	src, src, #4
 	sub	dst, dst, #8
 	subs	tmp2, count, #64	/* Use tmp2 for count.  */
-	sfi_breg src, \
-	ldr	A_l, [\B, #4]
-	sfi_breg src, \
-	ldr	A_h, [\B, #8]
+	ldr	A_l, [src, #4]
+	ldr	A_h, [src, #8]
 	strd	B_l, B_h, [sp, #8]
 	cfi_rel_offset (B_l, 8)
 	cfi_rel_offset (B_h, 12)
-	sfi_breg src, \
-	ldr	B_l, [\B, #12]
-	sfi_breg src, \
-	ldr	B_h, [\B, #16]
+	ldr	B_l, [src, #12]
+	ldr	B_h, [src, #16]
 	strd	C_l, C_h, [sp, #16]
 	cfi_rel_offset (C_l, 16)
 	cfi_rel_offset (C_h, 20)
-	sfi_breg src, \
-	ldr	C_l, [\B, #20]
-	sfi_breg src, \
-	ldr	C_h, [\B, #24]
+	ldr	C_l, [src, #20]
+	ldr	C_h, [src, #24]
 	strd	D_l, D_h, [sp, #24]
 	cfi_rel_offset (D_l, 24)
 	cfi_rel_offset (D_h, 28)
-	sfi_breg src, \
-	ldr	D_l, [\B, #28]
-	sfi_breg src, \
-	ldr	D_h, [\B, #32]!
+	ldr	D_l, [src, #28]
+	ldr	D_h, [src, #32]!
 	b	1f
 	.p2align	6
 2:
-	sfi_pld	src, #(5 * 64) - (32 - 4)
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
-	sfi_breg src, \
-	ldr	A_l, [\B, #36]
-	sfi_breg src, \
-	ldr	A_h, [\B, #40]
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #48]
-	sfi_breg src, \
-	ldr	B_l, [\B, #44]
-	sfi_breg src, \
-	ldr	B_h, [\B, #48]
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #56]
-	sfi_breg src, \
-	ldr	C_l, [\B, #52]
-	sfi_breg src, \
-	ldr	C_h, [\B, #56]
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #64]!
-	sfi_breg src, \
-	ldr	D_l, [\B, #60]
-	sfi_breg src, \
-	ldr	D_h, [\B, #64]!
+	pld	[src, #(5 * 64) - (32 - 4)]
+	strd	A_l, A_h, [dst, #40]
+	ldr	A_l, [src, #36]
+	ldr	A_h, [src, #40]
+	strd	B_l, B_h, [dst, #48]
+	ldr	B_l, [src, #44]
+	ldr	B_h, [src, #48]
+	strd	C_l, C_h, [dst, #56]
+	ldr	C_l, [src, #52]
+	ldr	C_h, [src, #56]
+	strd	D_l, D_h, [dst, #64]!
+	ldr	D_l, [src, #60]
+	ldr	D_h, [src, #64]!
 	subs	tmp2, tmp2, #64
 1:
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #8]
-	sfi_breg src, \
-	ldr	A_l, [\B, #4]
-	sfi_breg src, \
-	ldr	A_h, [\B, #8]
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #16]
-	sfi_breg src, \
-	ldr	B_l, [\B, #12]
-	sfi_breg src, \
-	ldr	B_h, [\B, #16]
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #24]
-	sfi_breg src, \
-	ldr	C_l, [\B, #20]
-	sfi_breg src, \
-	ldr	C_h, [\B, #24]
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #32]
-	sfi_breg src, \
-	ldr	D_l, [\B, #28]
-	sfi_breg src, \
-	ldr	D_h, [\B, #32]
+	strd	A_l, A_h, [dst, #8]
+	ldr	A_l, [src, #4]
+	ldr	A_h, [src, #8]
+	strd	B_l, B_h, [dst, #16]
+	ldr	B_l, [src, #12]
+	ldr	B_h, [src, #16]
+	strd	C_l, C_h, [dst, #24]
+	ldr	C_l, [src, #20]
+	ldr	C_h, [src, #24]
+	strd	D_l, D_h, [dst, #32]
+	ldr	D_l, [src, #28]
+	ldr	D_h, [src, #32]
 	bcs	2b
 
 	/* Save the remaining bytes and restore the callee-saved regs.  */
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
+	strd	A_l, A_h, [dst, #40]
 	add	src, src, #36
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #48]
+	strd	B_l, B_h, [dst, #48]
 	ldrd	B_l, B_h, [sp, #8]
 	cfi_restore (B_l)
 	cfi_restore (B_h)
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #56]
+	strd	C_l, C_h, [dst, #56]
 	ldrd	C_l, C_h, [sp, #16]
 	cfi_restore (C_l)
 	cfi_restore (C_h)
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #64]
+	strd	D_l, D_h, [dst, #64]
 	ldrd	D_l, D_h, [sp, #24]
 	cfi_restore (D_l)
 	cfi_restore (D_h)
diff --git a/sysdeps/arm/armv7/strcmp.S b/sysdeps/arm/armv7/strcmp.S
index c8fab4ba0f..25d055754e 100644
--- a/sysdeps/arm/armv7/strcmp.S
+++ b/sysdeps/arm/armv7/strcmp.S
@@ -178,10 +178,8 @@
 #endif
 ENTRY (strcmp)
 #if STRCMP_PRECHECK == 1
-	sfi_breg src1, \
-	ldrb	r2, [\B]
-	sfi_breg src2, \
-	ldrb	r3, [\B]
+	ldrb	r2, [src1]
+	ldrb	r3, [src2]
 	cmp	r2, #1
 	it	cs
 	cmpcs	r2, r3
@@ -211,11 +209,9 @@ ENTRY (strcmp)
 	and	tmp2, tmp1, #3
 	bic	src2, src2, #7
 	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
-	sfi_breg src1, \
-	ldrd	data1a, data1b, [\B], #16
+	ldrd	data1a, data1b, [src1], #16
 	tst	tmp1, #4
-	sfi_breg src2, \
-	ldrd	data2a, data2b, [\B], #16
+	ldrd	data2a, data2b, [src2], #16
 	prepare_mask tmp1, tmp2
 	apply_mask data1a, tmp1
 	apply_mask data2a, tmp1
@@ -231,10 +227,8 @@ ENTRY (strcmp)
 	.p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
 	.p2align 2	/* Always word aligned.  */
 .Lloop_aligned8:
-	sfi_breg src1, \
-	ldrd	data1a, data1b, [\B], #16
-	sfi_breg src2, \
-	ldrd	data2a, data2b, [\B], #16
+	ldrd	data1a, data1b, [src1], #16
+	ldrd	data2a, data2b, [src2], #16
 .Lstart_realigned8:
 	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
 	eor	syndrome_a, data1a, data2a
@@ -245,10 +239,8 @@ ENTRY (strcmp)
 	sel	syndrome_b, syndrome_b, const_m1
 	cbnz	syndrome_b, .Ldiff_in_b
 
-	sfi_breg src1, \
-	ldrd	data1a, data1b, [\B, #-8]
-	sfi_breg src2, \
-	ldrd	data2a, data2b, [\B, #-8]
+	ldrd	data1a, data1b, [src1, #-8]
+	ldrd	data2a, data2b, [src2, #-8]
 	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
 	eor	syndrome_a, data1a, data2a
 	sel	syndrome_a, syndrome_a, const_m1
@@ -279,19 +271,15 @@ ENTRY (strcmp)
 	/* Unrolled by a factor of 2, to reduce the number of post-increment
 	   operations.  */
 .Lloop_aligned4:
-	sfi_breg src1, \
-	ldr	data1, [\B], #8
-	sfi_breg src2, \
-	ldr	data2, [\B], #8
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
 .Lstart_realigned4:
 	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
 	eor	syndrome, data1, data2
 	sel	syndrome, syndrome, const_m1
 	cbnz	syndrome, .Laligned4_done
-	sfi_breg src1, \
-	ldr	data1, [\B, #-4]
-	sfi_breg src2, \
-	ldr	data2, [\B, #-4]
+	ldr	data1, [src1, #-4]
+	ldr	data2, [src2, #-4]
 	uadd8	syndrome, data1, const_m1
 	eor	syndrome, data1, data2
 	sel	syndrome, syndrome, const_m1
@@ -307,11 +295,9 @@ ENTRY (strcmp)
 	   masking off the unwanted loaded data to prevent a difference.  */
 	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
 	bic	src1, src1, #3
-	sfi_breg src1, \
-	ldr	data1, [\B], #8
+	ldr	data1, [src1], #8
 	bic	src2, src2, #3
-	sfi_breg src2, \
-	ldr	data2, [\B], #8
+	ldr	data2, [src2], #8
 
 	prepare_mask tmp1, tmp1
 	apply_mask data1, tmp1
@@ -324,30 +310,26 @@ ENTRY (strcmp)
 	sub	src2, src2, tmp1
 	bic	src1, src1, #3
 	lsls	tmp1, tmp1, #31
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	beq	.Laligned_m2
 	bcs	.Laligned_m1
 
 #if STRCMP_PRECHECK == 0
-	sfi_breg src2, \
-	ldrb	data2, [\B, #1]
+	ldrb	data2, [src2, #1]
 	uxtb	tmp1, data1, ror #BYTE1_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
 	cbz	data2, .Lmisaligned_exit
 
 .Laligned_m2:
-	sfi_breg src2, \
-	ldrb	data2, [\B, #2]
+	ldrb	data2, [src2, #2]
 	uxtb	tmp1, data1, ror #BYTE2_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
 	cbz	data2, .Lmisaligned_exit
 
 .Laligned_m1:
-	sfi_breg src2, \
-	ldrb	data2, [\B, #3]
+	ldrb	data2, [src2, #3]
 	uxtb	tmp1, data1, ror #BYTE3_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
@@ -356,16 +338,14 @@ ENTRY (strcmp)
 #else  /* STRCMP_PRECHECK */
 	/* If we've done the pre-check, then we don't need to check the
 	   first byte again here.  */
-	sfi_breg src2, \
-	ldrb	data2, [\B, #2]
+	ldrb	data2, [src2, #2]
 	uxtb	tmp1, data1, ror #BYTE2_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
 	cbz	data2, .Lmisaligned_exit
 
 .Laligned_m2:
-	sfi_breg src2, \
-	ldrb	data2, [\B, #3]
+	ldrb	data2, [src2, #3]
 	uxtb	tmp1, data1, ror #BYTE3_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
@@ -391,13 +371,11 @@ ENTRY (strcmp)
 	cfi_restore_state
 	/* src1 is word aligned, but src2 has no common alignment
 	   with it.  */
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	lsls	tmp1, src2, #31		/* C=src2[1], Z=src2[0].  */
 
 	bic	src2, src2, #3
-	sfi_breg src2, \
-	ldr	data2, [\B], #4
+	ldr	data2, [src2], #4
 	bhi	.Loverlap1		/* C=1, Z=0 => src2[1:0] = 0b11.  */
 	bcs	.Loverlap2		/* C=1, Z=1 => src2[1:0] = 0b10.  */
 
@@ -409,13 +387,11 @@ ENTRY (strcmp)
 	sel	syndrome, syndrome, const_m1
 	bne	4f
 	cbnz	syndrome, 5f
-	sfi_breg src2, \
-	ldr	data2, [\B], #4
+	ldr	data2, [src2], #4
 	eor	tmp1, tmp1, data1
 	cmp	tmp1, data2, S2HI #24
 	bne	6f
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	b	.Loverlap3
 4:
 	S2LO	data2, data2, #8
@@ -427,8 +403,7 @@ ENTRY (strcmp)
 
 	/* We can only get here if the MSB of data1 contains 0, so
 	   fast-path the exit.  */
-	sfi_breg src2, \
-	ldrb	result, [\B]
+	ldrb	result, [src2]
 	ldrd	r4, r5, [sp], #16
 	cfi_remember_state
 	cfi_def_cfa_offset (0)
@@ -454,13 +429,11 @@ ENTRY (strcmp)
 	sel	syndrome, syndrome, const_m1
 	bne	4f
 	cbnz	syndrome, 5f
-	sfi_breg src2, \
-	ldr	data2, [\B], #4
+	ldr	data2, [src2], #4
 	eor	tmp1, tmp1, data1
 	cmp	tmp1, data2, S2HI #16
 	bne	6f
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	b	.Loverlap2
 4:
 	S2LO	data2, data2, #16
@@ -469,8 +442,7 @@ ENTRY (strcmp)
 	ands	syndrome, syndrome, const_m1, S2LO #16
 	bne	.Lstrcmp_done_equal
 
-	sfi_breg src2, \
-	ldrh	data2, [\B]
+	ldrh	data2, [src2]
 	S2LO	data1, data1, #16
 #ifdef __ARM_BIG_ENDIAN
 	lsl	data2, data2, #16
@@ -490,13 +462,11 @@ ENTRY (strcmp)
 	sel	syndrome, syndrome, const_m1
 	bne	4f
 	cbnz	syndrome, 5f
-	sfi_breg src2, \
-	ldr	data2, [\B], #4
+	ldr	data2, [src2], #4
 	eor	tmp1, tmp1, data1
 	cmp	tmp1, data2, S2HI #8
 	bne	6f
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	b	.Loverlap1
 4:
 	S2LO	data2, data2, #24
@@ -504,8 +474,7 @@ ENTRY (strcmp)
 5:
 	tst	syndrome, #LSB
 	bne	.Lstrcmp_done_equal
-	sfi_breg src2, \
-	ldr	data2, [\B]
+	ldr	data2, [src2]
 6:
 	S2LO	data1, data1, #8
 	bic	data2, data2, #MSB