From a7ac752299cb61f3140ab76c6f0a4ed46f52df55 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 15 Mar 2013 09:31:56 -0700 Subject: ARM: sfi_breg assembler macro --- ports/sysdeps/arm/__longjmp.S | 30 ++++++--- ports/sysdeps/arm/add_n.S | 27 +++++--- ports/sysdeps/arm/addmul_1.S | 18 ++++-- ports/sysdeps/arm/arm-mcount.S | 3 +- ports/sysdeps/arm/armv6/rawmemchr.S | 18 ++++-- ports/sysdeps/arm/armv6/strchr.S | 18 ++++-- ports/sysdeps/arm/armv6/strcpy.S | 55 ++++++++++------ ports/sysdeps/arm/armv6/strlen.S | 18 ++++-- ports/sysdeps/arm/armv6/strrchr.S | 6 +- ports/sysdeps/arm/armv6t2/memchr.S | 9 ++- ports/sysdeps/arm/memcpy.S | 124 +++++++++++++++++++++++------------- ports/sysdeps/arm/memmove.S | 124 +++++++++++++++++++++++------------- ports/sysdeps/arm/memset.S | 27 +++++--- ports/sysdeps/arm/setjmp.S | 27 +++++--- ports/sysdeps/arm/strlen.S | 6 +- ports/sysdeps/arm/submul_1.S | 18 ++++-- ports/sysdeps/arm/sysdep.h | 42 ++++++++++++ 17 files changed, 381 insertions(+), 189 deletions(-) (limited to 'ports/sysdeps') diff --git a/ports/sysdeps/arm/__longjmp.S b/ports/sysdeps/arm/__longjmp.S index becba930a0..2936a441d0 100644 --- a/ports/sysdeps/arm/__longjmp.S +++ b/ports/sysdeps/arm/__longjmp.S @@ -31,11 +31,13 @@ ENTRY (__longjmp) moveq r0, #1 /* can't let setjmp() return zero! */ #ifdef CHECK_SP - ldr r4, [ip, #32] /* jmpbuf's sp */ + sfi_breg ip, \ + ldr r4, [\B, #32] /* jmpbuf's sp */ cfi_undefined (r4) CHECK_SP (r4) #endif - ldmia ip!, JMP_BUF_REGLIST + sfi_breg ip, \ + ldmia \B!, JMP_BUF_REGLIST cfi_restore (v1) cfi_restore (v2) cfi_restore (v3) @@ -79,9 +81,11 @@ ENTRY (__longjmp) /* Restore the VFP registers. */ /* Following instruction is vldmia ip!, {d8-d15}. */ - ldc p11, cr8, [r12], #64 + sfi_breg r12, \ + ldc p11, cr8, [\B], #64 /* Restore the floating-point status register. */ - ldr a3, [ip], #4 + sfi_breg ip, \ + ldr a3, [\B], #4 /* Following instruction is fmxr fpscr, a3. */ mcr p10, 7, a3, cr1, cr0, 0 .Lno_vfp: @@ -92,12 +96,18 @@ ENTRY (__longjmp) /* Restore the call-preserved iWMMXt registers. */ /* Following instructions are wldrd wr10, [ip], #8 (etc.) */ - ldcl p1, cr10, [r12], #8 - ldcl p1, cr11, [r12], #8 - ldcl p1, cr12, [r12], #8 - ldcl p1, cr13, [r12], #8 - ldcl p1, cr14, [r12], #8 - ldcl p1, cr15, [r12], #8 + sfi_breg r12, \ + ldcl p1, cr10, [\B], #8 + sfi_breg r12, \ + ldcl p1, cr11, [\B], #8 + sfi_breg r12, \ + ldcl p1, cr12, [\B], #8 + sfi_breg r12, \ + ldcl p1, cr13, [\B], #8 + sfi_breg r12, \ + ldcl p1, cr14, [\B], #8 + sfi_breg r12, \ + ldcl p1, cr15, [\B], #8 .Lno_iwmmxt: #endif diff --git a/ports/sysdeps/arm/add_n.S b/ports/sysdeps/arm/add_n.S index 52927d997d..c6b0147972 100644 --- a/ports/sysdeps/arm/add_n.S +++ b/ports/sysdeps/arm/add_n.S @@ -52,31 +52,40 @@ ENTRY (FUNC) add lr, r1, r3, lsl #2 /* compute end src1 */ beq 1f - ldr r4, [r1], #4 /* do one to make count even */ - ldr r5, [r2], #4 + sfi_breg r1, \ + ldr r4, [\B], #4 /* do one to make count even */ + sfi_breg r2, \ + ldr r5, [\B], #4 OPC r4, r4, r5 teq r1, lr /* end of count? (preserve carry) */ - str r4, [r0], #4 + sfi_breg r0, \ + str r4, [\B], #4 beq 9f 1: tst r3, #2 /* count & 2 == 2? */ beq 2f - ldm r1!, { r4, r5 } /* do two to make count 0 mod 4 */ - ldm r2!, { r6, r7 } + sfi_breg r1, \ + ldm \B!, { r4, r5 } /* do two to make count 0 mod 4 */ + sfi_breg r2, \ + ldm \B!, { r6, r7 } OPC r4, r4, r6 OPC r5, r5, r7 teq r1, lr /* end of count? */ - stm r0!, { r4, r5 } + sfi_breg r0, \ + stm \B!, { r4, r5 } beq 9f 2: - ldm r1!, { r3, r5, r7, r10 } /* do four each loop */ - ldm r2!, { r4, r6, r8, ip } + sfi_breg r1, \ + ldm \B!, { r3, r5, r7, r10 } /* do four each loop */ + sfi_breg r2, \ + ldm \B!, { r4, r6, r8, ip } OPC r3, r3, r4 OPC r5, r5, r6 OPC r7, r7, r8 OPC r10, r10, ip teq r1, lr - stm r0!, { r3, r5, r7, r10 } + sfi_breg r0, \ + stm \B!, { r3, r5, r7, r10 } bne 2b 9: diff --git a/ports/sysdeps/arm/addmul_1.S b/ports/sysdeps/arm/addmul_1.S index 09153d2bf2..d204c887e3 100644 --- a/ports/sysdeps/arm/addmul_1.S +++ b/ports/sysdeps/arm/addmul_1.S @@ -37,16 +37,21 @@ ENTRY (__mpn_addmul_1) cfi_rel_offset (r6, 8) cfi_rel_offset (r7, 12) - ldr r6, [r1], #4 - ldr r5, [r0] + sfi_breg r1, \ + ldr r6, [\B], #4 + sfi_breg r0, \ + ldr r5, [\B] mov r4, #0 /* init carry in */ b 1f 0: - ldr r6, [r1], #4 /* load next ul */ + sfi_breg r1, \ + ldr r6, [\B], #4 /* load next ul */ adds r7, r4, r5 /* (out, c) = cl + lpl */ - ldr r5, [r0, #4] /* load next rl */ + sfi_breg r0, \ + ldr r5, [\B, #4] /* load next rl */ adc r4, ip, #0 /* cl = hpl + c */ - str r7, [r0], #4 + sfi_breg r0, \ + str r7, [\B], #4 1: mov ip, #0 /* zero-extend rl */ umlal r5, ip, r6, r3 /* (hpl, lpl) = ul * vl + rl */ @@ -54,7 +59,8 @@ ENTRY (__mpn_addmul_1) bne 0b adds r4, r4, r5 /* (out, c) = cl + llpl */ - str r4, [r0] + sfi_breg r0, \ + str r4, [\B] adc r0, ip, #0 /* return hpl + c */ pop { r4, r5, r6, r7 } diff --git a/ports/sysdeps/arm/arm-mcount.S b/ports/sysdeps/arm/arm-mcount.S index 8ad077925b..f61e978f5e 100644 --- a/ports/sysdeps/arm/arm-mcount.S +++ b/ports/sysdeps/arm/arm-mcount.S @@ -79,7 +79,8 @@ ENTRY(_mcount) cfi_rel_offset (lr, 20) movs r0, fp ittt ne - ldrne r0, [r0, #-4] + sfi_breg r0, \ + ldrne r0, [\B, #-4] movsne r1, lr blne __mcount_internal #if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__) diff --git a/ports/sysdeps/arm/armv6/rawmemchr.S b/ports/sysdeps/arm/armv6/rawmemchr.S index 7877bcf6d6..b5e4a16f03 100644 --- a/ports/sysdeps/arm/armv6/rawmemchr.S +++ b/ports/sysdeps/arm/armv6/rawmemchr.S @@ -25,7 +25,8 @@ ENTRY (__rawmemchr) @ r0 = start of string @ r1 = character to match @ returns a pointer to the match, which must be present. - ldrb r2, [r0] @ load first byte asap + sfi_breg r0, \ + ldrb r2, [\B] @ load first byte asap @ To cater to long strings, we want to search through a few @ characters until we reach an aligned pointer. To cater to @@ -41,7 +42,8 @@ ENTRY (__rawmemchr) bxeq lr @ Loop until we find ... -1: ldrb r2, [r0, #1]! +1: sfi_breg r0, \ + ldrb r2, [\B, #1]! subs r3, r3, #1 @ ... the alignment point it ne cmpne r2, r1 @ ... or C @@ -54,15 +56,16 @@ ENTRY (__rawmemchr) add r0, r0, #1 @ So now we're aligned. - ldrd r2, r3, [r0], #8 + sfi_breg r0, \ + ldrd r2, r3, [\B], #8 orr r1, r1, r1, lsl #8 @ Replicate C to all bytes #ifdef ARCH_HAS_T2 movw ip, #0x0101 - pld [r0, #64] + sfi_pld r0, #64 movt ip, #0x0101 #else ldr ip, =0x01010101 - pld [r0, #64] + sfi_pld r0, #64 #endif orr r1, r1, r1, lsl #16 @@ -74,10 +77,11 @@ ENTRY (__rawmemchr) eor r3, r3, r1 uqsub8 r2, ip, r2 @ Find C uqsub8 r3, ip, r3 - pld [r0, #128] + sfi_pld r0, #128 orrs r3, r3, r2 @ Test both words for found it eq - ldrdeq r2, r3, [r0], #8 + sfi_breg r0, \ + ldrdeq r2, r3, [\B], #8 beq 2b @ Found something. Disambiguate between first and second words. diff --git a/ports/sysdeps/arm/armv6/strchr.S b/ports/sysdeps/arm/armv6/strchr.S index c856283d53..936c2be666 100644 --- a/ports/sysdeps/arm/armv6/strchr.S +++ b/ports/sysdeps/arm/armv6/strchr.S @@ -25,7 +25,8 @@ ENTRY (strchr) @ r0 = start of string @ r1 = character to match @ returns NULL for no match, or a pointer to the match - ldrb r2, [r0] @ load the first byte asap + sfi_breg r0, \ + ldrb r2, [\B] @ load the first byte asap uxtb r1, r1 @ To cater to long strings, we want to search through a few @@ -42,7 +43,8 @@ ENTRY (strchr) beq 99f @ Loop until we find ... -1: ldrb r2, [r0, #1]! +1: sfi_breg r0, \ + ldrb r2, [\B, #1]! subs r3, r3, #1 @ ... the aligment point it ne cmpne r2, r1 @ ... or the character @@ -65,15 +67,16 @@ ENTRY (strchr) cfi_rel_offset (r6, 8) cfi_rel_offset (r7, 12) - ldrd r2, r3, [r0], #8 + sfi_breg r0, \ + ldrd r2, r3, [\B], #8 orr r1, r1, r1, lsl #8 @ Replicate C to all bytes #ifdef ARCH_HAS_T2 movw ip, #0x0101 - pld [r0, #64] + sfi_pld r0, #64 movt ip, #0x0101 #else ldr ip, =0x01010101 - pld [r0, #64] + sfi_pld r0, #64 #endif orr r1, r1, r1, lsl #16 @@ -87,13 +90,14 @@ ENTRY (strchr) uqsub8 r5, ip, r3 eor r7, r3, r1 uqsub8 r6, ip, r6 @ Find C - pld [r0, #128] @ Prefetch 2 lines ahead + sfi_pld r0, #128 @ Prefetch 2 lines ahead uqsub8 r7, ip, r7 orr r4, r4, r6 @ Combine found for EOS and C orr r5, r5, r7 orrs r6, r4, r5 @ Combine the two words it eq - ldrdeq r2, r3, [r0], #8 + sfi_breg r0, \ + ldrdeq r2, r3, [\B], #8 beq 2b @ Found something. Disambiguate between first and second words. diff --git a/ports/sysdeps/arm/armv6/strcpy.S b/ports/sysdeps/arm/armv6/strcpy.S index 41f6443319..cd13ff7ecf 100644 --- a/ports/sysdeps/arm/armv6/strcpy.S +++ b/ports/sysdeps/arm/armv6/strcpy.S @@ -44,8 +44,8 @@ ENTRY (strcpy) @ Signal strcpy with DEST in IP. mov ip, r0 0: - pld [r0] - pld [r1] + sfi_pld r0 + sfi_pld r1 @ To cater to long strings, we want 8 byte alignment in the source. @ To cater to small strings, we don't want to start that right away. @@ -54,9 +54,11 @@ ENTRY (strcpy) rsb r3, r3, #16 @ Loop until we find ... -1: ldrb r2, [r1], #1 +1: sfi_breg r1, \ + ldrb r2, [\B], #1 subs r3, r3, #1 @ ... the alignment point - strb r2, [r0], #1 + sfi_breg r0, \ + strb r2, [\B], #1 it ne cmpne r2, #0 @ ... or EOS bne 1b @@ -66,9 +68,10 @@ ENTRY (strcpy) beq .Lreturn @ Load the next two words asap - ldrd r2, r3, [r1], #8 - pld [r0, #64] - pld [r1, #64] + sfi_breg r1, \ + ldrd r2, r3, [\B], #8 + sfi_pld r0, #64 + sfi_pld r1, #64 @ For longer strings, we actaully need a stack frame. push { r4, r5, r6, r7 } @@ -96,15 +99,18 @@ ENTRY (strcpy) .balign 16 2: uqsub8 r4, r7, r2 @ Find EOS uqsub8 r5, r7, r3 - pld [r1, #128] + sfi_pld r1, #128 cmp r4, #0 @ EOS in first word? - pld [r0, #128] + sfi_pld r0, #128 bne 3f - str r2, [r0], #4 + sfi_breg r0, \ + str r2, [\B], #4 cmp r5, #0 @ EOS in second word? bne 4f - str r3, [r0], #4 - ldrd r2, r3, [r1], #8 + sfi_breg r0, \ + str r3, [\B], #4 + sfi_breg r1, \ + ldrd r2, r3, [\B], #8 b 2b 3: sub r1, r1, #4 @ backup to first word @@ -114,9 +120,11 @@ ENTRY (strcpy) @ Note that we generally back up and re-read source bytes, @ but we'll not re-write dest bytes. .Lbyte_loop: - ldrb r2, [r1], #1 + sfi_breg r1, \ + ldrb r2, [\B], #1 cmp r2, #0 - strb r2, [r0], #1 + sfi_breg r0, \ + strb r2, [\B], #1 bne .Lbyte_loop pop { r4, r5, r6, r7 } @@ -161,7 +169,8 @@ ENTRY (strcpy) @ Store a few bytes from the first word. @ At the same time we align r0 and shift out bytes from r2. .rept 4-\unalign - strb r2, [r0], #1 + sfi_breg r0, \ + strb r2, [\B], #1 lsr r2, r2, #8 .endr #ifdef __ARMEB__ @@ -176,20 +185,23 @@ ENTRY (strcpy) orr r2, r2, r3, lsh_gt #(\unalign*8) @ Save leftover bytes from the two words lsh_ls r6, r3, #((4-\unalign)*8) - str r2, [r0], #4 + sfi_breg r0, \ + str r2, [\B], #4 @ The "real" start of the unaligned copy loop. - ldrd r2, r3, [r1], #8 @ Load 8 more bytes + sfi_breg r1, \ + ldrd r2, r3, [\B], #8 @ Load 8 more bytes uqsub8 r4, r7, r2 @ Find EOS - pld [r1, #128] + sfi_pld r1, #128 uqsub8 r5, r7, r3 - pld [r0, #128] + sfi_pld r0, #128 mvns r4, r4 @ EOS in first word? bne 3f @ Combine the leftover and the first word orr r6, r6, r2, lsh_gt #(\unalign*8) @ Discard used bytes from the first word. lsh_ls r2, r2, #((4-\unalign)*8) - str r6, [r0], #4 + sfi_breg r0, \ + str r6, [\B], #4 b 1b @ Found EOS in one of the words; adjust backward 3: sub r1, r1, #4 @@ -200,7 +212,8 @@ ENTRY (strcpy) rev r2, r2 #endif .rept \unalign - strb r2, [r0], #1 + sfi_breg r0, \ + strb r2, [\B], #1 lsr r2, r2, #8 .endr b .Lbyte_loop diff --git a/ports/sysdeps/arm/armv6/strlen.S b/ports/sysdeps/arm/armv6/strlen.S index a53d41418a..59ff6b5d93 100644 --- a/ports/sysdeps/arm/armv6/strlen.S +++ b/ports/sysdeps/arm/armv6/strlen.S @@ -23,7 +23,8 @@ ENTRY (strlen) @ r0 = start of string - ldrb r2, [r0] @ load the first byte asap + sfi_breg r0, \ + ldrb r2, [\B] @ load the first byte asap @ To cater to long strings, we want to search through a few @ characters until we reach an aligned pointer. To cater to @@ -38,7 +39,8 @@ ENTRY (strlen) beq 99f @ Loop until we find ... -1: ldrb r2, [r0, #1]! +1: sfi_breg r0, \ + ldrb r2, [\B, #1]! subs r3, r3, #1 @ ... the aligment point it ne cmpne r2, #0 @ ... or EOS @@ -50,14 +52,15 @@ ENTRY (strlen) add r0, r0, #1 @ So now we're aligned. - ldrd r2, r3, [r0], #8 + sfi_breg r0, \ + ldrd r2, r3, [\B], #8 #ifdef ARCH_HAS_T2 movw ip, #0x0101 - pld [r0, #64] + sfi_pld r0, #64 movt ip, #0x0101 #else ldr ip, =0x01010101 - pld [r0, #64] + sfi_pld r0, #64 #endif @ Loop searching for EOS, 8 bytes at a time. @@ -67,10 +70,11 @@ ENTRY (strlen) .balign 16 2: uqsub8 r2, ip, r2 @ Find EOS uqsub8 r3, ip, r3 - pld [r0, #128] @ Prefetch 2 lines ahead + sfi_pld r0, #128 @ Prefetch 2 lines ahead orrs r3, r3, r2 @ Combine the two words it eq - ldrdeq r2, r3, [r0], #8 + sfi_breg r0, \ + ldrdeq r2, r3, [\B], #8 beq 2b @ Found something. Disambiguate between first and second words. diff --git a/ports/sysdeps/arm/armv6/strrchr.S b/ports/sysdeps/arm/armv6/strrchr.S index ddd4f7fc06..e40df90a72 100644 --- a/ports/sysdeps/arm/armv6/strrchr.S +++ b/ports/sysdeps/arm/armv6/strrchr.S @@ -33,7 +33,8 @@ ENTRY (strrchr) @ Loop a few times until we're aligned. tst r3, #7 beq 2f -1: ldrb r2, [r3], #1 +1: sfi_breg r3, \ + ldrb r2, [\B], #1 cmp r2, r1 @ Find the character it eq subeq r0, r3, #1 @@ -64,7 +65,8 @@ ENTRY (strrchr) @ Loop searching for EOS and C, 8 bytes at a time. @ Any time we find a match in a word, we copy the address of @ the word to r0, and the found bits to r2. -3: ldrd r4, r5, [r3], #8 +3: sfi_breg r3, \ + ldrd r4, r5, [\B], #8 @ Subtracting (unsigned saturating) from 1 means result of 1 for @ any byte that was originally zero and 0 otherwise. Therefore @ we consider the lsb of each byte the "found" bit. diff --git a/ports/sysdeps/arm/armv6t2/memchr.S b/ports/sysdeps/arm/armv6t2/memchr.S index 7f644c3b85..f758971438 100644 --- a/ports/sysdeps/arm/armv6t2/memchr.S +++ b/ports/sysdeps/arm/armv6t2/memchr.S @@ -65,7 +65,8 @@ ENTRY(memchr) @ Work up to an aligned point 5: - ldrb r3, [r0],#1 + sfi_breg r0, \ + ldrb r3, [\B],#1 subs r2, r2, #1 cmp r3, r1 beq 50f @ If it matches exit found @@ -90,7 +91,8 @@ ENTRY(memchr) movs r3, #0 15: - ldrd r4,r5, [r0],#8 + sfi_breg r0, \ + ldrd r4,r5, [\B],#8 #ifndef NO_THUMB subs r6, r6, #8 #endif @@ -128,7 +130,8 @@ ENTRY(memchr) #endif 21: @ Post aligned section, or just a short call - ldrb r3,[r0],#1 + sfi_breg r0, \ + ldrb r3,[\B],#1 #ifndef NO_THUMB subs r2,r2,#1 eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub diff --git a/ports/sysdeps/arm/memcpy.S b/ports/sysdeps/arm/memcpy.S index 926581434e..add82e2d18 100644 --- a/ports/sysdeps/arm/memcpy.S +++ b/ports/sysdeps/arm/memcpy.S @@ -70,7 +70,7 @@ ENTRY(memcpy) subs r2, r2, #4 blt 8f ands ip, r0, #3 - PLD( pld [r1, #0] ) + PLD( sfi_pld r1, #0 ) bne 9f ands ip, r1, #3 bne 10f @@ -97,17 +97,19 @@ ENTRY(memcpy) CALGN( bx r4 ) #endif - PLD( pld [r1, #0] ) + PLD( sfi_pld r1, #0 ) 2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) + PLD( sfi_pld r1, #28 ) PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) + PLD( sfi_pld r1, #60 ) + PLD( sfi_pld r1, #92 ) -3: PLD( pld [r1, #124] ) -4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} +3: PLD( sfi_pld r1, #124 ) +4: sfi_breg r1, \ + ldmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr} subs r2, r2, #32 - stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + sfi_breg r0, \ + stmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr} bge 3b PLD( cmn r2, #96 ) PLD( bge 4b ) @@ -129,19 +131,26 @@ ENTRY(memcpy) .p2align ARM_BX_ALIGN_LOG2 6: nop .p2align ARM_BX_ALIGN_LOG2 - ldr r3, [r1], #4 + sfi_breg r1, \ + ldr r3, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - ldr r4, [r1], #4 + sfi_breg r1, \ + ldr r4, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - ldr r5, [r1], #4 + sfi_breg r1, \ + ldr r5, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - ldr r6, [r1], #4 + sfi_breg r1, \ + ldr r6, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - ldr r7, [r1], #4 + sfi_breg r1, \ + ldr r7, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - ldr r8, [r1], #4 + sfi_breg r1, \ + ldr r8, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - ldr lr, [r1], #4 + sfi_breg r1, \ + ldr lr, [\B], #4 #ifndef ARM_ALWAYS_BX add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) @@ -153,19 +162,26 @@ ENTRY(memcpy) .p2align ARM_BX_ALIGN_LOG2 nop .p2align ARM_BX_ALIGN_LOG2 - str r3, [r0], #4 + sfi_breg r0, \ + str r3, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - str r4, [r0], #4 + sfi_breg r0, \ + str r4, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - str r5, [r0], #4 + sfi_breg r0, \ + str r5, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - str r6, [r0], #4 + sfi_breg r0, \ + str r6, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - str r7, [r0], #4 + sfi_breg r0, \ + str r7, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - str r8, [r0], #4 + sfi_breg r0, \ + str r8, [\B], #4 .p2align ARM_BX_ALIGN_LOG2 - str lr, [r0], #4 + sfi_breg r0, \ + str lr, [\B], #4 #ifdef ARM_ALWAYS_BX pop {r10} @@ -183,12 +199,18 @@ ENTRY(memcpy) cfi_restore (r8) 8: movs r2, r2, lsl #31 - ldrbne r3, [r1], #1 - ldrbcs r4, [r1], #1 - ldrbcs ip, [r1] - strbne r3, [r0], #1 - strbcs r4, [r0], #1 - strbcs ip, [r0] + sfi_breg r1, \ + ldrbne r3, [\B], #1 + sfi_breg r1, \ + ldrbcs r4, [\B], #1 + sfi_breg r1, \ + ldrbcs ip, [\B] + sfi_breg r0, \ + strbne r3, [\B], #1 + sfi_breg r0, \ + strbcs r4, [\B], #1 + sfi_breg r0, \ + strbcs ip, [\B] #if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \ || defined (ARM_ALWAYS_BX)) @@ -205,20 +227,27 @@ ENTRY(memcpy) 9: rsb ip, ip, #4 cmp ip, #2 - ldrbgt r3, [r1], #1 - ldrbge r4, [r1], #1 - ldrb lr, [r1], #1 - strbgt r3, [r0], #1 - strbge r4, [r0], #1 + sfi_breg r1, \ + ldrbgt r3, [\B], #1 + sfi_breg r1, \ + ldrbge r4, [\B], #1 + sfi_breg r1, \ + ldrb lr, [\B], #1 + sfi_breg r0, \ + strbgt r3, [\B], #1 + sfi_breg r0, \ + strbge r4, [\B], #1 subs r2, r2, ip - strb lr, [r0], #1 + sfi_breg r0, \ + strb lr, [\B], #1 blt 8b ands ip, r1, #3 beq 1b 10: bic r1, r1, #3 cmp ip, #2 - ldr lr, [r1], #4 + sfi_breg r1, \ + ldr lr, [\B], #4 beq 17f bgt 18f @@ -242,18 +271,20 @@ ENTRY(memcpy) cfi_rel_offset (r8, 12) cfi_rel_offset (r10, 16) - PLD( pld [r1, #0] ) + PLD( sfi_pld r1, #0 ) PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) + PLD( sfi_pld r1, #28 ) PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) + PLD( sfi_pld r1, #60 ) + PLD( sfi_pld r1, #92 ) -12: PLD( pld [r1, #124] ) -13: ldmia r1!, {r4, r5, r6, r7} +12: PLD( sfi_pld r1, #124 ) +13: sfi_breg r1, \ + ldmia \B!, {r4, r5, r6, r7} mov r3, lr, PULL #\pull subs r2, r2, #32 - ldmia r1!, {r8, r10, ip, lr} + sfi_breg r1, \ + ldmia \B!, {r8, r10, ip, lr} orr r3, r3, r4, PUSH #\push mov r4, r4, PULL #\pull orr r4, r4, r5, PUSH #\push @@ -269,7 +300,8 @@ ENTRY(memcpy) orr r10, r10, ip, PUSH #\push mov ip, ip, PULL #\pull orr ip, ip, lr, PUSH #\push - stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip} + sfi_breg r0, \ + stmia \B!, {r3, r4, r5, r6, r7, r8, r10, ip} bge 12b PLD( cmn r2, #96 ) PLD( bge 13b ) @@ -286,10 +318,12 @@ ENTRY(memcpy) beq 16f 15: mov r3, lr, PULL #\pull - ldr lr, [r1], #4 + sfi_breg r1, \ + ldr lr, [\B], #4 subs ip, ip, #4 orr r3, r3, lr, PUSH #\push - str r3, [r0], #4 + sfi_breg r0, \ + str r3, [\B], #4 bgt 15b CALGN( cmp r2, #0 ) CALGN( bge 11b ) diff --git a/ports/sysdeps/arm/memmove.S b/ports/sysdeps/arm/memmove.S index 96b27cfefc..9d4d5b099e 100644 --- a/ports/sysdeps/arm/memmove.S +++ b/ports/sysdeps/arm/memmove.S @@ -87,7 +87,7 @@ ENTRY(memmove) subs r2, r2, #4 blt 8f ands ip, r0, #3 - PLD( pld [r1, #-4] ) + PLD( sfi_pld r1, #-4 ) bne 9f ands ip, r1, #3 bne 10f @@ -113,17 +113,19 @@ ENTRY(memmove) CALGN( bx r4 ) #endif - PLD( pld [r1, #-4] ) + PLD( sfi_pld r1, #-4 ) 2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) + PLD( sfi_pld r1, #-32 ) PLD( blt 4f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) + PLD( sfi_pld r1, #-64 ) + PLD( sfi_pld r1, #-96 ) -3: PLD( pld [r1, #-128] ) -4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} +3: PLD( sfi_pld r1, #-128 ) +4: sfi_breg r1, \ + ldmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr} subs r2, r2, #32 - stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + sfi_breg r0, \ + stmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr} bge 3b PLD( cmn r2, #96 ) PLD( bge 4b ) @@ -145,19 +147,26 @@ ENTRY(memmove) .p2align ARM_BX_ALIGN_LOG2 6: nop .p2align ARM_BX_ALIGN_LOG2 - ldr r3, [r1, #-4]! + sfi_breg r1, \ + ldr r3, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - ldr r4, [r1, #-4]! + sfi_breg r1, \ + ldr r4, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - ldr r5, [r1, #-4]! + sfi_breg r1, \ + ldr r5, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - ldr r6, [r1, #-4]! + sfi_breg r1, \ + ldr r6, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - ldr r7, [r1, #-4]! + sfi_breg r1, \ + ldr r7, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - ldr r8, [r1, #-4]! + sfi_breg r1, \ + ldr r8, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - ldr lr, [r1, #-4]! + sfi_breg r1, \ + ldr lr, [\B, #-4]! #ifndef ARM_ALWAYS_BX add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) @@ -169,19 +178,26 @@ ENTRY(memmove) .p2align ARM_BX_ALIGN_LOG2 nop .p2align ARM_BX_ALIGN_LOG2 - str r3, [r0, #-4]! + sfi_breg r0, \ + str r3, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - str r4, [r0, #-4]! + sfi_breg r0, \ + str r4, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - str r5, [r0, #-4]! + sfi_breg r0, \ + str r5, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - str r6, [r0, #-4]! + sfi_breg r0, \ + str r6, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - str r7, [r0, #-4]! + sfi_breg r0, \ + str r7, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - str r8, [r0, #-4]! + sfi_breg r0, \ + str r8, [\B, #-4]! .p2align ARM_BX_ALIGN_LOG2 - str lr, [r0, #-4]! + sfi_breg r0, \ + str lr, [\B, #-4]! #ifdef ARM_ALWAYS_BX pop {r10} @@ -199,12 +215,18 @@ ENTRY(memmove) cfi_restore (r8) 8: movs r2, r2, lsl #31 - ldrbne r3, [r1, #-1]! - ldrbcs r4, [r1, #-1]! - ldrbcs ip, [r1, #-1] - strbne r3, [r0, #-1]! - strbcs r4, [r0, #-1]! - strbcs ip, [r0, #-1] + sfi_breg r1, \ + ldrbne r3, [\B, #-1]! + sfi_breg r1, \ + ldrbcs r4, [\B, #-1]! + sfi_breg r1, \ + ldrbcs ip, [\B, #-1] + sfi_breg r0, \ + strbne r3, [\B, #-1]! + sfi_breg r0, \ + strbcs r4, [\B, #-1]! + sfi_breg r0, \ + strbcs ip, [\B, #-1] #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \ || defined (ARM_ALWAYS_BX)) @@ -220,20 +242,27 @@ ENTRY(memmove) cfi_restore_state 9: cmp ip, #2 - ldrbgt r3, [r1, #-1]! - ldrbge r4, [r1, #-1]! - ldrb lr, [r1, #-1]! - strbgt r3, [r0, #-1]! - strbge r4, [r0, #-1]! + sfi_breg r1, \ + ldrbgt r3, [\B, #-1]! + sfi_breg r1, \ + ldrbge r4, [\B, #-1]! + sfi_breg r1, \ + ldrb lr, [\B, #-1]! + sfi_breg r0, \ + strbgt r3, [\B, #-1]! + sfi_breg r0, \ + strbge r4, [\B, #-1]! subs r2, r2, ip - strb lr, [r0, #-1]! + sfi_breg r0, \ + strb lr, [\B, #-1]! blt 8b ands ip, r1, #3 beq 1b 10: bic r1, r1, #3 cmp ip, #2 - ldr r3, [r1, #0] + sfi_breg r1, \ + ldr r3, [\B, #0] beq 17f blt 18f @@ -257,18 +286,20 @@ ENTRY(memmove) cfi_rel_offset (r8, 12) cfi_rel_offset (r10, 16) - PLD( pld [r1, #-4] ) + PLD( sfi_pld r1, #-4 ) PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) + PLD( sfi_pld r1, #-32 ) PLD( blt 13f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) + PLD( sfi_pld r1, #-64 ) + PLD( sfi_pld r1, #-96 ) -12: PLD( pld [r1, #-128] ) -13: ldmdb r1!, {r7, r8, r10, ip} +12: PLD( sfi_pld r1, #-128 ) +13: sfi_breg r1, \ + ldmdb \B!, {r7, r8, r10, ip} mov lr, r3, PUSH #\push subs r2, r2, #32 - ldmdb r1!, {r3, r4, r5, r6} + sfi_breg r1, \ + ldmdb \B!, {r3, r4, r5, r6} orr lr, lr, ip, PULL #\pull mov ip, ip, PUSH #\push orr ip, ip, r10, PULL #\pull @@ -284,7 +315,8 @@ ENTRY(memmove) orr r5, r5, r4, PULL #\pull mov r4, r4, PUSH #\push orr r4, r4, r3, PULL #\pull - stmdb r0!, {r4 - r8, r10, ip, lr} + sfi_breg r0, \ + stmdb \B!, {r4 - r8, r10, ip, lr} bge 12b PLD( cmn r2, #96 ) PLD( bge 13b ) @@ -301,10 +333,12 @@ ENTRY(memmove) beq 16f 15: mov lr, r3, PUSH #\push - ldr r3, [r1, #-4]! + sfi_breg r1, \ + ldr r3, [\B, #-4]! subs ip, ip, #4 orr lr, lr, r3, PULL #\pull - str lr, [r0, #-4]! + sfi_breg r0, \ + str lr, [\B, #-4]! bgt 15b CALGN( cmp r2, #0 ) CALGN( bge 11b ) diff --git a/ports/sysdeps/arm/memset.S b/ports/sysdeps/arm/memset.S index 5e055adea8..a28cdc6205 100644 --- a/ports/sysdeps/arm/memset.S +++ b/ports/sysdeps/arm/memset.S @@ -32,7 +32,8 @@ ENTRY(memset) 1: tst r3, #3 @ aligned yet? - strbne r1, [r3], #1 + sfi_breg r3, \ + strbne r1, [\B], #1 subne r2, r2, #1 bne 1b @@ -43,25 +44,33 @@ ENTRY(memset) 1: subs r2, r2, #8 - stmiacs r3!, {r1, ip} @ store up to 32 bytes per loop iteration + sfi_breg r3, \ + stmiacs \B!, {r1, ip} @ store up to 32 bytes per loop iteration subscs r2, r2, #8 - stmiacs r3!, {r1, ip} + sfi_breg r3, \ + stmiacs \B!, {r1, ip} subscs r2, r2, #8 - stmiacs r3!, {r1, ip} + sfi_breg r3, \ + stmiacs \B!, {r1, ip} subscs r2, r2, #8 - stmiacs r3!, {r1, ip} + sfi_breg r3, \ + stmiacs \B!, {r1, ip} bcs 1b and r2, r2, #7 2: subs r2, r2, #1 @ store up to 4 bytes per loop iteration - strbcs r1, [r3], #1 + sfi_breg r3, \ + strbcs r1, [\B], #1 subscs r2, r2, #1 - strbcs r1, [r3], #1 + sfi_breg r3, \ + strbcs r1, [\B], #1 subscs r2, r2, #1 - strbcs r1, [r3], #1 + sfi_breg r3, \ + strbcs r1, [\B], #1 subscs r2, r2, #1 - strbcs r1, [r3], #1 + sfi_breg r3, \ + strbcs r1, [\B], #1 bcs 2b DO_RET(lr) diff --git a/ports/sysdeps/arm/setjmp.S b/ports/sysdeps/arm/setjmp.S index c41a1baddd..f750470652 100644 --- a/ports/sysdeps/arm/setjmp.S +++ b/ports/sysdeps/arm/setjmp.S @@ -27,7 +27,8 @@ ENTRY (__sigsetjmp) mov ip, r0 /* Save registers */ - stmia ip!, JMP_BUF_REGLIST + sfi_breg ip, \ + stmia \B!, JMP_BUF_REGLIST #if !defined ARM_ASSUME_NO_IWMMXT || defined __SOFTFP__ # define NEED_HWCAP 1 @@ -64,11 +65,13 @@ ENTRY (__sigsetjmp) Don't use VFP instructions directly because this code is used in non-VFP multilibs. */ /* Following instruction is vstmia ip!, {d8-d15}. */ - stc p11, cr8, [ip], #64 + sfi_breg ip, \ + stc p11, cr8, [\B], #64 /* Store the floating-point status register. */ /* Following instruction is vmrs a4, fpscr. */ mrc p10, 7, a4, cr1, cr0, 0 - str a4, [ip], #4 + sfi_breg ip, \ + str a4, [\B], #4 .Lno_vfp: #ifndef ARM_ASSUME_NO_IWMMXT @@ -77,12 +80,18 @@ ENTRY (__sigsetjmp) /* Save the call-preserved iWMMXt registers. */ /* Following instructions are wstrd wr10, [ip], #8 (etc.) */ - stcl p1, cr10, [r12], #8 - stcl p1, cr11, [r12], #8 - stcl p1, cr12, [r12], #8 - stcl p1, cr13, [r12], #8 - stcl p1, cr14, [r12], #8 - stcl p1, cr15, [r12], #8 + sfi_breg r12, \ + stcl p1, cr10, [\B], #8 + sfi_breg r12, \ + stcl p1, cr11, [\B], #8 + sfi_breg r12, \ + stcl p1, cr12, [\B], #8 + sfi_breg r12, \ + stcl p1, cr13, [\B], #8 + sfi_breg r12, \ + stcl p1, cr14, [\B], #8 + sfi_breg r12, \ + stcl p1, cr15, [\B], #8 .Lno_iwmmxt: #endif diff --git a/ports/sysdeps/arm/strlen.S b/ports/sysdeps/arm/strlen.S index fef62cff47..7d358a7249 100644 --- a/ports/sysdeps/arm/strlen.S +++ b/ports/sysdeps/arm/strlen.S @@ -30,7 +30,8 @@ ENTRY(strlen) bic r1, r0, $3 @ addr of word containing first byte - ldr r2, [r1], $4 @ get the first word + sfi_breg r1, \ + ldr r2, [\B], $4 @ get the first word ands r3, r0, $3 @ how many bytes are duff? rsb r0, r3, $0 @ get - that number into counter. beq Laligned @ skip into main check routine if no @@ -54,7 +55,8 @@ Laligned: @ here, we have a word in r2. Does it tstne r2, $0x00ff0000 @ tstne r2, $0xff000000 @ addne r0, r0, $4 @ if not, the string is 4 bytes longer - ldrne r2, [r1], $4 @ and we continue to the next word + sfi_breg r1, \ + ldrne r2, [\B], $4 @ and we continue to the next word bne Laligned @ Llastword: @ drop through to here once we find a #ifdef __ARMEB__ diff --git a/ports/sysdeps/arm/submul_1.S b/ports/sysdeps/arm/submul_1.S index 3805eca9be..2d17490d8b 100644 --- a/ports/sysdeps/arm/submul_1.S +++ b/ports/sysdeps/arm/submul_1.S @@ -37,19 +37,24 @@ ENTRY (__mpn_submul_1) cfi_rel_offset (r6, 8) cfi_rel_offset (r7, 12) - ldr r6, [r1], #4 - ldr r7, [r0] + sfi_breg r1, \ + ldr r6, [\B], #4 + sfi_breg r0, \ + ldr r7, [\B] mov r4, #0 /* init carry in */ b 1f 0: - ldr r6, [r1], #4 /* load next ul */ + sfi_breg r1, \ + ldr r6, [\B], #4 /* load next ul */ adds r5, r5, r4 /* (lpl, c) = lpl + cl */ adc r4, ip, #0 /* cl = hpl + c */ subs r5, r7, r5 /* (lpl, !c) = rl - lpl */ - ldr r7, [r0, #4] /* load next rl */ + sfi_breg r0, \ + ldr r7, [\B, #4] /* load next rl */ it cc addcc r4, r4, #1 /* cl += !c */ - str r5, [r0], #4 + sfi_breg r0, \ + str r5, [\B], #4 1: umull r5, ip, r6, r3 /* (hpl, lpl) = ul * vl */ subs r2, r2, #1 @@ -58,7 +63,8 @@ ENTRY (__mpn_submul_1) adds r5, r5, r4 /* (lpl, c) = lpl + cl */ adc r4, ip, #0 /* cl = hpl + c */ subs r5, r7, r5 /* (lpl, !c) = rl - lpl */ - str r5, [r0], #4 + sfi_breg r0, \ + str r5, [\B], #4 it cc addcc r4, r4, #1 /* cl += !c */ mov r0, r4 /* return carry */ diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h index 4a7a13f6fb..4917805857 100644 --- a/ports/sysdeps/arm/sysdep.h +++ b/ports/sysdeps/arm/sysdep.h @@ -210,6 +210,48 @@ cfi_restore_state # endif /* ARCH_HAS_HARD_TP */ +# ifndef ARM_SFI_MACROS +# define ARM_SFI_MACROS 1 +/* This assembly macro is prepended to any load/store instruction, + pulling the base register out of the addressing mode syntax and + making it the first operand of the macro. For example: + ldr r0, [r1] + becomes: + sfi_breg r1, ldr r0, [\B] + The \B stands in for the base register that is the first operand + to the macro, so we can avoid error-prone repetition of the base + register in two places on the line. + + This is used for all memory access through a base register other + than PC or SP. It's intended to support SFI schemes such as + Native Client, where the OS will enforce that all load/store + instructions use a special form. In any such configuration, + another sysdep.h file will have defined ARM_SFI_MACROS and + provided its own assembly macros with the same interface. */ + + .macro sfi_breg basereg, insn, operands:vararg + .macro _sfi_breg_doit B + \insn \operands + .endm + _sfi_breg_doit \basereg + .purgem _sfi_breg_doit + .endm + +/* This assembly macro replaces the "pld" instruction. + The syntax: + sfi_pld REGISTER, #OFFSET + is exactly equivalent to: + sfi_breg REGISTER, pld [\B, #OFFSET] + (and ", #OFFSET" is optional). We have a separate macro + only to work around a bug in GAS versions prior to 2.23.2, + that misparses the sfi_breg macro expansion in this case. */ + + .macro sfi_pld basereg, offset=#0 + pld [\basereg, \offset] + .endm + +# endif + #endif /* __ASSEMBLER__ */ /* This number is the offset from the pc at the current location. */ -- cgit 1.4.1