From 8a7413f9b036da83ffde491a37d9d2340bc321a7 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Sat, 17 Aug 2013 18:41:17 +0930 Subject: PowerPC LE strcmp and strncmp http://sourceware.org/ml/libc-alpha/2013-08/msg00099.html More little-endian support. I leave the main strcmp loops unchanged (well, except for renumbering rTMP to something other than r0, since it's needed as the source operand of an addi insn, where r0 would be read as zero) and modify the tail for little-endian. I noticed some of the big-endian tail code was a little untidy, so I have cleaned that up too. * sysdeps/powerpc/powerpc64/strcmp.S (rTMP2): Define as r0. (rTMP): Define as r11. (strcmp): Add little-endian support. Optimise tail. * sysdeps/powerpc/powerpc32/strcmp.S: Similarly. * sysdeps/powerpc/powerpc64/strncmp.S: Likewise. * sysdeps/powerpc/powerpc32/strncmp.S: Likewise. * sysdeps/powerpc/powerpc64/power4/strncmp.S: Likewise. * sysdeps/powerpc/powerpc32/power4/strncmp.S: Likewise. * sysdeps/powerpc/powerpc64/power7/strncmp.S: Likewise. * sysdeps/powerpc/powerpc32/power7/strncmp.S: Likewise. --- sysdeps/powerpc/powerpc64/strcmp.S | 65 ++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 9 deletions(-) (limited to 'sysdeps/powerpc/powerpc64/strcmp.S') diff --git a/sysdeps/powerpc/powerpc64/strcmp.S b/sysdeps/powerpc/powerpc64/strcmp.S index c9d6dac121..70854689d3 100644 --- a/sysdeps/powerpc/powerpc64/strcmp.S +++ b/sysdeps/powerpc/powerpc64/strcmp.S @@ -25,7 +25,7 @@ EALIGN (strcmp, 4, 0) CALL_MCOUNT 2 -#define rTMP r0 +#define rTMP2 r0 #define rRTN r3 #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ @@ -35,6 +35,7 @@ EALIGN (strcmp, 4, 0) #define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */ #define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ #define rBITDIF r10 /* bits that differ in s1 & s2 words */ +#define rTMP r11 dcbt 0,rSTR1 or rTMP, rSTR2, rSTR1 @@ -58,19 +59,66 @@ L(g0): ldu rWORD1, 8(rSTR1) ldu rWORD2, 8(rSTR2) L(g1): add rTMP, rFEFE, rWORD1 nor rNEG, r7F7F, rWORD1 - and. 
rTMP, rTMP, rNEG cmpd cr1, rWORD1, rWORD2 beq+ L(g0) -L(endstring): + /* OK. We've hit the end of the string. We need to be careful that we don't compare two strings as different because of gunk beyond the end of the strings... */ +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): and rTMP, r7F7F, rWORD1 beq cr1, L(equal) add rTMP, rTMP, r7F7F xor. rBITDIF, rWORD1, rWORD2 - andc rNEG, rNEG, rTMP blt- L(highbit) cntlzd rBITDIF, rBITDIF @@ -79,7 +127,7 @@ L(endstring): cmpd cr1, rNEG, rBITDIF sub rRTN, rWORD1, rWORD2 blt- cr1, L(equal) - sradi rRTN, rRTN, 63 + sradi rRTN, rRTN, 63 /* must return an int. 
*/ ori rRTN, rRTN, 1 blr L(equal): @@ -95,11 +143,10 @@ L(different): ori rRTN, rRTN, 1 blr L(highbit): - srdi rWORD2, rWORD2, 56 - srdi rWORD1, rWORD1, 56 - sub rRTN, rWORD1, rWORD2 + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 blr - +#endif /* Oh well. In this case, we just do a byte-by-byte comparison. */ .align 4 -- cgit 1.4.1