diff options
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power8/strcasecmp.S')
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 457 |
1 files changed, 0 insertions, 457 deletions
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S deleted file mode 100644 index 88b17a6eb1..0000000000 --- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S +++ /dev/null @@ -1,457 +0,0 @@ -/* Optimized strcasecmp implementation for PowerPC64. - Copyright (C) 2016-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <locale-defines.h> - -/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */ - -#ifndef USE_AS_STRNCASECMP -# define __STRCASECMP __strcasecmp -# define STRCASECMP strcasecmp -#else -# define __STRCASECMP __strncasecmp -# define STRCASECMP strncasecmp -#endif -/* Convert 16 bytes to lowercase and compare */ -#define TOLOWER() \ - vaddubm v8, v4, v1; \ - vaddubm v7, v4, v3; \ - vcmpgtub v8, v8, v2; \ - vsel v4, v7, v4, v8; \ - vaddubm v8, v5, v1; \ - vaddubm v7, v5, v3; \ - vcmpgtub v8, v8, v2; \ - vsel v5, v7, v5, v8; \ - vcmpequb. v7, v5, v4; - -/* - * Get 16 bytes for unaligned case. - * reg1: Vector to hold next 16 bytes. - * reg2: Address to read from. - * reg3: Permute control vector. - * v8: Tmp vector used to mask unwanted bytes. - * v9: Tmp vector,0 when null is found on first 16 bytes - */ -#ifdef __LITTLE_ENDIAN__ -#define GET16BYTES(reg1, reg2, reg3) \ - lvx reg1, 0, reg2; \ - vspltisb v8, -1; \ - vperm v8, v8, reg1, reg3; \ - vcmpequb. v8, v0, v8; \ - beq cr6, 1f; \ - vspltisb v9, 0; \ - b 2f; \ - .align 4; \ -1: \ - addi r6, reg2, 16; \ - lvx v9, 0, r6; \ -2: \ - vperm reg1, v9, reg1, reg3; -#else -#define GET16BYTES(reg1, reg2, reg3) \ - lvx reg1, 0, reg2; \ - vspltisb v8, -1; \ - vperm v8, reg1, v8, reg3; \ - vcmpequb. v8, v0, v8; \ - beq cr6, 1f; \ - vspltisb v9, 0; \ - b 2f; \ - .align 4; \ -1: \ - addi r6, reg2, 16; \ - lvx v9, 0, r6; \ -2: \ - vperm reg1, reg1, v9, reg3; -#endif - -/* Check null in v4, v5 and convert to lower. */ -#define CHECKNULLANDCONVERT() \ - vcmpequb. v7, v0, v5; \ - beq cr6, 3f; \ - vcmpequb. v7, v0, v4; \ - beq cr6, 3f; \ - b L(null_found); \ - .align 4; \ -3: \ - TOLOWER() - -#ifdef _ARCH_PWR8 -# define VCLZD_V8_v7 vclzd v8, v7; -# define MFVRD_R3_V1 mfvrd r3, v1; -# define VSUBUDM_V9_V8 vsubudm v9, v9, v8; -# define VPOPCNTD_V8_V8 vpopcntd v8, v8; -# define VADDUQM_V7_V8 vadduqm v9, v7, v8; -#else -# define VCLZD_V8_v7 .long 0x11003fc2 -# define MFVRD_R3_V1 .long 0x7c230067 -# define VSUBUDM_V9_V8 .long 0x112944c0 -# define VPOPCNTD_V8_V8 .long 0x110047c3 -# define VADDUQM_V7_V8 .long 0x11274100 -#endif - - .machine power7 - -ENTRY (__STRCASECMP) -#ifdef USE_AS_STRNCASECMP - CALL_MCOUNT 3 -#else - CALL_MCOUNT 2 -#endif -#define rRTN r3 /* Return value */ -#define rSTR1 r10 /* 1st string */ -#define rSTR2 r4 /* 2nd string */ -#define rCHAR1 r6 /* Byte read from 1st string */ -#define rCHAR2 r7 /* Byte read from 2nd string */ -#define rADDR1 r8 /* Address of tolower(rCHAR1) */ -#define rADDR2 r12 /* Address of tolower(rCHAR2) */ -#define rLWR1 r8 /* Word tolower(rCHAR1) */ -#define rLWR2 r12 /* Word tolower(rCHAR2) */ -#define rTMP r9 -#define rLOC r11 /* Default locale address */ - - cmpd cr7, rRTN, rSTR2 - - /* Get locale address. */ - ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) - add rLOC, rTMP, __libc_tsd_LOCALE@tls - ld rLOC, 0(rLOC) - - mr rSTR1, rRTN - li rRTN, 0 - beqlr cr7 -#ifdef USE_AS_STRNCASECMP - cmpdi cr7, r5, 0 - beq cr7, L(retnull) - cmpdi cr7, r5, 16 - blt cr7, L(bytebybyte) -#endif - vspltisb v0, 0 - vspltisb v8, -1 - /* Check for null in initial characters. - Check max of 16 char depending on the alignment. - If null is present, proceed byte by byte. */ - lvx v4, 0, rSTR1 -#ifdef __LITTLE_ENDIAN__ - lvsr v10, 0, rSTR1 /* Compute mask. */ - vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */ -#else - lvsl v10, 0, rSTR1 - vperm v9, v4, v8, v10 -#endif - vcmpequb. v9, v0, v9 /* Check for null bytes. */ - bne cr6, L(bytebybyte) - lvx v5, 0, rSTR2 - /* Calculate alignment. */ -#ifdef __LITTLE_ENDIAN__ - lvsr v6, 0, rSTR2 - vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */ -#else - lvsl v6, 0, rSTR2 - vperm v9, v5, v8, v6 -#endif - vcmpequb. v9, v0, v9 /* Check for null bytes. */ - bne cr6, L(bytebybyte) - /* Check if locale has non ascii characters. */ - ld rTMP, 0(rLOC) - addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES - lwz rTMP, 0(r6) - cmpdi cr7, rTMP, 1 - beq cr7, L(bytebybyte) - - /* Load vector registers with values used for TOLOWER. */ - /* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte. */ - vspltisb v3, 2 - vspltisb v9, 4 - vsl v3, v3, v9 - vaddubm v1, v3, v3 - vnor v1, v1, v1 - vspltisb v2, 7 - vsububm v2, v3, v2 - - andi. rADDR1, rSTR1, 0xF - beq cr0, L(align) - addi r6, rSTR1, 16 - lvx v9, 0, r6 - /* Compute 16 bytes from previous two loads. */ -#ifdef __LITTLE_ENDIAN__ - vperm v4, v9, v4, v10 -#else - vperm v4, v4, v9, v10 -#endif -L(align): - andi. rADDR2, rSTR2, 0xF - beq cr0, L(align1) - addi r6, rSTR2, 16 - lvx v9, 0, r6 - /* Compute 16 bytes from previous two loads. */ -#ifdef __LITTLE_ENDIAN__ - vperm v5, v9, v5, v6 -#else - vperm v5, v5, v9, v6 -#endif -L(align1): - CHECKNULLANDCONVERT() - blt cr6, L(match) - b L(different) - .align 4 -L(match): - clrldi r6, rSTR1, 60 - subfic r7, r6, 16 -#ifdef USE_AS_STRNCASECMP - sub r5, r5, r7 -#endif - add rSTR1, rSTR1, r7 - add rSTR2, rSTR2, r7 - andi. rADDR2, rSTR2, 0xF - addi rSTR1, rSTR1, -16 - addi rSTR2, rSTR2, -16 - beq cr0, L(aligned) -#ifdef __LITTLE_ENDIAN__ - lvsr v6, 0, rSTR2 -#else - lvsl v6, 0, rSTR2 -#endif - /* There are 2 loops depending on the input alignment. - Each loop gets 16 bytes from s1 and s2, check for null, - convert to lowercase and compare. Loop till difference - or null occurs. */ -L(s1_align): - addi rSTR1, rSTR1, 16 - addi rSTR2, rSTR2, 16 -#ifdef USE_AS_STRNCASECMP - cmpdi cr7, r5, 16 - blt cr7, L(bytebybyte) - addi r5, r5, -16 -#endif - lvx v4, 0, rSTR1 - GET16BYTES(v5, rSTR2, v6) - CHECKNULLANDCONVERT() - blt cr6, L(s1_align) - b L(different) - .align 4 -L(aligned): - addi rSTR1, rSTR1, 16 - addi rSTR2, rSTR2, 16 -#ifdef USE_AS_STRNCASECMP - cmpdi cr7, r5, 16 - blt cr7, L(bytebybyte) - addi r5, r5, -16 -#endif - lvx v4, 0, rSTR1 - lvx v5, 0, rSTR2 - CHECKNULLANDCONVERT() - blt cr6, L(aligned) - - /* Calculate and return the difference. */ -L(different): - vaddubm v1, v3, v3 - vcmpequb v7, v0, v7 -#ifdef __LITTLE_ENDIAN__ - /* Count trailing zero. */ - vspltisb v8, -1 - VADDUQM_V7_V8 - vandc v8, v9, v7 - VPOPCNTD_V8_V8 - vspltb v6, v8, 15 - vcmpequb. v6, v6, v1 - blt cr6, L(shift8) -#else - /* Count leading zero. */ - VCLZD_V8_v7 - vspltb v6, v8, 7 - vcmpequb. v6, v6, v1 - blt cr6, L(shift8) - vsro v8, v8, v1 -#endif - b L(skipsum) - .align 4 -L(shift8): - vsumsws v8, v8, v0 -L(skipsum): -#ifdef __LITTLE_ENDIAN__ - /* Shift registers based on leading zero count. */ - vsro v6, v5, v8 - vsro v7, v4, v8 - /* Merge and move to GPR. */ - vmrglb v6, v6, v7 - vslo v1, v6, v1 - MFVRD_R3_V1 - /* Place the characters that are different in first position. */ - sldi rSTR2, rRTN, 56 - srdi rSTR2, rSTR2, 56 - sldi rSTR1, rRTN, 48 - srdi rSTR1, rSTR1, 56 -#else - vslo v6, v5, v8 - vslo v7, v4, v8 - vmrghb v1, v6, v7 - MFVRD_R3_V1 - srdi rSTR2, rRTN, 48 - sldi rSTR2, rSTR2, 56 - srdi rSTR2, rSTR2, 56 - srdi rSTR1, rRTN, 56 -#endif - subf rRTN, rSTR1, rSTR2 - extsw rRTN, rRTN - blr - - .align 4 - /* OK. We've hit the end of the string. We need to be careful that - we don't compare two strings as different because of junk beyond - the end of the strings... */ -L(null_found): - vaddubm v10, v3, v3 -#ifdef __LITTLE_ENDIAN__ - /* Count trailing zero. */ - vspltisb v8, -1 - VADDUQM_V7_V8 - vandc v8, v9, v7 - VPOPCNTD_V8_V8 - vspltb v6, v8, 15 - vcmpequb. v6, v6, v10 - blt cr6, L(shift_8) -#else - /* Count leading zero. */ - VCLZD_V8_v7 - vspltb v6, v8, 7 - vcmpequb. v6, v6, v10 - blt cr6, L(shift_8) - vsro v8, v8, v10 -#endif - b L(skipsum1) - .align 4 -L(shift_8): - vsumsws v8, v8, v0 -L(skipsum1): - /* Calculate shift count based on count of zero. */ - vspltisb v10, 7 - vslb v10, v10, v10 - vsldoi v9, v0, v10, 1 - VSUBUDM_V9_V8 - vspltisb v8, 8 - vsldoi v8, v0, v8, 1 - VSUBUDM_V9_V8 - /* Shift and remove junk after null character. */ -#ifdef __LITTLE_ENDIAN__ - vslo v5, v5, v9 - vslo v4, v4, v9 -#else - vsro v5, v5, v9 - vsro v4, v4, v9 -#endif - /* Convert and compare 16 bytes. */ - TOLOWER() - blt cr6, L(retnull) - b L(different) - .align 4 -L(retnull): - li rRTN, 0 - blr - .align 4 -L(bytebybyte): - /* Unrolling loop for POWER: loads are done with 'lbz' plus - offset and string descriptors are only updated in the end - of loop unrolling. */ - ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) - lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ - lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ -#ifdef USE_AS_STRNCASECMP - rldicl rTMP, r5, 62, 2 - cmpdi cr7, rTMP, 0 - beq cr7, L(lessthan4) - mtctr rTMP -#endif -L(loop): - cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ - sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ - sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ - lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ - lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ - cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */ - crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ - beq cr1, L(done) - lbz rCHAR1, 1(rSTR1) - lbz rCHAR2, 1(rSTR2) - cmpdi rCHAR1, 0 - sldi rADDR1, rCHAR1, 2 - sldi rADDR2, rCHAR2, 2 - lwzx rLWR1, rLOC, rADDR1 - lwzx rLWR2, rLOC, rADDR2 - cmpw cr1, rLWR1, rLWR2 - crorc 4*cr1+eq,eq,4*cr1+eq - beq cr1, L(done) - lbz rCHAR1, 2(rSTR1) - lbz rCHAR2, 2(rSTR2) - cmpdi rCHAR1, 0 - sldi rADDR1, rCHAR1, 2 - sldi rADDR2, rCHAR2, 2 - lwzx rLWR1, rLOC, rADDR1 - lwzx rLWR2, rLOC, rADDR2 - cmpw cr1, rLWR1, rLWR2 - crorc 4*cr1+eq,eq,4*cr1+eq - beq cr1, L(done) - lbz rCHAR1, 3(rSTR1) - lbz rCHAR2, 3(rSTR2) - cmpdi rCHAR1, 0 - /* Increment both string descriptors */ - addi rSTR1, rSTR1, 4 - addi rSTR2, rSTR2, 4 - sldi rADDR1, rCHAR1, 2 - sldi rADDR2, rCHAR2, 2 - lwzx rLWR1, rLOC, rADDR1 - lwzx rLWR2, rLOC, rADDR2 - cmpw cr1, rLWR1, rLWR2 - crorc 4*cr1+eq,eq,4*cr1+eq - beq cr1, L(done) - lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ - lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ -#ifdef USE_AS_STRNCASECMP - bdnz L(loop) -#else - b L(loop) -#endif -#ifdef USE_AS_STRNCASECMP -L(lessthan4): - clrldi r5, r5, 62 - cmpdi cr7, r5, 0 - beq cr7, L(retnull) - mtctr r5 -L(loop1): - cmpdi rCHAR1, 0 - sldi rADDR1, rCHAR1, 2 - sldi rADDR2, rCHAR2, 2 - lwzx rLWR1, rLOC, rADDR1 - lwzx rLWR2, rLOC, rADDR2 - cmpw cr1, rLWR1, rLWR2 - crorc 4*cr1+eq,eq,4*cr1+eq - beq cr1, L(done) - addi rSTR1, rSTR1, 1 - addi rSTR2, rSTR2, 1 - lbz rCHAR1, 0(rSTR1) - lbz rCHAR2, 0(rSTR2) - bdnz L(loop1) -#endif -L(done): - subf r0, rLWR2, rLWR1 - extsw rRTN, r0 - blr -END (__STRCASECMP) - -weak_alias (__STRCASECMP, STRCASECMP) -libc_hidden_builtin_def (__STRCASECMP) |