about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power8/strcasecmp.S')
-rw-r--r--sysdeps/powerpc/powerpc64/power8/strcasecmp.S457
1 files changed, 0 insertions, 457 deletions
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
deleted file mode 100644
index 88b17a6eb1..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
+++ /dev/null
@@ -1,457 +0,0 @@
-/* Optimized strcasecmp implementation for PowerPC64.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <locale-defines.h>
-
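-/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] )
-   When USE_AS_STRNCASECMP is defined the same code builds
-   int [r3] strncasecmp (const char *s1 [r3], const char *s2 [r4],
-			  size_t n [r5] ).  */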
-
-#ifndef USE_AS_STRNCASECMP
-#  define __STRCASECMP __strcasecmp
-#  define STRCASECMP   strcasecmp
-#else
-#  define __STRCASECMP __strncasecmp
-#  define STRCASECMP   strncasecmp
-#endif
-/* Convert 16 bytes to lowercase and compare.
-   In:   v4, v5 = 16 bytes taken from s1 and s2.
-         v1, v2, v3 = 0xbf, 0x19 and 0x20 replicated in every byte.
-   Out:  v4, v5 = the inputs with 'A'..'Z' replaced by 'a'..'z'.
-         v7 = per-byte equality mask of v5 and v4; the record form of the
-         compare also summarises it in CR6 (LT set when all 16 bytes match).
-   Tmp:  v8.
-   Adding 0xbf wraps 'A'..'Z' (0x41..0x5a) around to 0x00..0x19, so one
-   unsigned compare against 0x19 flags every byte that is not an uppercase
-   letter.  vsel keeps the flagged bytes unchanged and takes byte + 0x20
-   for the remaining ones.  */
-#define TOLOWER()     \
-	vaddubm	v8, v4, v1; \
-	vaddubm	v7, v4, v3; \
-	vcmpgtub	v8, v8, v2; \
-	vsel	v4, v7, v4, v8; \
-	vaddubm	v8, v5, v1; \
-	vaddubm	v7, v5, v3; \
-	vcmpgtub	v8, v8, v2; \
-	vsel	v5, v7, v5, v8; \
-	vcmpequb.	v7, v5, v4;
-
-/*
- * Get 16 bytes for unaligned case.
- * reg1: Vector to hold next 16 bytes.
- * reg2: Address to read from.
- * reg3: Permute control vector.
- * v0: Must hold zero in every byte; it is the reference for the null test.
- * v8: Tmp vector used to mask unwanted bytes.
- * v9: Tmp vector, 0 when null is found on first 16 bytes.
- * r6: Clobbered when the following quadword has to be loaded.
- *
- * The first quadword is permuted together with an all-ones vector so that
- * only the bytes selected from the loaded data can compare equal to zero.
- * The quadword at reg2 + 16 is loaded only when that test finds no null
- * byte; otherwise v9 is cleared and no second load is issued.
- * Uses local labels 1 and 2.
- */
-#ifdef __LITTLE_ENDIAN__
-#define GET16BYTES(reg1, reg2, reg3) \
-	lvx	reg1, 0, reg2; \
-	vspltisb	v8, -1; \
-	vperm	v8, v8, reg1, reg3; \
-	vcmpequb.	v8, v0, v8; \
-	beq	cr6, 1f; \
-	vspltisb	v9, 0; \
-	b	2f; \
-	.align 4; \
-1: \
-	addi	r6, reg2, 16; \
-	lvx	v9, 0, r6; \
-2: \
-	vperm	reg1, v9, reg1, reg3;
-#else
-#define GET16BYTES(reg1, reg2, reg3) \
-	lvx	reg1, 0, reg2; \
-	vspltisb	 v8, -1; \
-	vperm	v8, reg1, v8,  reg3; \
-	vcmpequb.	v8, v0, v8; \
-	beq	cr6, 1f; \
-	vspltisb	v9, 0; \
-	b	2f; \
-	.align 4; \
-1: \
-	addi	r6, reg2, 16; \
-	lvx	v9, 0, r6; \
-2: \
-	vperm	reg1, reg1, v9, reg3;
-#endif
-
-/* Check null in v4, v5 and convert to lower.
-   v0 must hold zero in every byte.  Control reaches L(null_found) only
-   when both v5 and v4 contain a null byte; v7 then holds the null mask
-   of v4.  As soon as one of the two vectors is found to have no null the
-   code continues at local label 3 with TOLOWER(), which leaves the result
-   of comparing the converted bytes in v7 and CR6.  */
-#define CHECKNULLANDCONVERT() \
-	vcmpequb.	v7, v0, v5; \
-	beq	cr6, 3f; \
-	vcmpequb.	v7, v0, v4; \
-	beq	cr6, 3f; \
-	b	L(null_found); \
-	.align  4; \
-3: \
-	TOLOWER()
-
-#ifdef _ARCH_PWR8	/* Spell the POWER8 instructions as mnemonics.  Note that VADDUQM_V7_V8 names its sources; the result goes to v9.  */
-#  define VCLZD_V8_v7	vclzd	v8, v7;
-#  define MFVRD_R3_V1	mfvrd	r3, v1;
-#  define VSUBUDM_V9_V8	vsubudm	v9, v9, v8;
-#  define VPOPCNTD_V8_V8	vpopcntd v8, v8;
-#  define VADDUQM_V7_V8	vadduqm	v9, v7, v8;
-#else	/* Otherwise emit the same five instructions as raw opcode words.  */
-#  define VCLZD_V8_v7	.long	0x11003fc2
-#  define MFVRD_R3_V1	.long	0x7c230067
-#  define VSUBUDM_V9_V8	.long	0x112944c0
-#  define VPOPCNTD_V8_V8	.long	0x110047c3
-#  define VADDUQM_V7_V8	.long	0x11274100
-#endif
-
-	.machine  power7
-
-ENTRY (__STRCASECMP)	/* Case-insensitive compare of s1 (r3) and s2 (r4); r5 is the length limit when built as strncasecmp.  */
-#ifdef USE_AS_STRNCASECMP
-	CALL_MCOUNT 3
-#else
-	CALL_MCOUNT 2
-#endif
-#define rRTN	r3	/* Return value; holds s1 on entry */
-#define rSTR1	r10	/* 1st string */
-#define rSTR2	r4	/* 2nd string */
-#define rCHAR1	r6	/* Byte read from 1st string */
-#define rCHAR2	r7	/* Byte read from 2nd string */
-#define rADDR1	r8	/* Address of tolower(rCHAR1) */
-#define rADDR2	r12	/* Address of tolower(rCHAR2) */
-#define rLWR1	r8	/* Word tolower(rCHAR1); shares r8 with rADDR1 */
-#define rLWR2	r12	/* Word tolower(rCHAR2); shares r12 with rADDR2 */
-#define rTMP	r9
-#define rLOC	r11	/* Default locale address */
-
-	cmpd	cr7, rRTN, rSTR2	/* s1 == s2 ?  Consumed by beqlr below.  */
-
-	/* Get locale address.  rLOC ends up holding the pointer stored in the
-	   thread-local __libc_tsd_LOCALE variable.  */
-	ld 	rTMP, __libc_tsd_LOCALE@got@tprel(r2)
-	add 	rLOC, rTMP, __libc_tsd_LOCALE@tls
-	ld	rLOC, 0(rLOC)
-
-	mr	rSTR1, rRTN	/* Keep s1 in r10 so that r3 can carry the result.  */
-	li	rRTN, 0
-	beqlr	cr7	/* Same pointer: return 0.  */
-#ifdef USE_AS_STRNCASECMP
-	cmpdi	cr7, r5, 0
-	beq	cr7, L(retnull)	/* n == 0: return 0.  */
-	cmpdi	cr7, r5, 16
-	blt	cr7, L(bytebybyte)	/* n < 16 (signed compare): scalar path.  */
-#endif
-	vspltisb	v0, 0
-	vspltisb	v8, -1
-	/* Check for null in initial characters.
-	   Check max of 16 char depending on the alignment.
-	   If null is present, proceed byte by byte.
-	   v0 stays all-zero and is the null reference for the vector code;
-	   v10 and v6 keep the permute controls of s1 and s2.  */
-	lvx	v4, 0, rSTR1
-#ifdef  __LITTLE_ENDIAN__
-	lvsr	v10, 0, rSTR1	/* Compute mask.  */
-	vperm	v9, v8, v4, v10	/* Mask bits that are not part of string.  */
-#else
-	lvsl	v10, 0, rSTR1
-	vperm	v9, v4, v8, v10
-#endif
-	vcmpequb.	v9, v0, v9	/* Check for null bytes.  */
-	bne	cr6, L(bytebybyte)
-	lvx	v5, 0, rSTR2
-	/* Calculate alignment.  */
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v6, 0, rSTR2
-	vperm	v9, v8, v5, v6	/* Mask bits that are not part of string.  */
-#else
-	lvsl	v6, 0, rSTR2
-	vperm	v9, v5, v8, v6
-#endif
-	vcmpequb.	v9, v0, v9	/* Check for null bytes.  */
-	bne	cr6, L(bytebybyte)
-	/* Check if locale has non ascii characters.  The vector code only
-	   folds 'A'..'Z', so a value of 1 in this locale field selects the
-	   table-driven byte loop instead.  */
-	ld	rTMP, 0(rLOC)
-	addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES
-	lwz	rTMP, 0(r6)
-	cmpdi	cr7, rTMP, 1
-	beq	cr7, L(bytebybyte)
-
-	/* Load vector registers with values used for TOLOWER.  */
-	/* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte.  */
-	vspltisb	v3, 2
-	vspltisb	v9, 4
-	vsl	v3, v3, v9	/* v3 = 0x02 << 4 = 0x20.  */
-	vaddubm	v1, v3, v3	/* v1 = 0x40 ...  */
-	vnor	v1, v1, v1	/* ... complemented to 0xbf.  */
-	vspltisb	v2, 7
-	vsububm	v2, v3, v2	/* v2 = 0x20 - 7 = 0x19.  */
-
-	andi.	rADDR1, rSTR1, 0xF	/* Is s1 16-byte aligned ?  */
-	beq	cr0, L(align)
-	addi	r6, rSTR1, 16
-	lvx	v9, 0, r6
-	/* Compute 16 bytes from previous two loads.  */
-#ifdef __LITTLE_ENDIAN__
-	vperm	v4, v9, v4, v10
-#else
-	vperm	v4, v4, v9, v10
-#endif
-L(align):
-	andi.	rADDR2, rSTR2, 0xF	/* Is s2 16-byte aligned ?  */
-	beq	cr0, L(align1)
-	addi	r6, rSTR2, 16
-	lvx	v9, 0, r6
-	/* Compute 16 bytes from previous two loads.  */
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v9, v5, v6
-#else
-	vperm	v5, v5, v9, v6
-#endif
-L(align1):
-	CHECKNULLANDCONVERT()
-	blt	cr6, L(match)	/* All 16 converted bytes are equal.  */
-	b	L(different)
-	.align 	4
-L(match):
-	clrldi	r6, rSTR1, 60	/* r6 = s1 & 15.  */
-	subfic	r7, r6, 16	/* r7 = 16 - r6, distance to the next 16-byte boundary of s1.  */
-#ifdef USE_AS_STRNCASECMP
-	sub	r5, r5, r7	/* Only r7 bytes are accounted as compared.  */
-#endif
-	add	rSTR1, rSTR1, r7	/* s1 is 16-byte aligned from here on.  */
-	add	rSTR2, rSTR2, r7
-	andi.	rADDR2, rSTR2, 0xF	/* Did s2 become aligned as well ?  */
-	addi	rSTR1, rSTR1, -16	/* Both loops pre-increment by 16.  */
-	addi	rSTR2, rSTR2, -16
-	beq	cr0, L(aligned)
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v6, 0, rSTR2
-#else
-	lvsl	v6, 0, rSTR2
-#endif
-	/* There are 2 loops depending on the input alignment.
-	   Each loop gets 16 bytes from s1 and s2, check for null,
-	   convert to lowercase and compare. Loop till difference
-	   or null occurs.  L(s1_align) handles an aligned s1 with an
-	   unaligned s2, L(aligned) handles both strings aligned.  When built
-	   as strncasecmp each loop hands over to L(bytebybyte) once fewer
-	   than 16 bytes remain in r5.  */
-L(s1_align):
-	addi	rSTR1, rSTR1, 16
-	addi	rSTR2, rSTR2, 16
-#ifdef USE_AS_STRNCASECMP
-	cmpdi	cr7, r5, 16
-	blt	cr7, L(bytebybyte)
-	addi	r5, r5, -16
-#endif
-	lvx	v4, 0, rSTR1
-	GET16BYTES(v5, rSTR2, v6)
-	CHECKNULLANDCONVERT()
-	blt	cr6, L(s1_align)	/* All 16 bytes equal: next chunk.  */
-	b	L(different)
-	.align 	4
-L(aligned):
-	addi	rSTR1, rSTR1, 16
-	addi	rSTR2, rSTR2, 16
-#ifdef USE_AS_STRNCASECMP
-	cmpdi	cr7, r5, 16
-	blt	cr7, L(bytebybyte)
-	addi	r5, r5, -16
-#endif
-	lvx	v4, 0, rSTR1
-	lvx	v5, 0, rSTR2
-	CHECKNULLANDCONVERT()
-	blt	cr6, L(aligned)	/* Otherwise fall through to L(different).  */
-
-	/* Calculate and return the difference. */
-L(different):
-	vaddubm	v1, v3, v3	/* v1 = 0x40 (64) in each byte.  */
-	vcmpequb	v7, v0, v7	/* Invert the equality mask: 0xff now marks a differing byte.  */
-#ifdef __LITTLE_ENDIAN__
-	/* Count trailing zero.  */
-	vspltisb	v8, -1
-	VADDUQM_V7_V8	/* v9 = v7 + v8 = v7 - 1.  */
-	vandc	v8, v9, v7	/* v8 = (v7 - 1) & ~v7: ones below the lowest set bit.  */
-	VPOPCNTD_V8_V8
-	vspltb	v6, v8, 15
-	vcmpequb.	v6, v6, v1	/* Is that count 64 ?  */
-	blt	cr6, L(shift8)
-#else
-	/* Count leading zero.  */
-	VCLZD_V8_v7
-	vspltb	v6, v8, 7
-	vcmpequb.	v6, v6, v1	/* Is that count 64 ?  */
-	blt	cr6, L(shift8)
-	vsro	v8, v8, v1
-#endif
-	b	L(skipsum)
-	.align  4
-L(shift8):
-	vsumsws		v8, v8, v0	/* Add up the per-doubleword counts.  */
-L(skipsum):
-#ifdef __LITTLE_ENDIAN__
-	/* Shift registers based on the zero count computed above.  */
-	vsro	v6, v5, v8
-	vsro	v7, v4, v8
-	/* Merge and move to GPR.  */
-	vmrglb	v6, v6, v7
-	vslo	v1, v6, v1
-	MFVRD_R3_V1
-	/* Place the characters that are different in first position.
-	   rSTR1 and rSTR2 are reused here to hold the two characters.  */
-	sldi	rSTR2, rRTN, 56
-	srdi	rSTR2, rSTR2, 56
-	sldi	rSTR1, rRTN, 48
-	srdi	rSTR1, rSTR1, 56
-#else
-	vslo	v6, v5, v8
-	vslo	v7, v4, v8
-	vmrghb	v1, v6, v7
-	MFVRD_R3_V1
-	srdi	rSTR2, rRTN, 48
-	sldi	rSTR2, rSTR2, 56
-	srdi	rSTR2, rSTR2, 56
-	srdi	rSTR1, rRTN, 56
-#endif
-	subf  	rRTN, rSTR1, rSTR2	/* rRTN = rSTR2 - rSTR1.  */
-	extsw 	rRTN, rRTN
-	blr
-
-	.align  4
-	/* OK. We've hit the end of the string. We need to be careful that
-	   we don't compare two strings as different because of junk beyond
-	   the end of the strings...  */
-L(null_found):
-	vaddubm	v10, v3, v3	/* v10 = 0x40 (64) in each byte.  */
-#ifdef __LITTLE_ENDIAN__
-	/* Count trailing zero.  */
-	vspltisb	v8, -1
-	VADDUQM_V7_V8	/* v9 = v7 + v8 = v7 - 1.  */
-	vandc	v8, v9, v7	/* v8 = (v7 - 1) & ~v7: ones below the lowest set bit.  */
-	VPOPCNTD_V8_V8
-	vspltb	v6, v8, 15
-	vcmpequb.	v6, v6, v10	/* Is that count 64 ?  */
-	blt	cr6, L(shift_8)
-#else
-	/* Count leading zero.  */
-	VCLZD_V8_v7
-	vspltb	v6, v8, 7
-	vcmpequb.	v6, v6, v10	/* Is that count 64 ?  */
-	blt	cr6, L(shift_8)
-	vsro	v8, v8, v10
-#endif
-	b	L(skipsum1)
-	.align  4
-L(shift_8):
-	vsumsws	v8, v8, v0	/* Add up the per-doubleword counts.  */
-L(skipsum1):
-	/* Calculate shift count based on count of zero.
-	   The low doubleword of v9 becomes 128 - count - 8.  */
-	vspltisb	v10, 7
-	vslb	v10, v10, v10	/* 7 << 7 leaves 0x80 in each byte.  */
-	vsldoi	v9, v0, v10, 1	/* v9 = 0x80 (128) in the last byte only.  */
-	VSUBUDM_V9_V8
-	vspltisb	v8, 8
-	vsldoi	v8, v0, v8, 1	/* v8 = 8 in the last byte only.  */
-	VSUBUDM_V9_V8
-	/* Shift and remove junk after null character.  */
-#ifdef __LITTLE_ENDIAN__
-	vslo	v5, v5, v9
-	vslo	v4, v4, v9
-#else
-	vsro	v5, v5, v9
-	vsro	v4, v4, v9
-#endif
-	/* Convert and compare 16 bytes.  */
-	TOLOWER()
-	blt	cr6, L(retnull)	/* Everything up to the null matches.  */
-	b	L(different)
-	.align  4
-L(retnull):
-	li	rRTN, 0
-	blr
-	.align  4
-L(bytebybyte):
-	/* Unrolling loop for POWER: loads are done with 'lbz' plus
-	offset and the string pointers are only updated in the end
-	of loop unrolling.  Every step exits to L(done) when *s1 is
-	null or when the two table entries differ.  */
-	ld	rLOC, LOCALE_CTYPE_TOLOWER(rLOC)
-	lbz	rCHAR1, 0(rSTR1)	/* Load char from s1 */
-	lbz	rCHAR2, 0(rSTR2)	/* Load char from s2 */
-#ifdef USE_AS_STRNCASECMP
-	rldicl	rTMP, r5, 62, 2	/* rTMP = n >> 2: trips through the 4x unrolled loop.  */
-	cmpdi	cr7, rTMP, 0
-	beq	cr7, L(lessthan4)
-	mtctr	rTMP
-#endif
-L(loop):
-	cmpdi	rCHAR1, 0		/* *s1 == '\0' ? */
-	sldi	rADDR1, rCHAR1, 2	/* Calculate address for tolower(*s1) */
-	sldi	rADDR2, rCHAR2, 2	/* Calculate address for tolower(*s2) */
-	lwzx	rLWR1, rLOC, rADDR1	/* Load tolower(*s1) */
-	lwzx	rLWR2, rLOC, rADDR2	/* Load tolower(*s2) */
-	cmpw	cr1, rLWR1, rLWR2	/* r = tolower(*s1) == tolower(*s2) ? */
-	crorc	4*cr1+eq,eq,4*cr1+eq	/* (*s1 == '\0') || (r == 0) */
-	beq	cr1, L(done)
-	lbz	rCHAR1, 1(rSTR1)
-	lbz	rCHAR2, 1(rSTR2)
-	cmpdi	rCHAR1, 0
-	sldi	rADDR1, rCHAR1, 2
-	sldi	rADDR2, rCHAR2, 2
-	lwzx	rLWR1, rLOC, rADDR1
-	lwzx	rLWR2, rLOC, rADDR2
-	cmpw	cr1, rLWR1, rLWR2
-	crorc	4*cr1+eq,eq,4*cr1+eq	/* (*s1 == '\0') || (r == 0) */
-	beq	cr1, L(done)
-	lbz	rCHAR1, 2(rSTR1)
-	lbz	rCHAR2, 2(rSTR2)
-	cmpdi	rCHAR1, 0
-	sldi	rADDR1, rCHAR1, 2
-	sldi	rADDR2, rCHAR2, 2
-	lwzx	rLWR1, rLOC, rADDR1
-	lwzx	rLWR2, rLOC, rADDR2
-	cmpw	cr1, rLWR1, rLWR2
-	crorc	4*cr1+eq,eq,4*cr1+eq	/* (*s1 == '\0') || (r == 0) */
-	beq	cr1, L(done)
-	lbz	rCHAR1, 3(rSTR1)
-	lbz	rCHAR2, 3(rSTR2)
-	cmpdi	rCHAR1, 0
-	/* Increment both string pointers */
-	addi	rSTR1, rSTR1, 4
-	addi	rSTR2, rSTR2, 4
-	sldi	rADDR1, rCHAR1, 2
-	sldi	rADDR2, rCHAR2, 2
-	lwzx	rLWR1, rLOC, rADDR1
-	lwzx	rLWR2, rLOC, rADDR2
-	cmpw	cr1, rLWR1, rLWR2
-	crorc	4*cr1+eq,eq,4*cr1+eq	/* (*s1 == '\0') || (r == 0) */
-	beq     cr1, L(done)
-	lbz	rCHAR1, 0(rSTR1)	/* Load char from s1 */
-	lbz	rCHAR2, 0(rSTR2)	/* Load char from s2 */
-#ifdef USE_AS_STRNCASECMP
-	bdnz	L(loop)
-#else
-	b	L(loop)
-#endif
-#ifdef USE_AS_STRNCASECMP
-L(lessthan4):
-	clrldi	r5, r5, 62	/* r5 = n & 3: bytes left after the unrolled loop.  */
-	cmpdi	cr7, r5, 0
-	beq	cr7, L(retnull)
-	mtctr	r5
-L(loop1):
-	cmpdi	rCHAR1, 0
-	sldi	rADDR1, rCHAR1, 2
-	sldi	rADDR2, rCHAR2, 2
-	lwzx	rLWR1, rLOC, rADDR1
-	lwzx	rLWR2, rLOC, rADDR2
-	cmpw	cr1, rLWR1, rLWR2
-	crorc	4*cr1+eq,eq,4*cr1+eq	/* (*s1 == '\0') || (r == 0) */
-	beq	cr1, L(done)
-	addi	rSTR1, rSTR1, 1
-	addi	rSTR2, rSTR2, 1
-	lbz	rCHAR1, 0(rSTR1)
-	lbz	rCHAR2, 0(rSTR2)
-	bdnz	L(loop1)
-#endif
-L(done):
-	subf	r0, rLWR2, rLWR1	/* r0 = rLWR1 - rLWR2.  */
-	extsw	rRTN, r0
-	blr
-END (__STRCASECMP)
-
-weak_alias (__STRCASECMP, STRCASECMP)
-libc_hidden_builtin_def (__STRCASECMP)