Diffstat (limited to 'sysdeps/powerpc/powerpc64/power8')
 sysdeps/powerpc/powerpc64/power8/Implies               |    2 -
 sysdeps/powerpc/powerpc64/power8/Makefile              |    3 -
 sysdeps/powerpc/powerpc64/power8/fpu/Implies           |    1 -
 sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S          |  303 -
 sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies |    1 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S          |  508 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S        |   56 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S       |    1 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S         |   61 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S        |    1 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S         |   56 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S        |    1 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S        |   45 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S       |   48 -
 sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S          |  519 -
 sysdeps/powerpc/powerpc64/power8/memcmp.S              | 1447 -
 sysdeps/powerpc/powerpc64/power8/memset.S              |  458 -
 sysdeps/powerpc/powerpc64/power8/multiarch/Implies     |    1 -
 sysdeps/powerpc/powerpc64/power8/stpcpy.S              |   24 -
 sysdeps/powerpc/powerpc64/power8/stpncpy.S             |   24 -
 sysdeps/powerpc/powerpc64/power8/strcasecmp.S          |  457 -
 sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c    |   29 -
 sysdeps/powerpc/powerpc64/power8/strcasestr.S          |  538 -
 sysdeps/powerpc/powerpc64/power8/strchr.S              |  377 -
 sysdeps/powerpc/powerpc64/power8/strchrnul.S           |   23 -
 sysdeps/powerpc/powerpc64/power8/strcmp.S              |  247 -
 sysdeps/powerpc/powerpc64/power8/strcpy.S              |  270 -
 sysdeps/powerpc/powerpc64/power8/strcspn.S             |   20 -
 sysdeps/powerpc/powerpc64/power8/strlen.S              |  301 -
 sysdeps/powerpc/powerpc64/power8/strncase.S            |   20 -
 sysdeps/powerpc/powerpc64/power8/strncmp.S             |  327 -
 sysdeps/powerpc/powerpc64/power8/strncpy.S             |  465 -
 sysdeps/powerpc/powerpc64/power8/strnlen.S             |  433 -
 sysdeps/powerpc/powerpc64/power8/strrchr.S             |  464 -
 sysdeps/powerpc/powerpc64/power8/strspn.S              |  202 -
 35 files changed, 0 insertions(+), 7733 deletions(-)
diff --git a/sysdeps/powerpc/powerpc64/power8/Implies b/sysdeps/powerpc/powerpc64/power8/Implies
deleted file mode 100644
index 9a5e3c7277..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/Implies
+++ /dev/null
@@ -1,2 +0,0 @@
-powerpc/powerpc64/power7/fpu
-powerpc/powerpc64/power7
diff --git a/sysdeps/powerpc/powerpc64/power8/Makefile b/sysdeps/powerpc/powerpc64/power8/Makefile
deleted file mode 100644
index 71a59529f3..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-ifeq ($(subdir),string)
-sysdep_routines += strcasestr-ppc64
-endif
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/Implies b/sysdeps/powerpc/powerpc64/power8/fpu/Implies
deleted file mode 100644
index 1187cdfb0a..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power7/fpu/
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S b/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S
deleted file mode 100644
index 4c42926a74..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S
+++ /dev/null
@@ -1,303 +0,0 @@
-/* Optimized expf().  PowerPC64/POWER8 version.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* Short algorithm description:
- *
- *  Let K = 64 (table size).
- *       e^x  = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
- *  where:
- *       x = m*log(2)/K + y,    y in [0.0..log(2)/K]
- *       m = n*K + j,           m,n,j - signed integer, j in [0..K-1]
- *       values of 2^(j/K) are tabulated as T[j].
- *
- *       P(y) is a minimax polynomial approximation of expf(y)-1
- *       on small interval [0.0..log(2)/K].
- *
- *       P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as
- *       z = y*y;    P(y) = (P3*z + P1)*z + (P2*z + P0)*y
- *
- * Special cases:
- *  expf(NaN) = NaN
- *  expf(+INF) = +INF
- *  expf(-INF) = 0
- *  expf(x) = 1 for subnormals
- *  for finite argument, only expf(0)=1 is exact
- *  expf(x) overflows if x>88.7228317260742190
- *  expf(x) underflows if x<-103.972076416015620
- */
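For reference, the scheme above maps to a few lines of C. This is an
editorial sketch assuming round-to-nearest (the assembly instead rounds by
adding the 2^23 + 2^22 constant kept at .RS); the names are illustrative:

    #include <math.h>

    /* P0..P3 are the doubles stored at .P0-.P3 below.  */
    static const double P0 = 0x1.fffffffffe7c6p-1, P1 = 0x1.00000008d6118p-1,
                        P2 = 0x1.55550da752d4fp-3, P3 = 0x1.56420eb78fa85p-5;

    float expf_sketch (float x)
    {
      double m = round (x * (64.0 / M_LN2));  /* m = n*K + j, K = 64 */
      double y = x - m * (M_LN2 / 64.0);      /* y in [0.0 .. log(2)/K] */
      double z = y * y;
      double p = (P3 * z + P1) * z + (P2 * z + P0) * y;  /* P(y) */
      int mi = (int) m;                       /* assumes arithmetic >> below */
      double t = exp2 ((mi & 63) / 64.0);     /* T[j], tabulated at .Ttable */
      return (float) scalbn (t * (1.0 + p), mi >> 6);  /* 2^n*T[j]*(1+P(y)) */
    }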
-
-#define C1 0x42ad496b		/* Single precision 125*log(2).  */
-#define C2 0x31800000		/* Single precision 2^(-28).  */
-#define SP_INF 0x7f800000	/* Single precision Inf.  */
-#define SP_EXP_BIAS 0x1fc0	/* Single precision exponent bias.  */
-
-#define DATA_OFFSET r9
-
-/* Implements the function
-
-   float [fp1] expf (float [fp1] x)  */
-
-	.machine power8
-EALIGN(__ieee754_expf, 4, 0)
-	addis	DATA_OFFSET,r2,.Lanchor@toc@ha
-	addi	DATA_OFFSET,DATA_OFFSET,.Lanchor@toc@l
-
-	xscvdpspn v0,v1
-	mfvsrd	r8,v0		/* r8 = x  */
-	lfd	fp2,(.KLN2-.Lanchor)(DATA_OFFSET)
-	lfd	fp3,(.P2-.Lanchor)(DATA_OFFSET)
-	rldicl	r3,r8,32,33	/* r3 = |x|  */
-	lis	r4,C1@ha	/* r4 = 125*log(2)  */
-	ori	r4,r4,C1@l
-	cmpw	r3,r4
-	lfd	fp5,(.P3-.Lanchor)(DATA_OFFSET)
-	lfd	fp4,(.RS-.Lanchor)(DATA_OFFSET)
-	fmadd	fp2,fp1,fp2,fp4	/* fp2 = x * K/log(2) + (2^23 + 2^22)  */
-	bge	L(special_paths)	/* |x| >= 125*log(2) ?  */
-
-	lis	r4,C2@ha
-	ori	r4,r4,C2@l
-	cmpw	r3,r4
-	blt	L(small_args)	/* |x| < 2^(-28) ?  */
-
-	/* Main path: here if 2^(-28) <= |x| < 125*log(2) */
-	frsp	fp6,fp2
-	xscvdpsp v2,v2
-	mfvsrd	r8,v2
-	mr	r3,r8			/* r3 = m  */
-	rldicl	r8,r8,32,58		/* r8 = j  */
-	lfs	fp4,(.SP_RS-.Lanchor)(DATA_OFFSET)
-	fsubs	fp2,fp6,fp4		/* fp2 = m = x * K/log(2)  */
-	srdi	r3,r3,32
-	clrrwi	r3,r3,6			/* r3 = n  */
-	lfd	fp6,(.NLN2K-.Lanchor)(DATA_OFFSET)
-	fmadd	fp0,fp2,fp6,fp1		/* fp0 = y = x - m*log(2)/K  */
-	fmul	fp2,fp0,fp0		/* fp2 = z = y^2  */
-	lfd	fp4,(.P1-.Lanchor)(DATA_OFFSET)
-	lfd	fp6,(.P0-.Lanchor)(DATA_OFFSET)
-	lis	r4,SP_EXP_BIAS@ha
-	ori	r4,r4,SP_EXP_BIAS@l
-	add	r3,r3,r4
-	rldic	r3,r3,49,1		/* r3 = 2^n  */
-	fmadd	fp4,fp5,fp2,fp4		/* fp4 = P3 * z + P1  */
-	fmadd	fp6,fp3,fp2,fp6		/* fp6 = P2 * z + P0  */
-	mtvsrd	v1,r3
-	xscvspdp v1,v1
-	fmul	fp4,fp4,fp2		/* fp4 = (P3 * z + P1)*z  */
-	fmadd	fp0,fp0,fp6,fp4		/* fp0 = P(y)  */
-	sldi	r8,r8,3			/* Access doublewords from T[j].  */
-	addi	r6,DATA_OFFSET,(.Ttable-.Lanchor)
-	lfdx	fp3,r6,r8
-	fmadd	fp0,fp0,fp3,fp3		/* fp0 = T[j] * (1 + P(y))  */
-	fmul	fp1,fp1,fp0		/* fp1 = 2^n * T[j] * (1 + P(y))  */
-	frsp	fp1,fp1
-	blr
-
-	.align	4
-/* x is either underflow, overflow, infinite or NaN.  */
-L(special_paths):
-	srdi	r8,r8,32
-	rlwinm	r8,r8,3,29,29		/* r8 = 0, if x positive.
-					   r8 = 4, otherwise.  */
-	addi	r6,DATA_OFFSET,(.SPRANGE-.Lanchor)
-	lwzx	r4,r6,r8		/* r4 = .SPRANGE[signbit(x)]  */
-	cmpw	r3,r4
-	/* |x| <= .SPRANGE[signbit(x)]  */
-	ble	L(near_under_or_overflow)
-
-	lis	r4,SP_INF@ha
-	ori	r4,r4,SP_INF@l
-	cmpw	r3,r4
-	bge	L(arg_inf_or_nan)	/* |x| > Infinity ?  */
-
-	addi	r6,DATA_OFFSET,(.SPLARGE_SMALL-.Lanchor)
-	lfsx	fp1,r6,r8
-	fmuls	fp1,fp1,fp1
-	blr
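The squaring above is how the near-overflow/underflow path produces its
result: multiplying 2^100 or 2^-100 (the .SPLARGE_SMALL pair) by itself lets
the FPU deliver +Inf or +0 together with the proper overflow/underflow and
inexact exceptions. A C sketch of the idea (sign_bit_set is illustrative):

    float expf_saturate (int sign_bit_set)
    {
      float t = sign_bit_set ? 0x1p-100f : 0x1p100f;  /* .SPLARGE_SMALL */
      return t * t;  /* 2^200 overflows to +Inf; 2^-200 underflows to +0 */
    }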
-
-
-	.align	4
-L(small_args):
-	/* expf(x) = 1.0, where |x| < 2^(-28)  */
-	lfs	fp2,(.SPone-.Lanchor)(DATA_OFFSET)
-	fadds	fp1,fp1,fp2
-	blr
-
-
-	.align	4
-L(arg_inf_or_nan):
-	bne	L(arg_nan)
-
-	/* expf(+INF) = +INF
-	   expf(-INF) = 0  */
-	addi	r6,DATA_OFFSET,(.INF_ZERO-.Lanchor)
-	lfsx	fp1,r6,r8
-	blr
-
-
-	.align	4
-L(arg_nan):
-	/* expf(NaN) = NaN  */
-	fadd	fp1,fp1,fp1
-	frsp	fp1,fp1
-	blr
-
-	.align	4
-L(near_under_or_overflow):
-	frsp	fp6,fp2
-	xscvdpsp v2,v2
-	mfvsrd	r8,v2
-	mr	r3,r8			/* r3 = m  */
-	rldicl	r8,r8,32,58		/* r8 = j  */
-	lfs	fp4,(.SP_RS-.Lanchor)(DATA_OFFSET)
-	fsubs	fp2,fp6,fp4		/* fp2 = m = x * K/log(2)  */
-	srdi	r3,r3,32
-	clrrwi	r3,r3,6			/* r3 = n  */
-	lfd	fp6,(.NLN2K-.Lanchor)(DATA_OFFSET)
-	fmadd	fp0,fp2,fp6,fp1		/* fp0 = y = x - m*log(2)/K  */
-	fmul	fp2,fp0,fp0		/* fp2 = z = y^2  */
-	lfd	fp4,(.P1-.Lanchor)(DATA_OFFSET)
-	lfd	fp6,(.P0-.Lanchor)(DATA_OFFSET)
-	ld	r4,(.DP_EXP_BIAS-.Lanchor)(DATA_OFFSET)
-	add	r3,r3,r4
-	rldic	r3,r3,46,1		/* r3 = 2^n  */
-	fmadd	fp4,fp5,fp2,fp4		/* fp4 = P3 * z + P1  */
-	fmadd	fp6,fp3,fp2,fp6		/* fp6 = P2 * z + P0  */
-	mtvsrd	v1,r3
-	fmul	fp4,fp4,fp2		/* fp4 = (P3*z + P1)*z  */
-	fmadd	fp0,fp0,fp6,fp4		/* fp0 = P(y)  */
-	sldi	r8,r8,3			/* Access doublewords from T[j].  */
-	addi	r6,DATA_OFFSET,(.Ttable-.Lanchor)
-	lfdx	fp3,r6,r8
-	fmadd	fp0,fp0,fp3,fp3		/* fp0 = T[j] * (1 + P(y))  */
-	fmul	fp1,fp1,fp0		/* fp1 = 2^n * T[j] * (1 + P(y))  */
-	frsp	fp1,fp1
-	blr
-END(__ieee754_expf)
-
-	.section .rodata, "a",@progbits
-.Lanchor:
-	.balign	8
-/* Table T[j] = 2^(j/K).  Double precision.  */
-.Ttable:
-	.8byte	0x3ff0000000000000
-	.8byte	0x3ff02c9a3e778061
-	.8byte	0x3ff059b0d3158574
-	.8byte	0x3ff0874518759bc8
-	.8byte	0x3ff0b5586cf9890f
-	.8byte	0x3ff0e3ec32d3d1a2
-	.8byte	0x3ff11301d0125b51
-	.8byte	0x3ff1429aaea92de0
-	.8byte	0x3ff172b83c7d517b
-	.8byte	0x3ff1a35beb6fcb75
-	.8byte	0x3ff1d4873168b9aa
-	.8byte	0x3ff2063b88628cd6
-	.8byte	0x3ff2387a6e756238
-	.8byte	0x3ff26b4565e27cdd
-	.8byte	0x3ff29e9df51fdee1
-	.8byte	0x3ff2d285a6e4030b
-	.8byte	0x3ff306fe0a31b715
-	.8byte	0x3ff33c08b26416ff
-	.8byte	0x3ff371a7373aa9cb
-	.8byte	0x3ff3a7db34e59ff7
-	.8byte	0x3ff3dea64c123422
-	.8byte	0x3ff4160a21f72e2a
-	.8byte	0x3ff44e086061892d
-	.8byte	0x3ff486a2b5c13cd0
-	.8byte	0x3ff4bfdad5362a27
-	.8byte	0x3ff4f9b2769d2ca7
-	.8byte	0x3ff5342b569d4f82
-	.8byte	0x3ff56f4736b527da
-	.8byte	0x3ff5ab07dd485429
-	.8byte	0x3ff5e76f15ad2148
-	.8byte	0x3ff6247eb03a5585
-	.8byte	0x3ff6623882552225
-	.8byte	0x3ff6a09e667f3bcd
-	.8byte	0x3ff6dfb23c651a2f
-	.8byte	0x3ff71f75e8ec5f74
-	.8byte	0x3ff75feb564267c9
-	.8byte	0x3ff7a11473eb0187
-	.8byte	0x3ff7e2f336cf4e62
-	.8byte	0x3ff82589994cce13
-	.8byte	0x3ff868d99b4492ed
-	.8byte	0x3ff8ace5422aa0db
-	.8byte	0x3ff8f1ae99157736
-	.8byte	0x3ff93737b0cdc5e5
-	.8byte	0x3ff97d829fde4e50
-	.8byte	0x3ff9c49182a3f090
-	.8byte	0x3ffa0c667b5de565
-	.8byte	0x3ffa5503b23e255d
-	.8byte	0x3ffa9e6b5579fdbf
-	.8byte	0x3ffae89f995ad3ad
-	.8byte	0x3ffb33a2b84f15fb
-	.8byte	0x3ffb7f76f2fb5e47
-	.8byte	0x3ffbcc1e904bc1d2
-	.8byte	0x3ffc199bdd85529c
-	.8byte	0x3ffc67f12e57d14b
-	.8byte	0x3ffcb720dcef9069
-	.8byte	0x3ffd072d4a07897c
-	.8byte	0x3ffd5818dcfba487
-	.8byte	0x3ffda9e603db3285
-	.8byte	0x3ffdfc97337b9b5f
-	.8byte	0x3ffe502ee78b3ff6
-	.8byte	0x3ffea4afa2a490da
-	.8byte	0x3ffefa1bee615a27
-	.8byte	0x3fff50765b6e4540
-	.8byte	0x3fffa7c1819e90d8
-
-.KLN2:
-	.8byte	0x40571547652b82fe	/* Double precision K/log(2).  */
-
-/* Double precision polynomial coefficients.  */
-.P0:
-	.8byte	0x3fefffffffffe7c6
-.P1:
-	.8byte	0x3fe00000008d6118
-.P2:
-	.8byte	0x3fc55550da752d4f
-.P3:
-	.8byte	0x3fa56420eb78fa85
-
-.RS:
-	.8byte	0x4168000000000000	/* Double precision 2^23 + 2^22.  */
-.NLN2K:
-	.8byte	0xbf862e42fefa39ef	/* Double precision -log(2)/K.  */
-.DP_EXP_BIAS:
-	.8byte	0x000000000000ffc0	/* Double precision exponent bias.  */
-
-	.balign	4
-.SPone:
-	.4byte	0x3f800000	/* Single precision 1.0.  */
-.SP_RS:
-	.4byte	0x4b400000	/* Single precision 2^23 + 2^22.  */
-
-.SPRANGE: /* Single precision overflow/underflow bounds.  */
-	.4byte	0x42b17217	/* if x>this bound, then result overflows.  */
-	.4byte	0x42cff1b4	/* if x<this bound, then result underflows.  */
-
-.SPLARGE_SMALL:
-	.4byte	0x71800000	/* 2^100.  */
-	.4byte	0x0d800000	/* 2^-100.  */
-
-.INF_ZERO:
-	.4byte	0x7f800000	/* Single precision Inf.  */
-	.4byte	0		/* Single precision zero.  */
-
-strong_alias (__ieee754_expf, __expf_finite)
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies
deleted file mode 100644
index 7fd86fdf87..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power7/fpu/multiarch
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S
deleted file mode 100644
index 8dfa0076e0..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S
+++ /dev/null
@@ -1,508 +0,0 @@
-/* Optimized cosf().  PowerPC64/POWER8 version.
-   Copyright (C) 2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#define _ERRNO_H	1
-#include <bits/errno.h>
-
-#define FRAMESIZE (FRAME_MIN_SIZE+16)
-
-#define FLOAT_EXPONENT_SHIFT	23
-#define FLOAT_EXPONENT_BIAS	127
-#define INTEGER_BITS		3
-
-#define PI_4		0x3f490fdb	/* PI/4 */
-#define NINEPI_4	0x40e231d6	/* 9 * PI/4 */
-#define TWO_PN5		0x3d000000	/* 2^-5 */
-#define TWO_PN27	0x32000000	/* 2^-27 */
-#define INFINITY	0x7f800000
-#define TWO_P23		0x4b000000	/* 2^23 */
-#define FX_FRACTION_1_28 0x9249250	/* 0x100000000 / 28 + 1 */
-
-	/* Implements the function
-
-	   float [fp1] cosf (float [fp1] x)  */
-
-	.machine power8
-EALIGN(__cosf, 4, 0)
-	addis	r9,r2,L(anchor)@toc@ha
-	addi	r9,r9,L(anchor)@toc@l
-
-	lis	r4,PI_4@h
-	ori	r4,r4,PI_4@l
-
-	xscvdpspn v0,v1
-	mfvsrd	r8,v0
-	rldicl	r3,r8,32,33		/* Remove sign bit.  */
-
-	cmpw	r3,r4
-	bge	L(greater_or_equal_pio4)
-
-	lis	r4,TWO_PN5@h
-	ori	r4,r4,TWO_PN5@l
-
-	cmpw	r3,r4
-	blt	L(less_2pn5)
-
-	/* Chebyshev polynomial of the form:
-	 * 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).  */
-
-	lfd	fp9,(L(C0)-L(anchor))(r9)
-	lfd	fp10,(L(C1)-L(anchor))(r9)
-	lfd	fp11,(L(C2)-L(anchor))(r9)
-	lfd	fp12,(L(C3)-L(anchor))(r9)
-	lfd	fp13,(L(C4)-L(anchor))(r9)
-
-	fmul	fp2,fp1,fp1		/* x^2 */
-	lfd     fp3,(L(DPone)-L(anchor))(r9)
-
-	fmadd	fp4,fp2,fp13,fp12	/* C3+x^2*C4 */
-	fmadd	fp4,fp2,fp4,fp11	/* C2+x^2*(C3+x^2*C4) */
-	fmadd	fp4,fp2,fp4,fp10	/* C1+x^2*(C2+x^2*(C3+x^2*C4)) */
-	fmadd	fp4,fp2,fp4,fp9		/* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */
-	fmadd	fp1,fp2,fp4,fp3		/* 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */
-	frsp	fp1,fp1			/* Round to single precision.  */
-
-	blr
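For reference, the same evaluation in C -- a sketch with C0-C4 transcribed
from the constant table at the end of this file:

    static double cos_poly (double x)
    {
      const double C0 = -0x1.ffffffffe98aep-2,  C1 = 0x1.55555545c50c7p-5,
                   C2 = -0x1.6c16b348b6874p-10, C3 = 0x1.a00eb9ac43cc0p-16,
                   C4 = -0x1.23c97dd8844d7p-22;
      double z = x * x;                         /* Horner's rule in x^2 */
      return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4))));
    }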
-
-	.balign 16
-L(greater_or_equal_pio4):
-	lis	r4,NINEPI_4@h
-	ori	r4,r4,NINEPI_4@l
-	cmpw	r3,r4
-	bge	L(greater_or_equal_9pio4)
-
-	/* Calculate quotient of |x|/(PI/4).  */
-	lfd	fp2,(L(invpio4)-L(anchor))(r9)
-	fabs	fp1,fp1			/* |x| */
-	fmul	fp2,fp1,fp2		/* |x|/(PI/4) */
-	fctiduz	fp2,fp2
-	mfvsrd	r3,v2			/* n = |x| / (PI/4) */
-
-	/* Now use that quotient to find |x| mod (PI/2).  */
-	addi	r7,r3,1
-	rldicr	r5,r7,2,60		/* ((n+1) >> 1) << 3 */
-	addi	r6,r9,(L(pio2_table)-L(anchor))
-	lfdx	fp4,r5,r6
-	fsub	fp1,fp1,fp4
-
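This reduction and the quadrant dispatch that follows in L(reduced) can be
modeled in C. An editorial sketch, where libm sin/cos stand in for the
Chebyshev paths and a single multiply replaces the correctly rounded
pio2_table entries:

    #include <math.h>

    float cosf_quadrant_sketch (float x)
    {
      double ax = fabs ((double) x);
      unsigned long n = (unsigned long) (ax * (4.0 / M_PI)); /* |x|/(PI/4) */
      double r = ax - ((n + 1) >> 1) * M_PI_2;  /* now in [-PI/4, PI/4] */
      unsigned long q = n + 3;      /* the asm carries n+1 and adds 2 here */
      double sign = ((q >> 2) & 1) ? -1.0 : 1.0;
      double p = (q & 2) ? cos (r) : sin (r);   /* polynomial stand-ins */
      return (float) (sign * p);
    }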
-	.balign 16
-L(reduced):
-	/* Now we are in the range -PI/4 to PI/4.  */
-
-	/* Work out if we are in a positive or negative primary interval.  */
-	addi    r7,r7,2
-	rldicl	r4,r7,62,63		/* ((n+3) >> 2) & 1 */
-
-	/* Load a 1.0 or -1.0.  */
-	addi	r5,r9,(L(ones)-L(anchor))
-	sldi	r4,r4,3
-	lfdx	fp0,r4,r5
-
-	/* Are we in the primary interval of sin or cos?  */
-	andi.	r4,r7,0x2
-	bne	L(cos)
-
-	/* Chebyshev polynomial of the form:
-	   x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).  */
-
-	lfd	fp9,(L(S0)-L(anchor))(r9)
-	lfd	fp10,(L(S1)-L(anchor))(r9)
-	lfd	fp11,(L(S2)-L(anchor))(r9)
-	lfd	fp12,(L(S3)-L(anchor))(r9)
-	lfd	fp13,(L(S4)-L(anchor))(r9)
-
-	fmul	fp2,fp1,fp1		/* x^2 */
-	fmul	fp3,fp2,fp1		/* x^3 */
-
-	fmadd	fp4,fp2,fp13,fp12	/* S3+x^2*S4 */
-	fmadd	fp4,fp2,fp4,fp11	/* S2+x^2*(S3+x^2*S4) */
-	fmadd	fp4,fp2,fp4,fp10	/* S1+x^2*(S2+x^2*(S3+x^2*S4)) */
-	fmadd	fp4,fp2,fp4,fp9		/* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */
-	fmadd	fp4,fp3,fp4,fp1		/* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */
-	fmul	fp4,fp4,fp0		/* Add in the sign.  */
-	frsp	fp1,fp4			/* Round to single precision.  */
-
-	blr
-
-	.balign 16
-L(cos):
-	/* Chebyshev polynomial of the form:
-	   1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).  */
-
-	lfd	fp9,(L(C0)-L(anchor))(r9)
-	lfd	fp10,(L(C1)-L(anchor))(r9)
-	lfd	fp11,(L(C2)-L(anchor))(r9)
-	lfd	fp12,(L(C3)-L(anchor))(r9)
-	lfd	fp13,(L(C4)-L(anchor))(r9)
-
-	fmul	fp2,fp1,fp1		/* x^2 */
-	lfd	fp3,(L(DPone)-L(anchor))(r9)
-
-	fmadd	fp4,fp2,fp13,fp12	/* C3+x^2*C4 */
-	fmadd	fp4,fp2,fp4,fp11	/* C2+x^2*(C3+x^2*C4) */
-	fmadd	fp4,fp2,fp4,fp10	/* C1+x^2*(C2+x^2*(C3+x^2*C4)) */
-	fmadd	fp4,fp2,fp4,fp9		/* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */
-	fmadd	fp4,fp2,fp4,fp3		/* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */
-	fmul	fp4,fp4,fp0		/* Add in the sign.  */
-	frsp	fp1,fp4			/* Round to single precision.  */
-
-	blr
-
-	.balign 16
-L(greater_or_equal_9pio4):
-	lis	r4,INFINITY@h
-	ori	r4,r4,INFINITY@l
-	cmpw	r3,r4
-	bge	L(inf_or_nan)
-
-	lis	r4,TWO_P23@h
-	ori	r4,r4,TWO_P23@l
-	cmpw	r3,r4
-	bge	L(greater_or_equal_2p23)
-
-	fabs	fp1,fp1			/* |x| */
-
-	/* Calculate quotient of |x|/(PI/4).  */
-	lfd	fp2,(L(invpio4)-L(anchor))(r9)
-
-	lfd     fp3,(L(DPone)-L(anchor))(r9)
-	lfd     fp4,(L(DPhalf)-L(anchor))(r9)
-	fmul    fp2,fp1,fp2             /* |x|/(PI/4) */
-	friz    fp2,fp2                 /* n = floor(|x|/(PI/4)) */
-
-	/* Calculate (n + 1) / 2.  */
-	fadd	fp2,fp2,fp3		/* n + 1 */
-	fmul	fp3,fp2,fp4		/* (n + 1) / 2 */
-	friz	fp3,fp3
-
-	lfd	fp4,(L(pio2hi)-L(anchor))(r9)
-	lfd	fp5,(L(pio2lo)-L(anchor))(r9)
-
-	fmul	fp6,fp4,fp3
-	fadd	fp6,fp6,fp1
-	fmadd	fp1,fp5,fp3,fp6
-
-	fctiduz	fp2,fp2
-	mfvsrd	r7,v2			/* n + 1 */
-
-	b	L(reduced)
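The split constant is what keeps this accurate: L(pio2hi) carries only 32
significant bits of PI/2, so the k*pio2hi product is exact for k up to about
2^22, and L(pio2lo) restores the rest. A C sketch of the three instructions
above:

    #include <math.h>

    double reduce_sketch (double ax, double k)  /* k = floor((n + 1) / 2) */
    {
      const double pio2hi = -0x1.921fb544p+0;        /* L(pio2hi) */
      const double pio2lo = -0x1.0b4611a626332p-34;  /* L(pio2lo) */
      double r = pio2hi * k + ax;  /* fmul+fadd; the product is exact */
      return fma (pio2lo, k, r);   /* the final fmadd */
    }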
-
-	.balign 16
-L(inf_or_nan):
-	bne	L(skip_errno_setting)	/* Is a NAN?  */
-
-	/* We delayed the creation of the stack frame, as well as the saving of
-	   the link register, because only at this point, we are sure that
-	   doing so is actually needed.  */
-
-	stfd	fp1,-8(r1)
-
-	/* Save the link register.  */
-	mflr	r0
-	std	r0,16(r1)
-	cfi_offset(lr, 16)
-
-	/* Create the stack frame.  */
-	stdu	r1,-FRAMESIZE(r1)
-	cfi_adjust_cfa_offset(FRAMESIZE)
-
-	bl	JUMPTARGET(__errno_location)
-	nop
-
-	/* Restore the stack frame.  */
-	addi	r1,r1,FRAMESIZE
-	cfi_adjust_cfa_offset(-FRAMESIZE)
-	/* Restore the link register.  */
-	ld	r0,16(r1)
-	mtlr	r0
-
-	lfd	fp1,-8(r1)
-
-	/* errno = EDOM */
-	li	r4,EDOM
-	stw	r4,0(r3)
-
-L(skip_errno_setting):
-	fsub	fp1,fp1,fp1		/* x - x */
-	blr
-
-	.balign 16
-L(greater_or_equal_2p23):
-	fabs	fp1,fp1
-
-	srwi	r4,r3,FLOAT_EXPONENT_SHIFT
-	subi	r4,r4,FLOAT_EXPONENT_BIAS
-
-	/* We reduce the input modulo pi/4, so we need 3 bits of integer
-	   to determine where in 2*pi we are. Index into our array
-	   accordingly.  */
-	addi r4,r4,INTEGER_BITS
-
-	/* To avoid an expensive divide, for the range we care about (0 - 127)
-	   we can transform x/28 into:
-
-	   x/28 = (x * ((0x100000000 / 28) + 1)) >> 32
-
-	   mulhwu returns the top 32 bits of the 64 bit result, doing the
-	   shift for us in the same instruction. The top 32 bits are undefined,
-	   so we have to mask them.  */
-
-	lis	r6,FX_FRACTION_1_28@h
-	ori	r6,r6,FX_FRACTION_1_28@l
-	mulhwu	r5,r4,r6
-	clrldi	r5,r5,32
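A quick C check of the identity in the comment (note the constant 0x9249250
sits a few units above floor(0x100000000/28) + 1 = 0x924924a, but any
multiplier in that window gives exact quotients over 0-127):

    #include <assert.h>
    #include <stdint.h>

    int main (void)
    {
      for (uint32_t x = 0; x <= 127; x++)
        assert ((uint32_t) (((uint64_t) x * 0x9249250u) >> 32) == x / 28u);
      return 0;
    }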
-
-	/* Get our pointer into the invpio4_table array.  */
-	sldi	r4,r5,3
-	addi	r6,r9,(L(invpio4_table)-L(anchor))
-	add	r4,r4,r6
-
-	lfd	fp2,0(r4)
-	lfd	fp3,8(r4)
-	lfd	fp4,16(r4)
-	lfd	fp5,24(r4)
-
-	fmul	fp6,fp2,fp1
-	fmul	fp7,fp3,fp1
-	fmul	fp8,fp4,fp1
-	fmul	fp9,fp5,fp1
-
-	/* Mask off larger integer bits in highest double word that we don't
-	   care about to avoid losing precision when combining with smaller
-	   values.  */
-	fctiduz	fp10,fp6
-	mfvsrd	r7,v10
-	rldicr	r7,r7,0,(63-INTEGER_BITS)
-	mtvsrd	v10,r7
-	fcfidu	fp10,fp10		/* Integer bits.  */
-
-	fsub	fp6,fp6,fp10		/* highest -= integer bits */
-
-	/* Work out the integer component, rounded down. Use the top two
-	   limbs for this.  */
-	fadd	fp10,fp6,fp7		/* highest + higher */
-
-	fctiduz	fp10,fp10
-	mfvsrd	r7,v10
-	andi.	r0,r7,1
-	fcfidu	fp10,fp10
-
-	/* Subtract integer component from highest limb.  */
-	fsub	fp12,fp6,fp10
-
-	beq	L(even_integer)
-
-	/* Our integer component is odd, so we are in the -PI/4 to 0 primary
-	   region. We need to shift our result down by PI/4, and to do this
-	   in the mod (4/PI) space we simply subtract 1.  */
-	lfd	fp11,(L(DPone)-L(anchor))(r9)
-	fsub	fp12,fp12,fp11
-
-	/* Now add up all the limbs in order.  */
-	fadd	fp12,fp12,fp7
-	fadd	fp12,fp12,fp8
-	fadd	fp12,fp12,fp9
-
-	/* And finally multiply by pi/4.  */
-	lfd	fp13,(L(pio4)-L(anchor))(r9)
-	fmul	fp1,fp12,fp13
-
-	addi	r7,r7,1
-	b	L(reduced)
-
-L(even_integer):
-	lfd	fp11,(L(DPone)-L(anchor))(r9)
-
-	/* Now add up all the limbs in order.  */
-	fadd	fp12,fp12,fp7
-	fadd	fp12,fp12,fp8
-	fadd	fp12,fp12,fp9
-
-	/* We need to check if the addition of all the limbs resulted in us
-	   overflowing 1.0.  */
-	fcmpu	0,fp12,fp11
-	bgt	L(greater_than_one)
-
-	/* And finally multiply by pi/4.  */
-	lfd	fp13,(L(pio4)-L(anchor))(r9)
-	fmul	fp1,fp12,fp13
-
-	addi	r7,r7,1
-	b	L(reduced)
-
-L(greater_than_one):
-	/* We did overflow 1.0 when adding up all the limbs. Add 1.0 to our
-	   integer, and subtract 1.0 from our result. Since that makes the
-	   integer component odd, we need to subtract another 1.0 as
-	   explained above.  */
-	addi	r7,r7,1
-
-	lfd	fp11,(L(DPtwo)-L(anchor))(r9)
-	fsub	fp12,fp12,fp11
-
-	/* And finally multiply by pi/4.  */
-	lfd	fp13,(L(pio4)-L(anchor))(r9)
-	fmul	fp1,fp12,fp13
-
-	addi	r7,r7,1
-	b	L(reduced)
-
-	.balign 16
-L(less_2pn5):
-	lis	r4,TWO_PN27@h
-	ori	r4,r4,TWO_PN27@l
-
-	cmpw	r3,r4
-	blt	L(less_2pn27)
-
-	/* A simpler Chebyshev approximation is close enough for this range:
-	   1.0+x^2*(CC0+x^3*CC1).  */
-
-	lfd	fp10,(L(CC0)-L(anchor))(r9)
-	lfd	fp11,(L(CC1)-L(anchor))(r9)
-
-	fmul	fp2,fp1,fp1		/* x^2 */
-	fmul	fp3,fp2,fp1		/* x^3 */
-	lfd     fp1,(L(DPone)-L(anchor))(r9)
-
-	fmadd   fp4,fp3,fp11,fp10       /* CC0+x^3*CC1 */
-	fmadd	fp1,fp2,fp4,fp1		/* 1.0+x^2*(CC0+x^3*CC1) */
-
-	frsp	fp1,fp1			/* Round to single precision.  */
-
-	blr
-
-	.balign 16
-L(less_2pn27):
-	/* Handle some special cases:
-
-	   cosf(subnormal) raises inexact
-	   cosf(min_normalized) raises inexact
-	   cosf(normalized) raises inexact.  */
-
-	lfd     fp2,(L(DPone)-L(anchor))(r9)
-
-	fabs    fp1,fp1                 /* |x| */
-	fsub	fp1,fp2,fp1		/* 1.0-|x| */
-
-	frsp	fp1,fp1
-
-	blr
-
-END (__cosf)
-
-	.section .rodata, "a"
-
-	.balign 8
-
-L(anchor):
-
-	/* Chebyshev constants for sin, range -PI/4 - PI/4.  */
-L(S0):	.8byte	0xbfc5555555551cd9
-L(S1):	.8byte	0x3f81111110c2688b
-L(S2):	.8byte	0xbf2a019f8b4bd1f9
-L(S3):	.8byte	0x3ec71d7264e6b5b4
-L(S4):	.8byte	0xbe5a947e1674b58a
-
-	/* Chebyshev constants for cos, range 2^-27 - 2^-5.  */
-L(CC0):	.8byte	0xbfdfffffff5cc6fd
-L(CC1):	.8byte	0x3fa55514b178dac5
-
-	/* Chebyshev constants for cos, range -PI/4 - PI/4.  */
-L(C0):	.8byte	0xbfdffffffffe98ae
-L(C1):	.8byte	0x3fa55555545c50c7
-L(C2):	.8byte	0xbf56c16b348b6874
-L(C3):	.8byte	0x3efa00eb9ac43cc0
-L(C4):	.8byte	0xbe923c97dd8844d7
-
-L(invpio2):
-	.8byte	0x3fe45f306dc9c883	/* 2/PI */
-
-L(invpio4):
-	.8byte	0x3ff45f306dc9c883	/* 4/PI */
-
-L(invpio4_table):
-	.8byte	0x0000000000000000
-	.8byte	0x3ff45f306c000000
-	.8byte	0x3e3c9c882a000000
-	.8byte	0x3c54fe13a8000000
-	.8byte	0x3aaf47d4d0000000
-	.8byte	0x38fbb81b6c000000
-	.8byte	0x3714acc9e0000000
-	.8byte	0x3560e4107c000000
-	.8byte	0x33bca2c756000000
-	.8byte	0x31fbd778ac000000
-	.8byte	0x300b7246e0000000
-	.8byte	0x2e5d2126e8000000
-	.8byte	0x2c97003248000000
-	.8byte	0x2ad77504e8000000
-	.8byte	0x290921cfe0000000
-	.8byte	0x274deb1cb0000000
-	.8byte	0x25829a73e0000000
-	.8byte	0x23fd1046be000000
-	.8byte	0x2224baed10000000
-	.8byte	0x20709d338e000000
-	.8byte	0x1e535a2f80000000
-	.8byte	0x1cef904e64000000
-	.8byte	0x1b0d639830000000
-	.8byte	0x1964ce7d24000000
-	.8byte	0x17b908bf16000000
-
-L(pio4):
-	.8byte	0x3fe921fb54442d18	/* PI/4 */
-
-/* PI/2 as a sum of two doubles. We only use 32 bits of the upper limb
-   to avoid losing significant bits when multiplying with up to
-   (2^22)/(pi/2).  */
-L(pio2hi):
-	.8byte	0xbff921fb54400000
-
-L(pio2lo):
-	.8byte	0xbdd0b4611a626332
-
-L(pio2_table):
-	.8byte	0
-	.8byte	0x3ff921fb54442d18	/* 1 * PI/2 */
-	.8byte	0x400921fb54442d18	/* 2 * PI/2 */
-	.8byte	0x4012d97c7f3321d2	/* 3 * PI/2 */
-	.8byte	0x401921fb54442d18	/* 4 * PI/2 */
-	.8byte	0x401f6a7a2955385e	/* 5 * PI/2 */
-	.8byte	0x4022d97c7f3321d2	/* 6 * PI/2 */
-	.8byte	0x4025fdbbe9bba775	/* 7 * PI/2 */
-	.8byte	0x402921fb54442d18	/* 8 * PI/2 */
-	.8byte	0x402c463abeccb2bb	/* 9 * PI/2 */
-	.8byte	0x402f6a7a2955385e	/* 10 * PI/2 */
-
-L(small):
-	.8byte	0x3cd0000000000000	/* 2^-50 */
-
-L(ones):
-	.8byte	0x3ff0000000000000	/* +1.0 */
-	.8byte	0xbff0000000000000	/* -1.0 */
-
-L(DPhalf):
-	.8byte	0x3fe0000000000000	/* 0.5 */
-
-L(DPone):
-	.8byte	0x3ff0000000000000	/* 1.0 */
-
-L(DPtwo):
-	.8byte	0x4000000000000000	/* 2.0 */
-
-weak_alias(__cosf, cosf)
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S
deleted file mode 100644
index fcdcb60293..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* isfinite().  PowerPC64/POWER8 version.
-   Copyright (C) 2014-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
-
-/* int [r3] __finite ([fp1] x)  */
-
-EALIGN (__finite, 4, 0)
-	CALL_MCOUNT 0
-	MFVSRD_R3_V1
-	lis     r9,0x8010
-	clrldi  r3,r3,1       /* r3 = r3 & 0x7fffffffffffffff  */
-	rldicr  r9,r9,32,31   /* r9 = (r9 << 32) & 0xffffffff00000000  */
-	add     r3,r3,r9
-	rldicl  r3,r3,1,63
-	blr
-END (__finite)
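The trick above in C, for reference: once the sign is cleared, adding
0x8010000000000000 carries out of bit 63 exactly when the exponent field is
all ones, so the surviving top bit is 1 for finite values and 0 for Inf/NaN
(memcpy stands in for mfvsrd):

    #include <stdint.h>
    #include <string.h>

    int finite_sketch (double x)
    {
      uint64_t i;
      memcpy (&i, &x, sizeof i);           /* MFVSRD_R3_V1 */
      i &= 0x7fffffffffffffffULL;          /* clrldi: drop the sign bit */
      return (i + 0x8010000000000000ULL) >> 63;  /* 1 iff exponent < 0x7ff */
    }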
-
-hidden_def (__finite)
-weak_alias (__finite, finite)
-
-/* It turns out that the 'double' version will also always work for
-   single-precision.  */
-strong_alias (__finite, __finitef)
-hidden_def (__finitef)
-weak_alias (__finitef, finitef)
-
-#if IS_IN (libm)
-# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0)
-compat_symbol (libm, __finite, __finitel, GLIBC_2_0)
-compat_symbol (libm, finite, finitel, GLIBC_2_0)
-# endif
-#else
-# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
-compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
-compat_symbol (libc, finite, finitel, GLIBC_2_0);
-# endif
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S
deleted file mode 100644
index 54bd94176d..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S
+++ /dev/null
@@ -1 +0,0 @@
-/* This function uses the same code as s_finite.S.  */
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S
deleted file mode 100644
index 32814e4525..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S
+++ /dev/null
@@ -1,61 +0,0 @@
-/* isinf().  PowerPC64/POWER8 version.
-   Copyright (C) 2014-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
-
-/* int [r3] __isinf([fp1] x)  */
-
-EALIGN (__isinf, 4, 0)
-	CALL_MCOUNT 0
-	MFVSRD_R3_V1
-	lis     r9,0x7ff0     /* r9 = 0x7ff0  */
-	rldicl  r10,r3,0,1    /* r10 = r3 & 0x7fffffffffffffff  */
-	sldi    r9,r9,32      /* r9 = r9 << 32  */
-	cmpd    cr7,r10,r9    /* fp1 & 0x7ff0000000000000 ?  */
-	beq     cr7,L(inf)
-	li      r3,0          /* Not inf  */
-	blr
-L(inf):
-	sradi   r3,r3,63      /* r3 = r3 >> 63  */
-	ori     r3,r3,1       /* r3 = r3 | 0x1  */
-	blr
-END (__isinf)
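For reference in C: after the exact comparison against the infinity bit
pattern, the sradi/ori pair turns the sign bit into the +-1 return value
(the arithmetic right shift below models sradi):

    #include <stdint.h>
    #include <string.h>

    int isinf_sketch (double x)
    {
      int64_t i;
      memcpy (&i, &x, sizeof i);
      if ((i & 0x7fffffffffffffffLL) != 0x7ff0000000000000LL)
        return 0;                     /* not infinite */
      return (int) ((i >> 63) | 1);   /* +1 for +Inf, -1 for -Inf */
    }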
-
-hidden_def (__isinf)
-weak_alias (__isinf, isinf)
-
-/* It turns out that the 'double' version will also always work for
-   single-precision.  */
-strong_alias (__isinf, __isinff)
-hidden_def (__isinff)
-weak_alias (__isinff, isinff)
-
-#ifdef NO_LONG_DOUBLE
-strong_alias (__isinf, __isinfl)
-weak_alias (__isinf, isinfl)
-#endif
-
-#if !IS_IN (libm)
-# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
-compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
-compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
-# endif
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S
deleted file mode 100644
index be759e091e..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S
+++ /dev/null
@@ -1 +0,0 @@
-/* This function uses the same code as s_isinf.S.  */
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S
deleted file mode 100644
index af52e502b7..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* isnan().  PowerPC64/POWER8 version.
-   Copyright (C) 2014-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
-
-/* int [r3] __isnan([f1] x)  */
-
-EALIGN (__isnan, 4, 0)
-	CALL_MCOUNT 0
-	MFVSRD_R3_V1
-	lis     r9,0x7ff0
-	clrldi  r3,r3,1       /* r3 = r3 & 0x7fffffffffffffff  */
-	rldicr  r9,r9,32,31   /* r9 = (r9 << 32) & 0xffffffff00000000  */
-	subf    r3,r3,r9
-	rldicl  r3,r3,1,63
-	blr
-END (__isnan)
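Here the borrow does the work: subtracting the sign-cleared bits from
0x7ff0000000000000 goes negative exactly when mantissa bits accompany an
all-ones exponent, i.e. only for NaNs (an infinity gives zero). In C, for
reference:

    #include <stdint.h>
    #include <string.h>

    int isnan_sketch (double x)
    {
      uint64_t i;
      memcpy (&i, &x, sizeof i);
      i &= 0x7fffffffffffffffULL;
      return (0x7ff0000000000000ULL - i) >> 63;  /* 1 iff i > Inf pattern */
    }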
-
-hidden_def (__isnan)
-weak_alias (__isnan, isnan)
-
-/* It turns out that the 'double' version will also always work for
-   single-precision.  */
-strong_alias (__isnan, __isnanf)
-hidden_def (__isnanf)
-weak_alias (__isnanf, isnanf)
-
-#ifdef NO_LONG_DOUBLE
-strong_alias (__isnan, __isnanl)
-weak_alias (__isnan, isnanl)
-#endif
-
-#if !IS_IN (libm)
-# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
-compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
-compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
-# endif
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S
deleted file mode 100644
index b48c85e0d3..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S
+++ /dev/null
@@ -1 +0,0 @@
-/* This function uses the same code as s_isnan.S.  */
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S
deleted file mode 100644
index aa180b6901..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Round double to long int.  POWER8 PowerPC64 version.
-   Copyright (C) 2014-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
-
-/* long long int[r3] __llrint (double x[fp1])  */
-ENTRY (__llrint)
-	CALL_MCOUNT 0
-	fctid	fp1,fp1
-	MFVSRD_R3_V1
-	blr
-END (__llrint)
-
-strong_alias (__llrint, __lrint)
-weak_alias (__llrint, llrint)
-weak_alias (__lrint, lrint)
-
-#ifdef NO_LONG_DOUBLE
-strong_alias (__llrint, __llrintl)
-weak_alias (__llrint, llrintl)
-strong_alias (__lrint, __lrintl)
-weak_alias (__lrint, lrintl)
-#endif
-#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
-compat_symbol (libm, __llrint, llrintl, GLIBC_2_1)
-compat_symbol (libm, __lrint, lrintl, GLIBC_2_1)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S
deleted file mode 100644
index 043fc6a089..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S
+++ /dev/null
@@ -1,48 +0,0 @@
-/* llround function.  POWER8 PowerPC64 version.
-   Copyright (C) 2014-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <endian.h>
-#include <math_ldbl_opt.h>
-
-#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
-
-/* long long [r3] llround (double x [fp1])  */
-
-ENTRY (__llround)
-	CALL_MCOUNT 0
-	frin	fp1,fp1	/* Round to nearest +-0.5.  */
-	fctidz	fp1,fp1	/* Convert To Integer DW round toward 0.  */
-	MFVSRD_R3_V1
-	blr
-END (__llround)
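frin rounds to the nearest integer with ties away from zero and fctidz then
converts toward zero, so the pair implements llround's halfway rule without
disturbing the current rounding mode. The C equivalent of the two
instructions:

    #include <math.h>

    long long llround_sketch (double x)
    {
      double r = round (x);   /* frin: nearest, ties away from zero */
      return (long long) r;   /* fctidz: convert, truncating */
    }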
-
-strong_alias (__llround, __lround)
-weak_alias (__llround, llround)
-weak_alias (__lround, lround)
-
-#ifdef NO_LONG_DOUBLE
-weak_alias (__llround, llroundl)
-strong_alias (__llround, __llroundl)
-weak_alias (__lround, lroundl)
-strong_alias (__lround, __lroundl)
-#endif
-#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
-compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
-compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S
deleted file mode 100644
index fb0add3462..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S
+++ /dev/null
@@ -1,519 +0,0 @@
-/* Optimized sinf().  PowerPC64/POWER8 version.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#define _ERRNO_H	1
-#include <bits/errno.h>
-
-#define FRAMESIZE (FRAME_MIN_SIZE+16)
-
-#define FLOAT_EXPONENT_SHIFT	23
-#define FLOAT_EXPONENT_BIAS	127
-#define INTEGER_BITS		3
-
-#define PI_4		0x3f490fdb	/* PI/4 */
-#define NINEPI_4	0x40e231d6	/* 9 * PI/4 */
-#define TWO_PN5		0x3d000000	/* 2^-5 */
-#define TWO_PN27	0x32000000	/* 2^-27 */
-#define INFINITY	0x7f800000
-#define TWO_P23		0x4b000000	/* 2^23 */
-#define FX_FRACTION_1_28 0x9249250	/* 0x100000000 / 28 + 1 */
-
-	/* Implements the function
-
-	   float [fp1] sinf (float [fp1] x)  */
-
-	.machine power8
-EALIGN(__sinf, 4, 0)
-	addis	r9,r2,L(anchor)@toc@ha
-	addi	r9,r9,L(anchor)@toc@l
-
-	lis	r4,PI_4@h
-	ori	r4,r4,PI_4@l
-
-	xscvdpspn v0,v1
-	mfvsrd	r8,v0
-	rldicl	r3,r8,32,33		/* Remove sign bit.  */
-
-	cmpw	r3,r4
-	bge	L(greater_or_equal_pio4)
-
-	lis	r4,TWO_PN5@h
-	ori	r4,r4,TWO_PN5@l
-
-	cmpw	r3,r4
-	blt	L(less_2pn5)
-
-	/* Chebyshev polynomial of the form:
-	 * x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).  */
-
-	lfd	fp9,(L(S0)-L(anchor))(r9)
-	lfd	fp10,(L(S1)-L(anchor))(r9)
-	lfd	fp11,(L(S2)-L(anchor))(r9)
-	lfd	fp12,(L(S3)-L(anchor))(r9)
-	lfd	fp13,(L(S4)-L(anchor))(r9)
-
-	fmul	fp2,fp1,fp1		/* x^2 */
-	fmul	fp3,fp2,fp1		/* x^3 */
-
-	fmadd	fp4,fp2,fp13,fp12	/* S3+x^2*S4 */
-	fmadd	fp4,fp2,fp4,fp11	/* S2+x^2*(S3+x^2*S4) */
-	fmadd	fp4,fp2,fp4,fp10	/* S1+x^2*(S2+x^2*(S3+x^2*S4)) */
-	fmadd	fp4,fp2,fp4,fp9		/* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */
-	fmadd	fp1,fp3,fp4,fp1		/* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */
-	frsp	fp1,fp1			/* Round to single precision.  */
-
-	blr
-
-	.balign 16
-L(greater_or_equal_pio4):
-	lis	r4,NINEPI_4@h
-	ori	r4,r4,NINEPI_4@l
-	cmpw	r3,r4
-	bge	L(greater_or_equal_9pio4)
-
-	/* Calculate quotient of |x|/(PI/4).  */
-	lfd	fp2,(L(invpio4)-L(anchor))(r9)
-	fabs	fp1,fp1			/* |x| */
-	fmul	fp2,fp1,fp2		/* |x|/(PI/4) */
-	fctiduz	fp2,fp2
-	mfvsrd	r3,v2			/* n = |x| / (PI/4) */
-
-	/* Now use that quotient to find |x| mod (PI/2).  */
-	addi	r7,r3,1
-	rldicr	r5,r7,2,60		/* ((n+1) >> 1) << 3 */
-	addi	r6,r9,(L(pio2_table)-L(anchor))
-	lfdx	fp4,r5,r6
-	fsub	fp1,fp1,fp4
-
-	.balign 16
-L(reduced):
-	/* Now we are in the range -PI/4 to PI/4.  */
-
-	/* Work out if we are in a positive or negative primary interval.  */
-	rldicl	r4,r7,62,63		/* ((n+1) >> 2) & 1 */
-
-	/* We are operating on |x|, so we need to add back the original
-	   sign.  */
-	rldicl	r8,r8,33,63		/* (x >> 31) & 1, ie the sign bit.  */
-	xor	r4,r4,r8		/* 0 if result should be positive,
-					   1 if negative.  */
-
-	/* Load a 1.0 or -1.0.  */
-	addi	r5,r9,(L(ones)-L(anchor))
-	sldi	r4,r4,3
-	lfdx	fp0,r4,r5
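Since sin is odd, the reduction runs on |x| and the original sign bit is
simply xor-ed into the quadrant sign; the result indexes the +-1.0 pair at
L(ones). A small C model (names illustrative):

    #include <stdint.h>

    /* bits = raw single-precision word of x; n1 = n + 1 from the reduction.  */
    static double sign_factor (uint32_t bits, unsigned long n1)
    {
      unsigned long neg = ((n1 >> 2) & 1) ^ ((bits >> 31) & 1);
      return neg ? -1.0 : 1.0;        /* what the lfdx loads from L(ones) */
    }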
-
-	/* Are we in the primary interval of sin or cos?  */
-	andi.	r4,r7,0x2
-	bne	L(cos)
-
-	/* Chebyshev polynomial of the form:
-	   x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).  */
-
-	lfd	fp9,(L(S0)-L(anchor))(r9)
-	lfd	fp10,(L(S1)-L(anchor))(r9)
-	lfd	fp11,(L(S2)-L(anchor))(r9)
-	lfd	fp12,(L(S3)-L(anchor))(r9)
-	lfd	fp13,(L(S4)-L(anchor))(r9)
-
-	fmul	fp2,fp1,fp1		/* x^2 */
-	fmul	fp3,fp2,fp1		/* x^3 */
-
-	fmadd	fp4,fp2,fp13,fp12	/* S3+x^2*S4 */
-	fmadd	fp4,fp2,fp4,fp11	/* S2+x^2*(S3+x^2*S4) */
-	fmadd	fp4,fp2,fp4,fp10	/* S1+x^2*(S2+x^2*(S3+x^2*S4)) */
-	fmadd	fp4,fp2,fp4,fp9		/* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */
-	fmadd	fp4,fp3,fp4,fp1		/* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */
-	fmul	fp4,fp4,fp0		/* Add in the sign.  */
-	frsp	fp1,fp4			/* Round to single precision.  */
-
-	blr
-
-	.balign 16
-L(cos):
-	/* Chebyshev polynomial of the form:
-	   1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).  */
-
-	lfd	fp9,(L(C0)-L(anchor))(r9)
-	lfd	fp10,(L(C1)-L(anchor))(r9)
-	lfd	fp11,(L(C2)-L(anchor))(r9)
-	lfd	fp12,(L(C3)-L(anchor))(r9)
-	lfd	fp13,(L(C4)-L(anchor))(r9)
-
-	fmul	fp2,fp1,fp1		/* x^2 */
-	lfd	fp3,(L(DPone)-L(anchor))(r9)
-
-	fmadd	fp4,fp2,fp13,fp12	/* C3+x^2*C4 */
-	fmadd	fp4,fp2,fp4,fp11	/* C2+x^2*(C3+x^2*C4) */
-	fmadd	fp4,fp2,fp4,fp10	/* C1+x^2*(C2+x^2*(C3+x^2*C4)) */
-	fmadd	fp4,fp2,fp4,fp9		/* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */
-	fmadd	fp4,fp2,fp4,fp3		/* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */
-	fmul	fp4,fp4,fp0		/* Add in the sign.  */
-	frsp	fp1,fp4			/* Round to single precision.  */
-
-	blr
-
-	.balign 16
-L(greater_or_equal_9pio4):
-	lis	r4,INFINITY@h
-	ori	r4,r4,INFINITY@l
-	cmpw	r3,r4
-	bge	L(inf_or_nan)
-
-	lis	r4,TWO_P23@h
-	ori	r4,r4,TWO_P23@l
-	cmpw	r3,r4
-	bge	L(greater_or_equal_2p23)
-
-	fabs	fp1,fp1			/* |x| */
-
-	/* Calculate quotient of |x|/(PI/4).  */
-	lfd	fp2,(L(invpio4)-L(anchor))(r9)
-
-	lfd	fp3,(L(DPone)-L(anchor))(r9)
-	lfd	fp4,(L(DPhalf)-L(anchor))(r9)
-	fmul	fp2,fp1,fp2		/* |x|/(PI/4) */
-	friz	fp2,fp2			/* n = floor(|x|/(PI/4)) */
-
-	/* Calculate (n + 1) / 2.  */
-	fadd	fp2,fp2,fp3		/* n + 1 */
-	fmul	fp3,fp2,fp4		/* (n + 1) / 2 */
-	friz	fp3,fp3
-
-	lfd	fp4,(L(pio2hi)-L(anchor))(r9)
-	lfd	fp5,(L(pio2lo)-L(anchor))(r9)
-
-	fmul	fp6,fp4,fp3
-	fadd	fp6,fp6,fp1
-	fmadd	fp1,fp5,fp3,fp6
-
-	fctiduz	fp2,fp2
-	mfvsrd	r7,v2			/* n + 1 */
-
-	b	L(reduced)
-
-	.balign 16
-L(inf_or_nan):
-	bne	L(skip_errno_setting)	/* Is a NAN?  */
-
-	/* We delayed the creation of the stack frame, as well as the saving of
-	   the link register, because only at this point, we are sure that
-	   doing so is actually needed.  */
-
-	stfd	fp1,-8(r1)
-
-	/* Save the link register.  */
-	mflr	r0
-	std	r0,16(r1)
-	cfi_offset(lr, 16)
-
-	/* Create the stack frame.  */
-	stdu	r1,-FRAMESIZE(r1)
-	cfi_adjust_cfa_offset(FRAMESIZE)
-
-	bl	JUMPTARGET(__errno_location)
-	nop
-
-	/* Restore the stack frame.  */
-	addi	r1,r1,FRAMESIZE
-	cfi_adjust_cfa_offset(-FRAMESIZE)
-	/* Restore the link register.  */
-	ld	r0,16(r1)
-	mtlr	r0
-
-	lfd	fp1,-8(r1)
-
-	/* errno = EDOM */
-	li	r4,EDOM
-	stw	r4,0(r3)
-
-L(skip_errno_setting):
-	fsub	fp1,fp1,fp1		/* x - x */
-	blr
-
-	.balign 16
-L(greater_or_equal_2p23):
-	fabs	fp1,fp1
-
-	srwi	r4,r3,FLOAT_EXPONENT_SHIFT
-	subi	r4,r4,FLOAT_EXPONENT_BIAS
-
-	/* We reduce the input modulo pi/4, so we need 3 bits of integer
-	   to determine where in 2*pi we are. Index into our array
-	   accordingly.  */
-	addi r4,r4,INTEGER_BITS
-
-	/* To avoid an expensive divide, for the range we care about (0 - 127)
-	   we can transform x/28 into:
-
-	   x/28 = (x * ((0x100000000 / 28) + 1)) >> 32
-
-	   mulhwu returns the top 32 bits of the 64 bit result, doing the
-	   shift for us in the same instruction. The top 32 bits are undefined,
-	   so we have to mask them.  */
-
-	lis	r6,FX_FRACTION_1_28@h
-	ori	r6,r6,FX_FRACTION_1_28@l
-	mulhwu	r5,r4,r6
-	clrldi	r5,r5,32
-
-	/* Get our pointer into the invpio4_table array.  */
-	sldi	r4,r5,3
-	addi	r6,r9,(L(invpio4_table)-L(anchor))
-	add	r4,r4,r6
-
-	lfd	fp2,0(r4)
-	lfd	fp3,8(r4)
-	lfd	fp4,16(r4)
-	lfd	fp5,24(r4)
-
-	fmul	fp6,fp2,fp1
-	fmul	fp7,fp3,fp1
-	fmul	fp8,fp4,fp1
-	fmul	fp9,fp5,fp1
-
-	/* Mask off larger integer bits in highest double word that we don't
-	   care about to avoid losing precision when combining with smaller
-	   values.  */
-	fctiduz	fp10,fp6
-	mfvsrd	r7,v10
-	rldicr	r7,r7,0,(63-INTEGER_BITS)
-	mtvsrd	v10,r7
-	fcfidu	fp10,fp10		/* Integer bits.  */
-
-	fsub	fp6,fp6,fp10		/* highest -= integer bits */
-
-	/* Work out the integer component, rounded down. Use the top two
-	   limbs for this.  */
-	fadd	fp10,fp6,fp7		/* highest + higher */
-
-	fctiduz	fp10,fp10
-	mfvsrd	r7,v10
-	andi.	r0,r7,1
-	fcfidu	fp10,fp10
-
-	/* Subtract integer component from highest limb.  */
-	fsub	fp12,fp6,fp10
-
-	beq	L(even_integer)
-
-	/* Our integer component is odd, so we are in the -PI/4 to 0 primary
-	   region. We need to shift our result down by PI/4, and to do this
-	   in the mod (4/PI) space we simply subtract 1.  */
-	lfd	fp11,(L(DPone)-L(anchor))(r9)
-	fsub	fp12,fp12,fp11
-
-	/* Now add up all the limbs in order.  */
-	fadd	fp12,fp12,fp7
-	fadd	fp12,fp12,fp8
-	fadd	fp12,fp12,fp9
-
-	/* And finally multiply by pi/4.  */
-	lfd	fp13,(L(pio4)-L(anchor))(r9)
-	fmul	fp1,fp12,fp13
-
-	addi	r7,r7,1
-	b	L(reduced)
-
-L(even_integer):
-	lfd	fp11,(L(DPone)-L(anchor))(r9)
-
-	/* Now add up all the limbs in order.  */
-	fadd	fp12,fp12,fp7
-	fadd	fp12,fp12,fp8
-	fadd	fp12,fp12,fp9
-
-	/* We need to check if the addition of all the limbs resulted in us
-	   overflowing 1.0.  */
-	fcmpu	0,fp12,fp11
-	bgt	L(greater_than_one)
-
-	/* And finally multiply by pi/4.  */
-	lfd	fp13,(L(pio4)-L(anchor))(r9)
-	fmul	fp1,fp12,fp13
-
-	addi	r7,r7,1
-	b	L(reduced)
-
-L(greater_than_one):
-	/* We did overflow 1.0 when adding up all the limbs. Add 1.0 to our
-	   integer, and subtract 1.0 from our result. Since that makes the
-	   integer component odd, we need to subtract another 1.0 as
-	   explained above.  */
-	addi	r7,r7,1
-
-	lfd	fp11,(L(DPtwo)-L(anchor))(r9)
-	fsub	fp12,fp12,fp11
-
-	/* And finally multiply by pi/4.  */
-	lfd	fp13,(L(pio4)-L(anchor))(r9)
-	fmul	fp1,fp12,fp13
-
-	addi	r7,r7,1
-	b	L(reduced)
-
-	.balign 16
-L(less_2pn5):
-	lis	r4,TWO_PN27@h
-	ori	r4,r4,TWO_PN27@l
-
-	cmpw	r3,r4
-	blt	L(less_2pn27)
-
-	/* A simpler Chebyshev approximation is close enough for this range:
-	   x+x^3*(SS0+x^2*SS1).  */
-
-	lfd	fp10,(L(SS0)-L(anchor))(r9)
-	lfd	fp11,(L(SS1)-L(anchor))(r9)
-
-	fmul	fp2,fp1,fp1		/* x^2 */
-	fmul	fp3,fp2,fp1		/* x^3 */
-
-	fmadd	fp4,fp2,fp11,fp10	/* SS0+x^2*SS1 */
-	fmadd	fp1,fp3,fp4,fp1		/* x+x^3*(SS0+x^2*SS1) */
-
-	frsp	fp1,fp1			/* Round to single precision.  */
-
-	blr
-
-	.balign 16
-L(less_2pn27):
-	cmpwi	r3,0
-	beq	L(zero)
-
-	/* Handle some special cases:
-
-	   sinf(subnormal) raises inexact/underflow
-	   sinf(min_normalized) raises inexact/underflow
-	   sinf(normalized) raises inexact.  */
-
-	lfd	fp2,(L(small)-L(anchor))(r9)
-
-	fmul	fp2,fp1,fp2		/* x * small */
-	fsub	fp1,fp1,fp2		/* x - x * small */
-
-	frsp	fp1,fp1
-
-	blr
-
-	.balign 16
-L(zero):
-	blr
-
-END (__sinf)
-
-	.section .rodata, "a"
-
-	.balign 8
-
-L(anchor):
-
-	/* Chebyshev constants for sin, range -PI/4 - PI/4.  */
-L(S0):	.8byte	0xbfc5555555551cd9
-L(S1):	.8byte	0x3f81111110c2688b
-L(S2):	.8byte	0xbf2a019f8b4bd1f9
-L(S3):	.8byte	0x3ec71d7264e6b5b4
-L(S4):	.8byte	0xbe5a947e1674b58a
-
-	/* Chebyshev constants for sin, range 2^-27 - 2^-5.  */
-L(SS0):	.8byte	0xbfc555555543d49d
-L(SS1):	.8byte	0x3f8110f475cec8c5
-
-	/* Chebyshev constants for cos, range -PI/4 - PI/4.  */
-L(C0):	.8byte	0xbfdffffffffe98ae
-L(C1):	.8byte	0x3fa55555545c50c7
-L(C2):	.8byte	0xbf56c16b348b6874
-L(C3):	.8byte	0x3efa00eb9ac43cc0
-L(C4):	.8byte	0xbe923c97dd8844d7
-
-L(invpio2):
-	.8byte	0x3fe45f306dc9c883	/* 2/PI */
-
-L(invpio4):
-	.8byte	0x3ff45f306dc9c883	/* 4/PI */
-
-L(invpio4_table):
-	.8byte	0x0000000000000000
-	.8byte	0x3ff45f306c000000
-	.8byte	0x3e3c9c882a000000
-	.8byte	0x3c54fe13a8000000
-	.8byte	0x3aaf47d4d0000000
-	.8byte	0x38fbb81b6c000000
-	.8byte	0x3714acc9e0000000
-	.8byte	0x3560e4107c000000
-	.8byte	0x33bca2c756000000
-	.8byte	0x31fbd778ac000000
-	.8byte	0x300b7246e0000000
-	.8byte	0x2e5d2126e8000000
-	.8byte	0x2c97003248000000
-	.8byte	0x2ad77504e8000000
-	.8byte	0x290921cfe0000000
-	.8byte	0x274deb1cb0000000
-	.8byte	0x25829a73e0000000
-	.8byte	0x23fd1046be000000
-	.8byte	0x2224baed10000000
-	.8byte	0x20709d338e000000
-	.8byte	0x1e535a2f80000000
-	.8byte	0x1cef904e64000000
-	.8byte	0x1b0d639830000000
-	.8byte	0x1964ce7d24000000
-	.8byte	0x17b908bf16000000
-
-L(pio4):
-	.8byte	0x3fe921fb54442d18	/* PI/4 */
-
-/* PI/2 as a sum of two doubles. We only use 32 bits of the upper limb
-   to avoid losing significant bits when multiplying with up to
-   (2^22)/(pi/2).  */
-L(pio2hi):
-	.8byte	0xbff921fb54400000
-
-L(pio2lo):
-	.8byte	0xbdd0b4611a626332
-
-L(pio2_table):
-	.8byte	0
-	.8byte	0x3ff921fb54442d18	/* 1 * PI/2 */
-	.8byte	0x400921fb54442d18	/* 2 * PI/2 */
-	.8byte	0x4012d97c7f3321d2	/* 3 * PI/2 */
-	.8byte	0x401921fb54442d18	/* 4 * PI/2 */
-	.8byte	0x401f6a7a2955385e	/* 5 * PI/2 */
-	.8byte	0x4022d97c7f3321d2	/* 6 * PI/2 */
-	.8byte	0x4025fdbbe9bba775	/* 7 * PI/2 */
-	.8byte	0x402921fb54442d18	/* 8 * PI/2 */
-	.8byte	0x402c463abeccb2bb	/* 9 * PI/2 */
-	.8byte	0x402f6a7a2955385e	/* 10 * PI/2 */
-
-L(small):
-	.8byte	0x3cd0000000000000	/* 2^-50 */
-
-L(ones):
-	.8byte	0x3ff0000000000000	/* +1.0 */
-	.8byte	0xbff0000000000000	/* -1.0 */
-
-L(DPhalf):
-	.8byte	0x3fe0000000000000	/* 0.5 */
-
-L(DPone):
-	.8byte	0x3ff0000000000000	/* 1.0 */
-
-L(DPtwo):
-	.8byte	0x4000000000000000	/* 2.0 */
-
-weak_alias(__sinf, sinf)
diff --git a/sysdeps/powerpc/powerpc64/power8/memcmp.S b/sysdeps/powerpc/powerpc64/power8/memcmp.S
deleted file mode 100644
index 46b9c0067a..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/memcmp.S
+++ /dev/null
@@ -1,1447 +0,0 @@
-/* Optimized memcmp implementation for POWER7/PowerPC64.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* int [r3] memcmp (const char *s1 [r3],
-		    const char *s2 [r4],
-		    size_t size [r5])  */
-
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MFVRD(r,v)	.long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#ifndef MEMCMP
-# define MEMCMP memcmp
-#endif
-	.machine power7
-EALIGN (MEMCMP, 4, 0)
-	CALL_MCOUNT 3
-
-#define rRTN		r3
-#define rSTR1		r3	/* First string arg.  */
-#define rSTR2		r4	/* Second string arg.  */
-#define rN		r5	/* Max string length.  */
-#define rWORD1		r6	/* Current word in s1.  */
-#define rWORD2		r7	/* Current word in s2.  */
-#define rWORD3		r8	/* Next word in s1.  */
-#define rWORD4		r9	/* Next word in s2.  */
-#define rWORD5		r10	/* Next word in s1.  */
-#define rWORD6		r11	/* Next word in s2.  */
-
-#define rOFF8		r20	/* 8 bytes offset.  */
-#define rOFF16  	r21	/* 16 bytes offset.  */
-#define rOFF24		r22	/* 24 bytes offset.  */
-#define rOFF32		r23	/* 32 bytes offset.  */
-#define rWORD6_SHIFT	r24	/* Left rotation temp for rWORD8.  */
-#define rWORD4_SHIFT	r25	/* Left rotation temp for rWORD6.  */
-#define rWORD2_SHIFT	r26	/* Left rotation temp for rWORD4.  */
-#define rWORD8_SHIFT	r27	/* Left rotation temp for rWORD2.  */
-#define rSHR		r28	/* Unaligned shift right count.  */
-#define rSHL		r29	/* Unaligned shift left count.  */
-#define rWORD7		r30	/* Next word in s1.  */
-#define rWORD8		r31	/* Next word in s2.  */
-
-#define rWORD8SAVE	(-8)
-#define rWORD7SAVE	(-16)
-#define rOFF8SAVE	(-24)
-#define rOFF16SAVE	(-32)
-#define rOFF24SAVE	(-40)
-#define rOFF32SAVE	(-48)
-#define rSHRSAVE	(-56)
-#define rSHLSAVE	(-64)
-#define rWORD8SHIFTSAVE	(-72)
-#define rWORD2SHIFTSAVE	(-80)
-#define rWORD4SHIFTSAVE	(-88)
-#define rWORD6SHIFTSAVE	(-96)
-
-#ifdef __LITTLE_ENDIAN__
-# define LD	ldbrx
-#else
-# define LD	ldx
-#endif
-
-	xor	r10, rSTR2, rSTR1
-	cmpldi	cr6, rN, 0
-	cmpldi	cr1, rN, 8
-	clrldi.	r0, r10, 61
-	clrldi	r12, rSTR1, 61
-	cmpldi	cr5, r12, 0
-	beq-	cr6, L(zeroLength)
-	dcbt	0, rSTR1
-	dcbt	0, rSTR2
-	/* If less than 8 bytes or not aligned, use the unaligned
-	   byte loop.  */
-	blt	cr1, L(bytealigned)
-	bne	L(unalignedqw)
-/* At this point we know both strings have the same alignment and the
-   compare length is at least 8 bytes.  r12 contains the low order
-   3 bits of rSTR1 and cr5 contains the result of the logical compare
-   of r12 to 0.  If r12 == 0 then we are already double word
-   aligned and can perform the DW aligned loop.  */
-
-	.align	4
-L(samealignment):
-	or	r11, rSTR2, rSTR1
-	clrldi.	r11, r11, 60
-	beq	L(qw_align)
-	/* Try to align to QW else proceed to DW loop.  */
-	clrldi.	r10, r10, 60
-	bne	L(DW)
-	/* For the difference to reach QW alignment, load as DW.  */
-	clrrdi	rSTR1, rSTR1, 3
-	clrrdi	rSTR2, rSTR2, 3
-	subfic	r10, r12, 8
-	LD	rWORD1, 0, rSTR1
-	LD	rWORD2, 0, rSTR2
-	sldi	r9, r10, 3
-	subfic	r9, r9, 64
-	sld	rWORD1, rWORD1, r9
-	sld	rWORD2, rWORD2, r9
-	cmpld	cr6, rWORD1, rWORD2
-	addi	rSTR1, rSTR1, 8
-	addi	rSTR2, rSTR2, 8
-	bne	cr6, L(ret_diff)
-	subf	rN, r10, rN
-
-	cmpld	cr6, r11, r12
-	bgt	cr6, L(qw_align)
-	LD	rWORD1, 0, rSTR1
-	LD	rWORD2, 0, rSTR2
-	cmpld	cr6, rWORD1, rWORD2
-	addi	rSTR1, rSTR1, 8
-	addi	rSTR2, rSTR2, 8
-	bne	cr6, L(different)
-	cmpldi	cr6, rN, 8
-	ble	cr6, L(zeroLength)
-	addi	rN, rN, -8
-	/* Now both rSTR1 and rSTR2 are aligned to QW.  */
-	.align	4
-L(qw_align):
-	vspltisb	v0, 0
-	srdi.	r6, rN, 6
-	li	r8, 16
-	li	r10, 32
-	li	r11, 48
-	ble	cr0, L(lessthan64)
-	mtctr	r6
-	vspltisb	v8, 0
-	vspltisb	v6, 0
-	/* Aligned vector loop.  */
-	.align	4
-L(aligned_loop):
-	lvx	v4, 0, rSTR1
-	lvx	v5, 0, rSTR2
-	vcmpequb.	v7, v6, v8
-	bnl	cr6, L(different3)
-	lvx	v6, rSTR1, r8
-	lvx	v8, rSTR2, r8
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different2)
-	lvx	v4, rSTR1, r10
-	lvx	v5, rSTR2, r10
-	vcmpequb.	v7, v6, v8
-	bnl	cr6, L(different3)
-	lvx	v6, rSTR1, r11
-	lvx	v8, rSTR2, r11
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different2)
-	addi	rSTR1, rSTR1, 64
-	addi	rSTR2, rSTR2, 64
-	bdnz	L(aligned_loop)
-	vcmpequb.	v7, v6, v8
-	bnl	cr6, L(different3)
-	clrldi	rN, rN, 58
-	/* Handle remainder for aligned loop.  */
-	.align	4
-L(lessthan64):
-	mr	r9, rSTR1
-	cmpdi	cr6, rN, 0
-	li	rSTR1, 0
-	blelr	cr6
-	lvx	v4, 0, r9
-	lvx	v5, 0, rSTR2
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different1)
-	addi	rN, rN, -16
-
-	cmpdi	cr6, rN, 0
-	blelr	cr6
-	lvx	v4, r9, r8
-	lvx	v5, rSTR2, r8
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different1)
-	addi	rN, rN, -16
-
-	cmpdi	cr6, rN, 0
-	blelr	cr6
-	lvx	v4, r9, r10
-	lvx	v5, rSTR2, r10
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different1)
-	addi	rN, rN, -16
-
-	cmpdi	cr6, rN, 0
-	blelr	cr6
-	lvx	v4, r9, r11
-	lvx	v5, rSTR2, r11
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different1)
-	blr
-
-	/* Calculate and return the difference.  */
-	.align 4
-L(different1):
-	cmpdi	cr6, rN, 16
-	bge	cr6, L(different2)
-	/* Discard unwanted bytes.  */
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v1, 0, rN
-	vperm	v4, v4, v0, v1
-	vperm	v5, v5, v0, v1
-#else
-	lvsl	v1, 0, rN
-	vperm	v4, v0, v4, v1
-	vperm	v5, v0, v5, v1
-#endif
-	vcmpequb.	v7, v4, v5
-	li	rRTN, 0
-	bltlr	cr6
-	.align 4
-L(different2):
-#ifdef __LITTLE_ENDIAN__
-	/* Reverse bytes for direct comparison.  */
-	lvsl	v10, r0, r0
-	vspltisb	v8, 15
-	vsububm	v9, v8, v10
-	vperm	v4, v4, v0, v9
-	vperm	v5, v5, v0, v9
-#endif
-	MFVRD(r7, v4)
-	MFVRD(r9, v5)
-	cmpld	cr6, r7, r9
-	bne	cr6, L(ret_diff)
-	/* Difference in second DW.  */
-	vsldoi	v4, v4, v4, 8
-	vsldoi	v5, v5, v5, 8
-	MFVRD(r7, v4)
-	MFVRD(r9, v5)
-	cmpld	cr6, r7, r9
-L(ret_diff):
-	li	rRTN, 1
-	bgtlr	cr6
-	li	rRTN, -1
-	blr
-	.align	4
-L(different3):
-#ifdef __LITTLE_ENDIAN__
-	/* Reverse bytes for direct comparison.  */
-	vspltisb	v9, 15
-	lvsl	v10, r0, r0
-	vsububm	v9, v9, v10
-	vperm	v6, v6, v0, v9
-	vperm	v8, v8, v0, v9
-#endif
-	MFVRD(r7, v6)
-	MFVRD(r9, v8)
-	cmpld	cr6, r7, r9
-	bne	cr6, L(ret_diff)
-	/* Difference in second DW.  */
-	vsldoi	v6, v6, v6, 8
-	vsldoi	v8, v8, v8, 8
-	MFVRD(r7, v6)
-	MFVRD(r9, v8)
-	cmpld	cr6, r7, r9
-	li	rRTN, 1
-	bgtlr	cr6
-	li	rRTN, -1
-	blr
-
-	.align 4
-L(different):
-	cmpldi	cr7, rN, 8
-	bgt	cr7, L(end)
-	/* Skip unwanted bytes.  */
-	sldi	r8, rN, 3
-	subfic	r8, r8, 64
-	srd	rWORD1, rWORD1, r8
-	srd	rWORD2, rWORD2, r8
-	cmpld	cr6, rWORD1, rWORD2
-	li	rRTN, 0
-	beqlr	cr6
-L(end):
-	li	rRTN, 1
-	bgtlr	cr6
-	li	rRTN, -1
-	blr
-
-	.align	4
-L(unalignedqw):
-	/* Proceed to the DW unaligned loop if there is a chance of a page cross.  */
-	rldicl	r9, rSTR1, 0, 52
-	add	r9, r9, rN
-	cmpldi	cr0, r9, 4096-16
-	bgt	cr0, L(unaligned)
-	rldicl	r9, rSTR2, 0, 52
-	add	r9, r9, rN
-	cmpldi	cr0, r9, 4096-16
-	bgt	cr0, L(unaligned)
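
   A hedged C rendition of the page-cross guard above (4 KiB pages and
   16-byte vector loads assumed; names are illustrative):

       #include <stdint.h>
       #include <stddef.h>

       /* True when a 16B load issued anywhere in the first n bytes of p
          could touch the next page, in which case the vector path is
          avoided.  */
       static int
       may_cross_page (const void *p, size_t n)
       {
         return (((uintptr_t) p & 0xFFF) + n) > 4096 - 16;
       }
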
-	li	r0, 0
-	li	r8, 16
-	vspltisb	v0, 0
-	/* Check if rSTR1 is aligned to QW.  */
-	andi.	r11, rSTR1, 0xF
-	beq	L(s1_align)
-
-	/* Compare 16B and align S1 to QW.  */
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v10, 0, rSTR1	/* Compute mask.  */
-	lvsr	v6, 0, rSTR2	/* Compute mask.  */
-#else
-	lvsl	v10, 0, rSTR1	/* Compute mask.  */
-	lvsl	v6, 0, rSTR2	/* Compute mask.  */
-#endif
-	lvx	v5, 0, rSTR2
-	lvx	v9, rSTR2, r8
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v9, v5, v6
-#else
-	vperm	v5, v5, v9, v6
-#endif
-	lvx	v4, 0, rSTR1
-	lvx	v9, rSTR1, r8
-#ifdef __LITTLE_ENDIAN__
-	vperm	v4, v9, v4, v10
-#else
-	vperm	v4, v4, v9, v10
-#endif
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different1)
-	cmpldi	cr6, rN, 16
-	ble	cr6, L(zeroLength)
-	subfic	r11, r11, 16
-	subf	rN, r11, rN
-	add	rSTR1, rSTR1, r11
-	add	rSTR2, rSTR2, r11
-
-	/* As s1 is QW aligned, prepare for the unaligned loop.  */
-	.align	4
-L(s1_align):
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v6, 0, rSTR2
-#else
-	lvsl	v6, 0, rSTR2
-#endif
-	lvx	v5, 0, rSTR2
-	srdi.	r6, rN, 6
-	li	r10, 32
-	li	r11, 48
-	ble	cr0, L(lessthan64_unalign)
-	mtctr	r6
-	li 	r9, 64
-	/* Unaligned vector loop.  */
-	.align	4
-L(unalign_qwloop):
-	lvx	v4, 0, rSTR1
-	lvx	v10, rSTR2, r8
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v10, v5, v6
-#else
-	vperm	v5, v5, v10, v6
-#endif
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different2)
-	vor	v5, v10, v10
-	lvx	v4, rSTR1, r8
-	lvx	v10, rSTR2, r10
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v10, v5, v6
-#else
-	vperm	v5, v5, v10, v6
-#endif
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different2)
-	vor	v5, v10, v10
-	lvx	v4, rSTR1, r10
-	lvx	v10, rSTR2, r11
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v10, v5, v6
-#else
-	vperm	v5, v5, v10, v6
-#endif
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different2)
-	vor	v5, v10, v10
-	lvx	v4, rSTR1, r11
-	lvx	v10, rSTR2, r9
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v10, v5, v6
-#else
-	vperm	v5, v5, v10, v6
-#endif
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different2)
-	vor	v5, v10, v10
-	addi	rSTR1, rSTR1, 64
-	addi	rSTR2, rSTR2, 64
-	bdnz	L(unalign_qwloop)
-	clrldi	rN, rN, 58
-	/* Handle remainder for unaligned loop.  */
-	.align	4
-L(lessthan64_unalign):
-	mr	r9, rSTR1
-	cmpdi	cr6, rN, 0
-	li	rSTR1, 0
-	blelr	cr6
-	lvx	v4, 0, r9
-	lvx     v10, rSTR2, r8
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v10, v5, v6
-#else
-	vperm	v5, v5, v10, v6
-#endif
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different1)
-	vor	v5, v10, v10
-	addi	rN, rN, -16
-
-	cmpdi	cr6, rN, 0
-	blelr	cr6
-	lvx	v4, r9, r8
-	lvx	v10, rSTR2, r10
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v10, v5, v6
-#else
-	vperm	v5, v5, v10, v6
-#endif
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different1)
-	vor	v5, v10, v10
-	addi	rN, rN, -16
-
-	cmpdi	cr6, rN, 0
-	blelr	cr6
-	lvx	v4, r9, r10
-	lvx	v10, rSTR2, r11
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v10, v5, v6
-#else
-	vperm	v5, v5, v10, v6
-#endif
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different1)
-	vor	v5, v10, v10
-	addi	rN, rN, -16
-
-	cmpdi	cr6, rN, 0
-	blelr	cr6
-	lvx	v4, r9, r11
-	addi	r11, r11, 16
-	lvx	v10, rSTR2, r11
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v10, v5, v6
-#else
-	vperm	v5, v5, v10, v6
-#endif
-	vcmpequb.	v7, v5, v4
-	bnl	cr6, L(different1)
-	blr
-
-/* Otherwise we know the two strings have the same alignment (but not
-   yet DW).  So we force the string addresses to the next lower DW
-   boundary and special case this first DW using shift left to
-   eliminate bits preceding the first byte.  Since we want to join the
-   normal (DW aligned) compare loop, starting at the second double word,
-   we need to adjust the length (rN) and special case the loop
-   versioning for the first DW.  This ensures that the loop count is
-   correct and the first DW (shifted) is in the expected register pair.  */
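
   The first-doubleword special case amounts to rounding both pointers
   down and shifting left so the bytes before the true start fall off; a
   rough C sketch of that idea (big-endian byte order assumed, as the
   sld above implies):

       #include <stdint.h>

       /* offset = low 3 bits of the original address; w was loaded from
          the address rounded down to a DW boundary.  */
       static uint64_t
       drop_leading_bytes (uint64_t w, unsigned offset)
       {
         return w << (offset * 8);  /* discard bytes before the start */
       }
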
-	.align	4
-L(DW):
-	std	rWORD8, rWORD8SAVE(r1)
-	std	rWORD7, rWORD7SAVE(r1)
-	std	rOFF8, rOFF8SAVE(r1)
-	std	rOFF16, rOFF16SAVE(r1)
-	std	rOFF24, rOFF24SAVE(r1)
-	std	rOFF32, rOFF32SAVE(r1)
-	cfi_offset(rWORD8, rWORD8SAVE)
-	cfi_offset(rWORD7, rWORD7SAVE)
-	cfi_offset(rOFF8, rOFF8SAVE)
-	cfi_offset(rOFF16, rOFF16SAVE)
-	cfi_offset(rOFF24, rOFF24SAVE)
-	cfi_offset(rOFF32, rOFF32SAVE)
-
-	li	rOFF8,8
-	li	rOFF16,16
-	li	rOFF24,24
-	li	rOFF32,32
-	clrrdi	rSTR1, rSTR1, 3
-	clrrdi	rSTR2, rSTR2, 3
-	beq	cr5, L(DWaligned)
-	add	rN, rN, r12
-	sldi	rWORD6, r12, 3
-	srdi	r0, rN, 5	/* Divide by 32.  */
-	andi.	r12, rN, 24	/* Get the DW remainder.  */
-	LD	rWORD1, 0, rSTR1
-	LD	rWORD2, 0, rSTR2
-	cmpldi	cr1, r12, 16
-	cmpldi	cr7, rN, 32
-	clrldi	rN, rN, 61
-	beq	L(dPs4)
-	mtctr	r0
-	bgt	cr1, L(dPs3)
-	beq	cr1, L(dPs2)
-
-/* Remainder is 8.  */
-	.align	3
-L(dsP1):
-	sld	rWORD5, rWORD1, rWORD6
-	sld	rWORD6, rWORD2, rWORD6
-	cmpld	cr5, rWORD5, rWORD6
-	blt	cr7, L(dP1x)
-/* Do something useful in this cycle since we have to branch anyway.  */
-	LD	rWORD1, rOFF8, rSTR1
-	LD	rWORD2, rOFF8, rSTR2
-	cmpld	cr7, rWORD1, rWORD2
-	b	L(dP1e)
-/* Remainder is 16.  */
-	.align	4
-L(dPs2):
-	sld	rWORD5, rWORD1, rWORD6
-	sld	rWORD6, rWORD2, rWORD6
-	cmpld	cr6, rWORD5, rWORD6
-	blt	cr7, L(dP2x)
-/* Do something useful in this cycle since we have to branch anyway.  */
-	LD	rWORD7, rOFF8, rSTR1
-	LD	rWORD8, rOFF8, rSTR2
-	cmpld	cr5, rWORD7, rWORD8
-	b	L(dP2e)
-/* Remainder is 24.  */
-	.align	4
-L(dPs3):
-	sld	rWORD3, rWORD1, rWORD6
-	sld	rWORD4, rWORD2, rWORD6
-	cmpld	cr1, rWORD3, rWORD4
-	b	L(dP3e)
-/* Count is a multiple of 32, remainder is 0.  */
-	.align	4
-L(dPs4):
-	mtctr	r0
-	sld	rWORD1, rWORD1, rWORD6
-	sld	rWORD2, rWORD2, rWORD6
-	cmpld	cr7, rWORD1, rWORD2
-	b	L(dP4e)
-
-/* At this point we know both strings are double word aligned and the
-   compare length is at least 8 bytes.  */
-	.align	4
-L(DWaligned):
-	andi.	r12, rN, 24	/* Get the DW remainder.  */
-	srdi	r0, rN, 5	/* Divide by 32.  */
-	cmpldi	cr1, r12, 16
-	cmpldi	cr7, rN, 32
-	clrldi	rN, rN, 61
-	beq	L(dP4)
-	bgt	cr1, L(dP3)
-	beq	cr1, L(dP2)
-
-/* Remainder is 8.  */
-	.align	4
-L(dP1):
-	mtctr	r0
-/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
-   (8-15 byte compare), we want to use only volatile registers.  This
-   means we can avoid restoring non-volatile registers since we did not
-   change any on the early exit path.  The key here is that the
-   non-early-exit path only cares about the condition code (cr5), not
-   about which register pair was used.  */
-	LD	rWORD5, 0, rSTR1
-	LD	rWORD6, 0, rSTR2
-	cmpld	cr5, rWORD5, rWORD6
-	blt	cr7, L(dP1x)
-	LD	rWORD1, rOFF8, rSTR1
-	LD	rWORD2, rOFF8, rSTR2
-	cmpld	cr7, rWORD1, rWORD2
-L(dP1e):
-	LD	rWORD3, rOFF16, rSTR1
-	LD	rWORD4, rOFF16, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-	LD	rWORD5, rOFF24, rSTR1
-	LD	rWORD6, rOFF24, rSTR2
-	cmpld	cr6, rWORD5, rWORD6
-	bne	cr5, L(dLcr5x)
-	bne	cr7, L(dLcr7x)
-
-	LD	rWORD7, rOFF32, rSTR1
-	LD	rWORD8, rOFF32, rSTR2
-	addi	rSTR1, rSTR1, 32
-	addi	rSTR2, rSTR2, 32
-	bne	cr1, L(dLcr1)
-	cmpld	cr5, rWORD7, rWORD8
-	bdnz	L(dLoop)
-	bne	cr6, L(dLcr6)
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-	.align	3
-L(dP1x):
-	sldi.	r12, rN, 3
-	bne	cr5, L(dLcr5x)
-	subfic	rN, r12, 64	/* Shift count is 64 - (rN * 8).  */
-	bne	L(d00)
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	li	rRTN, 0
-	blr
-
-/* Remainder is 16.  */
-	.align	4
-L(dP2):
-	mtctr	r0
-	LD	rWORD5, 0, rSTR1
-	LD	rWORD6, 0, rSTR2
-	cmpld	cr6, rWORD5, rWORD6
-	blt	cr7, L(dP2x)
-	LD	rWORD7, rOFF8, rSTR1
-	LD	rWORD8, rOFF8, rSTR2
-	cmpld	cr5, rWORD7, rWORD8
-L(dP2e):
-	LD	rWORD1, rOFF16, rSTR1
-	LD	rWORD2, rOFF16, rSTR2
-	cmpld	cr7, rWORD1, rWORD2
-	LD	rWORD3, rOFF24, rSTR1
-	LD	rWORD4, rOFF24, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-	addi	rSTR1, rSTR1, 8
-	addi	rSTR2, rSTR2, 8
-	bne	cr6, L(dLcr6)
-	bne	cr5, L(dLcr5)
-	b	L(dLoop2)
-	.align	4
-L(dP2x):
-	LD	rWORD3, rOFF8, rSTR1
-	LD	rWORD4, rOFF8, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-	sldi.	r12, rN, 3
-	bne	cr6, L(dLcr6x)
-	addi	rSTR1, rSTR1, 8
-	addi	rSTR2, rSTR2, 8
-	bne	cr1, L(dLcr1x)
-	subfic	rN, r12, 64	/* Shift count is 64 - (rN * 8).  */
-	bne	L(d00)
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	li	rRTN, 0
-	blr
-
-/* Remainder is 24.  */
-	.align	4
-L(dP3):
-	mtctr	r0
-	LD	rWORD3, 0, rSTR1
-	LD	rWORD4, 0, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-L(dP3e):
-	LD	rWORD5, rOFF8, rSTR1
-	LD	rWORD6, rOFF8, rSTR2
-	cmpld	cr6, rWORD5, rWORD6
-	blt	cr7, L(dP3x)
-	LD	rWORD7, rOFF16, rSTR1
-	LD	rWORD8, rOFF16, rSTR2
-	cmpld	cr5, rWORD7, rWORD8
-	LD	rWORD1, rOFF24, rSTR1
-	LD	rWORD2, rOFF24, rSTR2
-	cmpld	cr7, rWORD1, rWORD2
-	addi	rSTR1, rSTR1, 16
-	addi	rSTR2, rSTR2, 16
-	bne	cr1, L(dLcr1)
-	bne	cr6, L(dLcr6)
-	b	L(dLoop1)
-/* Again we are on an early exit path (24-31 byte compare); we want to
-   use only volatile registers and avoid restoring non-volatile
-   registers.  */
-	.align	4
-L(dP3x):
-	LD	rWORD1, rOFF16, rSTR1
-	LD	rWORD2, rOFF16, rSTR2
-	cmpld	cr7, rWORD1, rWORD2
-	sldi.	r12, rN, 3
-	bne	cr1, L(dLcr1x)
-	addi	rSTR1, rSTR1, 16
-	addi	rSTR2, rSTR2, 16
-	bne	cr6, L(dLcr6x)
-	subfic	rN, r12, 64	/* Shift count is 64 - (rN * 8).  */
-	bne	cr7, L(dLcr7x)
-	bne	L(d00)
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	li	rRTN, 0
-	blr
-
-/* Count is a multiple of 32, remainder is 0.  */
-	.align	4
-L(dP4):
-	mtctr	r0
-	LD	rWORD1, 0, rSTR1
-	LD	rWORD2, 0, rSTR2
-	cmpld	cr7, rWORD1, rWORD2
-L(dP4e):
-	LD	rWORD3, rOFF8, rSTR1
-	LD	rWORD4, rOFF8, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-	LD	rWORD5, rOFF16, rSTR1
-	LD	rWORD6, rOFF16, rSTR2
-	cmpld	cr6, rWORD5, rWORD6
-	LD	rWORD7, rOFF24, rSTR1
-	LD	rWORD8, rOFF24, rSTR2
-	addi	rSTR1, rSTR1, 24
-	addi	rSTR2, rSTR2, 24
-	cmpld	cr5, rWORD7, rWORD8
-	bne	cr7, L(dLcr7)
-	bne	cr1, L(dLcr1)
-	bdz-	L(d24)		/* Adjust CTR as we start with +4.  */
-/* This is the primary loop.  */
-	.align	4
-L(dLoop):
-	LD	rWORD1, rOFF8, rSTR1
-	LD	rWORD2, rOFF8, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-	bne	cr6, L(dLcr6)
-L(dLoop1):
-	LD	rWORD3, rOFF16, rSTR1
-	LD	rWORD4, rOFF16, rSTR2
-	cmpld	cr6, rWORD5, rWORD6
-	bne	cr5, L(dLcr5)
-L(dLoop2):
-	LD	rWORD5, rOFF24, rSTR1
-	LD	rWORD6, rOFF24, rSTR2
-	cmpld	cr5, rWORD7, rWORD8
-	bne	cr7, L(dLcr7)
-L(dLoop3):
-	LD	rWORD7, rOFF32, rSTR1
-	LD	rWORD8, rOFF32, rSTR2
-	addi	rSTR1, rSTR1, 32
-	addi	rSTR2, rSTR2, 32
-	bne	cr1, L(dLcr1)
-	cmpld	cr7, rWORD1, rWORD2
-	bdnz	L(dLoop)
-
-L(dL4):
-	cmpld	cr1, rWORD3, rWORD4
-	bne	cr6, L(dLcr6)
-	cmpld	cr6, rWORD5, rWORD6
-	bne	cr5, L(dLcr5)
-	cmpld	cr5, rWORD7, rWORD8
-L(d44):
-	bne	cr7, L(dLcr7)
-L(d34):
-	bne	cr1, L(dLcr1)
-L(d24):
-	bne	cr6, L(dLcr6)
-L(d14):
-	sldi.	r12, rN, 3
-	bne	cr5, L(dLcr5)
-L(d04):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-	subfic	rN, r12, 64	/* Shift count is 64 - (rN * 8).  */
-	beq	L(duzeroLength)
-/* At this point we have a remainder of 1 to 7 bytes to compare.  Since
-   we are aligned it is safe to load the whole double word, and use
-   shift right double to eliminate bits beyond the compare length.  */
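
   A hedged C sketch of this trim (big-endian view; rem is 1 to 7, as
   the comment above states):

       #include <stdint.h>
       #include <string.h>

       static int
       tail_cmp (const unsigned char *s1, const unsigned char *s2,
                 unsigned rem)
       {
         uint64_t w1, w2;
         memcpy (&w1, s1, 8);            /* safe: both DW aligned */
         memcpy (&w2, s2, 8);
         unsigned shift = 64 - rem * 8;  /* the 64 - (rN * 8) above */
         w1 >>= shift;                   /* drop bytes past the length */
         w2 >>= shift;
         return w1 == w2 ? 0 : (w1 > w2 ? 1 : -1);
       }
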
-L(d00):
-	LD	rWORD1, rOFF8, rSTR1
-	LD	rWORD2, rOFF8, rSTR2
-	srd	rWORD1, rWORD1, rN
-	srd	rWORD2, rWORD2, rN
-	cmpld	cr7, rWORD1, rWORD2
-	bne	cr7, L(dLcr7x)
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	li	rRTN, 0
-	blr
-
-	.align	4
-L(dLcr7):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-L(dLcr7x):
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	li	rRTN, 1
-	bgtlr	cr7
-	li	rRTN, -1
-	blr
-	.align	4
-L(dLcr1):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-L(dLcr1x):
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	li	rRTN, 1
-	bgtlr	cr1
-	li	rRTN, -1
-	blr
-	.align	4
-L(dLcr6):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-L(dLcr6x):
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	li	rRTN, 1
-	bgtlr	cr6
-	li	rRTN, -1
-	blr
-	.align	4
-L(dLcr5):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-L(dLcr5x):
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	li	rRTN, 1
-	bgtlr	cr5
-	li	rRTN, -1
-	blr
-
-	.align	4
-L(bytealigned):
-	mtctr	rN
-
-/* We need to prime this loop.  This loop is swing modulo scheduled
-   to avoid pipe delays.  The dependent instruction latency (load to
-   compare to conditional branch) is 2 to 3 cycles.  In this loop each
-   dispatch group ends in a branch and takes 1 cycle.  Effectively
-   the first iteration of the loop only serves to load operands, and
-   branches based on compares are delayed until the next iteration.
-
-   So we must precondition some registers and condition codes so that
-   we don't exit the loop early on the first iteration.  */
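
   Stripped of the software pipelining, the loop body reduces to the
   obvious byte compare; this sketch shows the logic only, not the
   scheduling:

       #include <stddef.h>

       static int
       byte_cmp (const unsigned char *s1, const unsigned char *s2,
                 size_t n)
       {
         while (n--)
           {
             unsigned char a = *s1++;
             unsigned char b = *s2++;
             if (a != b)
               return a - b;
           }
         return 0;
       }
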
-
-	lbz	rWORD1, 0(rSTR1)
-	lbz	rWORD2, 0(rSTR2)
-	bdz	L(b11)
-	cmpld	cr7, rWORD1, rWORD2
-	lbz	rWORD3, 1(rSTR1)
-	lbz	rWORD4, 1(rSTR2)
-	bdz	L(b12)
-	cmpld	cr1, rWORD3, rWORD4
-	lbzu	rWORD5, 2(rSTR1)
-	lbzu	rWORD6, 2(rSTR2)
-	bdz	L(b13)
-	.align	4
-L(bLoop):
-	lbzu	rWORD1, 1(rSTR1)
-	lbzu	rWORD2, 1(rSTR2)
-	bne	cr7, L(bLcr7)
-
-	cmpld	cr6, rWORD5, rWORD6
-	bdz	L(b3i)
-
-	lbzu	rWORD3, 1(rSTR1)
-	lbzu	rWORD4, 1(rSTR2)
-	bne	cr1, L(bLcr1)
-
-	cmpld	cr7, rWORD1, rWORD2
-	bdz	L(b2i)
-
-	lbzu	rWORD5, 1(rSTR1)
-	lbzu	rWORD6, 1(rSTR2)
-	bne	cr6, L(bLcr6)
-
-	cmpld	cr1, rWORD3, rWORD4
-	bdnz	L(bLoop)
-
-/* We speculatively load bytes before we have tested the previous
-   bytes.  But we must avoid overrunning the length (in the ctr) to
-   prevent these speculative loads from causing a segfault.  In that
-   case the loop will exit early (before all pending bytes are
-   tested).  We must then complete the pending operations before
-   returning.  */
-L(b1i):
-	bne	cr7, L(bLcr7)
-	bne	cr1, L(bLcr1)
-	b	L(bx56)
-	.align	4
-L(b2i):
-	bne	cr6, L(bLcr6)
-	bne	cr7, L(bLcr7)
-	b	L(bx34)
-	.align	4
-L(b3i):
-	bne	cr1, L(bLcr1)
-	bne	cr6, L(bLcr6)
-	b	L(bx12)
-	.align	4
-L(bLcr7):
-	li	rRTN, 1
-	bgtlr	cr7
-	li	rRTN, -1
-	blr
-L(bLcr1):
-	li	rRTN, 1
-	bgtlr	cr1
-	li	rRTN, -1
-	blr
-L(bLcr6):
-	li	rRTN, 1
-	bgtlr	cr6
-	li	rRTN, -1
-	blr
-
-L(b13):
-	bne	cr7, L(bx12)
-	bne	cr1, L(bx34)
-L(bx56):
-	sub	rRTN, rWORD5, rWORD6
-	blr
-	nop
-L(b12):
-	bne	cr7, L(bx12)
-L(bx34):
-	sub	rRTN, rWORD3, rWORD4
-	blr
-L(b11):
-L(bx12):
-	sub	rRTN, rWORD1, rWORD2
-	blr
-
-	.align	4
-L(zeroLength):
-	li	rRTN, 0
-	blr
-
-	.align	4
-/* At this point we know the strings have different alignment and the
-   compare length is at least 8 bytes.  r12 contains the low order
-   3 bits of rSTR1 and cr5 contains the result of the logical compare
-   of r12 to 0.  If r12 == 0 then rSTR1 is double word
-   aligned and we can perform the DWunaligned loop.
-
-   Otherwise we know that rSTR1 is not yet DW aligned.
-   So we can force the string addresses to the next lower DW
-   boundary and special case this first DW using shift left to
-   eliminate bits preceding the first byte.  Since we want to join the
-   normal (DWaligned) compare loop, starting at the second double word,
-   we need to adjust the length (rN) and special case the loop
-   versioning for the first DW.  This ensures that the loop count is
-   correct and the first DW (shifted) is in the expected register pair.  */
-L(unaligned):
-	std	rWORD8, rWORD8SAVE(r1)
-	std	rWORD7, rWORD7SAVE(r1)
-	std	rOFF8, rOFF8SAVE(r1)
-	std	rOFF16, rOFF16SAVE(r1)
-	std	rOFF24, rOFF24SAVE(r1)
-	std	rOFF32, rOFF32SAVE(r1)
-	cfi_offset(rWORD8, rWORD8SAVE)
-	cfi_offset(rWORD7, rWORD7SAVE)
-	cfi_offset(rOFF8, rOFF8SAVE)
-	cfi_offset(rOFF16, rOFF16SAVE)
-	cfi_offset(rOFF24, rOFF24SAVE)
-	cfi_offset(rOFF32, rOFF32SAVE)
-	li	rOFF8,8
-	li	rOFF16,16
-	li	rOFF24,24
-	li	rOFF32,32
-	std	rSHL, rSHLSAVE(r1)
-	cfi_offset(rSHL, rSHLSAVE)
-	clrldi	rSHL, rSTR2, 61
-	beq	cr6, L(duzeroLength)
-	std	rSHR, rSHRSAVE(r1)
-	cfi_offset(rSHR, rSHRSAVE)
-	beq	cr5, L(DWunaligned)
-	std	rWORD8_SHIFT, rWORD8SHIFTSAVE(r1)
-	cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE)
-/* Adjust the logical start of rSTR2 to compensate for the extra bits
-   in the 1st rSTR1 DW.  */
-	sub	rWORD8_SHIFT, rSTR2, r12
-/* But do not attempt to address the DW before the one that contains
-   the actual start of rSTR2.  */
-	clrrdi	rSTR2, rSTR2, 3
-	std	rWORD2_SHIFT, rWORD2SHIFTSAVE(r1)
-/* Compute the left/right shift counts for the unaligned rSTR2,
-   compensating for the logical (DW aligned) start of rSTR1.  */
-	clrldi	rSHL, rWORD8_SHIFT, 61
-	clrrdi	rSTR1, rSTR1, 3
-	std	rWORD4_SHIFT, rWORD4SHIFTSAVE(r1)
-	sldi	rSHL, rSHL, 3
-	cmpld	cr5, rWORD8_SHIFT, rSTR2
-	add	rN, rN, r12
-	sldi	rWORD6, r12, 3
-	std	rWORD6_SHIFT, rWORD6SHIFTSAVE(r1)
-	cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE)
-	cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE)
-	cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE)
-	subfic	rSHR, rSHL, 64
-	srdi	r0, rN, 5	/* Divide by 32.  */
-	andi.	r12, rN, 24	/* Get the DW remainder.  */
-/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
-   this special case those bits may be discarded anyway.  Also we
-   must avoid loading a DW where none of the bits are part of rSTR2 as
-   this may cross a page boundary and cause a page fault.  */
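
   The srd/sld/or triplets that follow reconstruct each unaligned rSTR2
   doubleword from two aligned loads; in C (assuming 0 < shl < 64 so
   neither shift is undefined):

       #include <stdint.h>

       /* prev and cur are consecutive aligned doublewords of rSTR2;
          shl is the byte offset times 8, shr = 64 - shl.  */
       static uint64_t
       merge_dw (uint64_t prev, uint64_t cur, unsigned shl, unsigned shr)
       {
         return (prev << shl) | (cur >> shr);
       }
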
-	li	rWORD8, 0
-	blt	cr5, L(dus0)
-	LD	rWORD8, 0, rSTR2
-	addi	rSTR2, rSTR2, 8
-	sld	rWORD8, rWORD8, rSHL
-
-L(dus0):
-	LD	rWORD1, 0, rSTR1
-	LD	rWORD2, 0, rSTR2
-	cmpldi	cr1, r12, 16
-	cmpldi	cr7, rN, 32
-	srd	r12, rWORD2, rSHR
-	clrldi	rN, rN, 61
-	beq	L(duPs4)
-	mtctr	r0
-	or	rWORD8, r12, rWORD8
-	bgt	cr1, L(duPs3)
-	beq	cr1, L(duPs2)
-
-/* Remainder is 8.  */
-	.align	4
-L(dusP1):
-	sld	rWORD8_SHIFT, rWORD2, rSHL
-	sld	rWORD7, rWORD1, rWORD6
-	sld	rWORD8, rWORD8, rWORD6
-	bge	cr7, L(duP1e)
-/* At this point we exit early with the first double word compare
-   complete and a remainder of 0 to 7 bytes.  See L(du14) for details
-   on how we handle the remaining bytes.  */
-	cmpld	cr5, rWORD7, rWORD8
-	sldi.	rN, rN, 3
-	bne	cr5, L(duLcr5)
-	cmpld	cr7, rN, rSHR
-	beq	L(duZeroReturn)
-	li	r0, 0
-	ble	cr7, L(dutrim)
-	LD	rWORD2, rOFF8, rSTR2
-	srd	r0, rWORD2, rSHR
-	b	L(dutrim)
-/* Remainder is 16.  */
-	.align	4
-L(duPs2):
-	sld	rWORD6_SHIFT, rWORD2, rSHL
-	sld	rWORD5, rWORD1, rWORD6
-	sld	rWORD6, rWORD8, rWORD6
-	b	L(duP2e)
-/* Remainder is 24.  */
-	.align	4
-L(duPs3):
-	sld	rWORD4_SHIFT, rWORD2, rSHL
-	sld	rWORD3, rWORD1, rWORD6
-	sld	rWORD4, rWORD8, rWORD6
-	b	L(duP3e)
-/* Count is a multiple of 32, remainder is 0.  */
-	.align	4
-L(duPs4):
-	mtctr	r0
-	or	rWORD8, r12, rWORD8
-	sld	rWORD2_SHIFT, rWORD2, rSHL
-	sld	rWORD1, rWORD1, rWORD6
-	sld	rWORD2, rWORD8, rWORD6
-	b	L(duP4e)
-
-/* At this point we know rSTR1 is double word aligned and the
-   compare length is at least 8 bytes.  */
-	.align	4
-L(DWunaligned):
-	std	rWORD8_SHIFT, rWORD8SHIFTSAVE(r1)
-	clrrdi	rSTR2, rSTR2, 3
-	std	rWORD2_SHIFT, rWORD2SHIFTSAVE(r1)
-	srdi	r0, rN, 5	/* Divide by 32.  */
-	std	rWORD4_SHIFT, rWORD4SHIFTSAVE(r1)
-	andi.	r12, rN, 24	/* Get the DW remainder.  */
-	std	rWORD6_SHIFT, rWORD6SHIFTSAVE(r1)
-	cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE)
-	cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE)
-	cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE)
-	cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE)
-	sldi	rSHL, rSHL, 3
-	LD	rWORD6, 0, rSTR2
-	LD	rWORD8, rOFF8, rSTR2
-	addi	rSTR2, rSTR2, 8
-	cmpldi	cr1, r12, 16
-	cmpldi	cr7, rN, 32
-	clrldi	rN, rN, 61
-	subfic	rSHR, rSHL, 64
-	sld	rWORD6_SHIFT, rWORD6, rSHL
-	beq	L(duP4)
-	mtctr	r0
-	bgt	cr1, L(duP3)
-	beq	cr1, L(duP2)
-
-/* Remainder is 8.  */
-	.align	4
-L(duP1):
-	srd	r12, rWORD8, rSHR
-	LD	rWORD7, 0, rSTR1
-	sld	rWORD8_SHIFT, rWORD8, rSHL
-	or	rWORD8, r12, rWORD6_SHIFT
-	blt	cr7, L(duP1x)
-L(duP1e):
-	LD	rWORD1, rOFF8, rSTR1
-	LD	rWORD2, rOFF8, rSTR2
-	cmpld	cr5, rWORD7, rWORD8
-	srd	r0, rWORD2, rSHR
-	sld	rWORD2_SHIFT, rWORD2, rSHL
-	or	rWORD2, r0, rWORD8_SHIFT
-	LD	rWORD3, rOFF16, rSTR1
-	LD	rWORD4, rOFF16, rSTR2
-	cmpld	cr7, rWORD1, rWORD2
-	srd	r12, rWORD4, rSHR
-	sld	rWORD4_SHIFT, rWORD4, rSHL
-	bne	cr5, L(duLcr5)
-	or	rWORD4, r12, rWORD2_SHIFT
-	LD	rWORD5, rOFF24, rSTR1
-	LD	rWORD6, rOFF24, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-	srd	r0, rWORD6, rSHR
-	sld	rWORD6_SHIFT, rWORD6, rSHL
-	bne	cr7, L(duLcr7)
-	or	rWORD6, r0, rWORD4_SHIFT
-	cmpld	cr6, rWORD5, rWORD6
-	b	L(duLoop3)
-	.align	4
-/* At this point we exit early with the first double word compare
-   complete and a remainder of 0 to 7 bytes.  See L(du14) for details
-   on how we handle the remaining bytes.  */
-L(duP1x):
-	cmpld	cr5, rWORD7, rWORD8
-	sldi.	rN, rN, 3
-	bne	cr5, L(duLcr5)
-	cmpld	cr7, rN, rSHR
-	beq	L(duZeroReturn)
-	li	r0, 0
-	ble	cr7, L(dutrim)
-	LD	rWORD2, rOFF8, rSTR2
-	srd	r0, rWORD2, rSHR
-	b	L(dutrim)
-/* Remainder is 16.  */
-	.align	4
-L(duP2):
-	srd	r0, rWORD8, rSHR
-	LD	rWORD5, 0, rSTR1
-	or	rWORD6, r0, rWORD6_SHIFT
-	sld	rWORD6_SHIFT, rWORD8, rSHL
-L(duP2e):
-	LD	rWORD7, rOFF8, rSTR1
-	LD	rWORD8, rOFF8, rSTR2
-	cmpld	cr6, rWORD5, rWORD6
-	srd	r12, rWORD8, rSHR
-	sld	rWORD8_SHIFT, rWORD8, rSHL
-	or	rWORD8, r12, rWORD6_SHIFT
-	blt	cr7, L(duP2x)
-	LD	rWORD1, rOFF16, rSTR1
-	LD	rWORD2, rOFF16, rSTR2
-	cmpld	cr5, rWORD7, rWORD8
-	bne	cr6, L(duLcr6)
-	srd	r0, rWORD2, rSHR
-	sld	rWORD2_SHIFT, rWORD2, rSHL
-	or	rWORD2, r0, rWORD8_SHIFT
-	LD	rWORD3, rOFF24, rSTR1
-	LD	rWORD4, rOFF24, rSTR2
-	cmpld	cr7, rWORD1, rWORD2
-	bne	cr5, L(duLcr5)
-	srd	r12, rWORD4, rSHR
-	sld	rWORD4_SHIFT, rWORD4, rSHL
-	or	rWORD4, r12, rWORD2_SHIFT
-	addi	rSTR1, rSTR1, 8
-	addi	rSTR2, rSTR2, 8
-	cmpld	cr1, rWORD3, rWORD4
-	b	L(duLoop2)
-	.align	4
-L(duP2x):
-	cmpld	cr5, rWORD7, rWORD8
-	addi	rSTR1, rSTR1, 8
-	addi	rSTR2, rSTR2, 8
-	bne	cr6, L(duLcr6)
-	sldi.	rN, rN, 3
-	bne	cr5, L(duLcr5)
-	cmpld	cr7, rN, rSHR
-	beq	L(duZeroReturn)
-	li	r0, 0
-	ble	cr7, L(dutrim)
-	LD	rWORD2, rOFF8, rSTR2
-	srd	r0, rWORD2, rSHR
-	b	L(dutrim)
-
-/* Remainder is 24.  */
-	.align	4
-L(duP3):
-	srd	r12, rWORD8, rSHR
-	LD	rWORD3, 0, rSTR1
-	sld	rWORD4_SHIFT, rWORD8, rSHL
-	or	rWORD4, r12, rWORD6_SHIFT
-L(duP3e):
-	LD	rWORD5, rOFF8, rSTR1
-	LD	rWORD6, rOFF8, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-	srd	r0, rWORD6, rSHR
-	sld	rWORD6_SHIFT, rWORD6, rSHL
-	or	rWORD6, r0, rWORD4_SHIFT
-	LD	rWORD7, rOFF16, rSTR1
-	LD	rWORD8, rOFF16, rSTR2
-	cmpld	cr6, rWORD5, rWORD6
-	bne	cr1, L(duLcr1)
-	srd	r12, rWORD8, rSHR
-	sld	rWORD8_SHIFT, rWORD8, rSHL
-	or	rWORD8, r12, rWORD6_SHIFT
-	blt	cr7, L(duP3x)
-	LD	rWORD1, rOFF24, rSTR1
-	LD	rWORD2, rOFF24, rSTR2
-	cmpld	cr5, rWORD7, rWORD8
-	bne	cr6, L(duLcr6)
-	srd	r0, rWORD2, rSHR
-	sld	rWORD2_SHIFT, rWORD2, rSHL
-	or	rWORD2, r0, rWORD8_SHIFT
-	addi	rSTR1, rSTR1, 16
-	addi	rSTR2, rSTR2, 16
-	cmpld	cr7, rWORD1, rWORD2
-	b	L(duLoop1)
-	.align	4
-L(duP3x):
-	addi	rSTR1, rSTR1, 16
-	addi	rSTR2, rSTR2, 16
-	cmpld	cr5, rWORD7, rWORD8
-	bne	cr6, L(duLcr6)
-	sldi.	rN, rN, 3
-	bne	cr5, L(duLcr5)
-	cmpld	cr7, rN, rSHR
-	beq	L(duZeroReturn)
-	li	r0, 0
-	ble	cr7, L(dutrim)
-	LD	rWORD2, rOFF8, rSTR2
-	srd	r0, rWORD2, rSHR
-	b	L(dutrim)
-
-/* Count is a multiple of 32, remainder is 0.  */
-	.align	4
-L(duP4):
-	mtctr	r0
-	srd	r0, rWORD8, rSHR
-	LD	rWORD1, 0, rSTR1
-	sld	rWORD2_SHIFT, rWORD8, rSHL
-	or	rWORD2, r0, rWORD6_SHIFT
-L(duP4e):
-	LD	rWORD3, rOFF8, rSTR1
-	LD	rWORD4, rOFF8, rSTR2
-	cmpld	cr7, rWORD1, rWORD2
-	srd	r12, rWORD4, rSHR
-	sld	rWORD4_SHIFT, rWORD4, rSHL
-	or	rWORD4, r12, rWORD2_SHIFT
-	LD	rWORD5, rOFF16, rSTR1
-	LD	rWORD6, rOFF16, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-	bne	cr7, L(duLcr7)
-	srd	r0, rWORD6, rSHR
-	sld	rWORD6_SHIFT, rWORD6, rSHL
-	or	rWORD6, r0, rWORD4_SHIFT
-	LD	rWORD7, rOFF24, rSTR1
-	LD	rWORD8, rOFF24, rSTR2
-	addi	rSTR1, rSTR1, 24
-	addi	rSTR2, rSTR2, 24
-	cmpld	cr6, rWORD5, rWORD6
-	bne	cr1, L(duLcr1)
-	srd	r12, rWORD8, rSHR
-	sld	rWORD8_SHIFT, rWORD8, rSHL
-	or	rWORD8, r12, rWORD6_SHIFT
-	cmpld	cr5, rWORD7, rWORD8
-	bdz	L(du24)		/* Adjust CTR as we start with +4.  */
-/* This is the primary loop.  */
-	.align	4
-L(duLoop):
-	LD	rWORD1, rOFF8, rSTR1
-	LD	rWORD2, rOFF8, rSTR2
-	cmpld	cr1, rWORD3, rWORD4
-	bne	cr6, L(duLcr6)
-	srd	r0, rWORD2, rSHR
-	sld	rWORD2_SHIFT, rWORD2, rSHL
-	or	rWORD2, r0, rWORD8_SHIFT
-L(duLoop1):
-	LD	rWORD3, rOFF16, rSTR1
-	LD	rWORD4, rOFF16, rSTR2
-	cmpld	cr6, rWORD5, rWORD6
-	bne	cr5, L(duLcr5)
-	srd	r12, rWORD4, rSHR
-	sld	rWORD4_SHIFT, rWORD4, rSHL
-	or	rWORD4, r12, rWORD2_SHIFT
-L(duLoop2):
-	LD	rWORD5, rOFF24, rSTR1
-	LD	rWORD6, rOFF24, rSTR2
-	cmpld	cr5, rWORD7, rWORD8
-	bne	cr7, L(duLcr7)
-	srd	r0, rWORD6, rSHR
-	sld	rWORD6_SHIFT, rWORD6, rSHL
-	or	rWORD6, r0, rWORD4_SHIFT
-L(duLoop3):
-	LD	rWORD7, rOFF32, rSTR1
-	LD	rWORD8, rOFF32, rSTR2
-	addi	rSTR1, rSTR1, 32
-	addi	rSTR2, rSTR2, 32
-	cmpld	cr7, rWORD1, rWORD2
-	bne	cr1, L(duLcr1)
-	srd	r12, rWORD8, rSHR
-	sld	rWORD8_SHIFT, rWORD8, rSHL
-	or	rWORD8, r12, rWORD6_SHIFT
-	bdnz	L(duLoop)
-
-L(duL4):
-	cmpld	cr1, rWORD3, rWORD4
-	bne	cr6, L(duLcr6)
-	cmpld	cr6, rWORD5, rWORD6
-	bne	cr5, L(duLcr5)
-	cmpld	cr5, rWORD7, rWORD8
-L(du44):
-	bne	cr7, L(duLcr7)
-L(du34):
-	bne	cr1, L(duLcr1)
-L(du24):
-	bne	cr6, L(duLcr6)
-L(du14):
-	sldi.	rN, rN, 3
-	bne	cr5, L(duLcr5)
-/* At this point we have a remainder of 1 to 7 bytes to compare.  We use
-   shift right double to eliminate bits beyond the compare length.
-
-   However it may not be safe to load rWORD2, which may be beyond the
-   string length.  So we compare the bit length of the remainder to
-   the right shift count (rSHR).  If the bit count is less than or
-   equal to rSHR we do not need to load rWORD2 (all significant bits
-   are already in rWORD8_SHIFT).  */
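
   In rough C terms (rem_bits is the remaining byte count times 8, shr
   is rSHR; load64 below is a hypothetical aligned 8-byte load):

       #include <stdint.h>

       extern uint64_t load64 (const unsigned char *p);  /* hypothetical */

       static uint64_t
       next_piece (const unsigned char *s2, unsigned rem_bits,
                   unsigned shr)
       {
         /* All significant bits already sit in the saved shifted word;
            skip a load that could fault on an unmapped page.  */
         if (rem_bits <= shr)
           return 0;
         return load64 (s2 + 8) >> shr;
       }
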
-	cmpld	cr7, rN, rSHR
-	beq	L(duZeroReturn)
-	li	r0, 0
-	ble	cr7, L(dutrim)
-	LD	rWORD2, rOFF8, rSTR2
-	srd	r0, rWORD2, rSHR
-	.align	4
-L(dutrim):
-	LD	rWORD1, rOFF8, rSTR1
-	ld	rWORD8, -8(r1)
-	subfic	rN, rN, 64	/* Shift count is 64 - (rN * 8).  */
-	or	rWORD2, r0, rWORD8_SHIFT
-	ld	rWORD7, rWORD7SAVE(r1)
-	ld	rSHL, rSHLSAVE(r1)
-	srd	rWORD1, rWORD1, rN
-	srd	rWORD2, rWORD2, rN
-	ld	rSHR, rSHRSAVE(r1)
-	ld	rWORD8_SHIFT, rWORD8SHIFTSAVE(r1)
-	li	rRTN, 0
-	cmpld	cr7, rWORD1, rWORD2
-	ld	rWORD2_SHIFT, rWORD2SHIFTSAVE(r1)
-	ld	rWORD4_SHIFT, rWORD4SHIFTSAVE(r1)
-	beq	cr7, L(dureturn24)
-	li	rRTN, 1
-	ld	rWORD6_SHIFT, rWORD6SHIFTSAVE(r1)
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	bgtlr	cr7
-	li	rRTN, -1
-	blr
-	.align	4
-L(duLcr7):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-	li	rRTN, 1
-	bgt	cr7, L(dureturn29)
-	ld	rSHL, rSHLSAVE(r1)
-	ld	rSHR, rSHRSAVE(r1)
-	li	rRTN, -1
-	b	L(dureturn27)
-	.align	4
-L(duLcr1):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-	li	rRTN, 1
-	bgt	cr1, L(dureturn29)
-	ld	rSHL, rSHLSAVE(r1)
-	ld	rSHR, rSHRSAVE(r1)
-	li	rRTN, -1
-	b	L(dureturn27)
-	.align	4
-L(duLcr6):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-	li	rRTN, 1
-	bgt	cr6, L(dureturn29)
-	ld	rSHL, rSHLSAVE(r1)
-	ld	rSHR, rSHRSAVE(r1)
-	li	rRTN, -1
-	b	L(dureturn27)
-	.align	4
-L(duLcr5):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-	li	rRTN, 1
-	bgt	cr5, L(dureturn29)
-	ld	rSHL, rSHLSAVE(r1)
-	ld	rSHR, rSHRSAVE(r1)
-	li	rRTN, -1
-	b	L(dureturn27)
-
-	.align	3
-L(duZeroReturn):
-	li	rRTN, 0
-	.align	4
-L(dureturn):
-	ld	rWORD8, rWORD8SAVE(r1)
-	ld	rWORD7, rWORD7SAVE(r1)
-L(dureturn29):
-	ld	rSHL, rSHLSAVE(r1)
-	ld	rSHR, rSHRSAVE(r1)
-L(dureturn27):
-	ld	rWORD8_SHIFT, rWORD8SHIFTSAVE(r1)
-	ld	rWORD2_SHIFT, rWORD2SHIFTSAVE(r1)
-	ld	rWORD4_SHIFT, rWORD4SHIFTSAVE(r1)
-L(dureturn24):
-	ld	rWORD6_SHIFT, rWORD6SHIFTSAVE(r1)
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	blr
-
-L(duzeroLength):
-	ld	rOFF8,  rOFF8SAVE(r1)
-	ld	rOFF16, rOFF16SAVE(r1)
-	ld	rOFF24, rOFF24SAVE(r1)
-	ld	rOFF32, rOFF32SAVE(r1)
-	li	rRTN, 0
-	blr
-
-END (MEMCMP)
-libc_hidden_builtin_def (memcmp)
-weak_alias (memcmp, bcmp)
diff --git a/sysdeps/powerpc/powerpc64/power8/memset.S b/sysdeps/powerpc/powerpc64/power8/memset.S
deleted file mode 100644
index bc734c9f4f..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/memset.S
+++ /dev/null
@@ -1,458 +0,0 @@
-/* Optimized memset implementation for PowerPC64/POWER8.
-   Copyright (C) 2014-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#define MTVSRD_V1_R4  .long 0x7c240166     /* mtvsrd  v1,r4  */
-
-/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
-   Returns 's'.  */
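
   The insrdi pairs in the function below replicate the fill byte across
   the whole register; the same replication in C is a single multiply:

       #include <stdint.h>

       /* Spread one byte across all eight bytes of a doubleword.  */
       static uint64_t
       splat8 (unsigned char c)
       {
         return (uint64_t) c * 0x0101010101010101ULL;
       }
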
-
-#ifndef MEMSET
-# define MEMSET memset
-#endif
-
-	/* No need to use .machine power8 since mtvsrd is already
-	   handled by the define.  It avoids breakage on binutils
-	   versions that do not support this machine specifier.  */
-	.machine power7
-EALIGN (MEMSET, 5, 0)
-	CALL_MCOUNT 3
-
-L(_memset):
-	cmpldi	cr7,r5,31
-	neg	r0,r3
-	mr	r10,r3
-
-	insrdi	r4,r4,8,48
-	insrdi	r4,r4,16,32	/* Replicate byte to word.  */
-	ble	cr7,L(write_LT_32)
-
-	andi.	r11,r10,15	/* Check alignment of DST.  */
-	insrdi	r4,r4,32,0	/* Replicate word to double word.  */
-
-	beq	L(big_aligned)
-
-	mtocrf	0x01,r0
-	clrldi	r0,r0,60
-
-	/* Get DST aligned to 16 bytes.  */
-1:	bf	31,2f
-	stb	r4,0(r10)
-	addi	r10,r10,1
-
-2:	bf	30,4f
-	sth	r4,0(r10)
-	addi	r10,r10,2
-
-4:	bf	29,8f
-	stw	r4,0(r10)
-	addi	r10,r10,4
-
-8:	bf      28,16f
-	std     r4,0(r10)
-	addi    r10,r10,8
-
-16:	subf	r5,r0,r5
-
-	.align	4
-L(big_aligned):
-	/* For sizes larger than 255 bytes there are two possible paths:
-	   - if the constant is '0', zero full cache lines with dcbz;
-	   - otherwise use vector instructions.  */
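
   A hedged pseudo-C view of this dispatch; the names mirror the branch
   targets below and are illustrative only:

       if (n > 255 && c == 0)
         goto huge_dcbz;    /* zero whole 128B cache lines */
       else if (n >= 255)
         goto huge_vector;  /* 16B vector stores */
       /* else fall through: 32B per iteration with std.  */
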
-	cmpldi	cr5,r5,255
-	dcbtst	0,r10
-	cmpldi	cr6,r4,0
-	crand	27,26,21
-	bt	27,L(huge_dcbz)
-	bge	cr5,L(huge_vector)
-
-
-	/* For sizes between 32 and 255 bytes with a constant other than 0,
-	   use doubleword store instructions to achieve the best throughput.  */
-	srdi    r8,r5,5
-	clrldi  r11,r5,59
-	cmpldi  cr6,r11,0
-	cmpdi	r8,0
-	beq     L(tail_bytes)
-	mtctr   r8
-
-	/* Main aligned write loop, writes 32 bytes at a time.  */
-	.align  4
-L(big_loop):
-	std     r4,0(r10)
-	std     r4,8(r10)
-	std     r4,16(r10)
-	std     r4,24(r10)
-	addi    r10,r10,32
-	bdz     L(tail_bytes)
-
-	std     r4,0(r10)
-	std     r4,8(r10)
-	std     r4,16(r10)
-	std     r4,24(r10)
-	addi    r10,r10,32
-	bdnz    L(big_loop)
-
-	b       L(tail_bytes)
-
-	/* Write remaining 1~31 bytes.  */
-	.align  4
-L(tail_bytes):
-	beqlr   cr6
-
-	srdi    r7,r11,4
-	clrldi  r8,r11,60
-	mtocrf  0x01,r7
-
-	.align	4
-	bf	31,8f
-	std	r4,0(r10)
-	std	r4,8(r10)
-	addi	r10,r10,16
-
-	.align	4
-8:	mtocrf	0x1,r8
-	bf	28,4f
-	std	r4,0(r10)
-	addi	r10,r10,8
-
-	.align	4
-4:	bf      29,2f
-	stw     r4,0(r10)
-	addi    r10,r10,4
-
-	.align 	4
-2:	bf      30,1f
-	sth     r4,0(r10)
-	addi    r10,r10,2
-
-	.align  4
-1:      bflr    31
-	stb     r4,0(r10)
-	blr
-
-	/* For sizes larger than 255 bytes with a constant other than 0, use
-	   vector instructions to achieve the best throughput.  */
-L(huge_vector):
-	/* Replicate the set byte across a quadword in a VMX register.  */
-	MTVSRD_V1_R4
-	xxpermdi 32,v0,v1,0
-	vspltb	 v2,v0,15
-
-	/* Main aligned write loop: 128 bytes at a time.  */
-	li	r6,16
-	li	r7,32
-	li	r8,48
-	mtocrf	0x02,r5
-	srdi	r12,r5,7
-	cmpdi	r12,0
-	beq	L(aligned_tail)
-	mtctr	r12
-	b	L(aligned_128loop)
-
-	.align  4
-L(aligned_128loop):
-	stvx	v2,0,r10
-	stvx	v2,r10,r6
-	stvx	v2,r10,r7
-	stvx	v2,r10,r8
-	addi	r10,r10,64
-	stvx	v2,0,r10
-	stvx	v2,r10,r6
-	stvx	v2,r10,r7
-	stvx	v2,r10,r8
-	addi	r10,r10,64
-	bdnz	L(aligned_128loop)
-
-	/* Write remaining 1~127 bytes.  */
-L(aligned_tail):
-	mtocrf	0x01,r5
-	bf	25,32f
-	stvx	v2,0,r10
-	stvx	v2,r10,r6
-	stvx	v2,r10,r7
-	stvx	v2,r10,r8
-	addi	r10,r10,64
-
-32:	bf	26,16f
-	stvx	v2,0,r10
-	stvx	v2,r10,r6
-	addi	r10,r10,32
-
-16:	bf	27,8f
-	stvx	v2,0,r10
-	addi	r10,r10,16
-
-8:	bf	28,4f
-	std     r4,0(r10)
-	addi	r10,r10,8
-
-	/* Writes 4~7 bytes.  */
-4:	bf	29,L(tail2)
-	stw     r4,0(r10)
-	bf      30,L(tail5)
-	sth     r4,4(r10)
-	bflr	31
-	stb     r4,6(r10)
-	/* Return original DST pointer.  */
-	blr
-
-	/* Special case when value is 0 and we have a long length to deal
-	   with.  Use dcbz to zero out a full cacheline of 128 bytes at a time.
-	   Before using dcbz though, we need to get the destination 128-byte
-	   aligned.  */
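
   A minimal C sketch of that strategy, assuming the POWER8 cache-line
   size of 128 bytes and using memset as a stand-in for dcbz:

       #include <stdint.h>
       #include <string.h>

       static void
       zero_big (unsigned char *p, size_t n)
       {
         /* Reach a 128-byte boundary with ordinary stores.  */
         while (n > 0 && ((uintptr_t) p & 127) != 0)
           {
             *p++ = 0;
             n--;
           }
         /* Each dcbz clears one full cache line.  */
         for (; n >= 128; p += 128, n -= 128)
           memset (p, 0, 128);
         memset (p, 0, n);  /* 0~127 byte tail */
       }
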
-	.align	4
-L(huge_dcbz):
-	andi.	r11,r10,127
-	neg	r0,r10
-	beq	L(huge_dcbz_aligned)
-
-	clrldi	r0,r0,57
-	subf	r5,r0,r5
-	srdi	r0,r0,3
-	mtocrf	0x01,r0
-
-	/* Write 1~128 bytes until DST is aligned to 128 bytes.  */
-8:	bf	28,4f
-
-	std	r4,0(r10)
-	std	r4,8(r10)
-	std	r4,16(r10)
-	std	r4,24(r10)
-	std	r4,32(r10)
-	std	r4,40(r10)
-	std	r4,48(r10)
-	std	r4,56(r10)
-	addi	r10,r10,64
-
-	.align	4
-4:	bf	29,2f
-	std	r4,0(r10)
-	std	r4,8(r10)
-	std	r4,16(r10)
-	std	r4,24(r10)
-	addi	r10,r10,32
-
-	.align	4
-2:	bf	30,1f
-	std	r4,0(r10)
-	std	r4,8(r10)
-	addi	r10,r10,16
-
-	.align	4
-1:	bf	31,L(huge_dcbz_aligned)
-	std	r4,0(r10)
-	addi	r10,r10,8
-
-L(huge_dcbz_aligned):
-	/* Set up the dcbz unroll offsets and loop count.  */
-	srdi	r8,r5,9
-	clrldi	r11,r5,55
-	cmpldi	cr6,r11,0
-	li	r9,128
-	cmpdi	r8,0
-	beq     L(huge_tail)
-	li	r7,256
-	li	r6,384
-	mtctr	r8
-
-	.align	4
-L(huge_loop):
-	/* Set 512 bytes to zero in each iteration; the loop unrolling shows
-	   a throughput boost for large sizes (2048 bytes or higher).  */
-	dcbz	0,r10
-	dcbz	r9,r10
-	dcbz	r7,r10
-	dcbz	r6,r10
-	addi	r10,r10,512
-	bdnz	L(huge_loop)
-
-	beqlr	cr6
-
-L(huge_tail):
-	srdi    r6,r11,8
-	srdi    r7,r11,4
-	clrldi  r8,r11,4
-	cmpldi  cr6,r8,0
-	mtocrf  0x01,r6
-
-	beq	cr6,L(tail)
-
-	/* We have 1~511 bytes remaining.  */
-	.align	4
-32:	bf	31,16f
-	dcbz	0,r10
-	dcbz	r9,r10
-	addi	r10,r10,256
-
-	.align	4
-16:	mtocrf  0x01,r7
-	bf	28,8f
-	dcbz	0,r10
-	addi	r10,r10,128
-
-	.align 	4
-8:	bf	29,4f
-	std	r4,0(r10)
-	std	r4,8(r10)
-	std	r4,16(r10)
-	std	r4,24(r10)
-	std	r4,32(r10)
-	std	r4,40(r10)
-	std	r4,48(r10)
-	std	r4,56(r10)
-	addi	r10,r10,64
-
-	.align	4
-4:	bf	30,2f
-	std	r4,0(r10)
-	std	r4,8(r10)
-	std	r4,16(r10)
-	std	r4,24(r10)
-	addi	r10,r10,32
-
-	.align	4
-2:	bf	31,L(tail)
-	std	r4,0(r10)
-	std	r4,8(r10)
-	addi	r10,r10,16
-	.align	4
-
-	/* Remaining 1~15 bytes.  */
-L(tail):
-	mtocrf  0x01,r8
-
-	.align	4
-8:	bf	28,4f
-	std	r4,0(r10)
-	addi	r10,r10,8
-
-	.align	4
-4:	bf	29,2f
-	stw	r4,0(r10)
-	addi	r10,r10,4
-
-	.align	4
-2:	bf	30,1f
-	sth	r4,0(r10)
-	addi	r10,r10,2
-
-	.align	4
-1:	bflr	31
-	stb	r4,0(r10)
-	blr
-
-	/* Handle short writes of 0~31 bytes.  Best throughput is achieved
-	   by just unrolling all operations.  */
-	.align	4
-L(write_LT_32):
-	cmpldi	cr6,r5,8
-	mtocrf	0x01,r5
-	ble	cr6,L(write_LE_8)
-
-	/* At least 9 bytes to go.  */
-	neg	r8,r4
-	andi.	r0,r8,3
-	cmpldi	cr1,r5,16
-	beq	L(write_LT_32_aligned)
-
-	/* Force 4-byte alignment for DST.  */
-	mtocrf	0x01,r0
-	subf	r5,r0,r5
-
-2:	bf	30,1f
-	sth	r4,0(r10)
-	addi	r10,r10,2
-
-1:	bf	31,L(end_4bytes_alignment)
-	stb	r4,0(r10)
-	addi	r10,r10,1
-
-	.align	4
-L(end_4bytes_alignment):
-	cmpldi	cr1,r5,16
-	mtocrf	0x01,r5
-
-L(write_LT_32_aligned):
-	blt	cr1,8f
-
-	stw	r4,0(r10)
-	stw	r4,4(r10)
-	stw	r4,8(r10)
-	stw	r4,12(r10)
-	addi	r10,r10,16
-
-8:	bf	28,L(tail4)
-	stw	r4,0(r10)
-	stw	r4,4(r10)
-	addi	r10,r10,8
-
-	.align	4
-	/* Writes 4~7 bytes.  */
-L(tail4):
-	bf	29,L(tail2)
-	stw	r4,0(r10)
-	bf	30,L(tail5)
-	sth	r4,4(r10)
-	bflr	31
-	stb	r4,6(r10)
-	blr
-
-	.align	4
-	/* Writes 2~3 bytes.  */
-L(tail2):
-	bf	30,1f
-	sth	r4,0(r10)
-	bflr	31
-	stb	r4,2(r10)
-	blr
-
-	.align	4
-L(tail5):
-	bflr	31
-	stb	r4,4(r10)
-	blr
-
-	.align	4
-1: 	bflr	31
-	stb	r4,0(r10)
-	blr
-
-	/* Handles writes of 0~8 bytes.  */
-	.align	4
-L(write_LE_8):
-	bne	cr6,L(tail4)
-
-	stw	r4,0(r10)
-	stw	r4,4(r10)
-	blr
-END_GEN_TB (MEMSET,TB_TOCLESS)
-libc_hidden_builtin_def (memset)
-
-/* Copied from bzero.S to prevent the linker from inserting a stub
-   between bzero and memset.  */
-ENTRY (__bzero)
-	CALL_MCOUNT 3
-	mr	r5,r4
-	li	r4,0
-	b	L(_memset)
-END (__bzero)
-#ifndef __bzero
-weak_alias (__bzero, bzero)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/multiarch/Implies b/sysdeps/powerpc/powerpc64/power8/multiarch/Implies
deleted file mode 100644
index 1fc7b7cd39..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/multiarch/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power7/multiarch
diff --git a/sysdeps/powerpc/powerpc64/power8/stpcpy.S b/sysdeps/powerpc/powerpc64/power8/stpcpy.S
deleted file mode 100644
index 955e738cee..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/stpcpy.S
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Optimized stpcpy implementation for PowerPC64/POWER8.
-   Copyright (C) 2015-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define USE_AS_STPCPY
-#include <sysdeps/powerpc/powerpc64/power8/strcpy.S>
-
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
diff --git a/sysdeps/powerpc/powerpc64/power8/stpncpy.S b/sysdeps/powerpc/powerpc64/power8/stpncpy.S
deleted file mode 100644
index c14d984dd0..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/stpncpy.S
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Optimized stpncpy implementation for PowerPC64/POWER8.
-   Copyright (C) 2015-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define USE_AS_STPNCPY
-#include <sysdeps/powerpc/powerpc64/power8/strncpy.S>
-
-weak_alias (__stpncpy, stpncpy)
-libc_hidden_def (__stpncpy)
-libc_hidden_builtin_def (stpncpy)
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
deleted file mode 100644
index 88b17a6eb1..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
+++ /dev/null
@@ -1,457 +0,0 @@
-/* Optimized strcasecmp implementation for PowerPC64.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <locale-defines.h>
-
-/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */
-
-#ifndef USE_AS_STRNCASECMP
-#  define __STRCASECMP __strcasecmp
-#  define STRCASECMP   strcasecmp
-#else
-#  define __STRCASECMP __strncasecmp
-#  define STRCASECMP   strncasecmp
-#endif
-/* Convert 16 bytes to lowercase and compare.  */
-#define TOLOWER()     \
-	vaddubm	v8, v4, v1; \
-	vaddubm	v7, v4, v3; \
-	vcmpgtub	v8, v8, v2; \
-	vsel	v4, v7, v4, v8; \
-	vaddubm	v8, v5, v1; \
-	vaddubm	v7, v5, v3; \
-	vcmpgtub	v8, v8, v2; \
-	vsel	v5, v7, v5, v8; \
-	vcmpequb.	v7, v5, v4;
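
   The scalar equivalent of this vector sequence: adding 0xbf (the v1
   constant) maps 'A'..'Z' onto 0x00..0x19, so one unsigned compare
   decides whether to add the 0x20 case bit.  A sketch:

       /* ASCII-only tolower, mirroring the vsel trick above.  */
       static unsigned char
       ascii_tolower (unsigned char c)
       {
         return (unsigned char) (c + 0xbf) <= 0x19 ? c + 0x20 : c;
       }
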
-
-/*
- * Get 16 bytes for the unaligned case.
- * reg1: Vector to hold the next 16 bytes.
- * reg2: Address to read from.
- * reg3: Permute control vector.
- * v8: Tmp vector used to mask unwanted bytes.
- * v9: Tmp vector, 0 when null is found in the first 16 bytes.
- */
-#ifdef __LITTLE_ENDIAN__
-#define GET16BYTES(reg1, reg2, reg3) \
-	lvx	reg1, 0, reg2; \
-	vspltisb	v8, -1; \
-	vperm	v8, v8, reg1, reg3; \
-	vcmpequb.	v8, v0, v8; \
-	beq	cr6, 1f; \
-	vspltisb	v9, 0; \
-	b	2f; \
-	.align 4; \
-1: \
-	addi	r6, reg2, 16; \
-	lvx	v9, 0, r6; \
-2: \
-	vperm	reg1, v9, reg1, reg3;
-#else
-#define GET16BYTES(reg1, reg2, reg3) \
-	lvx	reg1, 0, reg2; \
-	vspltisb	 v8, -1; \
-	vperm	v8, reg1, v8,  reg3; \
-	vcmpequb.	v8, v0, v8; \
-	beq	cr6, 1f; \
-	vspltisb	v9, 0; \
-	b	2f; \
-	.align 4; \
-1: \
-	addi	r6, reg2, 16; \
-	lvx	v9, 0, r6; \
-2: \
-	vperm	reg1, reg1, v9, reg3;
-#endif
-
-/* Check for null in v4 and v5, and convert to lowercase.  */
-#define CHECKNULLANDCONVERT() \
-	vcmpequb.	v7, v0, v5; \
-	beq	cr6, 3f; \
-	vcmpequb.	v7, v0, v4; \
-	beq	cr6, 3f; \
-	b	L(null_found); \
-	.align  4; \
-3: \
-	TOLOWER()
-
-#ifdef _ARCH_PWR8
-#  define VCLZD_V8_v7	vclzd	v8, v7;
-#  define MFVRD_R3_V1	mfvrd	r3, v1;
-#  define VSUBUDM_V9_V8	vsubudm	v9, v9, v8;
-#  define VPOPCNTD_V8_V8	vpopcntd v8, v8;
-#  define VADDUQM_V7_V8	vadduqm	v9, v7, v8;
-#else
-#  define VCLZD_V8_v7	.long	0x11003fc2
-#  define MFVRD_R3_V1	.long	0x7c230067
-#  define VSUBUDM_V9_V8	.long	0x112944c0
-#  define VPOPCNTD_V8_V8	.long	0x110047c3
-#  define VADDUQM_V7_V8	.long	0x11274100
-#endif
-
-	.machine  power7
-
-ENTRY (__STRCASECMP)
-#ifdef USE_AS_STRNCASECMP
-	CALL_MCOUNT 3
-#else
-	CALL_MCOUNT 2
-#endif
-#define rRTN	r3	/* Return value */
-#define rSTR1	r10	/* 1st string */
-#define rSTR2	r4	/* 2nd string */
-#define rCHAR1	r6	/* Byte read from 1st string */
-#define rCHAR2	r7	/* Byte read from 2nd string */
-#define rADDR1	r8	/* Address of tolower(rCHAR1) */
-#define rADDR2	r12	/* Address of tolower(rCHAR2) */
-#define rLWR1	r8	/* Word tolower(rCHAR1) */
-#define rLWR2	r12	/* Word tolower(rCHAR2) */
-#define rTMP	r9
-#define rLOC	r11	/* Default locale address */
-
-	cmpd	cr7, rRTN, rSTR2
-
-	/* Get locale address.  */
-	ld 	rTMP, __libc_tsd_LOCALE@got@tprel(r2)
-	add 	rLOC, rTMP, __libc_tsd_LOCALE@tls
-	ld	rLOC, 0(rLOC)
-
-	mr	rSTR1, rRTN
-	li	rRTN, 0
-	beqlr	cr7
-#ifdef USE_AS_STRNCASECMP
-	cmpdi	cr7, r5, 0
-	beq	cr7, L(retnull)
-	cmpdi	cr7, r5, 16
-	blt	cr7, L(bytebybyte)
-#endif
-	vspltisb	v0, 0
-	vspltisb	v8, -1
-	/* Check for null in the initial characters.
-	   Check at most 16 chars, depending on the alignment.
-	   If null is present, proceed byte by byte.  */
-	lvx	v4, 0, rSTR1
-#ifdef  __LITTLE_ENDIAN__
-	lvsr	v10, 0, rSTR1	/* Compute mask.  */
-	vperm	v9, v8, v4, v10	/* Mask bits that are not part of string.  */
-#else
-	lvsl	v10, 0, rSTR1
-	vperm	v9, v4, v8, v10
-#endif
-	vcmpequb.	v9, v0, v9	/* Check for null bytes.  */
-	bne	cr6, L(bytebybyte)
-	lvx	v5, 0, rSTR2
-	/* Calculate alignment.  */
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v6, 0, rSTR2
-	vperm	v9, v8, v5, v6	/* Mask bits that are not part of string.  */
-#else
-	lvsl	v6, 0, rSTR2
-	vperm	v9, v5, v8, v6
-#endif
-	vcmpequb.	v9, v0, v9	/* Check for null bytes.  */
-	bne	cr6, L(bytebybyte)
-	/* Check if the locale has non-ASCII characters.  */
-	ld	rTMP, 0(rLOC)
-	addi	r6, rTMP, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES
-	lwz	rTMP, 0(r6)
-	cmpdi	cr7, rTMP, 1
-	beq	cr7, L(bytebybyte)
-
-	/* Load vector registers with values used for TOLOWER.  */
-	/* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte.  */
-	vspltisb	v3, 2
-	vspltisb	v9, 4
-	vsl	v3, v3, v9
-	vaddubm	v1, v3, v3
-	vnor	v1, v1, v1
-	vspltisb	v2, 7
-	vsububm	v2, v3, v2
-
-	andi.	rADDR1, rSTR1, 0xF
-	beq	cr0, L(align)
-	addi	r6, rSTR1, 16
-	lvx	v9, 0, r6
-	/* Compute 16 bytes from previous two loads.  */
-#ifdef __LITTLE_ENDIAN__
-	vperm	v4, v9, v4, v10
-#else
-	vperm	v4, v4, v9, v10
-#endif
-L(align):
-	andi.	rADDR2, rSTR2, 0xF
-	beq	cr0, L(align1)
-	addi	r6, rSTR2, 16
-	lvx	v9, 0, r6
-	/* Compute 16 bytes from previous two loads.  */
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v9, v5, v6
-#else
-	vperm	v5, v5, v9, v6
-#endif
-L(align1):
-	CHECKNULLANDCONVERT()
-	blt	cr6, L(match)
-	b	L(different)
-	.align 	4
-L(match):
-	clrldi	r6, rSTR1, 60
-	subfic	r7, r6, 16
-#ifdef USE_AS_STRNCASECMP
-	sub	r5, r5, r7
-#endif
-	add	rSTR1, rSTR1, r7
-	add	rSTR2, rSTR2, r7
-	andi.	rADDR2, rSTR2, 0xF
-	addi	rSTR1, rSTR1, -16
-	addi	rSTR2, rSTR2, -16
-	beq	cr0, L(aligned)
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v6, 0, rSTR2
-#else
-	lvsl	v6, 0, rSTR2
-#endif
-	/* There are 2 loops, depending on the input alignment.
-	   Each loop gets 16 bytes from s1 and s2, checks for null,
-	   converts to lowercase and compares.  Loop until a difference
-	   or null occurs.  */
-L(s1_align):
-	addi	rSTR1, rSTR1, 16
-	addi	rSTR2, rSTR2, 16
-#ifdef USE_AS_STRNCASECMP
-	cmpdi	cr7, r5, 16
-	blt	cr7, L(bytebybyte)
-	addi	r5, r5, -16
-#endif
-	lvx	v4, 0, rSTR1
-	GET16BYTES(v5, rSTR2, v6)
-	CHECKNULLANDCONVERT()
-	blt	cr6, L(s1_align)
-	b	L(different)
-	.align 	4
-L(aligned):
-	addi	rSTR1, rSTR1, 16
-	addi	rSTR2, rSTR2, 16
-#ifdef USE_AS_STRNCASECMP
-	cmpdi	cr7, r5, 16
-	blt	cr7, L(bytebybyte)
-	addi	r5, r5, -16
-#endif
-	lvx	v4, 0, rSTR1
-	lvx	v5, 0, rSTR2
-	CHECKNULLANDCONVERT()
-	blt	cr6, L(aligned)
-
-	/* Calculate and return the difference.  */
-L(different):
-	vaddubm	v1, v3, v3
-	vcmpequb	v7, v0, v7
-#ifdef __LITTLE_ENDIAN__
-	/* Count trailing zero.  */
-	vspltisb	v8, -1
-	VADDUQM_V7_V8
-	vandc	v8, v9, v7
-	VPOPCNTD_V8_V8
-	vspltb	v6, v8, 15
-	vcmpequb.	v6, v6, v1
-	blt	cr6, L(shift8)
-#else
-	/* Count leading zero.  */
-	VCLZD_V8_v7
-	vspltb	v6, v8, 7
-	vcmpequb.	v6, v6, v1
-	blt	cr6, L(shift8)
-	vsro	v8, v8, v1
-#endif
-	b	L(skipsum)
-	.align  4
-L(shift8):
-	vsumsws		v8, v8, v0
-L(skipsum):
-#ifdef __LITTLE_ENDIAN__
-	/* Shift registers based on the zero count.  */
-	vsro	v6, v5, v8
-	vsro	v7, v4, v8
-	/* Merge and move to GPR.  */
-	vmrglb	v6, v6, v7
-	vslo	v1, v6, v1
-	MFVRD_R3_V1
-	/* Place the characters that are different in the first position.  */
-	sldi	rSTR2, rRTN, 56
-	srdi	rSTR2, rSTR2, 56
-	sldi	rSTR1, rRTN, 48
-	srdi	rSTR1, rSTR1, 56
-#else
-	vslo	v6, v5, v8
-	vslo	v7, v4, v8
-	vmrghb	v1, v6, v7
-	MFVRD_R3_V1
-	srdi	rSTR2, rRTN, 48
-	sldi	rSTR2, rSTR2, 56
-	srdi	rSTR2, rSTR2, 56
-	srdi	rSTR1, rRTN, 56
-#endif
-	subf  	rRTN, rSTR1, rSTR2
-	extsw 	rRTN, rRTN
-	blr
-
-	.align  4
-	/* OK. We've hit the end of the string. We need to be careful that
-	   we don't compare two strings as different because of junk beyond
-	   the end of the strings...  */
-L(null_found):
-	vaddubm	v10, v3, v3
-#ifdef __LITTLE_ENDIAN__
-	/* Count trailing zero.  */
-	vspltisb	v8, -1
-	VADDUQM_V7_V8
-	vandc	v8, v9, v7
-	VPOPCNTD_V8_V8
-	vspltb	v6, v8, 15
-	vcmpequb.	v6, v6, v10
-	blt	cr6, L(shift_8)
-#else
-	/* Count leading zero.  */
-	VCLZD_V8_v7
-	vspltb	v6, v8, 7
-	vcmpequb.	v6, v6, v10
-	blt	cr6, L(shift_8)
-	vsro	v8, v8, v10
-#endif
-	b	L(skipsum1)
-	.align  4
-L(shift_8):
-	vsumsws	v8, v8, v0
-L(skipsum1):
-	/* Calculate the shift count based on the zero count.  */
-	vspltisb	v10, 7
-	vslb	v10, v10, v10
-	vsldoi	v9, v0, v10, 1
-	VSUBUDM_V9_V8
-	vspltisb	v8, 8
-	vsldoi	v8, v0, v8, 1
-	VSUBUDM_V9_V8
-	/* Shift and remove junk after null character.  */
-#ifdef __LITTLE_ENDIAN__
-	vslo	v5, v5, v9
-	vslo	v4, v4, v9
-#else
-	vsro	v5, v5, v9
-	vsro	v4, v4, v9
-#endif
-	/* Convert and compare 16 bytes.  */
-	TOLOWER()
-	blt	cr6, L(retnull)
-	b	L(different)
-	.align  4
-L(retnull):
-	li	rRTN, 0
-	blr
-	.align  4
-L(bytebybyte):
-	/* Unrolled loop for POWER: loads are done with 'lbz' plus
-	   offset, and the string descriptors are only updated at the
-	   end of the loop unrolling.  */
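
   Without the unrolling, each step of this loop is equivalent to the
   following C; the 32-bit tolower table lookup mirrors the lwzx
   accesses, and the names are illustrative:

       #include <stdint.h>

       static int
       loop_cmp (const unsigned char *s1, const unsigned char *s2,
                 const int32_t *tolower_tab)
       {
         for (;; s1++, s2++)
           {
             int32_t l1 = tolower_tab[*s1];
             int32_t l2 = tolower_tab[*s2];
             if (l1 != l2 || *s1 == '\0')
               return l1 - l2;
           }
       }
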
-	ld	rLOC, LOCALE_CTYPE_TOLOWER(rLOC)
-	lbz	rCHAR1, 0(rSTR1)	/* Load char from s1 */
-	lbz	rCHAR2, 0(rSTR2)	/* Load char from s2 */
-#ifdef USE_AS_STRNCASECMP
-	rldicl	rTMP, r5, 62, 2
-	cmpdi	cr7, rTMP, 0
-	beq	cr7, L(lessthan4)
-	mtctr	rTMP
-#endif
-L(loop):
-	cmpdi	rCHAR1, 0		/* *s1 == '\0' ? */
-	sldi	rADDR1, rCHAR1, 2	/* Calculate address for tolower(*s1) */
-	sldi	rADDR2, rCHAR2, 2	/* Calculate address for tolower(*s2) */
-	lwzx	rLWR1, rLOC, rADDR1	/* Load tolower(*s1) */
-	lwzx	rLWR2, rLOC, rADDR2	/* Load tolower(*s2) */
-	cmpw	cr1, rLWR1, rLWR2	/* r = tolower(*s1) == tolower(*s2) ? */
-	crorc	4*cr1+eq,eq,4*cr1+eq	/* (*s1 != '\0') || (r == 1) */
-	beq	cr1, L(done)
-	lbz	rCHAR1, 1(rSTR1)
-	lbz	rCHAR2, 1(rSTR2)
-	cmpdi	rCHAR1, 0
-	sldi	rADDR1, rCHAR1, 2
-	sldi	rADDR2, rCHAR2, 2
-	lwzx	rLWR1, rLOC, rADDR1
-	lwzx	rLWR2, rLOC, rADDR2
-	cmpw	cr1, rLWR1, rLWR2
-	crorc	4*cr1+eq,eq,4*cr1+eq
-	beq	cr1, L(done)
-	lbz	rCHAR1, 2(rSTR1)
-	lbz	rCHAR2, 2(rSTR2)
-	cmpdi	rCHAR1, 0
-	sldi	rADDR1, rCHAR1, 2
-	sldi	rADDR2, rCHAR2, 2
-	lwzx	rLWR1, rLOC, rADDR1
-	lwzx	rLWR2, rLOC, rADDR2
-	cmpw	cr1, rLWR1, rLWR2
-	crorc	4*cr1+eq,eq,4*cr1+eq
-	beq	cr1, L(done)
-	lbz	rCHAR1, 3(rSTR1)
-	lbz	rCHAR2, 3(rSTR2)
-	cmpdi	rCHAR1, 0
-	/* Increment both string descriptors */
-	addi	rSTR1, rSTR1, 4
-	addi	rSTR2, rSTR2, 4
-	sldi	rADDR1, rCHAR1, 2
-	sldi	rADDR2, rCHAR2, 2
-	lwzx	rLWR1, rLOC, rADDR1
-	lwzx	rLWR2, rLOC, rADDR2
-	cmpw	cr1, rLWR1, rLWR2
-	crorc	4*cr1+eq,eq,4*cr1+eq
-	beq     cr1, L(done)
-	lbz	rCHAR1, 0(rSTR1)	/* Load char from s1 */
-	lbz	rCHAR2, 0(rSTR2)	/* Load char from s2 */
-#ifdef USE_AS_STRNCASECMP
-	bdnz	L(loop)
-#else
-	b	L(loop)
-#endif
-#ifdef USE_AS_STRNCASECMP
-L(lessthan4):
-	clrldi	r5, r5, 62
-	cmpdi	cr7, r5, 0
-	beq	cr7, L(retnull)
-	mtctr	r5
-L(loop1):
-	cmpdi	rCHAR1, 0
-	sldi	rADDR1, rCHAR1, 2
-	sldi	rADDR2, rCHAR2, 2
-	lwzx	rLWR1, rLOC, rADDR1
-	lwzx	rLWR2, rLOC, rADDR2
-	cmpw	cr1, rLWR1, rLWR2
-	crorc	4*cr1+eq,eq,4*cr1+eq
-	beq	cr1, L(done)
-	addi	rSTR1, rSTR1, 1
-	addi	rSTR2, rSTR2, 1
-	lbz	rCHAR1, 0(rSTR1)
-	lbz	rCHAR2, 0(rSTR2)
-	bdnz	L(loop1)
-#endif
-L(done):
-	subf	r0, rLWR2, rLWR1
-	extsw	rRTN, r0
-	blr
-END (__STRCASECMP)
-
-weak_alias (__STRCASECMP, STRCASECMP)
-libc_hidden_builtin_def (__STRCASECMP)
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c b/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c
deleted file mode 100644
index 0e746b7718..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Optimized strcasestr implementation for PowerPC64/POWER8.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <string.h>
-
-#define STRCASESTR __strcasestr_ppc
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(__name)
-
-#undef weak_alias
-#define weak_alias(a,b)
-extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden;
-
-#include <string/strcasestr.c>
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasestr.S b/sysdeps/powerpc/powerpc64/power8/strcasestr.S
deleted file mode 100644
index 6ac6572f3b..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strcasestr.S
+++ /dev/null
@@ -1,538 +0,0 @@
-/* Optimized strcasestr implementation for PowerPC64/POWER8.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <locale-defines.h>
-
-/* Char * [r3] strcasestr (char *s [r3], char * pat[r4])  */
-
-/* The performance gain is obtained by comparing 16 bytes at a time.  */
-
-/* When the first char of r4 is hit ITERATIONS times in r3,
-   fall back to the default implementation.  */
-#define ITERATIONS	64
-
-#ifndef STRCASESTR
-# define STRCASESTR __strcasestr
-#endif
-
-#ifndef STRLEN
-/* For builds without IFUNC support, local calls should be made to internal
-   GLIBC symbol (created by libc_hidden_builtin_def).  */
-# ifdef SHARED
-#  define STRLEN   __GI_strlen
-# else
-#  define STRLEN   strlen
-# endif
-#endif
-
-#ifndef STRNLEN
-/* For builds without IFUNC support, local calls should be made to internal
-   GLIBC symbol (created by libc_hidden_builtin_def).  */
-# ifdef SHARED
-#  define STRNLEN   __GI_strnlen
-# else
-#  define STRNLEN    __strnlen
-# endif
-#endif
-
-#ifndef STRCHR
-# ifdef SHARED
-#  define STRCHR   __GI_strchr
-# else
-#  define STRCHR   strchr
-# endif
-#endif
-
-/* Convert 16 bytes of v4 and reg to lowercase and compare.  */
-#define TOLOWER(reg)     \
-	vcmpgtub	v6, v4, v1; \
-	vcmpgtub	v7, v2, v4; \
-	vand	v8, v7, v6; \
-	vand	v8, v8, v3; \
-	vor	v4, v8, v4; \
-	vcmpgtub	v6, reg, v1; \
-	vcmpgtub	v7, v2, reg; \
-	vand	v8, v7, v6; \
-	vand	v8, v8, v3; \
-	vor	reg, v8, reg; \
-	vcmpequb.	v6, reg, v4;
-
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#ifdef _ARCH_PWR8
-#define VCLZD_V8_v7	vclzd	v8, v7;
-#else
-#define VCLZD_V8_v7	.long	0x11003fc2
-#endif
-
-#define	FRAMESIZE	(FRAME_MIN_SIZE+48)
-/* TODO: change this to .machine power8 when the minimum required binutils
-   allows it.  */
-	.machine  power7
-EALIGN (STRCASESTR, 4, 0)
-	CALL_MCOUNT 2
-	mflr	r0			/* Load link register LR to r0.  */
-	std	r31, -8(r1)		/* Save callers register r31.  */
-	std	r30, -16(r1)		/* Save callers register r30.  */
-	std	r29, -24(r1)		/* Save callers register r29.  */
-	std	r28, -32(r1)		/* Save callers register r28.  */
-	std	r27, -40(r1)		/* Save callers register r27.  */
-	std	r0, 16(r1)		/* Store the link register.  */
-	cfi_offset(r31, -8)
-	cfi_offset(r30, -16)
-	cfi_offset(r29, -24)
-	cfi_offset(r28, -32)
-	cfi_offset(r27, -40)
-	cfi_offset(lr, 16)
-	stdu	r1, -FRAMESIZE(r1)	/* Create the stack frame.  */
-	cfi_adjust_cfa_offset(FRAMESIZE)
-
-	dcbt	0, r3
-	dcbt	0, r4
-	cmpdi	cr7, r3, 0		/* Input validation.  */
-	beq	cr7, L(retnull)
-	cmpdi	cr7, r4, 0
-	beq	cr7, L(retnull)
-
-	mr	r29, r3
-	mr	r30, r4
-	/* Load the first byte from r4 and check if it is null.  */
-	lbz	r6, 0(r4)
-	cmpdi	cr7, r6, 0
-	beq	cr7, L(ret_r3)
-
-	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r9, r10, __libc_tsd_LOCALE@tls
-	ld	r9, 0(r9)
-	ld	r9, LOCALE_CTYPE_TOUPPER(r9)
-	sldi	r10, r6, 2		/* Convert to upper case.  */
-	lwzx	r28, r9, r10
-
-	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r11, r10, __libc_tsd_LOCALE@tls
-	ld	r11, 0(r11)
-	ld	r11, LOCALE_CTYPE_TOLOWER(r11)
-	sldi	r10, r6, 2              /* Convert to lower case.  */
-	lwzx	r27, r11, r10
-
-	/* Check if the first char is present.  */
-	mr	r4, r27
-	bl	STRCHR
-	nop
-	mr	r5, r3
-	mr	r3, r29
-	mr	r29, r5
-	mr	r4, r28
-	bl	STRCHR
-	nop
-	cmpdi	cr7, r29, 0
-	beq	cr7, L(firstpos)
-	cmpdi	cr7, r3, 0
-	beq	cr7, L(skipcheck)
-	cmpw	cr7, r3, r29
-	ble 	cr7, L(firstpos)
-	/* Move r3 to the first occurrence.  */
-L(skipcheck):
-	mr	r3, r29
-L(firstpos):
-	mr	r29, r3
-
-	sldi	r9, r27, 8
-	or	r28, r9, r28
-	/* Reg r27 is used to count the number of iterations.  */
-	li	r27, 0
-	/* If the first char of the search string is not present.  */
-	cmpdi	cr7, r3, 0
-	ble	cr7, L(end)
-
-	/* Find the length of the pattern.  */
-	mr	r3, r30
-	bl	STRLEN
-	nop
-
-	cmpdi	cr7, r3, 0	/* If the search string is empty.  */
-	beq	cr7, L(ret_r3)
-
-	mr	r31, r3
-	mr	r4, r3
-	mr	r3, r29
-	bl	STRNLEN
-	nop
-
-	cmpd	cr7, r3, r31 	/* If len(r3) < len(r4).  */
-	blt	cr7, L(retnull)
-
-	mr	r3, r29
-
-	/* If the locale's single-byte case mappings do not match ASCII,
-	   take the byte-by-byte path.  */
-	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r9, r10, __libc_tsd_LOCALE@tls
-	ld	r9, 0(r9)
-	ld	r7, 0(r9)
-	addi	r7, r7, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES
-	lwz	r8, 0(r7)
-	cmpdi	cr7, r8, 1
-	beq	cr7, L(bytebybyte)
-
-	/* If len(r4) < 16, handle byte by byte; for such short patterns
-	   the vector registers are not worth the setup cost.  */
-	cmpdi	cr7, r31, 16
-	blt	cr7, L(bytebybyte)
-
-	/* Comparison values used for TOLOWER.  */
-	/* Load v1 = 64('A' - 1), v2 = 91('Z' + 1), v3 = 32 in each byte.  */
-	vspltish	v0, 0
-	vspltisb	v5, 2
-	vspltisb	v4, 4
-	vsl	v3, v5, v4
-	vaddubm	v1, v3, v3
-	vspltisb	v5, 15
-	vaddubm	v2, v5, v5
-	vaddubm	v2, v1, v2
-	vspltisb	v4, -3
-	vaddubm	v2, v2, v4
-
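-	/* Editorial note: vspltisb only encodes immediates in [-16, 15],
-	   so the constants above are built incrementally.  In C terms:
-
-	     v3 = 2 << 4;            32, the 0x20 case bit
-	     v1 = 32 + 32;           64 = 'A' - 1
-	     v2 = 64 + 15 + 15 - 3;  91 = 'Z' + 1  */
-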
-	/*
-	   1. Load 16 bytes from r3 and r4.
-	   2. Check for a null byte; if found, take the byte-by-byte path.
-	   3. Else, convert both to lowercase and compare.
-	   4. If they are equal, proceed to 1.
-	   5. If they don't match, find whether the first char of r4 is
-	      present in the loaded 16 bytes of r3.
-	   6. If yes, move the position, load the next 16 bytes of r3 and
-	      proceed to 2.
-	*/
-
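-	/* A hedged C outline of that loop (editorial sketch; has_null,
-	   chunk_eq_nocase and find_first_char are illustrative stand-ins
-	   for the cmpb/TOLOWER/vcmpequb sequences, not real helpers):
-
-	     while (iterations++ < ITERATIONS)
-	       {
-	         if (has_null (s, 16) || has_null (pat, 16))
-	           goto bytebybyte;
-	         if (chunk_eq_nocase (s, pat))        steps 3-4
-	           { s += 16; pat += 16; continue; }
-	         s += find_first_char (s, pat[0]);    steps 5-6
-	       }  */
-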
-	mr	r8, r3		/* Save r3 for future use.  */
-	mr	r4, r30		/* Restore r4.  */
-	clrldi	r10, r4, 60
-	lvx	v5, 0, r4	/* Load 16 bytes from r4.  */
-	cmpdi	cr7, r10, 0
-	beq	cr7, L(begin2)
-	/* If r4 is unaligned, load another 16 bytes.  */
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v7, 0, r4
-#else
-	lvsl	v7, 0, r4
-#endif
-	addi	r5, r4, 16
-	lvx	v9, 0, r5
-#ifdef __LITTLE_ENDIAN__
-	vperm	v5, v9, v5, v7
-#else
-	vperm	v5, v5, v9, v7
-#endif
-L(begin2):
-	lvx	v4, 0, r3
-	vcmpequb.	v7, v0, v4	/* Check for null.  */
-	beq	cr6, L(nullchk6)
-	b	L(trailcheck)
-
-        .align  4
-L(nullchk6):
-	clrldi	r10, r3, 60
-	cmpdi	cr7, r10, 0
-	beq	cr7, L(next16)
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v7, 0, r3
-#else
-	lvsl	v7, 0, r3
-#endif
-	addi	r5, r3, 16
-	/* If r3 is unaligned, load another 16 bytes.  */
-	lvx	v10, 0, r5
-#ifdef __LITTLE_ENDIAN__
-	vperm	v4, v10, v4, v7
-#else
-	vperm	v4, v4, v10, v7
-#endif
-L(next16):
-	vcmpequb.	v6, v0, v5	/* Check for null.  */
-	beq	cr6, L(nullchk)
-	b	L(trailcheck)
-
-	.align	4
-L(nullchk):
-	vcmpequb.	v6, v0, v4
-	beq	cr6, L(nullchk1)
-	b	L(retnull)
-
-	.align	4
-L(nullchk1):
-	/* Convert both v4 and v5 to lowercase.  */
-	TOLOWER(v5)
-	/* If both are same, branch to match.  */
-	blt	cr6, L(match)
-	/* Find if the first char is present in next 15 bytes.  */
-#ifdef __LITTLE_ENDIAN__
-	vspltb	v6, v5, 15
-	vsldoi	v7, v0, v4, 15
-#else
-	vspltb	v6, v5, 0
-	vspltisb	v7, 8
-	vslo	v7, v4, v7
-#endif
-	vcmpequb	v7, v6, v7
-	vcmpequb.	v6, v0, v7
-	/* Shift r3 by 16 bytes and proceed.  */
-	blt	cr6, L(shift16)
-	VCLZD_V8_v7
-#ifdef __LITTLE_ENDIAN__
-	vspltb	v6, v8, 15
-#else
-	vspltb	v6, v8, 7
-#endif
-	vcmpequb.	v6, v6, v1
-	/* Shift r3 by 8 bytes and proceed.  */
-	blt	cr6, L(shift8)
-	b	L(begin)
-
-	.align	4
-L(match):
-	/* There is a match of 16 bytes, check next bytes.  */
-	cmpdi	cr7, r31, 16
-	mr	r29, r3
-	beq	cr7, L(ret_r3)
-
-L(secondmatch):
-	addi	r3, r3, 16
-	addi	r4, r4, 16
-	/* Load next 16 bytes of r3 and r4 and compare.  */
-	clrldi	r10, r4, 60
-	cmpdi	cr7, r10, 0
-	beq	cr7, L(nextload)
-	/* Handle unaligned case.  */
-	vor	v6, v9, v9
-	vcmpequb.	v7, v0, v6
-	beq	cr6, L(nullchk2)
-	b	L(trailcheck)
-
-	.align	4
-L(nullchk2):
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v7, 0, r4
-#else
-	lvsl	v7, 0, r4
-#endif
-	addi	r5, r4, 16
-	/* If r4 is unaligned, load another 16 bytes.  */
-	lvx	v9, 0, r5
-#ifdef __LITTLE_ENDIAN__
-	vperm	v11, v9, v6, v7
-#else
-	vperm	v11, v6, v9, v7
-#endif
-	b	L(compare)
-
-	.align	4
-L(nextload):
-	lvx	v11, 0, r4
-L(compare):
-	vcmpequb.	v7, v0, v11
-	beq	cr6, L(nullchk3)
-	b	L(trailcheck)
-
-	.align	4
-L(nullchk3):
-	clrldi	r10, r3, 60
-	cmpdi 	cr7, r10, 0
-	beq 	cr7, L(nextload1)
-	/* Handle unaligned case.  */
-	vor	v4, v10, v10
-	vcmpequb.	v7, v0, v4
-	beq	cr6, L(nullchk4)
-	b	L(retnull)
-
-	.align	4
-L(nullchk4):
-#ifdef __LITTLE_ENDIAN__
-	lvsr	v7, 0, r3
-#else
-	lvsl	v7, 0, r3
-#endif
-	addi	r5, r3, 16
-	/* If r3 is unaligned, load another 16 bytes.  */
-	lvx	v10, 0, r5
-#ifdef __LITTLE_ENDIAN__
-	vperm	v4, v10, v4, v7
-#else
-	vperm	v4, v4, v10, v7
-#endif
-	b	L(compare1)
-
-	.align	4
-L(nextload1):
-	lvx	v4, 0, r3
-L(compare1):
-	vcmpequb.	v7, v0, v4
-	beq	cr6, L(nullchk5)
-	b	L(retnull)
-
-	.align	4
-L(nullchk5):
-	/* Convert both v4 and v11 to lowercase.  */
-	TOLOWER(v11)
-	/* If both are same, branch to secondmatch.  */
-	blt 	cr6, L(secondmatch)
-	/* Continue the search.  */
-        b	L(begin)
-
-	.align	4
-L(trailcheck):
-	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r11, r10, __libc_tsd_LOCALE@tls
-	ld	r11, 0(r11)
-	ld	r11, LOCALE_CTYPE_TOLOWER(r11)
-L(loop2):
-	lbz	r5, 0(r3)               /* Load byte from r3.  */
-	lbz	r6, 0(r4)               /* Load next byte from r4.  */
-	cmpdi 	cr7, r6, 0              /* Is it null?  */
-	beq 	cr7, L(updater3)
-	cmpdi 	cr7, r5, 0              /* Is it null?  */
-	beq 	cr7, L(retnull)         /* If yes, return.  */
-	addi	r3, r3, 1
-	addi	r4, r4, 1               /* Increment r4.  */
-	sldi	r10, r5, 2              /* Convert to lower case.  */
-	lwzx	r10, r11, r10
-	sldi	r7, r6, 2               /* Convert to lower case.  */
-	lwzx	r7, r11, r7
-	cmpw	cr7, r7, r10            /* Compare with byte from r4.  */
-	bne	cr7, L(begin)
-	b	L(loop2)
-
-	.align	4
-L(shift8):
-	addi	r8, r8, 7
-	b	L(begin)
-	.align	4
-L(shift16):
-	addi	r8, r8, 15
-	.align	4
-L(begin):
-	addi	r8, r8, 1
-	mr	r3, r8
-	/* When our iterations exceed ITERATIONS, fall back to the default.  */
-	addi	r27, r27, 1
-	cmpdi	cr7, r27, ITERATIONS
-	beq	cr7, L(default)
-	mr	r4, r30         /* Restore r4.  */
-	b	L(begin2)
-
-	/* Handling byte by byte.  */
-	.align	4
-L(loop1):
-	mr	r3, r8
-	addi	r27, r27, 1
-	cmpdi	cr7, r27, ITERATIONS
-	beq	cr7, L(default)
-	mr	r29, r8
-	srdi	r4, r28, 8
-	/* Check if the first char is present.  */
-	bl	STRCHR
-	nop
-	mr	r5, r3
-	mr	r3, r29
-	mr	r29, r5
-	sldi	r4, r28, 56
-	srdi	r4, r4, 56
-	bl	STRCHR
-	nop
-	cmpdi	cr7, r29, 0
-	beq	cr7, L(nextpos)
-	cmpdi	cr7, r3, 0
-	beq	cr7, L(skipcheck1)
-	cmpw	cr7, r3, r29
-	ble 	cr7, L(nextpos)
-	/* Move r3 to the first occurrence.  */
-L(skipcheck1):
-	mr	r3, r29
-L(nextpos):
-	mr	r29, r3
-	cmpdi 	cr7, r3, 0
-	ble 	cr7, L(retnull)
-L(bytebybyte):
-	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r11, r10, __libc_tsd_LOCALE@tls
-	ld	r11, 0(r11)
-	ld	r11, LOCALE_CTYPE_TOLOWER(r11)
-	mr	r4, r30                 /* Restore r4.  */
-	mr	r8, r3                  /* Save r3.  */
-	addi	r8, r8, 1
-
-L(loop):
-	addi	r3, r3, 1
-	lbz	r5, 0(r3)               /* Load byte from r3.  */
-	addi	r4, r4, 1               /* Increment r4.  */
-	lbz	r6, 0(r4)               /* Load next byte from r4.  */
-	cmpdi 	cr7, r6, 0              /* Is it null?  */
-	beq 	cr7, L(updater3)
-	cmpdi 	cr7, r5, 0              /* Is it null?  */
-	beq 	cr7, L(retnull)         /* If yes, return.  */
-	sldi	r10, r5, 2              /* Convert to lower case.  */
-	lwzx	r10, r11, r10
-	sldi	r7, r6, 2               /* Convert to lower case.  */
-	lwzx	r7, r11, r7
-	cmpw	cr7, r7, r10            /* Compare with byte from r4.  */
-	bne 	cr7, L(loop1)
-	b	L(loop)
-
-	/* Handling return values.  */
-	.align	4
-L(updater3):
-	subf	r3, r31, r3	/* Subtract r31 (length of r4) from r3.  */
-	b	L(end)
-
-	.align	4
-L(ret_r3):
-	mr	r3, r29		/* Return point of match.  */
-	b	L(end)
-
-	.align	4
-L(retnull):
-	li	r3, 0		/* Substring was not found.  */
-	b	L(end)
-
-	.align	4
-L(default):
-	mr	r4, r30
-	bl	__strcasestr_ppc
-	nop
-
-	.align	4
-L(end):
-	addi	r1, r1, FRAMESIZE	/* Restore stack pointer.  */
-	cfi_adjust_cfa_offset(-FRAMESIZE)
-	ld	r0, 16(r1)	/* Restore the saved link register.  */
-	ld	r27, -40(r1)
-	ld	r28, -32(r1)
-	ld	r29, -24(r1)	/* Restore non-volatile register r29.  */
-	ld	r30, -16(r1)	/* Restore non-volatile register r30.  */
-	ld	r31, -8(r1)	/* Restore non-volatile register r31.  */
-	cfi_restore(lr)
-	cfi_restore(r27)
-	cfi_restore(r28)
-	cfi_restore(r29)
-	cfi_restore(r30)
-	cfi_restore(r31)
-	mtlr	r0		/* Restore the link register.  */
-	blr
-END (STRCASESTR)
-
-weak_alias (__strcasestr, strcasestr)
-libc_hidden_def (__strcasestr)
-libc_hidden_builtin_def (strcasestr)
diff --git a/sysdeps/powerpc/powerpc64/power8/strchr.S b/sysdeps/powerpc/powerpc64/power8/strchr.S
deleted file mode 100644
index e0c185c162..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strchr.S
+++ /dev/null
@@ -1,377 +0,0 @@
-/* Optimized strchr implementation for PowerPC64/POWER8.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#ifdef USE_AS_STRCHRNUL
-# ifndef STRCHRNUL
-#   define FUNC_NAME __strchrnul
-# else
-#   define FUNC_NAME STRCHRNUL
-# endif
-#else
-# ifndef STRCHR
-#  define FUNC_NAME strchr
-# else
-#  define FUNC_NAME STRCHR
-# endif
-#endif  /* !USE_AS_STRCHRNUL  */
-
-/* char * [r3] strchr (char *s [r3], int c [r4])  */
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t,a,b)  .long (0x1000054c \
-			| ((t)<<(32-11)) \
-			| ((a)<<(32-16)) \
-			| ((b)<<(32-21)) )
-/* TODO: change this to .machine power8 when the minimum required binutils
-   allows it.  */
-	.machine  power7
-ENTRY (FUNC_NAME)
-	CALL_MCOUNT 2
-	dcbt	0,r3
-	clrrdi	r8,r3,3	      /* Align the address to doubleword boundary.  */
-	cmpdi	cr7,r4,0
-	ld	r12,0(r8)     /* Load doubleword from memory.  */
-	li	r0,0	      /* Doubleword with null chars to use
-				 with cmpb.  */
-
-	rlwinm	r6,r3,3,26,28 /* Calculate padding.  */
-
-	beq	cr7,L(null_match)
-
-	/* Replicate byte to doubleword.  */
-	insrdi	r4,r4,8,48
-	insrdi	r4,r4,16,32
-	insrdi  r4,r4,32,0
-
-	/* Now r4 has a doubleword of c bytes and r0 has
-	   a doubleword of null bytes.  */
-
-	cmpb	r10,r12,r4     /* Compare each byte against c byte.  */
-	cmpb	r11,r12,r0     /* Compare each byte against null byte.  */
-
-	/* Move the doublewords left and right to discard the bits that are
-	   not part of the string and bring them back as zeros.  */
-#ifdef __LITTLE_ENDIAN__
-	srd	r10,r10,r6
-	srd	r11,r11,r6
-	sld	r10,r10,r6
-	sld	r11,r11,r6
-#else
-	sld	r10,r10,r6
-	sld	r11,r11,r6
-	srd	r10,r10,r6
-	srd	r11,r11,r6
-#endif
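-	/* Editorial sketch of the discard step in C, with r6 holding the
-	   bit offset of the string start within the doubleword:
-
-	     mask = (mask >> r6) << r6;     little-endian
-	     mask = (mask << r6) >> r6;     big-endian
-
-	   forcing the bytes before the start of the string to zero in the
-	   cmpb results.  */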
-	or	r5,r10,r11    /* OR the results to speed things up.  */
-	cmpdi	cr7,r5,0      /* If r5 == 0, no c or null bytes
-				 have been found.  */
-	bne	cr7,L(done)
-
-	mtcrf   0x01,r8
-
-	/* Are we now aligned to a doubleword boundary?  If so, skip to
-	   the main loop.  Otherwise, go through the alignment code.  */
-
-	bt	28,L(loop)
-
-	/* Handle WORD2 of pair.  */
-	ldu	r12,8(r8)
-	cmpb    r10,r12,r4
-	cmpb	r11,r12,r0
-	or	r5,r10,r11
-	cmpdi	cr7,r5,0
-	bne	cr7,L(done)
-	b	L(loop)	      /* We branch here (rather than falling through)
-				 to skip the nops due to heavy alignment
-				 of the loop below.  */
-
-	.p2align  5
-L(loop):
-	/* Load two doublewords, compare and merge in a
-	   single register for speed.  This is an attempt
-	   to speed up the null-checking process for bigger strings.  */
-	ld	r12,8(r8)
-	ldu	r9,16(r8)
-	cmpb	r10,r12,r4
-	cmpb	r11,r12,r0
-	cmpb	r6,r9,r4
-	cmpb	r7,r9,r0
-	or	r5,r10,r11
-	or	r9,r6,r7
-	or	r12,r5,r9
-	cmpdi	cr7,r12,0
-	beq	cr7,L(vector)
-	/* OK, one (or both) of the doublewords contains a c/null byte.  Check
-	   the first doubleword and decrement the address in case the first
-	   doubleword really contains a c/null byte.  */
-
-	cmpdi	cr6,r5,0
-	addi	r8,r8,-8
-	bne	cr6,L(done)
-
-	/* The c/null byte must be in the second doubleword.  Adjust the
-	   address again and move the result of cmpb to r10 so we can calculate
-	   the pointer.  */
-
-	mr	r10,r6
-	mr	r11,r7
-	addi	r8,r8,8
-#ifdef USE_AS_STRCHRNUL
-	mr	r5, r9
-#endif
-	/* r10/r11 have the output of the cmpb instructions, that is,
-	   0xff in the same position as the c/null byte in the original
-	   doubleword from the string.  Use that to calculate the pointer.  */
-L(done):
-#ifdef USE_AS_STRCHRNUL
-	mr	r10, r5
-#endif
-#ifdef __LITTLE_ENDIAN__
-	addi    r3,r10,-1
-	andc    r3,r3,r10
-	popcntd	r0,r3
-# ifndef USE_AS_STRCHRNUL
-	addi    r4,r11,-1
-	andc    r4,r4,r11
-	cmpld	cr7,r3,r4
-	bgt	cr7,L(no_match)
-# endif
-#else
-	cntlzd	r0,r10	      /* Count leading zeros before c matches.  */
-# ifndef USE_AS_STRCHRNUL
-	cmpld	cr7,r11,r10
-	bgt	cr7,L(no_match)
-# endif
-#endif
-	srdi	r0,r0,3	      /* Convert leading zeros to bytes.  */
-	add	r3,r8,r0      /* Return address of the matching c byte
-				 or null in case c was not found.  */
-	blr
-
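-	/* Editorial note: both endian paths above reduce to counting the
-	   zero bits before the first 0xff byte of the cmpb mask.  In C,
-	   with "mask" in r10 and "base" the doubleword address in r8:
-
-	     idx = __builtin_ctzll (mask) >> 3;     little-endian
-	     idx = __builtin_clzll (mask) >> 3;     big-endian
-	     return base + idx;
-
-	   The (mask - 1) & ~mask plus popcntd pair is simply a
-	   POWER-friendly way of counting trailing zeros.  */
-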
-	/* Check the first 32B in GPR's and move to vectorized loop.  */
-	.p2align  5
-L(vector):
-	addi	r3, r8, 8
-	andi.	r10, r3, 31
-	bne	cr0, L(loop)
-	vspltisb	v0, 0
-	/* Precompute vbpermq constant.  */
-	vspltisb	v10, 3
-	lvsl	v11, r0, r0
-	vslb	v10, v11, v10
-	MTVRD(v1,r4)
-	li	r5, 16
-	vspltb	v1, v1, 7
-	/* Compare 32 bytes in each loop.  */
-L(continue):
-	lvx	v4, 0, r3
-	lvx	v5, r3, r5
-	vcmpequb	v2, v0, v4
-	vcmpequb	v3, v0, v5
-	vcmpequb	v6, v1, v4
-	vcmpequb	v7, v1, v5
-	vor	v8, v2, v3
-	vor	v9, v6, v7
-	vor	v11, v8, v9
-	vcmpequb.	v11, v0, v11
-	addi	r3, r3, 32
-	blt	cr6, L(continue)
-	/* One (or both) of the quadwords contains a c/null byte.  */
-	addi	r3, r3, -32
-#ifndef USE_AS_STRCHRNUL
-	vcmpequb.	v11, v0, v9
-	blt	cr6, L(no_match)
-#endif
-	/* Permute the first bit of each byte into bits 48-63.  */
-	VBPERMQ(v2, v2, v10)
-	VBPERMQ(v3, v3, v10)
-	VBPERMQ(v6, v6, v10)
-	VBPERMQ(v7, v7, v10)
-	/* Shift each component into its correct position for merging.  */
-#ifdef __LITTLE_ENDIAN__
-	vsldoi	v3, v3, v3, 2
-	vsldoi	v7, v7, v7, 2
-#else
-	vsldoi	v2, v2, v2, 6
-	vsldoi	v3, v3, v3, 4
-	vsldoi	v6, v6, v6, 6
-	vsldoi	v7, v7, v7, 4
-#endif
-
-        /* Merge the results and move to a GPR.  */
-        vor     v1, v3, v2
-        vor     v2, v6, v7
-        vor     v4, v1, v2
-	MFVRD(r5, v4)
-#ifdef __LITTLE_ENDIAN__
-	addi	r6, r5, -1
-	andc	r6, r6, r5
-	popcntd	r6, r6
-#else
-	cntlzd	r6, r5	/* Count leading zeros before the match.  */
-#endif
-	add	r3, r3, r6	/* Compute final length.  */
-	/* Return NULL if null found before c.  */
-#ifndef USE_AS_STRCHRNUL
-	lbz	r4, 0(r3)
-	cmpdi	cr7, r4, 0
-	beq	cr7, L(no_match)
-#endif
-	blr
-
-#ifndef USE_AS_STRCHRNUL
-	.align	4
-L(no_match):
-	li	r3,0
-	blr
-#endif
-
-/* We are here because strchr was called with a null byte.  */
-	.align	4
-L(null_match):
-	/* r0 has a doubleword of null bytes.  */
-
-	cmpb	r5,r12,r0     /* Compare each byte against null bytes.  */
-
-	/* Move the doublewords left and right to discard the bits that are
-	   not part of the string and bring them back as zeros.  */
-#ifdef __LITTLE_ENDIAN__
-	srd	r5,r5,r6
-	sld	r5,r5,r6
-#else
-	sld	r5,r5,r6
-	srd	r5,r5,r6
-#endif
-	cmpdi	cr7,r5,0      /* If r5 == 0, no null bytes
-				 have been found.  */
-	bne	cr7,L(done_null)
-
-	mtcrf   0x01,r8
-
-	/* Are we now aligned to a quadword boundary?  If so, skip to
-	   the main loop.  Otherwise, go through the alignment code.  */
-
-	bt	28,L(loop_null)
-
-	/* Handle WORD2 of pair.  */
-	ldu	r12,8(r8)
-	cmpb    r5,r12,r0
-	cmpdi	cr7,r5,0
-	bne	cr7,L(done_null)
-	b	L(loop_null)  /* We branch here (rather than falling through)
-				 to skip the nops due to heavy alignment
-				 of the loop below.  */
-
-	/* Main loop to look for the end of the string.  Since it's a
-	   small loop (< 8 instructions), align it to 32-bytes.  */
-	.p2align  5
-L(loop_null):
-	/* Load two doublewords, compare and merge in a
-	   single register for speed.  This is an attempt
-	   to speed up the null-checking process for bigger strings.  */
-	ld	r12,8(r8)
-	ldu     r11,16(r8)
-	cmpb	r5,r12,r0
-	cmpb	r10,r11,r0
-	or	r6,r5,r10
-	cmpdi	cr7,r6,0
-	beq	cr7,L(vector1)
-
-	/* OK, one (or both) of the doublewords contains a null byte.  Check
-	   the first doubleword and decrement the address in case the first
-	   doubleword really contains a null byte.  */
-
-	cmpdi	cr6,r5,0
-	addi	r8,r8,-8
-	bne	cr6,L(done_null)
-
-	/* The null byte must be in the second doubleword.  Adjust the address
-	   again and move the result of cmpb to r10 so we can calculate the
-	   pointer.  */
-
-	mr	r5,r10
-	addi	r8,r8,8
-
-	/* r5 has the output of the cmpb instruction, that is, it contains
-	   0xff in the same position as the null byte in the original
-	   doubleword from the string.  Use that to calculate the pointer.  */
-L(done_null):
-#ifdef __LITTLE_ENDIAN__
-	addi    r0,r5,-1
-	andc    r0,r0,r5
-	popcntd	r0,r0
-#else
-	cntlzd	r0,r5	      /* Count leading zeros before the match.  */
-#endif
-	srdi	r0,r0,3	      /* Convert leading zeros to bytes.  */
-	add	r3,r8,r0      /* Return address of the matching null byte.  */
-	blr
-	.p2align  5
-L(vector1):
-	addi    r3, r8, 8
-	andi.	r10, r3, 31
-	bne	cr0, L(loop_null)
-	vspltisb	v8, -1
-	vspltisb	v0, 0
-	vspltisb	v10, 3
-	lvsl	v11, r0, r0
-	vslb	v10, v11, v10
-	li	r5, 16
-L(continue1):
-	lvx	v4, 0, r3
-	lvx	v5, r3, r5
-	vcmpequb	v2, v0, v4
-	vcmpequb	v3, v0, v5
-	vor	v8, v2, v3
-	vcmpequb.	v11, v0, v8
-	addi	r3, r3, 32
-	blt	cr6, L(continue1)
-	addi	r3, r3, -32
-L(end1):
-	VBPERMQ(v2, v2, v10)
-	VBPERMQ(v3, v3, v10)
-	/* Shift each component into its correct position for merging.  */
-#ifdef __LITTLE_ENDIAN__
-	vsldoi	v3, v3, v3, 2
-#else
-	vsldoi	v2, v2, v2, 6
-	vsldoi	v3, v3, v3, 4
-#endif
-
-        /* Merge the results and move to a GPR.  */
-        vor     v4, v3, v2
-	MFVRD(r5, v4)
-#ifdef __LITTLE_ENDIAN__
-	addi	r6, r5, -1
-	andc	r6, r6, r5
-	popcntd	r6, r6
-#else
-	cntlzd	r6, r5	/* Count leading zeros before the match.  */
-#endif
-	add	r3, r3, r6	/* Compute final length.  */
-	blr
-END (FUNC_NAME)
-
-#ifndef USE_AS_STRCHRNUL
-weak_alias (strchr, index)
-libc_hidden_builtin_def (strchr)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/strchrnul.S b/sysdeps/powerpc/powerpc64/power8/strchrnul.S
deleted file mode 100644
index 3bf4b275dd..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strchrnul.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Optimized strchrnul implementation for PowerPC64/POWER8.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define USE_AS_STRCHRNUL 1
-#include <sysdeps/powerpc/powerpc64/power8/strchr.S>
-
-weak_alias (__strchrnul,strchrnul)
-libc_hidden_builtin_def (__strchrnul)
diff --git a/sysdeps/powerpc/powerpc64/power8/strcmp.S b/sysdeps/powerpc/powerpc64/power8/strcmp.S
deleted file mode 100644
index 770484f1e1..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strcmp.S
+++ /dev/null
@@ -1,247 +0,0 @@
-/* Optimized strcmp implementation for PowerPC64/POWER8.
-   Copyright (C) 2015-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#ifndef STRCMP
-# define STRCMP strcmp
-#endif
-
-/* Implements the function
-
-   int [r3] strcmp (const char *s1 [r3], const char *s2 [r4])
-
-   The implementation uses unaligned doubleword access to avoid specialized
-   code paths depending on data alignment.  Although recent powerpc64
-   uses 64K as the default page size, the page cross handling assumes a
-   minimum page size of 4k.  */
-
-EALIGN (STRCMP, 4, 0)
-	li	r0,0
-
-	/* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using
-	   the code:
-
-	    (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE))
-
-	   with PAGE_SIZE being 4096 and ITER_SIZE being 16.  */
-
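-	/* The same test as a C predicate (editorial sketch; rldicl below
-	   extracts the low 12 bits, i.e. the "% 4096" part):
-
-	     static inline int
-	     crosses_page (const char *p)
-	     {
-	       return ((uintptr_t) p % 4096) > 4096 - 16;
-	     }  */
-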
-	rldicl	r7,r3,0,52
-	rldicl	r9,r4,0,52
-	cmpldi	cr7,r7,4096-16
-	bgt	cr7,L(pagecross_check)
-	cmpldi	cr5,r9,4096-16
-	bgt	cr5,L(pagecross_check)
-
-	/* For short strings up to 16 bytes, load both s1 and s2 using
-	   unaligned dwords and compare.  */
-	ld	r8,0(r3)
-	ld	r10,0(r4)
-	cmpb	r12,r8,r0
-	cmpb	r11,r8,r10
-	orc.	r9,r12,r11
-	bne	cr0,L(different_nocmpb)
-
-	ld	r8,8(r3)
-	ld	r10,8(r4)
-	cmpb	r12,r8,r0
-	cmpb	r11,r8,r10
-	orc.	r9,r12,r11
-	bne	cr0,L(different_nocmpb)
-
-	addi	r7,r3,16
-	addi	r4,r4,16
-
-L(align_8b):
-	/* Now that the first 16 bytes have been checked, align source1 to
-	   a doubleword boundary and adjust the source2 address.  */
-	rldicl	r9,r7,0,61	/* source1 alignment to doubleword  */
-	subf	r4,r9,r4	/* Adjust source2 address based on source1
-				   alignment.  */
-	rldicr	r7,r7,0,60	/* Align source1 to doubleword.  */
-
-	/* At this point, source1 alignment is 0 and source2 alignment is
-	   between 0 and 7.  Check if source2 alignment is 0, meaning both
-	   sources have the same alignment.  */
-	andi.	r9,r4,0x7
-	bne	cr0,L(loop_diff_align)
-
-	/* If both source1 and source2 are doubleword aligned, there is no
-	   need for page boundary cross checks.  */
-
-	ld	r8,0(r7)
-	ld	r10,0(r4)
-	cmpb	r12,r8,r0
-	cmpb	r11,r8,r10
-	orc.	r9,r12,r11
-	bne	cr0,L(different_nocmpb)
-
-	.align 4
-L(loop_equal_align):
-	ld	r8,8(r7)
-	ld	r10,8(r4)
-	cmpb	r12,r8,r0
-	cmpb	r11,r8,r10
-	orc.	r9,r12,r11
-	bne	cr0,L(different_nocmpb)
-
-	ld	r8,16(r7)
-	ld	r10,16(r4)
-	cmpb	r12,r8,r0
-	cmpb	r11,r8,r10
-	orc.	r9,r12,r11
-	bne	cr0,L(different_nocmpb)
-
-	ldu	r8,24(r7)
-	ldu	r10,24(r4)
-	cmpb	r12,r8,r0
-	cmpb	r11,r8,r10
-	orc.	r9,r12,r11
-	bne	cr0,L(different_nocmpb)
-
-	b	L(loop_equal_align)
-
-	/* A zero byte was found in r8 (the s1 dword); r9 contains the cmpb
-	   result and r10 the dword from s2.  The code isolates the bytes
-	   up to the end (including the '\0') by masking the remaining
-	   ones with 0xFF:
-
-           #if __LITTLE_ENDIAN__
-	     (__builtin_ffsl (x) - 1) = counting trailing zero bits
-	     r9 = (__builtin_ffsl (r9) - 1) + 8;
-	     r9 = -1UL << r9
-	   #else
-	     r9  = __builtin_clzl (r9) + 8;
-	     r9  = -1UL >> r9
-	   #endif
-	     r8  = r8  | r9
-	     r10 = r10 | r9  */
-
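-	/* Editorial sketch of the whole tail in C, where w1/w2 are the
-	   s1/s2 doublewords and "mask" the combined cmpb output (shown
-	   for little-endian; big-endian counts leading zeros instead):
-
-	     idx = __builtin_ctzll (mask) >> 3;
-	     c1  = (w1 >> (8 * idx)) & 0xFF;
-	     c2  = (w2 >> (8 * idx)) & 0xFF;
-	     return (int) (c1 - c2);
-
-	   matching strcmp's contract of returning the signed difference
-	   of the first differing byte pair.  */
-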
-#ifdef __LITTLE_ENDIAN__
-	nor 	r9,r9,r9
-L(different_nocmpb):
-	neg	r3,r9
-	and	r9,r9,r3
-	cntlzd	r9,r9
-	subfic	r9,r9,63
-#else
-	not	r9,r9
-L(different_nocmpb):
-	cntlzd	r9,r9
-	subfic	r9,r9,56
-#endif
-	srd	r3,r8,r9
-	srd	r10,r10,r9
-	rldicl	r10,r10,0,56
-	rldicl	r3,r3,0,56
-	subf	r3,r10,r3
-	extsw	r3,r3
-	blr
-
-	.align	4
-L(pagecross_check):
-	subfic	r9,r9,4096
-	subfic	r7,r7,4096
-	cmpld	cr7,r7,r9
-	bge	cr7,L(pagecross)
-	mr	r7,r9
-
-	/* If the unaligned 16-byte read crosses a 4K page boundary, use
-	   a simple byte-by-byte comparison until the page alignment for s1
-	   is reached.  */
-L(pagecross):
-	add	r7,r3,r7
-	subf	r9,r3,r7
-	mtctr	r9
-
-	.align	4
-L(pagecross_loop):
-	/* Loads a byte from s1 and s2, compare if *s1 is equal to *s2
-	   and if *s1 is '\0'.  */
-	lbz	r9,0(r3)
-	lbz	r10,0(r4)
-	addi	r3,r3,1
-	addi	r4,r4,1
-	cmplw	cr7,r9,r10
-	cmpdi	cr5,r9,r0
-	bne	cr7,L(pagecross_ne)
-	beq	cr5,L(pagecross_nullfound)
-	bdnz	L(pagecross_loop)
-	b	L(align_8b)
-
-	.align	4
-	/* The unaligned read of source2 will cross a 4K page boundary,
-	   and the different byte or NULL may be in the remaining page
-	   bytes.  Since it cannot use the unaligned load, the algorithm
-	   reads and compares 8 bytes to keep source1 doubleword aligned.  */
-L(check_source2_byte):
-	li	r9,8
-	mtctr	r9
-
-	.align	4
-L(check_source2_byte_loop):
-	lbz	r9,0(r7)
-	lbz	r10,0(r4)
-	addi	r7,r7,1
-	addi	r4,r4,1
-	cmplw	cr7,r9,r10
-	cmpdi	cr5,r9,0
-	bne	cr7,L(pagecross_ne)
-	beq	cr5,L(pagecross_nullfound)
-	bdnz	L(check_source2_byte_loop)
-
-	/* If source2 is unaligned to doubleword, the code needs to check
-	   on each iteration if the unaligned doubleword access will cross
-	   a 4k page boundary.  */
-	.align	5
-L(loop_unaligned):
-	ld	r8,0(r7)
-	ld	r10,0(r4)
-	cmpb	r12,r8,r0
-	cmpb	r11,r8,r10
-	orc.	r9,r12,r11
-	bne	cr0,L(different_nocmpb)
-	addi	r7,r7,8
-	addi	r4,r4,8
-
-L(loop_diff_align):
-	/* Check if [src2]+8 cross a 4k page boundary:
-
-	     srcin2 % PAGE_SIZE > (PAGE_SIZE - 8)
-
-	     with PAGE_SIZE being 4096.  */
-	rldicl	r9,r4,0,52
-	cmpldi	cr7,r9,4088
-	ble	cr7,L(loop_unaligned)
-	b	L(check_source2_byte)
-
-	.align	4
-L(pagecross_ne):
-	extsw	r3,r9
-	mr	r9,r10
-L(pagecross_retdiff):
-	subf	r9,r9,r3
-	extsw	r3,r9
-	blr
-
-	.align	4
-L(pagecross_nullfound):
-	li	r3,0
-	b	L(pagecross_retdiff)
-END (STRCMP)
-libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/powerpc/powerpc64/power8/strcpy.S b/sysdeps/powerpc/powerpc64/power8/strcpy.S
deleted file mode 100644
index 7f2cee4b1b..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strcpy.S
+++ /dev/null
@@ -1,270 +0,0 @@
-/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER8.
-   Copyright (C) 2015-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#ifdef USE_AS_STPCPY
-# ifndef STPCPY
-#   define FUNC_NAME __stpcpy
-# else
-#   define FUNC_NAME STPCPY
-# endif
-#else
-# ifndef STRCPY
-#  define FUNC_NAME strcpy
-# else
-#  define FUNC_NAME STRCPY
-# endif
-#endif  /* !USE_AS_STPCPY  */
-
-/* Implements the function
-
-   char * [r3] strcpy (char *dest [r3], const char *src [r4])
-
-   or
-
-   char * [r3] stpcpy (char *dest [r3], const char *src [r4])
-
-   if USE_AS_STPCPY is defined.
-
-   The implementation uses unaligned doubleword access to avoid specialized
-   code paths depending on data alignment.  Although recent powerpc64
-   uses 64K as the default page size, the page cross handling assumes a
-   minimum page size of 4k.  */
-
-	.machine  power7
-EALIGN (FUNC_NAME, 4, 0)
-        li      r0,0          /* Doubleword with null chars to use
-                                 with cmpb.  */
-
-	/* Check if [src]+15 will cross a 4K page by checking whether the
-	   bit selecting the 4K page changes.  Basically:
-
-	   uint64_t srcin = (uint64_t)src;
-	   uint64_t ob = srcin & 4096UL;
-	   uint64_t nb = (srcin+15UL) & 4096UL;
-	   if (ob ^ nb)
-	     goto pagecross;  */
-
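-	/* The same check as a standalone C predicate (editorial sketch):
-	   bit 12 of the address toggles exactly when the 16-byte window
-	   ends in a different 4 KiB page than it starts in:
-
-	     static inline int
-	     crosses_4k (const char *src)
-	     {
-	       uintptr_t p = (uintptr_t) src;
-	       return ((p ^ (p + 15)) & 4096) != 0;
-	     }  */
-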
-	addi	r9,r4,15
-	xor	r9,r9,r4
-	rlwinm.	r9,r9,0,19,19
-	bne	L(pagecross)
-
-	/* For short strings (less than 16 bytes), just calculate the size
-	   as strlen does and issue the copy once the null is found.  */
-	mr	r7,r4
-        ld      r12,0(r7)     /* Load doubleword from memory.  */
-        cmpb    r10,r12,r0    /* Check for null bytes in DWORD1.  */
-        cmpdi   cr7,r10,0     /* If r10 == 0, no nulls have been found.  */
-        bne     cr7,L(done)
-
-        ldu     r8,8(r7)
-        cmpb    r10,r8,r0
-        cmpdi   cr7,r10,0
-        bne     cr7,L(done)
-
-	b	L(loop_before)
-
-	.align	4
-L(pagecross):
-	clrrdi  r7,r4,3       /* Align the address to doubleword boundary.  */
-	rlwinm  r6,r4,3,26,28 /* Calculate padding.  */
-	li      r5,-1         /* MASK = 0xffffffffffffffff.  */
-        ld      r12,0(r7)     /* Load doubleword from memory.  */
-#ifdef __LITTLE_ENDIAN__
-        sld     r5,r5,r6
-#else
-        srd     r5,r5,r6      /* MASK = MASK >> padding.  */
-#endif
-        orc     r9,r12,r5     /* Mask bits that are not part of the string.  */
-        cmpb    r10,r9,r0     /* Check for null bytes in DWORD1.  */
-        cmpdi   cr7,r10,0     /* If r10 == 0, no nulls have been found.  */
-        bne     cr7,L(done)
-
-        ldu     r6,8(r7)
-        cmpb    r10,r6,r0
-        cmpdi   cr7,r10,0
-        bne     cr7,L(done)
-
-        ld      r12,0(r7)
-        cmpb    r10,r12,r0
-        cmpdi   cr7,r10,0
-        bne     cr7,L(done)
-
-        ldu     r6,8(r7)
-        cmpb    r10,r6,r0
-        cmpdi   cr7,r10,0
-        bne     cr7,L(done)
-
-	/* We checked for 24 - x bytes, with x being the source alignment
-	   (0 <= x <= 16), and no zero has been found.  Start the copy loop
-	   with a doubleword-aligned address.  */
-	mr	r7,r4
-	ld	r12, 0(r7)
-	ldu	r8, 8(r7)
-
-L(loop_before):
-	/* Save the two doublewords read from the source and align the source
-	   to 16 bytes for the loop.  */
-	mr	r11,r3
-	std	r12,0(r11)
-	std	r8,8(r11)
-	addi	r11,r11,16
-	rldicl	r9,r4,0,60
-	subf	r7,r9,r7
-	subf	r11,r9,r11
-	b	L(loop_start)
-
-        .align  5
-L(loop):
-        std     r12, 0(r11)
-        std     r6, 8(r11)
-	addi	r11,r11,16
-L(loop_start):
-        /* Load two doublewords, compare and merge in a
-           single register for speed.  This is an attempt
-           to speed up the null-checking process for bigger strings.  */
-
-        ld      r12, 8(r7)
-        ldu     r6, 16(r7)
-        cmpb    r10,r12,r0
-        cmpb    r9,r6,r0
-        or      r8,r9,r10     /* Merge everything in one doubleword.  */
-        cmpdi   cr7,r8,0
-        beq     cr7,L(loop)
-
-
-        /* OK, one (or both) of the doublewords contains a null byte.  Check
-           the first doubleword and decrement the address in case the first
-           doubleword really contains a null byte.  */
-
-	addi	r4,r7,-8
-        cmpdi   cr6,r10,0
-        addi    r7,r7,-8
-        bne     cr6,L(done2)
-
-        /* The null byte must be in the second doubleword.  Adjust the address
-           again and move the result of cmpb to r10 so we can calculate the
-           length.  */
-
-        mr      r10,r9
-        addi    r7,r7,8
-	b	L(done2)
-
-        /* r10 has the output of the cmpb instruction, that is, it contains
-           0xff in the same position as the null byte in the original
-           doubleword from the string.  Use that to calculate the length.  */
-L(done):
-	mr	r11,r3
-L(done2):
-#ifdef __LITTLE_ENDIAN__
-        addi    r9, r10, -1   /* Form a mask from trailing zeros.  */
-        andc    r9, r9, r10
-        popcntd r6, r9        /* Count the bits in the mask.  */
-#else
-        cntlzd  r6,r10        /* Count leading zeros before the match.  */
-#endif
-        subf    r5,r4,r7
-        srdi    r6,r6,3       /* Convert leading/trailing zeros to bytes.  */
-        add     r8,r5,r6      /* Compute final length.  */
-#ifdef USE_AS_STPCPY
-	/* stpcpy returns the dest address plus the size not counting the
-	   final '\0'.  */
-	add	r3,r11,r8
-#endif
-	addi	r8,r8,1       /* Final '\0'.  */
-
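-	/* Editorial sketch of what the remaining copy amounts to in C,
-	   with r8 now holding strlen (src) + 1:
-
-	     memcpy (dest, src, len + 1);
-	     return dest + len;     stpcpy: points at the written '\0'
-	     return dest;           strcpy: r3 unchanged  */
-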
-	cmpldi	cr6,r8,8
-	mtocrf	0x01,r8
-	ble	cr6,L(copy_LE_8)
-
-	cmpldi	cr1,r8,16
-	blt	cr1,8f
-
-	/* Handle copies of 0~31 bytes.  */
-	.align	4
-L(copy_LT_32):
-	/* At least 6 bytes to go.  */
-	blt	cr1,8f
-
-	/* Copy 16 bytes.  */
-	ld	r6,0(r4)
-	ld	r8,8(r4)
-	addi	r4,r4,16
-	std	r6,0(r11)
-	std	r8,8(r11)
-	addi	r11,r11,16
-8:	/* Copy 8 bytes.  */
-	bf	28,L(tail4)
-	ld	r6,0(r4)
-	addi	r4,r4,8
-	std	r6,0(r11)
-	addi	r11,r11,8
-
-	.align	4
-/* Copies 4~7 bytes.  */
-L(tail4):
-	bf	29,L(tail2)
-	lwz	r6,0(r4)
-	stw	r6,0(r11)
-	bf	30,L(tail5)
-	lhz	r7,4(r4)
-	sth	r7,4(r11)
-	bflr	31
-	lbz	r8,6(r4)
-	stb	r8,6(r11)
-	blr
-
-	.align	4
-/* Copies 2~3 bytes.  */
-L(tail2):
-	bf	30,1f
-	lhz	r6,0(r4)
-	sth	r6,0(r11)
-	bflr	31
-	lbz	r7,2(r4)
-	stb	r7,2(r11)
-	blr
-
-	.align	4
-L(tail5):
-	bf	31,1f
-	lbz	r6,4(r4)
-	stb	r6,4(r11)
-	blr
-
-	.align	4
-1:
-	bflr	31
-	lbz	r6,0(r4)
-	stb	r6,0(r11)
-	blr
-
-/* Handles copies of 0~8 bytes.  */
-	.align	4
-L(copy_LE_8):
-	bne	cr6,L(tail4)
-	ld	r6,0(r4)
-	std	r6,0(r11)
-	blr
-END (FUNC_NAME)
-
-#ifndef USE_AS_STPCPY
-libc_hidden_builtin_def (strcpy)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/strcspn.S b/sysdeps/powerpc/powerpc64/power8/strcspn.S
deleted file mode 100644
index c9a7a2e3c3..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strcspn.S
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Optimized strcspn implementation for PowerPC64/POWER8.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define USE_AS_STRCSPN 1
-#include <sysdeps/powerpc/powerpc64/power8/strspn.S>
diff --git a/sysdeps/powerpc/powerpc64/power8/strlen.S b/sysdeps/powerpc/powerpc64/power8/strlen.S
deleted file mode 100644
index 8f4a1fc1dc..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strlen.S
+++ /dev/null
@@ -1,301 +0,0 @@
-/* Optimized strlen implementation for PowerPC64/POWER8 using a vectorized
-   loop.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MFVRD(r,v)	.long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t,a,b)	.long (0x1000054c \
-			       | ((t)<<(32-11))	\
-			       | ((a)<<(32-16))	\
-			       | ((b)<<(32-21)) )
-
-/* size_t [r3] strlen (char *s [r3])  */
-
-#ifndef STRLEN
-# define STRLEN strlen
-#endif
-
-/* TODO: change this to .machine power8 when the minimum required binutils
-   allows it.  */
-	.machine  power7
-EALIGN (STRLEN, 4, 0)
-	CALL_MCOUNT 1
-	dcbt	0,r3
-	clrrdi	r4,r3,3	      /* Align the address to doubleword boundary.  */
-	rlwinm	r6,r3,3,26,28 /* Calculate padding.  */
-	li	r0,0	      /* Doubleword with null chars to use
-				 with cmpb.  */
-	li	r5,-1	      /* MASK = 0xffffffffffffffff.  */
-	ld	r12,0(r4)     /* Load doubleword from memory.  */
-#ifdef __LITTLE_ENDIAN__
-	sld	r5,r5,r6
-#else
-	srd	r5,r5,r6      /* MASK = MASK >> padding.  */
-#endif
-	orc	r9,r12,r5     /* Mask bits that are not part of the string.  */
-	cmpb	r10,r9,r0     /* Check for null bytes in DWORD1.  */
-	cmpdi	cr7,r10,0     /* If r10 == 0, no nulls have been found.  */
-	bne	cr7,L(done)
-
-	/* For shorter strings (< 64 bytes), we will not use vector registers,
-	   as the overhead isn't worth it.  So, let's use GPRs instead.  This
-	   will be done the same way as we do in the POWER7 implementation.
-	   Let's see if we are aligned to a quadword boundary.  If so, we can
-	   jump to the first (non-vectorized) loop.  Otherwise, we have to
-	   handle the next DWORD first.  */
-	mtcrf	0x01,r4
-	mr	r9,r4
-	addi	r9,r9,8
-	bt	28,L(align64)
-
-	/* Handle the next 8 bytes so we are aligned to a quadword
-	   boundary.  */
-	ldu	r5,8(r4)
-	cmpb	r10,r5,r0
-	cmpdi	cr7,r10,0
-	addi	r9,r9,8
-	bne	cr7,L(done)
-
-L(align64):
-	/* Proceed to the old (POWER7) implementation, checking two doublewords
-	   per iteration.  For the first 56 bytes, we will just check for null
-	   characters.  After that, we will also check if we are 64-byte aligned
-	   so we can jump to the vectorized implementation.  We will unroll
-	   these loops to avoid excessive branching.  */
-	ld	r6,8(r4)
-	ldu	r5,16(r4)
-	cmpb	r10,r6,r0
-	cmpb	r11,r5,r0
-	or	r5,r10,r11
-	cmpdi	cr7,r5,0
-	addi	r9,r9,16
-	bne	cr7,L(dword_zero)
-
-	ld	r6,8(r4)
-	ldu	r5,16(r4)
-	cmpb	r10,r6,r0
-	cmpb	r11,r5,r0
-	or	r5,r10,r11
-	cmpdi	cr7,r5,0
-	addi	r9,r9,16
-	bne	cr7,L(dword_zero)
-
-	ld	r6,8(r4)
-	ldu	r5,16(r4)
-	cmpb	r10,r6,r0
-	cmpb	r11,r5,r0
-	or	r5,r10,r11
-	cmpdi	cr7,r5,0
-	addi	r9,r9,16
-	bne	cr7,L(dword_zero)
-
-	/* Are we 64-byte aligned? If so, jump to the vectorized loop.
-	   Note: aligning to 64 bytes will necessarily slow down performance for
-	   strings around 64 bytes in length due to the extra comparisons
-	   required to check alignment for the vectorized loop.  This is a
-	   necessary tradeoff we are willing to take in order to speed up the
-	   calculation for larger strings.  */
-	andi.	r10,r9,63
-	beq	cr0,L(preloop)
-	ld	r6,8(r4)
-	ldu	r5,16(r4)
-	cmpb	r10,r6,r0
-	cmpb	r11,r5,r0
-	or	r5,r10,r11
-	cmpdi	cr7,r5,0
-	addi	r9,r9,16
-	bne	cr7,L(dword_zero)
-
-	andi.	r10,r9,63
-	beq	cr0,L(preloop)
-	ld	r6,8(r4)
-	ldu	r5,16(r4)
-	cmpb	r10,r6,r0
-	cmpb	r11,r5,r0
-	or	r5,r10,r11
-	cmpdi	cr7,r5,0
-	addi	r9,r9,16
-	bne	cr7,L(dword_zero)
-
-	andi.	r10,r9,63
-	beq	cr0,L(preloop)
-	ld	r6,8(r4)
-	ldu	r5,16(r4)
-	cmpb	r10,r6,r0
-	cmpb	r11,r5,r0
-	or	r5,r10,r11
-	cmpdi	cr7,r5,0
-	addi	r9,r9,16
-	bne	cr7,L(dword_zero)
-
-	andi.	r10,r9,63
-	beq	cr0,L(preloop)
-	ld	r6,8(r4)
-	ldu	r5,16(r4)
-	cmpb	r10,r6,r0
-	cmpb	r11,r5,r0
-	or	r5,r10,r11
-	cmpdi	cr7,r5,0
-	addi	r9,r9,16
-
-	/* At this point, we are necessarily 64-byte aligned.  If no zeroes were
-	   found, jump to the vectorized loop.  */
-	beq	cr7,L(preloop)
-
-L(dword_zero):
-	/* OK, one (or both) of the doublewords contains a null byte.  Check
-	   the first doubleword and decrement the address in case the first
-	   doubleword really contains a null byte.  */
-
-	cmpdi	cr6,r10,0
-	addi	r4,r4,-8
-	bne	cr6,L(done)
-
-	/* The null byte must be in the second doubleword.  Adjust the address
-	   again and move the result of cmpb to r10 so we can calculate the
-	   length.  */
-
-	mr	r10,r11
-	addi	r4,r4,8
-
-	/* If the null byte was found in the non-vectorized code, compute the
-	   final length.  r10 has the output of the cmpb instruction, that is,
-	   it contains 0xff in the same position as the null byte in the
-	   original doubleword from the string.  Use that to calculate the
-	   length.  */
-L(done):
-#ifdef __LITTLE_ENDIAN__
-	addi	r9, r10,-1    /* Form a mask from trailing zeros.  */
-	andc	r9, r9,r10
-	popcntd	r0, r9	      /* Count the bits in the mask.  */
-#else
-	cntlzd	r0,r10	      /* Count leading zeros before the match.  */
-#endif
-	subf	r5,r3,r4
-	srdi	r0,r0,3	      /* Convert leading/trailing zeros to bytes.  */
-	add	r3,r5,r0      /* Compute final length.  */
-	blr
-
-	/* Vectorized implementation starts here.  */
-	.p2align  4
-L(preloop):
-	/* Set up for the loop.  */
-	mr	r4,r9
-	li	r7, 16	      /* Load required offsets.  */
-	li	r8, 32
-	li	r9, 48
-	li	r12, 8
-	vxor	v0,v0,v0      /* VR with null chars to use with
-				 vcmpequb.  */
-
-	/* Main loop to look for the end of the string.  We will read in
-	   64-byte chunks.  Align it to 32 bytes and unroll it 3 times to
-	   leverage the icache performance.  */
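-	/* Editorial note on the vminub trick below: a byte-wise minimum
-	   is zero in some lane iff one of the inputs has a zero byte
-	   there.  In C terms, per lane i of the four quadwords:
-
-	     m = min (min (v1[i], v2[i]), min (v3[i], v4[i]));
-	     if (m == 0)  a '\0' lies somewhere in these 64 bytes
-
-	   so a single vcmpequb. against zero covers the whole chunk.  */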
-	.p2align  5
-L(loop):
-	lvx	  v1,r4,r0  /* Load 4 quadwords.  */
-	lvx	  v2,r4,r7
-	lvx	  v3,r4,r8
-	lvx	  v4,r4,r9
-	vminub	  v5,v1,v2  /* Compare and merge into one VR for speed.  */
-	vminub	  v6,v3,v4
-	vminub	  v7,v5,v6
-	vcmpequb. v7,v7,v0  /* Check for NULLs.  */
-	addi	  r4,r4,64  /* Adjust address for the next iteration.  */
-	bne	  cr6,L(vmx_zero)
-
-	lvx	  v1,r4,r0  /* Load 4 quadwords.  */
-	lvx	  v2,r4,r7
-	lvx	  v3,r4,r8
-	lvx	  v4,r4,r9
-	vminub	  v5,v1,v2  /* Compare and merge into one VR for speed.  */
-	vminub	  v6,v3,v4
-	vminub	  v7,v5,v6
-	vcmpequb. v7,v7,v0  /* Check for NULLs.  */
-	addi	  r4,r4,64  /* Adjust address for the next iteration.  */
-	bne	  cr6,L(vmx_zero)
-
-	lvx	  v1,r4,r0  /* Load 4 quadwords.  */
-	lvx	  v2,r4,r7
-	lvx	  v3,r4,r8
-	lvx	  v4,r4,r9
-	vminub	  v5,v1,v2  /* Compare and merge into one VR for speed.  */
-	vminub	  v6,v3,v4
-	vminub	  v7,v5,v6
-	vcmpequb. v7,v7,v0  /* Check for NULLs.  */
-	addi	  r4,r4,64  /* Adjust address for the next iteration.  */
-	beq	  cr6,L(loop)
-
-L(vmx_zero):
-	/* OK, we found a null byte.  Let's look for it in the current 64-byte
-	   block and mark it in its corresponding VR.  */
-	vcmpequb  v1,v1,v0
-	vcmpequb  v2,v2,v0
-	vcmpequb  v3,v3,v0
-	vcmpequb  v4,v4,v0
-
-	/* We will now 'compress' the result into a single doubleword, so it
-	   can be moved to a GPR for the final calculation.  First, we
-	   generate an appropriate mask for vbpermq, so we can permute bits into
-	   the first halfword.  */
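-	/* What the VBPERMQ step computes, as C (editorial sketch): gather
-	   the most significant bit of each byte into a 16-bit mask, one
-	   bit per byte:
-
-	     m = 0;
-	     for (i = 0; i < 16; i++)
-	       m = (m << 1) | (v[i] >> 7);
-
-	   The lvsl/vslb pair below materializes the permute control
-	   { 0x00, 0x08, 0x10, ..., 0x78 }, the bit indices of those
-	   MSBs.  */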
-	vspltisb  v10,3
-	lvsl	  v11,r0,r0
-	vslb	  v10,v11,v10
-
-	/* Permute the first bit of each byte into bits 48-63.  */
-	VBPERMQ(v1,v1,v10)
-	VBPERMQ(v2,v2,v10)
-	VBPERMQ(v3,v3,v10)
-	VBPERMQ(v4,v4,v10)
-
-	/* Shift each component into its correct position for merging.  */
-#ifdef __LITTLE_ENDIAN__
-	vsldoi  v2,v2,v2,2
-	vsldoi  v3,v3,v3,4
-	vsldoi  v4,v4,v4,6
-#else
-	vsldoi	v1,v1,v1,6
-	vsldoi	v2,v2,v2,4
-	vsldoi	v3,v3,v3,2
-#endif
-
-	/* Merge the results and move to a GPR.  */
-	vor	v1,v2,v1
-	vor	v2,v3,v4
-	vor	v4,v1,v2
-	MFVRD(r10,v4)
-
-	/* Adjust address to the beginning of the current 64-byte block.  */
-	addi	r4,r4,-64
-
-#ifdef __LITTLE_ENDIAN__
-	addi	r9, r10,-1    /* Form a mask from trailing zeros.  */
-	andc	r9, r9,r10
-	popcntd	r0, r9	      /* Count the bits in the mask.  */
-#else
-	cntlzd	r0,r10	      /* Count leading zeros before the match.  */
-#endif
-	subf	r5,r3,r4
-	add	r3,r5,r0      /* Compute final length.  */
-	blr
-
-END (STRLEN)
-libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/powerpc/powerpc64/power8/strncase.S b/sysdeps/powerpc/powerpc64/power8/strncase.S
deleted file mode 100644
index 32e09e4d94..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strncase.S
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Optimized strncasecmp implementation for POWER8.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define USE_AS_STRNCASECMP 1
-#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S>
diff --git a/sysdeps/powerpc/powerpc64/power8/strncmp.S b/sysdeps/powerpc/powerpc64/power8/strncmp.S
deleted file mode 100644
index 3d8df90538..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strncmp.S
+++ /dev/null
@@ -1,327 +0,0 @@
-/* Optimized strncmp implementation for PowerPC64/POWER8.
-   Copyright (C) 2015-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#ifndef STRNCMP
-# define STRNCMP strncmp
-#endif
-
-/* Implements the function
-
-   int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t n [r5])
-
-   The implementation uses unaligned doubleword access to avoid specialized
-   code paths depending on data alignment.  Although recent powerpc64
-   uses 64K as the default page size, the page cross handling assumes a
-   minimum page size of 4k.  */
-
-	.machine  power7
-EALIGN (STRNCMP, 4, 0)
-	/* Check if size is 0.  */
-	mr.	r10,r5
-	beq	cr0,L(ret0)
-
-	/* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using
-	   the code:
-
-	    (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE))
-
-	   with PAGE_SIZE being 4096 and ITER_SIZE being 16.  */
-	rldicl	r8,r3,0,52
-	cmpldi	cr7,r8,4096-16
-	bgt	cr7,L(pagecross)
-	rldicl	r9,r4,0,52
-	cmpldi	cr7,r9,4096-16
-	bgt	cr7,L(pagecross)
-
-	/* For short strings up to 16 bytes, load both s1 and s2 using
-	   unaligned dwords and compare.  */
-	ld	r7,0(r3)
-	ld	r9,0(r4)
-	li	r8,0
-	cmpb	r8,r7,r8
-	cmpb	r6,r7,r9
-	orc.	r8,r8,r6
-	bne	cr0,L(different1)
-
-	/* If the strings compared are equal, but the size is less than or
-	   equal to 8, return 0.  */
-	cmpldi	cr7,r10,8
-	li	r9,0
-	ble	cr7,L(ret1)
-	addi	r5,r10,-8
-
-	ld	r7,8(r3)
-	ld	r9,8(r4)
-	cmpb	r8,r7,r8
-	cmpb	r6,r7,r9
-	orc.	r8,r8,r6
-	bne	cr0,L(different0)
-
-	cmpldi	cr7,r5,8
-	mr	r9,r8
-	ble	cr7,L(ret1)
-
-	/* Update pointers and size.  */
-	addi	r10,r10,-16
-	addi	r3,r3,16
-	addi	r4,r4,16
-
-	/* Now that the first 16 bytes have been checked, align source1 to
-	   a doubleword boundary and adjust the source2 address.  */
-L(align_8b):
-	rldicl	r5,r3,0,61
-	rldicr	r3,r3,0,60
-	subf	r4,r5,r4
-	add	r10,r10,r5
-
-	/* At this point, source1 alignment is 0 and source2 alignment is
-	   between 0 and 7.  Check if source2 alignment is 0, meaning both
-	   sources have the same alignment.  */
-	andi.	r8,r4,0x7
-	beq	cr0,L(loop_eq_align_0)
-
-	li	r5,0
-	b	L(loop_ne_align_1)
-
-	/* If source2 is unaligned to doubleword, the code needs to check
-	   on each iteration if the unaligned doubleword access will cross
-	   a 4k page boundary.  */
-	.align 4
-L(loop_ne_align_0):
-	ld	r7,0(r3)
-	ld	r9,0(r4)
-	cmpb	r8,r7,r5
-	cmpb	r6,r7,r9
-	orc.	r8,r8,r6
-	bne	cr0,L(different1)
-
-	cmpldi	cr7,r10,8
-	ble	cr7,L(ret0)
-	addi	r10,r10,-8
-	addi	r3,r3,8
-	addi	r4,r4,8
-L(loop_ne_align_1):
-	rldicl	r9,r4,0,52
-	cmpldi	r7,r9,4088
-	ble	cr7,L(loop_ne_align_0)
-	cmpdi	cr7,r10,0
-	beq	cr7,L(ret0)
-
-	lbz	r9,0(r3)
-	lbz	r8,0(r4)
-	cmplw	cr7,r9,r8
-	bne	cr7,L(byte_ne_4)
-	cmpdi	cr7,r9,0
-	beq	cr7,L(size_reached_0)
-
-	li	r9,7
-	addi	r8,r3,1
-	mtctr	r9
-	addi	r4,r4,1
-	addi	r10,r10,-1
-	addi	r3,r3,8
-
-	/* The unaligned read of source2 will cross a 4K page boundary,
-	   and the different byte or NULL may be in the remaining page
-	   bytes.  Since it cannot use the unaligned load, the algorithm
-	   reads and compares 8 bytes to keep source1 doubleword aligned.  */
-	.align 4
-L(loop_ne_align_byte):
-	cmpdi	cr7,r10,0
-	addi	r10,r10,-1
-	beq	cr7,L(ret0)
-	lbz	r9,0(r8)
-	lbz	r7,0(r4)
-	addi	r8,r8,1
-	addi	r4,r4,1
-	cmplw	cr7,r9,r7
-	cmpdi	cr5,r9,0
-	bne	cr7,L(size_reached_2)
-	beq	cr5,L(size_reached_0)
-	bdnz	L(loop_ne_align_byte)
-
-	cmpdi	cr7,r10,0
-	bne+	cr7,L(loop_ne_align_0)
-
-	.align 4
-L(ret0):
-	li	r9,0
-L(ret1):
-	mr	r3,r9
-	blr
-
-	/* The code now checks whether the doublewords differ by issuing a
-	   cmpb and shifts the result based on its output:
-
-	#ifdef __LITTLE_ENDIAN__
-	  leadzero = (__builtin_ffsl (z1) - 1);
-	  leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero;
-	  r1 = (r1 >> leadzero) & 0xFFUL;
-	  r2 = (r2 >> leadzero) & 0xFFUL;
-	#else
-	  leadzero = __builtin_clzl (z1);
-	  leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero;
-	  r1 = (r1 >> (56 - leadzero)) & 0xFFUL;
-	  r2 = (r2 >> (56 - leadzero)) & 0xFFUL;
-	#endif
-	  return r1 - r2;  */
-
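-	/* A C rendering of that pseudocode (editorial sketch; w1/w2 are
-	   the s1/s2 doublewords, z1 the combined cmpb mask, n the
-	   remaining size; first_set_bit and extract_byte are illustrative
-	   stand-ins for the ctz/clz and shift sequences):
-
-	     lead = first_set_bit (z1);
-	     if (lead > (n - 1) * 8)
-	       lead = (n - 1) * 8;      ignore differences past byte n-1
-	     c1 = extract_byte (w1, lead);
-	     c2 = extract_byte (w2, lead);
-	     return c1 - c2;
-
-	   The clamp is what distinguishes this tail from strcmp's: a
-	   difference beyond the size limit must compare equal.  */
-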
-	.align 4
-L(different0):
-	mr	r10,r5
-#ifdef __LITTLE_ENDIAN__
-L(different1):
-        neg	r11,r8
-        sldi	r10,r10,3
-        and	r8,r11,r8
-        addi	r10,r10,-8
-        cntlzd	r8,r8
-        subfic	r8,r8,63
-        extsw 	r8,r8
-        cmpld	cr7,r8,r10
-        ble	cr7,L(different2)
-        mr	r8,r10
-L(different2):
-        extsw	r8,r8
-#else
-L(different1):
-	addi	r10,r10,-1
-	cntlzd	r8,r8
-	sldi	r10,r10,3
-	cmpld	cr7,r8,r10
-	blt	cr7,L(different2)
-	mr	r8,r10
-L(different2):
-	subfic	r8,r8,56
-#endif
-	srd	r7,r7,r8
-	srd	r9,r9,r8
-	rldicl	r3,r7,0,56
-	rldicl	r9,r9,0,56
-	subf	r9,r9,r3
-	extsw	r9,r9
-	mr	r3,r9
-	blr
-
-	/* If the unaligned 16-byte read crosses a 4K page boundary, use
-	   a simple byte-by-byte comparison until the page alignment for s1
-	   is reached.  */
-	.align 4
-L(pagecross):
-	lbz	r7,0(r3)
-	lbz	r9,0(r4)
-	subfic	r8,r8,4095
-	cmplw	cr7,r9,r7
-	bne	cr7,L(byte_ne_3)
-	cmpdi	cr7,r9,0
-	beq	cr7,L(byte_ne_0)
-	addi	r10,r10,-1
-	subf	r7,r8,r10
-	subf	r9,r7,r10
-	addi	r9,r9,1
-	mtctr	r9
-	b	L(pagecross_loop1)
-
-	.align 4
-L(pagecross_loop0):
-	beq	cr7,L(ret0)
-	lbz	r9,0(r3)
-	lbz	r8,0(r4)
-	addi	r10,r10,-1
-	cmplw	cr7,r9,r8
-	cmpdi	cr5,r9,0
-	bne	cr7,L(byte_ne_2)
-	beq	cr5,L(byte_ne_0)
-L(pagecross_loop1):
-	cmpdi	cr7,r10,0
-	addi	r3,r3,1
-	addi	r4,r4,1
-	bdnz	L(pagecross_loop0)
-	cmpdi	cr7,r7,0
-	li	r9,0
-	bne+	cr7,L(align_8b)
-	b	L(ret1)
-
-	/* If both source1 and source2 are doubleword aligned, there is no
-	   need for page boundary cross checks.  */
-	.align 4
-L(loop_eq_align_0):
-	ld	r7,0(r3)
-	ld	r9,0(r4)
-	cmpb	r8,r7,r8
-	cmpb	r6,r7,r9
-	orc.	r8,r8,r6
-	bne	cr0,L(different1)
-
-	cmpldi	cr7,r10,8
-	ble	cr7,L(ret0)
-	addi	r9,r10,-9
-
-	li	r5,0
-	srdi	r9,r9,3
-	addi	r9,r9,1
-	mtctr	r9
-	b	L(loop_eq_align_2)
-
-	.align 4
-L(loop_eq_align_1):
-	bdz	L(ret0)
-L(loop_eq_align_2):
-	ldu	r7,8(r3)
-	addi	r10,r10,-8
-	ldu	r9,8(r4)
-	cmpb	r8,r7,r5
-	cmpb	r6,r7,r9
-	orc.	r8,r8,r6
-	beq	cr0,L(loop_eq_align_1)
-	b	L(different1)
-
-	.align 4
-L(byte_ne_0):
-	li	r7,0
-L(byte_ne_1):
-	subf	r9,r9,r7
-	extsw	r9,r9
-	b	L(ret1)
-
-	.align 4
-L(byte_ne_2):
-	extsw	r7,r9
-	mr	r9,r8
-	b	L(byte_ne_1)
-L(size_reached_0):
-	li	r10,0
-L(size_reached_1):
-	subf	r9,r9,r10
-	extsw	r9,r9
-	b	L(ret1)
-L(size_reached_2):
-	extsw	r10,r9
-	mr	r9,r7
-	b	L(size_reached_1)
-L(byte_ne_3):
-	extsw	r7,r7
-	b	L(byte_ne_1)
-L(byte_ne_4):
-	extsw	r10,r9
-	mr	r9,r8
-	b	L(size_reached_1)
-END(STRNCMP)
-libc_hidden_builtin_def(strncmp)
diff --git a/sysdeps/powerpc/powerpc64/power8/strncpy.S b/sysdeps/powerpc/powerpc64/power8/strncpy.S
deleted file mode 100644
index 6d40f30ff7..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strncpy.S
+++ /dev/null
@@ -1,465 +0,0 @@
-/* Optimized strncpy/stpncpy implementation for PowerPC64/POWER8.
-   Copyright (C) 2015-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#ifdef USE_AS_STPNCPY
-# ifndef STPNCPY
-#   define FUNC_NAME __stpncpy
-# else
-#   define FUNC_NAME STPNCPY
-# endif
-#else
-# ifndef STRNCPY
-#  define FUNC_NAME strncpy
-# else
-#  define FUNC_NAME STRNCPY
-# endif
-#endif  /* !USE_AS_STPNCPY  */
-
-#ifndef MEMSET
-/* For builds without IFUNC support, local calls should be made to internal
-   GLIBC symbol (created by libc_hidden_builtin_def).  */
-# ifdef SHARED
-#  define MEMSET   __GI_memset
-# else
-#  define MEMSET   memset
-# endif
-#endif
-
-#define FRAMESIZE (FRAME_MIN_SIZE+48)
-
-/* Implements the function
-
-   char * [r3] strncpy (char *dest [r3], const char *src [r4], size_t n [r5])
-
-   or
-
-   char * [r3] stpncpy (char *dest [r3], const char *src [r4], size_t n [r5])
-
-   if USE_AS_STPNCPY is defined.
-
-   The implementation uses unaligned doubleword access to avoid specialized
-   code paths depending on data alignment.  Although recent powerpc64
-   uses 64K as the default page size, the page cross handling assumes a
-   minimum page size of 4k.  */
-
-	.machine  power7
-EALIGN (FUNC_NAME, 4, 0)
-
-        /* Check if [src]+15 will cross a 4K page by checking whether the
-           bit selecting the 4K page changes.  Basically:
-
-           uint64_t srcin = (uint64_t)src;
-           uint64_t ob = srcin & 4096UL;
-           uint64_t nb = (srcin+15UL) & 4096UL;
-           if (ob ^ nb)
-             goto pagecross;  */
-
-	addi	r10,r4,16
-	rlwinm	r9,r4,0,19,19
-
-	/* Save some non-volatile registers on the stack.  */
-	std	r26,-48(r1)
-	std	r27,-40(r1)
-
-	rlwinm	r8,r10,0,19,19
-
-	std	r28,-32(r1)
-	std	r29,-24(r1)
-
-	cmpld	cr7,r9,r8
-
-	std	r30,-16(r1)
-	std	r31,-8(r1)
-
-	/* Update CFI.  */
-	cfi_offset(r26, -48)
-	cfi_offset(r27, -40)
-	cfi_offset(r28, -32)
-	cfi_offset(r29, -24)
-	cfi_offset(r30, -16)
-	cfi_offset(r31, -8)
-
-	beq	cr7,L(unaligned_lt_16)
-	rldicl	r9,r4,0,61
-	subfic	r8,r9,8
-	cmpld	cr7,r5,r8
-	bgt 	cr7,L(pagecross)
-
-	/* At this point there are 1 to 15 bytes left to check and write.  Since
-	   control may arrive here either from the first unaligned 16-byte access
-	   or from the bulk copy, the code uses an unrolled byte read/write
-	   instead of trying to analyze the cmpb results.  */
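A hedged C model of this tail (the names dst/src/n stand for r9/r4/r5; this
is an illustration, not the actual implementation):

    #include <string.h>

    /* Copy at most N bytes; once the terminator has been copied,
       zero-fill the remaining bytes, matching strncpy semantics.  */
    static void
    byte_tail (char *dst, const char *src, size_t n)
    {
      while (n > 0)
        {
          char c = *src++;
          *dst++ = c;
          n--;
          if (c == '\0')
            {
              memset (dst, 0, n);   /* The zero-pad path below.  */
              break;
            }
        }
    }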
-L(short_path):
-	mr	r9,r3
-L(short_path_1):
-	/* Return if there are no more bytes to be written.  */
-	cmpdi	cr7,r5,0
-	beq	cr7,L(short_path_loop_end_1)
-L(short_path_2):
-	/* Copy one char from src (r4) and write it to dest (r9).  If it is the
-	   end-of-string, start the null padding.  Continue, otherwise.  */
-	lbz	r10,0(r4)
-	cmpdi	cr7,r10,0
-	stb	r10,0(r9)
-	beq	cr7,L(zero_pad_start_1)
-	/* If there are no more bytes to be written, return.  */
-	cmpdi	cr0,r5,1
-	addi	r8,r9,1
-	addi	r6,r5,-1
-	beq	cr0,L(short_path_loop_end_0)
-	/* Copy another char from src (r4) to dest (r9).  Check again if it is
-	   the end-of-string.  If so, start the null padding.  */
-	lbz	r10,1(r4)
-	cmpdi	cr7,r10,0
-	stb	r10,1(r9)
-	beq	cr7,L(zero_pad_start_prepare_1)
-	/* Eagerly decrement r5 by 3, which is the number of bytes already
-	   written, plus one write that will be performed later on.  */
-	addi	r10,r5,-3
-	b	L(short_path_loop_1)
-
-	.align	4
-L(short_path_loop):
-	/* At this point, the induction variable, r5, as well as the pointers
-	   to dest and src (r9 and r4, respectively) have been updated.
-
-	   Note: The registers r7 and r10 are induction variables derived from
-	   r5.  They are used to determine if the total number of writes has
-	   been reached at every other write.
-
-	   Copy one char from src (r4) and write it to dest (r9).  If it is the
-	   end-of-string, start the null padding.  Continue, otherwise.  */
-	lbz	r8,0(r4)
-	addi	r7,r10,-2
-	cmpdi	cr5,r8,0
-	stb	r8,0(r9)
-	beq	cr5,L(zero_pad_start_1)
-	beq	cr7,L(short_path_loop_end_0)
-	/* Copy another char from src (r4) to dest (r9).  Check again if it is
-	   the end-of-string.  If so, start the null padding.  */
-	lbz	r8,1(r4)
-	cmpdi	cr7,r8,0
-	stb	r8,1(r9)
-	beq	cr7,L(zero_pad_start)
-	mr	r10,r7
-L(short_path_loop_1):
-	/* This block is reached after two chars have been already written to
-	   dest.  Nevertheless, r5 (the induction variable), r9 (the pointer to
-	   dest), and r4 (the pointer to src) have not yet been updated.
-
-	   At this point:
-	     r5 holds the count of bytes yet to be written plus 2.
-	     r9 points to the last two chars that were already written to dest.
-	     r4 points to the last two chars that were already copied from src.
-
-	   The algorithm continues by decrementing r5, the induction variable,
-	   so that it reflects the last two writes.  The pointers to dest (r9)
-	   and to src (r4) are incremented by two, for the same reason.
-
-	   Note: Register r10 is another induction variable, derived from r5,
-	   which determines if the total number of writes has been reached.  */
-	addic.	r5,r5,-2
-	addi	r9,r9,2
-	cmpdi	cr7,r10,0 /* Eagerly check if the next write is the last.  */
-	addi	r4,r4,2
-	addi	r6,r9,1
-	bne	cr0,L(short_path_loop) /* Check if the total number of writes
-					  has been reached at every other
-					  write.  */
-#ifdef USE_AS_STPNCPY
-	mr	r3,r9
-	b	L(short_path_loop_end)
-#endif
-
-L(short_path_loop_end_0):
-#ifdef USE_AS_STPNCPY
-	addi	r3,r9,1
-	b	L(short_path_loop_end)
-#endif
-L(short_path_loop_end_1):
-#ifdef USE_AS_STPNCPY
-	mr	r3,r9
-#endif
-L(short_path_loop_end):
-	/* Restore non-volatile registers.  */
-	ld	r26,-48(r1)
-	ld	r27,-40(r1)
-	ld	r28,-32(r1)
-	ld	r29,-24(r1)
-	ld	r30,-16(r1)
-	ld	r31,-8(r1)
-	blr
-
-	/* This code pads the remainder of dest with null bytes.  The algorithm
-	   calculates the remaining size and calls memset.  */
-	.align	4
-L(zero_pad_start):
-	mr	r5,r10
-	mr	r9,r6
-L(zero_pad_start_1):
-	/* At this point:
-	     - r5 holds the number of bytes that still have to be written to
-	       dest.
-	     - r9 points to the position, in dest, where the first null byte
-	       will be written.
-	   The above statements are true both when control reaches this label
-	   from a branch or when falling through the previous lines.  */
-#ifndef USE_AS_STPNCPY
-	mr	r30,r3       /* Save the return value of strncpy.  */
-#endif
-	/* Prepare the call to memset.  */
-	mr	r3,r9        /* Pointer to the area to be zero-filled.  */
-	li	r4,0         /* Byte to be written (zero).  */
-
-	/* We delayed the creation of the stack frame, as well as the saving of
-	   the link register, because only at this point, we are sure that
-	   doing so is actually needed.  */
-
-	/* Save the link register.  */
-	mflr	r0
-	std	r0,16(r1)
-	cfi_offset(lr, 16)
-
-	/* Create the stack frame.  */
-	stdu	r1,-FRAMESIZE(r1)
-	cfi_adjust_cfa_offset(FRAMESIZE)
-
-	bl	MEMSET
-	nop
-
-	/* Restore the stack frame.  */
-	addi	r1,r1,FRAMESIZE
-	cfi_adjust_cfa_offset(-FRAMESIZE)
-	/* Restore the link register.  */
-	ld	r0,16(r1)
-	mtlr	r0
-
-#ifndef USE_AS_STPNCPY
-	mr	r3,r30       /* Restore the return value of strncpy, i.e.:
-				dest.  For stpncpy, the return value is the
-				same as return value of memset.  */
-#endif
-
-	/* Restore non-volatile registers and return.  */
-	ld	r26,-48(r1)
-	ld	r27,-40(r1)
-	ld	r28,-32(r1)
-	ld	r29,-24(r1)
-	ld	r30,-16(r1)
-	ld	r31,-8(r1)
-	blr
-
-	/* The common case where [src]+16 will not cross a 4K page boundary.
-	   In this case the code quickly checks the first 16 bytes by using
-	   doubleword reads/compares and updates the destination if neither
-	   the total size was reached nor a null byte was found.  */
-	.align	4
-L(unaligned_lt_16):
-	cmpldi	cr7,r5,7
-	ble	cr7,L(short_path)
-	ld	r7,0(r4)
-	li	r8,0
-	cmpb	r8,r7,r8
-	cmpdi	cr7,r8,0
-	bne	cr7,L(short_path_prepare_2)
-	addi	r6,r5,-8
-	std	r7,0(r3)
-	addi	r9,r3,8
-	cmpldi	cr7,r6,7
-	addi	r7,r4,8
-	ble	cr7,L(short_path_prepare_1_1)
-	ld	r4,8(r4)
-	cmpb	r8,r4,r8
-	cmpdi	cr7,r8,0
-	bne	cr7,L(short_path_prepare_2_1)
-	std	r4,8(r3)
-	addi	r29,r3,16
-	addi	r5,r5,-16
-	/* Neither a null byte was found nor was the total length reached;
-	   align to 16 bytes and issue a bulk copy/compare.  */
-	b	L(align_to_16b)
-
-	/* In the case of a 4K page boundary cross, the algorithm first aligns
-	   the address down to a doubleword, calculates a mask based on the
-	   alignment to ignore the leading bytes, and continues using
-	   doubleword loads.  */
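In C, the masking idea looks roughly like this (little-endian case shown; a
sketch assuming a 64-bit little-endian target, not the actual implementation):

    #include <stdint.h>

    /* Load the aligned doubleword containing SRC and force the bytes that
       precede SRC to 0xff, so they can never look like a terminator; this
       models the sld/orc pair in the code below.  */
    static uint64_t
    masked_head (const char *src)
    {
      uintptr_t p = (uintptr_t) src;
      uint64_t dw = *(const uint64_t *) (p & ~(uintptr_t) 7);
      unsigned pad = (p & 7) * 8;          /* Bits that precede the string.  */
      uint64_t mask = ~(uint64_t) 0 << pad;
      return dw | ~mask;                   /* orc: OR with the complement.  */
    }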
-	.align	4
-L(pagecross):
-	rldicr	r11,r4,0,59	/* Align the address to 8 bytes boundary.  */
-	li	r6,-1		/* MASK = 0xffffffffffffffffUL.  */
-	sldi	r9,r9,3		/* Calculate padding.  */
-	ld	r7,0(r11)	/* Load doubleword from memory.  */
-#ifdef __LITTLE_ENDIAN__
-	sld	r9,r6,r9	/* MASK = MASK << padding.  */
-#else
-	srd	r9,r6,r9	/* MASK = MASK >> padding.  */
-#endif
-	orc	r9,r7,r9	/* Mask bits that are not part of the
-				   string.  */
-	li	r7,0
-	cmpb	r9,r9,r7	/* Check for null bytes in DWORD1.  */
-	cmpdi	cr7,r9,0
-	bne	cr7,L(short_path_prepare_2)
-	subf	r8,r8,r5	/* Adjust total length.  */
-	cmpldi	cr7,r8,8	/* Check if length was reached.  */
-	ble	cr7,L(short_path_prepare_2)
-
-	/* For the next checks the address is aligned, so check three more
-	   doublewords to make sure 16 unaligned bytes can be read before
-	   starting the bulk copy with 16-byte aligned addresses.  */
-	ld	r7,8(r11)
-	cmpb	r9,r7,r9
-	cmpdi	cr7,r9,0
-	bne	cr7,L(short_path_prepare_2)
-	addi	r7,r8,-8
-	cmpldi	cr7,r7,8
-	ble	cr7,L(short_path_prepare_2)
-	ld	r7,16(r11)
-	cmpb	r9,r7,r9
-	cmpdi	cr7,r9,0
-	bne	cr7,L(short_path_prepare_2)
-	addi	r8,r8,-16
-	cmpldi	cr7,r8,8
-	ble	cr7,L(short_path_prepare_2)
-	ld	r8,24(r11)
-	cmpb	r9,r8,r9
-	cmpdi	cr7,r9,0
-	bne	cr7,L(short_path_prepare_2)
-
-	/* No null byte was found in the 32 bytes read and the length was not
-	   reached; read the source again using unaligned loads and store it.  */
-	ld	r9,0(r4)
-	addi	r29,r3,16
-	addi	r5,r5,-16
-	std	r9,0(r3)
-	ld	r9,8(r4)
-	std	r9,8(r3)
-
-	/* Align the source to 16 bytes and adjust the destination and size.  */
-L(align_to_16b):
-	rldicl	r9,r10,0,60
-	rldicr	r28,r10,0,59
-	add	r12,r5,r9
-	subf	r29,r9,r29
-
-	/* The bulk read/compare/copy loads two doublewords, compare and merge
-	   in a single register for speed.  This is an attempt to speed up the
-	   null-checking process for bigger strings.  */
-
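The doubleword null test used throughout (cmpb against a zero doubleword,
then OR-merging both halves) can be modeled in portable C; cmpb0 below is a
hypothetical helper standing in for the cmpb instruction:

    #include <stdint.h>

    /* Exact emulation of cmpb against 0: 0xff in each byte of X that is
       zero, 0x00 elsewhere.  */
    static inline uint64_t
    cmpb0 (uint64_t x)
    {
      uint64_t m = (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
      return (m >> 7) * 0xff;   /* Widen each flag bit to a whole byte.  */
    }

    /* Merged test on two doublewords, as in "or. r6,r9,r7" below.  */
    static inline int
    has_null_16 (uint64_t a, uint64_t b)
    {
      return (cmpb0 (a) | cmpb0 (b)) != 0;
    }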
-	cmpldi	cr7,r12,15
-	ble	cr7,L(short_path_prepare_1_2)
-
-	/* Main loop for large sizes, unrolled 2 times to get better use of
-	   pipeline.  */
-	ld	r8,0(r28)
-	ld	r10,8(r28)
-	li	r9,0
-	cmpb	r7,r8,r9
-	cmpb	r9,r10,r9
-	or.	r6,r9,r7
-	bne	cr0,L(short_path_prepare_2_3)
-	addi	r5,r12,-16
-	addi	r4,r28,16
-	std	r8,0(r29)
-	std	r10,8(r29)
-	cmpldi	cr7,r5,15
-	addi	r9,r29,16
-	ble	cr7,L(short_path_1)
-	mr	r11,r28
-	mr	r6,r29
-	li	r30,0
-	subfic	r26,r4,48
-	subfic	r27,r9,48
-
-	b	L(loop_16b)
-
-	.align	4
-L(loop_start):
-	ld	r31,0(r11)
-	ld	r10,8(r11)
-	cmpb	r0,r31,r7
-	cmpb	r8,r10,r7
-	or.	r7,r0,r8
-	addi	r5,r5,-32
-	cmpldi	cr7,r5,15
-	add	r4,r4,r26
-	add	r9,r9,r27
-	bne	cr0,L(short_path_prepare_2_2)
-	add	r4,r28,r4
-	std	r31,0(r6)
-	add	r9,r29,r9
-	std	r10,8(r6)
-	ble	cr7,L(short_path_1)
-
-L(loop_16b):
-	ld	r10,16(r11)
-	ld	r0,24(r11)
-	cmpb	r8,r10,r30
-	cmpb	r7,r0,r30
-	or.	r7,r8,r7
-	addi	r12,r12,-32
-	cmpldi	cr7,r12,15
-	addi	r11,r11,32
-	bne	cr0,L(short_path_2)
-	std	r10,16(r6)
-	addi	r6,r6,32
-	std	r0,-8(r6)
-	bgt	cr7,L(loop_start)
-
-	mr	r5,r12
-	mr	r4,r11
-	mr	r9,r6
-	b	L(short_path_1)
-
-	.align	4
-L(short_path_prepare_1_1):
-	mr	r5,r6
-	mr	r4,r7
-	b	L(short_path_1)
-L(short_path_prepare_1_2):
-	mr	r5,r12
-	mr	r4,r28
-	mr	r9,r29
-	b	L(short_path_1)
-L(short_path_prepare_2):
-	mr	r9,r3
-	b	L(short_path_2)
-L(short_path_prepare_2_1):
-	mr	r5,r6
-	mr	r4,r7
-	b	L(short_path_2)
-L(short_path_prepare_2_2):
-	mr	r5,r12
-	mr	r4,r11
-	mr	r9,r6
-	b	L(short_path_2)
-L(short_path_prepare_2_3):
-	mr	r5,r12
-	mr	r4,r28
-	mr	r9,r29
-	b	L(short_path_2)
-L(zero_pad_start_prepare_1):
-	mr	r5,r6
-	mr	r9,r8
-	b	L(zero_pad_start_1)
-END (FUNC_NAME)
-
-#ifndef USE_AS_STPNCPY
-libc_hidden_builtin_def (strncpy)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power8/strnlen.S b/sysdeps/powerpc/powerpc64/power8/strnlen.S
deleted file mode 100644
index 3eadbfb09e..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strnlen.S
+++ /dev/null
@@ -1,433 +0,0 @@
-/* Optimized strnlen implementation for POWER8 using a vmx loop.
-
-   Copyright (C) 2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* The following heuristic is implemented:
-	1. Case maxlen <= 32: align the pointer to 8 bytes and loop through
-	reading doublewords.  Uses the POWER7 algorithm.
-	2. Case maxlen > 32: check for null bytes in the first 16 bytes using
-	unaligned accesses.  Return the length if found.  Otherwise:
-		2.1 Case maxlen < 64: deduct the bytes previously read, align
-		the pointer to 16 bytes and loop through reading quadwords
-		until null bytes are found or maxlen is reached.
-		2.2 Case maxlen >= 64: deduct the bytes previously read, align
-		the pointer to 64 bytes and set up a counter to loop through
-		reading in strides of 64 bytes.  If the loop finishes with
-		null bytes not found, process the remainder bytes by
-		switching to the heuristic in 2.1.  */
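In outline, this dispatch has the following C shape (a sketch; every helper
name is invented and corresponds to a label in this file, not to a real
function):

    #include <stddef.h>

    size_t small_range (const char *s, size_t maxlen);   /* L(small_range) */
    int    null_in_first_16 (const char *s);
    size_t early_find (const char *s);                   /* L(early_find)  */
    size_t loop_16b (const char *s, size_t maxlen);      /* L(loop_16B)    */
    size_t loop_64b (const char *s, size_t maxlen);      /* L(loop_64B)    */

    size_t
    strnlen_outline (const char *s, size_t maxlen)
    {
      if (maxlen <= 32)
        return small_range (s, maxlen);   /* Doubleword (POWER7) loop.  */
      if (null_in_first_16 (s))
        return early_find (s);
      if (maxlen < 64)
        return loop_16b (s, maxlen);
      return loop_64b (s, maxlen);        /* Falls back to loop_16b for
                                             the remainder.  */
    }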
-
-#include <sysdep.h>
-
-/* Define default page size to 4KB.  */
-#define PAGE_SIZE 4096
-
-/* The following macros implement Power ISA v2.07 opcodes
-   that cannot be used directly in this code, in order to keep
-   compatibility with older binutils versions.  */
-
-/* Move from vector register doubleword.  */
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-
-/* Move to vector register doubleword.  */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-
-/* Vector Bit Permute Quadword.  */
-#define VBPERMQ(t,a,b)	.long (0x1000054c	\
-			       | ((t)<<(32-11))	\
-			       | ((a)<<(32-16))	\
-			       | ((b)<<(32-21)) )
-
-/* Vector Population Count Halfword.  */
-#define VPOPCNTH(t,b) .long (0x10000743 | ((t)<<(32-11)) | ((b)<<(32-21)))
-
-/* Vector Count Leading Zeros Halfword.  */
-#define VCLZH(t,b) .long (0x10000742 | ((t)<<(32-11)) | ((b)<<(32-21)))
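As a worked example of these encodings: MFVRD(r10,v4) assembles to
0x7c000067 | (4 << 21) | (10 << 16) = 0x7c8a0067, i.e. an mfvsrd moving
doubleword 0 of VSR 36 (VMX register 4; the low opcode bit is the SX flag
selecting the upper 32 VSX registers) into r10.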
-
-
-/* size_t [r3] strnlen (const char *s [r3], size_t maxlen [r4])  */
-/* TODO: change to power8 when minimum required binutils allows it.  */
-	.machine  power7
-ENTRY (__strnlen)
-	CALL_MCOUNT 2
-	dcbt	0,r3
-
-	cmpldi	r4,32           /* Check if maxlen <= 32.  */
-	ble	L(small_range)  /* If maxlen <= 32.  */
-
-	/* The upcoming 16-byte unaligned accesses must not cross a page
-	   boundary; otherwise the processor raises a memory access error.
-	   The following check verifies there is room for such accesses:
-	     ((size_t) s % PAGE_SIZE) > (PAGE_SIZE - 16)
-	   If they are disallowed, switch to the code that handles the
-	   string when maxlen <= 32.  */
-	clrldi	r10,r3,52
-	cmpldi  cr7,r10,PAGE_SIZE-16
-	bgt     cr7,L(small_range)	/* If less than 16B of page end.  */
-
-	/* Compute our permute constant r8.  */
-	li	r7,0
-	/* Compute a bpermd constant to move bit 0 of each word into
-	   a halfword value, and count trailing zeros.  */
-#ifdef __LITTLE_ENDIAN__
-	li	r8,0x2820
-	oris	r8,r8,0x3830
-	sldi	r8,r8,32
-	ori	r8,r8,0x0800
-	oris	r8,r8,0x1810
-#else
-	li	r8,0x1018
-	oris	r8,r8,0x0008
-	sldi	r8,r8,32
-	ori	r8,r8,0x3038
-	oris	r8,r8,0x2028
-#endif
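On little endian the sequence above leaves r8 = 0x3830282018100800; its eight
index bytes (0x38, 0x30, ..., 0x08, 0x00) select bits 56, 48, ..., 8, 0 of
the cmpb result in ISA (big-endian) bit numbering, i.e. one bit per byte, so
each bpermd below compresses eight 0x00/0xff byte flags into an 8-bit mask.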
-
-	/* maxlen > 32. Optimistically check for null bytes in the first
-	   16 bytes of the string using unaligned accesses.  */
-	ld	r5,0(r3)
-	ld	r6,8(r3)
-	cmpb	r10,r7,r5		/* Check for null bytes in DWORD1.  */
-	cmpb	r11,r7,r6		/* Check for null bytes in DWORD2.  */
-	or.	r7,r10,r11
-	bne	cr0, L(early_find)	/* If found null bytes.  */
-
-	/* At this point maxlen > 32 and null bytes were not found at first
-	   16 bytes. Prepare for loop using VMX.  */
-
-	/* r3 == s, r4 == maxlen. All other volatile regs are unused now.  */
-
-	addi	r5,r3,16	/* Align up, or just add the 16B we
-				   already checked.  */
-	li	r0,15
-	and	r7,r5,r0	/* Find offset into 16B alignment.  */
-	andc	r5,r5,r0	/* Quadword align up s to the next quadword.  */
-	li	r0,16
-	subf	r0,r7,r0
-	subf	r4,r0,r4	/* Deduct unaligned bytes from maxlen.  */
-
-
-	/* Compute offsets for vmx loads, and precompute the vbpermq
-	   constants for both the 64B and 16B loops.  */
-	li	r6,0
-	vspltisb  v0,0
-	vspltisb  v10,3
-	lvsl	  v11,r6,r6
-	vslb	  v10,v11,v10
-
-	cmpldi  r4,64		/* Check maxlen < 64.  */
-	blt	L(smaller)	/* If maxlen < 64 */
-
-	/* In order to begin the 64B loop, the pointer must be 64-byte
-	   aligned, so read quadwords until it is aligned or null bytes are
-	   found.  In the worst case alignment is reached after the fourth
-	   iteration, so unroll the loop to avoid counter checking.  */
-	andi.   r7,r5,63		/* Check if is 64 bytes aligned.  */
-	beq     cr0,L(preloop_64B)	/* If it is already 64B aligned.  */
-	lvx     v1,r5,r6
-	vcmpequb.       v1,v1,v0
-	addi    r5,r5,16
-	addi    r4,r4,-16		/* Decrement maxlen by 16 bytes.  */
-	bne     cr6,L(found_aligning64B) /* If found null bytes.  */
-
-	/* Unroll the above block 3x until aligned or null bytes are found.  */
-	andi.   r7,r5,63
-	beq     cr0,L(preloop_64B)
-	lvx     v1,r5,r6
-	vcmpequb.      v1,v1,v0
-	addi    r5,r5,16
-	addi    r4,r4,-16
-	bne     cr6,L(found_aligning64B)
-
-	andi.   r7,r5,63
-	beq     cr0,L(preloop_64B)
-	lvx     v1,r5,r6
-	vcmpequb.      v1,v1,v0
-	addi    r5,r5,16
-	addi    r4,r4,-16
-	bne     cr6,L(found_aligning64B)
-
-	andi.   r7,r5,63
-	beq     cr0,L(preloop_64B)
-	lvx     v1,r5,r6
-	vcmpequb.      v1,v1,v0
-	addi    r5,r5,16
-	addi    r4,r4,-16
-	bne     cr6,L(found_aligning64B)
-
-	/* At this point the pointer must be 64-byte aligned.
-	   Prepare for the 64B loop.  */
-	.p2align 4
-L(preloop_64B):
-	/* Check if maxlen has become less than 64, which disallows the
-	   64B loop.  If so, switch to the 16B loop code.  */
-	cmpldi  r4,64		/* Check if maxlen < 64.  */
-	blt     L(smaller)	/* If maxlen < 64.  */
-	/* Set some constant values.  */
-	li      r7,16
-	li      r10,32
-	li      r9,48
-
-	/* Compute the number of 64-byte iterations needed.  */
-	srdi	r11,r4,6	/* Compute loop count (maxlen / 64).  */
-	andi.	r4,r4,63	/* Set maxlen to the remainder (maxlen % 64).  */
-	mtctr	r11		/* Move loop count to counter register.  */
-
-	/* Handle maxlen > 64. Loop over the bytes in strides of 64B.  */
-	.p2align 4
-L(loop_64B):
-	lvx	v1,r5,r6	/* r5 is the pointer to s.  */
-	lvx	v2,r5,r7
-	lvx	v3,r5,r10
-	lvx	v4,r5,r9
-	/* Take the byte-wise minimum of the four 16B vectors so that any
-	   null byte propagates into v7, then check v7 for null bytes.  */
-	vminub	v5,v1,v2
-	vminub	v6,v3,v4
-	vminub	v7,v5,v6
-	vcmpequb. v7,v7,v0		/* Check for null bytes.  */
-	addi	r5,r5,64		/* Advance pointer to next iteration.  */
-	bne	cr6,L(found_64B)	/* If found null bytes.  */
-	bdnz	L(loop_64B)		/* Continue the loop if count > 0. */
-
-/* Hit loop end without null match. So branch to handle the remainder.  */
-
-	/* Prepare a 16B loop to handle two cases:
-		1. If 32 < maxlen < 64.
-		2. If maxlen >= 64, and the 64B loop ended with null bytes
-		not found; handle the remainder bytes here.  */
-	.p2align 4
-L(smaller):
-        cmpldi  r4,0            /* Check if maxlen is zero.  */
-        beq     L(done)         /* If maxlen is zero.  */
-
-	/* Place rounded up number of qw's to check into a vmx
-	   register, and use some vector tricks to minimize
-	   branching.  */
-        MTVRD(v7,r4)            /* Copy maxlen from GPR to vector register. */
-        vspltisb v5,1
-        vspltisb v6,15
-        vspltb   v2,v7,7
-        vaddubs  v3,v5,v6
-
-#ifdef __LITTLE_ENDIAN__
-	vspltish v5,1           /* Splat 1 into each halfword.  */
-#endif
-
-	/* Loop in 16B aligned increments now.  */
-	.p2align 4
-L(loop_16B):
-	lvx     v1,r5,r6        /* Load quadword into vector register.  */
-	addi    r5,r5,16        /* Increment address to next 16B block.  */
-	vor     v7,v2,v2        /* Save loop count (v2) into v7. */
-	vsububs v2,v2,v3        /* Subtract 16B from count, saturate at 0. */
-	vminub  v4,v1,v2
-	vcmpequb. v4,v4,v0      /* Checking for null bytes.  */
-	beq     cr6,L(loop_16B) /* If null bytes not found.  */
-
-	vcmpequb  v1,v1,v0
-	VBPERMQ(v1,v1,v10)
-#ifdef __LITTLE_ENDIAN__
-	vsubuhm  v2,v1,v5       /* Form a mask of trailing zeros.  */
-	vandc    v2,v2,v1
-	VPOPCNTH(v1,v2)         /* Count of trailing zeros, 16 if none.  */
-#else
-	VCLZH(v1,v1)            /* Count the leading zeros, 16 if none.  */
-#endif
-	/* Truncate to maximum allowable offset.  */
-	vcmpgtub v2,v1,v7       /* Compare and truncate for matches beyond
-				   maxlen.  */
-	vsel     v1,v1,v7,v2    /* 0-16 is now in byte 7.  */
-
-	MFVRD(r0,v1)
-	addi    r5,r5,-16       /* Undo speculative bump.  */
-	extsb   r0,r0           /* Clear whatever gunk is in the high 56b.  */
-	add     r5,r5,r0        /* Add the offset of whatever was found.  */
-L(done):
-	subf    r3,r3,r5        /* The length equals the offset of the matched
-				   null byte minus the pointer to s.  */
-	blr                     /* Done.  */
-
-	/* Handle the case where maxlen >= 64 and null bytes were found in
-	   the last 64-byte block read.  */
-	.p2align 4
-L(found_64B):
-	/* A zero was found. Reduce the result.  */
-	vcmpequb  v1,v1,v0
-	vcmpequb  v2,v2,v0
-	vcmpequb  v3,v3,v0
-	vcmpequb  v4,v4,v0
-
-	/* Permute the first bit of each byte into bits 48-63.  */
-	VBPERMQ(v1,v1,v10)
-	VBPERMQ(v2,v2,v10)
-	VBPERMQ(v3,v3,v10)
-	VBPERMQ(v4,v4,v10)
-
-	/* Shift each component into its correct position for merging.  */
-#ifdef __LITTLE_ENDIAN__
-	vsldoi	v2,v2,v2,2
-	vsldoi	v3,v3,v3,4
-	vsldoi	v4,v4,v4,6
-#else
-	vsldoi	v1,v1,v1,6
-	vsldoi	v2,v2,v2,4
-	vsldoi	v3,v3,v3,2
-#endif
-
-	/* Merge the results and move to a GPR.  */
-	vor	v1,v2,v1
-	vor	v2,v3,v4
-	vor	v4,v1,v2
-
-	/* Adjust address to the start of the current 64B block.  */
-	addi	r5,r5,-64
-
-	MFVRD(r10,v4)
-#ifdef __LITTLE_ENDIAN__
-	addi	r9,r10,-1	/* Form a mask from trailing zeros.  */
-	andc	r9,r9,r10
-	popcntd	r0,r9		/* Count the bits in the mask.  */
-#else
-	cntlzd	r0,r10		/* Count leading zeros before the match.  */
-#endif
-	subf	r5,r3,r5
-	add	r3,r5,r0	/* Compute final length.  */
-	blr                     /* Done.  */
-
-	/* Handle the case where null bytes were found while aligning
-	   in preparation for the 64B loop.  */
-	.p2align 4
-L(found_aligning64B):
-	VBPERMQ(v1,v1,v10)
-#ifdef __LITTLE_ENDIAN__
-	MFVRD(r10,v1)
-	addi    r9,r10,-1       /* Form a mask from trailing zeros.  */
-	andc    r9,r9,r10
-	popcntd r0,r9           /* Count the bits in the mask.  */
-#else
-	vsldoi  v1,v1,v1,6
-	MFVRD(r10,v1)
-	cntlzd  r0,r10          /* Count leading zeros before the match.  */
-#endif
-	addi    r5,r5,-16	/* Adjust address to offset of last 16 bytes
-				   read.  */
-	/* Calculate the length: the offset of the last 16 bytes read minus
-	   the pointer to s, plus the bytes before the match.  */
-	subf    r5,r3,r5
-	add     r3,r5,r0
-	blr			/* Done.  */
-
-	/* Handle the case of maxlen > 32 where null bytes were found within
-	   the first 16 bytes of s.  */
-	.p2align 4
-L(early_find):
-	bpermd	r5,r8,r10        /* r8 contains the bit permute constants.  */
-	bpermd	r6,r8,r11
-	sldi	r5,r5,8
-	or	r5,r5,r6	/* r5 now holds a 16-bit mask marking a
-				   potential null byte.  */
-	cntlzd	r5,r5		/* Count leading zeros.  */
-	addi	r3,r5,-48	/* Deduct the 48 leading zeros always
-				   present.  */
-	blr			/* Done.  */
-
-	/* Handle case of maxlen <= 32. Use the POWER7 algorithm.  */
-	.p2align 4
-L(small_range):
-	clrrdi	r8,r3,3  	/* Align the pointer to 8B.  */
-	li	r0,0
-	/* Register contents at this point:
-	   r3 == pointer to s, r4 == maxlen, r8 == pointer to s aligned to 8B,
-	   r7 == last acceptable address.  */
-	cmpldi	r4,0                 /* Check if maxlen is zero.  */
-	beq	L(end_max)	     /* If maxlen is zero.  */
-
-	/* Calculate the last acceptable address and check for possible
-	   addition overflow by using saturating math:
-	   r7 = r3 + r4
-	   r7 |= -(r7 < r3)  */
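A C rendering of this saturating computation (a sketch; "end" plays the role
of r7):

    #include <stdint.h>

    /* Clamp s + maxlen to the top of the address space on overflow, then
       step back to the last acceptable address.  */
    static uintptr_t
    last_acceptable (uintptr_t s, uintptr_t maxlen)
    {
      uintptr_t end = s + maxlen;
      if (end < s)              /* The addition wrapped around.  */
        end = (uintptr_t) -1;   /* Branch-free form: end |= -(end < s).  */
      return end - 1;
    }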
-	add     r7,r3,r4
-	subfc   r6,r3,r7
-	subfe   r9,r9,r9
-	extsw   r6,r9
-	or      r7,r7,r6
-	addi    r7,r7,-1
-
-	clrrdi	r7,r7,3              /* Align the last acceptable address
-					to 8B.  */
-
-	rlwinm	r6,r3,3,26,28        /* Calculate padding.  */
-	ld	r12,0(r8)            /* Load aligned doubleword.  */
-	cmpb	r10,r12,r0           /* Check for null bytes. */
-#ifdef __LITTLE_ENDIAN__
-	srd	r10,r10,r6
-	sld	r10,r10,r6
-#else
-	sld	r10,r10,r6
-	srd	r10,r10,r6
-#endif /* __LITTLE_ENDIAN__  */
-	cmpldi	cr7,r10,0
-	bne	cr7,L(done_small)    /* If found null byte.  */
-
-	cmpld	r8,r7                /* Check if reached maxlen.  */
-	beq	L(end_max)	     /* If reached maxlen.  */
-
-	/* Still handling the case of maxlen <= 32.  Read aligned doublewords
-	   until null bytes are found or maxlen is reached.  */
-	.p2align 4
-L(loop_small):
-	ldu	r12,8(r8)         /* Load next doubleword and update r8.  */
-	cmpb	r10,r12,r0        /* Check for null bytes.  */
-	cmpldi	cr6,r10,0
-	bne	cr6,L(done_small) /* If found null bytes.  */
-	cmpld	r8,r7             /* Check if reached maxlen. */
-	bne	L(loop_small)	  /* If it has more bytes to read.  */
-	mr	r3,r4             /* Reached maxlen with null bytes not found.
-				     Length is equal to maxlen.  */
-	blr			  /* Done.  */
-
-	/* Still handling case of maxlen <= 32. Found null bytes.
-	   Registers: r10 == match bits within doubleword, r8 == address of
-	   last doubleword read, r3 == pointer to s, r4 == maxlen.  */
-	.p2align 4
-L(done_small):
-#ifdef __LITTLE_ENDIAN__
-	/* Count trailing zeros.  */
-	addi	r0,r10,-1
-	andc	r0,r0,r10
-	popcntd	r0,r0
-#else
-	cntlzd	r0,r10	      /* Count leading zeros before the match.  */
-#endif
-	sub	r3,r8,r3      /* Calculate total of bytes before the match.  */
-	srdi	r0,r0,3	      /* Convert leading/trailing zeros to bytes.  */
-	add	r3,r3,r0      /* Length until the match.  */
-	cmpld	r3,r4         /* Check if the length is greater than maxlen.  */
-	blelr
-	mr	r3,r4	      /* If length is greater than maxlen, return
-				 maxlen.  */
-	blr
-
-	/* Handle the case where maxlen was reached with null bytes not found.  */
-	.p2align 4
-L(end_max):
-	mr	r3,r4	/* Length is equal to maxlen.  */
-	blr		/* Done.  */
-
-
-END (__strnlen)
-libc_hidden_def (__strnlen)
-weak_alias (__strnlen, strnlen)
-libc_hidden_def (strnlen)
diff --git a/sysdeps/powerpc/powerpc64/power8/strrchr.S b/sysdeps/powerpc/powerpc64/power8/strrchr.S
deleted file mode 100644
index 8eb74853c3..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strrchr.S
+++ /dev/null
@@ -1,464 +0,0 @@
-/* Optimized strrchr implementation for PowerPC64/POWER8 using cmpb insn.
-   Copyright (C) 2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* char *[r3] strrchr (char *s [r3], int c [r4])  */
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t,a,b)  .long (0x1000054c \
-				| ((t)<<(32-11)) \
-				| ((a)<<(32-16)) \
-				| ((b)<<(32-21)) )
-#define VCLZD(r,v) .long (0x100007c2 | ((r)<<(32-11)) | ((v)<<(32-21)))
-#define VPOPCNTD(r,v) .long (0x100007c3 | ((r)<<(32-11)) | ((v)<<(32-21)))
-#define VADDUQM(t,a,b)  .long (0x10000100 \
-				| ((t)<<(32-11)) \
-				| ((a)<<(32-16)) \
-				| ((b)<<(32-21)) )
-#ifdef __LITTLE_ENDIAN__
-/* Find the match position from v6 and place result in r6.  */
-# define CALCULATE_MATCH() \
-	VBPERMQ(v6, v6, v10); \
-	vsldoi	v6, v6, v6, 6; \
-	MFVRD(r7, v6); \
-	cntlzd	r6, r7; \
-	subfic	r6, r6, 15;
-/*
- * Find the first null position, to mask bytes after the null.
- * (reg): vcmpequb result; v2 for the 1st qw, v3 for the 2nd qw.
- * The result is placed in v2.
- */
-# define FIND_NULL_POS(reg) \
-	vspltisb	v11, -1; \
-	VADDUQM(v11, reg, v11); \
-	vandc	v11, v11, reg; \
-	VPOPCNTD(v2, v11); \
-	vspltb	v11, v2, 15; \
-	vcmpequb.	v11, v11, v9; \
-	blt	cr6, 1f; \
-	vsldoi	v9, v0, v9, 1; \
-	vslo	v2, v2, v9; \
-1: \
-	vsumsws	v2, v2, v0;
-#else
-# define CALCULATE_MATCH() \
-	VBPERMQ(v6, v6, v10); \
-	MFVRD(r7, v6); \
-	addi	r6, r7, -1; \
-	andc	r6, r6, r7; \
-	popcntd	r6, r6; \
-	subfic	r6, r6, 15;
-# define FIND_NULL_POS(reg) \
-	VCLZD(v2, reg); \
-	vspltb	v11, v2, 7; \
-	vcmpequb.	v11, v11, v9; \
-	blt	cr6, 1f; \
-	vsldoi	v9, v0, v9, 1; \
-	vsro	v2, v2, v9; \
-1: \
-	vsumsws	v2, v2, v0;
-#endif	/* !__LITTLE_ENDIAN__  */
-	.machine  power7
-ENTRY (strrchr)
-	CALL_MCOUNT 2
-	dcbt	0,r3
-	clrrdi	r8,r3,3	      /* Align the address to doubleword boundary.  */
-	cmpdi	cr7,r4,0
-	ld	r12,0(r8)     /* Load doubleword from memory.  */
-	li	r9,0	      /* Used to store the last occurrence.  */
-	li	r0,0	      /* Doubleword with null chars to use
-				 with cmpb.  */
-
-	rlwinm	r6,r3,3,26,28 /* Calculate padding.  */
-
-	beq	cr7,L(null_match)
-
-	/* Replicate byte to doubleword.  */
-	insrdi	r4,r4,8,48
-	insrdi	r4,r4,16,32
-	insrdi	r4,r4,32,0
-
-	/* r4 has now been replicated.  If c was passed with bits set above
-	   the low byte, the replicated value may be zero, so check for null
-	   again.  */
-	cmpdi	cr7,r4,0
-	beq	cr7,L(null_match)
-	/* Now r4 has a doubleword of c bytes and r0 has
-	   a doubleword of null bytes.  */
-
-	cmpb	r10,r12,r4     /* Compare each byte against c byte.  */
-	cmpb	r11,r12,r0     /* Compare each byte against null byte.  */
-
-	/* Move the doublewords left and right to discard the bits that are
-	   not part of the string and bring them back as zeros.  */
-#ifdef __LITTLE_ENDIAN__
-	srd	r10,r10,r6
-	srd	r11,r11,r6
-	sld	r10,r10,r6
-	sld	r11,r11,r6
-#else
-	sld	r10,r10,r6
-	sld	r11,r11,r6
-	srd	r10,r10,r6
-	srd	r11,r11,r6
-#endif
-	or	r5,r10,r11    /* OR the results to speed things up.  */
-	cmpdi	cr7,r5,0      /* If r5 == 0, no c or null bytes
-				 have been found.  */
-	bne	cr7,L(done)
-
-L(align):
-	andi.	r12, r8, 15
-
-	/* Are we now aligned to a quadword boundary?  If so, skip to
-	   the main loop.  Otherwise, go through the alignment code.  */
-
-	bne	cr0, L(loop)
-
-	/* Handle the second doubleword of the pair.  */
-	ldu	r12,8(r8)
-	cmpb	r10,r12,r4
-	cmpb	r11,r12,r0
-	or	r5,r10,r11
-	cmpdi	cr7,r5,0
-	bne	cr7,L(done)
-	b	L(loop)	      /* We branch here (rather than falling through)
-				 to skip the nops due to heavy alignment
-				 of the loop below.  */
-	.p2align  5
-L(loop):
-	/* Load two doublewords, compare and merge in a
-	   single register for speed.  This is an attempt
-	   to speed up the null-checking process for bigger strings.  */
-	ld	r12,8(r8)
-	ldu	r7,16(r8)
-	cmpb	r10,r12,r4
-	cmpb	r11,r12,r0
-	cmpb	r6,r7,r4
-	cmpb	r7,r7,r0
-	or	r12,r10,r11
-	or	r5,r6,r7
-	or	r5,r12,r5
-	cmpdi	cr7,r5,0
-	beq	cr7,L(vector)
-
-	/* OK, one (or both) of the doublewords contains a c/null byte.  Check
-	   the first doubleword and decrement the address in case the first
-	   doubleword really contains a c/null byte.  */
-	cmpdi	cr6,r12,0
-	addi	r8,r8,-8
-	bne	cr6,L(done)
-
-	/* The c/null byte must be in the second doubleword.  Adjust the
-	   address again and move the result of cmpb to r10 so we can calculate
-	   the pointer.  */
-
-	mr	r10,r6
-	mr	r11,r7
-	addi	r8,r8,8
-
-	/* r10/r11 have the output of the cmpb instructions, that is,
-	   0xff in the same position as the c/null byte in the original
-	   doubleword from the string.  Use that to calculate the pointer.  */
-
-L(done):
-	/* If there are more than one 0xff in r11, find the first position of
-	   0xff in r11 and fill r10 with 0 from that position.  */
-	cmpdi	cr7,r11,0
-	beq	cr7,L(no_null)
-#ifdef __LITTLE_ENDIAN__
-	addi	r3,r11,-1
-	andc	r3,r3,r11
-	popcntd r0,r3
-#else
-	cntlzd	r0,r11
-#endif
-	subfic	r0,r0,63
-	li	r6,-1
-#ifdef __LITTLE_ENDIAN__
-	srd	r0,r6,r0
-#else
-	sld	r0,r6,r0
-#endif
-	and	r10,r0,r10
-L(no_null):
-#ifdef __LITTLE_ENDIAN__
-	cntlzd	r0,r10		/* Count leading zeros before c matches.  */
-	addi	r3,r10,-1
-	andc	r3,r3,r10
-	addi	r10,r11,-1
-	andc	r10,r10,r11
-	cmpld	cr7,r3,r10
-	bgt	cr7,L(no_match)
-#else
-	addi	r3,r10,-1	/* Count trailing zeros before c matches.  */
-	andc	r3,r3,r10
-	popcntd	r0,r3
-	cmpld	cr7,r11,r10
-	bgt	cr7,L(no_match)
-#endif
-	srdi	r0,r0,3		/* Convert leading/trailing zeros to bytes.  */
-	subfic	r0,r0,7
-	add	r9,r8,r0      /* Return address of the matching c byte
-				 or null in case c was not found.  */
-	li	r0,0
-	cmpdi	cr7,r11,0     /* If r11 == 0, no null's have been found.  */
-	beq	cr7,L(align)
-
-	.align	4
-L(no_match):
-	mr	r3,r9
-	blr
-
-/* Check the first 32B in GPRs and move to the vectorized loop.  */
-	.p2align  5
-L(vector):
-	addi	r3, r8, 8
-	/* Make sure 32B aligned.  */
-	andi.	r10, r3, 31
-	bne	cr0, L(loop)
-	vspltisb	v0, 0
-	/* Precompute vbpermq constant.  */
-	vspltisb	v10, 3
-	lvsl	v11, r0, r0
-	vslb	v10, v11, v10
-	MTVRD(v1, r4)
-	li	r5, 16
-	vspltb	v1, v1, 7
-	/* Compare 32 bytes in each loop.  */
-L(continue):
-	lvx	v4, 0, r3
-	lvx	v5, r3, r5
-	vcmpequb	v2, v0, v4
-	vcmpequb	v3, v0, v5
-	vcmpequb	v6, v1, v4
-	vcmpequb	v7, v1, v5
-	vor	v8, v2, v3
-	vor	v9, v6, v7
-	vor	v11, v8, v9
-	vcmpequb.	v11, v0, v11
-	addi	r3, r3, 32
-	blt	cr6, L(continue)
-	vcmpequb.	v8, v0, v8
-	blt	cr6, L(match)
-
-	/* One (or both) of the quadwords contains c/null.  */
-	vspltisb	v8, 2
-	vspltisb	v9, 5
-	/* Precompute values used for comparison.  */
-	vsl	v9, v8, v9	/* v9 = 0x4040404040404040.  */
-	vaddubm	v8, v9, v9
-	vsldoi	v8, v0, v8, 1	/* v8 = 0x80.  */
-
-	/* Check if null is in second qw.  */
-	vcmpequb.	v11, v0, v2
-	blt	cr6, L(secondqw)
-
-	/* Null found in first qw.  */
-	addi	r8, r3, -32
-	/* Calculate the null position.  */
-	FIND_NULL_POS(v2)
-	/* Check if null is in the first byte.  */
-	vcmpequb.	v11, v0, v2
-	blt	cr6, L(no_match)
-	vsububm	v2, v8, v2
-	/* Mask unwanted bytes after null.  */
-#ifdef __LITTLE_ENDIAN__
-	vslo	v6, v6, v2
-	vsro	v6, v6, v2
-#else
-	vsro	v6, v6, v2
-	vslo	v6, v6, v2
-#endif
-	vcmpequb.	v11, v0, v6
-	blt	cr6, L(no_match)
-	/* Found a match before null.  */
-	CALCULATE_MATCH()
-	add	r3, r8, r6
-	blr
-
-L(secondqw):
-	addi	r8, r3, -16
-	FIND_NULL_POS(v3)
-	vcmpequb.	v11, v0, v2
-	blt	cr6, L(no_match1)
-	vsububm	v2, v8, v2
-	/* Mask unwanted bytes after null.  */
-#ifdef __LITTLE_ENDIAN__
-	vslo	v7, v7, v2
-	vsro	v7, v7, v2
-#else
-	vsro	v7, v7, v2
-	vslo	v7, v7, v2
-#endif
-	vcmpequb.	v11, v0, v7
-	blt	cr6, L(no_match1)
-	addi	r8, r8, 16
-	vor	v6, v0, v7
-L(no_match1):
-	addi	r8, r8, -16
-	vcmpequb.	v11, v0, v6
-	blt	cr6, L(no_match)
-	/* Found a match before null.  */
-	CALCULATE_MATCH()
-	add	r3, r8, r6
-	blr
-
-L(match):
-	/* One (or both) of the quadwords contains a match.  */
-	mr	r8, r3
-	vcmpequb.	v8, v0, v7
-	blt	cr6, L(firstqw)
-	/* Match found in second qw.  */
-	addi	r8, r8, 16
-	vor	v6, v0, v7
-L(firstqw):
-	addi	r8, r8, -32
-	CALCULATE_MATCH()
-	add	r9, r8, r6      /* Save the address of this match.  */
-	b	L(continue)
-/* We are here because strrchr was called with a null byte.  */
-	.align	4
-L(null_match):
-	/* r0 has a doubleword of null bytes.  */
-
-	cmpb	r5,r12,r0     /* Compare each byte against null bytes.  */
-
-	/* Move the doublewords left and right to discard the bits that are
-	   not part of the string and bring them back as zeros.  */
-#ifdef __LITTLE_ENDIAN__
-	srd	r5,r5,r6
-	sld	r5,r5,r6
-#else
-	sld	r5,r5,r6
-	srd	r5,r5,r6
-#endif
-	cmpdi	cr7,r5,0      /* If r5 == 0, no null bytes
-				 have been found.  */
-	bne	cr7,L(done_null)
-
-	andi.	r12, r8, 15
-
-	/* Are we now aligned to a quadword boundary?  If so, skip to
-	   the main loop.  Otherwise, go through the alignment code.  */
-
-	bne	cr0, L(loop_null)
-
-	/* Handle the second doubleword of the pair.  */
-	ldu	r12,8(r8)
-	cmpb	r5,r12,r0
-	cmpdi	cr7,r5,0
-	bne	cr7,L(done_null)
-	b	L(loop_null)  /* We branch here (rather than falling through)
-				 to skip the nops due to heavy alignment
-				 of the loop below.  */
-
-	/* Main loop to look for the end of the string.  Since it's a
-	   small loop (< 8 instructions), align it to 32-bytes.  */
-	.p2align  5
-L(loop_null):
-	/* Load two doublewords, compare and merge in a
-	   single register for speed.  This is an attempt
-	   to speed up the null-checking process for bigger strings.  */
-	ld	r12,8(r8)
-	ldu	r11,16(r8)
-	cmpb	r5,r12,r0
-	cmpb	r10,r11,r0
-	or	r6,r5,r10
-	cmpdi	cr7,r6,0
-	beq	cr7,L(vector1)
-
-	/* OK, one (or both) of the doublewords contains a null byte.  Check
-	   the first doubleword and decrement the address in case the first
-	   doubleword really contains a null byte.  */
-
-	cmpdi	cr6,r5,0
-	addi	r8,r8,-8
-	bne	cr6,L(done_null)
-
-	/* The null byte must be in the second doubleword.  Adjust the address
-	   again and move the result of cmpb to r10 so we can calculate the
-	   pointer.  */
-
-	mr	r5,r10
-	addi	r8,r8,8
-
-	/* r5 has the output of the cmpb instruction, that is, it contains
-	   0xff in the same position as the null byte in the original
-	   doubleword from the string.  Use that to calculate the pointer.  */
-L(done_null):
-#ifdef __LITTLE_ENDIAN__
-	addi	r0,r5,-1
-	andc	r0,r0,r5
-	popcntd	r0,r0
-#else
-	cntlzd	r0,r5	      /* Count leading zeros before the match.  */
-#endif
-	srdi	r0,r0,3	      /* Convert leading/trailing zeros to bytes.  */
-	add	r3,r8,r0      /* Return address of the matching null byte.  */
-	blr
-/* Check the first 32B in GPRs and move to the vectorized loop.  */
-	.p2align  5
-L(vector1):
-	addi	r3, r8, 8
-	/* Make sure 32B aligned.  */
-	andi.	r10, r3, 31
-	bne	cr0, L(loop_null)
-	vspltisb	v0, 0
-	/* Precompute vbpermq constant.  */
-	vspltisb	v10, 3
-	lvsl	v11, r0, r0
-	vslb	v10, v11, v10
-	li	r5, 16
-	/* Compare 32 bytes in each loop.  */
-L(continue1):
-	lvx	v4, 0, r3
-	lvx	v5, r3, r5
-	vcmpequb	v2, v0, v4
-	vcmpequb	v3, v0, v5
-	vor	v8, v2, v3
-	vcmpequb.	v11, v0, v8
-	addi	r3, r3, 32
-	blt	cr6, L(continue1)
-	addi	r3, r3, -32
-	VBPERMQ(v2, v2, v10)
-	VBPERMQ(v3, v3, v10)
-	/* Shift each component into its correct position for merging.  */
-#ifdef __LITTLE_ENDIAN__
-	vsldoi	v3, v3, v3, 2
-#else
-	vsldoi	v2, v2, v2, 6
-	vsldoi	v3, v3, v3, 4
-#endif
-	/* Merge the results and move to a GPR.  */
-	vor	v4, v3, v2
-	MFVRD(r5, v4)
-#ifdef __LITTLE_ENDIAN__
-	addi	r6, r5, -1
-	andc	r6, r6, r5
-	popcntd	r6, r6
-#else
-	cntlzd	r6, r5  /* Count leading zeros before the match.  */
-#endif
-	add	r3, r3, r6      /* Compute the final address.  */
-	blr
-END (strrchr)
-weak_alias (strrchr, rindex)
-libc_hidden_builtin_def (strrchr)
diff --git a/sysdeps/powerpc/powerpc64/power8/strspn.S b/sysdeps/powerpc/powerpc64/power8/strspn.S
deleted file mode 100644
index e9271898f2..0000000000
--- a/sysdeps/powerpc/powerpc64/power8/strspn.S
+++ /dev/null
@@ -1,202 +0,0 @@
-/* Optimized strspn implementation for Power8.
-
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* size_t [r3] strspn (const char *string [r3],
-                       const char *needleAccept [r4])  */
-
-/* This takes a novel approach by computing a 256-bit mask whereby
-   each set bit implies the byte is "accepted".  P8 vector hardware
-   selects bits from such a mask extremely efficiently.
-
-   One might ask "why not use bpermd for short strings"?  It is
-   so slow that its performance roughly matches the generic PPC64
-   variant without any fancy masking, with the added expense of
-   building the mask.  That was the first variant of this code.  */
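A scalar C model of the bitmap approach (illustrative only; the vector code
numbers bits differently, following the ISA convention used by vbpermq):

    #include <stddef.h>
    #include <stdint.h>

    static size_t
    strspn_bitmap (const char *s, const char *accept)
    {
      /* One bit per possible byte value; bit set => byte is accepted.  */
      uint64_t map[4] = { 0, 0, 0, 0 };
      for (const unsigned char *a = (const unsigned char *) accept; *a; a++)
        map[*a >> 6] |= (uint64_t) 1 << (*a & 63);

      /* The terminator's bit is never set, so the scan always stops.  */
      const unsigned char *p = (const unsigned char *) s;
      size_t n = 0;
      while ((map[p[n] >> 6] >> (p[n] & 63)) & 1)
        n++;
      return n;
    }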
-
-
-
-#include "sysdep.h"
-
-#ifndef USE_AS_STRCSPN
-#  define USE_AS_STRCSPN 0
-#  ifndef STRSPN
-#    define STRSPN strspn
-#  endif
-#  define INITIAL_MASK 0
-#  define UPDATE_MASK(RA, RS, RB) or	RA, RS, RB
-#else
-#  ifndef STRSPN
-#    define STRSPN strcspn
-#  endif
-#  define INITIAL_MASK -1
-#  define UPDATE_MASK(RA, RS, RB) andc	RA, RS, RB
-#endif
-
-/* Simple macro to use VSX instructions in overlapping VR's.  */
-#define XXVR(insn, vrt, vra, vrb) \
-	insn 32+vrt, 32+vra, 32+vrb
-
-/* ISA 2.07B instructions are not all defined for older binutils.
-   Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-
-/* Note, TX/SX is always set as VMX regs are the high 32 VSX regs.  */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-
-#define VBPERMQ(t,a,b) .long (0x1000054c \
-			      | ((t)<<(32-11))	\
-			      | ((a)<<(32-16))	\
-			      | ((b)<<(32-21)) )
-
-	/* This can be updated to power8 once the minimum version of
-	   binutils supports power8 and the above instructions.  */
-	.machine power7
-EALIGN(STRSPN, 4, 0)
-	CALL_MCOUNT 2
-
-	/* Generate useful constants for later on.  */
-	vspltisb v1, 7
-	vspltisb v2, -1
-	vslb	v1, v1, v1	/* 0x80 to swap high bit for vbpermq.  */
-	vspltisb v10, 0
-	vsldoi	v4, v10, v2, 2	/* 0xFFFF into vr4.  */
-	XXVR(xxmrgld, v4, v4, v10) /* Mask for checking matches.  */
-
-	/* Prepare to compute 256b mask.  */
-	addi	r4, r4, -1
-	li	r5, INITIAL_MASK
-	li	r6, INITIAL_MASK
-	li	r7, INITIAL_MASK
-	li	r8, INITIAL_MASK
-
-#if USE_AS_STRCSPN
-	/* Ensure the null character never matches by clearing ISA bit 0 in
-	   r5, which is the bit that will check for it in the later usage
-	   of vbpermq.  */
-	srdi	r5, r5, 1
-#endif
-
-	li	r11, 1
-	sldi	r11, r11, 63
-
-	/* Start interleaved Mask computation.
-	   This will eventually or 1's into ignored bits from vbpermq.  */
-	lvsr	v11, 0, r3
-	vspltb  v11, v11, 0	/* Splat shift constant.  */
-
-	/* Build a 256b mask in r5-r8.  */
-	.align 4
-L(next_needle):
-	lbzu	r9, 1(r4)
-
-	cmpldi	cr0, r9, 0
-	cmpldi	cr1, r9, 128
-
-	/* This is a little tricky.  srd only uses the low 7 bits of the
-	   shift amount, and any shift of 64 or more yields 0.  So we can
-	   effectively handle a 128-bit shift this way.  */
-	xori	r12, r9,  0x40	/* Invert bit 6.  */
-	srd	r10, r11, r9	/* Mask for bits 0-63.  */
-	srd	r12, r11, r12	/* Mask for bits 64-127.  */
-
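A worked example: for the needle byte 0x41 ('A', 65), srd by 65 shifts r11
(1 << 63) completely away, so r10 is 0, while the xori flips 65 to 1 and r12
receives 1 << 62, the bit for value 65 within the 64-127 word; the bge cr1
below then routes r10/r12 into either the r5/r6 or the r7/r8 pair.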
-	beq	cr0, L(start_cmp)
-
-	/* Now, or the value into the correct GPR.  */
-	bge cr1,L(needle_gt128)
-	UPDATE_MASK (r5, r5, r10)	/* 0 - 63.  */
-	UPDATE_MASK (r6, r6, r12)	/* 64 - 127.  */
-	b L(next_needle)
-
-	.align 4
-L(needle_gt128):
-	UPDATE_MASK (r7, r7, r10)	/* 128 - 191.  */
-	UPDATE_MASK (r8, r8, r12)	/* 192 - 255.  */
-	b L(next_needle)
-
-
-	.align 4
-L(start_cmp):
-	/* Move and merge bitmap into 2 VRs.  bpermd is slower on P8.  */
-	mr	r0, r3		/* Save r3 for final length computation.  */
-	MTVRD (v5, r5)
-	MTVRD (v6, r6)
-	MTVRD (v7, r7)
-	MTVRD (v8, r8)
-
-	/* Continue interleaved mask generation.  */
-#ifdef __LITTLE_ENDIAN__
-	vsrw	v11, v2, v11	/* Note, shift ignores higher order bits.  */
-	vsplth  v11, v11, 0	/* Only care about the high 16 bits of v11.  */
-#else
-	vslw	v11, v2, v11	/* Note, shift ignores higher order bits.  */
-	vsplth  v11, v11, 1	/* Only care about the low 16 bits of v11.  */
-#endif
-	lvx	v0, 0, r3	/* Note, unaligned load ignores lower bits.  */
-
-	/* Do the merging of the bitmask.  */
-	XXVR(xxmrghd, v5, v5, v6)
-	XXVR(xxmrghd, v6, v7, v8)
-
-	/* Finish mask generation.  */
-	vand	v11, v11, v4	/* Throw away bits not in the mask.  */
-
-	/* Compare the first 1-16B, while masking unwanted bytes.  */
-	clrrdi  r3, r3, 4	/* Note,  counts from qw boundaries.  */
-	vxor	v9, v0, v1	/* Swap high bit.  */
-	VBPERMQ (v8, v5, v0)
-	VBPERMQ (v7, v6, v9)
-	vor	v7, v7, v8
-	vor	v7, v7, v11	/* Ignore non-participating bytes.  */
-	vcmpequh. v8, v7, v4
-	bnl	cr6, L(done)
-
-	addi	r3, r3, 16
-
-	.align 4
-L(vec):
-	lvx	v0, 0, r3
-	addi	r3, r3, 16
-	vxor	v9, v0, v1	/* Swap high bit.  */
-	VBPERMQ (v8, v5, v0)
-	VBPERMQ (v7, v6, v9)
-	vor	v7, v7, v8
-	vcmpequh. v8, v7, v4
-	blt	cr6, L(vec)
-
-	addi	r3, r3, -16
-L(done):
-	subf	r3, r0, r3
-	MFVRD (r10, v7)
-
-#ifdef __LITTLE_ENDIAN__
-	addi	r0,  r10, 1	/* Count the trailing 1's.  */
-	andc	r10, r10, r0
-	popcntd	r10, r10
-#else
-	xori	r10, r10, 0xffff /* Count leading 1's by inverting.  */
-	addi	r3,  r3,  -48	/* Account for the extra leading zeros.  */
-	cntlzd  r10, r10
-#endif
-
-	add	r3, r3, r10
-	blr
-
-END(STRSPN)
-libc_hidden_builtin_def (STRSPN)