Diffstat (limited to 'sysdeps/powerpc')
-rw-r--r--  sysdeps/powerpc/Dist        |    2
-rw-r--r--  sysdeps/powerpc/add_n.s     |   68
-rw-r--r--  sysdeps/powerpc/addmul_1.s  |   50
-rw-r--r--  sysdeps/powerpc/lshift.s    |  479
-rw-r--r--  sysdeps/powerpc/memset.s    |  202
-rw-r--r--  sysdeps/powerpc/mul_1.s     |   47
-rw-r--r--  sysdeps/powerpc/rshift.s    |   59
-rw-r--r--  sysdeps/powerpc/strchr.s    |  118
-rw-r--r--  sysdeps/powerpc/strcmp.s    |  273
-rw-r--r--  sysdeps/powerpc/sub_n.s     |   69
-rw-r--r--  sysdeps/powerpc/submul_1.s  |   52
11 files changed, 1419 insertions(+), 0 deletions(-)
diff --git a/sysdeps/powerpc/Dist b/sysdeps/powerpc/Dist
index 282cf1394e..a3de7b3c96 100644
--- a/sysdeps/powerpc/Dist
+++ b/sysdeps/powerpc/Dist
@@ -1,3 +1,5 @@
 fenv_const.c
 fenv_libc.h
 quad_float.h
+fe_nomask.c
+t_sqrt.c
diff --git a/sysdeps/powerpc/add_n.s b/sysdeps/powerpc/add_n.s
new file mode 100644
index 0000000000..609f0a502a
--- /dev/null
+++ b/sysdeps/powerpc/add_n.s
@@ -0,0 +1,68 @@
+ # Add two limb vectors of equal, non-zero length for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
+ #                      mp_size_t size)
+ # Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1.
+
+ # Note on optimisation: This code is optimal for the 601.  Almost every other
+ # possible 2-unrolled inner loop will not be.  Also, watch out for the
+ # alignment...
+
+	.align 3
+	.globl __mpn_add_n
+	.type	 __mpn_add_n,@function
+__mpn_add_n:
+ # Set up for loop below.
+	mtcrf 0x01,%r6
+	srwi. %r7,%r6,1
+	li    %r10,0
+	mtctr %r7
+	bt    31,2f
+
+ # Clear the carry.
+	addic %r0,%r0,0
+ # Adjust pointers for loop.
+	addi  %r3,%r3,-4
+	addi  %r4,%r4,-4
+	addi  %r5,%r5,-4
+	b     0f
+
+2:	lwz  %r7,0(%r5)
+	lwz  %r6,0(%r4)
+	addc %r6,%r6,%r7
+	stw  %r6,0(%r3)
+	beq  1f
+
+ # The loop.
+
+ # Align start of loop to an odd word boundary to guarantee that the
+ # last two words can be fetched in one access (for 601).
+0:	lwz  %r9,4(%r4)
+	lwz  %r8,4(%r5)
+	lwzu %r6,8(%r4)
+	lwzu %r7,8(%r5)
+	adde %r8,%r9,%r8
+	stw  %r8,4(%r3)
+	adde %r6,%r6,%r7
+	stwu %r6,8(%r3)
+	bdnz 0b
+ # return the carry
+1:	addze %r3,%r10
+	blr
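
For reference, here is a minimal C sketch of the operation the loop above
implements, with a 64-bit accumulator standing in for the addc/adde carry
chain.  The ref_ name and the accumulator are illustrative choices, not
glibc's generic C implementation.

#include <stddef.h>
#include <stdint.h>

typedef uint32_t mp_limb_t;

static mp_limb_t
ref_add_n (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
           const mp_limb_t *s2_ptr, size_t size)
{
  mp_limb_t cy = 0;
  for (size_t i = 0; i < size; i++)
    {
      /* One addc/adde step: limb + limb + carry-in.  */
      uint64_t acc = (uint64_t) s1_ptr[i] + s2_ptr[i] + cy;
      res_ptr[i] = (mp_limb_t) acc;
      cy = (mp_limb_t) (acc >> 32);  /* carry out, 0 or 1 */
    }
  return cy;
}
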
diff --git a/sysdeps/powerpc/addmul_1.s b/sysdeps/powerpc/addmul_1.s
new file mode 100644
index 0000000000..cf8fd2a555
--- /dev/null
+++ b/sysdeps/powerpc/addmul_1.s
@@ -0,0 +1,50 @@
+ # Multiply a limb vector by a single limb, for PowerPC.
+ # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+ #                         mp_size_t s1_size, mp_limb_t s2_limb)
+ # Calculate res+s1*s2 and put result back in res; return carry.
+
+	.align 2
+	.globl __mpn_addmul_1
+	.type	 __mpn_addmul_1,@function
+__mpn_addmul_1:
+	mtctr	%r5
+
+	lwz	%r0,0(%r4)
+	mullw	%r7,%r0,%r6
+	mulhwu	%r10,%r0,%r6
+	lwz     %r9,0(%r3)
+	addc	%r8,%r7,%r9
+	addi	%r3,%r3,-4		# adjust res_ptr
+	bdz	Lend
+
+Loop:	lwzu	%r0,4(%r4)
+	stwu	%r8,4(%r3)
+	mullw	%r8,%r0,%r6
+	adde	%r7,%r8,%r10
+	mulhwu	%r10,%r0,%r6
+	lwz     %r9,4(%r3)
+	addze   %r10,%r10
+	addc    %r8,%r7,%r9
+	bdnz	Loop
+
+Lend:	stw	%r8,4(%r3)
+	addze	%r3,%r10
+	blr
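
In C, the mullw/mulhwu pair is just a 32x32->64 multiply, so the whole
recurrence collapses into one 64-bit expression.  A hedged model (the
ref_ name is illustrative; this is not the glibc generic version):

#include <stddef.h>
#include <stdint.h>

typedef uint32_t mp_limb_t;

static mp_limb_t
ref_addmul_1 (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
              size_t s1_size, mp_limb_t s2_limb)
{
  mp_limb_t cy = 0;
  for (size_t i = 0; i < s1_size; i++)
    {
      /* No 64-bit overflow: (2^32-1)^2 + 2*(2^32-1) == 2^64 - 1.  */
      uint64_t t = (uint64_t) s1_ptr[i] * s2_limb + res_ptr[i] + cy;
      res_ptr[i] = (mp_limb_t) t;
      cy = (mp_limb_t) (t >> 32);
    }
  return cy;
}
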
diff --git a/sysdeps/powerpc/lshift.s b/sysdeps/powerpc/lshift.s
new file mode 100644
index 0000000000..9612a3dbec
--- /dev/null
+++ b/sysdeps/powerpc/lshift.s
@@ -0,0 +1,479 @@
+ # Shift a limb vector left, low level routine.
+ # Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize,
+ #			 unsigned int cnt)
+
+	.align 3
+	.globl __mpn_lshift
+	.type	 __mpn_lshift,@function
+__mpn_lshift:
+	mtctr	%r5		# copy size into CTR
+	cmplwi	%cr0,%r5,16	# is size < 16
+	slwi	%r0,%r5,2
+	add	%r7,%r3,%r0	# make r7 point at end of res
+	add	%r4,%r4,%r0	# make r4 point at end of s1
+	lwzu	%r11,-4(%r4)	# load first s1 limb
+	subfic	%r8,%r6,32
+	srw	%r3,%r11,%r8	# compute function return value
+	bge	%cr0,Lbig	# branch if size >= 16
+
+	bdz	Lend1
+
+Loop:	lwzu	%r10,-4(%r4)
+	slw	%r9,%r11,%r6
+	srw	%r12,%r10,%r8
+	or	%r9,%r9,%r12
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slw	%r9,%r10,%r6
+	srw	%r12,%r11,%r8
+	or	%r9,%r9,%r12
+	stwu	%r9,-4(%r7)
+	bdnz	Loop
+	b	Lend1
+
+ # Guaranteed not to succeed.
+LBoom:	tweq    %r0,%r0
+
+ # We imitate a case statement by using (yuk!) fixed-length code chunks,
+ # of size 4*12 bytes.  We have to do this (or something) to make this PIC.
+Lbig:	mflr    %r9
+	bltl    %cr0,LBoom      # Never taken, only used to set LR.
+	slwi    %r10,%r6,4
+	mflr    %r12
+	add     %r10,%r12,%r10
+	slwi	%r8,%r6,5
+	add     %r10,%r8,%r10
+	mtctr   %r10
+	addi	%r5,%r5,-1
+	mtlr    %r9
+	bctr
+
+Lend1:	slw	%r0,%r11,%r6
+	stw	%r0,-4(%r7)
+	blr
+
+	mtctr	%r5
+Loop1:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,1
+	inslwi	%r9,%r10,1,31
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,1
+	inslwi	%r9,%r11,1,31
+	stwu	%r9,-4(%r7)
+	bdnz	Loop1
+	b	Lend1
+
+	mtctr	%r5
+Loop2:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,2
+	inslwi	%r9,%r10,2,30
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,2
+	inslwi	%r9,%r11,2,30
+	stwu	%r9,-4(%r7)
+	bdnz	Loop2
+	b	Lend1
+
+	mtctr	%r5
+Loop3:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,3
+	inslwi	%r9,%r10,3,29
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,3
+	inslwi	%r9,%r11,3,29
+	stwu	%r9,-4(%r7)
+	bdnz	Loop3
+	b	Lend1
+
+	mtctr	%r5
+Loop4:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,4
+	inslwi	%r9,%r10,4,28
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,4
+	inslwi	%r9,%r11,4,28
+	stwu	%r9,-4(%r7)
+	bdnz	Loop4
+	b	Lend1
+
+	mtctr	%r5
+Loop5:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,5
+	inslwi	%r9,%r10,5,27
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,5
+	inslwi	%r9,%r11,5,27
+	stwu	%r9,-4(%r7)
+	bdnz	Loop5
+	b	Lend1
+
+	mtctr	%r5
+Loop6:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,6
+	inslwi	%r9,%r10,6,26
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,6
+	inslwi	%r9,%r11,6,26
+	stwu	%r9,-4(%r7)
+	bdnz	Loop6
+	b	Lend1
+
+	mtctr	%r5
+Loop7:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,7
+	inslwi	%r9,%r10,7,25
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,7
+	inslwi	%r9,%r11,7,25
+	stwu	%r9,-4(%r7)
+	bdnz	Loop7
+	b	Lend1
+
+	mtctr	%r5
+Loop8:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,8
+	inslwi	%r9,%r10,8,24
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,8
+	inslwi	%r9,%r11,8,24
+	stwu	%r9,-4(%r7)
+	bdnz	Loop8
+	b	Lend1
+
+	mtctr	%r5
+Loop9:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,9
+	inslwi	%r9,%r10,9,23
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,9
+	inslwi	%r9,%r11,9,23
+	stwu	%r9,-4(%r7)
+	bdnz	Loop9
+	b	Lend1
+
+	mtctr	%r5
+Loop10:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,10
+	inslwi	%r9,%r10,10,22
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,10
+	inslwi	%r9,%r11,10,22
+	stwu	%r9,-4(%r7)
+	bdnz	Loop10
+	b	Lend1
+
+	mtctr	%r5
+Loop11:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,11
+	inslwi	%r9,%r10,11,21
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,11
+	inslwi	%r9,%r11,11,21
+	stwu	%r9,-4(%r7)
+	bdnz	Loop11
+	b	Lend1
+
+	mtctr	%r5
+Loop12:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,12
+	inslwi	%r9,%r10,12,20
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,12
+	inslwi	%r9,%r11,12,20
+	stwu	%r9,-4(%r7)
+	bdnz	Loop12
+	b	Lend1
+
+	mtctr	%r5
+Loop13:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,13
+	inslwi	%r9,%r10,13,19
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,13
+	inslwi	%r9,%r11,13,19
+	stwu	%r9,-4(%r7)
+	bdnz	Loop13
+	b	Lend1
+
+	mtctr	%r5
+Loop14:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,14
+	inslwi	%r9,%r10,14,18
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,14
+	inslwi	%r9,%r11,14,18
+	stwu	%r9,-4(%r7)
+	bdnz	Loop14
+	b	Lend1
+
+	mtctr	%r5
+Loop15:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,15
+	inslwi	%r9,%r10,15,17
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,15
+	inslwi	%r9,%r11,15,17
+	stwu	%r9,-4(%r7)
+	bdnz	Loop15
+	b	Lend1
+
+	mtctr	%r5
+Loop16:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,16
+	inslwi	%r9,%r10,16,16
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,16
+	inslwi	%r9,%r11,16,16
+	stwu	%r9,-4(%r7)
+	bdnz	Loop16
+	b	Lend1
+
+	mtctr	%r5
+Loop17:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,17
+	inslwi	%r9,%r10,17,15
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,17
+	inslwi	%r9,%r11,17,15
+	stwu	%r9,-4(%r7)
+	bdnz	Loop17
+	b	Lend1
+
+	mtctr	%r5
+Loop18:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,18
+	inslwi	%r9,%r10,18,14
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,18
+	inslwi	%r9,%r11,18,14
+	stwu	%r9,-4(%r7)
+	bdnz	Loop18
+	b	Lend1
+
+	mtctr	%r5
+Loop19:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,19
+	inslwi	%r9,%r10,19,13
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,19
+	inslwi	%r9,%r11,19,13
+	stwu	%r9,-4(%r7)
+	bdnz	Loop19
+	b	Lend1
+
+	mtctr	%r5
+Loop20:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,20
+	inslwi	%r9,%r10,20,12
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,20
+	inslwi	%r9,%r11,20,12
+	stwu	%r9,-4(%r7)
+	bdnz	Loop20
+	b	Lend1
+
+	mtctr	%r5
+Loop21:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,21
+	inslwi	%r9,%r10,21,11
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,21
+	inslwi	%r9,%r11,21,11
+	stwu	%r9,-4(%r7)
+	bdnz	Loop21
+	b	Lend1
+
+	mtctr	%r5
+Loop22:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,22
+	inslwi	%r9,%r10,22,10
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,22
+	inslwi	%r9,%r11,22,10
+	stwu	%r9,-4(%r7)
+	bdnz	Loop22
+	b	Lend1
+
+	mtctr	%r5
+Loop23:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,23
+	inslwi	%r9,%r10,23,9
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,23
+	inslwi	%r9,%r11,23,9
+	stwu	%r9,-4(%r7)
+	bdnz	Loop23
+	b	Lend1
+
+	mtctr	%r5
+Loop24:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,24
+	inslwi	%r9,%r10,24,8
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,24
+	inslwi	%r9,%r11,24,8
+	stwu	%r9,-4(%r7)
+	bdnz	Loop24
+	b	Lend1
+
+	mtctr	%r5
+Loop25:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,25
+	inslwi	%r9,%r10,25,7
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,25
+	inslwi	%r9,%r11,25,7
+	stwu	%r9,-4(%r7)
+	bdnz	Loop25
+	b	Lend1
+
+	mtctr	%r5
+Loop26:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,26
+	inslwi	%r9,%r10,26,6
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,26
+	inslwi	%r9,%r11,26,6
+	stwu	%r9,-4(%r7)
+	bdnz	Loop26
+	b	Lend1
+
+	mtctr	%r5
+Loop27:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,27
+	inslwi	%r9,%r10,27,5
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,27
+	inslwi	%r9,%r11,27,5
+	stwu	%r9,-4(%r7)
+	bdnz	Loop27
+	b	Lend1
+
+	mtctr	%r5
+Loop28:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,28
+	inslwi	%r9,%r10,28,4
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,28
+	inslwi	%r9,%r11,28,4
+	stwu	%r9,-4(%r7)
+	bdnz	Loop28
+	b	Lend1
+
+	mtctr	%r5
+Loop29:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,29
+	inslwi	%r9,%r10,29,3
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,29
+	inslwi	%r9,%r11,29,3
+	stwu	%r9,-4(%r7)
+	bdnz	Loop29
+	b	Lend1
+
+	mtctr	%r5
+Loop30:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,30
+	inslwi	%r9,%r10,30,2
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,30
+	inslwi	%r9,%r11,30,2
+	stwu	%r9,-4(%r7)
+	bdnz	Loop30
+	b	Lend1
+
+	mtctr	%r5
+Loop31:	lwzu	%r10,-4(%r4)
+	slwi	%r9,%r11,31
+	inslwi	%r9,%r10,31,1
+	stwu	%r9,-4(%r7)
+	bdz	Lend2
+	lwzu	%r11,-4(%r4)
+	slwi	%r9,%r10,31
+	inslwi	%r9,%r11,31,1
+	stwu	%r9,-4(%r7)
+	bdnz	Loop31
+	b	Lend1
+
+Lend2:	slw	%r0,%r10,%r6
+	stw	%r0,-4(%r7)
+	blr
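
The 31 near-identical loops above are the arms of the computed branch set
up at Lbig: every arm is the same fixed length (12 instructions, 48 bytes),
so the target can be computed from cnt plus the current address (obtained
via bltl/mflr), keeping the code position-independent.  Each arm hard-codes
its shift count in slwi/inslwi; all of them implement the operation below,
sketched in C assuming 1 <= cnt <= 31 and walking down from the top limb
as the assembly does (so wp may overlap up from above):

#include <stddef.h>
#include <stdint.h>

typedef uint32_t mp_limb_t;

static mp_limb_t
ref_lshift (mp_limb_t *wp, const mp_limb_t *up, size_t usize,
            unsigned int cnt)
{
  mp_limb_t ret = up[usize - 1] >> (32 - cnt);  /* bits shifted out */
  for (size_t i = usize - 1; i > 0; i--)
    wp[i] = (up[i] << cnt) | (up[i - 1] >> (32 - cnt));
  wp[0] = up[0] << cnt;
  return ret;
}
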
diff --git a/sysdeps/powerpc/memset.s b/sysdeps/powerpc/memset.s
new file mode 100644
index 0000000000..4c8bf8c6b4
--- /dev/null
+++ b/sysdeps/powerpc/memset.s
@@ -0,0 +1,202 @@
+ # Optimized memset implementation for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+	.section ".text"
+	.align 5
+	nop
+	
+	.globl memset
+	.type memset,@function
+memset:	
+ # __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ # Returns 's'.
+
+ # The memset is done in three sizes: byte (8 bits), word (32 bits),
+ # cache line (256 bits). There is a special case for setting cache lines
+ # to 0, to take advantage of the dcbz instruction.
+ # r6:	current address we are storing at
+ # r7:	number of bytes we are setting now (when aligning)
+
+ # take care of case for size <= 4
+	cmplwi %cr1,%r5,4	
+	andi.  %r7,%r3,3
+	mr     %r6,%r3
+	ble-   %cr1,small
+ # align to word boundary
+	cmplwi %cr5,%r5,31
+	rlwimi %r4,%r4,8,16,23
+	beq+   aligned			# 8th instruction from .align
+	mtcrf  0x01,%r3
+	subfic %r7,%r7,4
+	add    %r6,%r6,%r7
+	sub    %r5,%r5,%r7
+	bf+    31,0f
+	stb    %r4,0(%r3)
+	bt     30,aligned
+0:	sth    %r4,-2(%r6)		#  16th instruction from .align
+ # take care of case for size <= 31
+aligned:
+	mtcrf  0x01,%r5
+	rlwimi %r4,%r4,16,0,15
+	ble    %cr5,medium
+ # align to cache line boundary...
+	andi.  %r7,%r6,0x1C
+	subfic %r7,%r7,0x20
+	beq    caligned
+	mtcrf  0x01,%r7
+	add    %r6,%r6,%r7
+	sub    %r5,%r5,%r7
+	cmplwi %cr1,%r7,0x10
+	mr     %r8,%r6
+	bf     28,1f
+	stw    %r4,-4(%r8)
+	stwu   %r4,-8(%r8)
+1:	blt    %cr1,2f
+	stw    %r4,-4(%r8)	# 32nd instruction from .align
+	stw    %r4,-8(%r8)
+	stw    %r4,-12(%r8)
+	stwu   %r4,-16(%r8)
+2:	bf     29,caligned
+	stw    %r4,-4(%r8)
+ # now aligned to a cache line.
+caligned:
+	cmplwi %cr1,%r4,0
+	clrrwi. %r7,%r5,5
+	mtcrf  0x01,%r5		# 40th instruction from .align
+	beq    %cr1,zloopstart	# special case for clearing memory using dcbz
+	srwi   %r0,%r7,5
+	mtctr  %r0
+	beq    medium		# we may not actually get to do a full line
+	clrlwi. %r5,%r5,27
+	add    %r6,%r6,%r7
+0:	li     %r8,-0x40
+	bdz    cloopdone	# 48th instruction from .align
+	
+cloop:	dcbz   %r8,%r6
+	stw    %r4,-4(%r6)
+	stw    %r4,-8(%r6)
+	stw    %r4,-12(%r6)
+	stw    %r4,-16(%r6)
+	nop			# let 601 fetch last 4 instructions of loop
+	stw    %r4,-20(%r6)
+	stw    %r4,-24(%r6)	# 56th instruction from .align
+	nop			# let 601 fetch first 8 instructions of loop
+	stw    %r4,-28(%r6)
+	stwu   %r4,-32(%r6)
+	bdnz   cloop
+cloopdone:
+	stw    %r4,-4(%r6)
+	stw    %r4,-8(%r6)
+	stw    %r4,-12(%r6)
+	stw    %r4,-16(%r6)	# 64th instruction from .align
+	stw    %r4,-20(%r6)
+	cmplwi %cr1,%r5,16
+	stw    %r4,-24(%r6)
+	stw    %r4,-28(%r6)
+	stwu   %r4,-32(%r6)
+	beqlr
+	add    %r6,%r6,%r7
+	b      medium_tail2	# 72nd instruction from .align
+
+	.align 5
+	nop
+# clear lines of memory in 128-byte chunks.
+zloopstart:
+	clrlwi %r5,%r5,27
+	mtcrf  0x02,%r7
+	srwi.  %r0,%r7,7
+	mtctr  %r0
+	li     %r7,0x20
+	li     %r8,-0x40
+	cmplwi %cr1,%r5,16	# 8
+	bf     26,0f	
+	dcbz   0,%r6
+	addi   %r6,%r6,0x20
+0:	li     %r9,-0x20
+	bf     25,1f
+	dcbz   0,%r6
+	dcbz   %r7,%r6
+	addi   %r6,%r6,0x40	# 16
+1:	cmplwi %cr5,%r5,0
+	beq    medium
+zloop:	
+	dcbz   0,%r6
+	dcbz   %r7,%r6
+	addi   %r6,%r6,0x80
+	dcbz   %r8,%r6
+	dcbz   %r9,%r6
+	bdnz   zloop
+	beqlr  %cr5
+	b      medium_tail2
+	
+	.align 5	
+small:
+ # Memset of 4 bytes or less.
+	cmplwi %cr5,%r5,1
+	cmplwi %cr1,%r5,3
+	bltlr  %cr5
+	stb    %r4,0(%r6)
+	beqlr  %cr5
+	nop
+	stb    %r4,1(%r6)
+	bltlr  %cr1
+	stb    %r4,2(%r6)
+	beqlr  %cr1
+	nop
+	stb    %r4,3(%r6)
+	blr
+
+# memset of 0-31 bytes
+	.align 5
+medium:
+	cmplwi %cr1,%r5,16
+medium_tail2:
+	add    %r6,%r6,%r5
+medium_tail:
+	bt-    31,medium_31t
+	bt-    30,medium_30t
+medium_30f:
+	bt-    29,medium_29t
+medium_29f:
+	bge-   %cr1,medium_27t
+	bflr-  28
+	stw    %r4,-4(%r6)	# 8th instruction from .align
+	stw    %r4,-8(%r6)
+	blr
+
+medium_31t:	
+	stbu   %r4,-1(%r6)
+	bf-    30,medium_30f
+medium_30t:
+	sthu   %r4,-2(%r6)
+	bf-    29,medium_29f
+medium_29t:
+	stwu   %r4,-4(%r6)
+	blt-   %cr1,medium_27f	# 16th instruction from .align
+medium_27t:
+	stw    %r4,-4(%r6)
+	stw    %r4,-8(%r6)
+	stw    %r4,-12(%r6)
+	stwu   %r4,-16(%r6)
+medium_27f:
+	bflr-  28
+medium_28t:
+	stw    %r4,-4(%r6)
+	stw    %r4,-8(%r6)
+	blr
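
A structural C sketch of the phases described at the top of the function:
byte stores up to a word boundary, word stores up to a cache-line boundary,
whole lines, then the 'medium' tail.  The dcbz fast path for c == 0 has no
portable C equivalent, so the zero case is shown as plain word stores; LINE
and sketch_memset are names invented for this sketch.

#include <stddef.h>
#include <stdint.h>

#define LINE 32  /* cache-line size the assembly targets */

void *
sketch_memset (void *s, int c, size_t n)
{
  unsigned char *p = s;
  unsigned char b = (unsigned char) c;
  uint32_t w = b * 0x01010101u;  /* splat the byte, like the rlwimi pair */

  if (n <= 4)  /* the 'small' path: byte stores only */
    {
      for (size_t i = 0; i < n; i++)
        p[i] = b;
      return s;
    }
  for (; (uintptr_t) p & 3; n--)  /* align to a word */
    *p++ = b;
  for (; n >= 4 && ((uintptr_t) p & (LINE - 1)) != 0; p += 4, n -= 4)
    *(uint32_t *) p = w;  /* align to a cache line */
  for (; n >= LINE; p += LINE, n -= LINE)  /* whole lines (dcbz when 0) */
    for (size_t i = 0; i < LINE / 4; i++)
      ((uint32_t *) p)[i] = w;
  for (; n >= 4; p += 4, n -= 4)  /* the 'medium' tail: words first... */
    *(uint32_t *) p = w;
  while (n--)  /* ...then trailing bytes */
    *p++ = b;
  return s;
}
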
diff --git a/sysdeps/powerpc/mul_1.s b/sysdeps/powerpc/mul_1.s
new file mode 100644
index 0000000000..d6eb623bd4
--- /dev/null
+++ b/sysdeps/powerpc/mul_1.s
@@ -0,0 +1,47 @@
+ # Multiply a limb vector by a limb, for PowerPC.
+ # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+ #                      mp_size_t s1_size, mp_limb_t s2_limb)
+ # Calculate s1*s2 and put result in res_ptr; return carry.
+
+	.align 2
+	.globl __mpn_mul_1
+	.type	 __mpn_mul_1,@function
+
+__mpn_mul_1:
+	mtctr	%r5
+
+	lwz	%r0,0(%r4)
+	mullw	%r7,%r0,%r6
+	mulhwu	%r10,%r0,%r6
+	addi	%r3,%r3,-4		# adjust res_ptr
+	addic	%r5,%r5,0		# clear cy with dummy insn
+	bdz	Lend
+
+Loop:	lwzu	%r0,4(%r4)
+	stwu	%r7,4(%r3)
+	mullw	%r8,%r0,%r6
+	adde	%r7,%r8,%r10
+	mulhwu	%r10,%r0,%r6
+	bdnz	Loop
+
+Lend:	stw	%r7,4(%r3)
+	addze	%r3,%r10
+	blr
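
This is the addmul_1 recurrence without the load of the old destination
limb; the dummy addic clears CA so that the first adde in the loop sees a
zero carry.  The equivalent C model (same illustrative conventions as the
sketches above):

#include <stddef.h>
#include <stdint.h>

typedef uint32_t mp_limb_t;

static mp_limb_t
ref_mul_1 (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
           size_t s1_size, mp_limb_t s2_limb)
{
  mp_limb_t cy = 0;
  for (size_t i = 0; i < s1_size; i++)
    {
      uint64_t t = (uint64_t) s1_ptr[i] * s2_limb + cy;
      res_ptr[i] = (mp_limb_t) t;
      cy = (mp_limb_t) (t >> 32);
    }
  return cy;
}
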
diff --git a/sysdeps/powerpc/rshift.s b/sysdeps/powerpc/rshift.s
new file mode 100644
index 0000000000..20f09ad86a
--- /dev/null
+++ b/sysdeps/powerpc/rshift.s
@@ -0,0 +1,59 @@
+# PowerPC-32 __mpn_rshift -- shift a limb vector right.
+
+# Copyright (C) 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# size		r5
+# cnt		r6
+
+	.align 3
+	.globl __mpn_rshift
+	.type	 __mpn_rshift,@function
+__mpn_rshift:
+	mtctr	5		# copy size into CTR
+	addi	7,3,-4		# move adjusted res_ptr to free return reg
+	subfic	8,6,32
+	lwz	11,0(4)		# load first s1 limb
+	slw	3,11,8		# compute function return value
+	bdz	Lend1
+
+Loop:	lwzu	10,4(4)
+	srw	9,11,6
+	slw	12,10,8
+	or	9,9,12
+	stwu	9,4(7)
+	bdz	Lend2
+	lwzu	11,4(4)
+	srw	9,10,6
+	slw	12,11,8
+	or	9,9,12
+	stwu	9,4(7)
+	bdnz	Loop
+
+Lend1:	srw	0,11,6
+	stw	0,4(7)
+	blr
+
+Lend2:	srw	0,10,6
+	stw	0,4(7)
+	blr
diff --git a/sysdeps/powerpc/strchr.s b/sysdeps/powerpc/strchr.s
new file mode 100644
index 0000000000..c1df66f8dc
--- /dev/null
+++ b/sysdeps/powerpc/strchr.s
@@ -0,0 +1,118 @@
+ # Optimized strchr implementation for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # See strlen.s for comments on how this works.
+
+	.section ".text"
+	.align 2
+	.globl strchr
+	.type strchr,@function
+strchr:
+ # char * [r3] strchr (const char *s [r3], int c [r4])
+
+ # r0:	a temporary
+ # r3:	our return result.
+ # r4:	byte we're looking for, spread over the whole word
+ # r5:	the current word
+ # r6:	the constant 0xfefefeff (-0x01010101)
+ # r7:	the constant 0x7f7f7f7f
+ # r8:	pointer to the current word.
+ # r9:	a temporary
+ # r10:	the number of bits we should ignore in the first word
+ # r11:	a mask with the bits to ignore set to 0
+ # r12:	a temporary
+	
+	rlwimi %r4,%r4,8,16,23
+	li   %r11,-1
+	rlwimi %r4,%r4,16,0,15
+	lis  %r6,0xfeff
+	lis  %r7,0x7f7f
+	clrrwi %r8,%r3,2
+	addi %r7,%r7,0x7f7f
+	addi %r6,%r6,0xfffffeff
+	rlwinm %r10,%r3,3,27,28
+ # Test the first (partial?) word.
+	lwz  %r5,0(%r8)
+	srw  %r11,%r11,%r10
+	orc  %r5,%r5,%r11
+	add  %r0,%r6,%r5
+	nor  %r9,%r7,%r5
+	and. %r0,%r0,%r9
+	xor  %r12,%r4,%r5
+	orc  %r12,%r12,%r11
+	b    loopentry
+	
+ # The loop.
+
+loop:	lwzu %r5,4(%r8)
+	and. %r0,%r0,%r9
+ # Test for 0
+	add  %r0,%r6,%r5
+	nor  %r9,%r7,%r5
+	bne  foundit
+	and. %r0,%r0,%r9
+ # Start test for the bytes we're looking for
+	xor  %r12,%r4,%r5
+loopentry:
+	add  %r0,%r6,%r12
+	nor  %r9,%r7,%r12
+	beq  loop
+ # There is a zero byte in the word, but there may also be a matching byte
+ # (either before or after the zero byte). In fact, we may be looking for a
+ # zero byte, in which case we return a match. We guess that this hasn't
+ # happened, though.
+missed:	
+	and. %r0,%r0,%r9
+	li   %r3,0
+	beqlr
+ # It did happen. Decide which one was first...
+ # I'm not sure if this is actually faster than a sequence of
+ # rotates, compares, and branches (we use it anyway because it's shorter).
+	and  %r6,%r7,%r5
+	or   %r11,%r7,%r5
+	and  %r0,%r7,%r12
+	or   %r10,%r7,%r12
+	add  %r6,%r6,%r7
+	add  %r0,%r0,%r7
+	nor  %r5,%r11,%r6
+	nor  %r9,%r10,%r0
+	cmplw %r5,%r9
+	bgtlr
+	cntlzw %r4,%r9
+	srwi %r4,%r4,3
+	add  %r3,%r8,%r4
+	blr
+
+foundit:
+	and  %r0,%r7,%r12
+	or   %r10,%r7,%r12
+	add  %r0,%r0,%r7
+	nor  %r9,%r10,%r0
+	cntlzw %r4,%r9
+	subi %r8,%r8,4
+	srwi %r4,%r4,3
+	add  %r3,%r8,%r4
+	blr
+
+0:
+	.size	 strchr,0b-strchr
+
+	.globl index
+	.weak index
+	.set index,strchr
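
The constants in r6 and r7 implement the word-at-a-time zero-byte test the
comment above defers to strlen.s; 0xfefefeff is just -0x01010101 modulo
2^32.  In C it reads as below (helper name invented here).  strchr runs the
test twice per word: once on the word itself to find the terminating NUL,
and once on the word XORed with the splatted search character, since a byte
equal to c becomes a zero byte under that XOR.

#include <stdint.h>

/* Nonzero iff some byte of w is zero; this is the classic
   (w - 0x01010101) & ~w & 0x80808080 test in disguise.  */
static inline uint32_t
has_zero_byte (uint32_t w)
{
  return (w + 0xfefefeffu) & ~(w | 0x7f7f7f7fu);
}

For example, has_zero_byte (0x7f000000) is nonzero (three zero bytes),
while has_zero_byte (0x01020304) is zero.
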
diff --git a/sysdeps/powerpc/strcmp.s b/sysdeps/powerpc/strcmp.s
new file mode 100644
index 0000000000..f901b82ab1
--- /dev/null
+++ b/sysdeps/powerpc/strcmp.s
@@ -0,0 +1,273 @@
+ # Optimized strcmp implementation for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # See strlen.s for comments on how the end-of-string testing works.
+
+	.section ".text"
+	.align 3
+	.globl strcmp
+	.type strcmp,@function
+strcmp:
+ # int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])
+
+ # General register assignments:
+ # r0:	temporary
+ # r3:	pointer to previous word in s1
+ # r4:	pointer to previous word in s2
+ # r5:	current first word in s1
+ # r6:	current first word in s2 (after re-alignment)
+ # r7:	0xfefefeff
+ # r8:	0x7f7f7f7f
+ # r9:	~(word in s1 | 0x7f7f7f7f)
+	
+ # Register assignments in the prologue:
+ # r10:	low 2 bits of p2-p1
+ # r11:	mask to orc with r5/r6
+	
+	subf. %r10,%r4,%r3
+	beq-  equal
+	andi. %r10,%r10,3
+	cmpi  %cr1,%r10,2
+	beq-  %cr1,align2
+	lis   %r7,0xfeff
+	lis   %r8,0x7f7f
+	addi  %r8,%r8,0x7f7f
+	addi  %r7,%r7,0xfffffeff
+	bgt-  %cr1,align3
+strcmp3:
+	rlwinm %r0,%r3,3,27,28
+	li    %r11,-1
+	srw   %r11,%r11,%r0
+	clrrwi %r3,%r3,2
+	clrrwi %r4,%r4,2
+	lwz   %r5,0(%r3)
+	lwz   %r6,0(%r4)
+	bne-  align1
+
+ # The loop, case when both strings are aligned the same.
+ # on entry, cr1.eq must be 1.
+ # r10:	second word in s1
+ # r11:	second word in s2 OR mask to orc with first two words.
+align0:	
+	andi. %r0,%r3,4
+	orc   %r5,%r5,%r11
+	orc   %r6,%r6,%r11
+	beq+  a0start
+	add   %r0,%r7,%r5
+	nor   %r9,%r8,%r5
+	and.  %r0,%r0,%r9
+	cmplw %cr1,%r5,%r6
+	subi  %r3,%r3,4
+	bne-  endstringeq
+	subi  %r4,%r4,4
+	bne-  %cr1,difference
+
+loopalign0:
+	lwzu  %r5,8(%r3)
+	bne-  %cr1,difference2
+	lwzu  %r6,8(%r4)
+a0start:
+	add   %r0,%r7,%r5
+	nor   %r9,%r8,%r5
+	and.  %r0,%r0,%r9
+	cmplw %cr1,%r5,%r6
+	lwz   %r10,4(%r3)
+	bne-  endstringeq
+	add   %r0,%r7,%r10
+	bne-  %cr1,difference
+	nor   %r9,%r8,%r10
+	lwz   %r11,4(%r4)
+	and.  %r0,%r0,%r9
+	cmplw %cr1,%r10,%r11
+	beq+  loopalign0
+
+	mr    %r5,%r10
+	mr    %r6,%r11
+
+ # fall through to...
+
+endstringeq:
+ # (like 'endstring', but an equality code is in cr1)
+	beq  %cr1,equal
+endstring:
+ # OK. We've hit the end of the string. We need to be careful that
+ # we don't compare two strings as different because of gunk beyond
+ # the end of the strings. We do it like this...
+	and  %r0,%r8,%r5
+	add  %r0,%r0,%r8
+	xor. %r10,%r5,%r6
+	andc %r9,%r9,%r0
+	cntlzw %r10,%r10
+	cntlzw %r9,%r9
+	addi %r9,%r9,7
+	cmpw %cr1,%r9,%r10
+	blt  %cr1,equal
+	sub  %r3,%r5,%r6
+	bgelr+
+	mr   %r3,%r6
+	blr
+equal:	li   %r3,0
+	blr
+	
+ # The loop, case when s2 is aligned 1 char behind s1.
+ # r10:	current word in s2 (before re-alignment)
+
+align1:
+	cmpwi %cr1,%r0,0
+	orc   %r5,%r5,%r11
+	bne   %cr1,align1_123
+ # When s1 is aligned to a word boundary, the startup processing is special.
+	slwi. %r6,%r6,24
+	bne+  a1entry_0
+	nor   %r9,%r8,%r5
+	b     endstring
+
+align1_123:
+ # Otherwise (s1 not aligned to a word boundary):
+	mr    %r10,%r6
+	add   %r0,%r7,%r5
+	nor   %r9,%r8,%r5
+	and.  %r0,%r0,%r9
+	srwi  %r6,%r6,8
+	orc   %r6,%r6,%r11
+	cmplw %cr1,%r5,%r6
+	bne-  endstringeq
+	bne-  %cr1,difference
+
+loopalign1:
+	slwi. %r6,%r10,24
+	bne-  %cr1,a1difference
+	lwzu  %r5,4(%r3)
+	beq-  endstring1
+a1entry_0:
+	lwzu  %r10,4(%r4)
+a1entry_123:	
+	add   %r0,%r7,%r5
+	nor   %r9,%r8,%r5
+	and.  %r0,%r0,%r9
+	rlwimi %r6,%r10,24,8,31
+	cmplw %cr1,%r5,%r6
+	beq+  loopalign1
+	b     endstringeq
+
+endstring1:
+	srwi  %r3,%r5,24
+	blr
+
+a1difference:
+	lbz   %r6,-1(%r4)
+	slwi  %r6,%r6,24
+	rlwimi %r6,%r10,24,8,31
+
+ # fall through to...
+		
+difference:	
+ # The idea here is that we could just return '%r5 - %r6', except
+ # that the result might overflow. Overflow can only happen when %r5
+ # and %r6 have different signs (thus the xor), in which case we want to
+ # return negative iff %r6 has its high bit set, since then %r5 < %r6.
+ # A branch-free implementation of this is
+ #	xor  %r0,%r5,%r6
+ #	rlwinm %r0,%r0,1,31,31
+ #	rlwnm %r5,%r5,%r0,1,31
+ #	rlwnm %r6,%r6,%r0,1,31
+ #	sub  %r3,%r5,%r6
+ #	blr
+ # but this is usually more expensive.
+	xor. %r0,%r5,%r6
+	sub  %r3,%r5,%r6
+	bgelr+
+	mr   %r3,%r6
+	blr
+
+difference2:
+ # As for 'difference', but use registers r10 and r11 instead of r5 and r6.
+	xor. %r0,%r10,%r11
+	sub  %r3,%r10,%r11
+	bgelr+
+	mr   %r3,%r11
+	blr
+	
+ # For the case when s2 is aligned 3 chars behind s1, we switch
+ # s1 and s2...
+ # r10:	used by 'align2' (see below)
+ # r11:	used by 'align2' (see below)
+ # r12:	saved link register
+ # cr0.eq: must be left as 1.
+
+align3:	mflr %r12
+	mr   %r0,%r3
+	mr   %r3,%r4
+	mr   %r4,%r0
+	bl   strcmp3
+	mtlr %r12
+	neg  %r3,%r3
+	blr
+	
+ # The loop, case when s2 and s1's alignments differ by 2
+ # This is the ugly case...
+ # FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect,
+ # because there are too many branches. This loop should probably be
+ # coded like the align1 case.
+	
+a2even:	lhz   %r5,0(%r3)
+	lhz   %r6,0(%r4)
+	b     a2entry
+	
+align2:
+	andi. %r0,%r3,1
+	beq+  a2even
+	subi  %r3,%r3,1
+	subi  %r4,%r4,1
+	lbz   %r5,1(%r3)
+	lbz   %r6,1(%r4)
+	cmpwi %cr0,%r5,0
+	cmpw  %cr1,%r5,%r6
+	beq-  align2end2
+	lhzu  %r5,2(%r3)
+	beq+  %cr1,a2entry1
+	lbz   %r5,-1(%r3)
+	sub   %r3,%r5,%r6
+	blr
+
+loopalign2:
+	cmpw  %cr1,%r5,%r6
+	beq-  align2end2
+	lhzu  %r5,2(%r3)
+	bne-  %cr1,align2different
+a2entry1:
+	lhzu  %r6,2(%r4)
+a2entry:	
+	cmpwi %cr5,%r5,0x00ff
+	andi. %r0,%r5,0x00ff
+	bgt+  %cr5,loopalign2
+
+align2end:
+	andi. %r3,%r6,0xff00
+	neg   %r3,%r3
+	blr
+
+align2different:
+	lhzu  %r5,-2(%r3)
+align2end2:
+	sub   %r3,%r5,%r6
+	blr
+		
+0:
+	.size	 strcmp,0b-strcmp
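
The 'difference' return path, in hedged C form mirroring the assembly
exactly (xor., sub, bgelr, mr): when the top bits agree the signed
difference cannot overflow, and when they differ the unsigned-larger word
is the one with its top bit set, so returning the second word reinterpreted
as signed carries the right sign bit.

#include <stdint.h>

/* Sign of the unsigned comparison of a and b without risking signed
   overflow in the subtraction; the assembly reaches this only when the
   words already compared unequal.  */
static int32_t
word_difference (uint32_t a, uint32_t b)
{
  if ((int32_t) (a ^ b) >= 0)  /* same top bit: plain subtract is safe */
    return (int32_t) (a - b);
  return (int32_t) b;          /* negative exactly when b's top bit is
                                  set, i.e. when a < b unsigned */
}
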
diff --git a/sysdeps/powerpc/sub_n.s b/sysdeps/powerpc/sub_n.s
new file mode 100644
index 0000000000..8711bf9a40
--- /dev/null
+++ b/sysdeps/powerpc/sub_n.s
@@ -0,0 +1,69 @@
+ # Subtract two limb vectors of equal, non-zero length for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
+ #                      mp_size_t size)
+ # Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1.
+
+ # Note on optimisation: This code is optimal for the 601.  Almost every other
+ # possible 2-unrolled inner loop will not be.  Also, watch out for the
+ # alignment...
+
+	.align 3
+	.globl __mpn_sub_n
+	.type	 __mpn_sub_n,@function
+	nop
+__mpn_sub_n:
+ # Set up for loop below.
+	mtcrf 0x01,%r6
+	srwi. %r7,%r6,1
+	mtctr %r7
+	bt    31,2f
+
+ # Set the carry (clear the borrow).
+	subfc %r0,%r0,%r0
+ # Adjust pointers for loop.
+	addi  %r3,%r3,-4
+	addi  %r4,%r4,-4
+	addi  %r5,%r5,-4
+	b     0f
+
+2:	lwz   %r7,0(%r5)
+	lwz   %r6,0(%r4)
+	subfc %r6,%r7,%r6
+	stw   %r6,0(%r3)
+	beq   1f
+
+ # Align start of loop to an odd word boundary to guarantee that the
+ # last two words can be fetched in one access (for 601).  This turns
+ # out to be important.
+0:
+	lwz   %r9,4(%r4)
+	lwz   %r8,4(%r5)
+	lwzu  %r6,8(%r4)
+	lwzu  %r7,8(%r5)
+	subfe %r8,%r8,%r9
+	stw   %r8,4(%r3)
+	subfe %r6,%r7,%r6
+	stwu  %r6,8(%r3)
+	bdnz  0b
+ # return the borrow
+1:	subfe %r3,%r3,%r3
+	neg   %r3,%r3
+	blr
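
A C model of the subtraction, tracking the borrow directly.  The PowerPC
wrinkle worth noting: the subtract instructions compute b - a as b + ~a +
carry-in, so the carry out is 1 exactly when there is no borrow.  That is
why the code above starts by setting CA (subfc %r0,%r0,%r0) and recovers
the borrow at the end with subfe/neg.

#include <stddef.h>
#include <stdint.h>

typedef uint32_t mp_limb_t;

static mp_limb_t
ref_sub_n (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
           const mp_limb_t *s2_ptr, size_t size)
{
  mp_limb_t brw = 0;  /* borrow in, 0 or 1 */
  for (size_t i = 0; i < size; i++)
    {
      uint64_t d = (uint64_t) s1_ptr[i] - s2_ptr[i] - brw;
      res_ptr[i] = (mp_limb_t) d;
      brw = (mp_limb_t) ((d >> 32) & 1);  /* wrapped iff borrow out */
    }
  return brw;
}
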
diff --git a/sysdeps/powerpc/submul_1.s b/sysdeps/powerpc/submul_1.s
new file mode 100644
index 0000000000..999430d744
--- /dev/null
+++ b/sysdeps/powerpc/submul_1.s
@@ -0,0 +1,52 @@
+ # Multiply a limb vector by a single limb, for PowerPC.
+ # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+ #                         mp_size_t s1_size, mp_limb_t s2_limb)
+ # Calculate res-s1*s2 and put result back in res; return carry.
+
+	.align 2
+	.globl __mpn_submul_1
+	.type	 __mpn_submul_1,@function
+__mpn_submul_1:
+	mtctr	%r5
+
+	lwz	%r0,0(%r4)
+	mullw	%r7,%r0,%r6
+	mulhwu	%r10,%r0,%r6
+	lwz     %r9,0(%r3)
+	subf 	%r8,%r7,%r9
+	addc    %r7,%r7,%r8		# invert cy (r7 is junk)
+	addi	%r3,%r3,-4		# adjust res_ptr
+	bdz	Lend
+
+Loop:	lwzu	%r0,4(%r4)
+	stwu	%r8,4(%r3)
+	mullw	%r8,%r0,%r6
+	adde	%r7,%r8,%r10
+	mulhwu	%r10,%r0,%r6
+	lwz     %r9,4(%r3)
+	addze   %r10,%r10
+	subf    %r8,%r7,%r9
+	addc    %r7,%r7,%r8		# invert cy (r7 is junk)
+	bdnz	Loop
+
+Lend:	stw	%r8,4(%r3)
+	addze	%r3,%r10
+	blr
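
Finally, a C model of submul_1.  The 'invert cy' trick above works like
this: subf computes r9 - r7 without touching CA, and the following addc
(whose result is just r9 again, hence 'r7 is junk') sets CA = 1 exactly
when that subtraction wrapped, i.e. when it borrowed; the adde at the top
of the loop then folds the borrow into the next limb's subtrahend.  The
sketch below tracks the borrow explicitly instead (ref_ name illustrative):

#include <stddef.h>
#include <stdint.h>

typedef uint32_t mp_limb_t;

static mp_limb_t
ref_submul_1 (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
              size_t s1_size, mp_limb_t s2_limb)
{
  mp_limb_t cy = 0;  /* borrow to apply to the next limb */
  for (size_t i = 0; i < s1_size; i++)
    {
      uint64_t prod = (uint64_t) s1_ptr[i] * s2_limb + cy;
      mp_limb_t lo = (mp_limb_t) prod;
      mp_limb_t r = res_ptr[i];
      res_ptr[i] = r - lo;
      cy = (mp_limb_t) (prod >> 32) + (r < lo);
    }
  return cy;
}
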