about summary refs log tree commit diff
path: root/sysdeps/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc')
-rw-r--r--sysdeps/powerpc/powerpc64/Dist4
-rw-r--r--sysdeps/powerpc/powerpc64/Implies1
-rw-r--r--sysdeps/powerpc/powerpc64/Makefile23
-rw-r--r--sysdeps/powerpc/powerpc64/__longjmp.S74
-rw-r--r--sysdeps/powerpc/powerpc64/atomicity.h132
-rw-r--r--sysdeps/powerpc/powerpc64/backtrace.c69
-rw-r--r--sysdeps/powerpc/powerpc64/bp-asm.h115
-rw-r--r--sysdeps/powerpc/powerpc64/bsd-_setjmp.S21
-rw-r--r--sysdeps/powerpc/powerpc64/bsd-setjmp.S21
-rw-r--r--sysdeps/powerpc/powerpc64/dl-dtprocnum.h22
-rw-r--r--sysdeps/powerpc/powerpc64/dl-lookupcfg.h22
-rw-r--r--sysdeps/powerpc/powerpc64/dl-machine.c50
-rw-r--r--sysdeps/powerpc/powerpc64/dl-machine.h750
-rw-r--r--sysdeps/powerpc/powerpc64/elf/bzero.S21
-rw-r--r--sysdeps/powerpc/powerpc64/elf/start.S66
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_copysign.S50
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_copysignf.S1
-rw-r--r--sysdeps/powerpc/powerpc64/memset.S296
-rw-r--r--sysdeps/powerpc/powerpc64/ppc-mcount.S43
-rw-r--r--sysdeps/powerpc/powerpc64/register-dump.h125
-rw-r--r--sysdeps/powerpc/powerpc64/setjmp.S87
-rw-r--r--sysdeps/powerpc/powerpc64/stpcpy.S121
-rw-r--r--sysdeps/powerpc/powerpc64/strchr.S130
-rw-r--r--sysdeps/powerpc/powerpc64/strcmp.S133
-rw-r--r--sysdeps/powerpc/powerpc64/strcpy.S120
-rw-r--r--sysdeps/powerpc/powerpc64/strlen.S163
26 files changed, 2660 insertions, 0 deletions
diff --git a/sysdeps/powerpc/powerpc64/Dist b/sysdeps/powerpc/powerpc64/Dist
new file mode 100644
index 0000000000..bbadfef3b8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/Dist
@@ -0,0 +1,4 @@
+dl-machine.c
+ppc-mcount.S
+elf/entry.h
+bp-asm.h
diff --git a/sysdeps/powerpc/powerpc64/Implies b/sysdeps/powerpc/powerpc64/Implies
new file mode 100644
index 0000000000..a8cae95f9d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/Implies
@@ -0,0 +1 @@
+wordsize-64
diff --git a/sysdeps/powerpc/powerpc64/Makefile b/sysdeps/powerpc/powerpc64/Makefile
new file mode 100644
index 0000000000..ff6819a63f
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/Makefile
@@ -0,0 +1,23 @@
+# Powerpc64 specific build options.
+# this is ./sysdeps/powerpc/powerpc64/Makefile
+
++cflags += -Wa,-mppc64 -mpowerpc64
+asm-CPPFLAGS += -Wa,-mppc64
+
+# On PPC64, -fpic is the default so we don't need to specify it.
+# Also early compilers would issue a warning if -fpic was specified.
+# Each TOC entry takes 8 bytes and the TOC holds up to 2^16 bytes,
+# or 8192 entries.  If the TOC fills up try -mminimal-toc.
+
+ifeq ($(subdir),csu)
+ifneq ($(elf),no)
+# The initfini generation code doesn't work in the presence of -g1 or 
+# higher, so we use -g0.
+CFLAGS-initfini.s = -g0 -O1
+endif
+endif
+
+ifeq ($(subdir),elf)
+# help gcc inline asm code from dl-machine.h
++cflags += --param max-inline-insns=2000
+endif
diff --git a/sysdeps/powerpc/powerpc64/__longjmp.S b/sysdeps/powerpc/powerpc64/__longjmp.S
new file mode 100644
index 0000000000..e4152126b1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/__longjmp.S
@@ -0,0 +1,74 @@
+/* longjmp for PowerPC64.
+   Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2002 
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#define _ASM
+#define _SETJMP_H
+#include <bits/setjmp.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __longjmp: reload the register state held in a jump buffer and resume
+   at the return address stored in it.
+   In:   r3 = address of the jump buffer, r4 = value to hand back.
+   Out:  does not return to its caller; control continues at the saved
+         LR with r3 = the incoming r4.
+   Reloads r1, r2, LR, CR, r14-r31 and fp14-fp31; uses r0 as scratch.
+   r4 is passed through unchanged.  NOTE(review): any mapping of a zero
+   value to 1 is presumably done by the C-level caller -- confirm.  */
+ENTRY (BP_SYM (__longjmp))
+	/* Expands to nothing unless __BOUNDED_POINTERS__ is set.  */
+	CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE)
+	ld r1,(JB_GPR1*8)(r3)
+	ld r2,(JB_GPR2*8)(r3)
+	/* r0 = saved LR; moved into LR further down.  */
+	ld r0,(JB_LR*8)(r3)
+	/* From here on the GPR and FPR reloads alternate, one pair per
+	   buffer slot index.  */
+	ld r14,((JB_GPRS+0)*8)(r3)
+	lfd fp14,((JB_FPRS+0)*8)(r3)
+	ld r15,((JB_GPRS+1)*8)(r3)
+	lfd fp15,((JB_FPRS+1)*8)(r3)
+	ld r16,((JB_GPRS+2)*8)(r3)
+	lfd fp16,((JB_FPRS+2)*8)(r3)
+	ld r17,((JB_GPRS+3)*8)(r3)
+	lfd fp17,((JB_FPRS+3)*8)(r3)
+	ld r18,((JB_GPRS+4)*8)(r3)
+	lfd fp18,((JB_FPRS+4)*8)(r3)
+	ld r19,((JB_GPRS+5)*8)(r3)
+	lfd fp19,((JB_FPRS+5)*8)(r3)
+	ld r20,((JB_GPRS+6)*8)(r3)
+	lfd fp20,((JB_FPRS+6)*8)(r3)
+	/* LR = saved return address; r0 is free again after this.  */
+	mtlr r0
+	ld r21,((JB_GPRS+7)*8)(r3)
+	lfd fp21,((JB_FPRS+7)*8)(r3)
+	ld r22,((JB_GPRS+8)*8)(r3)
+	lfd fp22,((JB_FPRS+8)*8)(r3)
+	/* r0 = saved CR image; written to CR further down.  */
+	ld r0,(JB_CR*8)(r3)
+	ld r23,((JB_GPRS+9)*8)(r3)
+	lfd fp23,((JB_FPRS+9)*8)(r3)
+	ld r24,((JB_GPRS+10)*8)(r3)
+	lfd fp24,((JB_FPRS+10)*8)(r3)
+	ld r25,((JB_GPRS+11)*8)(r3)
+	lfd fp25,((JB_FPRS+11)*8)(r3)
+	/* Mask 0xFF selects all eight CR fields.  */
+	mtcrf 0xFF,r0
+	ld r26,((JB_GPRS+12)*8)(r3)
+	lfd fp26,((JB_FPRS+12)*8)(r3)
+	ld r27,((JB_GPRS+13)*8)(r3)
+	lfd fp27,((JB_FPRS+13)*8)(r3)
+	ld r28,((JB_GPRS+14)*8)(r3)
+	lfd fp28,((JB_FPRS+14)*8)(r3)
+	ld r29,((JB_GPRS+15)*8)(r3)
+	lfd fp29,((JB_FPRS+15)*8)(r3)
+	ld r30,((JB_GPRS+16)*8)(r3)
+	lfd fp30,((JB_FPRS+16)*8)(r3)
+	ld r31,((JB_GPRS+17)*8)(r3)
+	lfd fp31,((JB_FPRS+17)*8)(r3)
+	/* The buffer pointer is no longer needed: r3 = value to hand back,
+	   then branch to the restored LR.  */
+	mr r3,r4
+	blr
+END (BP_SYM (__longjmp))
diff --git a/sysdeps/powerpc/powerpc64/atomicity.h b/sysdeps/powerpc/powerpc64/atomicity.h
new file mode 100644
index 0000000000..b9d0de0ac5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/atomicity.h
@@ -0,0 +1,132 @@
+/* Low-level functions for atomic operations.  PowerPC64 version.
+   Copyright (C) 1997, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#ifndef _ATOMICITY_H
+#define _ATOMICITY_H	1
+
+#include <inttypes.h>
+
+#if BROKEN_PPC_ASM_CR0
+# define __ATOMICITY_INLINE /* nothing */
+#else
+# define __ATOMICITY_INLINE inline
+#endif
+
+/* Atomically add VAL to the 32-bit word at MEM and return the value the
+   word held before the addition.  The lwarx/stwcx. sequence is retried
+   (bne- 0b) until the conditional store succeeds.  */
+static __ATOMICITY_INLINE int
+__attribute__ ((unused))
+exchange_and_add (volatile uint32_t *mem, int val)
+{
+  /* result = value loaded by lwarx, tmp = the sum that gets stored.  */
+  int tmp, result;
+  /* %0 uses the "b" class because it is the second operand of add%I3,
+     which is emitted as the immediate form when VAL is a constant
+     matching "I".  */
+  __asm__ ("\n\
+0:	lwarx	%0,0,%2	\n\
+	add%I3	%1,%0,%3	\n\
+	stwcx.	%1,0,%2	\n\
+	bne-	0b	\n\
+" : "=&b"(result), "=&r"(tmp) : "r" (mem), "Ir"(val) : "cr0", "memory");
+  return result;
+}
+
+/* Atomically add VAL to the 64-bit doubleword at MEM.  Nothing is
+   returned.  The ldarx/stdcx. sequence is retried (bne- 0b) until the
+   conditional store succeeds.  */
+static __ATOMICITY_INLINE void
+__attribute__ ((unused))
+atomic_add_long (volatile long *mem, int val)
+{
+  /* ldarx/stdcx. move a full doubleword, so the scratch must be 64 bits
+     wide.  */
+  long tmp;
+  /* __volatile__: the only output is a scratch value nobody reads, so a
+     plain asm could be discarded by the optimizer.  VAL is widened to
+     long so the 64-bit add sees a properly sign-extended addend.  */
+  __asm__ __volatile__ ("\n\
+0:	ldarx	%0,0,%1	\n\
+	add%I2	%0,%0,%2	\n\
+	stdcx.	%0,0,%1	\n\
+	bne-	0b	\n\
+" : "=&b"(tmp) : "r" (mem), "Ir"((long) val) : "cr0", "memory");
+}
+
+
+/* Atomically add VAL to the 64-bit doubleword at MEM and return the low
+   32 bits (as int) of the value it held before the addition.  The
+   return type is kept as int for existing callers, so the upper half of
+   the old value is not reported.  */
+static __ATOMICITY_INLINE int
+__attribute__ ((unused))
+exchange_and_add_long (volatile long *mem, int val)
+{
+  /* Both temporaries carry full doublewords through ldarx/stdcx., so
+     they are declared long; result = old value, tmp = stored sum.  */
+  long tmp, result;
+  /* VAL is widened to long so the 64-bit add sees a sign-extended
+     addend.  */
+  __asm__ ("\n\
+0:	ldarx	%0,0,%2	\n\
+	add%I3	%1,%0,%3	\n\
+	stdcx.	%1,0,%2	\n\
+	bne-	0b	\n\
+" : "=&b"(result), "=&r"(tmp) : "r" (mem), "Ir"((long) val)
+  : "cr0", "memory");
+  /* Explicit narrowing to the declared return type.  */
+  return (int) result;
+}
+
+/* Atomically add VAL to the 32-bit word at MEM.  Nothing is returned.
+   The lwarx/stwcx. sequence is retried (bne- 0b) until the conditional
+   store succeeds.  */
+static __ATOMICITY_INLINE void
+__attribute__ ((unused))
+atomic_add (volatile uint32_t *mem, int val)
+{
+  int tmp;
+  /* __volatile__: the only output is a scratch value nobody reads, so a
+     plain asm could be discarded by the optimizer.  */
+  __asm__ __volatile__ ("\n\
+0:	lwarx	%0,0,%1	\n\
+	add%I2	%0,%0,%2	\n\
+	stwcx.	%0,0,%1	\n\
+	bne-	0b	\n\
+" : "=&b"(tmp) : "r" (mem), "Ir"(val) : "cr0", "memory");
+}
+
+/* If the doubleword at P equals OLDVAL, atomically replace it with
+   NEWVAL.  Returns 1 when the swap was performed and 0 when the
+   comparison failed (in which case *P is left untouched).  */
+static __ATOMICITY_INLINE int
+__attribute__ ((unused))
+compare_and_swap (volatile long int *p, long int oldval, long int newval)
+{
+  /* Holds the 64-bit difference *P - OLDVAL, then its leading-zero
+     count.  */
+  long int result;
+  /* The difference is a full doubleword, so the leading zeros must be
+     counted over all 64 bits (cntlzd): the count is 64 only when the
+     difference is zero.  Counting just the low word would report
+     success for values that differ only in their upper 32 bits.
+     cntlzd has no record form here, so cr0 still reflects the
+     subtraction when bne- is reached.  */
+  __asm__ __volatile__ ("\n\
+0:	ldarx	%0,0,%1	\n\
+	sub%I2c.	%0,%0,%2	\n\
+	cntlzd	%0,%0	\n\
+	bne-	1f	\n\
+	stdcx.	%3,0,%1	\n\
+	bne-	0b	\n\
+1:	\n\
+" : "=&b"(result) : "r"(p), "Ir"(oldval), "r"(newval) : "cr0", "memory");
+  /* 64 >> 6 == 1 for a match; any smaller count shifts down to 0.  */
+  return (int) (result >> 6);
+}
+
+/* Atomically store NEWVAL into the doubleword at P and return the value
+   that was there before.  The ldarx/stdcx. sequence is retried (bne- 0b)
+   until the conditional store succeeds.  */
+static __ATOMICITY_INLINE long int
+__attribute__ ((unused))
+always_swap (volatile long int *p, long int newval)
+{
+  /* Receives the old contents of *P.  */
+  long int result;
+  __asm__ ("\n\
+0:	ldarx	%0,0,%1	\n\
+	stdcx.	%2,0,%1	\n\
+	bne-	0b	\n\
+" : "=&r"(result) : "r"(p), "r"(newval) : "cr0", "memory");
+  return result;
+}
+
+/* If the doubleword at P is zero, atomically store NEWVAL there;
+   otherwise leave it alone (bne- 1f skips the store).  Either way the
+   value observed by ldarx is returned.
+   NOTE(review): RESULT is an int but is filled by a 64-bit ldarx and
+   tested with cmpdi; a value that is nonzero only in its upper 32 bits
+   may come back as 0 even though no store happened -- confirm whether
+   callers can ever store such values.  */
+static __ATOMICITY_INLINE int
+__attribute__ ((unused))
+test_and_set (volatile long int *p, long int newval)
+{
+  int result;
+  __asm__ ("\n\
+0:	ldarx	%0,0,%1	\n\
+	cmpdi	%0,0	\n\
+	bne-	1f	\n\
+	stdcx.	%2,0,%1	\n\
+	bne-	0b	\n\
+1:	\n\
+" : "=&r"(result) : "r"(p), "r"(newval) : "cr0", "memory");
+  return result;
+}
+
+#endif /* atomicity.h */
diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c
new file mode 100644
index 0000000000..8669b6a689
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/backtrace.c
@@ -0,0 +1,69 @@
+/* Return backtrace of current program state.
+   Copyright (C) 1998, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <execinfo.h>
+#include <stddef.h>
+#include <bp-checks.h>
+
+/* This is the stack layout we see with every stack frame.
+   Note that every routine is required by the ABI to lay out the stack
+   like this.
+
+            +----------------+        +-----------------+
+    %r1  -> | %r1 last frame--------> | %r1 last frame--->...  --> NULL
+            |                |        |                 |
+            | cr save        |        | cr save	  |
+            |                |        |                 |
+            | (unused)       |        | return address  |
+            +----------------+        +-----------------+
+*/
+/* The first three doublewords of a stack frame, in the order drawn in
+   the diagram above.  __backtrace walks a chain of these.  */
+struct layout
+{
+  /* Back chain: the frame header of the previous frame, NULL at the end
+     of the chain.  */
+  struct layout *__unbounded next;
+  /* The "cr save" slot from the diagram.  */
+  long condition_register;
+  /* The "return address" slot from the diagram.  */
+  void *__unbounded return_address;
+};
+
+/* Fill ARRAY with up to SIZE return addresses, innermost first, by
+   following the stack back chain, and return how many entries were
+   stored.  A trailing NULL return address is not counted.  */
+int
+__backtrace (void **array, int size)
+{
+  struct layout *frame;
+  int depth = 0;
+
+  /* Dummy asm with an LR output: forces gcc to spill LR.  */
+  asm volatile ("" : "=l"(frame));
+
+  /* Load the doubleword at 0(r1), the top-of-stack back chain word.  */
+  asm volatile ("ld %0,0(1)" : "=r"(frame));
+  frame = BOUNDED_1 (frame);
+
+  /* Record one return address per frame until the chain ends or ARRAY
+     is full.  */
+  while (frame != NULL && depth < size)
+    {
+      array[depth] = frame->return_address;
+      frame = BOUNDED_1 (frame->next);
+      depth++;
+    }
+
+  /* The outermost frame recorded may belong to a routine that cannot
+     return (__libc_start_main); the CRT startup code leaves its LR as
+     NULL, so drop that entry.  */
+  if (depth > 0 && array[depth - 1] == NULL)
+    depth--;
+
+  return depth;
+}
+weak_alias (__backtrace, backtrace)
diff --git a/sysdeps/powerpc/powerpc64/bp-asm.h b/sysdeps/powerpc/powerpc64/bp-asm.h
new file mode 100644
index 0000000000..ee99e3044b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/bp-asm.h
@@ -0,0 +1,115 @@
+/* Bounded-pointer definitions for PowerPC64 assembler.
+   Copyright (C) 2000, 2002 Free Software Foundation, Inc.
+   Contributed by Greg McGary <greg@mcgary.org>
+
+   This file is part of the GNU C Library.  Its master source is NOT part of
+   the C library, however.  The master source lives in the GNU MP Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#if __BOUNDED_POINTERS__
+
+/* Byte offsets of BP components.  Every macro below accesses these
+   slots with ld/std, i.e. as 8-byte doublewords, so consecutive slots
+   must be 8 bytes apart; 4-byte spacing would make the loads overlap.  */
+# define oVALUE	0
+# define oLOW	8
+# define oHIGH	16
+
+/* Don't check bounds, just convert the BP register to its simple
+   pointer value.  */
+
+# define DISCARD_BOUNDS(rBP)			\
+	ld	rBP, oVALUE(rBP)
+
+/* Check low bound, with the side effect that the BP register is converted
+   to its simple pointer value.  Move the high bound into a register for
+   later use.  */
+
+# define CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH)	\
+	ld	rHIGH, oHIGH(rBP);		\
+	ld	rLOW, oLOW(rBP);		\
+	ld	rBP, oVALUE(rBP);		\
+	tdllt	rBP, rLOW
+
+/* Check the high bound, which is in a register, using the given
+   conditional trap instruction.  */
+
+# define CHECK_BOUNDS_HIGH(rVALUE, rHIGH, TWLcc) \
+	TWLcc	rVALUE, rHIGH
+
+/* Check the high bound, which is stored at the return-value's high
+   bound slot, using the given conditional trap instruction.  */
+
+# define CHECK_BOUNDS_HIGH_RTN(rVALUE, rHIGH, TWLcc)	\
+	ld	rHIGH, oHIGH(rRTN);			\
+	TWLcc	rVALUE, rHIGH
+
+/* Check both bounds, with the side effect that the BP register is
+   converted to its simple pointer value.  */
+
+# define CHECK_BOUNDS_BOTH(rBP, rLOW, rHIGH)	\
+	CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH);	\
+	tdlge	rBP, rHIGH
+
+/* Check bounds on a memory region of given length, with the side
+   effect that the BP register is converted to its simple pointer
+   value.  */
+
+# define CHECK_BOUNDS_BOTH_WIDE(rBP, rLOW, rHIGH, rLENGTH)	\
+	CHECK_BOUNDS_LOW (rBP, rLOW, rHIGH);			\
+	sub	rHIGH, rHIGH, rLENGTH;				\
+	tdlgt	rBP, rHIGH
+
+# define CHECK_BOUNDS_BOTH_WIDE_LIT(rBP, rLOW, rHIGH, LENGTH)	\
+	CHECK_BOUNDS_LOW (rBP, rLOW, rHIGH);			\
+	subi	rHIGH, rHIGH, LENGTH;				\
+	tdlgt	rBP, rHIGH
+
+/* Store a pointer value register into the return-value's pointer
+   value slot.  */
+
+# define STORE_RETURN_VALUE(rVALUE)		\
+	std	rVALUE, oVALUE(rRTN)
+
+/* Store a low and high bounds into the return-value's pointer bounds
+   slots.  */
+
+# define STORE_RETURN_BOUNDS(rLOW, rHIGH)	\
+	std	rLOW, oLOW(rRTN);		\
+	std	rHIGH, oHIGH(rRTN)
+
+/* Stuff zero value/low/high into the BP addressed by rRTN.  */
+
+# define RETURN_NULL_BOUNDED_POINTER		\
+	li	r4, 0;				\
+	STORE_RETURN_VALUE (r4);		\
+	STORE_RETURN_BOUNDS (r4, r4)
+
+#else
+
+# define DISCARD_BOUNDS(rBP)
+# define CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH)
+# define CHECK_BOUNDS_HIGH(rVALUE, rHIGH, TWLcc)
+# define CHECK_BOUNDS_HIGH_RTN(rVALUE, rHIGH, TWLcc)
+# define CHECK_BOUNDS_BOTH(rBP, rLOW, rHIGH)
+# define CHECK_BOUNDS_BOTH_WIDE(rBP, rLOW, rHIGH, rLENGTH)
+# define CHECK_BOUNDS_BOTH_WIDE_LIT(rBP, rLOW, rHIGH, LENGTH)
+# define STORE_RETURN_VALUE(rVALUE)
+# define STORE_RETURN_BOUNDS(rLOW, rHIGH)
+
+# define RETURN_NULL_BOUNDED_POINTER li rRTN, 0
+
+#endif
+
diff --git a/sysdeps/powerpc/powerpc64/bsd-_setjmp.S b/sysdeps/powerpc/powerpc64/bsd-_setjmp.S
new file mode 100644
index 0000000000..994f82f851
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/bsd-_setjmp.S
@@ -0,0 +1,21 @@
+/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'.  PowerPC64 version.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* This code was moved into setjmp.S to solve a double stub call problem. 
+   @local would have worked but it is not supported in PowerPC64 asm.   */
diff --git a/sysdeps/powerpc/powerpc64/bsd-setjmp.S b/sysdeps/powerpc/powerpc64/bsd-setjmp.S
new file mode 100644
index 0000000000..b6bb8f6e08
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/bsd-setjmp.S
@@ -0,0 +1,21 @@
+/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'.  PowerPC64 version.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* This code was moved into setjmp.S to solve a double stub call problem. 
+   @local would have worked but it is not supported in PowerPC64 asm.  */
diff --git a/sysdeps/powerpc/powerpc64/dl-dtprocnum.h b/sysdeps/powerpc/powerpc64/dl-dtprocnum.h
new file mode 100644
index 0000000000..477cb44339
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/dl-dtprocnum.h
@@ -0,0 +1,22 @@
+/* Number of processor-specific dynamic section entries.  PowerPC64 version.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Number of extra dynamic section entries for this architecture, taken
+   from DT_PPC64_NUM.  */
+#define DT_THISPROCNUM	DT_PPC64_NUM
diff --git a/sysdeps/powerpc/powerpc64/dl-lookupcfg.h b/sysdeps/powerpc/powerpc64/dl-lookupcfg.h
new file mode 100644
index 0000000000..e502941015
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/dl-lookupcfg.h
@@ -0,0 +1,22 @@
+/* Configuration of lookup functions.  PowerPC64 version.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Return the symbol map from the symbol lookup function.  */
+
+#define DL_LOOKUP_RETURNS_MAP 1
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.c b/sysdeps/powerpc/powerpc64/dl-machine.c
new file mode 100644
index 0000000000..ef7b3400f3
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/dl-machine.c
@@ -0,0 +1,50 @@
+/* Machine-dependent ELF dynamic relocation functions.  PowerPC64 version.
+   Copyright (C) 1995,96,97,98,99,2000,01, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <string.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <stdio-common/_itoa.h>
+#include <dl-machine.h>
+
+/* Report a relocation that does not fit its field: build the message
+   "<NAME> reloc at 0x<address>[ for symbol `<symbol>'] out of range"
+   and pass it to _dl_signal_error together with MAP's l_name.
+
+   MAP        link map whose DT_STRTAB is used to find the symbol name.
+   NAME       text placed at the start of the message.  NOTE(review):
+              assumed to be a short relocation-type name; it is copied
+              without a length check -- confirm against callers.
+   RELOC_ADDR address printed in hexadecimal, zero-padded to 16 digits.
+   SYM, REFSYM  the symbol named in the message is SYM if non-null,
+              otherwise REFSYM; with both null the clause is omitted.  */
+void
+_dl_reloc_overflow (struct link_map *map,
+		   const char *name,
+		   Elf64_Addr *const reloc_addr,
+		   const Elf64_Sym *sym,
+		   const Elf64_Sym *refsym)
+{
+  char buffer[1024];
+  char *t;
+  const Elf64_Sym *errsym = sym ?: refsym;
+  t = stpcpy (buffer, name);
+  /* _itoa_word writes its digits backwards, ending just before the
+     pointer it is handed.  Lay down sixteen '0' placeholders first so
+     the digits land on them instead of on the preceding text.  */
+  t = stpcpy (t, " reloc at 0x0000000000000000");
+  _itoa_word ((unsigned long) reloc_addr, t, 16, 0);
+  if (errsym)
+    {
+      const char *strtab;
+      const char *symname;
+      /* Bytes still needed after the symbol name, terminating NUL
+	 included.  */
+      const size_t tail = sizeof "' out of range";
+      size_t used, room, len;
+
+      strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
+      symname = strtab + errsym->st_name;
+      t = stpcpy (t, " for symbol `");
+
+      /* Symbol names have no fixed upper length; copy only as much as
+	 still fits so the stack buffer cannot be overrun.  */
+      used = (size_t) (t - buffer);
+      room = used + tail < sizeof buffer ? sizeof buffer - used - tail : 0;
+      len = strlen (symname);
+      if (len > room)
+	len = room;
+      memcpy (t, symname, len);
+      t += len;
+      t = stpcpy (t, "'");
+    }
+  t = stpcpy (t, " out of range");
+  _dl_signal_error (0, map->l_name, NULL, buffer);
+}
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
new file mode 100644
index 0000000000..408161649b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -0,0 +1,750 @@
+/* Machine-dependent ELF dynamic relocation inline functions.  
+   PowerPC64 version.
+   Copyright 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#ifndef dl_machine_h
+#define dl_machine_h
+
+#define ELF_MACHINE_NAME "powerpc64"
+
+#include <assert.h>
+#include <sys/param.h>
+
+/* Translate a processor specific dynamic tag to the index
+   in l_info array.  */
+#define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM)
+
+/* A PowerPC64 function descriptor.  The .plt (procedure linkage
+   table) and .opd (official procedure descriptor) sections are
+   arrays of these.  */
+typedef struct
+{
+  /* Offset 0: target address; the resolver trampoline below loads it
+     and moves it to CTR.  */
+  Elf64_Addr fd_func;
+  /* Offset 8: TOC value; the trampoline loads it into r2.  */
+  Elf64_Addr fd_toc;
+  /* Offset 16: the trampoline loads it into r11 ("static chain reg").  */
+  Elf64_Addr fd_aux;
+} Elf64_FuncDesc;
+
+#define ELF_MULT_MACHINES_SUPPORTED
+
+/* Tell whether the object described by EHDR can be handled by this
+   loader: returns 1 when its e_machine is EM_PPC64, 0 otherwise.  */
+static inline int
+elf_machine_matches_host (const Elf64_Ehdr *ehdr)
+{
+  if (ehdr->e_machine != EM_PPC64)
+    return 0;
+  return 1;
+}
+
+/* Tell whether EHDR names a machine the running host can execute even
+   though this loader cannot load it: returns 1 when e_machine is
+   EM_PPC, 0 otherwise.  */
+static inline int
+elf_host_tolerates_machine (const Elf64_Ehdr *ehdr)
+{
+  if (ehdr->e_machine != EM_PPC)
+    return 0;
+  return 1;
+}
+
+/* Tell whether EHDR has an ELF class the running host can execute even
+   though this loader cannot load it: returns 1 when e_ident[EI_CLASS]
+   is ELFCLASS32, 0 otherwise.  */
+static inline int
+elf_host_tolerates_class (const Elf64_Ehdr *ehdr)
+{
+  if (ehdr->e_ident[EI_CLASS] != ELFCLASS32)
+    return 0;
+  return 1;
+}
+
+
+/* Return the run-time load address of the shared object, assuming it
+   was originally linked at zero.  */
+static inline Elf64_Addr
+elf_machine_load_address (void) __attribute__ ((const));
+
+/* Compute how far this object was moved from its link-time address:
+   the current r2 minus the link-time TOC base stored in the first
+   .got/.toc entry.  Takes no arguments; reads r2 directly.  */
+static inline Elf64_Addr
+elf_machine_load_address (void)
+{
+  Elf64_Addr ret;
+
+  /* The first entry in .got (and thus the first entry in .toc) is the
+     link-time TOC_base, ie. r2.  So the difference between that and
+     the current r2 set by the kernel is how far the shared lib has
+     moved.  */
+  /* The ld fetches that entry from r2 - 32768; subf then leaves
+     r2 - entry in RET.  */
+  asm (	"	ld	%0,-32768(2)\n"
+	"	subf	%0,%0,2\n"
+	: "=r"	(ret));
+  return ret;
+}
+
+/* Return the link-time address of _DYNAMIC.  */
+static inline Elf64_Addr
+elf_machine_dynamic (void)
+{
+  Elf64_Addr runtime_dynamic;
+  /* It's easier to get the run-time address.  */
+  /* Formed as r2 plus the @toc@ha and @toc@l parts of _DYNAMIC.  The
+     output uses the "b" class because it is also the source operand of
+     the addi.  */
+  asm (	"	addis	%0,2,_DYNAMIC@toc@ha\n"
+	"	addi	%0,%0,_DYNAMIC@toc@l\n"
+	: "=b"	(runtime_dynamic));
+  /* Then subtract off the load address offset.  */
+  return runtime_dynamic - elf_machine_load_address() ;
+}
+
+#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */
+
+/* The PLT uses Elf64_Rela relocs.  */
+#define elf_machine_relplt elf_machine_rela
+
+/* This code gets called via a .glink stub which loads PLT0.  It is
+   used in dl-runtime.c to call the `fixup' function and then redirect
+   to the address `fixup' returns.
+
+   Enter with r0 = plt reloc index,
+   r2 = ld.so tocbase,
+   r11 = ld.so link map.  */
+
+#define TRAMPOLINE_TEMPLATE(tramp_name, fixup_name) \
+  asm (".section \".text\"\n"						\
+"	.align	2\n"							\
+"	.globl	." #tramp_name "\n"					\
+"	.type	." #tramp_name ",@function\n"				\
+/* 24-byte .opd entry for tramp_name: code address, TOC base, 0.  */	\
+"	.section \".opd\",\"aw\"\n"					\
+"	.align	3\n"							\
+"	.globl	" #tramp_name "\n"					\
+"	.size	" #tramp_name ",24\n"					\
+#tramp_name ":\n"							\
+"	.quad	." #tramp_name ",.TOC.@tocbase,0\n"			\
+"	.previous\n"							\
+"." #tramp_name ":\n"							\
+/* We need to save the registers used to pass parameters, ie. r3 thru	\
+   r10; the registers are saved in a stack frame.  */			\
+/* Interleaved with the saves: r3 = r11 (the link map) and		\
+   r4 = r0 * 3 * 8, ie. the reloc index scaled by 24 -- presumably	\
+   sizeof (Elf64_Rela), giving a byte offset; TODO confirm against	\
+   the parameters fixup_name expects.  */				\
+"	stdu	1,-128(1)\n"						\
+"	std	3,48(1)\n"						\
+"	mr	3,11\n"							\
+"	std	4,56(1)\n"						\
+"	sldi	4,0,1\n"						\
+"	std	5,64(1)\n"						\
+"	add	4,4,0\n"						\
+"	std	6,72(1)\n"						\
+"	sldi	4,4,3\n"						\
+"	std	7,80(1)\n"						\
+"	mflr	0\n"							\
+"	std	8,88(1)\n"						\
+/* Store the LR in the LR Save area of the previous frame.  */    \
+"	std	0,128+16(1)\n"						\
+"	mfcr	0\n"							\
+"	std	9,96(1)\n"						\
+"	std	10,104(1)\n"						\
+/* I'm almost certain we don't have to save cr...  be safe.  */    \
+"	std	0,8(1)\n"						\
+"	bl	." #fixup_name "\n"					\
+/* Put the registers back.  */						\
+/* r3 is reloaded last (below) because it still holds the function	\
+   descriptor address returned by the call.  */				\
+"	ld	0,128+16(1)\n"						\
+"	ld	10,104(1)\n"						\
+"	ld	9,96(1)\n"						\
+"	ld	8,88(1)\n"						\
+"	ld	7,80(1)\n"						\
+"	mtlr	0\n"							\
+"	ld	0,8(1)\n"						\
+"	ld	6,72(1)\n"						\
+"	ld	5,64(1)\n"						\
+"	ld	4,56(1)\n"						\
+"	mtcrf	0xFF,0\n"						\
+/* Load the target address, toc and static chain reg from the function  \
+   descriptor returned by fixup.  */					\
+"	ld	0,0(3)\n"						\
+"	ld	2,8(3)\n"						\
+"	mtctr	0\n"							\
+"	ld	11,16(3)\n"						\
+"	ld	3,48(1)\n"						\
+/* Unwind the stack frame, and jump.  */				\
+"	addi	1,1,128\n"						\
+"	bctr\n"								\
+/* Trailer after the code: a zero word, eight descriptor bytes, the	\
+   distance back to the entry point, and the symbol name preceded by	\
+   its length (this looks like a traceback table -- TODO confirm).  */	\
+".LT_" #tramp_name ":\n"						\
+"	.long 0\n"							\
+"	.byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n"		\
+"	.long .LT_" #tramp_name "-."#tramp_name "\n"			\
+"	.short .LT_" #tramp_name "_name_end-.LT_" #tramp_name "_name_start\n" \
+".LT_" #tramp_name "_name_start:\n"					\
+"	.ascii \"" #tramp_name "\"\n"					\
+".LT_" #tramp_name "_name_end:\n"					\
+"	.align 2\n"							\
+"	.size	." #tramp_name ",. - ." #tramp_name "\n"		\
+"	.previous");
+
+/* Emit the lazy-binding trampolines.  Without PROF there are two:
+   _dl_runtime_resolve calling fixup and _dl_profile_resolve calling
+   profile_fixup.  With PROF only _dl_runtime_resolve is generated and
+   _dl_profile_resolve is made a strong alias of it.  */
+#ifndef PROF
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE			\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup);	\
+  TRAMPOLINE_TEMPLATE (_dl_profile_resolve, profile_fixup);
+#else
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE			\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup);	\
+  void _dl_runtime_resolve (void);			\
+  strong_alias (_dl_runtime_resolve, _dl_profile_resolve);
+#endif
+
+
+/* Initial entry point code for the dynamic linker.  The C function
+   `_dl_start' is the real entry point; its return value is the user
+   program's entry point.  */
+#define RTLD_START \
+  asm (".section \".text\"\n"						\
+"	.align	2\n"							\
+"	.globl	._start\n"						\
+"	.type	._start,@function\n"					\
+"	.section \".opd\",\"aw\"\n"					\
+"	.align	3\n"							\
+"	.globl	_start\n"						\
+"	.size	_start,24\n"						\
+"_start:\n"								\
+"	.quad	._start,.TOC.@tocbase,0\n"				\
+"	.previous\n"							\
+"._start:\n"								\
+/* We start with the following on the stack, from top:			\
+   argc (4 bytes);							\
+   arguments for program (terminated by NULL);				\
+   environment variables (terminated by NULL);				\
+   arguments for the program loader.  */				\
+"	mr	3,1\n"							\
+"	li	4,0\n"							\
+"	stdu	4,-128(1)\n"						\
+/* Call _dl_start with one parameter pointing at argc.  */		\
+"	bl	._dl_start\n"						\
+"	nop\n"								\
+/* Transfer control to _dl_start_user!  */				\
+"	b	._dl_start_user\n"					\
+".LT__start:\n"  \
+"	.long 0\n"      \
+"	.byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n"		\
+"	.long .LT__start-._start\n"					\
+"	.short .LT__start_name_end-.LT__start_name_start\n"		\
+".LT__start_name_start:\n"						\
+"	.ascii \"_start\"\n"						\
+".LT__start_name_end:\n"						\
+"	.align 2\n"							\
+"	.size	._start,.-._start\n"					\
+"	.globl	_dl_start_user\n"					\
+"	.section \".opd\",\"aw\"\n"					\
+"_dl_start_user:\n"							\
+"	.quad	._dl_start_user, .TOC.@tocbase, 0\n"			\
+"	.previous\n"							\
+"	.globl	._dl_start_user\n"					\
+"	.type	._dl_start_user,@function\n"				\
+/* Now, we do our main work of calling initialisation procedures.  \
+   The ELF ABI doesn't say anything about parameters for these,  \
+   so we just pass argc, argv, and the environment.  \
+   Changing these is strongly discouraged (not least because argc is  \
+   passed by value!).  */  \
+"._dl_start_user:\n"  \
+/* the address of _start in r30.  */					\
+"	mr	30,3\n"							\
+/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28.  */		\
+"	ld	28,_rtld_global@got(2)\n"    \
+"	ld	29,_dl_argc@got(2)\n"					\
+"	ld	27,_dl_argv@got(2)\n"					\
+/* _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1).  */	\
+"	ld	3,0(28)\n"						\
+"	lwa	4,0(29)\n"						\
+"	ld	5,0(27)\n"						\
+"	sldi	6,4,3\n"						\
+"	add	6,5,6\n"						\
+"	addi	6,6,8\n"						\
+"	bl	._dl_init\n"						\
+"	nop\n"								\
+/* Now, to conform to the ELF ABI, we have to:				\
+   Pass argc (actually _dl_argc) in r3;  */				\
+"	lwa	3,0(29)\n"						\
+/* Pass argv (actually _dl_argv) in r4;  */				\
+"	ld	4,0(27)\n"						\
+/* Pass argv+argc+1 in r5;  */						\
+"	sldi	5,3,3\n"						\
+"	add	6,4,5\n"						\
+"	addi	5,6,8\n"						\
+/* Pass the auxilary vector in r6. This is passed to us just after	\
+   _envp.  */								\
+"2:	ldu	0,8(6)\n"						\
+"	cmpdi	0,0\n"							\
+"	bne	2b\n"							\
+"	addi	6,6,8\n"						\
+/* Pass a termination function pointer (in this case _dl_fini) in	\
+   r7.  */								\
+"	ld	7,_dl_fini@got(2)\n"					\
+"	ld 	26,_dl_starting_up@got(2)\n"				\
+/* Pass the stack pointer in r1 (so far so good), pointing to a NULL	\
+   value.  This lets our startup code distinguish between a program	\
+   linked statically, which linux will call with argc on top of the	\
+   stack which will hopefully never be zero, and a dynamically linked	\
+   program which will always have a NULL on the top of the stack.	\
+   Take the opportunity to clear LR, so anyone who accidentally  \
+   returns from _start gets SEGV.  Also clear the next few words of	\
+   the stack.  */							\
+"	li	31,0\n"							\
+"	std	31,0(1)\n"						\
+"	mtlr	31\n"							\
+"	std	31,8(1)\n"						\
+"	std	31,16(1)\n"						\
+"	std	31,24(1)\n"						\
+/* Clear _dl_starting_up.  */						\
+"	stw	31,0(26)\n"						\
+/* Now, call the start function descriptor at r30...  */		\
+"	.globl	._dl_main_dispatch\n"  \
+"._dl_main_dispatch:\n"  \
+"	ld	0,0(30)\n"						\
+"	ld	2,8(30)\n"						\
+"	mtctr	0\n"							\
+"	ld	11,16(30)\n"						\
+"	bctr\n"								\
+".LT__dl_start_user:\n"	\
+"	.long 0\n"							\
+"	.byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n"		\
+"	.long .LT__dl_start_user-._dl_start_user\n"			\
+"	.short .LT__dl_start_user_name_end-.LT__dl_start_user_name_start\n" \
+".LT__dl_start_user_name_start:\n"					\
+"	.ascii \"_dl_start_user\"\n"					\
+".LT__dl_start_user_name_end:\n"					\
+"	.align 2\n"							\
+"	.size	._dl_start_user,.-._dl_start_user\n"			\
+"	.previous");
+
+/* Nonzero iff TYPE should not be allowed to resolve to one of
+   the main executable's symbols, as for a COPY reloc.  */
+#define elf_machine_lookup_noexec_p(type) ((type) == R_PPC64_COPY)
+
+/* Nonzero iff TYPE describes relocation of a PLT entry, so
+   PLT entries should not be allowed to define the value.  */
+#define elf_machine_lookup_noplt_p(type) ((type) == R_PPC64_JMP_SLOT)
+
+/* ELF_RTYPE_CLASS_PLT iff TYPE must not resolve to a PLT entry; here
+   that is R_PPC64_ADDR24 (NOTE(review): noplt_p tests JMP_SLOT; confirm).
+   ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to one
+   of the main executable's symbols, as for a COPY reloc.  */
+#define elf_machine_type_class(type) \
+  ((((type) == R_PPC64_ADDR24) * ELF_RTYPE_CLASS_PLT)	\
+   | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY))
+
+/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries.  */
+#define ELF_MACHINE_JMP_SLOT	R_PPC64_JMP_SLOT
+
+/* The PowerPC never uses REL relocations.  */
+#define ELF_MACHINE_NO_REL 1
+
+/* Stuff for the PLT: index of the first lazy slot in the PLT and in glink.  */
+#define PLT_INITIAL_ENTRY_WORDS 3
+#define GLINK_INITIAL_ENTRY_WORDS 8
+
+#define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory")
+#define PPC_SYNC asm volatile ("sync" : : : "memory")
+#define PPC_ISYNC asm volatile ("sync; isync" : : : "memory") /* sync, then isync.  */
+#define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory")
+#define PPC_DIE asm volatile ("tweq 0,0") /* Compares r0 with itself: always traps.  */
+/* dcbst + sync + icbi on WHERE.  Use this when the modified code won't be
+   in the instruction fetch queue (or when it doesn't matter if it is). */
+#define MODIFIED_CODE_NOQUEUE(where) \
+     do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0)
+/* As above plus PPC_ISYNC.  Use this when it might be in the queue. */
+#define MODIFIED_CODE(where) \
+     do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0)
+
+/* Set up MAP so its unrelocated PLT entries jump to the on-demand fixup
+   code in dl-runtime.c (PLT is filled in only if LAZY is nonzero).  Returns LAZY.  */
+static inline int
+elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
+{
+  if (map->l_info[DT_JMPREL])
+    {
+      Elf64_Word i;
+      Elf64_Word *glink = NULL;
+      Elf64_Xword *plt = (Elf64_Xword *) D_PTR (map, l_info[DT_PLTGOT]);
+      Elf64_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
+				    / sizeof (Elf64_Rela));
+      Elf64_Addr l_addr = map->l_addr;
+      Elf64_Dyn **info = map->l_info;
+      char *p;
+
+      extern void _dl_runtime_resolve (void);
+      extern void _dl_profile_resolve (void);
+
+      /* Relocate the DT_PPC64_GLINK entry in the _DYNAMIC section.
+	 elf_get_dynamic_info takes care of the standard entries but
+	 doesn't know exactly what to do with processor specific
+	 entries.  This is done whether or not LAZY is set.  */
+      if (info[DT_PPC64(GLINK)] != NULL)
+	info[DT_PPC64(GLINK)]->d_un.d_ptr += l_addr;
+
+      if (lazy)
+	{
+	  /* The function descriptor of the appropriate trampoline
+	     routine is used to set the 1st and 2nd doubleword of the
+	     plt_reserve.  */
+	  Elf64_FuncDesc *resolve_fd;
+	  Elf64_Word glink_offset;
+	  /* the plt_reserve area is the 1st 3 doublewords of the PLT */
+	  Elf64_FuncDesc *plt_reserve = (Elf64_FuncDesc *) plt;
+	  Elf64_Word offset;
+
+	  resolve_fd = (Elf64_FuncDesc *) (profile ? _dl_profile_resolve
+					   : _dl_runtime_resolve);
+	  if (profile && _dl_name_match_p (GL(dl_profile), map))
+	    /* This is the object we are looking for.  Say that we really
+	       want profiling and the timers are started.  */
+	    GL(dl_profile_map) = map;
+
+
+	  /* We need to stuff the address/TOC of _dl_runtime_resolve
+	     into doublewords 0 and 1 of plt_reserve.  Then we need to
+	     stuff the map address into doubleword 2 of plt_reserve.
+	     This allows the GLINK0 code to transfer control to the
+	     correct trampoline which will transfer control to fixup
+	     in dl-machine.c.  */
+	  plt_reserve->fd_func = resolve_fd->fd_func;
+	  plt_reserve->fd_toc  = resolve_fd->fd_toc;
+	  plt_reserve->fd_aux  = (Elf64_Addr) map;
+#ifdef RTLD_BOOTSTRAP
+	  /* When we're bootstrapping, the opd entry will not have
+	     been relocated yet.  */
+	  plt_reserve->fd_func += l_addr;
+	  plt_reserve->fd_toc  += l_addr;
+#endif
+	  
+	  /* Set up the lazy PLT entries.  */
+	  glink = (Elf64_Word *) D_PTR (map, l_info[DT_PPC64(GLINK)]);
+	  offset = PLT_INITIAL_ENTRY_WORDS;	/* Index into plt, in doublewords.  */
+	  glink_offset = GLINK_INITIAL_ENTRY_WORDS; /* Index into glink, in words.  */
+	  for (i = 0; i < num_plt_entries; i++)
+	    {
+
+	      plt[offset] = (Elf64_Xword) &glink[glink_offset]; /* Slot -> glink entry.  */
+	      offset += 3;	/* Step 3 doublewords to the next PLT slot.  */
+	      /* The first 32k entries of glink can set an index and
+		 branch using two instructions;  Past that point,
+		 glink uses three instructions.  */
+	      if (i < 0x8000)
+          	glink_offset += 2;
+	      else
+          	glink_offset += 3;
+	    }
+
+	  /* Now, we've modified data.  We need to write the changes from
+	     the data cache to a second-level unified cache, then make
+	     sure that stale data in the instruction cache is removed.
+	     (In a multiprocessor system, the effect is more complex.)
+	     Most of the PLT shouldn't be in the instruction cache, but
+	     there may be a little overlap at the start and the end.
+
+	     Assumes that dcbst and icbi apply to lines of 16 bytes or
+	     more.  Current known line sizes are 16, 32, and 128 bytes.  */
+
+	  for (p = (char *) plt; p < (char *) &plt[offset]; p += 16)
+	    PPC_DCBST (p);	/* Only dcbst is issued here; no icbi follows.  */
+	  PPC_SYNC;
+	}
+    }
+  return lazy;
+}
+
+static inline void
+elf_machine_lazy_rel (struct link_map *map,
+		      Elf64_Addr l_addr, const Elf64_Rela *reloc)
+{
+  /* Intentionally empty: elf_machine_runtime_setup fills the lazy PLT slots.  */
+}
+
+/* Change the PLT entry whose reloc is 'reloc' to call the actual
+   routine.  Returns 0 if SYM_MAP is NULL, otherwise FINALADDR.  */
+static inline Elf64_Addr
+elf_machine_fixup_plt (struct link_map *map, lookup_t sym_map,
+		       const Elf64_Rela *reloc,
+		       Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
+{
+  Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr;	/* Entry to overwrite.  */
+  Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr;	/* Descriptor to copy.  */
+  Elf64_Addr offset = 0;	/* Added to every copied field; see below.  */
+#ifndef RTLD_BOOTSTRAP
+  weak_extern (GL(dl_rtld_map));
+#endif
+
+  /* If sym_map is NULL, it's a weak undefined sym;  Leave the plt zero.  */
+  if (sym_map == NULL)
+    return 0;
+
+  /* If the opd entry is not yet relocated (because it's from a shared
+     object that hasn't been processed yet), then manually reloc it.  */
+  if (map != sym_map && !sym_map->l_relocated
+#ifndef RTLD_BOOTSTRAP
+      /* Bootstrap map doesn't have l_relocated set for it.  */
+      && sym_map != &GL(dl_rtld_map)
+#endif
+      )
+    offset = sym_map->l_addr;
+
+  /* For PPC64, fixup_plt copies the function descriptor from opd
+     over the corresponding PLT entry.
+     Initially, PLT Entry[i] is set up for lazy linking, or is zero.
+     For lazy linking, the fd_toc and fd_aux entries are irrelevant,
+     so for thread safety we write them before changing fd_func.  */
+
+  plt->fd_aux = rel->fd_aux + offset;
+  plt->fd_toc = rel->fd_toc + offset;
+  PPC_DCBST (&plt->fd_aux);
+  PPC_DCBST (&plt->fd_toc);
+  PPC_SYNC;	/* Order the aux/toc stores ahead of the fd_func store.  */
+
+  plt->fd_func = rel->fd_func + offset;	/* Written last, per the note above.  */
+  PPC_DCBST (&plt->fd_func);
+  PPC_SYNC;
+
+  return finaladdr;
+}
+
+/* Return the final value of a plt relocation: VALUE plus RELOC's addend.  */
+static inline Elf64_Addr
+elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
+		       Elf64_Addr value)
+{
+  return value + reloc->r_addend;	/* MAP is not used.  */
+}
+ 
+#endif /* dl_machine_h */
+
+#ifdef RESOLVE_MAP
+
+#define PPC_LO(v) ((v) & 0xffff)	/* Bits 0-15 of V.  */
+#define PPC_HI(v) (((v) >> 16) & 0xffff)	/* Bits 16-31 of V.  */
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)	/* PPC_HI after adding 0x8000.  */
+#define PPC_HIGHER(v) (((v) >> 32) & 0xffff)	/* Bits 32-47 of V.  */
+#define PPC_HIGHERA(v) PPC_HIGHER ((v) + 0x8000) /* PPC_HIGHER after adding 0x8000.  */
+#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff)	/* Bits 48-63 of V.  */
+#define PPC_HIGHESTA(v) PPC_HIGHEST ((v) + 0x8000) /* PPC_HIGHEST after adding 0x8000.  */
+#define BIT_INSERT(old, val, mask) ((old & ~(Elf64_Addr) mask) | (val & mask)) /* OLD with its MASK bits taken from VAL.  */
+
+#define dont_expect(X) __builtin_expect ((X), 0)	/* Hint that X is normally 0.  */
+
+extern void _dl_reloc_overflow (struct link_map *map,
+                                const char *name,
+                                Elf64_Addr *const reloc_addr,
+                                const Elf64_Sym *sym,
+                                const Elf64_Sym *refsym)
+                                attribute_hidden;	/* Called by elf_machine_rela on range/alignment failures.  */
+
+static inline void	/* Apply a relative reloc.  */
+elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
+			   Elf64_Addr *const reloc_addr)
+{
+  *reloc_addr = l_addr + reloc->r_addend;	/* Store L_ADDR plus the addend.  */
+}			       
+
+/* Perform the relocation specified by RELOC and SYM (which is fully
+   resolved).  MAP is the object containing the reloc.  */
+static inline void
+elf_machine_rela (struct link_map *map,
+		  const Elf64_Rela *reloc,
+		  const Elf64_Sym *sym,
+		  const struct r_found_version *version,
+		  Elf64_Addr *const reloc_addr)
+{
+  int r_type = ELF64_R_TYPE (reloc->r_info);
+  struct link_map *sym_map;
+  Elf64_Addr value;
+#ifndef RTLD_BOOTSTRAP
+  const Elf64_Sym *const refsym = sym;	/* SYM as passed in; RESOLVE_MAP gets &sym below.  */
+  /* This is defined in rtld.c, but nowhere in the static libc.a; make the
+     reference weak so static programs can still link.  This declaration
+     cannot be done when compiling rtld.c (i.e.  #ifdef RTLD_BOOTSTRAP)
+     because rtld.c contains the common defn for _dl_rtld_map, which is
+     incompatible with a weak decl in the same file.  */
+  weak_extern (GL(dl_rtld_map));
+#endif
+
+  if (r_type == R_PPC64_RELATIVE)
+    {
+#ifndef RTLD_BOOTSTRAP
+      /* Already done in dynamic linker.  */
+      if (map != &GL(dl_rtld_map))
+#endif
+	*reloc_addr = map->l_addr + reloc->r_addend;
+      return;
+    }
+
+  if (r_type == R_PPC64_NONE)
+    return;
+
+  sym_map = RESOLVE_MAP (&sym, version, r_type);
+  value = 0;	/* Stays 0 when RESOLVE_MAP returned no map.  */
+  if (sym_map)
+    {
+      if (sym)
+	value = sym_map->l_addr + sym->st_value;
+      value += reloc->r_addend;
+    }
+
+  switch (r_type)	/* Cases that break reach MODIFIED_CODE_NOQUEUE at the end.  */
+    {
+    case R_PPC64_ADDR64:
+    case R_PPC64_GLOB_DAT:
+      *reloc_addr = value;
+      return;
+
+    case R_PPC64_JMP_SLOT:
+
+      elf_machine_fixup_plt (map, sym_map, reloc, reloc_addr, value);
+      return;
+
+#ifndef RTLD_BOOTSTRAP /* None of the following appear in ld.so */
+    case R_PPC64_ADDR16_LO_DS:
+      if (dont_expect ((value & 3) != 0))	/* Low two bits must be clear.  */
+        _dl_reloc_overflow (map, "R_PPC64_ADDR16_LO_DS", 
+                            reloc_addr, sym, refsym);
+      *(Elf64_Half *) reloc_addr = BIT_INSERT (*(Elf64_Half *) reloc_addr,
+					       value, 0xfffc);
+      break;
+
+    case R_PPC64_ADDR16_LO:
+      *(Elf64_Half *) reloc_addr = PPC_LO (value);
+      break;
+
+    case R_PPC64_ADDR16_HI:
+      *(Elf64_Half *) reloc_addr = PPC_HI (value);
+      break;
+
+    case R_PPC64_ADDR16_HA:
+      *(Elf64_Half *) reloc_addr = PPC_HA (value);
+      break;
+
+    case R_PPC64_REL24:
+      {
+        Elf64_Addr delta = value - (Elf64_Xword) reloc_addr;	/* Distance from the reloc site.  */
+        if (dont_expect ((delta + 0x2000000) >= 0x4000000 || (delta & 3) != 0))
+          _dl_reloc_overflow (map, "R_PPC64_REL24", reloc_addr, sym, refsym);
+        *(Elf64_Word *) reloc_addr = BIT_INSERT (*(Elf64_Word *) reloc_addr,
+                                                delta, 0x3fffffc);
+      }
+      break;
+
+    case R_PPC64_COPY:
+      if (dont_expect (sym == NULL))
+      /* This can happen in trace mode when an object could not be found. */
+        return;
+      if (dont_expect (sym->st_size > refsym->st_size
+        || (GL(dl_verbose) && sym->st_size < refsym->st_size)))
+	      {
+          const char *strtab;
+
+          strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
+          _dl_error_printf ("%s: Symbol `%s' has different size" \
+                            " in shared object," \
+                            " consider re-linking\n",
+                            _dl_argv[0] ?: "<program name unknown>",
+                              strtab + refsym->st_name);
+	      }
+      memcpy (reloc_addr, (char *) value, MIN (sym->st_size, refsym->st_size)); /* Copy the smaller size.  */
+      return;
+
+    case R_PPC64_UADDR64:
+      /* We are big-endian.  Stored byte by byte, most significant first.  */
+      ((char *) reloc_addr)[0] = (value >> 56) & 0xff;
+      ((char *) reloc_addr)[1] = (value >> 48) & 0xff;
+      ((char *) reloc_addr)[2] = (value >> 40) & 0xff;
+      ((char *) reloc_addr)[3] = (value >> 32) & 0xff;
+      ((char *) reloc_addr)[4] = (value >> 24) & 0xff;
+      ((char *) reloc_addr)[5] = (value >> 16) & 0xff;
+      ((char *) reloc_addr)[6] = (value >> 8) & 0xff;
+      ((char *) reloc_addr)[7] = (value >> 0) & 0xff;
+      return;
+
+    case R_PPC64_UADDR32:
+      /* We are big-endian.  Only the low 32 bits of VALUE are stored.  */
+      ((char *) reloc_addr)[0] = (value >> 24) & 0xff;
+      ((char *) reloc_addr)[1] = (value >> 16) & 0xff;
+      ((char *) reloc_addr)[2] = (value >> 8) & 0xff;
+      ((char *) reloc_addr)[3] = (value >> 0) & 0xff;
+      return;
+
+    case R_PPC64_ADDR24:
+      if (dont_expect ((value + 0x2000000) >= 0x4000000 || (value & 3) != 0))
+        _dl_reloc_overflow (map, "R_PPC64_ADDR24", reloc_addr, sym, refsym);
+      *(Elf64_Word *) reloc_addr = BIT_INSERT (*(Elf64_Word *) reloc_addr,
+					       value, 0x3fffffc);
+      break;
+
+    case R_PPC64_ADDR16:
+      if (dont_expect ((value + 0x8000) >= 0x10000))
+        _dl_reloc_overflow (map, "R_PPC64_ADDR16", reloc_addr, sym, refsym);
+      *(Elf64_Half *) reloc_addr = value;
+      break;
+
+    case R_PPC64_UADDR16:
+      if (dont_expect ((value + 0x8000) >= 0x10000))
+        _dl_reloc_overflow (map, "R_PPC64_UADDR16", reloc_addr, sym, refsym);
+      /* We are big-endian.  */
+      ((char *) reloc_addr)[0] = (value >> 8) & 0xff;
+      ((char *) reloc_addr)[1] = (value >> 0) & 0xff;
+      break;
+
+    case R_PPC64_ADDR16_DS:
+      if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0))
+        _dl_reloc_overflow (map, "R_PPC64_ADDR16_DS", reloc_addr, sym, refsym);
+      *(Elf64_Half *) reloc_addr = BIT_INSERT (*(Elf64_Half *) reloc_addr,
+					       value, 0xfffc);
+      break;
+
+    case R_PPC64_ADDR16_HIGHER:
+      *(Elf64_Half *) reloc_addr = PPC_HIGHER (value);
+      break;
+
+    case R_PPC64_ADDR16_HIGHEST:
+      *(Elf64_Half *) reloc_addr = PPC_HIGHEST (value);
+      break;
+
+    case R_PPC64_ADDR16_HIGHERA:
+      *(Elf64_Half *) reloc_addr = PPC_HIGHERA (value);
+      break;
+
+    case R_PPC64_ADDR16_HIGHESTA:
+      *(Elf64_Half *) reloc_addr = PPC_HIGHESTA (value);
+      break;
+
+    case R_PPC64_ADDR14:
+    case R_PPC64_ADDR14_BRTAKEN:
+    case R_PPC64_ADDR14_BRNTAKEN:
+      {
+        Elf64_Word insn;
+        if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0))
+	        _dl_reloc_overflow (map, "R_PPC64_ADDR14", reloc_addr, sym, refsym);
+        insn = BIT_INSERT (*(Elf64_Word *) reloc_addr, value, 0xfffc);
+        if (r_type != R_PPC64_ADDR14)
+	        {
+	          insn &= ~(1 << 21);	/* Clear bit 21; presumably the branch hint bit -- confirm.  */
+	          if (r_type == R_PPC64_ADDR14_BRTAKEN)
+	            insn |= 1 << 21;	/* Set again only for BRTAKEN.  */
+	          if ((insn & (0x14 << 21)) == (0x04 << 21))
+	            insn |= 0x02 << 21;
+	          else if ((insn & (0x14 << 21)) == (0x10 << 21))
+	            insn |= 0x08 << 21;
+	        }
+        *(Elf64_Word *) reloc_addr = insn;
+      }
+      break;
+
+    case R_PPC64_REL32:
+      *(Elf64_Word *) reloc_addr = value - (Elf64_Xword) reloc_addr;	/* No range check here.  */
+      return;
+#endif /* !RTLD_BOOTSTRAP */
+
+    default:
+      _dl_reloc_bad_type (map, r_type, 0);
+      return;
+    }
+  MODIFIED_CODE_NOQUEUE (reloc_addr);	/* Skipped by every case that returns.  */
+}
+
+#endif /* RESOLVE_MAP */
diff --git a/sysdeps/powerpc/powerpc64/elf/bzero.S b/sysdeps/powerpc/powerpc64/elf/bzero.S
new file mode 100644
index 0000000000..f899bd234e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/elf/bzero.S
@@ -0,0 +1,21 @@
+/* Optimized bzero `implementation' for PowerPC64.
+   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* This code was moved into memset.S to solve a double stub call problem. 
+   @local would have worked but it is not supported in PowerPC64 asm.  */
diff --git a/sysdeps/powerpc/powerpc64/elf/start.S b/sysdeps/powerpc/powerpc64/elf/start.S
new file mode 100644
index 0000000000..129f126e9d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/elf/start.S
@@ -0,0 +1,66 @@
+/* Startup code for programs linked with GNU libc.  PowerPC64 version.
+   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "bp-sym.h"
+
+ /* Address table; _start passes its location to __libc_start_main in r8.  */
+	.section ".rodata"
+	.align	3		/* 8-byte alignment for the .quad entries.  */
+	weak_extern(_init)
+	weak_extern(_fini) 
+	weak_extern(._init)
+	weak_extern(._fini) 
+L(start_addresses):
+	.quad	0 /* was _SDA_BASE_  but not in 64-bit ABI*/
+/*     function descriptors so don't need JUMPTARGET */
+	.quad	BP_SYM(main) 
+	.quad 	_init
+	.quad 	_fini 
+
+	ASM_SIZE_DIRECTIVE(L(start_addresses))
+
+	.section	".toc","aw"
+.L01:				/* TOC entry holding the table's address.  */
+	.tc	L(start_addresses)[TC],L(start_addresses)
+	.section ".text"
+ENTRY(_start)
+ /* Save the stack pointer, in case we're statically linked under Linux.  */
+	mr	r9,r1
+ /* Set up an initial stack frame, and clear the LR.  */
+	clrrdi	r1,r1,4		/* Round r1 down to a 16-byte boundary.  */
+	li	r0,0
+	stdu	r1,-128(r1)	/* Push a 128-byte frame.  */
+	mtlr	r0
+	std	r0,0(r1)	/* Overwrite the word stdu just stored with 0.  */
+
+ /* put the address of start_addresses in r8...  **
+** PPC64 ABI uses R13 for thread local, so we leave it alone */
+	ld	r8,.L01(r2)	/* Loaded from the TOC entry .L01 via r2.  */
+
+ /* and continue in libc-start, in glibc.  This is a branch, not a call.  */
+	b	JUMPTARGET(BP_SYM(__libc_start_main)) 
+
+END(_start)
+
+/* Define a symbol for the first piece of initialized data (no storage).  */
+	.section ".data"
+	.globl	__data_start
+__data_start:
+weak_alias (__data_start, data_start)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_copysign.S b/sysdeps/powerpc/powerpc64/fpu/s_copysign.S
new file mode 100644
index 0000000000..40fd83d3d1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/s_copysign.S
@@ -0,0 +1,50 @@
+/* Copy a sign bit between floating-point values.  PowerPC64 version.
+   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* This has been coded in assembler because GCC makes such a mess of it
+   when it's coded in C.  */
+
+#include <sysdep.h>
+
+ENTRY(__copysign)
+/* double [f1] copysign (double [f1] x, double [f2] y);
+   copysign(x,y) returns a value with the magnitude of x and
+   with the sign bit of y.  */
+	stdu	r1,-48(r1)	/* Temporary 48-byte frame to move y through memory.  */
+	stfd	fp2,24(r1)	/* Store y ...  */
+	ld	r3,24(r1)	/* ... and reload its bit pattern into r3.  */
+	cmpdi   r3,0		/* Signed compare: less than 0 iff y's top bit is set.  */
+	addi    r1,r1,48	/* Pop the frame before branching on the compare.  */
+	blt     L(0)
+	fabs    fp1,fp1		/* Top bit of y clear: return |x|.  */
+	blr
+L(0):	fnabs   fp1,fp1		/* Top bit of y set: return -|x|.  */
+	blr
+	END (__copysign)
+
+weak_alias(__copysign,copysign)
+
+/* It turns out that it's safe to use this code even for single-precision.  */
+weak_alias(__copysign,copysignf)
+strong_alias(__copysign,__copysignf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias(__copysign,copysignl)
+strong_alias(__copysign,__copysignl)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S b/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S
new file mode 100644
index 0000000000..e05438ae7d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S
@@ -0,0 +1 @@
+/* __copysignf is in s_copysign.S  */
diff --git a/sysdeps/powerpc/powerpc64/memset.S b/sysdeps/powerpc/powerpc64/memset.S
new file mode 100644
index 0000000000..4bfe20d7b1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/memset.S
@@ -0,0 +1,296 @@
+/* Optimized memset implementation for PowerPC64.
+   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* Define a global variable that can hold the cache line size.  The
+   assumption is that startup code will access the "aux vector" to
+   obtain the value set by the kernel and store it into this
+   variable.  It is initialized to 0 here.  */
+	.globl __cache_line_size
+	.section	".data"
+	.align 2
+	.type	__cache_line_size,@object
+	.size	__cache_line_size,4
+__cache_line_size:
+	.long	0
+	.section	".toc","aw"
+.LC0:				/* TOC entry holding the variable's address.  */
+	.tc __cache_line_size[TC],__cache_line_size
+	.section	".text"
+	.align 2
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
+   Returns 's'.
+
+   The memset is done in several sizes: byte, halfword, word, doubleword
+   and 32-byte chunks.  There is a special case for setting memory to 0
+   that uses dcbz once __cache_line_size is known to be non-zero.  */
+
+EALIGN (BP_SYM (memset), 5, 0)
+
+#define rTMP	r0
+#define rRTN	r3	/* Initial value of 1st argument.  */
+#if __BOUNDED_POINTERS__
+# define rMEMP0	r4	/* Original value of 1st arg.  */
+# define rCHR	r5	/* Char to set in each byte.  */
+# define rLEN	r6	/* Length of region to set.  */
+# define rMEMP	r10	/* Address at which we are storing.  */
+#else
+# define rMEMP0	r3	/* Original value of 1st arg.  */
+# define rCHR	r4	/* Char to set in each byte.  */
+# define rLEN	r5	/* Length of region to set.  */
+# define rMEMP	r6	/* Address at which we are storing.  */
+#endif
+#define rALIGN	r7	/* Number of bytes we are setting now (when aligning). */
+#define rMEMP2	r8
+
+#define rNEG64	r8	/* Constant -64 used as the dcbtst index.  */
+#define rCLS	r8	/* Cache line size obtained from static.  */
+#define rCLM	r9	/* Cache line size mask to check for cache alignment.  */
+
+___memset:
+#if __BOUNDED_POINTERS__
+	cmpldi	cr1, rRTN, 0
+	CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
+	beq	cr1, L(b0)
+	STORE_RETURN_VALUE (rMEMP0)
+	STORE_RETURN_BOUNDS (rTMP, rTMP2)
+L(b0):
+#endif
+/* Take care of case for size <= 8.  */
+	cmpldi	cr1, rLEN, 8
+	andi.	rALIGN, rMEMP0, 7	/* cr0 is used by the beq+ below.  */
+	mr	rMEMP, rMEMP0
+	ble-	cr1, L(small)
+	
+/* Align to doubleword boundary.  */
+	cmpldi	cr5, rLEN, 31	/* cr5 is tested later at L(aligned).  */
+	rlwimi	rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword.  */
+	beq+	L(aligned2)
+	mtcrf	0x01, rMEMP0	/* cr7 = low 4 bits of the start address.  */
+	subfic	rALIGN, rALIGN, 8	/* Bytes needed to reach the boundary.  */
+	cror	28,30,31		/* Detect odd word aligned.  */
+	add	rMEMP, rMEMP, rALIGN
+	sub	rLEN, rLEN, rALIGN
+	rlwimi	rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word.  */
+	bt	29, L(g4)
+/* Process the even word of doubleword.  */
+	bf+	31, L(g2)
+	stb	rCHR, 0(rMEMP0)
+	bt	30, L(g4x)
+L(g2):
+	sth	rCHR, -6(rMEMP)
+L(g4x):
+	stw	rCHR, -4(rMEMP)
+	b	L(aligned)
+/* Process the odd word of doubleword.  */
+L(g4):
+	bf	28, L(g4x) /* If false, word aligned on odd word.  */
+	bf+	31, L(g0)
+	stb	rCHR, 0(rMEMP0)
+	bt	30, L(aligned)
+L(g0):	
+	sth	rCHR, -2(rMEMP)	
+	
+/* Handle the case of size <= 31 (cr5 was set from the original length).  */
+L(aligned2):
+	rlwimi	rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word.  */
+L(aligned):
+	mtcrf	0x01, rLEN	/* cr7 = low 4 bits of the remaining length.  */
+	ble	cr5, L(medium)
+/* Align to 32-byte boundary.  */
+	andi.	rALIGN, rMEMP, 0x18
+	subfic	rALIGN, rALIGN, 0x20
+	insrdi	rCHR,rCHR,32,0 /* Replicate word to double word. */
+	beq	L(caligned)
+	mtcrf	0x01, rALIGN
+	add	rMEMP, rMEMP, rALIGN
+	sub	rLEN, rLEN, rALIGN
+	cmplwi	cr1, rALIGN, 0x10
+	mr	rMEMP2, rMEMP	/* Store downward from the new rMEMP.  */
+	bf	28, L(a1)
+	stdu	rCHR, -8(rMEMP2)
+L(a1):	blt	cr1, L(a2)
+	std	rCHR, -8(rMEMP2)
+	stdu	rCHR, -16(rMEMP2)
+L(a2):
+
+/* Now aligned to a 32 byte boundary.  */
+L(caligned):
+	cmpldi	cr1, rCHR, 0
+	clrrdi.	rALIGN, rLEN, 5	/* rALIGN = rLEN rounded down to a multiple of 32.  */
+	mtcrf	0x01, rLEN	
+	beq	cr1, L(zloopstart) /* Special case for clearing memory using dcbz.  */
+L(nondcbz):	
+	srdi	rTMP, rALIGN, 5	/* Number of 32-byte chunks.  */
+	mtctr	rTMP
+	beq	L(medium)	/* We may not actually get to do a full line.  */
+	clrldi.	rLEN, rLEN, 59	/* rLEN = tail below 32; cr0 feeds the beqlr below.  */
+	add	rMEMP, rMEMP, rALIGN	/* Point past the chunks; stores go downward.  */
+	li	rNEG64, -0x40
+	bdz	L(cloopdone)
+
+L(c3):	dcbtst	rNEG64, rMEMP
+	std	rCHR, -8(rMEMP)
+	std	rCHR, -16(rMEMP)
+	std	rCHR, -24(rMEMP)
+	stdu	rCHR, -32(rMEMP)
+	bdnz	L(c3)
+L(cloopdone):
+	std	rCHR, -8(rMEMP)
+	std	rCHR, -16(rMEMP)
+	cmpldi	cr1, rLEN, 16
+	std	rCHR, -24(rMEMP)
+	stdu	rCHR, -32(rMEMP)
+	beqlr			/* No tail left: done.  */
+	add	rMEMP, rMEMP, rALIGN	/* Back up to the end of the chunks just stored.  */
+	b	L(medium_tail2)
+
+	.align 5
+/* Clear memory one cache line at a time using dcbz.  */
+L(zloopstart):
+/* If the remaining length is less than 32 bytes, don't bother getting
+	 the cache line size.  */
+	beq	L(medium)
+	ld	rCLS,.LC0@toc(r2)
+	lwz	rCLS,0(rCLS)	
+/* If the cache line size was not set just go to L(nondcbz) which is
+	 safe for any cache line size.  */	
+	cmpldi	cr1,rCLS,0
+	beq		cr1,L(nondcbz)
+	
+	
+/* Now we know the cache line size is set (non-zero), but
+	 we may not yet be aligned to the cache line. May have a partial 
+	 line to fill, so touch it 1st.  */	
+	dcbt	0,rMEMP	
+	addi	rCLM,rCLS,-1	/* Mask = cache line size - 1.  */
+L(getCacheAligned):
+	cmpldi	cr1,rLEN,32
+	and.	rTMP,rCLM,rMEMP	/* Zero when rMEMP is cache-line aligned.  */
+	blt		cr1,L(handletail32)
+	beq		L(cacheAligned)
+	addi	rMEMP,rMEMP,32
+	addi	rLEN,rLEN,-32
+	std		rCHR,-32(rMEMP)
+	std		rCHR,-24(rMEMP)
+	std		rCHR,-16(rMEMP)
+	std		rCHR,-8(rMEMP)
+	b		L(getCacheAligned)
+	
+/* Now we are aligned to the cache line and can use dcbz.  */	
+L(cacheAligned):
+	cmpld	cr1,rLEN,rCLS
+	blt		cr1,L(handletail32)
+	dcbz	0,rMEMP
+	subf	rLEN,rCLS,rLEN
+	add		rMEMP,rMEMP,rCLS
+	b		L(cacheAligned)
+
+/* We are here because the cache line size was set and the remainder
+   (rLEN) is less than 32 bytes or less than the actual cache line size.
+   So set up the preconditions for L(nondcbz) and go there.  */			
+L(handletail32):
+	clrrwi.	rALIGN, rLEN, 5
+	b		L(nondcbz)
+
+	.align 5
+L(small):
+/* Memset of 8 bytes or less.  */
+	cmpldi	cr6, rLEN, 4
+	cmpldi	cr5, rLEN, 1
+	ble	cr6,L(le4)
+	subi	rLEN, rLEN, 4
+	stb	rCHR,0(rMEMP)
+	stb	rCHR,1(rMEMP)
+	stb	rCHR,2(rMEMP)
+	stb	rCHR,3(rMEMP)
+	addi	rMEMP,rMEMP, 4
+	cmpldi	cr5, rLEN, 1	/* Redo the compare for the reduced length.  */
+L(le4):
+	cmpldi	cr1, rLEN, 3
+	bltlr	cr5
+	stb	rCHR, 0(rMEMP)
+	beqlr	cr5
+	stb	rCHR, 1(rMEMP)
+	bltlr	cr1
+	stb	rCHR, 2(rMEMP)
+	beqlr	cr1
+	stb	rCHR, 3(rMEMP)
+	blr
+
+/* Memset of 0-31 bytes.  cr7 has the low 4 bits of rLEN; cr1 is rLEN vs 16.  */
+	.align 5
+L(medium):
+	insrdi	rCHR,rCHR,32,0 /* Replicate word to double word.  */
+	cmpldi	cr1, rLEN, 16
+L(medium_tail2):
+	add	rMEMP, rMEMP, rLEN	/* Point at the end; stores go downward.  */
+L(medium_tail):
+	bt-	31, L(medium_31t)
+	bt-	30, L(medium_30t)
+L(medium_30f):
+	bt-	29, L(medium_29t)
+L(medium_29f):
+	bge-	cr1, L(medium_27t)
+	bflr-	28
+	std	rCHR, -8(rMEMP)
+	blr
+
+L(medium_31t):
+	stbu	rCHR, -1(rMEMP)
+	bf-	30, L(medium_30f)
+L(medium_30t):
+	sthu	rCHR, -2(rMEMP)
+	bf-	29, L(medium_29f)
+L(medium_29t):
+	stwu	rCHR, -4(rMEMP)
+	blt-	cr1, L(medium_27f) 
+L(medium_27t):
+	std	rCHR, -8(rMEMP)
+	stdu	rCHR, -16(rMEMP)
+L(medium_27f):
+	bflr-	28
+L(medium_28t):
+	std	rCHR, -8(rMEMP)
+	blr
+END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
+
+/* Copied from bzero.S to prevent the linker from inserting a stub
+   between bzero and memset.  Rearranges the arguments, then joins memset.  */
+ENTRY (BP_SYM (__bzero))
+#if __BOUNDED_POINTERS__
+	mr	r6,r4
+	li	r5,0
+	mr	r4,r3
+	/* Tell memset that we don't want a return value.  */
+	li	r3,0
+	b	___memset
+#else
+	mr	r5,r4		/* Length moves from r4 to r5 (memset's rLEN).  */
+	li	r4,0		/* Fill value is 0 (memset's rCHR).  */
+	b	___memset	/* Branch, not a call, to memset's local entry label.  */
+#endif
+END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)
+
+weak_alias (BP_SYM (__bzero), BP_SYM (bzero))
+
diff --git a/sysdeps/powerpc/powerpc64/ppc-mcount.S b/sysdeps/powerpc/powerpc64/ppc-mcount.S
new file mode 100644
index 0000000000..eaa586a0d8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/ppc-mcount.S
@@ -0,0 +1,43 @@
+/* PowerPC64-specific implementation of profiling support.
+   Copyright (C) 1997, 1999, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* This would be bad.  */
+#ifdef PROF
+#undef PROF
+#endif
+
+#include <sysdep.h>
+/* We don't need to save the parameter-passing registers as gcc takes
+   care of that for us.  Thus this function looks fairly normal.
+   In fact, the generic code would work for us.  */
+
+ENTRY(_mcount)		/* Profiling hook: passes two code addresses (r3, r4) to __mcount_internal.  */
+	mflr		 r4	/* r4 = our return address, i.e. a pc inside the function that called _mcount.  */
+	ld		 r11, 0(r1)	/* r11 = doubleword at 0(r1); presumably the back chain to the previous frame.  */
+	stdu		 r1,-112(r1)	/* Push a 112-byte frame; the old r1 is stored at its base.  */
+	std		 r4, 128(r1)	/* 128 = 112 + 16: offset 16 of the frame we were entered with.  */
+	ld		 r3, 16(r11)	/* r3 = doubleword at offset 16 of the frame r11 points to (presumably its saved LR).  */
+	bl		 JUMPTARGET(__mcount_internal)
+	nop			/* NOTE(review): slot after the call, presumably for the linker's TOC restore — confirm.  */
+	ld		 r0, 128(r1)	/* Reload the return address saved above.  */
+	mtlr		 r0
+	addi		 r1,r1,112	/* Pop the frame.  */
+	blr
+END(_mcount)
+
diff --git a/sysdeps/powerpc/powerpc64/register-dump.h b/sysdeps/powerpc/powerpc64/register-dump.h
new file mode 100644
index 0000000000..dd69af3faf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/register-dump.h
@@ -0,0 +1,125 @@
+/* Dump registers.
+   Copyright (C) 1998, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sys/uio.h>
+#include <stdio-common/_itoa.h>
+
+/* Output template.  Each "NN%" is a field marker: register_dump overwrites it with the value of saved register number NN (hex).  */
+static const char dumpform[] = "\
+Register dump:\n\
+sr0=000000000000020% sr1=000000000000021% dar=000000000000029% dsi=000002a%\n\
+lr=000000000000024%  ctr=000000000000023% gr3*=000000000000022% trap=0000028%\n\
+ccr=0000026%  xer=0000025%\n\
+gr0-3:   000000000000000% 000000000000001% 000000000000002% 000000000000003%\n\
+gr4-7:   000000000000004% 000000000000005% 000000000000006% 000000000000007%\n\
+gr8-11:  000000000000008% 000000000000009% 00000000000000a% 00000000000000b%\n\
+gr12-15: 00000000000000c% 00000000000000d% 00000000000000e% 00000000000000f%\n\
+gr16-19: 000000000000010% 000000000000011% 000000000000012% 000000000000013%\n\
+gr20-23: 000000000000014% 000000000000015% 000000000000016% 000000000000017%\n\
+gr24-27: 000000000000018% 000000000000019% 00000000000001a% 00000000000001b%\n\
+gr28-31: 00000000000001c% 00000000000001d% 00000000000001e% 00000000000001f%\n\
+fscr=0000071%\n\
+fp0-3:   000000000000030% 000000000000031% 000000000000032% 000000000000033%\n\
+fp4-7:   000000000000034% 000000000000035% 000000000000036% 000000000000037%\n\
+fp8-11:  000000000000038% 000000000000039% 00000000000003a% 00000000000003b%\n\
+fp12-15: 00000000000003c% 00000000000003d% 00000000000003e% 00000000000003f%\n\
+fp16-19: 000000000000040% 000000000000041% 000000000000042% 000000000000043%\n\
+fp20-23: 000000000000044% 000000000000045% 000000000000046% 000000000000047%\n\
+fp24-27: 000000000000048% 000000000000049% 00000000000004a% 00000000000004b%\n\
+fp28-31: 00000000000004c% 00000000000004d% 00000000000004e% 00000000000004f%\n\
+";
+
+/* Most of the fields are self-explanatory.  'sr0' is the next
+   instruction to execute, from SRR0, which may have some relationship
+   with the instruction that caused the exception.  'gr3*' is the value
+   that will be returned in register 3 when the current system call
+   returns.  'sr1' is SRR1, bits 16-31 of which are copied from the MSR:
+
+   16 - External interrupt enable
+   17 - Privilege level (1=user, 0=supervisor)
+   18 - FP available
+   19 - Machine check enable (if clear, processor locks up on machine check)
+   20 - FP exception mode bit 0 (FP exceptions recoverable)
+   21 - Single-step trace enable
+   22 - Branch trace enable
+   23 - FP exception mode bit 1
+   25 - exception prefix (if set, exceptions are taken from 0xFFFnnnnn,
+        otherwise from 0x000nnnnn).
+   26 - Instruction address translation enabled.
+   27 - Data address translation enabled.
+   30 - Exception is recoverable (otherwise, don't try to return).
+   31 - Little-endian mode enable.
+
+   'Trap' is the address of the exception:
+
+   00200 - Machine check exception (memory parity error, for instance)
+   00300 - Data access exception (memory not mapped, see dsisr for why)
+   00400 - Instruction access exception (memory not mapped)
+   00500 - External interrupt
+   00600 - Alignment exception (see dsisr for more information)
+   00700 - Program exception (illegal/trap instruction, FP exception)
+   00800 - FP unavailable (should not be seen by user code)
+   00900 - Decrementer exception (for instance, SIGALRM)
+   00A00 - I/O controller interface exception
+   00C00 - System call exception (for instance, kill(3)).
+   00E00 - FP assist exception (optional FP instructions, etc.)
+
+   'dar' is the memory location, for traps 00300, 00400, 00600, 00A00.
+   'dsisr' has the following bits under trap 00300:
+   0 - direct-store error exception
+   1 - no page table entry for page
+   4 - memory access not permitted
+   5 - trying to access I/O controller space or using lwarx/stwcx on
+       non-write-cached memory
+   6 - access was store
+   9 - data access breakpoint hit
+   10 - segment table search failed to find translation (64-bit ppcs only)
+   11 - I/O controller instruction not permitted
+   For trap 00400, the same bits are set in SRR1 instead.
+   For trap 00600, bits 12-31 of the DSISR are set to allow emulation of
+   the instruction without actually having to read it from memory.
+*/
+
+#define xtoi(x) ((x) >= 'a' ? (x) + 10 - 'a' : (x) - '0') /* Value of one lowercase hex digit character.  */
+/* Fill a copy of dumpform with the registers saved in CTX and write the resulting text to FD.  */
+static void
+register_dump (int fd, struct sigcontext *ctx)
+{
+  char buffer[sizeof(dumpform)];
+  char *bufferpos;
+  unsigned regno;
+  unsigned long *regs = (unsigned long *)(ctx->regs); /* Saved registers are 64 bits wide here, so index them as such.  */
+
+  memcpy(buffer, dumpform, sizeof(dumpform));
+
+  /* Each "NN%" marker names register NN (hex): zero the marker, then let _itoa_word write the hex value ending at the '%' position.  */
+  while ((bufferpos = memchr (buffer, '%', sizeof(dumpform))))
+    {
+      regno = xtoi (bufferpos[-1]) | xtoi (bufferpos[-2]) << 4;
+      memset (bufferpos-2, '0', 3);
+      _itoa_word (regs[regno], bufferpos+1, 16, 0);
+    }
+
+  /* Write the text only; the template's terminating NUL is not part of the output.  */
+  write (fd, buffer, sizeof(buffer) - 1);
+}
+
+
+#define REGISTER_DUMP \
+  register_dump (fd, ctx) /* fd and ctx are taken from the scope in which the macro is expanded.  */
diff --git a/sysdeps/powerpc/powerpc64/setjmp.S b/sysdeps/powerpc/powerpc64/setjmp.S
new file mode 100644
index 0000000000..35d2a2790a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/setjmp.S
@@ -0,0 +1,87 @@
+/* setjmp for PowerPC64.
+   Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2002 
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#define _ASM
+#define _SETJMP_H
+#include <bits/setjmp.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+ENTRY (BP_SYM (__sigsetjmp))	/* Save the caller's register context in the jmp_buf at r3, then tail-call __sigjmp_save.  */
+	CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE)
+___sigsetjmp:			/* _setjmp and __setjmp branch here after loading r4.  */
+	std  r1,(JB_GPR1*8)(3)		/* Stack pointer.  */
+	mflr r0
+	std  r2,(JB_GPR2*8)(3)		/* r2 (the TOC pointer under the PPC64 ELF ABI).  */
+	std  r14,((JB_GPRS+0)*8)(3)	/* r(14+n) goes to slot JB_GPRS+n and fp(14+n) to JB_FPRS+n, n = 0..17.  */
+	stfd fp14,((JB_FPRS+0)*8)(3)
+	std  r0,(JB_LR*8)(3)		/* Return address fetched by the mflr above.  */
+	std  r15,((JB_GPRS+1)*8)(3)
+	stfd fp15,((JB_FPRS+1)*8)(3)
+	mfcr r0
+	std  r16,((JB_GPRS+2)*8)(3)
+	stfd fp16,((JB_FPRS+2)*8)(3)
+	std  r0,(JB_CR*8)(3)		/* Condition register fetched by the mfcr above.  */
+	std  r17,((JB_GPRS+3)*8)(3)
+	stfd fp17,((JB_FPRS+3)*8)(3)
+	std  r18,((JB_GPRS+4)*8)(3)
+	stfd fp18,((JB_FPRS+4)*8)(3)
+	std  r19,((JB_GPRS+5)*8)(3)
+	stfd fp19,((JB_FPRS+5)*8)(3)
+	std  r20,((JB_GPRS+6)*8)(3)
+	stfd fp20,((JB_FPRS+6)*8)(3)
+	std  r21,((JB_GPRS+7)*8)(3)
+	stfd fp21,((JB_FPRS+7)*8)(3)
+	std  r22,((JB_GPRS+8)*8)(3)
+	stfd fp22,((JB_FPRS+8)*8)(3)
+	std  r23,((JB_GPRS+9)*8)(3)
+	stfd fp23,((JB_FPRS+9)*8)(3)
+	std  r24,((JB_GPRS+10)*8)(3)
+	stfd fp24,((JB_FPRS+10)*8)(3)
+	std  r25,((JB_GPRS+11)*8)(3)
+	stfd fp25,((JB_FPRS+11)*8)(3)
+	std  r26,((JB_GPRS+12)*8)(3)
+	stfd fp26,((JB_FPRS+12)*8)(3)
+	std  r27,((JB_GPRS+13)*8)(3)
+	stfd fp27,((JB_FPRS+13)*8)(3)
+	std  r28,((JB_GPRS+14)*8)(3)
+	stfd fp28,((JB_FPRS+14)*8)(3)
+	std  r29,((JB_GPRS+15)*8)(3)
+	stfd fp29,((JB_FPRS+15)*8)(3)
+	std  r30,((JB_GPRS+16)*8)(3)
+	stfd fp30,((JB_FPRS+16)*8)(3)
+	std  r31,((JB_GPRS+17)*8)(3)
+	stfd fp31,((JB_FPRS+17)*8)(3)
+	b JUMPTARGET (BP_SYM (__sigjmp_save))	/* Tail call; the stores above leave r3 and r4 unmodified.  */
+END (BP_SYM (__sigsetjmp))
+
+ENTRY (BP_SYM (_setjmp))
+	li r4,0			/* Second argument (r4) = 0; it reaches __sigjmp_save unchanged.  */
+	b	___sigsetjmp	/* Join __sigsetjmp at its inner label, past the bounds check.  */
+END (BP_SYM (_setjmp))
+
+ENTRY (BP_SYM (__setjmp))
+	li r4,1			/* Second argument (r4) = 1; it reaches __sigjmp_save unchanged.  */
+	b	___sigsetjmp	/* Join __sigsetjmp at its inner label, past the bounds check.  */
+END (BP_SYM (__setjmp))
+
+strong_alias (__setjmp, setjmp)
+
diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S
new file mode 100644
index 0000000000..c842111ca5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/stpcpy.S
@@ -0,0 +1,121 @@
+/* Optimized stpcpy implementation for PowerPC64.
+   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* See strlen.s for comments on how the end-of-string testing works.  */
+
+/* char * [r3] stpcpy (char *dest [r3], const char *src [r4])  */
+
+EALIGN (BP_SYM (__stpcpy), 4, 0)	/* Copy the string at src to dest; on return rDEST addresses the NUL just stored.  */
+
+#define rTMP	r0		/* scratch */
+#define rRTN	r3
+#if __BOUNDED_POINTERS__
+# define rDEST	r4		/* pointer to previous word in dest */
+# define rSRC	r5		/* pointer to previous word in src */
+# define rLOW	r11
+# define rHIGH	r12
+#else
+# define rDEST	r3		/* pointer to previous word in dest */
+# define rSRC	r4		/* pointer to previous word in src */
+#endif
+#define rWORD	r6		/* current word from src */
+#define rFEFE	r7		/* 0xfefefeff */
+#define r7F7F	r8		/* 0x7f7f7f7f */
+#define rNEG	r9		/* ~(word in src | 0x7f7f7f7f) */
+#define rALT	r10		/* alternate word from src */
+
+	CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH)
+	CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH)
+	STORE_RETURN_BOUNDS (rLOW, rHIGH)
+
+	or	rTMP, rSRC, rDEST
+	clrldi.	rTMP, rTMP, 62	/* Keep only the low 2 bits: nonzero if either pointer is not 4-byte aligned.  */
+	addi	rDEST, rDEST, -4	/* Bias dest so the first stwu/stbu at 4(rDEST) lands on dest itself.  */
+	bne	L(unaligned)
+
+	lis	rFEFE, -0x101	/* Build the two magic constants, 16 bits at a time.  */
+	lis	r7F7F, 0x7f7f
+	lwz	rWORD, 0(rSRC)
+	addi	rFEFE, rFEFE, -0x101
+	addi	r7F7F, r7F7F, 0x7f7f
+	b	L(g2)
+
+L(g0):	lwzu	rALT, 4(rSRC)	/* Word loop, two words per pass: load the next word, store the one already checked.  */
+	stwu	rWORD, 4(rDEST)
+	add	rTMP, rFEFE, rALT
+	nor	rNEG, r7F7F, rALT
+	and.	rTMP, rTMP, rNEG	/* Nonzero if rALT contains a zero byte.  */
+	bne-	L(g1)
+	lwzu	rWORD, 4(rSRC)
+	stwu	rALT, 4(rDEST)
+L(g2):	add	rTMP, rFEFE, rWORD
+	nor	rNEG, r7F7F, rWORD
+	and.	rTMP, rTMP, rNEG	/* Nonzero if rWORD contains a zero byte.  */
+	beq+	L(g0)
+
+	mr	rALT, rWORD	/* The tail below expects the final word in rALT.  */
+/* We've hit the end of the string.  Do the rest byte-by-byte.  */
+L(g1):	rlwinm.	rTMP, rALT, 8, 24, 31	/* Extract the most significant byte of the word; cr0 tells whether it is NUL.  */
+	stbu	rTMP, 4(rDEST)	/* Store it; rDEST now addresses the byte just written.  */
+	beqlr-			/* It was the NUL: return with rDEST pointing at it.  */
+	rlwinm.	rTMP, rALT, 16, 24, 31
+	stbu	rTMP, 1(rDEST)
+	beqlr-
+	rlwinm.	rTMP, rALT, 24, 24, 31
+	stbu	rTMP, 1(rDEST)
+	beqlr-
+	stbu	rALT, 1(rDEST)	/* Last byte of the word; the three before it were nonzero, so this is the NUL.  */
+	CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
+	STORE_RETURN_VALUE (rDEST)
+	blr
+
+/* Oh well.  In this case, we just do a byte-by-byte copy.  */
+	.align 4
+	nop
+L(unaligned):
+	lbz	rWORD, 0(rSRC)
+	addi	rDEST, rDEST, 3	/* Undo most of the -4 bias: rDEST = dest - 1, ready for stbu 1(rDEST).  */
+	cmpwi	rWORD, 0
+	beq-	L(u2)
+
+L(u0):	lbzu	rALT, 1(rSRC)	/* Byte loop, two bytes per pass: load the next byte, store the one already checked.  */
+	stbu	rWORD, 1(rDEST)
+	cmpwi	rALT, 0
+	beq-	L(u1)
+	nop		/* Let 601 load start of loop.  */
+	lbzu	rWORD, 1(rSRC)
+	stbu	rALT, 1(rDEST)
+	cmpwi	rWORD, 0
+	bne+	L(u0)
+L(u2):	stbu	rWORD, 1(rDEST)	/* Store the terminating NUL held in rWORD.  */
+	CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
+	STORE_RETURN_VALUE (rDEST)
+	blr
+L(u1):	stbu	rALT, 1(rDEST)	/* Store the terminating NUL held in rALT.  */
+	CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
+	STORE_RETURN_VALUE (rDEST)
+	blr
+END (BP_SYM (__stpcpy))
+
+weak_alias (BP_SYM (__stpcpy), BP_SYM (stpcpy))
+libc_hidden_def (__stpcpy)
diff --git a/sysdeps/powerpc/powerpc64/strchr.S b/sysdeps/powerpc/powerpc64/strchr.S
new file mode 100644
index 0000000000..f6d418bcae
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/strchr.S
@@ -0,0 +1,130 @@
+/* Optimized strchr implementation for PowerPC64.
+   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* See strlen.s for comments on how this works.  */
+
+/* char * [r3] strchr (const char *s [r3] , int c [r4] )  */
+
+ENTRY (BP_SYM (strchr))	/* Find the first byte equal to c in s; rRTN is 0 if the terminating NUL comes first.  */
+
+#define rTMP1	r0
+#define rRTN	r3	/* outgoing result */
+#if __BOUNDED_POINTERS__
+# define rSTR	r4
+# define rCHR	r5	/* byte we're looking for, spread over the whole word */
+# define rWORD	r8	/* the current word */
+#else
+# define rSTR	r8	/* current word pointer */
+# define rCHR	r4	/* byte we're looking for, spread over the whole word */
+# define rWORD	r5	/* the current word */
+#endif
+#define rCLZB	rCHR	/* leading zero byte count */
+#define rFEFE	r6	/* constant 0xfefefeff (-0x01010101) */
+#define r7F7F	r7	/* constant 0x7f7f7f7f */
+#define rTMP2	r9
+#define rIGN	r10	/* number of bits we should ignore in the first word */
+#define rMASK	r11	/* mask with the bits to ignore set to 0 */
+#define rTMP3	r12
+
+	CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
+	STORE_RETURN_BOUNDS (rTMP1, rTMP2)
+
+	rlwimi	rCHR, rCHR, 8, 16, 23	/* Copy the low byte into bits 16-23: low halfword = c,c.  */
+	li	rMASK, -1
+	rlwimi	rCHR, rCHR, 16, 0, 15	/* Copy the low halfword into the high one: word = c,c,c,c.  */
+	rlwinm	rIGN, rRTN, 3, 27, 28	/* rIGN = (s & 3) * 8.  */
+	lis	rFEFE, -0x101
+	lis	r7F7F, 0x7f7f
+	clrrdi	rSTR, rRTN, 2	/* rSTR = s rounded down to a 4-byte boundary.  */
+	addi	rFEFE, rFEFE, -0x101
+	addi	r7F7F, r7F7F, 0x7f7f
+/* Test the first (partial?) word.  */
+	lwz	rWORD, 0(rSTR)
+	srw	rMASK, rMASK, rIGN	/* Clear the top rIGN bits of the 32-bit mask.  */
+	orc	rWORD, rWORD, rMASK	/* Force the bytes that precede s to 0xff so they cannot look like a NUL.  */
+	add	rTMP1, rFEFE, rWORD
+	nor	rTMP2, r7F7F, rWORD
+	and.	rTMP1, rTMP1, rTMP2	/* cr0: NUL test for the first word.  */
+	xor	rTMP3, rCHR, rWORD	/* A byte of rTMP3 is zero exactly where rWORD matches c.  */
+	orc	rTMP3, rTMP3, rMASK	/* Likewise keep the bytes before s from looking like a match.  */
+	b	L(loopentry)
+
+/* The loop.  */
+
+L(loop):lwzu rWORD, 4(rSTR)
+	and.	rTMP1, rTMP1, rTMP2	/* cr0: match test for the previous word (operands set up at loopentry).  */
+/* Test for 0.	*/
+	add	rTMP1, rFEFE, rWORD
+	nor	rTMP2, r7F7F, rWORD
+	bne	L(foundit)	/* The previous word contained c.  */
+	and.	rTMP1, rTMP1, rTMP2	/* cr0: NUL test for the word just loaded.  */
+/* Start test for the bytes we're looking for.  */
+	xor	rTMP3, rCHR, rWORD
+L(loopentry):
+	add	rTMP1, rFEFE, rTMP3
+	nor	rTMP2, r7F7F, rTMP3
+	beq	L(loop)		/* No NUL in the current word: keep scanning.  */
+/* There is a zero byte in the word, but may also be a matching byte (either
+   before or after the zero byte).  In fact, we may be looking for a
+   zero byte, in which case we return a match.  We guess that this hasn't
+   happened, though.  */
+L(missed):
+	and.	rTMP1, rTMP1, rTMP2	/* cr0: match test for the word that holds the NUL.  */
+	li	rRTN, 0
+	STORE_RETURN_VALUE (rSTR)
+	beqlr			/* No match in the final word: return 0.  */
+/* It did happen. Decide which one was first...
+   I'm not sure if this is actually faster than a sequence of
+   rotates, compares, and branches (we use it anyway because it's shorter).  */
+	and	rFEFE, r7F7F, rWORD
+	or	rMASK, r7F7F, rWORD
+	and	rTMP1, r7F7F, rTMP3
+	or	rIGN, r7F7F, rTMP3
+	add	rFEFE, rFEFE, r7F7F
+	add	rTMP1, rTMP1, r7F7F
+	nor	rWORD, rMASK, rFEFE	/* rWORD = 0x80 in each byte position that held a NUL.  */
+	nor	rTMP2, rIGN, rTMP1	/* rTMP2 = 0x80 in each byte position that matched c.  */
+	cmplw	rWORD, rTMP2
+	bgtlr			/* The NUL comes before any match: return the 0 loaded above.  */
+	cntlzw	rCLZB, rTMP2
+	srwi	rCLZB, rCLZB, 3	/* Leading zero bits / 8 = byte offset of the first match.  */
+	add	rRTN, rSTR, rCLZB
+	CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
+	STORE_RETURN_VALUE (rSTR)
+	blr
+
+L(foundit):
+	and	rTMP1, r7F7F, rTMP3
+	or	rIGN, r7F7F, rTMP3
+	add	rTMP1, rTMP1, r7F7F
+	nor	rTMP2, rIGN, rTMP1	/* rTMP2 = 0x80 in each byte position that matched c.  */
+	cntlzw	rCLZB, rTMP2
+	subi	rSTR, rSTR, 4	/* The match is in the previous word; the lwzu already advanced rSTR.  */
+	srwi	rCLZB, rCLZB, 3
+	add	rRTN, rSTR, rCLZB
+	CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
+	STORE_RETURN_VALUE (rSTR)
+	blr
+END (BP_SYM (strchr))
+
+weak_alias (BP_SYM (strchr), BP_SYM (index))
diff --git a/sysdeps/powerpc/powerpc64/strcmp.S b/sysdeps/powerpc/powerpc64/strcmp.S
new file mode 100644
index 0000000000..71c6d867b7
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/strcmp.S
@@ -0,0 +1,133 @@
+/* Optimized strcmp implementation for PowerPC64.
+   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* See strlen.s for comments on how the end-of-string testing works.  */
+
+/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4])  */
+
+EALIGN (BP_SYM(strcmp), 4, 0)	/* Compare the strings at s1 and s2; rRTN is 0 when they are equal, otherwise a nonzero difference.  */
+
+#define rTMP	r0
+#define rRTN	r3
+#define rSTR1	r3	/* first string arg */
+#define rSTR2	r4	/* second string arg */
+#if __BOUNDED_POINTERS__
+# define rHIGH1	r11
+# define rHIGH2 r12
+#endif
+#define rWORD1	r5	/* current word in s1 */
+#define rWORD2	r6	/* current word in s2 */
+#define rFEFE	r7	/* constant 0xfefefeff (-0x01010101) */
+#define r7F7F	r8	/* constant 0x7f7f7f7f */
+#define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f) */
+#define rBITDIF	r10	/* bits that differ in s1 & s2 words */
+
+	CHECK_BOUNDS_LOW (rSTR1, rTMP, rHIGH1)
+	CHECK_BOUNDS_LOW (rSTR2, rTMP, rHIGH2)
+
+	or	rTMP, rSTR2, rSTR1
+	clrldi.	rTMP, rTMP, 62	/* Keep only the low 2 bits: nonzero if either pointer is not 4-byte aligned.  */
+	lis	rFEFE, -0x101
+	bne	L(unaligned)
+
+	lwz	rWORD1, 0(rSTR1)
+	lwz	rWORD2, 0(rSTR2)
+	lis	r7F7F, 0x7f7f
+	addi	rFEFE, rFEFE, -0x101
+	clrldi	rFEFE,rFEFE,32 /* clear upper 32 */
+	addi	r7F7F, r7F7F, 0x7f7f
+	b	L(g1)
+
+L(g0):	lwzu	rWORD1, 4(rSTR1)	/* Word loop: the next s1 word is loaded before the previous compare is acted on.  */
+	bne	cr1, L(different)	/* cr1 still holds the compare of the previous pair of words.  */
+	lwzu	rWORD2, 4(rSTR2)
+L(g1):	add	rTMP, rFEFE, rWORD1
+	nor	rNEG, r7F7F, rWORD1
+
+	clrldi	rNEG,rNEG,32 /* clear upper 32 */
+	and.	rTMP, rTMP, rNEG	/* cr0: nonzero if rWORD1 contains a zero byte.  */
+	cmpw	cr1, rWORD1, rWORD2
+	beq+	L(g0)		/* No NUL in rWORD1: continue with the next pair.  */
+L(endstring):
+/* OK. We've hit the end of the string. We need to be careful that
+   we don't compare two strings as different because of gunk beyond
+   the end of the strings...  */
+	and	rTMP, r7F7F, rWORD1
+	beq	cr1, L(equal)	/* The final words are identical, so the strings are equal.  */
+	add	rTMP, rTMP, r7F7F
+	xor.	rBITDIF, rWORD1, rWORD2
+
+	extsw.	rBITDIF,rBITDIF /* propagate sign for blt */
+	andc	rNEG, rNEG, rTMP	/* rNEG = 0x80 in each byte position of rWORD1 that is NUL.  */
+	blt-	L(highbit)
+	cntlzw	rBITDIF, rBITDIF	/* Bit position of the first difference...  */
+	cntlzw	rNEG, rNEG	/* ...versus bit position of the first NUL marker.  */
+	addi	rNEG, rNEG, 7
+	cmpw	cr1, rNEG, rBITDIF
+	sub	rRTN, rWORD1, rWORD2
+	bgelr+	cr1		/* The difference is not past the NUL byte: return it.  */
+L(equal):
+	li	rRTN, 0
+	/* GKM FIXME: check high bounds.  */
+	blr
+
+L(different):
+	lwz	rWORD1, -4(rSTR1)	/* Reload the s1 word that differed; the lwzu at g0 already fetched the next one.  */
+	xor.	rBITDIF, rWORD1, rWORD2
+
+	extsw.	rBITDIF,rBITDIF /* propagate sign for bgelr */
+	sub	rRTN, rWORD1, rWORD2
+	bgelr+
+L(highbit):
+	ori	rRTN, rWORD2, 1	/* The words differ in their top bit: return rWORD2 with bit 0 forced on, so never 0.  */
+	/* GKM FIXME: check high bounds.  */
+	blr
+
+
+/* Oh well.  In this case, we just do a byte-by-byte comparison.  */
+	.align 4
+L(unaligned):
+	lbz	rWORD1, 0(rSTR1)
+	lbz	rWORD2, 0(rSTR2)
+	b	L(u1)
+
+L(u0):	lbzu	rWORD1, 1(rSTR1)	/* Byte loop, two bytes per pass; the next s1 byte is loaded ahead of the branch.  */
+	bne-	L(u4)		/* cr0: the previous pair of bytes differed.  */
+	lbzu	rWORD2, 1(rSTR2)
+L(u1):	cmpwi	cr1, rWORD1, 0
+	beq-	cr1, L(u3)	/* End of s1.  */
+	cmpw	rWORD1, rWORD2
+	bne-	L(u3)
+	lbzu	rWORD1, 1(rSTR1)
+	lbzu	rWORD2, 1(rSTR2)
+	cmpwi	cr1, rWORD1, 0
+	cmpw	rWORD1, rWORD2
+	bne+	cr1, L(u0)
+L(u3):	sub	rRTN, rWORD1, rWORD2
+	/* GKM FIXME: check high bounds.  */
+	blr
+L(u4):	lbz	rWORD1, -1(rSTR1)	/* Reload the s1 byte that differed; the lbzu at u0 already fetched the next one.  */
+	sub	rRTN, rWORD1, rWORD2
+	/* GKM FIXME: check high bounds.  */
+	blr
+END (BP_SYM (strcmp))
diff --git a/sysdeps/powerpc/powerpc64/strcpy.S b/sysdeps/powerpc/powerpc64/strcpy.S
new file mode 100644
index 0000000000..2015eff5c6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/strcpy.S
@@ -0,0 +1,120 @@
+/* Optimized strcpy implementation for PowerPC64.
+   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* See strlen.s for comments on how the end-of-string testing works.  */
+
+/* char * [r3] strcpy (char *dest [r3], const char *src [r4])  */
+
+EALIGN (BP_SYM (strcpy), 4, 0)	/* Copy the string at src, including its NUL, to dest.  */
+
+#define rTMP	r0
+#define rRTN	r3	/* incoming DEST arg preserved as result */
+#if __BOUNDED_POINTERS__
+# define rDEST	r4	/* pointer to previous word in dest */
+# define rSRC	r5	/* pointer to previous word in src */
+# define rLOW	r11
+# define rHIGH	r12
+#else
+# define rSRC	r4	/* pointer to previous word in src */
+# define rDEST	r5	/* pointer to previous word in dest */
+#endif
+#define rWORD	r6	/* current word from src */
+#define rFEFE	r7	/* constant 0xfefefeff (-0x01010101) */
+#define r7F7F	r8	/* constant 0x7f7f7f7f */
+#define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f) */
+#define rALT	r10	/* alternate word from src */
+
+	CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH)
+	CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH)
+	STORE_RETURN_BOUNDS (rLOW, rHIGH)
+
+	or	rTMP, rSRC, rRTN
+	clrldi.	rTMP, rTMP, 62	/* Keep only the low 2 bits: nonzero if either pointer is not 4-byte aligned.  */
+#if __BOUNDED_POINTERS__
+	addi	rDEST, rDEST, -4
+#else
+	addi	rDEST, rRTN, -4	/* Working pointer = dest - 4, so the first stwu 4(rDEST) lands on dest; r3 stays intact.  */
+#endif
+	bne	L(unaligned)
+
+	lis	rFEFE, -0x101	/* Build the two magic constants, 16 bits at a time.  */
+	lis	r7F7F, 0x7f7f
+	lwz	rWORD, 0(rSRC)
+	addi	rFEFE, rFEFE, -0x101
+	addi	r7F7F, r7F7F, 0x7f7f
+	b	L(g2)
+
+L(g0):	lwzu	rALT, 4(rSRC)	/* Word loop, two words per pass: load the next word, store the one already checked.  */
+	stwu	rWORD, 4(rDEST)
+	add	rTMP, rFEFE, rALT
+	nor	rNEG, r7F7F, rALT
+	and.	rTMP, rTMP, rNEG	/* Nonzero if rALT contains a zero byte.  */
+	bne-	L(g1)
+	lwzu	rWORD, 4(rSRC)
+	stwu	rALT, 4(rDEST)
+L(g2):	add	rTMP, rFEFE, rWORD
+	nor	rNEG, r7F7F, rWORD
+	and.	rTMP, rTMP, rNEG	/* Nonzero if rWORD contains a zero byte.  */
+	beq+	L(g0)
+
+	mr	rALT, rWORD	/* The tail below expects the final word in rALT.  */
+/* We've hit the end of the string.  Do the rest byte-by-byte.  */
+L(g1):	rlwinm.	rTMP, rALT, 8, 24, 31	/* Extract the most significant byte of the word; cr0 tells whether it is NUL.  */
+	stb	rTMP, 4(rDEST)	/* Plain stb at fixed offsets 4..7: rDEST is not updated in this tail.  */
+	beqlr-			/* That byte was the NUL: done.  */
+	rlwinm.	rTMP, rALT, 16, 24, 31
+	stb	rTMP, 5(rDEST)
+	beqlr-
+	rlwinm.	rTMP, rALT, 24, 24, 31
+	stb	rTMP, 6(rDEST)
+	beqlr-
+	stb	rALT, 7(rDEST)	/* Last byte of the word; the three before it were nonzero, so this is the NUL.  */
+	/* GKM FIXME: check high bound.  */
+	blr
+
+/* Oh well.  In this case, we just do a byte-by-byte copy.  */
+	.align 4
+	nop
+L(unaligned):
+	lbz	rWORD, 0(rSRC)
+	addi	rDEST, rRTN, -1	/* Working pointer = dest - 1, ready for stbu 1(rDEST).  */
+	cmpwi	rWORD, 0
+	beq-	L(u2)
+
+L(u0):	lbzu	rALT, 1(rSRC)	/* Byte loop, two bytes per pass: load the next byte, store the one already checked.  */
+	stbu	rWORD, 1(rDEST)
+	cmpwi	rALT, 0
+	beq-	L(u1)
+	nop		/* Let 601 load start of loop.  */
+	lbzu	rWORD, 1(rSRC)
+	stbu	rALT, 1(rDEST)
+	cmpwi	rWORD, 0
+	bne+	L(u0)
+L(u2):	stb	rWORD, 1(rDEST)	/* Store the terminating NUL held in rWORD.  */
+	/* GKM FIXME: check high bound.  */
+	blr
+L(u1):	stb	rALT, 1(rDEST)	/* Store the terminating NUL held in rALT.  */
+	/* GKM FIXME: check high bound.  */
+	blr
+
+END (BP_SYM (strcpy))
diff --git a/sysdeps/powerpc/powerpc64/strlen.S b/sysdeps/powerpc/powerpc64/strlen.S
new file mode 100644
index 0000000000..7907382002
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/strlen.S
@@ -0,0 +1,163 @@
+/* Optimized strlen implementation for PowerPC64.
+   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* The algorithm here uses the following techniques:
+
+   1) Given a word 'x', we can test to see if it contains any 0 bytes
+      by subtracting 0x01010101, and seeing if any of the high bits of each
+      byte changed from 0 to 1. This works because the least significant
+      0 byte must have had no incoming carry (otherwise it's not the least
+      significant), so it is 0x00 - 0x01 == 0xff. For all other
+      byte values, either they have the high bit set initially, or when
+      1 is subtracted you get a value in the range 0x00-0x7f, none of which
+      have their high bit set. The expression here is
+      (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
+      there were no 0x00 bytes in the word.
+
+   2) Given a word 'x', we can test to see _which_ byte was zero by
+      calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
+      This produces 0x80 in each byte that was zero, and 0x00 in all
+      the other bytes. The '| 0x7f7f7f7f' sets the low 7 bits of each
+      byte so that the final complement clears them, and the '| x' part
+      ensures that bytes with the high bit set produce 0x00. The addition
+      will carry into the high bit of each byte iff that byte had one of
+      its low 7 bits set. We can then count the leading zero bits of the
+      result and divide by 8 to find how many to add to the index.
+      This is from the book 'The PowerPC Compiler Writer's Guide',
+      by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
+
+   We deal with strings not aligned to a word boundary by taking the
+   first word and ensuring that bytes not part of the string
+   are treated as nonzero. To allow for memory latency, we unroll the
+   loop a few times, being careful to ensure that we do not read ahead
+   across cache line boundaries.
+
+   Questions to answer:
+   1) How long are strings passed to strlen? If they're often really long,
+   we should probably use cache management instructions and/or unroll the
+   loop more. If they're often quite short, it might be better to use
+   fact (2) in the inner loop than have to recalculate it.
+   2) How popular are bytes with the high bit set? If they are very rare,
+   on some processors it might be useful to use the simpler expression
+   ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
+   ALU), but this fails when any character has its high bit set.  */
+
+/* Some notes on register usage: Under the SVR4 ABI, we can use registers
+   0 and 3 through 12 (so long as we don't call any procedures) without
+   saving them. We can also use registers 14 through 31 if we save them.
+   We can't use r1 (it's the stack pointer), r2 nor r13 because the user
+   program may expect them to hold their usual value if we get sent
+   a signal. Integer parameters are passed in r3 through r10.
+   We can use condition register fields cr0, cr1, cr5, cr6, and cr7 without
+   saving them; the others we must save.  */
+
+/* size_t [r3] strlen (const char *s [r3])  */
+
+ENTRY (BP_SYM (strlen))
+/* In: r3 = s.  Out: r3 = offset from s of the first zero byte found.  */
+#define rTMP1	r0
+#define rRTN	r3	/* incoming STR arg, outgoing result */
+#define rSTR	r4	/* current string position */
+#define rPADN	r5	/* number of padding bits we prepend to the
+			   string to make it start at a word boundary */
+#define rFEFE	r6	/* constant 0xfefefeff (-0x01010101) */
+#define r7F7F	r7	/* constant 0x7f7f7f7f */
+#define rWORD1	r8	/* current string word */
+#define rWORD2	r9	/* next string word */
+#define rMASK	r9	/* mask for first string word */
+#define rTMP2	r10
+#define rTMP3	r11
+#define rTMP4	r12
+
+	CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
+
+	clrrdi	rSTR, rRTN, 2	/* round s down to a word boundary */
+	lis	r7F7F, 0x7f7f
+	rlwinm	rPADN, rRTN, 3, 27, 28	/* rPADN = (s & 3) * 8 */
+	lwz	rWORD1, 0(rSTR)
+	li	rMASK, -1
+	addi	r7F7F, r7F7F, 0x7f7f
+/* That's the setup done, now do the first pair of words.
+   We make an exception and use method (2) on the first two words, to reduce
+   overhead.  */
+	srw	rMASK, rMASK, rPADN	/* drop the top rPADN bits of the mask */
+	and	rTMP1, r7F7F, rWORD1
+	or	rTMP2, r7F7F, rWORD1
+	add	rTMP1, rTMP1, r7F7F
+	nor	rTMP1, rTMP2, rTMP1
+	and.	rWORD1, rTMP1, rMASK	/* ignore hits in the padding bits */
+	mtcrf	0x01, rRTN	/* low 4 bits of s -> cr7 */
+	bne	L(done0)
+	lis	rFEFE, -0x101
+	addi	rFEFE, rFEFE, -0x101
+	clrldi	rFEFE,rFEFE,32 /* clear upper 32 */
+/* Are we now aligned to a doubleword boundary?  */
+	bt	29, L(loop)	/* CR bit 29 = the "4" bit of s */
+
+/* Handle second word of pair.  */
+	lwzu	rWORD1, 4(rSTR)
+	and	rTMP1, r7F7F, rWORD1
+	or	rTMP2, r7F7F, rWORD1
+	add	rTMP1, rTMP1, r7F7F
+	nor	rWORD1, rTMP2, rTMP1	/* sets the upper 32 bits too */
+	clrldi.	rWORD1,rWORD1,32 /* clear upper 32 */
+	bne	L(done0)
+
+/* The loop.  */
+
+L(loop):
+	lwz	rWORD1, 4(rSTR)
+	lwzu	rWORD2, 8(rSTR)	/* rSTR now addresses rWORD2 */
+	add	rTMP1, rFEFE, rWORD1
+	nor	rTMP2, r7F7F, rWORD1
+	and	rTMP1, rTMP1, rTMP2	/* method (1) on rWORD1 */
+	clrldi.	rTMP1,rTMP1,32 /* clear upper 32 */
+	add	rTMP3, rFEFE, rWORD2
+	nor	rTMP4, r7F7F, rWORD2
+	bne	L(done1)
+	and	rTMP1, rTMP3, rTMP4	/* method (1) on rWORD2 */
+	clrldi.	rTMP1,rTMP1,32 /* clear upper 32 */
+	beq	L(loop)
+
+	and	rTMP1, r7F7F, rWORD2
+	add	rTMP1, rTMP1, r7F7F
+	andc	rWORD1, rTMP4, rTMP1	/* method (2) result for rWORD2 */
+	b	L(done0)
+
+L(done1):
+	and	rTMP1, r7F7F, rWORD1
+	subi	rSTR, rSTR, 4	/* step back to the address of rWORD1 */
+	add	rTMP1, rTMP1, r7F7F
+	andc	rWORD1, rTMP2, rTMP1	/* method (2) result for rWORD1 */
+
+/* When we get to here, rSTR points to the first word in the string that
+   contains a zero byte, and the most significant set bit in rWORD1 is in that
+   byte.  */
+L(done0):
+	cntlzw	rTMP3, rWORD1
+	subf	rTMP1, rRTN, rSTR	/* rSTR - s */
+	srwi	rTMP3, rTMP3, 3	/* leading zero bits -> bytes */
+	add	rRTN, rTMP1, rTMP3
+	/* GKM FIXME: check high bound.  */
+	blr
+END (BP_SYM (strlen))