summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 41
-rw-r--r-- gmon/Makefile 4
-rw-r--r-- sysdeps/powerpc/powerpc64/Makefile 7
-rw-r--r-- sysdeps/powerpc/powerpc64/__longjmp-common.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/bsd-_setjmp.S 12
-rw-r--r-- sysdeps/powerpc/powerpc64/bsd-setjmp.S 6
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_ceil.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_ceilf.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_copysign.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_floor.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_floorf.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_llrint.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_llrintf.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_llround.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_llroundf.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_rint.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_rintf.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_round.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_roundf.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_trunc.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/s_truncf.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/memcpy.S 2
-rw-r--r-- sysdeps/powerpc/powerpc64/memset.S 2
-rw-r--r-- sysdeps/powerpc/powerpc64/setjmp-common.S 4
-rw-r--r-- sysdeps/powerpc/powerpc64/stpcpy.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/strchr.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/strcmp.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/strcpy.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/strlen.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/strncmp.S 1
-rw-r--r-- sysdeps/powerpc/powerpc64/sysdep.h 68
31 files changed, 119 insertions, 49 deletions
diff --git a/ChangeLog b/ChangeLog
index e0245eb811..609c18daec 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,44 @@
+2004-10-06  Alan Modra  <amodra@bigpond.net.au>
+
+	* gmon/Makefile (CFLAGS-mcount.c): Move before inclusion of "Rules".
+	* sysdeps/powerpc/powerpc64/Makefile (CFLAGS-mcount.c): Add
+	-msoft-float.
+	* sysdeps/powerpc/powerpc64/sysdep.h (SAVE_ARG, REST_ARG): New macros.
+	(CALL_MCOUNT): Replace with a gas macro implementation.
+	(EALIGN): Delete PROF version.
+	* sysdeps/powerpc/powerpc64/__longjmp-common.S: Invoke CALL_MCOUNT.
+	* sysdeps/powerpc/powerpc64/bsd-_setjmp.S: Likewise.
+	* sysdeps/powerpc/powerpc64/bsd-setjmp.S: Likewise.
+	* sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise.
+	* sysdeps/powerpc/powerpc64/memcpy.S: Likewise.
+	* sysdeps/powerpc/powerpc64/memset.S: Likewise.
+	* sysdeps/powerpc/powerpc64/stpcpy.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strchr.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strcmp.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strcpy.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strlen.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strncmp.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_ceil.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_ceilf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_copysign.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_floor.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_floorf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_llrint.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_llrintf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_llround.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_llroundf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_rint.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_rintf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_round.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_roundf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_trunc.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_truncf.S: Likewise.
+
+	* sysdeps/powerpc/powerpc64/setjmp-common.S: Add extra entry point
+	past _mcount call.
+	* sysdeps/powerpc/powerpc64/bsd-_setjmp.S: Use it.
+	* sysdeps/powerpc/powerpc64/bsd-setjmp.S: Likewise.
+
 2004-10-06  Ulrich Drepper  <drepper@redhat.com>
 
 	* resolv/res_mkquery.c (res_nmkquery): Reject randombits value if
diff --git a/gmon/Makefile b/gmon/Makefile
index 060ede1f8f..80a79456f7 100644
--- a/gmon/Makefile
+++ b/gmon/Makefile
@@ -29,11 +29,11 @@ elide-routines.os = bb_init_func bb_exit_func
 
 tests	:= tst-sprofil
 
-include ../Rules
-
 # The mcount code won't work without a frame pointer.
 CFLAGS-mcount.c := -fno-omit-frame-pointer
 
+include ../Rules
+
 # We cannot compile mcount.c with -pg because that would
 # create recursive calls.  Just copy the normal static object.
 # On systems where `profil' is not a system call, the same
diff --git a/sysdeps/powerpc/powerpc64/Makefile b/sysdeps/powerpc/powerpc64/Makefile
index 758b856854..3ced6568df 100644
--- a/sysdeps/powerpc/powerpc64/Makefile
+++ b/sysdeps/powerpc/powerpc64/Makefile
@@ -22,3 +22,10 @@ ifeq ($(subdir),elf)
 # help gcc inline asm code from dl-machine.h
 +cflags += -finline-limit=2000
 endif
+
+ifeq ($(subdir),gmon)
+# The assembly functions assume that fp arg regs are not trashed.
+# Compiling with -msoft-float ensures that fp regs are not used
+# for moving memory around.
+CFLAGS-mcount.c += -msoft-float
+endif
diff --git a/sysdeps/powerpc/powerpc64/__longjmp-common.S b/sysdeps/powerpc/powerpc64/__longjmp-common.S
index b11a0fd7b5..9d0195dd14 100644
--- a/sysdeps/powerpc/powerpc64/__longjmp-common.S
+++ b/sysdeps/powerpc/powerpc64/__longjmp-common.S
@@ -42,6 +42,7 @@
 
 	.machine	"altivec"
 ENTRY (BP_SYM (__longjmp))
+	CALL_MCOUNT 2
 	CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE)
 #ifndef __NO_VMX__
 	ld    r5,.LC__dl_hwcap@toc(r2)
diff --git a/sysdeps/powerpc/powerpc64/bsd-_setjmp.S b/sysdeps/powerpc/powerpc64/bsd-_setjmp.S
index 13f4056706..82b79a8098 100644
--- a/sysdeps/powerpc/powerpc64/bsd-_setjmp.S
+++ b/sysdeps/powerpc/powerpc64/bsd-_setjmp.S
@@ -25,8 +25,9 @@
 #if defined NOT_IN_libc
 /* Build a non-versioned object for rtld-*.  */
 ENTRY (BP_SYM (_setjmp))
+	CALL_MCOUNT 1
 	li r4,0			/* Set second argument to 0.  */
-	b JUMPTARGET(BP_SYM (__sigsetjmp))
+	b JUMPTARGET (__sigsetjmp_ent)
 END (BP_SYM (_setjmp))
 libc_hidden_def (_setjmp)
 
@@ -36,8 +37,9 @@ libc_hidden_def (_setjmp)
 symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.3);
 
 ENTRY (BP_SYM (__novmx_setjmp))
+	CALL_MCOUNT 1
 	li r4,0			/* Set second argument to 0.  */
-	b JUMPTARGET(BP_SYM (__novmx__sigsetjmp))
+	b JUMPTARGET (__novmx__sigsetjmp_ent)
 END (BP_SYM (__novmx_setjmp))
 libc_hidden_def (__novmx_setjmp)
 # endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_3, GLIBC_2_3_4) */
@@ -50,13 +52,15 @@ ENTRY (BP_SYM (__GI__setjmp))
 #if defined SHARED && !defined IS_IN_rtld
 	std r2,40(r1)	/* Save the callers TOC in the save area.  */
 #endif
+	CALL_MCOUNT 1
 	li r4,0			/* Set second argument to 0.  */
-	b JUMPTARGET(BP_SYM (__vmx__sigsetjmp))
+	b JUMPTARGET (__vmx__sigsetjmp_ent)
 END (BP_SYM (__GI__setjmp))
 
 ENTRY (BP_SYM (__vmx_setjmp))
+	CALL_MCOUNT 1
 	li r4,0			/* Set second argument to 0.  */
-	b JUMPTARGET(BP_SYM (__vmx__sigsetjmp))
+	b JUMPTARGET (__vmx__sigsetjmp_ent)
 END (BP_SYM (__vmx_setjmp))
 libc_hidden_def (__vmx_setjmp)
 #endif /* !NOT_IN_libc */
diff --git a/sysdeps/powerpc/powerpc64/bsd-setjmp.S b/sysdeps/powerpc/powerpc64/bsd-setjmp.S
index 73b9a4d34e..543e83faa3 100644
--- a/sysdeps/powerpc/powerpc64/bsd-setjmp.S
+++ b/sysdeps/powerpc/powerpc64/bsd-setjmp.S
@@ -25,8 +25,9 @@
 
 
 ENTRY (__novmxsetjmp)
+	CALL_MCOUNT 1
 	li r4,1			/* Set second argument to 1.  */
-	b JUMPTARGET (__novmx__sigsetjmp)
+	b JUMPTARGET (__novmx__sigsetjmp_ent)
 END (__novmxsetjmp)
 strong_alias (__novmxsetjmp, __novmx__setjmp)
 symbol_version (__novmxsetjmp, setjmp, GLIBC_2.3)
@@ -35,8 +36,9 @@ symbol_version (__novmxsetjmp, setjmp, GLIBC_2.3)
 
 
 ENTRY (__vmxsetjmp)
+	CALL_MCOUNT 1
 	li r4,1			/* Set second argument to 1.  */
-	b JUMPTARGET (__vmx__sigsetjmp)
+	b JUMPTARGET (__vmx__sigsetjmp_ent)
 END (__vmxsetjmp)
 strong_alias (__vmxsetjmp, __vmx__setjmp)
 strong_alias (__vmx__sigsetjmp, __setjmp)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceil.S b/sysdeps/powerpc/powerpc64/fpu/s_ceil.S
index 127f302666..a1bfaa70c2 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_ceil.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_ceil.S
@@ -27,6 +27,7 @@
 	.section	".text"
 
 ENTRY (__ceil)
+	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
index 34f61423cc..42eb274389 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
@@ -27,6 +27,7 @@
 	.section	".text"
 
 ENTRY (__ceilf)
+	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_copysign.S b/sysdeps/powerpc/powerpc64/fpu/s_copysign.S
index 40fd83d3d1..a43ed12cf0 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_copysign.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_copysign.S
@@ -23,6 +23,7 @@
 #include <sysdep.h>
 
 ENTRY(__copysign)
+	CALL_MCOUNT 0
 /* double [f1] copysign (double [f1] x, double [f2] y);
    copysign(x,y) returns a value with the magnitude of x and
    with the sign bit of y.  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_floor.S b/sysdeps/powerpc/powerpc64/fpu/s_floor.S
index 57fd63cd66..80cbdc5709 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_floor.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_floor.S
@@ -25,6 +25,7 @@
 	.section	".text"
 
 ENTRY (__floor)
+	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
index cde92b9174..20cbb15ebd 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
@@ -25,6 +25,7 @@
 	.section	".text"
 
 ENTRY (__floorf)
+	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_llrint.S b/sysdeps/powerpc/powerpc64/fpu/s_llrint.S
index bc976a7edb..610b561f25 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_llrint.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_llrint.S
@@ -21,6 +21,7 @@
 
 /* long long int[r3] __llrint (double x[fp1])  */
 ENTRY (__llrint)	
+	CALL_MCOUNT 0
 	fctid	fp13,fp1
 	stfd	fp13,-16(r1)
 	nop	/* Insure the following load is in a different dispatch group */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S b/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S
index aef84de831..b7b2a8fc63 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S
@@ -21,6 +21,7 @@
 
 /* long long int[r3] __llrintf (float x[fp1])  */
 ENTRY (__llrintf)	
+	CALL_MCOUNT 0
 	fctid	fp13,fp1
 	stfd	fp13,-16(r1)
 	nop	/* Insure the following load is in a different dispatch group */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/fpu/s_llround.S
index 320ef0f6d8..a3dcd4c33d 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_llround.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_llround.S
@@ -37,6 +37,7 @@
    to the integer value.  */
 
 ENTRY (__llround)
+	CALL_MCOUNT 0
 	lfd	fp12,.LC0@toc(2)
 	lfd	fp10,.LC1@toc(2)
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S
index ff923056b1..b5ca43bf20 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S
@@ -37,6 +37,7 @@
    to the integer value.  */
 
 ENTRY (__llroundf)
+	CALL_MCOUNT 0
 	lfd	fp12,.LC0@toc(2)
 	lfd	fp10,.LC1@toc(2)
 	fcmpu	cr6,fp1,fp12	/* if (x < 0.0)  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
index f7db7ff80a..79e807269d 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
@@ -28,6 +28,7 @@
 	.section	".text"
 
 ENTRY (__rint)
+	CALL_MCOUNT 0
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
 	fsub	fp12,fp13,fp13	/* generate 0.0  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index de6d654796..eb34dd5e77 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -25,6 +25,7 @@
 	.section	".text"
 
 ENTRY (__rintf)
+	CALL_MCOUNT 0
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_round.S b/sysdeps/powerpc/powerpc64/fpu/s_round.S
index 1c2e5299e5..c0b6d46fea 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_round.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_round.S
@@ -39,6 +39,7 @@
    to the integer value.  */
 
 ENTRY (__round)
+	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
index fd8eb0aea3..23ee4c052b 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
@@ -39,6 +39,7 @@
    to the integer value.  */
 
 ENTRY (__roundf )
+	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_trunc.S b/sysdeps/powerpc/powerpc64/fpu/s_trunc.S
index dc345ab424..3ddd298525 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_trunc.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_trunc.S
@@ -34,6 +34,7 @@
    subtracting +-2**52.  */
 
 ENTRY (__trunc)
+	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
index 727647f6de..b38b722a6f 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
@@ -34,6 +34,7 @@
    subtracting +-2**23.  */
 
 ENTRY (__truncf)
+	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
diff --git a/sysdeps/powerpc/powerpc64/memcpy.S b/sysdeps/powerpc/powerpc64/memcpy.S
index 251e1fed80..9df5bb42b6 100644
--- a/sysdeps/powerpc/powerpc64/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/memcpy.S
@@ -37,6 +37,8 @@
    Each case has a optimized unrolled loop.   */
 
 EALIGN (BP_SYM (memcpy), 5, 0)
+	CALL_MCOUNT 3
+
     cmpldi cr1,5,31
     neg   0,3
     std   3,-16(1)
diff --git a/sysdeps/powerpc/powerpc64/memset.S b/sysdeps/powerpc/powerpc64/memset.S
index f3a299d5c8..1abc59bb17 100644
--- a/sysdeps/powerpc/powerpc64/memset.S
+++ b/sysdeps/powerpc/powerpc64/memset.S
@@ -41,6 +41,7 @@
    to 0, to take advantage of the dcbz instruction.  */
 
 EALIGN (BP_SYM (memset), 5, 0)
+	CALL_MCOUNT 3
 
 #define rTMP	r0
 #define rRTN	r3	/* Initial value of 1st argument.  */
@@ -274,6 +275,7 @@ libc_hidden_builtin_def (memset)
 /* Copied from bzero.S to prevent the linker from inserting a stub
    between bzero and memset.  */
 ENTRY (BP_SYM (__bzero))
+	CALL_MCOUNT 3
 #if __BOUNDED_POINTERS__
 	mr	r6,r4
 	li	r5,0
diff --git a/sysdeps/powerpc/powerpc64/setjmp-common.S b/sysdeps/powerpc/powerpc64/setjmp-common.S
index ab57170875..f82d5a21f5 100644
--- a/sysdeps/powerpc/powerpc64/setjmp-common.S
+++ b/sysdeps/powerpc/powerpc64/setjmp-common.S
@@ -41,6 +41,10 @@
 
 	.machine	"altivec"
 ENTRY (BP_SYM (__sigsetjmp))
+	CALL_MCOUNT 2
+	.globl JUMPTARGET(GLUE(__sigsetjmp,_ent))
+	.hidden JUMPTARGET(GLUE(__sigsetjmp,_ent))
+JUMPTARGET(GLUE(__sigsetjmp,_ent)):
 	CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE)
 	std  r1,(JB_GPR1*8)(3)
 	mflr r0
diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S
index deac0ea229..cc7a6ab66a 100644
--- a/sysdeps/powerpc/powerpc64/stpcpy.S
+++ b/sysdeps/powerpc/powerpc64/stpcpy.S
@@ -26,6 +26,7 @@
 /* char * [r3] stpcpy (char *dest [r3], const char *src [r4])  */
 
 EALIGN (BP_SYM (__stpcpy), 4, 0)
+	CALL_MCOUNT 2
 
 #define rTMP	r0
 #define rRTN	r3
diff --git a/sysdeps/powerpc/powerpc64/strchr.S b/sysdeps/powerpc/powerpc64/strchr.S
index e3222f354d..93ea61e49c 100644
--- a/sysdeps/powerpc/powerpc64/strchr.S
+++ b/sysdeps/powerpc/powerpc64/strchr.S
@@ -26,6 +26,7 @@
 /* char * [r3] strchr (const char *s [r3] , int c [r4] )  */
 
 ENTRY (BP_SYM (strchr))
+	CALL_MCOUNT 2
 
 #define rTMP1	r0
 #define rRTN	r3	/* outgoing result */
diff --git a/sysdeps/powerpc/powerpc64/strcmp.S b/sysdeps/powerpc/powerpc64/strcmp.S
index 7e2dae07f7..4d7eb21bf7 100644
--- a/sysdeps/powerpc/powerpc64/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/strcmp.S
@@ -26,6 +26,7 @@
 /* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4])  */
 
 EALIGN (BP_SYM(strcmp), 4, 0)
+	CALL_MCOUNT 2
 
 #define rTMP	r0
 #define rRTN	r3
diff --git a/sysdeps/powerpc/powerpc64/strcpy.S b/sysdeps/powerpc/powerpc64/strcpy.S
index 4d12b82e5e..e9e9fc78cd 100644
--- a/sysdeps/powerpc/powerpc64/strcpy.S
+++ b/sysdeps/powerpc/powerpc64/strcpy.S
@@ -26,6 +26,7 @@
 /* char * [r3] strcpy (char *dest [r3], const char *src [r4])  */
 
 EALIGN (BP_SYM (strcpy), 4, 0)
+	CALL_MCOUNT 2
 
 #define rTMP	r0
 #define rRTN	r3	/* incoming DEST arg preserved as result */
diff --git a/sysdeps/powerpc/powerpc64/strlen.S b/sysdeps/powerpc/powerpc64/strlen.S
index 760be5884a..4c1385aead 100644
--- a/sysdeps/powerpc/powerpc64/strlen.S
+++ b/sysdeps/powerpc/powerpc64/strlen.S
@@ -79,6 +79,7 @@
 /* int [r3] strlen (char *s [r3])  */
 
 ENTRY (BP_SYM (strlen))
+	CALL_MCOUNT 1
 
 #define rTMP1	r0
 #define rRTN	r3	/* incoming STR arg, outgoing result */
diff --git a/sysdeps/powerpc/powerpc64/strncmp.S b/sysdeps/powerpc/powerpc64/strncmp.S
index bdcfab99e3..04bdc2f606 100644
--- a/sysdeps/powerpc/powerpc64/strncmp.S
+++ b/sysdeps/powerpc/powerpc64/strncmp.S
@@ -26,6 +26,7 @@
 /* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
 
 EALIGN (BP_SYM(strncmp), 4, 0)
+	CALL_MCOUNT 3
 
 #define rTMP	r0
 #define rRTN	r3
diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h
index 4420a6dfac..fead0b578a 100644
--- a/sysdeps/powerpc/powerpc64/sysdep.h
+++ b/sysdeps/powerpc/powerpc64/sysdep.h
@@ -23,37 +23,36 @@
 
 #ifdef __ASSEMBLER__
 
+/* Support macros for CALL_MCOUNT.  */
+	.macro SAVE_ARG NARG
+	.if \NARG
+	SAVE_ARG \NARG-1
+	std	2+\NARG,-72+8*(\NARG)(1)
+	.endif
+	.endm
+
+	.macro REST_ARG NARG
+	.if \NARG
+	REST_ARG \NARG-1
+	ld	2+\NARG,40+8*(\NARG)(1)
+	.endif
+	.endm
+
 /* If compiled for profiling, call `_mcount' at the start of each function.
    see ppc-mcount.S for more details.  */
+	.macro CALL_MCOUNT NARG
 #ifdef	PROF
-/* The mcount code relies on a the return address being on the stack
-   to locate our caller and so it can restore it; so store one just
-   for its benefit.  */
-#ifdef SYSV_ELF_PROFILING
-#define CALL_MCOUNT	\
-  .pushsection;			\
-  .section ".data";		\
-  .align ALIGNARG(2);		\
-__mcount:			\
-  .long  0;			\
-  .previous;			\
-  .section ".toc","aw";		\
-.LC__mcount:;			\
-  .tc __mcount[TC],__mcount;	\
-  .previous;			\
-  mflr  r0;			\
-  std   r0,16(r1);		\
-  ld    r0,.LC__mcount@toc(r2);	\
-  bl    JUMPTARGET(_mcount);
-#else /* SYSV_ELF_PROFILING */
-#define CALL_MCOUNT	\
-  mflr  r0;			\
-  std   r0,16(r1);		\
-  bl    JUMPTARGET(_mcount);
-#endif /* SYSV_ELF_PROFILING */
-#else  /* PROF */
-#define CALL_MCOUNT		/* Do nothing.  */
-#endif /* PROF */
+	mflr	r0
+	SAVE_ARG \NARG
+	std	r0,16(r1)
+	stdu	r1,-112(r1)
+	bl	JUMPTARGET (_mcount)
+	ld	r0,128(r1)
+	REST_ARG \NARG
+	addi	r1,r1,112
+	mtlr	r0
+#endif
+	.endm
 
 #ifdef USE_PPC64_OVERLAPPING_OPD
 # define OPD_ENT(name)	.quad BODY_LABEL (name), .TOC.@tocbase
@@ -106,24 +105,11 @@ BODY_LABEL(name):
 
 /* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes
    past a 2^alignt boundary.  */
-#ifdef PROF
-#define EALIGN(name, alignt, words) \
-	ENTRY_2(name)				\
-	.align ALIGNARG(alignt);		\
-	EALIGN_W_##words;			\
-BODY_LABEL(name):				\
-	CALL_MCOUNT				\
-	b 0f;					\
-	.align ALIGNARG(alignt);		\
-	EALIGN_W_##words;			\
-0:
-#else /* PROF */
 #define EALIGN(name, alignt, words) \
 	ENTRY_2(name)				\
 	.align ALIGNARG(alignt);		\
 	EALIGN_W_##words;			\
 BODY_LABEL(name):
-#endif
 
 /* Local labels stripped out by the linker.  */
 #undef L