about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 2001-04-04 00:01:02 +0000
committer: Ulrich Drepper <drepper@redhat.com> 2001-04-04 00:01:02 +0000
commit: c891b2df087e0219319785c29d3af6042e4ac94f (patch)
tree: 35f252e14d0024b991068214f533f738d23bdab7 /sysdeps
parent: f114375059d2f3ee90aaeea242120b3dda84787b (diff)
download: glibc-c891b2df087e0219319785c29d3af6042e4ac94f.tar.gz
glibc-c891b2df087e0219319785c29d3af6042e4ac94f.tar.xz
glibc-c891b2df087e0219319785c29d3af6042e4ac94f.zip
Update.
2001-04-03  Ulrich Drepper  <drepper@redhat.com>

	* misc/dirname.c (dirname): Handle multiple slashes correctly.

2001-04-03  Martin Schwidefsky  <schwidefsky@de.ibm.com>

	* sysdeps/s390/s390-64/initfini.c: Fix __gmon_start__ GOT access.

2001-04-03  Martin Schwidefsky  <schwidefsky@de.ibm.com>

	* sysdeps/s390/s390-32/bcopy.S: Optimize for speed.
	* sysdeps/s390/s390-64/bcopy.S: Likewise.
	* sysdeps/s390/s390-32/memcpy.S: Likewise.
	* sysdeps/s390/s390-64/memcpy.S: Likewise.

2001-04-02  Bruno Haible  <haible@clisp.cons.org>

	* manual/message.texi (Advanced gettext functions): More specific
	syntax in the plural formula examples.

2001-04-02  Franz Sirl  <Franz.Sirl-kernel@lauterbach.com>

	* sysdeps/powerpc/atomicity.h: Silence warnings.
	* sysdeps/powerpc/dl-machine.h: Likewise.
	* sysdeps/powerpc/register-dump.h: Likewise.
	* sysdeps/powerpc/fpu/s_lrint.c: Likewise.

2001-04-02  Andreas Jaeger  <aj@suse.de>

	* misc/tst-dirname.c (main): Add more tests, derived from a bug
	report by Michael Kerrisk <mtk16@ext.canterbury.ac.nz>.

2001-04-01  Andreas Jaeger  <aj@suse.de>

	* debug/xtrace.sh (pcprofileso): Use SLIBDIR since libpcprofile.so
	is installed there.
	* malloc/memusage.sh (memusageso): Likewise for libmemusage.so.

2001-04-01  H.J. Lu  <hjl@gnu.org>

	* posix/annexc.c (macrofile): Renamed from TMPFILE and set to
	tmpnam (NULL).
	* stdlib/isomac.c (macrofile): Likewise.

2001-03-30  Thorsten Kukuk  <kukuk@suse.de>

	* inet/rcmd.c: Allow AF_UNSPEC as parameter.
	* nis/ypclnt.c (yp_all): Print error message only at last try,
	check for protocol error only if we don't have a network error.
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/powerpc/atomicity.h 58
-rw-r--r-- sysdeps/powerpc/dl-machine.h 190
-rw-r--r-- sysdeps/powerpc/fpu/s_lrint.c 12
-rw-r--r-- sysdeps/powerpc/register-dump.h 34
-rw-r--r-- sysdeps/s390/s390-32/bcopy.S 54
-rw-r--r-- sysdeps/s390/s390-32/memcpy.S 37
-rw-r--r-- sysdeps/s390/s390-64/bcopy.S 47
-rw-r--r-- sysdeps/s390/s390-64/initfini.c 4
-rw-r--r-- sysdeps/s390/s390-64/memcpy.S 34
9 files changed, 245 insertions, 225 deletions
diff --git a/sysdeps/powerpc/atomicity.h b/sysdeps/powerpc/atomicity.h
index 2924b181f5..6942dfecd1 100644
--- a/sysdeps/powerpc/atomicity.h
+++ b/sysdeps/powerpc/atomicity.h
@@ -33,11 +33,11 @@ __attribute__ ((unused))
 exchange_and_add (volatile uint32_t *mem, int val)
 {
   int tmp, result;
-  __asm__ ("\
-0:	lwarx	%0,0,%2
-	add%I3	%1,%0,%3
-	stwcx.	%1,0,%2
-	bne-	0b
+  __asm__ ("\n\
+0:	lwarx	%0,0,%2	\n\
+	add%I3	%1,%0,%3	\n\
+	stwcx.	%1,0,%2	\n\
+	bne-	0b	\n\
 " : "=&b"(result), "=&r"(tmp) : "r" (mem), "Ir"(val) : "cr0", "memory");
   return result;
 }
@@ -47,11 +47,11 @@ __attribute__ ((unused))
 atomic_add (volatile uint32_t *mem, int val)
 {
   int tmp;
-  __asm__ ("\
-0:	lwarx	%0,0,%1
-	add%I2	%0,%0,%2
-	stwcx.	%0,0,%1
-	bne-	0b
+  __asm__ ("\n\
+0:	lwarx	%0,0,%1	\n\
+	add%I2	%0,%0,%2	\n\
+	stwcx.	%0,0,%1	\n\
+	bne-	0b	\n\
 " : "=&b"(tmp) : "r" (mem), "Ir"(val) : "cr0", "memory");
 }
 
@@ -60,14 +60,14 @@ __attribute__ ((unused))
 compare_and_swap (volatile long int *p, long int oldval, long int newval)
 {
   int result;
-  __asm__ ("\
-0:	lwarx	%0,0,%1
-	sub%I2c.	%0,%0,%2
-	cntlzw	%0,%0
-	bne-	1f
-	stwcx.	%3,0,%1
-	bne-	0b
-1:
+  __asm__ ("\n\
+0:	lwarx	%0,0,%1	\n\
+	sub%I2c.	%0,%0,%2	\n\
+	cntlzw	%0,%0	\n\
+	bne-	1f	\n\
+	stwcx.	%3,0,%1	\n\
+	bne-	0b	\n\
+1:	\n\
 " : "=&b"(result) : "r"(p), "Ir"(oldval), "r"(newval) : "cr0", "memory");
   return result >> 5;
 }
@@ -77,10 +77,10 @@ __attribute__ ((unused))
 always_swap (volatile long int *p, long int newval)
 {
   long int result;
-  __asm__ ("\
-0:	lwarx	%0,0,%1
-	stwcx.	%2,0,%1
-	bne-	0b
+  __asm__ ("\n\
+0:	lwarx	%0,0,%1	\n\
+	stwcx.	%2,0,%1	\n\
+	bne-	0b	\n\
 " : "=&r"(result) : "r"(p), "r"(newval) : "cr0", "memory");
   return result;
 }
@@ -90,13 +90,13 @@ __attribute__ ((unused))
 test_and_set (volatile long int *p, long int newval)
 {
   int result;
-  __asm__ ("\
-0:	lwarx	%0,0,%1
-	cmpwi	%0,0
-	bne-	1f
-	stwcx.	%2,0,%1
-	bne-	0b
-1:
+  __asm__ ("\n\
+0:	lwarx	%0,0,%1	\n\
+	cmpwi	%0,0	\n\
+	bne-	1f	\n\
+	stwcx.	%2,0,%1	\n\
+	bne-	0b	\n\
+1:	\n\
 " : "=&r"(result) : "r"(p), "r"(newval) : "cr0", "memory");
   return result;
 }
diff --git a/sysdeps/powerpc/dl-machine.h b/sysdeps/powerpc/dl-machine.h
index 961aafbd62..80db704835 100644
--- a/sysdeps/powerpc/dl-machine.h
+++ b/sysdeps/powerpc/dl-machine.h
@@ -102,101 +102,101 @@ elf_machine_load_address (void)
 /* This code is used in dl-runtime.c to call the `fixup' function
    and then redirect to the address it returns.  It is called
    from code built in the PLT by elf_machine_runtime_setup.  */
-#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
-	.section \".text\"
-	.align 2
-	.globl _dl_runtime_resolve
-	.type _dl_runtime_resolve,@function
-_dl_runtime_resolve:
- # We need to save the registers used to pass parameters, and register 0,
- # which is used by _mcount; the registers are saved in a stack frame.
-	stwu 1,-64(1)
-	stw 0,12(1)
-	stw 3,16(1)
-	stw 4,20(1)
- # The code that calls this has put parameters for `fixup' in r12 and r11.
-	mr 3,12
-	stw 5,24(1)
-	mr 4,11
-	stw 6,28(1)
-	mflr 0
- # We also need to save some of the condition register fields.
-	stw 7,32(1)
-	stw 0,48(1)
-	stw 8,36(1)
-	mfcr 0
-	stw 9,40(1)
-	stw 10,44(1)
-	stw 0,8(1)
-	bl fixup@local
- # 'fixup' returns the address we want to branch to.
-	mtctr 3
- # Put the registers back...
-	lwz 0,48(1)
-	lwz 10,44(1)
-	lwz 9,40(1)
-	mtlr 0
-	lwz 8,36(1)
-	lwz 0,8(1)
-	lwz 7,32(1)
-	lwz 6,28(1)
-	mtcrf 0xFF,0
-	lwz 5,24(1)
-	lwz 4,20(1)
-	lwz 3,16(1)
-	lwz 0,12(1)
- # ...unwind the stack frame, and jump to the PLT entry we updated.
-	addi 1,1,64
-	bctr
-	.size	 _dl_runtime_resolve,.-_dl_runtime_resolve
-
-	.align 2
-	.globl _dl_prof_resolve
-	.type _dl_prof_resolve,@function
-_dl_prof_resolve:
- # We need to save the registers used to pass parameters, and register 0,
- # which is used by _mcount; the registers are saved in a stack frame.
-	stwu 1,-64(1)
-        stw 0,12(1)
-	stw 3,16(1)
-	stw 4,20(1)
- # The code that calls this has put parameters for `fixup' in r12 and r11.
-	mr 3,12
-	stw 5,24(1)
-	mr 4,11
-	stw 6,28(1)
-	mflr 5
- # We also need to save some of the condition register fields.
-	stw 7,32(1)
-	stw 5,48(1)
-	stw 8,36(1)
-	mfcr 0
-	stw 9,40(1)
-	stw 10,44(1)
-	stw 0,8(1)
-	bl profile_fixup@local
- # 'fixup' returns the address we want to branch to.
-	mtctr 3
- # Put the registers back...
-	lwz 0,48(1)
-	lwz 10,44(1)
-	lwz 9,40(1)
-	mtlr 0
-	lwz 8,36(1)
-	lwz 0,8(1)
-	lwz 7,32(1)
-	lwz 6,28(1)
-	mtcrf 0xFF,0
-	lwz 5,24(1)
-	lwz 4,20(1)
-	lwz 3,16(1)
-        lwz 0,12(1)
- # ...unwind the stack frame, and jump to the PLT entry we updated.
-	addi 1,1,64
-	bctr
-	.size	 _dl_prof_resolve,.-_dl_prof_resolve
- # Undo '.section text'.
-	.previous
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\n\
+	.section \".text\"	\n\
+	.align 2	\n\
+	.globl _dl_runtime_resolve	\n\
+	.type _dl_runtime_resolve,@function	\n\
+_dl_runtime_resolve:	\n\
+ # We need to save the registers used to pass parameters, and register 0,\n\
+ # which is used by _mcount; the registers are saved in a stack frame.\n\
+	stwu 1,-64(1)	\n\
+	stw 0,12(1)	\n\
+	stw 3,16(1)	\n\
+	stw 4,20(1)	\n\
+ # The code that calls this has put parameters for `fixup' in r12 and r11.\n\
+	mr 3,12	\n\
+	stw 5,24(1)	\n\
+	mr 4,11	\n\
+	stw 6,28(1)	\n\
+	mflr 0	\n\
+ # We also need to save some of the condition register fields.\n\
+	stw 7,32(1)	\n\
+	stw 0,48(1)	\n\
+	stw 8,36(1)	\n\
+	mfcr 0	\n\
+	stw 9,40(1)	\n\
+	stw 10,44(1)	\n\
+	stw 0,8(1)	\n\
+	bl fixup@local	\n\
+ # 'fixup' returns the address we want to branch to.\n\
+	mtctr 3	\n\
+ # Put the registers back...\n\
+	lwz 0,48(1)	\n\
+	lwz 10,44(1)	\n\
+	lwz 9,40(1)	\n\
+	mtlr 0	\n\
+	lwz 8,36(1)	\n\
+	lwz 0,8(1)	\n\
+	lwz 7,32(1)	\n\
+	lwz 6,28(1)	\n\
+	mtcrf 0xFF,0	\n\
+	lwz 5,24(1)	\n\
+	lwz 4,20(1)	\n\
+	lwz 3,16(1)	\n\
+	lwz 0,12(1)	\n\
+ # ...unwind the stack frame, and jump to the PLT entry we updated.\n\
+	addi 1,1,64	\n\
+	bctr	\n\
+	.size	 _dl_runtime_resolve,.-_dl_runtime_resolve	\n\
+	\n\
+	.align 2	\n\
+	.globl _dl_prof_resolve	\n\
+	.type _dl_prof_resolve,@function	\n\
+_dl_prof_resolve:	\n\
+ # We need to save the registers used to pass parameters, and register 0,\n\
+ # which is used by _mcount; the registers are saved in a stack frame.\n\
+	stwu 1,-64(1)	\n\
+        stw 0,12(1)	\n\
+	stw 3,16(1)	\n\
+	stw 4,20(1)	\n\
+ # The code that calls this has put parameters for `fixup' in r12 and r11.\n\
+	mr 3,12	\n\
+	stw 5,24(1)	\n\
+	mr 4,11	\n\
+	stw 6,28(1)	\n\
+	mflr 5	\n\
+ # We also need to save some of the condition register fields.\n\
+	stw 7,32(1)	\n\
+	stw 5,48(1)	\n\
+	stw 8,36(1)	\n\
+	mfcr 0	\n\
+	stw 9,40(1)	\n\
+	stw 10,44(1)	\n\
+	stw 0,8(1)	\n\
+	bl profile_fixup@local	\n\
+ # 'fixup' returns the address we want to branch to.\n\
+	mtctr 3	\n\
+ # Put the registers back...\n\
+	lwz 0,48(1)	\n\
+	lwz 10,44(1)	\n\
+	lwz 9,40(1)	\n\
+	mtlr 0	\n\
+	lwz 8,36(1)	\n\
+	lwz 0,8(1)	\n\
+	lwz 7,32(1)	\n\
+	lwz 6,28(1)	\n\
+	mtcrf 0xFF,0	\n\
+	lwz 5,24(1)	\n\
+	lwz 4,20(1)	\n\
+	lwz 3,16(1)	\n\
+        lwz 0,12(1)	\n\
+ # ...unwind the stack frame, and jump to the PLT entry we updated.\n\
+	addi 1,1,64	\n\
+	bctr	\n\
+	.size	 _dl_prof_resolve,.-_dl_prof_resolve	\n\
+ # Undo '.section text'.\n\
+	.previous	\n\
 ");
 
 /* The actual _start code is in dl-start.S.  Use a really
diff --git a/sysdeps/powerpc/fpu/s_lrint.c b/sysdeps/powerpc/fpu/s_lrint.c
index a060598859..ce8df3193e 100644
--- a/sysdeps/powerpc/fpu/s_lrint.c
+++ b/sysdeps/powerpc/fpu/s_lrint.c
@@ -32,12 +32,12 @@ __lrint (double x)
 weak_alias (__lrint, lrint)
 
 /* This code will also work for a 'float' argument.  */
-asm ("\
-        .globl __lrintf
-        .globl lrintf
-        .weak lrintf
-	.set __lrintf,__lrint
-	.set lrintf,__lrint
+asm ("\n\
+	.globl __lrintf	\n\
+	.globl lrintf	\n\
+	.weak lrintf	\n\
+	.set __lrintf,__lrint	\n\
+	.set lrintf,__lrint	\n\
 ");
 
 #ifdef NO_LONG_DOUBLE
diff --git a/sysdeps/powerpc/register-dump.h b/sysdeps/powerpc/register-dump.h
index f1d3985e32..5cea8f8775 100644
--- a/sysdeps/powerpc/register-dump.h
+++ b/sysdeps/powerpc/register-dump.h
@@ -22,23 +22,23 @@
 
 /* This prints out the information in the following form: */
 static const char dumpform[] = "\
-Register dump:
-fp0-3:   0000030%0000031% 0000032%0000033% 0000034%0000035% 0000036%0000037%
-fp4-7:   0000038%0000039% 000003a%000003b% 000003c%000003d% 000003e%000003f%
-fp8-11:  0000040%0000041% 0000042%0000043% 0000044%0000045% 0000046%0000047%
-fp12-15: 0000048%0000049% 000004a%000004b% 000004c%000004d% 000004e%000004f%
-fp16-19: 0000050%0000051% 0000052%0000053% 0000054%0000055% 0000056%0000057%
-fp20-23: 0000058%0000059% 000005a%000005b% 000005c%000005d% 000005e%000005f%
-fp24-27: 0000060%0000061% 0000062%0000063% 0000064%0000065% 0000066%0000067%
-fp28-31: 0000068%0000069% 000006a%000006b% 000006c%000006d% 000006e%000006f%
-r0 =0000000% sp =0000001% r2 =0000002% r3 =0000003%  trap=0000028%
-r4 =0000004% r5 =0000005% r6 =0000006% r7 =0000007%   sr0=0000020% sr1=0000021%
-r8 =0000008% r9 =0000009% r10=000000a% r11=000000b%   dar=0000029% dsi=000002a%
-r12=000000c% r13=000000d% r14=000000e% r15=000000f%   r3*=0000022%
-r16=0000010% r17=0000011% r18=0000012% r19=0000013%
-r20=0000014% r21=0000015% r22=0000016% r23=0000017%    lr=0000024% xer=0000025%
-r24=0000018% r25=0000019% r26=000001a% r27=000001b%    mq=0000027% ctr=0000023%
-r28=000001c% r29=000001d% r30=000001e% r31=000001f%  fscr=0000071% ccr=0000026%
+Register dump:\n\
+fp0-3:   0000030%0000031% 0000032%0000033% 0000034%0000035% 0000036%0000037%\n\
+fp4-7:   0000038%0000039% 000003a%000003b% 000003c%000003d% 000003e%000003f%\n\
+fp8-11:  0000040%0000041% 0000042%0000043% 0000044%0000045% 0000046%0000047%\n\
+fp12-15: 0000048%0000049% 000004a%000004b% 000004c%000004d% 000004e%000004f%\n\
+fp16-19: 0000050%0000051% 0000052%0000053% 0000054%0000055% 0000056%0000057%\n\
+fp20-23: 0000058%0000059% 000005a%000005b% 000005c%000005d% 000005e%000005f%\n\
+fp24-27: 0000060%0000061% 0000062%0000063% 0000064%0000065% 0000066%0000067%\n\
+fp28-31: 0000068%0000069% 000006a%000006b% 000006c%000006d% 000006e%000006f%\n\
+r0 =0000000% sp =0000001% r2 =0000002% r3 =0000003%  trap=0000028%\n\
+r4 =0000004% r5 =0000005% r6 =0000006% r7 =0000007%   sr0=0000020% sr1=0000021%\n\
+r8 =0000008% r9 =0000009% r10=000000a% r11=000000b%   dar=0000029% dsi=000002a%\n\
+r12=000000c% r13=000000d% r14=000000e% r15=000000f%   r3*=0000022%\n\
+r16=0000010% r17=0000011% r18=0000012% r19=0000013%\n\
+r20=0000014% r21=0000015% r22=0000016% r23=0000017%    lr=0000024% xer=0000025%\n\
+r24=0000018% r25=0000019% r26=000001a% r27=000001b%    mq=0000027% ctr=0000023%\n\
+r28=000001c% r29=000001d% r30=000001e% r31=000001f%  fscr=0000071% ccr=0000026%\n\
 ";
 
 /* Most of the fields are self-explanatory.  'sr0' is the next
diff --git a/sysdeps/s390/s390-32/bcopy.S b/sysdeps/s390/s390-32/bcopy.S
index 8bf5bbbefa..526e3361d1 100644
--- a/sysdeps/s390/s390-32/bcopy.S
+++ b/sysdeps/s390/s390-32/bcopy.S
@@ -1,4 +1,4 @@
-/* bcopy -- copy a block from source to destination.  For IBM S390
+/* bcopy -- copy a block from source to destination.  S/390 version.
    This file is part of the GNU C Library.
    Copyright (C) 2000, 2001 Free Software Foundation, Inc.
    Contributed by Martin Schwidefsky (schwidefsky@de.ibm.com).
@@ -18,48 +18,53 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-/*
- * R2 = address of source
- * R3 = address of destination
- * R4 = number of bytes to copy
- */
+/* INPUT PARAMETERS
+     %r2 = address of source
+     %r3 = address of destination
+     %r4 = number of bytes to copy.  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
         .text
 ENTRY(__bcopy)
+	ltr     %r1,%r4             # zero bcopy ?
+	jz      .L4
         clr     %r2,%r3             # check against destructive overlap
         jnl     .L0
-        lr      %r1,%r2
-        alr     %r1,%r4
+        alr     %r1,%r2
         clr     %r1,%r3
-        jh      .L2
-.L0:
-	lr      %r5,%r4		    # source length
-        lr      %r4,%r2             # source address
-        sr      %r1,%r1             # set pad byte to zero
-	lr      %r2,%r3             # set destination
-        lr      %r3,%r5             # destination length = source length
-.L1:    mvcle   %r2,%r4,0(%r1)      # thats it, MVCLE is your friend
-        jo      .L1
-        br      %r14
-.L2:                                # destructive overlay, can not use mvcle
+        jh      .L5
+.L0:	ahi     %r4,-1              # length - 1
+	lr      %r1,%r4
+	srl     %r1,8
+	ltr     %r1,%r1
+	jz      .L2
+.L1:	mvc     0(256,%r3),0(%r2)   # move in 256 byte chunks
+	la      %r2,256(%r2)
+	la      %r3,256(%r3)
+	brct    %r1,.L1
+.L2:	bras    %r1,.L3             # setup base pointer for execute
+	mvc     0(1,%r3),0(%r2)     # instruction for execute
+.L3:	ex      %r4,0(%r1)          # execute mvc with length ((%r4)&255)+1
+.L4:	br      %r14
+
+.L5:                                # destructive overlay, can not use mvcle
         lr     %r1,%r2              # bcopy is called with source,dest
         lr     %r2,%r3              # memmove with dest,source! Oh, well...
         lr     %r3,%r1
         basr   %r1,0
-.L3:
+.L6:
 #ifdef PIC
-        al     %r1,.L4-.L3(%r1)     # get address of global offset table
+        al     %r1,.L7-.L6(%r1)     # get address of global offset table
                                     # load address of memmove
         l      %r1,memmove@GOT12(%r1)
         br     %r1
-.L4:    .long  _GLOBAL_OFFSET_TABLE_-.L3
+.L7:    .long  _GLOBAL_OFFSET_TABLE_-.L6
 #else
-        al     %r1,.L4-.L3(%r1)     # load address of memmove
+        al     %r1,.L7-.L6(%r1)     # load address of memmove
         br     %r1                  # jump to memmove
-.L4:    .long  memmove-.L3
+.L7:    .long  memmove-.L6
 #endif
 
 END(__bcopy)
@@ -67,3 +72,4 @@ END(__bcopy)
 #ifndef NO_WEAK_ALIAS
 weak_alias (__bcopy, bcopy)
 #endif
+
diff --git a/sysdeps/s390/s390-32/memcpy.S b/sysdeps/s390/s390-32/memcpy.S
index 4bbec7c633..9c4b840928 100644
--- a/sysdeps/s390/s390-32/memcpy.S
+++ b/sysdeps/s390/s390-32/memcpy.S
@@ -1,4 +1,4 @@
-/* Set a block of memory to some byte value.  For IBM S390
+/* memcpy - copy a block from source to destination.  S/390 version.
    Copyright (C) 2000, 2001 Free Software Foundation, Inc.
    Contributed by Martin Schwidefsky (schwidefsky@de.ibm.com).
 
@@ -17,25 +17,30 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-/*
- * R2 = address to destination memory area
- * R3 = address to source memory area
- * R4 = number of bytes to copy
- */
+/* INPUT PARAMETERS
+     %r2 = address of destination memory area
+     %r3 = address of source memory area
+     %r4 = number of bytes to copy.  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 	.text
 ENTRY(memcpy)
-	ltr     %r5,%r4
+	ltr     %r4,%r4
+	jz      .L3
+	ahi     %r4,-1              # length - 1
+	lr      %r1,%r2             # copy destination address
+	lr      %r5,%r4
+	sra     %r5,8
 	jz      .L1
-	lr      %r4,%r3             # %r4/%r5 = source ptr/len
-	lr      %r3,%r5             # %r2/%r3 = dest ptr/len
-	lr      %r0,%r2             # save source address
-.L0:    mvcle   %r2,%r4,0           # thats it, MVCLE is your friend
-	jo      .L0
-	lr      %r2,%r0             # return value is source address
-.L1:
-	br      %r14
-END(memset)
+.L0:	mvc     0(256,%r1),0(%r3)   # move in 256 byte chunks
+	la      %r1,256(%r1)
+	la      %r3,256(%r3)
+	brct    %r5,.L0
+.L1:	bras    %r5,.L2             # setup base pointer for execute
+	mvc     0(1,%r1),0(%r3)     # instruction for execute
+.L2:	ex      %r4,0(%r5)          # execute mvc with length ((%r4)&255)+1
+.L3:	br      %r14
+END(memcpy)
+	
diff --git a/sysdeps/s390/s390-64/bcopy.S b/sysdeps/s390/s390-64/bcopy.S
index 5cb02b3162..0d75d3ea9f 100644
--- a/sysdeps/s390/s390-64/bcopy.S
+++ b/sysdeps/s390/s390-64/bcopy.S
@@ -1,6 +1,6 @@
 /* bcopy -- copy a block from source to destination.  64 bit S/390 version.
    This file is part of the GNU C Library.
-   Copyright (C) 2001 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001 Free Software Foundation, Inc.
    Contributed by Martin Schwidefsky (schwidefsky@de.ibm.com).
 
    The GNU C Library is free software  ; you can redistribute it and/or
@@ -26,27 +26,32 @@
 #include "sysdep.h"
 #include "asm-syntax.h"
 
-        .text
+	.text
 ENTRY(__bcopy)
-        clgr    %r2,%r3             # check against destructive overlap
-        jnl     .L0
-        lgr     %r1,%r2
-        algr    %r1,%r4
-        clgr    %r1,%r3
-        jh      .L2
-.L0:
-	lgr     %r5,%r4		    # source length
-        lgr     %r4,%r2             # source address
-        sgr     %r1,%r1             # set pad byte to zero
-	lgr     %r2,%r3             # set destination
-        lgr     %r3,%r5             # destination length = source length
-.L1:    mvcle   %r2,%r4,0(%r1)      # thats it, MVCLE is your friend
-        jo      .L1
-        br      %r14
-.L2:                                # destructive overlay, can not use mvcle
-        lgr    %r1,%r2              # bcopy is called with source,dest
-        lgr    %r2,%r3              # memmove with dest,source! Oh, well...
-        lgr    %r3,%r1
+	ltgr	%r1,%r4		    # zero bcopy ?
+	jz	.L4
+	clgr	%r2,%r3		    # check against destructive overlap
+	jnl	.L0
+	algr	%r1,%r2
+	clgr	%r1,%r3
+	jh	.L5
+.L0:	aghi	%r4,-1		    # length - 1
+	srlg	%r1,%r4,8
+	ltgr	%r1,%r1
+	jz	.L2
+.L1:	mvc	0(256,%r3),0(%r2)   # move in 256 byte chunks
+	la	%r2,256(%r2)
+	la	%r3,256(%r3)
+	brctg	%r1,.L1
+.L2:	bras	%r1,.L3		    # setup base pointer for execute
+	mvc	0(1,%r3),0(%r2)	    # instruction for execute
+.L3:	ex	%r4,0(%r1)	    # execute mvc with length ((%r4)&255)+1
+.L4:	br	%r14
+
+.L5:				    # destructive overlay, can not use mvcle
+	lgr    %r1,%r2		    # bcopy is called with source,dest
+	lgr    %r2,%r3		    # memmove with dest,source! Oh, well...
+	lgr    %r3,%r1
 #ifdef PIC
 	jg     memmove@PLT
 #else
diff --git a/sysdeps/s390/s390-64/initfini.c b/sysdeps/s390/s390-64/initfini.c
index 3d14b0b283..be5614b203 100644
--- a/sysdeps/s390/s390-64/initfini.c
+++ b/sysdeps/s390/s390-64/initfini.c
@@ -68,8 +68,8 @@ _init:
 	AGHI	15,-160
 	STG	1,0(15)
         LARL    12,_GLOBAL_OFFSET_TABLE_
-        LGHI    1,__gmon_start__@GOT
-	LG      1,0(1,12)
+	LARL    1,__gmon_start__@GOTENT
+	LG      1,0(1)
 	LTGR    1,1
 	JE      .L22
 	BASR    14,1
diff --git a/sysdeps/s390/s390-64/memcpy.S b/sysdeps/s390/s390-64/memcpy.S
index c59d3fbb79..008f026909 100644
--- a/sysdeps/s390/s390-64/memcpy.S
+++ b/sysdeps/s390/s390-64/memcpy.S
@@ -1,7 +1,6 @@
-/* Set a block of memory to some byte value.  64 bit S/390 version.
-   Copyright (C) 2001 Free Software Foundation, Inc.
+/* memcpy - copy a block from source to destination.  64 bit S/390 version.
+   Copyright (C) 2000, 2001 Free Software Foundation, Inc.
    Contributed by Martin Schwidefsky (schwidefsky@de.ibm.com).
-   This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
@@ -19,8 +18,8 @@
    Boston, MA 02111-1307, USA.  */
 
 /* INPUT PARAMETERS
-     %r2 = address to destination memory area
-     %r3 = address to source memory area
+     %r2 = address of destination memory area
+     %r3 = address of source memory area
      %r4 = number of bytes to copy.  */
 
 #include "sysdep.h"
@@ -28,14 +27,19 @@
 
 	.text
 ENTRY(memcpy)
-	ltgr	%r5,%r4
+	ltgr	%r4,%r4
+	jz	.L3
+	aghi	%r4,-1		    # length - 1
+	lgr	%r1,%r2		    # copy destination address
+	srag	%r5,%r4,8
 	jz	.L1
-	lgr	%r4,%r3		    # %r4/%r5 = source ptr/len
-	lgr	%r3,%r5		    # %r2/%r3 = dest ptr/len
-	lgr	%r0,%r2		    # save source address
-.L0:	mvcle	%r2,%r4,0	    # thats it, MVCLE is your friend
-	jo	.L0
-	lgr	%r2,%r0		    # return value is source address
-.L1:
-	br	%r14
-END(memset)
+.L0:	mvc	0(256,%r1),0(%r3)   # move in 256 byte chunks
+	la	%r1,256(%r1)
+	la	%r3,256(%r3)
+	brctg	%r5,.L0
+.L1:	bras	%r5,.L2		    # setup base pointer for execute
+	mvc	0(1,%r1),0(%r3)	    # instruction for execute
+.L2:	ex	%r4,0(%r5)	    # execute mvc with length ((%r4)&255)+1
+.L3:	br	%r14
+END(memcpy)
+