2 files changed, 28 insertions, 5 deletions
diff --git a/sysdeps/s390/s390-64/bcopy.S b/sysdeps/s390/s390-64/bcopy.S
index bb3acdb708..ff7966723b 100644
--- a/sysdeps/s390/s390-64/bcopy.S
+++ b/sysdeps/s390/s390-64/bcopy.S
@@ -34,11 +34,13 @@ ENTRY(__bcopy)
 	jnl	.L0
 	algr	%r1,%r2
 	clgr	%r1,%r3
-	jh	.L5
+	jh	.L7		    # destructive overlay, handled at .L7
 .L0:	aghi	%r4,-1		    # length - 1
 	srlg	%r1,%r4,8
-	ltgr	%r1,%r1
+	ltgr	%r1,%r1             # <= 256 bytes to move ?
 	jz	.L2
+	cghi    %r1,255             # > 255 chunks (> 64KB, not 1MB) ?
+	jh      .L5                 # yes, take the mvcle path
 .L1:	mvc	0(256,%r3),0(%r2)   # move in 256 byte chunks
 	la	%r2,256(%r2)
 	la	%r3,256(%r3)
@@ -47,8 +49,16 @@ ENTRY(__bcopy)
 	mvc	0(1,%r3),0(%r2)	    # instruction for execute
 .L3:	ex	%r4,0(%r1)	    # execute mvc with length ((%r4)&255)+1
 .L4:	br	%r14
-
-.L5:				    # destructive overlay, can not use mvcle
+	# large copies are faster with mvcle; %r4 is length - 1 on entry.
+.L5:	aghi    %r4,1               # undo the earlier length - 1
+	lgr	%r5,%r4	            # source length (pairs with %r4)
+	lgr	%r4,%r2             # source address (bcopy: source,dest)
+	lgr	%r2,%r3             # destination address
+	lgr	%r3,%r5             # destination length = source length
+.L6:	mvcle	%r2,%r4,0           # copy; equal lengths, pad byte 0 unused
+	jo	.L6		    # cc 3: not finished yet, resume the copy
+	br	%r14
+.L7:				    # destructive overlay, can not use mvcle
 	lgr    %r1,%r2		    # bcopy is called with source,dest
 	lgr    %r2,%r3		    # memmove with dest,source! Oh, well...
 	lgr    %r3,%r1
diff --git a/sysdeps/s390/s390-64/memcpy.S b/sysdeps/s390/s390-64/memcpy.S
index 8212bd39d6..d13c0ad14e 100644
--- a/sysdeps/s390/s390-64/memcpy.S
+++ b/sysdeps/s390/s390-64/memcpy.S
@@ -32,8 +32,11 @@ ENTRY(memcpy)
 	jz	.L3
 	aghi	%r4,-1		    # length - 1
 	lgr	%r1,%r2		    # copy destination address
-	srag	%r5,%r4,8
+	srlg	%r5,%r4,8	    # %r5 = (length - 1) / 256, unsigned
+	ltgr    %r5,%r5             # <= 256 bytes to move ?
 	jz	.L1
+	cghi    %r5,255             # > 255 chunks (> 64KB) to move ?
+	jh	.L4		    # yes, take the mvcle path
 .L0:	mvc	0(256,%r1),0(%r3)   # move in 256 byte chunks
 	la	%r1,256(%r1)
 	la	%r3,256(%r3)
@@ -42,5 +45,15 @@ ENTRY(memcpy)
 	mvc	0(1,%r1),0(%r3)	    # instruction for execute
 .L2:	ex	%r4,0(%r5)	    # execute mvc with length ((%r4)&255)+1
 .L3:	br	%r14
+	# large copies are faster with mvcle; %r4 is length - 1 on entry.
+.L4:	aghi    %r4,1               # undo the earlier length - 1
+	lgr	%r5,%r4	            # source length (pairs with %r4)
+	lgr	%r4,%r3             # source address (memcpy: dest,source)
+	# destination address is already in %r2 and must stay there
+	lgr	%r3,%r5             # destination length = source length
+.L5:	mvcle	%r2,%r4,0           # copy; equal lengths, pad byte 0 unused
+	jo	.L5		    # cc 3: not finished yet, resume the copy
+	lgr     %r2,%r1             # return destination address
+	br	%r14
 END(memcpy)