author    Ulrich Drepper <drepper@redhat.com>    2008-04-09 20:01:52 +0000
committer Ulrich Drepper <drepper@redhat.com>    2008-04-09 20:01:52 +0000
commit    21208604353a51f9c6430db9b33f9bb85ff8b8b9 (patch)
tree      81a0c37c74922461cb33a4b395e6a62f2e22ed42
parent    3f981865683d4c19b180543171a0b07e3d8c4942 (diff)
[BZ #4314]
	* sysdeps/i386/i686/memcpy.S: Optimize copying of aligned buffers.
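
The idea behind the fix: when source and destination share the same alignment modulo 4, a few leading byte copies bring both pointers to a word boundary, after which the bulk of the copy runs as 4-byte moves (rep movsl) plus a 0-3 byte tail. A minimal C sketch of that strategy follows; memcpy_sketch is a hypothetical name for illustration, not glibc code, and the authoritative version is the assembly in the diff below.

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	/* Hypothetical illustration of the patched strategy.  "Same
	   alignment" means the low two bits of dst and src agree, so both
	   pointers reach a 4-byte boundary after the same number of
	   leading byte copies.  */
	static void *
	memcpy_sketch (void *dst, const void *src, size_t len)
	{
	  unsigned char *d = dst;
	  const unsigned char *s = src;

	  /* The asm computes (dst ^ src) & 3: zero iff both pointers can
	     be word-aligned together.  Short copies skip the whole dance.  */
	  if ((((uintptr_t) d ^ (uintptr_t) s) & 3) == 0 && len > 3)
	    {
	      while (((uintptr_t) s & 3) != 0)	/* up to 3 byte copies (movsb) */
		{
		  *d++ = *s++;
		  len--;
		}
	      for (size_t n = len >> 2; n > 0; n--)	/* bulk: rep movsl */
		{
		  memcpy (d, s, 4);	/* stands in for one 32-bit move */
		  d += 4;
		  s += 4;
		}
	      len &= 3;			/* 0-3 tail bytes: rep movsb */
	    }
	  while (len-- > 0)		/* short or mismatched: byte loop here;
					   the asm instead jumps to .Lunaligned */
	    *d++ = *s++;
	  return dst;
	}

One subtlety worth noting when reading the patch: andl $3, %ecx sets ZF, and neither movl LEN(%esp), %ecx nor cld modifies it (cld only clears DF), so the later jne .Lunaligned still branches on the alignment comparison even though %ecx has already been reloaded with the length.
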
-rw-r--r--  ChangeLog                   3
-rw-r--r--  sysdeps/i386/i686/memcpy.S  57
2 files changed, 49 insertions(+), 11 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 76ec21be9b..38efd89295 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2008-04-09  Ulrich Drepper  <drepper@redhat.com>
 
+	[BZ #4314]
+	* sysdeps/i386/i686/memcpy.S: Optimize copying of aligned buffers.
+
 	[BZ #5209]
 	* sysdeps/unix/sysv/syscalls.list: The times syscall doesn't return
 	an error value.
diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S
index 00e84ec2e5..ff5c66e9d4 100644
--- a/sysdeps/i386/i686/memcpy.S
+++ b/sysdeps/i386/i686/memcpy.S
@@ -1,7 +1,7 @@
 /* Copy memory block and return pointer to beginning of destination block
    For Intel 80x86, x>=6.
    This file is part of the GNU C Library.
-   Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2003, 2004, 2008 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -41,29 +41,64 @@ END (__memcpy_chk)
 ENTRY (BP_SYM (memcpy))
 	ENTER
 
-	movl	LEN(%esp), %ecx
 	movl	%edi, %eax
 	movl	DEST(%esp), %edi
 	movl	%esi, %edx
 	movl	SRC(%esp), %esi
-	CHECK_BOUNDS_BOTH_WIDE (%edi, DEST(%esp), %ecx)
-	CHECK_BOUNDS_BOTH_WIDE (%esi, SRC(%esp), %ecx)
 
+	movl	%edi, %ecx
+	xorl	%esi, %ecx
+	andl	$3, %ecx
+	movl	LEN(%esp), %ecx
 	cld
-	shrl	$1, %ecx
-	jnc	1f
+	jne	.Lunaligned
+
+	cmpl	$3, %ecx
+	jbe	.Lunaligned
+
+	testl	$3, %esi
+	je	1f
 	movsb
-1:	shrl	$1, %ecx
-	jnc	2f
-	movsw
-2:	rep
+	decl	%ecx
+	testl	$3, %esi
+	je	1f
+	movsb
+	decl	%ecx
+	testl	$3, %esi
+	je	1f
+	movsb
+	decl	%ecx
+1:	pushl	%eax
+	movl	%ecx, %eax
+	shrl	$2, %ecx
+	rep
 	movsl
-	movl	%eax, %edi
+	movl	%eax, %ecx
+	andl	$3, %ecx
+	rep
+	movsb
+	popl	%eax
+
+.Lend:	movl	%eax, %edi
 	movl	%edx, %esi
 	movl	DEST(%esp), %eax
 	RETURN_BOUNDED_POINTER (DEST(%esp))
 
 	LEAVE
 	RET_PTR
+
+	/* When we come here the pointers do not have the same
+	   alignment or the length is too short.  No need to optimize for
+	   aligned memory accesses. */
+.Lunaligned:
+	shrl	$1, %ecx
+	jnc	1f
+	movsb
+1:	shrl	$1, %ecx
+	jnc	2f
+	movsw
+2:	rep
+	movsl
+	jmp	.Lend
 END (BP_SYM (memcpy))
 libc_hidden_builtin_def (memcpy)
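
For mismatched alignments or lengths below four bytes, the patch keeps the old code as the .Lunaligned path: one movsb if the length is odd, one movsw if the remaining count of halfwords is odd, then rep movsl through possibly misaligned pointers. A C rendering of that dispatch follows; copy_unaligned is a hypothetical name for illustration only.

	#include <stddef.h>
	#include <string.h>

	/* Hypothetical illustration of the .Lunaligned fallback, i.e. the
	   pre-patch code retained for mismatched alignments and short
	   copies.  */
	static void
	copy_unaligned (unsigned char *d, const unsigned char *s, size_t len)
	{
	  if (len & 1)			/* shrl $1; carry set -> movsb */
	    *d++ = *s++;
	  len >>= 1;
	  if (len & 1)			/* shrl $1; carry set -> movsw */
	    {
	      memcpy (d, s, 2);
	      d += 2;
	      s += 2;
	    }
	  len >>= 1;
	  while (len-- > 0)		/* rep movsl, alignment not guaranteed */
	    {
	      memcpy (d, s, 4);
	      d += 4;
	      s += 4;
	    }
	}

The misaligned movsl stores still work on the i686; they just risk splitting cache lines on each access, and avoiding that for the common same-alignment case is presumably the performance win behind BZ #4314.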