about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2004-02-10 01:55:34 +0000
committer Ulrich Drepper <drepper@redhat.com> 2004-02-10 01:55:34 +0000
commit 221c6b37b5f0fdb9cfcd2579798209d31987323c (patch)
tree ab16b0cb922bda145b6a666efa239f15eee2ccfc
parent c655f8f8516484d5a401ea5982921703b50e87e5 (diff)
download glibc-221c6b37b5f0fdb9cfcd2579798209d31987323c.tar.gz
glibc-221c6b37b5f0fdb9cfcd2579798209d31987323c.tar.xz
glibc-221c6b37b5f0fdb9cfcd2579798209d31987323c.zip
memcmp optimized for i686.
-rw-r--r-- sysdeps/i386/i686/memcmp.S 392
1 files changed, 392 insertions, 0 deletions
diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S
new file mode 100644
index 0000000000..920ae72df3
--- /dev/null
+++ b/sysdeps/i386/i686/memcmp.S
@@ -0,0 +1,392 @@
+/* Compare two memory blocks for differences in the first COUNT bytes.
+   Copyright (C) 2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+#define PARMS		LINKAGE+4	/* The +4 skips the EBX slot pushed by ENTRANCE.  */
+#define BLK1		PARMS	/* Stack offset of the first block pointer.  */
+#define BLK2		BLK1+PTR_SIZE	/* Stack offset of the second block pointer.  */
+#define LEN		BLK2+PTR_SIZE	/* Stack offset of the byte count.  */
+#define ENTRANCE	pushl %ebx; ENTER	/* Prologue: save EBX, then ENTER (defined outside this file).  */
+#define RETURN		popl %ebx; LEAVE; ret	/* Epilogue: restore EBX, then LEAVE (defined outside this file) and return.  */
+
+/* Load an entry in a jump table into EBX.  TABLE is a jump table
+   with relative offsets.  INDEX is a register that contains the index
+   into the jump table.  On exit EBX holds the absolute address of the
+   selected target; the previous value of EBX is lost.  */
+#define LOAD_JUMP_TABLE_ENTRY(TABLE, INDEX)			\
+  /* We first load PC into EBX: the thunk hands back its own	\
+     return address, i.e. the address of the ADDL below.  */	\
+  call	__i686.get_pc_thunk.bx;					\
+  /* Get the address of the jump table.  The dot is the address	\
+     of this ADDL, which is exactly what EBX holds now.  */	\
+  addl	$(TABLE - .), %ebx;					\
+  /* Get the entry (4 bytes each) and convert the relative	\
+     offset to the absolute address.  */			\
+  addl	(%ebx,INDEX,4), %ebx
+
+#ifdef HAVE_HIDDEN	/* Emit the thunk as a global, hidden symbol in its own .gnu.linkonce section.  */
+	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+	.globl	__i686.get_pc_thunk.bx
+	.hidden	__i686.get_pc_thunk.bx
+#else	/* Otherwise keep a file-local copy in .text.  */
+        .text
+#endif
+	ALIGN (4)
+	.type	__i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:		/* PC thunk called by LOAD_JUMP_TABLE_ENTRY.  */
+	movl	(%esp), %ebx		/* EBX = our return address, i.e. the caller's PC after the call.  */
+	ret
+
+        .text
+	ALIGN (4)
+/* int memcmp (const void *blk1, const void *blk2, size_t len)
+
+   Arguments are read from the stack at BLK1, BLK2 and LEN (offsets
+   from ESP after ENTRANCE has pushed EBX).  The result is returned
+   in EAX: 0 if the first LEN bytes are equal, -1 if the first
+   differing byte of BLK1 is below the one of BLK2 when compared as
+   unsigned bytes, +1 otherwise.
+
+   Register use once the LEN <= 1 fast paths have been left:
+     ESI  cursor into BLK1 (pushed here, popped before every return)
+     EDX  cursor into BLK2
+     ECX  bytes still to compare; scratch for BLK2 data in the tails
+     EAX  scratch for BLK1 data, finally the result
+     EBX  jump target computed by LOAD_JUMP_TABLE_ENTRY
+
+   LEN is a size_t, so every test on it uses an unsigned condition
+   (jb/jae).  With signed conditions a length of 2^31 or more looked
+   negative and the function returned 0 without comparing anything.  */
+ENTRY (BP_SYM (memcmp))
+	ENTRANCE
+
+	movl	BLK1(%esp), %eax
+	movl	BLK2(%esp), %edx
+	movl	LEN(%esp), %ecx
+
+	cmpl	$1, %ecx
+	jne	L(not_1)
+	movzbl	(%eax), %ecx		/* LEN == 1  */
+	cmpb	(%edx), %cl
+	jne	L(neq)
+L(bye):
+	xorl	%eax, %eax
+	RETURN
+
+L(neq):
+	/* CF is set iff the BLK1 byte is below the BLK2 byte, so this
+	   yields EAX = CF ? -1 : 1.  ESI has not been pushed on this
+	   path, hence no pop.  */
+	sbbl	%eax, %eax
+	sbbl	$-1, %eax
+	RETURN
+
+L(not_1):
+	/* The flags are still those of the compare against 1.  LEN is
+	   unsigned, so the only value below 1 is 0.  */
+	jb	L(bye)			/* LEN == 0  */
+
+	pushl	%esi
+	movl	%eax, %esi
+	cmpl	$32, %ecx
+	jae	L(32bytesormore)	/* LEN >= 32  */
+
+	/* 2 <= LEN < 32: advance both cursors to the end of the blocks
+	   and dispatch on LEN.  The tail code addresses the data with
+	   negative offsets from the end.  */
+	LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
+	addl	%ecx, %edx
+	addl	%ecx, %esi
+	jmp	*%ebx
+
+	/* Tails with LEN % 4 == 0.  Each label compares one dword and
+	   falls through to the label for four bytes less.  */
+	ALIGN (4)
+L(28bytes):
+	movl	-28(%esi), %eax
+	movl	-28(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(24bytes):
+	movl	-24(%esi), %eax
+	movl	-24(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(20bytes):
+	movl	-20(%esi), %eax
+	movl	-20(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(16bytes):
+	movl	-16(%esi), %eax
+	movl	-16(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(12bytes):
+	movl	-12(%esi), %eax
+	movl	-12(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(8bytes):
+	movl	-8(%esi), %eax
+	movl	-8(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(4bytes):
+	movl	-4(%esi), %eax
+	movl	-4(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(0bytes):
+	popl	%esi
+	xorl	%eax, %eax
+	RETURN
+
+	/* Tails with LEN % 4 == 1: dwords first, then the last byte.  */
+L(29bytes):
+	movl	-29(%esi), %eax
+	movl	-29(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(25bytes):
+	movl	-25(%esi), %eax
+	movl	-25(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(21bytes):
+	movl	-21(%esi), %eax
+	movl	-21(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(17bytes):
+	movl	-17(%esi), %eax
+	movl	-17(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(13bytes):
+	movl	-13(%esi), %eax
+	movl	-13(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(9bytes):
+	movl	-9(%esi), %eax
+	movl	-9(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(5bytes):
+	movl	-5(%esi), %eax
+	movl	-5(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(1bytes):
+	movzbl	-1(%esi), %eax
+	cmpb	-1(%edx), %al
+	jne	L(set)
+	popl	%esi
+	xorl	%eax, %eax
+	RETURN
+
+	/* Tails with LEN % 4 == 2: dwords first, then the last word.  */
+L(30bytes):
+	movl	-30(%esi), %eax
+	movl	-30(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(26bytes):
+	movl	-26(%esi), %eax
+	movl	-26(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(22bytes):
+	movl	-22(%esi), %eax
+	movl	-22(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(18bytes):
+	movl	-18(%esi), %eax
+	movl	-18(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(14bytes):
+	movl	-14(%esi), %eax
+	movl	-14(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(10bytes):
+	movl	-10(%esi), %eax
+	movl	-10(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(6bytes):
+	movl	-6(%esi), %eax
+	movl	-6(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(2bytes):
+	movzwl	-2(%esi), %eax
+	movzwl	-2(%edx), %ecx
+	/* The byte at the lower address decides first.  If it matches,
+	   the zero-extended word compare is decided by the second
+	   byte.  */
+	cmpb	%cl, %al
+	jne	L(set)
+	cmpl	%ecx, %eax
+	jne	L(set)
+	popl	%esi
+	xorl	%eax, %eax
+	RETURN
+
+	/* Tails with LEN % 4 == 3: dwords first, then a word and a
+	   byte.  */
+L(31bytes):
+	movl	-31(%esi), %eax
+	movl	-31(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(27bytes):
+	movl	-27(%esi), %eax
+	movl	-27(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(23bytes):
+	movl	-23(%esi), %eax
+	movl	-23(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(19bytes):
+	movl	-19(%esi), %eax
+	movl	-19(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(15bytes):
+	movl	-15(%esi), %eax
+	movl	-15(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(11bytes):
+	movl	-11(%esi), %eax
+	movl	-11(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(7bytes):
+	movl	-7(%esi), %eax
+	movl	-7(%edx), %ecx
+	cmpl	%ecx, %eax
+	jne	L(find_diff)
+L(3bytes):
+	movzwl	-3(%esi), %eax
+	movzwl	-3(%edx), %ecx
+	cmpb	%cl, %al
+	jne	L(set)
+	cmpl	%ecx, %eax
+	jne	L(set)
+	movzbl	-1(%esi), %eax
+	cmpb	-1(%edx), %al
+	jne	L(set)
+	popl	%esi
+	xorl	%eax, %eax
+	RETURN
+
+	ALIGN (4)
+/* ECX >= 32 (unsigned).  Compare 32 bytes per iteration, one dword
+   at a time, leaving through L(load_ecx*) at the first mismatch.  */
+L(32bytesormore):
+	subl	$32, %ecx
+
+	movl	(%esi), %eax
+	cmpl	(%edx), %eax
+	jne	L(load_ecx)
+
+	movl	4(%esi), %eax
+	cmpl	4(%edx), %eax
+	jne	L(load_ecx_4)
+
+	movl	8(%esi), %eax
+	cmpl	8(%edx), %eax
+	jne	L(load_ecx_8)
+
+	movl	12(%esi), %eax
+	cmpl	12(%edx), %eax
+	jne	L(load_ecx_12)
+
+	movl	16(%esi), %eax
+	cmpl	16(%edx), %eax
+	jne	L(load_ecx_16)
+
+	movl	20(%esi), %eax
+	cmpl	20(%edx), %eax
+	jne	L(load_ecx_20)
+
+	movl	24(%esi), %eax
+	cmpl	24(%edx), %eax
+	jne	L(load_ecx_24)
+
+	movl	28(%esi), %eax
+	cmpl	28(%edx), %eax
+	jne	L(load_ecx_28)
+
+	addl	$32, %esi
+	addl	$32, %edx
+	cmpl	$32, %ecx
+	jae	L(32bytesormore)	/* Unsigned: ECX is a byte count.  */
+
+	/* Fewer than 32 bytes are left: finish in the tail code.  */
+	LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
+	addl	%ecx, %edx
+	addl	%ecx, %esi
+	jmp	*%ebx
+
+	/* Mismatch in the dword at offset N of the current 32-byte
+	   chunk.  EAX already holds the BLK1 dword; step EDX forward
+	   by N through the fall-through chain and reload the BLK2
+	   dword into ECX.  */
+L(load_ecx_28):
+	addl	$0x4, %edx
+L(load_ecx_24):
+	addl	$0x4, %edx
+L(load_ecx_20):
+	addl	$0x4, %edx
+L(load_ecx_16):
+	addl	$0x4, %edx
+L(load_ecx_12):
+	addl	$0x4, %edx
+L(load_ecx_8):
+	addl	$0x4, %edx
+L(load_ecx_4):
+	addl	$0x4, %edx
+L(load_ecx):
+	movl	(%edx), %ecx
+
+	/* EAX (from BLK1) and ECX (from BLK2) are known to differ.
+	   Find the differing byte at the lowest address, i.e. the
+	   least significant one, and leave its compare flags for
+	   L(set).  */
+L(find_diff):
+	cmpb	%cl, %al
+	jne	L(set)
+	cmpb	%ch, %ah
+	jne	L(set)
+	shrl	$16,%eax
+	shrl	$16,%ecx
+	cmpb	%cl, %al
+	jne	L(set)
+	/* We get here only if we already know there is a
+	   difference, so it must be in the top byte.  The lower
+	   bytes of the shifted values are equal and cannot
+	   influence this compare.  */
+	cmpl	%ecx, %eax
+L(set):
+	/* EAX = CF ? -1 : 1, where CF means BLK1 byte < BLK2 byte.  */
+	sbbl	%eax, %eax
+	sbbl	$-1, %eax
+	popl	%esi
+	RETURN
+
+	/* Jump table indexed by the number of remaining bytes (0..31).
+	   Each entry is the distance from the start of the table to
+	   its target: "- ." is relative to the entry itself, and the
+	   added constant is the entry's own offset within the
+	   table.  */
+	ALIGN (2)
+L(table_32bytes):
+	.long	L(0bytes) - . + 0x0
+	.long	L(1bytes) - . + 0x4
+	.long	L(2bytes) - . + 0x8
+	.long	L(3bytes) - . + 0xc
+	.long	L(4bytes) - . + 0x10
+	.long	L(5bytes) - . + 0x14
+	.long	L(6bytes) - . + 0x18
+	.long	L(7bytes) - . + 0x1c
+	.long	L(8bytes) - . + 0x20
+	.long	L(9bytes) - . + 0x24
+	.long	L(10bytes) - . + 0x28
+	.long	L(11bytes) - . + 0x2c
+	.long	L(12bytes) - . + 0x30
+	.long	L(13bytes) - . + 0x34
+	.long	L(14bytes) - . + 0x38
+	.long	L(15bytes) - . + 0x3c
+	.long	L(16bytes) - . + 0x40
+	.long	L(17bytes) - . + 0x44
+	.long	L(18bytes) - . + 0x48
+	.long	L(19bytes) - . + 0x4c
+	.long	L(20bytes) - . + 0x50
+	.long	L(21bytes) - . + 0x54
+	.long	L(22bytes) - . + 0x58
+	.long	L(23bytes) - . + 0x5c
+	.long	L(24bytes) - . + 0x60
+	.long	L(25bytes) - . + 0x64
+	.long	L(26bytes) - . + 0x68
+	.long	L(27bytes) - . + 0x6c
+	.long	L(28bytes) - . + 0x70
+	.long	L(29bytes) - . + 0x74
+	.long	L(30bytes) - . + 0x78
+	.long	L(31bytes) - . + 0x7c
+
+END (BP_SYM (memcmp))
+
+#undef bcmp	/* Drop any macro named bcmp so the name below is used literally.  */
+weak_alias (BP_SYM (memcmp), BP_SYM (bcmp))	/* Presumably makes bcmp a weak alias of memcmp; weak_alias is defined outside this file.  */