about summary refs log tree commit diff
path: root/sysdeps/x86_64/memset.S
diff options
context:
space:
mode:
authorAndreas Jaeger <aj@suse.de>2002-08-31 17:45:33 +0000
committerAndreas Jaeger <aj@suse.de>2002-08-31 17:45:33 +0000
commit78df0fcb80247ca7573a8ed07cc992b7031674c1 (patch)
tree339b581727c7ae5782f5118bf98567acda987553 /sysdeps/x86_64/memset.S
parent7c9466bc7688e084cfbf9311eb91bdbaed1ea888 (diff)
downloadglibc-78df0fcb80247ca7573a8ed07cc992b7031674c1.tar.gz
glibc-78df0fcb80247ca7573a8ed07cc992b7031674c1.tar.xz
glibc-78df0fcb80247ca7573a8ed07cc992b7031674c1.zip
Update.
	* sysdeps/x86_64/dl-machine.h (elf_machine_runtime_setup): Declare
	external functions with hidden attribute.
	(elf_machine_rela): Optimize.

	* sysdeps/x86_64/memset.S: New file.
	* sysdeps/x86_64/bzero.S: New file.
	* sysdeps/x86_64/stpcpy.S: New file.
	* sysdeps/x86_64/strcat.S: New file.
	* sysdeps/x86_64/strchr.S: New file.
	* sysdeps/x86_64/strcpy.S: New file.
	* sysdeps/x86_64/strcspn.S: New file.
	* sysdeps/x86_64/strlen.S: New file.
	* sysdeps/x86_64/strpbrk.S: New file.
	* sysdeps/x86_64/strspn.S: New file.
	* sysdeps/x86_64/strcmp.S: New file.
	* sysdeps/x86_64/strtok_r.S: New file.
	* sysdeps/x86_64/strtok.S: New file.
	* sysdeps/x86_64/memcpy.S: New file.
	* sysdeps/x86_64/mempcpy.S: New file.
Diffstat (limited to 'sysdeps/x86_64/memset.S')
-rw-r--r--sysdeps/x86_64/memset.S131
1 files changed, 131 insertions, 0 deletions
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
new file mode 100644
index 0000000000..b95ca40b2f
--- /dev/null
+++ b/sysdeps/x86_64/memset.S
@@ -0,0 +1,131 @@
+/* memset/bzero -- set memory area to CH/0
+   Optimized version for x86-64.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Andreas Jaeger <aj@suse.de>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+/* BEWARE: `#ifdef memset' means that memset is redefined as `bzero' */
+#define BZERO_P (defined memset)
+
+/* This is somehow experimental and could made dependend on the cache
+   size.  */
+#define LARGE $120000
+
+        .text
+ENTRY (memset)
+#if BZERO_P
+	mov	%rsi,%rdx	/* Adjust parameter.  */
+	xorq	%rsi,%rsi	/* Fill with 0s.  */
+#endif
+	cmp	$0x7,%rdx	/* Check for small length.  */
+	mov	%rdi,%rcx	/* Save ptr as return value.  */
+	jbe	7f
+
+#if BZERO_P
+	mov	%rsi,%r8	/* Just copy 0.  */
+#else
+	/* Populate 8 bit data to full 64-bit.  */
+	movabs	$0x0101010101010101,%r8
+	movzbl	%sil,%eax
+	imul	%rax,%r8
+#endif
+	test	$0x7,%edi	/* Check for alignment.  */
+	je	2f
+
+	.p2align 4
+1:	/* Align ptr to 8 byte.  */
+	mov	%sil,(%rcx)
+	dec	%rdx
+	inc	%rcx
+	test	$0x7,%ecx
+	jne	1b
+
+2:	/* Check for really large regions.  */
+	mov	%rdx,%rax
+	shr	$0x6,%rax
+	je	4f
+	cmp	LARGE, %rdx
+	jae	11f
+
+	.p2align 4
+3:	/* Copy 64 bytes.  */
+	mov	%r8,(%rcx)
+	mov	%r8,0x8(%rcx)
+	mov	%r8,0x10(%rcx)
+	mov	%r8,0x18(%rcx)
+	mov	%r8,0x20(%rcx)
+	mov	%r8,0x28(%rcx)
+	mov	%r8,0x30(%rcx)
+	mov	%r8,0x38(%rcx)
+	add	$0x40,%rcx
+	dec	%rax
+	jne	3b
+
+4:	/* Copy final bytes.  */
+	and	$0x3f,%edx
+	mov	%rdx,%rax
+	shr	$0x3,%rax
+	je	6f
+
+5:	/* First in chunks of 8 bytes.  */
+	mov	%r8,(%rcx)
+	add	$0x8,%rcx
+	dec	%rax
+	jne	5b
+6:
+	and	$0x7,%edx
+7:
+	test	%rdx,%rdx
+	je	9f
+8:	/* And finally as bytes (up to 7).  */
+	mov	%sil,(%rcx)
+	inc	%rcx
+	dec	%rdx
+	jne	8b
+9:
+#if BZERO_P
+	nop
+#else
+	/* Load result (only if used as memset).  */
+	mov	%rdi,%rax	/* start address of destination is result */
+#endif
+	retq
+
+	.p2align 4
+11:	/* Copy 64 bytes without polluting the cache.  */
+	/* We could use	movntdq    %xmm0,(%rcx) here to further
+	   speed up for large cases but let's not use XMM registers.  */
+	movnti	%r8,(%rcx)
+	movnti  %r8,0x8(%rcx)
+	movnti  %r8,0x10(%rcx)
+	movnti  %r8,0x18(%rcx)
+	movnti  %r8,0x20(%rcx)
+	movnti  %r8,0x28(%rcx)
+	movnti  %r8,0x30(%rcx)
+	movnti  %r8,0x38(%rcx)
+	add	$0x40,%rcx
+	dec	%rax
+	jne	11b
+	jmp	4b
+
+END (memset)