about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog20
-rw-r--r--sysdeps/x86_64/bzero.S3
-rw-r--r--sysdeps/x86_64/dl-machine.h9
-rw-r--r--sysdeps/x86_64/memcpy.S92
-rw-r--r--sysdeps/x86_64/mempcpy.S5
-rw-r--r--sysdeps/x86_64/memset.S131
-rw-r--r--sysdeps/x86_64/stpcpy.S7
-rw-r--r--sysdeps/x86_64/strcat.S257
-rw-r--r--sysdeps/x86_64/strchr.S290
-rw-r--r--sysdeps/x86_64/strcmp.S44
-rw-r--r--sysdeps/x86_64/strcpy.S156
-rw-r--r--sysdeps/x86_64/strcspn.S123
-rw-r--r--sysdeps/x86_64/strlen.S138
-rw-r--r--sysdeps/x86_64/strpbrk.S2
-rw-r--r--sysdeps/x86_64/strspn.S113
-rw-r--r--sysdeps/x86_64/strtok.S208
-rw-r--r--sysdeps/x86_64/strtok_r.S4
17 files changed, 1599 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 44451b2d96..3881b41661 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,25 @@
 2002-08-31  Andreas Jaeger  <aj@suse.de>
 
+	* sysdeps/x86_64/dl-machine.h (elf_machine_runtime_setup): Declare
+	external functions with hidden attribute.
+	(elf_machine_rela): Optimize.
+
+	* sysdeps/x86_64/memset.S: New file.
+	* sysdeps/x86_64/bzero.S: New file.
+	* sysdeps/x86_64/stpcpy.S: New file.
+	* sysdeps/x86_64/strcat.S: New file.
+	* sysdeps/x86_64/strchr.S: New file.
+	* sysdeps/x86_64/strcpy.S: New file.
+	* sysdeps/x86_64/strcspn.S: New file.
+	* sysdeps/x86_64/strlen.S: New file.
+	* sysdeps/x86_64/strpbrk.S: New file.
+	* sysdeps/x86_64/strspn.S: New file.
+	* sysdeps/x86_64/strcmp.S: New file.
+	* sysdeps/x86_64/strtok_r.S: New file.
+	* sysdeps/x86_64/strtok.S: New file.
+	* sysdeps/x86_64/memcpy.S: New file.
+	* sysdeps/x86_64/mempcpy.S: New file.
+
 	* sysdeps/x86_64/fpu/s_copysign.S: Fix algorithm.
 
 	* sysdeps/x86_64/fpu/libm-test-ulps: Add ulps for double tests.
diff --git a/sysdeps/x86_64/bzero.S b/sysdeps/x86_64/bzero.S
new file mode 100644
index 0000000000..2688f456b5
--- /dev/null
+++ b/sysdeps/x86_64/bzero.S
@@ -0,0 +1,3 @@
+#define memset __bzero	/* memset.S checks `defined memset' and then builds its bzero variant under this name.  */
+#include <sysdeps/x86_64/memset.S>
+weak_alias (__bzero, bzero)	/* NOTE(review): presumably also publishes the code as `bzero' - confirm against the weak_alias macro.  */
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index adf108c17f..2800a9e958 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -84,8 +84,8 @@ static inline int __attribute__ ((unused))
 elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 {
   Elf64_Addr *got;
-  extern void _dl_runtime_resolve (Elf64_Word);
-  extern void _dl_runtime_profile (Elf64_Word);
+  extern void _dl_runtime_resolve (Elf64_Word) attribute_hidden;
+  extern void _dl_runtime_profile (Elf64_Word) attribute_hidden;
 
   if (l->l_info[DT_JMPREL] && lazy)
     {
@@ -367,7 +367,10 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
       const Elf64_Sym *const refsym = sym;
 #endif
       Elf64_Addr value = RESOLVE (&sym, version, r_type);
-      if (sym)
+
+# ifndef RTLD_BOOTSTRAP
+      if (sym != NULL)
+# endif
 	value += sym->st_value;
 
 #ifdef RTLD_BOOTSTRAP
diff --git a/sysdeps/x86_64/memcpy.S b/sysdeps/x86_64/memcpy.S
new file mode 100644
index 0000000000..1339036bdb
--- /dev/null
+++ b/sysdeps/x86_64/memcpy.S
@@ -0,0 +1,92 @@
+/* Highly optimized version for x86-64.
+   Copyright (C) 1997, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Based on i586 version contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+/* BEWARE: `#ifdef memcpy' means that memcpy is redefined as `mempcpy',
+   and the return value is the byte after the last one copied in
+   the destination. */
+#define MEMPCPY_P (defined memcpy)
+
+        .text
+ENTRY (BP_SYM (memcpy))
+	/* Cutoff for the big loop is a size of 32 bytes since otherwise
+	   the loop will never be entered.  */
+	cmpq	$32, %rdx
+	movq	%rdx, %rcx	/* Byte count for the string moves.  */
+#if !MEMPCPY_P
+	movq	%rdi, %r10	/* Save destination for the return value.  */
+#endif
+
+	/* We need this in any case.  */
+	cld
+
+	jbe	1f		/* At most 32 bytes: plain byte copy.  */
+
+	/* Align destination to 8 bytes, the width of the stores below.  */
+	movq	%rdi, %rax
+	negq	%rax
+	andq	$7, %rax	/* Bytes up to the next 8-byte boundary.  */
+	subq	%rax, %rcx	/* Bytes left after the alignment copy.  */
+	xchgq	%rax, %rcx	/* %rcx = alignment bytes, %rax = rest.  */
+
+	rep; movsb
+
+	movq	%rax, %rcx
+	subq	$32, %rcx
+	js	2f		/* Fewer than 32 bytes left: skip the loop.  */
+
+	.p2align 4
+3:
+
+	/* Now correct the loop counter.  Please note that in the following
+	   code the flags are not changed anymore.  */
+	subq	$32, %rcx
+
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rdx
+	movq	16(%rsi), %r8
+	movq	24(%rsi), %r9
+	movq	%rax, (%rdi)
+	movq	%rdx, 8(%rdi)
+	movq	%r8, 16(%rdi)
+	movq	%r9, 24(%rdi)
+
+	leaq	32(%rsi), %rsi
+	leaq	32(%rdi), %rdi
+
+	jns	3b		/* Uses the flags of the subq above.  */
+
+	/* Correct extra loop counter modification.  */
+2:	addq	$32, %rcx
+1:	rep; movsb		/* Copy the remaining %rcx bytes.  */
+
+#if MEMPCPY_P
+	movq	%rdi, %rax		/* Return the byte after the copy.  */
+#else
+	movq	%r10, %rax		/* Return the original destination.  */
+
+#endif
+	ret
+
+END (BP_SYM (memcpy))
diff --git a/sysdeps/x86_64/mempcpy.S b/sysdeps/x86_64/mempcpy.S
new file mode 100644
index 0000000000..38fdd0519e
--- /dev/null
+++ b/sysdeps/x86_64/mempcpy.S
@@ -0,0 +1,5 @@
+#define memcpy __mempcpy	/* memcpy.S checks `defined memcpy' and then returns the byte after the last one copied.  */
+#include <sysdeps/x86_64/memcpy.S>
+
+libc_hidden_def (BP_SYM (__mempcpy))	/* NOTE(review): semantics of libc_hidden_def are not visible here - confirm.  */
+weak_alias (BP_SYM (__mempcpy), BP_SYM (mempcpy))	/* NOTE(review): presumably publishes the code as `mempcpy' too - confirm.  */
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
new file mode 100644
index 0000000000..b95ca40b2f
--- /dev/null
+++ b/sysdeps/x86_64/memset.S
@@ -0,0 +1,131 @@
+/* memset/bzero -- set memory area to CH/0
+   Optimized version for x86-64.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Andreas Jaeger <aj@suse.de>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+/* BEWARE: `#ifdef memset' means that memset is redefined as `bzero' */
+#define BZERO_P (defined memset)
+
+/* This is somewhat experimental and could be made dependent on the cache
+   size.  */
+#define LARGE $120000
+
+        .text
+ENTRY (memset)
+#if BZERO_P
+	mov	%rsi,%rdx	/* Adjust parameter: the length arrives in %rsi.  */
+	xorq	%rsi,%rsi	/* Fill with 0s.  */
+#endif
+	cmp	$0x7,%rdx	/* Check for small length.  */
+	mov	%rdi,%rcx	/* Working pointer; %rdi stays intact for the result.  */
+	jbe	7f		/* At most 7 bytes: byte loop only.  */
+
+#if BZERO_P
+	mov	%rsi,%r8	/* Just copy 0.  */
+#else
+	/* Populate 8 bit data to full 64-bit.  */
+	movabs	$0x0101010101010101,%r8
+	movzbl	%sil,%eax
+	imul	%rax,%r8
+#endif
+	test	$0x7,%edi	/* Check for alignment.  */
+	je	2f
+
+	.p2align 4
+1:	/* Align ptr to 8 byte, storing one byte per iteration.  */
+	mov	%sil,(%rcx)
+	dec	%rdx
+	inc	%rcx
+	test	$0x7,%ecx
+	jne	1b
+
+2:	/* Check for really large regions.  */
+	mov	%rdx,%rax
+	shr	$0x6,%rax	/* Number of whole 64-byte chunks.  */
+	je	4f
+	cmp	LARGE, %rdx
+	jae	11f
+
+	.p2align 4
+3:	/* Store 64 bytes per iteration.  */
+	mov	%r8,(%rcx)
+	mov	%r8,0x8(%rcx)
+	mov	%r8,0x10(%rcx)
+	mov	%r8,0x18(%rcx)
+	mov	%r8,0x20(%rcx)
+	mov	%r8,0x28(%rcx)
+	mov	%r8,0x30(%rcx)
+	mov	%r8,0x38(%rcx)
+	add	$0x40,%rcx
+	dec	%rax
+	jne	3b
+
+4:	/* Store the final bytes (fewer than 64).  */
+	and	$0x3f,%edx
+	mov	%rdx,%rax
+	shr	$0x3,%rax	/* Number of whole 8-byte words.  */
+	je	6f
+
+5:	/* First in chunks of 8 bytes.  */
+	mov	%r8,(%rcx)
+	add	$0x8,%rcx
+	dec	%rax
+	jne	5b
+6:
+	and	$0x7,%edx	/* Bytes left after the 8-byte stores.  */
+7:
+	test	%rdx,%rdx
+	je	9f
+8:	/* And finally as bytes (up to 7).  */
+	mov	%sil,(%rcx)
+	inc	%rcx
+	dec	%rdx
+	jne	8b
+9:
+#if BZERO_P
+	nop
+#else
+	/* Load result (only if used as memset).  */
+	mov	%rdi,%rax	/* start address of destination is result */
+#endif
+	retq
+
+	.p2align 4
+11:	/* Store 64 bytes per iteration without polluting the cache.  */
+	/* movnti is used rather than movntdq %xmm0,(%rcx) so that no XMM
+	   register is needed.  NOTE(review): no sfence follows - confirm.  */
+	movnti	%r8,(%rcx)
+	movnti  %r8,0x8(%rcx)
+	movnti  %r8,0x10(%rcx)
+	movnti  %r8,0x18(%rcx)
+	movnti  %r8,0x20(%rcx)
+	movnti  %r8,0x28(%rcx)
+	movnti  %r8,0x30(%rcx)
+	movnti  %r8,0x38(%rcx)
+	add	$0x40,%rcx
+	dec	%rax
+	jne	11b
+	jmp	4b		/* Finish the tail with the regular code.  */
+
+END (memset)
diff --git a/sysdeps/x86_64/stpcpy.S b/sysdeps/x86_64/stpcpy.S
new file mode 100644
index 0000000000..b9bbcd9cf6
--- /dev/null
+++ b/sysdeps/x86_64/stpcpy.S
@@ -0,0 +1,7 @@
+#define USE_AS_STPCPY	/* Keeps strcpy.S from defining STRCPY as strcpy.  */
+#define STRCPY __stpcpy	/* Name strcpy.S passes to ENTRY.  */
+
+#include <sysdeps/x86_64/strcpy.S>
+
+weak_alias (__stpcpy, stpcpy)	/* NOTE(review): presumably publishes the code as `stpcpy' too - confirm.  */
+libc_hidden_def (__stpcpy)	/* NOTE(review): semantics of libc_hidden_def are not visible here - confirm.  */
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
new file mode 100644
index 0000000000..549fd21b4a
--- /dev/null
+++ b/sysdeps/x86_64/strcat.S
@@ -0,0 +1,257 @@
+/* strcat(dest, src) -- Append SRC on the end of DEST.
+   Optimized for x86-64.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Andreas Jaeger <aj@suse.de>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+
+	.text
+ENTRY (BP_SYM (strcat))
+	movq %rdi, %rcx		/* Dest. register. */
+	andl $7, %ecx		/* mask alignment bits */
+	movq %rdi, %rax		/* Duplicate destination pointer.  */
+	movq $0xfefefefefefefeff,%r8	/* magic value for NUL detection */
+
+	/* First step: Find end of destination.  */
+	jz 4f			/* aligned => start loop */
+
+	neg %ecx		/* We need to align to 8 bytes.  */
+	addl $8,%ecx
+	/* Search the first bytes directly.  */
+0:	cmpb $0x0,(%rax)	/* is byte NUL? */
+	je 2f			/* yes => start copy */
+	incq %rax		/* increment pointer */
+	decl %ecx
+	jnz 0b
+
+	/* Now the destination pointer is aligned.  Scan for NUL byte.  */
+	.p2align 4
+4:
+	/* First unroll.  */
+	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* highest byte is NUL => locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found NUL => locate it */
+
+	/* Second unroll.  */
+	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* highest byte is NUL => locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found NUL => locate it */
+
+	/* Third unroll.  */
+	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* highest byte is NUL => locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found NUL => locate it */
+
+	/* Fourth unroll.  */
+	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* highest byte is NUL => locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jz 4b			/* no NUL found => continue loop */
+
+	.p2align 4		/* Align, it's a jump target.  */
+3:	subq $8,%rax		/* correct pointer increment.  */
+
+	testb %cl, %cl		/* is first byte NUL? */
+	jz 2f			/* yes => start copy */
+	incq %rax		/* increment pointer */
+
+	testb %ch, %ch		/* is second byte NUL? */
+	jz 2f			/* yes => start copy */
+	incq %rax		/* increment pointer */
+
+	testl $0x00ff0000, %ecx /* is third byte NUL? */
+	jz 2f			/* yes => start copy */
+	incq %rax		/* increment pointer */
+
+	testl $0xff000000, %ecx /* is fourth byte NUL? */
+	jz 2f			/* yes => start copy */
+	incq %rax		/* increment pointer */
+
+	shrq $32, %rcx		/* look at other half.  */
+
+	testb %cl, %cl		/* is fifth byte NUL? */
+	jz 2f			/* yes => start copy */
+	incq %rax		/* increment pointer */
+
+	testb %ch, %ch		/* is sixth byte NUL? */
+	jz 2f			/* yes => start copy */
+	incq %rax		/* increment pointer */
+
+	testl $0xff0000, %ecx	/* is seventh byte NUL? */
+	jz 2f			/* yes => start copy */
+	incq %rax		/* no => it is the eighth byte */
+
+2:
+	/* Second step: Copy source to destination.  */
+
+	movq	%rsi, %rcx	/* duplicate source pointer */
+	andl	$7,%ecx		/* mask alignment bits */
+	movq	%rax, %rdx	/* end of destination = write pointer */
+	jz	22f		/* source aligned => start loop */
+
+	neg	%ecx		/* Bytes up to the next 8-byte boundary.  */
+	addl	$8, %ecx
+	/* Align the source pointer.  */
+21:
+	movb	(%rsi), %al	/* Fetch a byte */
+	testb	%al, %al	/* Is it NUL? */
+	movb	%al, (%rdx)	/* Store it */
+	jz	24f		/* If it was NUL, done! */
+	incq	%rsi
+	incq	%rdx
+	decl	%ecx
+	jnz	21b
+
+	/* Now the source is aligned.  Unfortunately we cannot force
+	   to have both source and destination aligned, so ignore the
+	   alignment of the destination.  */
+	.p2align 4
+22:
+	/* 1st unroll.  */
+	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */
+	addq	$8, %rsi	/* Adjust pointer for next word.  */
+	movq	%rax, %r9	/* Save a copy for NUL finding.  */
+	addq	%r8, %r9	/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc	23f		/* highest byte is NUL => copy the tail */
+	xorq	%rax, %r9	/* (word+magic)^word */
+	orq	%r8, %r9	/* set all non-carry bits */
+	incq	%r9		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+
+	jnz	23f		/* found NUL => copy the tail */
+
+	movq	%rax, (%rdx)	/* Write value to destination.  */
+	addq	$8, %rdx	/* Adjust pointer.  */
+
+	/* 2nd unroll.  */
+	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */
+	addq	$8, %rsi	/* Adjust pointer for next word.  */
+	movq	%rax, %r9	/* Save a copy for NUL finding.  */
+	addq	%r8, %r9	/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc	23f		/* highest byte is NUL => copy the tail */
+	xorq	%rax, %r9	/* (word+magic)^word */
+	orq	%r8, %r9	/* set all non-carry bits */
+	incq	%r9		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+
+	jnz	23f		/* found NUL => copy the tail */
+
+	movq	%rax, (%rdx)	/* Write value to destination.  */
+	addq	$8, %rdx	/* Adjust pointer.  */
+
+	/* 3rd unroll.  */
+	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */
+	addq	$8, %rsi	/* Adjust pointer for next word.  */
+	movq	%rax, %r9	/* Save a copy for NUL finding.  */
+	addq	%r8, %r9	/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc	23f		/* highest byte is NUL => copy the tail */
+	xorq	%rax, %r9	/* (word+magic)^word */
+	orq	%r8, %r9	/* set all non-carry bits */
+	incq	%r9		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+
+	jnz	23f		/* found NUL => copy the tail */
+
+	movq	%rax, (%rdx)	/* Write value to destination.  */
+	addq	$8, %rdx	/* Adjust pointer.  */
+
+	/* 4th unroll.  */
+	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */
+	addq	$8, %rsi	/* Adjust pointer for next word.  */
+	movq	%rax, %r9	/* Save a copy for NUL finding.  */
+	addq	%r8, %r9	/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc	23f		/* highest byte is NUL => copy the tail */
+	xorq	%rax, %r9	/* (word+magic)^word */
+	orq	%r8, %r9	/* set all non-carry bits */
+	incq	%r9		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+
+	jnz	23f		/* found NUL => copy the tail */
+
+	movq	%rax, (%rdx)	/* Write value to destination.  */
+	addq	$8, %rdx	/* Adjust pointer.  */
+	jmp	22b		/* Next iteration.  */
+
+	/* Do the last few bytes. %rax contains the value to write.
+	   The loop is unrolled twice.  */
+	.p2align 4
+23:
+	movb	%al, (%rdx)	/* 1st byte.  */
+	testb	%al, %al	/* Is it NUL?  */
+	jz	24f		/* yes, finish.  */
+	incq	%rdx		/* Increment destination.  */
+	movb	%ah, (%rdx)	/* 2nd byte.  */
+	testb	%ah, %ah	/* Is it NUL?  */
+	jz	24f		/* yes, finish.  */
+	incq	%rdx		/* Increment destination.  */
+	shrq	$16, %rax	/* Shift...  */
+	jmp	23b		/* and look at next two bytes in %rax.  */
+
+
+24:
+	movq	%rdi, %rax	/* Destination is return value.  */
+	retq
+END (BP_SYM (strcat))
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
new file mode 100644
index 0000000000..391f575aa5
--- /dev/null
+++ b/sysdeps/x86_64/strchr.S
@@ -0,0 +1,290 @@
+/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
+   For AMD x86-64.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+
+	.text
+ENTRY (BP_SYM (strchr))
+
+	/* Before we start with the main loop we process single bytes
+	   until the source pointer is aligned.  This has two reasons:
+	   1. aligned 64-bit memory access is faster
+	   and (more important)
+	   2. we process in the main loop 64 bit in one step although
+	      we don't know the end of the string.  But accessing at
+	      8-byte alignment guarantees that we never access illegal
+	      memory if this would not also be done by the trivial
+	      implementation (this is because all processor inherent
+	      boundaries are multiples of 8).  */
+
+	movq	%rdi, %rcx
+	andl	$7, %ecx	/* Mask alignment bits  */
+	movq	%rdi, %rax	/* duplicate string pointer.  */
+	jz	1f		/* aligned => start loop */
+	neg	%ecx
+	addl	$8, %ecx	/* Bytes up to the next 8-byte boundary.  */
+
+	/* Search the first bytes directly.  */
+0:	movb	(%rax), %dl	/* load byte; %ecx is the loop counter  */
+	cmpb	%dl,%sil	/* compare byte.  */
+	je	6f		/* target found */
+	testb	%dl,%dl		/* is byte NUL? */
+	je	7f		/* yes => return NULL */
+	incq	%rax		/* increment pointer */
+	decl	%ecx
+	jnz	0b
+
+
+1:
+	/* At the moment %rsi contains C.  What we need for the
+	   algorithm is C in all bytes of the register.  Avoid
+	   operations on 16 bit words because these require a
+	   prefix byte (and one more cycle).  */
+	/* Populate 8 bit data to full 64-bit.  */
+	movabs	$0x0101010101010101,%r9
+	movzbl	%sil,%edx
+	imul	%rdx,%r9
+
+	movq $0xfefefefefefefeff, %r8 /* Save magic.  */
+
+      /* We exit the loop if adding MAGIC_BITS to QUADWORD fails to
+	 change any of the hole bits of QUADWORD.
+
+	 1) Is this safe?  Will it catch all the zero bytes?
+	 Suppose there is a byte with all zeros.  Any carry bits
+	 propagating from its left will fall into the hole at its
+	 least significant bit and stop.  Since there will be no
+	 carry from its most significant bit, the LSB of the
+	 byte to the left will be unchanged, and the zero will be
+	 detected.
+
+	 2) Is this worthwhile?  Will it ignore everything except
+	 zero bytes?  Suppose every byte of QUADWORD has a bit set
+	 somewhere.  There will be a carry into bit 8.	If bit 8
+	 is set, this will carry into bit 16.  If bit 8 is clear,
+	 one of bits 9-15 must be set, so there will be a carry
+	 into bit 16.  Similarly, there will be a carry into bit
+	 24 etc.  If one of bits 56-63 is set, there will be a carry
+	 into bit 64 (=carry flag), so all of the hole bits will
+	 be changed.
+
+	 3) But wait!  Aren't we looking for C, not zero?
+	 Good point.  So what we do is XOR QUADWORD with a quadword,
+	 each of whose bytes is C.  This turns each byte that is C
+	 into a zero.  */
+
+	.p2align 4
+4:
+	/* Main Loop is unrolled 4 times.  */
+	/* First unroll.  */
+	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	xorq %r9, %rcx		/* XOR with qword c|...|c => bytes of str == c
+				   are now 0 */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* highest byte is C => locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found c => locate it */
+
+	/* The quadword we looked at does not contain the value we're looking
+	   for.  Let's search now whether we have reached the end of the
+	   string.  */
+	xorq %r9, %rcx		/* restore original qword without reload */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 7f			/* highest byte is NUL => return NULL */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 7f			/* found NUL => return NULL */
+
+	/* Second unroll.  */
+	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	xorq %r9, %rcx		/* XOR with qword c|...|c => bytes of str == c
+				   are now 0 */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* highest byte is C => locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found c => locate it */
+
+	/* The quadword we looked at does not contain the value we're looking
+	   for.  Let's search now whether we have reached the end of the
+	   string.  */
+	xorq %r9, %rcx		/* restore original qword without reload */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 7f			/* highest byte is NUL => return NULL */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 7f			/* found NUL => return NULL */
+	/* Third unroll.  */
+	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	xorq %r9, %rcx		/* XOR with qword c|...|c => bytes of str == c
+				   are now 0 */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* highest byte is C => locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found c => locate it */
+
+	/* The quadword we looked at does not contain the value we're looking
+	   for.  Let's search now whether we have reached the end of the
+	   string.  */
+	xorq %r9, %rcx		/* restore original qword without reload */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 7f			/* highest byte is NUL => return NULL */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 7f			/* found NUL => return NULL */
+	/* Fourth unroll.  */
+	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	xorq %r9, %rcx		/* XOR with qword c|...|c => bytes of str == c
+				   are now 0 */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* highest byte is C => locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found c => locate it */
+
+	/* The quadword we looked at does not contain the value we're looking
+	   for.  Let's search now whether we have reached the end of the
+	   string.  */
+	xorq %r9, %rcx		/* restore original qword without reload */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 7f			/* highest byte is NUL => return NULL */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jz 4b			/* no NUL found => restart loop */
+
+
+7:	/* Return NULL.  */
+	xorq %rax, %rax
+	retq
+
+
+	/* We now scan for the byte in which the character was matched.
+	   But we have to take care of the case that a NUL char is
+	   found before this in the qword.  Note that we XORed %rcx
+	   with the byte we're looking for, therefore the tests below look
+	   reversed.  */
+
+
+	.p2align 4		/* Align, it's a jump target.  */
+3:	movq	%r9,%rdx	/* move to %rdx so that we can access bytes */
+	subq	$8,%rax		/* correct pointer increment.  */
+	testb %cl, %cl		/* is first byte C? */
+	jz 6f			/* yes => return pointer */
+	cmpb %dl, %cl		/* is first byte NUL? */
+	je 7b			/* yes => return NULL */
+	incq %rax		/* increment pointer */
+
+	testb %ch, %ch		/* is second byte C? */
+	jz 6f			/* yes => return pointer */
+	cmpb %dl, %ch		/* is second byte NUL? */
+	je 7b			/* yes => return NULL */
+	incq %rax		/* increment pointer */
+
+	shrq $16, %rcx		/* make upper bytes accessible */
+	testb %cl, %cl		/* is third byte C? */
+	jz 6f			/* yes => return pointer */
+	cmpb %dl, %cl		/* is third byte NUL? */
+	je 7b			/* yes => return NULL */
+	incq %rax		/* increment pointer */
+
+	testb %ch, %ch		/* is fourth byte C? */
+	jz 6f			/* yes => return pointer */
+	cmpb %dl, %ch		/* is fourth byte NUL? */
+	je 7b			/* yes => return NULL */
+	incq %rax		/* increment pointer */
+
+	shrq $16, %rcx		/* make upper bytes accessible */
+	testb %cl, %cl		/* is fifth byte C? */
+	jz 6f			/* yes => return pointer */
+	cmpb %dl, %cl		/* is fifth byte NUL? */
+	je 7b			/* yes => return NULL */
+	incq %rax		/* increment pointer */
+
+	testb %ch, %ch		/* is sixth byte C? */
+	jz 6f			/* yes => return pointer */
+	cmpb %dl, %ch		/* is sixth byte NUL? */
+	je 7b			/* yes => return NULL */
+	incq %rax		/* increment pointer */
+
+	shrq $16, %rcx		/* make upper bytes accessible */
+	testb %cl, %cl		/* is seventh byte C? */
+	jz 6f			/* yes => return pointer */
+	cmpb %dl, %cl		/* is seventh byte NUL? */
+	je 7b			/* yes => return NULL */
+
+	/* It must be in the eighth byte and it cannot be NUL.  */
+	incq %rax
+
+6:
+	nop
+	retq
+END (BP_SYM (strchr))
+
+weak_alias (BP_SYM (strchr), BP_SYM (index))
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
new file mode 100644
index 0000000000..6e6bdcbabd
--- /dev/null
+++ b/sysdeps/x86_64/strcmp.S
@@ -0,0 +1,44 @@
+/* Highly optimized version for x86-64.
+   Copyright (C) 1999, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Based on i686 version contributed by Ulrich Drepper
+   <drepper@cygnus.com>, 1999.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+	.text
+ENTRY (BP_SYM (strcmp))
+L(next):	movb	(%rdi), %al	/* Byte of the first string.  */
+	cmpb	(%rsi), %al	/* Compare with byte of the second.  */
+	jne	L(differ)
+	incq	%rsi
+	incq	%rdi
+	testb	%al, %al	/* Equal bytes: was it the NUL?  */
+	jnz	L(next)
+
+	xorl	%eax, %eax	/* Strings are equal: return 0.  */
+	ret
+
+L(differ):	movl	$-1, %ecx	/* mov keeps the flags of the cmpb.  */
+	movl	$1, %eax
+	cmovbl	%ecx, %eax	/* First byte below second => -1.  */
+	ret
+END (BP_SYM (strcmp))
diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S
new file mode 100644
index 0000000000..f178b9b69c
--- /dev/null
+++ b/sysdeps/x86_64/strcpy.S
@@ -0,0 +1,156 @@
+/* strcpy/stpcpy implementation for x86-64.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Andreas Jaeger <aj@suse.de>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+#ifndef USE_AS_STPCPY
+# define STRCPY strcpy
+#endif
+
+	.text
+ENTRY (BP_SYM (STRCPY))		/* Copy the string at %rsi to %rdi.  */
+	movq %rsi, %rcx		/* Source register. */
+	andl $7, %ecx		/* mask alignment bits */
+	movq %rdi, %rdx		/* Duplicate destination pointer.  */
+
+	jz 5f			/* aligned (flags from andl) => start loop */
+
+	neg %ecx		/* We need to align to 8 bytes.  */
+	addl $8,%ecx		/* %ecx = bytes up to the next boundary.  */
+	/* Copy the first bytes one at a time.  */
+0:
+	movb	(%rsi), %al	/* Fetch a byte */
+	testb	%al, %al	/* Is it NUL? */
+	movb	%al, (%rdx)	/* Store it (mov keeps the flags) */
+	jz	4f		/* If it was NUL, done! */
+	incq	%rsi
+	incq	%rdx
+	decl	%ecx
+	jnz	0b
+
+5:
+	movq $0xfefefefefefefeff,%r8	/* Magic value for NUL detection.  */
+
+	/* Now the source is aligned.  Unfortunately we cannot force
+	   to have both source and destination aligned, so ignore the
+	   alignment of the destination.  */
+	.p2align 4
+1:
+	/* 1st unroll.  */
+	movq	(%rsi), %rax	/* Read 8 bytes.  */
+	addq	$8, %rsi	/* Adjust pointer for next word.  */
+	movq	%rax, %r9	/* Save a copy for NUL finding.  */
+	addq	%r8, %r9	/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc	3f		/* no carry out => NUL in word, copy tail */
+	xorq	%rax, %r9	/* (word+magic)^word */
+	orq	%r8, %r9	/* set all non-carry bits */
+	incq	%r9		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+
+	jnz	3f		/* found NUL => copy tail bytewise */
+
+	movq	%rax, (%rdx)	/* Write value to destination.  */
+	addq	$8, %rdx	/* Adjust pointer.  */
+
+	/* 2nd unroll.  */
+	movq	(%rsi), %rax	/* Read 8 bytes.  */
+	addq	$8, %rsi	/* Adjust pointer for next word.  */
+	movq	%rax, %r9	/* Save a copy for NUL finding.  */
+	addq	%r8, %r9	/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc	3f		/* no carry out => NUL in word, copy tail */
+	xorq	%rax, %r9	/* (word+magic)^word */
+	orq	%r8, %r9	/* set all non-carry bits */
+	incq	%r9		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+
+	jnz	3f		/* found NUL => copy tail bytewise */
+
+	movq	%rax, (%rdx)	/* Write value to destination.  */
+	addq	$8, %rdx	/* Adjust pointer.  */
+
+	/* 3rd unroll.  */
+	movq	(%rsi), %rax	/* Read 8 bytes.  */
+	addq	$8, %rsi	/* Adjust pointer for next word.  */
+	movq	%rax, %r9	/* Save a copy for NUL finding.  */
+	addq	%r8, %r9	/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc	3f		/* no carry out => NUL in word, copy tail */
+	xorq	%rax, %r9	/* (word+magic)^word */
+	orq	%r8, %r9	/* set all non-carry bits */
+	incq	%r9		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+
+	jnz	3f		/* found NUL => copy tail bytewise */
+
+	movq	%rax, (%rdx)	/* Write value to destination.  */
+	addq	$8, %rdx	/* Adjust pointer.  */
+
+	/* 4th unroll.  */
+	movq	(%rsi), %rax	/* Read 8 bytes.  */
+	addq	$8, %rsi	/* Adjust pointer for next word.  */
+	movq	%rax, %r9	/* Save a copy for NUL finding.  */
+	addq	%r8, %r9	/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc	3f		/* no carry out => NUL in word, copy tail */
+	xorq	%rax, %r9	/* (word+magic)^word */
+	orq	%r8, %r9	/* set all non-carry bits */
+	incq	%r9		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+
+	jnz	3f		/* found NUL => copy tail bytewise */
+
+	movq	%rax, (%rdx)	/* Write value to destination.  */
+	addq	$8, %rdx	/* Adjust pointer.  */
+	jmp	1b		/* Next iteration.  */
+
+	/* Do the last few bytes. %rax contains the value to write.
+	   The loop is unrolled twice.  */
+	.p2align 4
+3:
+	/* Note that stpcpy returns %rdx, so %rdx must still address the
+	   NUL byte on exit: it is only advanced after a non-NUL byte.  */
+	movb	%al, (%rdx)	/* 1st byte.  */
+	testb	%al, %al	/* Is it NUL.  */
+	jz	4f		/* yes, finish.  */
+	incq	%rdx		/* Increment destination.  */
+	movb	%ah, (%rdx)	/* 2nd byte.  */
+	testb	%ah, %ah	/* Is it NUL?.  */
+	jz	4f		/* yes, finish.  */
+	incq	%rdx		/* Increment destination.  */
+	shrq	$16, %rax	/* Shift...  */
+	jmp	3b		/* and look at next two bytes in %rax.  */
+
+4:
+#ifdef USE_AS_STPCPY
+	movq	%rdx, %rax	/* Address of the copied NUL is return value.  */
+#else
+	movq	%rdi, %rax	/* Start of destination is return value.  */
+#endif
+	retq
+END (BP_SYM (STRCPY))
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
new file mode 100644
index 0000000000..b488161fd9
--- /dev/null
+++ b/sysdeps/x86_64/strcspn.S
@@ -0,0 +1,123 @@
+/* strcspn (str, ss) -- Return the length of the initial segment of STR
+			which contains no characters from SS.
+   For AMD x86-64.
+   Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
+   Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
+   Adapted for x86-64 by Andreas Jaeger <aj@suse.de>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+/* BEWARE: when strpbrk.S includes this file `strcspn' is a macro expanding
+   to `strpbrk'; `#ifdef strcspn' below then selects the strpbrk result.  */
+
+	.text
+ENTRY (strcspn)		/* String in %rdi, skipset in %rsi.  */
+
+	movq %rdi, %rdx		/* Save SRC.  */
+
+	/* First we create a table with flags for all possible characters.
+	   For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
+	   supported by the C string functions we have 256 characters.
+	   Before inserting marks for the stop characters we clear the whole
+	   table.  */
+	movq %rdi, %r8			/* Copy of SRC; not read again below.  */
+	subq $256, %rsp			/* Make space for 256 bytes.  */
+	movq $32,  %rcx			/* 32*8 bytes = 256 bytes.  */
+	movq %rsp, %rdi
+	xorq %rax, %rax			/* We store 0s.  */
+	cld
+	rep
+	stosq
+
+	movq %rsi, %rax			/* Setup skipset.  */
+
+/* For understanding the following code remember that %rcx == 0 now.
+   Although all the following instruction only modify %cl we always
+   have a correct zero-extended 64-bit value in %rcx.  */
+
+	.p2align 4
+L(2):	movb (%rax), %cl	/* get byte from skipset */
+	testb %cl, %cl		/* is NUL char? */
+	jz L(1)			/* yes => start compare loop */
+	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
+
+	movb 1(%rax), %cl	/* get byte from skipset */
+	testb $0xff, %cl	/* is NUL char? */
+	jz L(1)			/* yes => start compare loop */
+	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
+
+	movb 2(%rax), %cl	/* get byte from skipset */
+	testb $0xff, %cl	/* is NUL char? */
+	jz L(1)			/* yes => start compare loop */
+	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
+
+	movb 3(%rax), %cl	/* get byte from skipset */
+	addq $4, %rax		/* increment skipset pointer */
+	movb %cl, (%rsp,%rcx)	/* set table byte (a NUL just rewrites 0) */
+	testb $0xff, %cl	/* is NUL char? */
+	jnz L(2)		/* no => process next 4 bytes from skipset */
+
+L(1):	leaq -4(%rdx), %rax	/* prepare loop */
+
+	/* We use a neat trick for the following loop.  Normally we would
+	   have to test for two termination conditions
+	   1. a character in the skipset was found
+	   and
+	   2. the end of the string was found
+	   But as a sign that the character is in the skipset we store its
+	   value in the table.  But the value of NUL is NUL so the loop
+	   terminates for NUL in every case.  */
+
+	.p2align 4
+L(3):	addq $4, %rax		/* adjust pointer for full loop round */
+
+	movb (%rax), %cl	/* get byte from string */
+	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
+	je L(4)			/* yes => return */
+
+	movb 1(%rax), %cl	/* get byte from string */
+	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
+	je L(5)			/* yes => return */
+
+	movb 2(%rax), %cl	/* get byte from string */
+	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
+	jz L(6)			/* yes => return */
+
+	movb 3(%rax), %cl	/* get byte from string */
+	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
+	jne L(3)		/* no => start loop again */
+
+	incq %rax		/* adjust pointer */
+L(6):	incq %rax
+L(5):	incq %rax
+
+L(4):	addq $256, %rsp		/* remove skipset */
+#ifdef strcspn			/* Built as strpbrk: return a pointer.  */
+	xorq %rdx,%rdx
+	orb %cl, %cl		/* was last character NUL? */
+	cmovzq %rdx, %rax	/* Yes:	return NULL */
+#else
+	subq %rdx, %rax		/* we have to return the number of valid
+				   characters, so compute distance to first
+				   non-valid character */
+#endif
+	ret
+END (strcspn)
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
new file mode 100644
index 0000000000..4441ba750e
--- /dev/null
+++ b/sysdeps/x86_64/strlen.S
@@ -0,0 +1,138 @@
+/* strlen(str) -- determine the length of the string STR.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   Based on i486 version contributed by Ulrich Drepper <drepper@redhat.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+
+	.text
+ENTRY (strlen)			/* Length of the string at %rdi.  */
+	movq %rdi, %rcx		/* Duplicate source pointer. */
+	andl $7, %ecx		/* mask alignment bits */
+	movq %rdi, %rax		/* running pointer, starts at the string.  */
+	jz 1f			/* aligned (flags from andl) => start loop */
+
+	neg %ecx		/* We need to align to 8 bytes.  */
+	addl $8,%ecx		/* %ecx = bytes up to the next boundary.  */
+	/* Search the first bytes directly.  */
+0:	cmpb $0x0,(%rax)	/* is byte NUL? */
+	je 2f			/* yes => return */
+	incq %rax		/* increment pointer */
+	decl %ecx
+	jnz 0b
+
+1:	movq $0xfefefefefefefeff,%r8 /* Save magic.  */
+
+	.p2align 4		/* Align loop.  */
+4:	/* Main Loop is unrolled 4 times.  */
+	/* First unroll.  */
+	movq (%rax), %rcx	/* get the 8 bytes in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* no carry out => NUL in word, locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found NUL => locate it */
+
+	/* Second unroll.  */
+	movq (%rax), %rcx	/* get the 8 bytes in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* no carry out => NUL in word, locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found NUL => locate it */
+
+	/* Third unroll.  */
+	movq (%rax), %rcx	/* get the 8 bytes in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* no carry out => NUL in word, locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jnz 3f			/* found NUL => locate it */
+
+	/* Fourth unroll.  */
+	movq (%rax), %rcx	/* get the 8 bytes in question */
+	addq $8,%rax		/* adjust pointer for next word */
+	movq %r8, %rdx		/* magic value */
+	addq %rcx, %rdx		/* add the magic value to the word.  We get
+				   carry bits reported for each byte which
+				   is *not* 0 */
+	jnc 3f			/* no carry out => NUL in word, locate it */
+	xorq %rcx, %rdx		/* (word+magic)^word */
+	orq %r8, %rdx		/* set all non-carry bits */
+	incq %rdx		/* add 1: if one carry bit was *not* set
+				   the addition will not result in 0.  */
+	jz 4b			/* no NUL found => continue loop */
+
+	.p2align 4		/* Align, it's a jump target.  */
+3:	subq $8,%rax		/* correct pointer increment.  */
+
+	testb %cl, %cl		/* is first byte NUL? */
+	jz 2f			/* yes => return */
+	incq %rax		/* increment pointer */
+
+	testb %ch, %ch		/* is second byte NUL? */
+	jz 2f			/* yes => return */
+	incq %rax		/* increment pointer */
+
+	testl $0x00ff0000, %ecx /* is third byte NUL? */
+	jz 2f			/* yes => return pointer */
+	incq %rax		/* increment pointer */
+
+	testl $0xff000000, %ecx /* is fourth byte NUL? */
+	jz 2f			/* yes => return pointer */
+	incq %rax		/* increment pointer */
+
+	shrq $32, %rcx		/* look at other half.  */
+
+	testb %cl, %cl		/* is fifth byte NUL? */
+	jz 2f			/* yes => return */
+	incq %rax		/* increment pointer */
+
+	testb %ch, %ch		/* is sixth byte NUL? */
+	jz 2f			/* yes => return */
+	incq %rax		/* increment pointer */
+
+	testl $0xff0000, %ecx	/* is seventh byte NUL? */
+	jz 2f			/* yes => return pointer */
+	incq %rax		/* no => the eighth byte must be the NUL */
+2:
+	subq %rdi, %rax		/* compute difference to string start */
+	ret
+END (strlen)
diff --git a/sysdeps/x86_64/strpbrk.S b/sysdeps/x86_64/strpbrk.S
new file mode 100644
index 0000000000..9b97ada84e
--- /dev/null
+++ b/sysdeps/x86_64/strpbrk.S
@@ -0,0 +1,2 @@
+#define strcspn strpbrk		/* Assemble strcspn.S as strpbrk.  */
+#include <sysdeps/x86_64/strcspn.S>
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
new file mode 100644
index 0000000000..a8f0c07a3f
--- /dev/null
+++ b/sysdeps/x86_64/strspn.S
@@ -0,0 +1,113 @@
+/* strspn (str, ss) -- Return the length of the initial segment of STR
+			which contains only characters from SS.
+   For AMD x86-64.
+   Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
+   Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
+   Adapted for x86-64 by Andreas Jaeger <aj@suse.de>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+	.text
+ENTRY (strspn)			/* String in %rdi, accepted set SS in %rsi.  */
+
+	movq %rdi, %rdx		/* Save SRC.  */
+
+	/* First we create a table with flags for all possible characters.
+	   For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
+	   supported by the C string functions we have 256 characters.
+	   Before inserting marks for the characters of SS we clear the
+	   whole table.  */
+	movq %rdi, %r8			/* Copy of SRC; not read again below.  */
+	subq $256, %rsp			/* Make space for 256 bytes.  */
+	movq $32,  %rcx			/* 32*8 bytes = 256 bytes.  */
+	movq %rsp, %rdi
+	xorq %rax, %rax			/* We store 0s.  */
+	cld
+	rep
+	stosq
+
+	movq %rsi, %rax			/* Setup pointer into SS.  */
+
+/* For understanding the following code remember that %rcx == 0 now.
+   Although all the following instruction only modify %cl we always
+   have a correct zero-extended 64-bit value in %rcx.  */
+
+	.p2align 4
+L(2):	movb (%rax), %cl	/* get byte from SS */
+	testb %cl, %cl		/* is NUL char? */
+	jz L(1)			/* yes => start compare loop */
+	movb %cl, (%rsp,%rcx)	/* set corresponding byte in table */
+
+	movb 1(%rax), %cl	/* get byte from SS */
+	testb $0xff, %cl	/* is NUL char? */
+	jz L(1)			/* yes => start compare loop */
+	movb %cl, (%rsp,%rcx)	/* set corresponding byte in table */
+
+	movb 2(%rax), %cl	/* get byte from SS */
+	testb $0xff, %cl	/* is NUL char? */
+	jz L(1)			/* yes => start compare loop */
+	movb %cl, (%rsp,%rcx)	/* set corresponding byte in table */
+
+	movb 3(%rax), %cl	/* get byte from SS */
+	addq $4, %rax		/* increment SS pointer */
+	movb %cl, (%rsp,%rcx)	/* set table byte (a NUL just rewrites 0) */
+	testb $0xff, %cl	/* is NUL char? */
+	jnz L(2)		/* no => process next 4 bytes from SS */
+
+L(1):	leaq -4(%rdx), %rax	/* prepare loop */
+
+	/* We use a neat trick for the following loop.  Normally we would
+	   have to test for two termination conditions
+	   1. a character not contained in SS was found
+	   and
+	   2. the end of the string was found
+	   But each character of SS has its own (nonzero) value stored in
+	   the table and the entry for NUL is always zero, so the test
+	   below fails for NUL just like for a character outside SS.  */
+
+	.p2align 4
+L(3):	addq $4, %rax		/* adjust pointer for full loop round */
+
+	movb (%rax), %cl	/* get byte from string */
+	testb %cl, (%rsp,%rcx)	/* is it contained in SS? */
+	jz L(4)			/* no => return */
+
+	movb 1(%rax), %cl	/* get byte from string */
+	testb %cl, (%rsp,%rcx)	/* is it contained in SS? */
+	jz L(5)			/* no => return */
+
+	movb 2(%rax), %cl	/* get byte from string */
+	testb %cl, (%rsp,%rcx)	/* is it contained in SS? */
+	jz L(6)			/* no => return */
+
+	movb 3(%rax), %cl	/* get byte from string */
+	testb %cl, (%rsp,%rcx)	/* is it contained in SS? */
+	jnz L(3)		/* yes => start loop again */
+
+	incq %rax		/* adjust pointer */
+L(6):	incq %rax
+L(5):	incq %rax
+
+L(4):	addq $256, %rsp		/* remove table */
+	subq %rdx, %rax		/* we have to return the number of valid
+				   characters, so compute distance to first
+				   non-valid character */
+	ret
+END (strspn)
diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S
new file mode 100644
index 0000000000..771bd2d0e3
--- /dev/null
+++ b/sysdeps/x86_64/strtok.S
@@ -0,0 +1,208 @@
+/* strtok (str, delim) -- Return next DELIM separated token from STR.
+   For AMD x86-64.
+   Copyright (C) 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Based on i686 version contributed by Ulrich Drepper
+   <drepper@cygnus.com>, 1998.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+/* This file can be used for the strtok and strtok_r functions:
+
+   strtok:
+	INPUT PARAMETER:
+	str		%rdi
+	delim		%rsi
+
+   strtok_r:
+	INPUT PARAMETER:
+	str		%rdi
+	delim		%rsi
+	save_ptr	%rdx
+
+   We do a common implementation here.  */
+
+#ifdef USE_AS_STRTOK_R
+# define SAVE_PTR (%r9)		/* %r9 gets the third argument below.  */
+#else
+	.bss			/* Scan position kept between strtok calls.  */
+	.local save_ptr
+	ASM_TYPE_DIRECTIVE (save_ptr, @object)
+	.size save_ptr, 8
+save_ptr:
+	.space 8
+
+# ifdef PIC
+#  define SAVE_PTR save_ptr(%rip)
+# else
+#  define SAVE_PTR save_ptr
+# endif
+
+# define FUNCTION strtok
+#endif
+
+	.text
+ENTRY (BP_SYM (FUNCTION))
+	/* First we create a table with flags for all possible characters.
+	   For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
+	   supported by the C string functions we have 256 characters.
+	   Before inserting marks for the stop characters we clear the whole
+	   table.  */
+	movq %rdi, %r8			/* Save value.  */
+	subq $256, %rsp			/* Make space for 256 bytes.  */
+	movq $32,  %rcx			/* 32*8 bytes = 256 bytes.  */
+	movq %rsp, %rdi
+	xorq %rax, %rax			/* We store 0s.  */
+	cld
+	rep
+	stosq
+
+	/* Note: %rcx = 0 !!! */
+
+#ifdef USE_AS_STRTOK_R
+	/* The value is stored in the third argument.  */
+	movq %rdx, %rax
+	movq %rdx, %r9		/* Save value - see def. of SAVE_PTR.  */
+	movq (%rax), %rax
+#else
+	/* The value is in the local variable defined above.  But
+	   we have to take care for PIC code.  */
+	movq SAVE_PTR, %rax
+#endif
+	movq %r8, %rdx		/* Get start of string.  */
+
+	/* If the pointer is NULL we have to use the stored value of
+	   the last run.  */
+	cmpq $0, %rdx
+	cmove %rax, %rdx
+	testq %rdx, %rdx
+	jz L(returnNULL)	/* No string at all => return NULL.  */
+	movq %rsi, %rax		/* Get start of delimiter set.  */
+
+/* For understanding the following code remember that %rcx == 0 now.
+   Although all the following instruction only modify %cl we always
+   have a correct zero-extended 64-bit value in %rcx.  */
+
+L(2):	movb (%rax), %cl	/* get byte from stopset */
+	testb %cl, %cl		/* is NUL char? */
+	jz L(1)			/* yes => start compare loop */
+	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
+
+	movb 1(%rax), %cl	/* get byte from stopset */
+	testb $0xff, %cl	/* is NUL char? */
+	jz L(1)			/* yes => start compare loop */
+	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
+
+	movb 2(%rax), %cl	/* get byte from stopset */
+	testb $0xff, %cl	/* is NUL char? */
+	jz L(1)			/* yes => start compare loop */
+	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
+
+	movb 3(%rax), %cl	/* get byte from stopset */
+	addq $4, %rax		/* increment stopset pointer */
+	movb %cl, (%rsp,%rcx)	/* set table byte (a NUL just rewrites 0) */
+	testb $0xff, %cl	/* is NUL char? */
+	jnz L(2)		/* no => process next 4 bytes from stopset */
+
+L(1):
+
+	leaq -4(%rdx), %rax	/* prepare loop */
+
+	/* First skip leading delimiters.  Every delimiter has its own
+	   (nonzero) value stored in the table, all other entries are
+	   zero.  Normally we would have to test for two conditions
+	   1. a character not in the stopset was found
+	   and
+	   2. the end of the string was found
+	   but the table entry for NUL is zero, so the test below ends
+	   the loop for NUL just like for the first token character.  */
+
+L(3):	addq $4, %rax		/* adjust pointer for full loop round */
+
+	movb (%rax), %cl	/* get byte from string */
+	testb %cl, (%rsp,%rcx)	/* is it contained in stopset? */
+	jz L(4)			/* no => start of token */
+
+	movb 1(%rax), %cl	/* get byte from string */
+	testb %cl, (%rsp,%rcx)	/* is it contained in stopset? */
+	jz L(5)			/* no => start of token */
+
+	movb 2(%rax), %cl	/* get byte from string */
+	testb %cl, (%rsp,%rcx)	/* is it contained in stopset? */
+	jz L(6)			/* no => start of token */
+
+	movb 3(%rax), %cl	/* get byte from string */
+	testb %cl, (%rsp,%rcx)	/* is it contained in stopset? */
+	jnz L(3)		/* yes => start of loop */
+
+	incq %rax		/* adjust pointer */
+L(6):	incq %rax
+L(5):	incq %rax
+
+	/* Now we have to terminate the string.  */
+
+L(4):	leaq -4(%rax), %rdx	/* We use %rDX for the next run.  */
+
+L(7):	addq $4, %rdx		/* adjust pointer for full loop round */
+
+	movb (%rdx), %cl	/* get byte from string */
+	cmpb %cl, (%rsp,%rcx)	/* is it a delimiter or NUL? */
+	je L(8)			/* yes => end of token */
+
+	movb 1(%rdx), %cl	/* get byte from string */
+	cmpb %cl, (%rsp,%rcx)	/* is it a delimiter or NUL? */
+	je L(9)			/* yes => end of token */
+
+	movb 2(%rdx), %cl	/* get byte from string */
+	cmpb %cl, (%rsp,%rcx)	/* is it a delimiter or NUL? */
+	je L(10)		/* yes => end of token */
+
+	movb 3(%rdx), %cl	/* get byte from string */
+	cmpb %cl, (%rsp,%rcx)	/* is it a delimiter or NUL? */
+	jne L(7)		/* no => start loop again */
+
+	incq %rdx		/* adjust pointer */
+L(10):	incq %rdx
+L(9):	incq %rdx
+
+L(8):	cmpq %rax, %rdx
+	je L(returnNULL)	/* There was no token anymore.  */
+
+	movb $0, (%rdx)		/* Terminate string.  */
+
+	/* Are we at end of string?  */
+	cmpb $0, %cl
+	leaq 1(%rdx), %rcx
+	cmovne %rcx, %rdx
+
+	/* Store the pointer to the next character.  */
+	movq %rdx, SAVE_PTR
+
+L(epilogue):
+	addq $256, %rsp		/* Remove the stopset table.  */
+	retq
+
+L(returnNULL):
+	xorq %rax, %rax
+	movq %rdx, SAVE_PTR	/* Never leave a stale or unset position.  */
+	jmp L(epilogue)
+
+END (BP_SYM (FUNCTION))
diff --git a/sysdeps/x86_64/strtok_r.S b/sysdeps/x86_64/strtok_r.S
new file mode 100644
index 0000000000..0248f27236
--- /dev/null
+++ b/sysdeps/x86_64/strtok_r.S
@@ -0,0 +1,4 @@
+#define FUNCTION __strtok_r	/* Symbol name used by ENTRY/END in strtok.S.  */
+#define USE_AS_STRTOK_R	1	/* Keep the position in the third argument.  */
+#include <sysdeps/x86_64/strtok.S>
+weak_alias (BP_SYM (__strtok_r), BP_SYM (strtok_r))