about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 55
-rw-r--r-- INSTALL 28
-rw-r--r-- Makeconfig 7
-rw-r--r-- Makefile 14
-rw-r--r-- NEWS 24
-rw-r--r-- NOTES 58
-rwxr-xr-x configure 4
-rw-r--r-- configure.in 4
-rw-r--r-- csu/Makefile 4
-rw-r--r-- fedora/branch.mk 4
-rw-r--r-- nptl/ChangeLog 13
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S 2
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c 6
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S 2
-rw-r--r-- sysdeps/ia64/memccpy.S 55
-rw-r--r-- sysdeps/mach/hurd/Subdirs 10
-rw-r--r-- sysdeps/rs6000/add_n.s 81
-rw-r--r-- sysdeps/rs6000/addmul_1.s 123
-rw-r--r-- sysdeps/rs6000/ffs.c 42
-rw-r--r-- sysdeps/rs6000/lshift.s 59
-rw-r--r-- sysdeps/rs6000/memcopy.h 86
-rw-r--r-- sysdeps/rs6000/mul_1.s 110
-rw-r--r-- sysdeps/rs6000/rshift.s 57
-rw-r--r-- sysdeps/rs6000/sub_n.s 82
-rw-r--r-- sysdeps/rs6000/submul_1.s 128
-rw-r--r-- sysdeps/sparc/fpu/fraiseexcpt.c 34
-rw-r--r-- sysdeps/sparc/sparc32/fpu/libm-test-ulps 9
-rw-r--r-- sysdeps/sparc/sparc32/sparcv9v/memcpy.S 2
-rw-r--r-- sysdeps/sparc/sparc32/sparcv9v/memset.S 2
-rw-r--r-- sysdeps/sparc/sparc64/fpu/libm-test-ulps 9
-rw-r--r-- sysdeps/sparc/sparc64/sparcv9v/memcpy.S 593
-rw-r--r-- sysdeps/sparc/sparc64/sparcv9v/memset.S 127
-rw-r--r-- sysdeps/unix/sysv/linux/i386/fxstatat.c 2
-rw-r--r-- sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h 2
-rw-r--r-- version.h 2
35 files changed, 987 insertions, 853 deletions
diff --git a/ChangeLog b/ChangeLog
index 509579ea37..16cb49f63b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,58 @@
+2006-01-05  H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #2013]
+	* sysdeps/ia64/memccpy.S: Properly handle recovery for
+	predicated speculative load.
+
+2006-03-01  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/sparc/sparc64/fpu/libm-test-ulps: Update.
+
+	* sysdeps/unix/sysv/linux/i386/fxstatat.c (__fxstatat64): Use
+	libc_hidden_ver rather than libc_hidden_def.
+
+	* sysdeps/sparc/sparc32/fpu/libm-test-ulps: Update.
+	* sysdeps/sparc/fpu/fraiseexcpt.c (__feraiseexcept):
+	Use inline asm to make sure the compiler doesn't optimize insns out.
+
+	* sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
+	(DECLARGS_6): Fix a typo.
+
+2006-03-01  Roland McGrath  <roland@redhat.com>
+
+	* version.h (VERSION): 2.3.91
+
+	* Makeconfig (sysdeps-srcdirs): New variable.
+	(sysdeps-add-ons-srcdirs): Use it.
+	* csu/Makefile (all-Banner-files): Use it.
+
+	* Makefile (dist-selector): New variable.
+	(glibc-%.tar rule): Use it.  Make $(dist-separate) tarballs
+	contain one version-named directory.
+
+	* Makefile (format-me): Use  --plaintext --no-number-sections.
+
+	* NOTES, INSTALL: Regenerated.
+
+	* NEWS: Update 2.4 items.
+
+2006-03-01  David S. Miller  <davem@sunset.davemloft.net>
+
+	* configure.in (machine): Match sparcv9v and sparc64v.
+	* configure: Regenerated.
+	* sysdeps/sparc/sparc32/sparcv9v/memcpy.S: New file.
+	* sysdeps/sparc/sparc32/sparcv9v/memset.S: New file.
+	* sysdeps/sparc/sparc64/sparcv9v/memcpy.S: New file.
+	* sysdeps/sparc/sparc64/sparcv9v/memset.S: New file.
+
+2006-03-01  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/hurd/Subdirs: Use "first hurd".
+
+2006-02-28  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/rs6000: Directory removed, saved in ports repository.
+
 2006-03-01  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
 
 	* sysdeps/unix/sysv/linux/sh/pread.c: Copy in mips pread.c.
diff --git a/INSTALL b/INSTALL
index b503ea8f5f..f60658f1ae 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,5 +1,5 @@
-Appendix A Installing the GNU C Library
-***************************************
+Installing the GNU C Library
+****************************
 
 Before you do anything else, you should read the file `FAQ' located at
 the top level of the source tree.  This file answers common questions
@@ -15,8 +15,8 @@ activate them, and they will be compiled into the library.
 and GNU Make, and possibly others.  *Note Tools for Compilation::,
 below.
 
-A.1 Configuring and compiling GNU Libc
-======================================
+Configuring and compiling GNU Libc
+==================================
 
 GNU libc can be compiled in the source directory, but we strongly advise
 building it in a separate build directory.  For example, if you have
@@ -205,8 +205,8 @@ library.  You may need to set `AR' and `RANLIB' to cross-compiling
 versions of `ar' and `ranlib' if the native tools are not configured to
 work with object files for the target you configured for.
 
-A.2 Installing the C Library
-============================
+Installing the C Library
+========================
 
 To install the library and its header files, and the Info files of the
 manual, type `env LANGUAGE=C LC_ALL=C make install'.  This will build
@@ -279,8 +279,8 @@ which is in `/usr/share/zoneinfo' to the file `/etc/localtime'.  For
 Germany, you might execute `ln -s /usr/share/zoneinfo/Europe/Berlin
 /etc/localtime'.
 
-A.3 Recommended Tools for Compilation
-=====================================
+Recommended Tools for Compilation
+=================================
 
 We recommend installing the following GNU tools before attempting to
 build the GNU C library:
@@ -359,8 +359,8 @@ and if you change any of the message translation files you will need
 You may also need these packages if you upgrade your source tree using
 patches, although we try to avoid this.
 
-A.4 Supported Configurations
-============================
+Supported Configurations
+========================
 
 The GNU C Library currently supports configurations that match the
 following patterns:
@@ -431,8 +431,8 @@ some `i686' specific instructions.  To generate code for other models,
 you have to configure for that model and give GCC the appropriate
 `-march=' and `-mcpu=' compiler switches via CFLAGS.
 
-A.5 Specific advice for GNU/Linux systems
-=========================================
+Specific advice for GNU/Linux systems
+=====================================
 
 If you are installing GNU libc on a GNU/Linux system, you need to have
 the header files from a 2.2 or newer kernel around for reference.  For
@@ -486,8 +486,8 @@ kernel-side thread support.  `nscd' happens to hit these bugs
 particularly hard, but you might have problems with any threaded
 program.
 
-A.6 Reporting Bugs
-==================
+Reporting Bugs
+==============
 
 There are probably bugs in the GNU C library.  There are certainly
 errors and omissions in this manual.  If you report them, they will get
diff --git a/Makeconfig b/Makeconfig
index cc44b00012..1f4b62822a 100644
--- a/Makeconfig
+++ b/Makeconfig
@@ -107,9 +107,10 @@ sysdirs := $(foreach D,$(config-sysdirs),$(firstword $(filter /%,$D) $(..)$D))
 # to find specific add-on files without assuming the add-on directory name.
 # It also means that headers can go into an add-on's base directory
 # instead of the add-on needing a sysdeps/generic of its own.
-+sysdep_dirs := $(sysdirs) $(foreach add-on,$(sysdeps-add-ons),\
-				     $(firstword $(filter /%,$(add-on)) \
-						 $(..)$(add-on)))
+sysdeps-srcdirs := $(foreach add-on,$(sysdeps-add-ons),\
+			     $(firstword $(filter /%,$(add-on)) \
+					 $(..)$(add-on)))
++sysdep_dirs = $(sysdirs) $(sysdeps-srcdirs)
 ifdef objdir
 +sysdep_dirs := $(objdir) $(+sysdep_dirs)
 endif
diff --git a/Makefile b/Makefile
index 8db0ff51f4..43c4f4d4b5 100644
--- a/Makefile
+++ b/Makefile
@@ -358,20 +358,18 @@ endif
 files-for-dist := README FAQ INSTALL NOTES configure
 
 tag-of-stem = glibc-$(subst .,_,$*)
+dist-selector = -r $(tag-of-stem)
 
 # Add-ons in the main repository but distributed in their own tar files.
 dist-separate = libidn
 
-# Directories in each add-on.
-dist-separate-libidn = libidn
-
 glibc-%.tar $(dist-separate:%=glibc-%-%.tar): $(files-for-dist) \
 					      $(foreach D,$(dist-separate),\
 							$D/configure)
 	@rm -fr glibc-$*
 	$(MAKE) -q `find sysdeps $(addsuffix /sysdeps,$(sysdeps-add-ons)) \
 			 -name configure`
-	cvs $(CVSOPTS) -Q export -d glibc-$* -r $(tag-of-stem) libc
+	cvs $(CVSOPTS) -Q export -d glibc-$* $(dist-selector) libc
 # Touch all the configure scripts going into the tarball since cvs export
 # might have delivered configure.in newer than configure.
 	find glibc-$* -name configure -print | xargs touch
@@ -380,8 +378,10 @@ glibc-%.tar $(dist-separate:%=glibc-%-%.tar): $(files-for-dist) \
 	rm -fr glibc-$*
 define dist-do-separate-dirs
 $(foreach dir,$(dist-separate),
-	tar cf glibc-$(dir)-$*.tar -C glibc-$* $(dist-separate-$(dir))
-	rm -rf $(addprefix glibc-$*/,$(dist-separate-$(dir)))
+	@rm -fr glibc-$(dir)-$*
+	mv glibc-$*/$(dir) glibc-$(dir)-$*
+	tar cf glibc-$(dir)-$*.tar glibc-$(dir)-$*
+	rm -fr glibc-$(dir)-$*
 )
 endef
 
@@ -399,7 +399,7 @@ tag-%: $(files-for-dist)
 
 define format-me
 @rm -f $@
-makeinfo --no-validate --no-warn --no-headers $< -o $@
+makeinfo --no-validate --plaintext --no-number-sections $< -o $@
 -chmod a-w $@
 endef
 INSTALL: manual/install.texi; $(format-me)
diff --git a/NEWS b/NEWS
index 41755c383b..c47a9772c6 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,5 @@
-GNU C Library NEWS -- history of user-visible changes.  2005-12-15
-Copyright (C) 1992-2002,2003,2004,2005 Free Software Foundation, Inc.
+GNU C Library NEWS -- history of user-visible changes.  2006-03-01
+Copyright (C) 1992-2002,2003,2004,2005,2006 Free Software Foundation, Inc.
 See the end for copying conditions.
 
 Please send GNU C library bug reports via <http://sources.redhat.com/bugzilla/>
@@ -27,8 +27,22 @@ Version 2.4
   For a libc and libpthread that works well on Linux 2.4 kernels, we
   recommend using the stable 2.3 branch.
 
-* New interfaces: fdopendir, openat, fstatat, fchownat, futimesat, renameat,
-  unlinkat, mkdirat, mkfifoat, mknodat, linkat, symlinkat, readlinkat.
+* The new function open_wmemstream acts like open_memstream,
+  but uses a wchar_t wide character buffer.
+
+* The new function ppoll is an improved version of poll, similar to pselect.
+
+* New interfaces: fdopendir, openat, fstatat, fchmodat, fchownat,
+  futimesat, faccessat, mkdirat, mkfifoat, mknodat,
+  renameat, unlinkat, linkat, symlinkat, readlinkat.
+
+* New Linux kernel system calls: unshare,
+  inotify_init, inotify_add_watch, inotify_rm_watch.
+
+* The euidaccess function is now also known by the alias eaccess,
+  for compatibility with some other systems.
+
+* Timezone data updated to 2006b version.
 
 Version 2.3.6
 
@@ -1537,7 +1551,7 @@ Version 1.04
 ----------------------------------------------------------------------
 Copyright information:
 
-Copyright (C) 1992-1999,2000,2001,2002,2003,2004,2005
+Copyright (C) 1992-1999,2000,2001,2002,2003,2004,2005,2006
 	Free Software Foundation, Inc.
 
    Permission is granted to anyone to make or distribute verbatim copies
diff --git a/NOTES b/NOTES
index b409cb4ab8..9bef2425bd 100644
--- a/NOTES
+++ b/NOTES
@@ -1,8 +1,8 @@
 Feature Test Macros
 -------------------
 
-   The exact set of features available when you compile a source file
-is controlled by which "feature test macros" you define.
+The exact set of features available when you compile a source file is
+controlled by which "feature test macros" you define.
 
    If you compile your programs using `gcc -ansi', you get only the
 ISO C library features, unless you explicitly request additional
@@ -32,7 +32,7 @@ limited standard.  It is insufficient for this purpose, as it will not
 protect you from including header files outside the standard, or
 relying on semantics undefined within the standard.
 
- - Macro: _POSIX_SOURCE
+ -- Macro: _POSIX_SOURCE
      If you define this macro, then the functionality from the POSIX.1
      standard (IEEE Standard 1003.1) is available, as well as all of the
      ISO C facilities.
@@ -40,7 +40,7 @@ relying on semantics undefined within the standard.
      The state of `_POSIX_SOURCE' is irrelevant if you define the macro
      `_POSIX_C_SOURCE' to a positive integer.
 
- - Macro: _POSIX_C_SOURCE
+ -- Macro: _POSIX_C_SOURCE
      Define this macro to a positive integer to control which POSIX
      functionality is made available.  The greater the value of this
      macro, the more functionality is made available.
@@ -65,12 +65,7 @@ relying on semantics undefined within the standard.
      greater than or equal to `199506L', then the functionality from
      the 1996 edition is made available.
 
-     The Single Unix Specification specify that setting this macro to
-     the value `199506L' selects all the values specified by the POSIX
-     standards plus those of the Single Unix Specification, i.e., is the
-     same as if `_XOPEN_SOURCE' is set to `500' (see below).
-
- - Macro: _BSD_SOURCE
+ -- Macro: _BSD_SOURCE
      If you define this macro, functionality derived from 4.3 BSD Unix
      is included as well as the ISO C, POSIX.1, and POSIX.2 material.
 
@@ -88,15 +83,15 @@ relying on semantics undefined within the standard.
      must give the option `-lbsd-compat' to the compiler or linker when
      linking the program, to tell it to find functions in this special
      compatibility library before looking for them in the normal C
-     library.
+     library.  
 
- - Macro: _SVID_SOURCE
+ -- Macro: _SVID_SOURCE
      If you define this macro, functionality derived from SVID is
      included as well as the ISO C, POSIX.1, POSIX.2, and X/Open
      material.
 
- - Macro: _XOPEN_SOURCE
- - Macro: _XOPEN_SOURCE_EXTENDED
+ -- Macro: _XOPEN_SOURCE
+ -- Macro: _XOPEN_SOURCE_EXTENDED
      If you define this macro, functionality described in the X/Open
      Portability Guide is included.  This is a superset of the POSIX.1
      and POSIX.2 functionality and in fact `_POSIX_SOURCE' and
@@ -113,10 +108,10 @@ relying on semantics undefined within the standard.
      functionality described so far plus some new definitions from the
      Single Unix Specification, version 2.
 
- - Macro: _LARGEFILE_SOURCE
+ -- Macro: _LARGEFILE_SOURCE
      If this macro is defined some extra functions are available which
-     rectify a few shortcomings in all previous standards.  More
-     concrete the functions `fseeko' and `ftello' are available.
+     rectify a few shortcomings in all previous standards.
+     Specifically, the functions `fseeko' and `ftello' are available.
      Without these functions the difference between the ISO C interface
      (`fseek', `ftell') and the low-level POSIX interface (`lseek')
      would lead to problems.
@@ -124,8 +119,8 @@ relying on semantics undefined within the standard.
      This macro was introduced as part of the Large File Support
      extension (LFS).
 
- - Macro: _LARGEFILE64_SOURCE
-     If you define this macro an additional set of function is made
+ -- Macro: _LARGEFILE64_SOURCE
+     If you define this macro an additional set of functions is made
      available which enables 32 bit systems to use files of sizes beyond
      the usual limit of 2GB.  This interface is not available if the
      system does not support files that large.  On systems where the
@@ -138,13 +133,13 @@ relying on semantics undefined within the standard.
      `off64_t' and `fseeko' vs. `fseeko64'.
 
      This macro was introduced as part of the Large File Support
-     extension (LFS).  It is a transition interface for the time 64 bit
-     offsets are not generally used (see `_FILE_OFFSET_BITS'.
+     extension (LFS).  It is a transition interface for the period when
+     64 bit offsets are not generally used (see `_FILE_OFFSET_BITS').
 
- - Macro: _FILE_OFFSET_BITS
+ -- Macro: _FILE_OFFSET_BITS
      This macro determines which file system interface shall be used,
-     one replacing the other.  While `_LARGEFILE64_SOURCE' makes the
-     64 bit interface available as an additional interface
+     one replacing the other.  Whereas `_LARGEFILE64_SOURCE' makes the
+     64 bit interface available as an additional interface,
      `_FILE_OFFSET_BITS' allows the 64 bit interface to replace the old
      interface.
 
@@ -154,9 +149,10 @@ relying on semantics undefined within the standard.
 
      If the macro is defined to the value `64', the large file interface
      replaces the old interface.  I.e., the functions are not made
-     available under different names as `_LARGEFILE64_SOURCE' does.
-     Instead the old function names now reference the new functions,
-     e.g., a call to `fseeko' now indeed calls `fseeko64'.
+     available under different names (as they are with
+     `_LARGEFILE64_SOURCE').  Instead the old function names now
+     reference the new functions, e.g., a call to `fseeko' now indeed
+     calls `fseeko64'.
 
      This macro should only be selected if the system provides
      mechanisms for handling large files.  On 64 bit systems this macro
@@ -166,13 +162,13 @@ relying on semantics undefined within the standard.
      This macro was introduced as part of the Large File Support
      extension (LFS).
 
- - Macro: _ISOC99_SOURCE
+ -- Macro: _ISOC99_SOURCE
      Until the revised ISO C standard is widely adopted the new features
      are not automatically enabled.  The GNU libc nevertheless has a
      complete implementation of the new standard and to enable the new
      features the macro `_ISOC99_SOURCE' should be defined.
 
- - Macro: _GNU_SOURCE
+ -- Macro: _GNU_SOURCE
      If you define this macro, everything is included: ISO C89,
      ISO C99, POSIX.1, POSIX.2, BSD, SVID, X/Open, LFS, and GNU
      extensions.  In the cases where POSIX.1 conflicts with BSD, the
@@ -191,8 +187,8 @@ relying on semantics undefined within the standard.
      compiler or linker.  *Note:* If you forget to do this, you may get
      very strange errors at run time.
 
- - Macro: _REENTRANT
- - Macro: _THREAD_SAFE
+ -- Macro: _REENTRANT
+ -- Macro: _THREAD_SAFE
      If you define one of these macros, reentrant versions of several
      functions get declared.  Some of the functions are specified in
      POSIX.1c but many others are only available on a few other systems
diff --git a/configure b/configure
index 557d6fa696..82188b299b 100755
--- a/configure
+++ b/configure
@@ -2017,10 +2017,14 @@ sparcv8plus | sparcv8plusa | sparcv9)
 		base_machine=sparc machine=sparc/sparc32/sparcv9 ;;
 sparcv8plusb | sparcv9b)
 		base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9b ;;
+sparcv9v)
+		base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9v ;;
 sparc64)
 		base_machine=sparc machine=sparc/sparc64 ;;
 sparc64b)
 		base_machine=sparc machine=sparc/sparc64/sparcv9b ;;
+sparc64v)
+		base_machine=sparc machine=sparc/sparc64/sparcv9v ;;
 *)		base_machine=$machine ;;
 esac
 
diff --git a/configure.in b/configure.in
index f6e7443393..83f09de805 100644
--- a/configure.in
+++ b/configure.in
@@ -505,10 +505,14 @@ sparcv8plus | sparcv8plusa | sparcv9)
 		base_machine=sparc machine=sparc/sparc32/sparcv9 ;;
 sparcv8plusb | sparcv9b)
 		base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9b ;;
+sparcv9v)
+		base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9v ;;
 sparc64)
 		base_machine=sparc machine=sparc/sparc64 ;;
 sparc64b)
 		base_machine=sparc machine=sparc/sparc64/sparcv9b ;;
+sparc64v)
+		base_machine=sparc machine=sparc/sparc64/sparcv9v ;;
 *)		base_machine=$machine ;;
 esac
 changequote([,])dnl
diff --git a/csu/Makefile b/csu/Makefile
index 54af0d6662..0e2ae07cf7 100644
--- a/csu/Makefile
+++ b/csu/Makefile
@@ -200,7 +200,9 @@ $(objpfx)abi-tag.h: $(..)abi-tags
 	if test -r $@.new; then mv -f $@.new $@; \
 	else echo >&2 'This configuration not matched in $<'; exit 1; fi
 
-all-Banner-files = $(wildcard $(addsuffix /Banner,$(subdir-srcdirs)))
+all-Banner-files = $(wildcard $(addsuffix /Banner,\
+					  $(sort $(subdir-srcdirs) \
+						 $(sysdeps-srcdirs))))
 $(objpfx)version-info.h: $(common-objpfx)config.make $(all-Banner-files)
 	$(make-target-directory)
 	(case $(config-os) in \
diff --git a/fedora/branch.mk b/fedora/branch.mk
index ac6775bb5e..1de55dc861 100644
--- a/fedora/branch.mk
+++ b/fedora/branch.mk
@@ -3,5 +3,5 @@ glibc-branch := fedora
 glibc-base := HEAD
 DIST_BRANCH := devel
 COLLECTION := dist-fc4
-fedora-sync-date := 2006-03-01 06:47 UTC
-fedora-sync-tag := fedora-glibc-20060301T0647
+fedora-sync-date := 2006-03-02 08:55 UTC
+fedora-sync-tag := fedora-glibc-20060302T0855
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 3eb1482f8c..b35cfddcac 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,16 @@
+2006-03-01  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S
+	(__lll_robust_lock_wait): Also set FUTEX_WAITERS bit if we got the
+	mutex.
+	(__lll_robust_timedlock_wait): Likewise.
+	* sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S
+	(__lll_robust_lock_wait): Likewise.
+	(__lll_robust_timedlock_wait): Likewise.
+	* sysdeps/unix/sysv/linux/lowlevelrobustlock.c
+	(__lll_robust_lock_wait): Likewise.
+	(__lll_robust_timedlock_wait): Likewise.
+
 2006-03-01  Jakub Jelinek  <jakub@redhat.com>
 
 	* sysdeps/unix/sysv/linux/sparc/lowlevellock.h (lll_robust_mutex_dead,
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S
index 1c516c7424..ff09745060 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/lowlevelrobustlock.S
@@ -74,6 +74,7 @@ __lll_robust_mutex_lock_wait:
 	jne	4b
 
 	movl	%gs:TID, %edx
+	orl	$FUTEX_WAITERS, %edx
 	LOCK
 	cmpxchgl %edx, (%ebx)
 	jnz	4b
@@ -163,6 +164,7 @@ __lll_robust_mutex_timedlock_wait:
 	jne	7f
 
 	movl	%gs:TID, %edx
+	orl	$FUTEX_WAITERS, %edx
 	LOCK
 	cmpxchgl %edx, (%ebx)
 	jnz	7f
diff --git a/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c b/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c
index b7faeaf257..3e88ee1866 100644
--- a/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c
+++ b/nptl/sysdeps/unix/sysv/linux/lowlevelrobustlock.c
@@ -42,7 +42,9 @@ __lll_robust_lock_wait (int *futex)
 
       lll_futex_wait (futex, newval);
     }
-  while ((oldval = atomic_compare_and_exchange_val_acq (futex, tid, 0)) != 0);
+  while ((oldval = atomic_compare_and_exchange_val_acq (futex,
+							tid | FUTEX_WAITERS,
+							0)) != 0);
   return 0;
 }
 
@@ -89,7 +91,7 @@ __lll_robust_timedlock_wait (int *futex, const struct timespec *abstime)
 
       lll_futex_timed_wait (futex, newval, &rt);
     }
-  while (atomic_compare_and_exchange_bool_acq (futex, tid, 0));
+  while (atomic_compare_and_exchange_bool_acq (futex, tid | FUTEX_WAITERS, 0));
 
   return 0;
 }
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S
index 7bb9191691..1a088e27b1 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S
@@ -78,6 +78,7 @@ __lll_robust_mutex_lock_wait:
 	jne	4b
 
 	movl	%fs:TID, %edx
+	orl	$FUTEX_WAITERS, %edx
 	LOCK
 	cmpxchgl %edx, (%rdi)
 	jnz	4b
@@ -169,6 +170,7 @@ __lll_robust_mutex_timedlock_wait:
 	jne	7f
 
 	movl	%fs:TID, %edx
+	orl	$FUTEX_WAITERS, %edx
 	LOCK
 	cmpxchgl %edx, (%r12)
 	jnz	7f
diff --git a/sysdeps/ia64/memccpy.S b/sysdeps/ia64/memccpy.S
index 53c43c512b..dd638d43c8 100644
--- a/sysdeps/ia64/memccpy.S
+++ b/sysdeps/ia64/memccpy.S
@@ -1,6 +1,6 @@
 /* Optimized version of the memccpy() function.
    This file is part of the GNU C Library.
-   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
+   Copyright (C) 2000,2001,2003,2006 Free Software Foundation, Inc.
    Contributed by Dan Pop <Dan.Pop@cern.ch>.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -183,27 +183,64 @@ ENTRY(memccpy)
 	br.ret.sptk.many b0
 
 .recovery1:
-	adds	src = -(MEMLAT + 6 + 1) * 8, asrc
+#if MEMLAT != 6
+# error "MEMLAT must be 6!"
+#endif
+	adds	src = -8, asrc
 	mov	loopcnt = ar.lc
-	mov	tmp = ar.ec ;;
+	mov	tmp = ar.ec
+	;;
+(p[0])	adds	src = -8, src
+	;;
+(p[1])	adds	src = -8, src
 	sub	sh1 = (MEMLAT + 6 + 1), tmp
-	shr.u	sh2 = sh2, 3
-	;; 
+	;;
+(p[2])	adds	src = -8, src
+	;;
+(p[3])	adds	src = -8, src
 	shl	loopcnt = loopcnt, 3
-	sub	src = src, sh2
+	;;
+(p[4])	adds	src = -8, src
+	;;
+(p[5])	adds	src = -8, src
 	shl	sh1 = sh1, 3
+	;;
+(p[6])	adds	src = -8, src
+	;;
+(p[7])	adds	src = -8, src
 	shl	tmp = tmp, 3
 	;;
+(p[8])	adds	src = -8, src
+	;;
+(p[9])	adds	src = -8, src
+	shr.u	sh2 = sh2, 3
+	;;
+(p[10])	adds	src = -8, src
+	;;
+(p[11])	adds	src = -8, src
 	add	len = len, loopcnt
-	add	src = sh1, src ;;
+	;;
+	sub	src = src, sh2
+	;;
 	add	len = tmp, len
-.back1:
+	add	src = sh1, src
 	br.cond.sptk .cpyfew
 
 .recovery2:
-	add	tmp = -(MEMLAT + 3) * 8, src
+#if MEMLAT != 6
+# error "MEMLAT must be 6!"
+#endif
+	add	tmp = -8, src
 (p7)	br.cond.spnt .gotit
 	;;
+(p[0])	add	tmp = -8, tmp ;;
+(p[1])	add	tmp = -8, tmp ;;
+(p[2])	add	tmp = -8, tmp ;;
+(p[3])	add	tmp = -8, tmp ;;
+(p[4])	add	tmp = -8, tmp ;;
+(p[5])	add	tmp = -8, tmp ;;
+(p[6])	add	tmp = -8, tmp ;;
+(p[7])	add	tmp = -8, tmp ;;
 	ld8	r[MEMLAT+2] = [tmp] ;;
 	xor	pos0[1] = r[MEMLAT+2], charx8 ;;
 	czx1.r	pos0[1] = pos0[1] ;;
diff --git a/sysdeps/mach/hurd/Subdirs b/sysdeps/mach/hurd/Subdirs
index 16b8348437..7a7757582a 100644
--- a/sysdeps/mach/hurd/Subdirs
+++ b/sysdeps/mach/hurd/Subdirs
@@ -1 +1,9 @@
-hurd
+# This file says that the hurd subdirectory should appear before all others.
+# The mach and hurd subdirectories have many generated header files which
+# much of the rest of the library depends on, so it is best to build them
+# first (and mach before hurd, at that).  The before-compile additions in
+# sysdeps/{mach,hurd}/Makefile should make it reliably work for these files
+# not to exist when making in other directories, but it will be slower that
+# way with more somewhat expensive `make' invocations.
+
+first hurd
diff --git a/sysdeps/rs6000/add_n.s b/sysdeps/rs6000/add_n.s
deleted file mode 100644
index 216874e7a4..0000000000
--- a/sysdeps/rs6000/add_n.s
+++ /dev/null
@@ -1,81 +0,0 @@
-# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
-
-# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# s2_ptr	r5
-# size		r6
-
-	.toc
-	.extern __mpn_add_n[DS]
-	.extern .__mpn_add_n
-.csect [PR]
-	.align 2
-	.globl __mpn_add_n
-	.globl .__mpn_add_n
-	.csect __mpn_add_n[DS]
-__mpn_add_n:
-	.long .__mpn_add_n, TOC[tc0], 0
-	.csect [PR]
-.__mpn_add_n:
-	andil.	10,6,1		# odd or even number of limbs?
-	l	8,0(4)		# load least significant s1 limb
-	l	0,0(5)		# load least significant s2 limb
-	cal	3,-4(3)		# offset res_ptr, it's updated before it's used
-	sri	10,6,1		# count for unrolled loop
-	a	7,0,8		# add least significant limbs, set cy
-	mtctr	10		# copy count into CTR
-	beq	0,Leven		# branch if even # of limbs (# of limbs >= 2)
-
-# We have an odd # of limbs.  Add the first limbs separately.
-	cmpi	1,10,0		# is count for unrolled loop zero?
-	bne	1,L1		# branch if not
-	st	7,4(3)
-	aze	3,10		# use the fact that r10 is zero...
-	br			# return
-
-# We added least significant limbs.  Now reload the next limbs to enter loop.
-L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
-	lu	0,4(5)		# load s2 limb and update s2_ptr
-	stu	7,4(3)
-	ae	7,0,8		# add limbs, set cy
-Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
-	lu	10,4(5)		# load s2 limb and update s2_ptr
-	bdz	Lend		# If done, skip loop
-
-Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
-	lu	0,4(5)		# load s2 limb and update s2_ptr
-	ae	11,9,10		# add previous limbs with cy, set cy
-	stu	7,4(3)		# 
-	lu	9,4(4)		# load s1 limb and update s1_ptr
-	lu	10,4(5)		# load s2 limb and update s2_ptr
-	ae	7,0,8		# add previous limbs with cy, set cy
-	stu	11,4(3)		# 
-	bdn	Loop		# decrement CTR and loop back
-
-Lend:	ae	11,9,10		# add limbs with cy, set cy
-	st	7,4(3)		# 
-	st	11,8(3)		# 
-	lil	3,0		# load cy into ...
-	aze	3,3		# ... return value register
-	br
diff --git a/sysdeps/rs6000/addmul_1.s b/sysdeps/rs6000/addmul_1.s
deleted file mode 100644
index 7cd743cede..0000000000
--- a/sysdeps/rs6000/addmul_1.s
+++ /dev/null
@@ -1,123 +0,0 @@
-# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add
-# the result to a second limb vector.
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# size		r5
-# s2_limb	r6
-
-# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
-# obtain that operation, we have to use the 32x32->64 signed multiplication
-# instruction, and add the appropriate compensation to the high limb of the
-# result.  We add the multiplicand if the multiplier has its most significant
-# bit set, and we add the multiplier if the multiplicand has its most
-# significant bit set.  We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work).  Since the POWER architecture has a branch unit
-# we can branch in zero cycles, so that's how we perform the additions.
-
-	.toc
-	.csect .__mpn_addmul_1[PR]
-	.align 2
-	.globl __mpn_addmul_1
-	.globl .__mpn_addmul_1
-	.csect __mpn_addmul_1[DS]
-__mpn_addmul_1:
-	.long .__mpn_addmul_1[PR], TOC[tc0], 0
-	.csect .__mpn_addmul_1[PR]
-.__mpn_addmul_1:
-
-	cal	3,-4(3)
-	l	0,0(4)
-	cmpi	0,6,0
-	mtctr	5
-	mul	9,0,6
-	srai	7,0,31
-	and	7,7,6
-	mfmq	8
-	cax	9,9,7
-	l	7,4(3)
-	a	8,8,7		# add res_limb
-	blt	Lneg
-Lpos:	bdz	Lend
-
-Lploop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	0
-	ae	8,0,9		# low limb + old_cy_limb + old cy
-	l	7,4(3)
-	aze	10,10		# propagate cy to new cy_limb
-	a	8,8,7		# add res_limb
-	bge	Lp0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Lp0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	0
-	ae	8,0,10
-	l	7,4(3)
-	aze	9,9
-	a	8,8,7
-	bge	Lp1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Lp1:	bdn	Lploop
-
-	b	Lend
-
-Lneg:	cax	9,9,0
-	bdz	Lend
-Lnloop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	7
-	ae	8,7,9
-	l	7,4(3)
-	ae	10,10,0		# propagate cy to new cy_limb
-	a	8,8,7		# add res_limb
-	bge	Ln0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Ln0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	7
-	ae	8,7,10
-	l	7,4(3)
-	ae	9,9,0		# propagate cy to new cy_limb
-	a	8,8,7		# add res_limb
-	bge	Ln1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Ln1:	bdn	Lnloop
-	b	Lend
-
-Lend0:	cal	9,0(10)
-Lend:	st	8,4(3)
-	aze	3,9
-	br
diff --git a/sysdeps/rs6000/ffs.c b/sysdeps/rs6000/ffs.c
deleted file mode 100644
index 619412cb50..0000000000
--- a/sysdeps/rs6000/ffs.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* ffs -- find first set bit in a word, counted from least significant end.
-   For IBM rs6000.
-   Copyright (C) 1991, 1992, 1997, 2004, 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Torbjorn Granlund (tege@sics.se).
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <string.h>
-
-#undef	ffs
-
-#ifdef	__GNUC__
-
-int
-__ffs (x)
-     int x;
-{
-  int cnt;
-
-  asm ("cntlz %0,%1" : "=r" (cnt) : "r" (x & -x));
-  return 32 - cnt;
-}
-weak_alias (__ffs, ffs)
-libc_hidden_builtin_def (ffs)
-
-#else
-#include <string/ffs.c>
-#endif
diff --git a/sysdeps/rs6000/lshift.s b/sysdeps/rs6000/lshift.s
deleted file mode 100644
index 8ccba7407e..0000000000
--- a/sysdeps/rs6000/lshift.s
+++ /dev/null
@@ -1,59 +0,0 @@
-# IBM POWER __mpn_lshift -- 
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s_ptr		r4
-# size		r5
-# cnt		r6
-
-	.toc
-	.extern __mpn_lshift[DS]
-	.extern .__mpn_lshift
-.csect [PR]
-	.align 2
-	.globl __mpn_lshift
-	.globl .__mpn_lshift
-	.csect __mpn_lshift[DS]
-__mpn_lshift:
-	.long .__mpn_lshift, TOC[tc0], 0
-	.csect [PR]
-.__mpn_lshift:
-	sli	0,5,2
-	cax	9,3,0
-	cax	4,4,0
-	sfi	8,6,32
-	mtctr	5		# put limb count in CTR loop register
-	lu	0,-4(4)		# read most significant limb
-	sre	3,0,8		# compute carry out limb, and init MQ register
-	bdz	Lend2		# if just one limb, skip loop
-	lu	0,-4(4)		# read 2:nd most significant limb
-	sreq	7,0,8		# compute most significant limb of result
-	bdz	Lend		# if just two limb, skip loop
-Loop:	lu	0,-4(4)		# load next lower limb
-	stu	7,-4(9)		# store previous result during read latency
-	sreq	7,0,8		# compute result limb
-	bdn	Loop		# loop back until CTR is zero
-Lend:	stu	7,-4(9)		# store 2:nd least significant limb
-Lend2:	sle	7,0,6		# compute least significant limb
-	st      7,-4(9)		# store it"				\
-	br
diff --git a/sysdeps/rs6000/memcopy.h b/sysdeps/rs6000/memcopy.h
deleted file mode 100644
index 8bdb6e9766..0000000000
--- a/sysdeps/rs6000/memcopy.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdeps/generic/memcopy.h>
-
-#undef	OP_T_THRES
-#define OP_T_THRES 32
-
-#undef	BYTE_COPY_FWD
-#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)				      \
-  do									      \
-    {									      \
-      size_t __nbytes = nbytes;						      \
-      asm volatile("mtspr	1,%2\n"					      \
-		   "lsx		6,0,%1\n"				      \
-		   "stsx	6,0,%0" : /* No outputs.  */ :		      \
-		   "b" (dst_bp), "b" (src_bp), "r" (__nbytes) :		      \
-		   "6", "7", "8", "9", "10", "11", "12", "13");		      \
-      dst_bp += __nbytes;						      \
-      src_bp += __nbytes;						      \
-    } while (0)
-
-#undef	BYTE_COPY_BWD
-#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes)				      \
-  do									      \
-    {									      \
-      size_t __nbytes = (nbytes);					      \
-      dst_ep -= __nbytes;						      \
-      src_ep -= __nbytes;						      \
-      asm volatile("mtspr	1,%2\n"					      \
-		   "lsx		6,0,%1\n"				      \
-		   "stsx	6,0,%0" : /* No outputs.  */ :		      \
-		   "b" (dst_ep), "b" (src_ep), "r" (__nbytes) :		      \
-		   "6", "7", "8", "9", "10", "11", "12", "13");		      \
-    } while (0)
-
-#undef	WORD_COPY_FWD
-#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		      \
-  do									      \
-    {									      \
-      size_t __nblocks = (nbytes) / 32;					      \
-      if (__nblocks != 0)						      \
-	asm volatile("mtctr	%4\n"					      \
-		     "lsi	6,%1,32\n"				      \
-		     "ai	%1,%1,32\n"				      \
-		     "stsi	6,%0,32\n"				      \
-		     "ai	%0,%0,32\n"				      \
-		     "bdn	$-16" :					      \
-		     "=b" (dst_bp), "=b" (src_bp) :			      \
-		     "0" (dst_bp), "1" (src_bp), "r" (__nblocks) :	      \
-		     "6", "7", "8", "9", "10", "11", "12", "13");	      \
-      (nbytes_left) = (nbytes) % 32;					      \
-    } while (0)
-
-#undef	WORD_COPY_BWD
-#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes)		      \
-  do									      \
-    {									      \
-      size_t __nblocks = (nbytes) / 32;					      \
-      if (__nblocks != 0)						      \
-	asm volatile("mtctr	%4\n"					      \
-		     "ai	%1,%1,-32\n"				      \
-		     "lsi	6,%1,32\n"				      \
-		     "ai	%0,%0,-32\n"				      \
-		     "stsi	6,%0,32\n"				      \
-		     "bdn	$-16" :					      \
-		     "=b" (dst_ep), "=b" (src_ep) :			      \
-		     "0" (dst_ep), "1" (src_ep), "r" (__nblocks) :	      \
-		     "6", "7", "8", "9", "10", "11", "12", "13");	      \
-      (nbytes_left) = (nbytes) % 32;					      \
-    } while (0)
diff --git a/sysdeps/rs6000/mul_1.s b/sysdeps/rs6000/mul_1.s
deleted file mode 100644
index c0feef4b72..0000000000
--- a/sysdeps/rs6000/mul_1.s
+++ /dev/null
@@ -1,110 +0,0 @@
-# IBM POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store
-# the result in a second limb vector.
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# size		r5
-# s2_limb	r6
-
-# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
-# obtain that operation, we have to use the 32x32->64 signed multiplication
-# instruction, and add the appropriate compensation to the high limb of the
-# result.  We add the multiplicand if the multiplier has its most significant
-# bit set, and we add the multiplier if the multiplicand has its most
-# significant bit set.  We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work).  Since the POWER architecture has a branch unit
-# we can branch in zero cycles, so that's how we perform the additions.
-
-	.toc
-	.csect .__mpn_mul_1[PR]
-	.align 2
-	.globl __mpn_mul_1
-	.globl .__mpn_mul_1
-	.csect __mpn_mul_1[DS]
-__mpn_mul_1:
-	.long .__mpn_mul_1[PR], TOC[tc0], 0
-	.csect .__mpn_mul_1[PR]
-.__mpn_mul_1:
-
-	cal	3,-4(3)
-	l	0,0(4)
-	cmpi	0,6,0
-	mtctr	5
-	mul	9,0,6
-	srai	7,0,31
-	and	7,7,6
-	mfmq	8
-	ai	0,0,0		# reset carry
-	cax	9,9,7
-	blt	Lneg
-Lpos:	bdz	Lend
-Lploop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	0
-	ae	8,0,9
-	bge	Lp0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Lp0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	0
-	ae	8,0,10
-	bge	Lp1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Lp1:	bdn	Lploop
-	b	Lend
-
-Lneg:	cax	9,9,0
-	bdz	Lend
-Lnloop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	cax	10,10,0		# adjust high limb for negative s2_limb
-	mfmq	0
-	ae	8,0,9
-	bge	Ln0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Ln0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	cax	9,9,0		# adjust high limb for negative s2_limb
-	mfmq	0
-	ae	8,0,10
-	bge	Ln1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Ln1:	bdn	Lnloop
-	b	Lend
-
-Lend0:	cal	9,0(10)
-Lend:	st	8,4(3)
-	aze	3,9
-	br
diff --git a/sysdeps/rs6000/rshift.s b/sysdeps/rs6000/rshift.s
deleted file mode 100644
index 145218fabd..0000000000
--- a/sysdeps/rs6000/rshift.s
+++ /dev/null
@@ -1,57 +0,0 @@
-# IBM POWER __mpn_rshift -- 
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s_ptr		r4
-# size		r5
-# cnt		r6
-
-	.toc
-	.extern __mpn_rshift[DS]
-	.extern .__mpn_rshift
-.csect [PR]
-	.align 2
-	.globl __mpn_rshift
-	.globl .__mpn_rshift
-	.csect __mpn_rshift[DS]
-__mpn_rshift:
-	.long .__mpn_rshift, TOC[tc0], 0
-	.csect [PR]
-.__mpn_rshift:
-	sfi	8,6,32
-	mtctr	5		# put limb count in CTR loop register
-	l	0,0(4)		# read least significant limb
-	ai	9,3,-4		# adjust res_ptr since it's offset in the stu:s
-	sle	3,0,8		# compute carry limb, and init MQ register
-	bdz	Lend2		# if just one limb, skip loop
-	lu	0,4(4)		# read 2:nd least significant limb
-	sleq	7,0,8		# compute least significant limb of result
-	bdz	Lend		# if just two limb, skip loop
-Loop:	lu	0,4(4)		# load next higher limb
-	stu	7,4(9)		# store previous result during read latency
-	sleq	7,0,8		# compute result limb
-	bdn	Loop		# loop back until CTR is zero
-Lend:	stu	7,4(9)		# store 2:nd most significant limb
-Lend2:	sre	7,0,6		# compute most significant limb
-	st      7,4(9)		# store it"				\
-	br
diff --git a/sysdeps/rs6000/sub_n.s b/sysdeps/rs6000/sub_n.s
deleted file mode 100644
index d931870935..0000000000
--- a/sysdeps/rs6000/sub_n.s
+++ /dev/null
@@ -1,82 +0,0 @@
-# IBM POWER __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
-
-# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# s2_ptr	r5
-# size		r6
-
-	.toc
-	.extern __mpn_sub_n[DS]
-	.extern .__mpn_sub_n
-.csect [PR]
-	.align 2
-	.globl __mpn_sub_n
-	.globl .__mpn_sub_n
-	.csect __mpn_sub_n[DS]
-__mpn_sub_n:
-	.long .__mpn_sub_n, TOC[tc0], 0
-	.csect [PR]
-.__mpn_sub_n:
-	andil.	10,6,1		# odd or even number of limbs?
-	l	8,0(4)		# load least significant s1 limb
-	l	0,0(5)		# load least significant s2 limb
-	cal	3,-4(3)		# offset res_ptr, it's updated before it's used
-	sri	10,6,1		# count for unrolled loop
-	sf	7,0,8		# subtract least significant limbs, set cy
-	mtctr	10		# copy count into CTR
-	beq	0,Leven		# branch if even # of limbs (# of limbs >= 2)
-
-# We have an odd # of limbs.  Add the first limbs separately.
-	cmpi	1,10,0		# is count for unrolled loop zero?
-	bne	1,L1		# branch if not
-	st	7,4(3)
-	sfe	3,0,0		# load !cy into ...
-	sfi	3,3,0		# ... return value register
-	br			# return
-
-# We added least significant limbs.  Now reload the next limbs to enter loop.
-L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
-	lu	0,4(5)		# load s2 limb and update s2_ptr
-	stu	7,4(3)
-	sfe	7,0,8		# subtract limbs, set cy
-Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
-	lu	10,4(5)		# load s2 limb and update s2_ptr
-	bdz	Lend		# If done, skip loop
-
-Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
-	lu	0,4(5)		# load s2 limb and update s2_ptr
-	sfe	11,10,9		# subtract previous limbs with cy, set cy
-	stu	7,4(3)		# 
-	lu	9,4(4)		# load s1 limb and update s1_ptr
-	lu	10,4(5)		# load s2 limb and update s2_ptr
-	sfe	7,0,8		# subtract previous limbs with cy, set cy
-	stu	11,4(3)		# 
-	bdn	Loop		# decrement CTR and loop back
-
-Lend:	sfe	11,10,9		# subtract limbs with cy, set cy
-	st	7,4(3)		# 
-	st	11,8(3)		# 
-	sfe	3,0,0		# load !cy into ...
-	sfi	3,3,0		# ... return value register
-	br
diff --git a/sysdeps/rs6000/submul_1.s b/sysdeps/rs6000/submul_1.s
deleted file mode 100644
index 41095ab001..0000000000
--- a/sysdeps/rs6000/submul_1.s
+++ /dev/null
@@ -1,128 +0,0 @@
-# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
-# the result from a second limb vector.
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# size		r5
-# s2_limb	r6
-
-# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
-# obtain that operation, we have to use the 32x32->64 signed multiplication
-# instruction, and add the appropriate compensation to the high limb of the
-# result.  We add the multiplicand if the multiplier has its most significant
-# bit set, and we add the multiplier if the multiplicand has its most
-# significant bit set.  We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work).  Since the POWER architecture has a branch unit
-# we can branch in zero cycles, so that's how we perform the additions.
-
-	.toc
-	.csect .__mpn_submul_1[PR]
-	.align 2
-	.globl __mpn_submul_1
-	.globl .__mpn_submul_1
-	.csect __mpn_submul_1[DS]
-__mpn_submul_1:
-	.long .__mpn_submul_1[PR], TOC[tc0], 0
-	.csect .__mpn_submul_1[PR]
-.__mpn_submul_1:
-
-	cal	3,-4(3)
-	l	0,0(4)
-	cmpi	0,6,0
-	mtctr	5
-	mul	9,0,6
-	srai	7,0,31
-	and	7,7,6
-	mfmq	11
-	cax	9,9,7
-	l	7,4(3)
-	sf	8,11,7		# add res_limb
-	a	11,8,11		# invert cy (r11 is junk)
-	blt	Lneg
-Lpos:	bdz	Lend
-
-Lploop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	0
-	ae	11,0,9		# low limb + old_cy_limb + old cy
-	l	7,4(3)
-	aze	10,10		# propagate cy to new cy_limb
-	sf	8,11,7		# add res_limb
-	a	11,8,11		# invert cy (r11 is junk)
-	bge	Lp0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Lp0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	0
-	ae	11,0,10
-	l	7,4(3)
-	aze	9,9
-	sf	8,11,7
-	a	11,8,11		# invert cy (r11 is junk)
-	bge	Lp1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Lp1:	bdn	Lploop
-
-	b	Lend
-
-Lneg:	cax	9,9,0
-	bdz	Lend
-Lnloop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	7
-	ae	11,7,9
-	l	7,4(3)
-	ae	10,10,0		# propagate cy to new cy_limb
-	sf	8,11,7		# add res_limb
-	a	11,8,11		# invert cy (r11 is junk)
-	bge	Ln0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Ln0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	7
-	ae	11,7,10
-	l	7,4(3)
-	ae	9,9,0		# propagate cy to new cy_limb
-	sf	8,11,7		# add res_limb
-	a	11,8,11		# invert cy (r11 is junk)
-	bge	Ln1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Ln1:	bdn	Lnloop
-	b	Lend
-
-Lend0:	cal	9,0(10)
-Lend:	st	8,4(3)
-	aze	3,9
-	br
diff --git a/sysdeps/sparc/fpu/fraiseexcpt.c b/sysdeps/sparc/fpu/fraiseexcpt.c
index 0d45ec82d2..cbb8be80ec 100644
--- a/sysdeps/sparc/fpu/fraiseexcpt.c
+++ b/sysdeps/sparc/fpu/fraiseexcpt.c
@@ -25,12 +25,12 @@
 int
 __feraiseexcept (int excepts)
 {
-  static volatile double sink;
   static const struct {
     double zero, one, max, min, sixteen, pi;
   } c = {
     0.0, 1.0, DBL_MAX, DBL_MIN, 16.0, M_PI
   };
+  double d;
 
   /* Raise exceptions represented by EXPECTS.  But we must raise only
      one signal at a time.  It is important the if the overflow/underflow
@@ -39,24 +39,44 @@ __feraiseexcept (int excepts)
 
   /* First: invalid exception.  */
   if ((FE_INVALID & excepts) != 0)
-    /* One example of a invalid operation is 0/0.  */
-    sink = c.zero / c.zero;
+    {
+      /* One example of an invalid operation is 0/0.  */
+      __asm ("" : "=e" (d) : "0" (c.zero));
+      d /= c.zero;
+      __asm __volatile ("" : : "e" (d));
+    }
 
   /* Next: division by zero.  */
   if ((FE_DIVBYZERO & excepts) != 0)
-    sink = c.one / c.zero;
+    {
+      __asm ("" : "=e" (d) : "0" (c.one));
+      d /= c.zero;
+      __asm __volatile ("" : : "e" (d));
+    }
 
   /* Next: overflow.  */
   if ((FE_OVERFLOW & excepts) != 0)
-    sink = c.max * c.max;
+    {
+      __asm ("" : "=e" (d) : "0" (c.max));
+      d *= d;
+      __asm __volatile ("" : : "e" (d));
+    }
 
   /* Next: underflow.  */
   if ((FE_UNDERFLOW & excepts) != 0)
-    sink = c.min / c.sixteen;
+    {
+      __asm ("" : "=e" (d) : "0" (c.min));
+      d /= c.sixteen;
+      __asm __volatile ("" : : "e" (d));
+    }
 
   /* Last: inexact.  */
   if ((FE_INEXACT & excepts) != 0)
-    sink = c.one / c.pi;
+    {
+      __asm ("" : "=e" (d) : "0" (c.one));
+      d /= c.pi;
+      __asm __volatile ("" : : "e" (d));
+    }
 
   /* Success.  */
   return 0;
diff --git a/sysdeps/sparc/sparc32/fpu/libm-test-ulps b/sysdeps/sparc/sparc32/fpu/libm-test-ulps
index 40d563971a..ccf53788a6 100644
--- a/sysdeps/sparc/sparc32/fpu/libm-test-ulps
+++ b/sysdeps/sparc/sparc32/fpu/libm-test-ulps
@@ -465,6 +465,11 @@ ifloat: 2
 ildouble: 1
 ldouble: 1
 
+# exp2
+Test "exp2 (10) == 1024":
+ildouble: 2
+ldouble: 2
+
 # expm1
 Test "expm1 (0.75) == 1.11700001661267466854536981983709561":
 double: 1
@@ -1192,6 +1197,10 @@ ifloat: 2
 ildouble: 1
 ldouble: 1
 
+Function: "exp2":
+ildouble: 2
+ldouble: 2
+
 Function: "expm1":
 double: 1
 float: 1
diff --git a/sysdeps/sparc/sparc32/sparcv9v/memcpy.S b/sysdeps/sparc/sparc32/sparcv9v/memcpy.S
new file mode 100644
index 0000000000..4c05f57bc2
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9v/memcpy.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/sparcv9v/memcpy.S>
diff --git a/sysdeps/sparc/sparc32/sparcv9v/memset.S b/sysdeps/sparc/sparc32/sparcv9v/memset.S
new file mode 100644
index 0000000000..5e46c7489f
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9v/memset.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/sparcv9v/memset.S>
diff --git a/sysdeps/sparc/sparc64/fpu/libm-test-ulps b/sysdeps/sparc/sparc64/fpu/libm-test-ulps
index 5719a7ca54..db5543e9eb 100644
--- a/sysdeps/sparc/sparc64/fpu/libm-test-ulps
+++ b/sysdeps/sparc/sparc64/fpu/libm-test-ulps
@@ -465,6 +465,11 @@ ifloat: 2
 ildouble: 1
 ldouble: 1
 
+# exp2
+Test "exp2 (10) == 1024":
+ildouble: 2
+ldouble: 2
+
 # expm1
 Test "expm1 (0.75) == 1.11700001661267466854536981983709561":
 double: 1
@@ -1192,6 +1197,10 @@ ifloat: 2
 ildouble: 1
 ldouble: 1
 
+Function: "exp2":
+ildouble: 2
+ldouble: 2
+
 Function: "expm1":
 double: 1
 float: 1
diff --git a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S
new file mode 100644
index 0000000000..05c837fa25
--- /dev/null
+++ b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S
@@ -0,0 +1,593 @@
+/* Copy SIZE bytes from SRC to DEST.  For SUN4V Niagara.
+   Copyright (C) 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller (davem@davemloft.net)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+#define ASI_BLK_INIT_QUAD_LDD_P	0xe2
+#define ASI_P			0x80
+#define ASI_PNF			0x82
+
+#define LOAD(type,addr,dest)	type##a [addr] ASI_P, dest
+#define LOAD_TWIN(addr_reg,dest0,dest1)	\
+	ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0
+
+#define STORE(type,src,addr)	type src, [addr]
+#define STORE_INIT(src,addr)	stxa src, [addr] %asi
+
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
+	.register	%g2,#scratch
+	.register	%g3,#scratch
+	.register	%g6,#scratch
+
+	.text
+	.align		32
+
+ENTRY(bcopy)	/* Swaps %o0/%o1, then joins memcpy at 100: when the unsigned test below passes.  */
+	sub		%o1, %o0, %o4	/* %o4 = incoming %o1 - incoming %o0 */
+	mov		%o0, %g4	/* stash incoming %o0 */
+	cmp		%o4, %o2	/* compare that distance with len (%o2) */
+	mov		%o1, %o0	/* %o0 = incoming %o1 (dst, per the comment at 100:) */
+	bgeu,pt		%XCC, 100f	/* distance >= len (unsigned): take the memcpy path */
+	 mov		%g4, %o1	/* delay slot: %o1 = incoming %o0 (src) */
+#ifndef USE_BPR
+	srl		%o2, 0, %o2	/* clear the upper 32 bits of len before the register branch */
+#endif
+	brnz,pn		%o2, 220f	/* len != 0: continue at 220: further down */
+	 add		%o0, %o2, %o0	/* delay slot: %o0 += len */
+	retl				/* len == 0: nothing to copy */
+	 nop
+END(bcopy)
+
+	.align		32
+ENTRY(memcpy)
+100:	/* %o0=dst, %o1=src, %o2=len */
+	mov		%o0, %g5
+	cmp		%o2, 0
+	be,pn		%XCC, 85f
+218:	 or		%o0, %o1, %o3
+	cmp		%o2, 16
+	blu,a,pn	%XCC, 80f
+	 or		%o3, %o2, %o3
+
+	/* 2 blocks (128 bytes) is the minimum we can do the block
+	 * copy with.  We need to ensure that we'll iterate at least
+	 * once in the block copy loop.  At worst we'll need to align
+	 * the destination to a 64-byte boundary which can chew up
+	 * to (64 - 1) bytes from the length before we perform the
+	 * block copy loop.
+	 */
+	cmp		%o2, (2 * 64)
+	blu,pt		%XCC, 70f
+	 andcc		%o3, 0x7, %g0
+
+	/* %o0:	dst
+	 * %o1:	src
+	 * %o2:	len  (known to be >= 128)
+	 *
+	 * The block copy loops will use %o4/%o5,%g2/%g3 as
+	 * temporaries while copying the data.
+	 */
+
+	LOAD(prefetch, %o1, #one_read)
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+
+	/* Align destination on 64-byte boundary.  */
+	andcc		%o0, (64 - 1), %o4
+	be,pt		%XCC, 2f
+	 sub		%o4, 64, %o4
+	sub		%g0, %o4, %o4	! bytes to align dst
+	sub		%o2, %o4, %o2
+1:	subcc		%o4, 1, %o4
+	LOAD(ldub, %o1, %g1)
+	STORE(stb, %g1, %o0)
+	add		%o1, 1, %o1
+	bne,pt		%XCC, 1b
+	add		%o0, 1, %o0
+
+	/* If the source is on a 16-byte boundary we can do
+	 * the direct block copy loop.  If it is 8-byte aligned
+	 * we can do the 16-byte loads offset by -8 bytes and the
+	 * init stores offset by one register.
+	 *
+	 * If the source is not even 8-byte aligned, we need to do
+	 * shifting and masking (basically integer faligndata).
+	 *
+	 * The careful bit with init stores is that if we store
+	 * to any part of the cache line we have to store the whole
+	 * cacheline else we can end up with corrupt L2 cache line
+	 * contents.  Since the loop works on 64-bytes of 64-byte
+	 * aligned store data at a time, this is easy to ensure.
+	 */
+2:
+	andcc		%o1, (16 - 1), %o4
+	andn		%o2, (64 - 1), %g1	! block copy loop iterator
+	sub		%o2, %g1, %o2		! final sub-block copy bytes
+	be,pt		%XCC, 50f
+	 cmp		%o4, 8
+	be,a,pt		%XCC, 10f
+	 sub		%o1, 0x8, %o1
+
+	/* Neither 8-byte nor 16-byte aligned, shift and mask.  */
+	mov		%g1, %o4
+	and		%o1, 0x7, %g1
+	sll		%g1, 3, %g1
+	mov		64, %o3
+	andn		%o1, 0x7, %o1
+	LOAD(ldx, %o1, %g2)
+	sub		%o3, %g1, %o3
+	sllx		%g2, %g1, %g2
+
+#define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\
+	LOAD(ldx, SRC, TMP1); \
+	srlx		TMP1, PRE_SHIFT, TMP2; \
+	or		TMP2, PRE_VAL, TMP2; \
+	STORE_INIT(TMP2, DST); \
+	sllx		TMP1, POST_SHIFT, PRE_VAL;
+
+1:	add		%o1, 0x8, %o1
+	SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00)
+	add		%o1, 0x8, %o1
+	SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08)
+	add		%o1, 0x8, %o1
+	SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10)
+	add		%o1, 0x8, %o1
+	SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18)
+	add		%o1, 32, %o1
+	LOAD(prefetch, %o1, #one_read)
+	sub		%o1, 32 - 8, %o1
+	SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20)
+	add		%o1, 8, %o1
+	SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28)
+	add		%o1, 8, %o1
+	SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30)
+	add		%o1, 8, %o1
+	SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38)
+	subcc		%o4, 64, %o4
+	bne,pt		%XCC, 1b
+	 add		%o0, 64, %o0
+
+#undef SWIVEL_ONE_DWORD
+
+	srl		%g1, 3, %g1
+	ba,pt		%XCC, 60f
+	 add		%o1, %g1, %o1
+
+10:	/* Destination is 64-byte aligned, source was only 8-byte
+	 * aligned but it has been subtracted by 8 and we perform
+	 * one twin load ahead, then add 8 back into source when
+	 * we finish the loop.
+	 */
+	LOAD_TWIN(%o1, %o4, %o5)
+1:	add		%o1, 16, %o1
+	LOAD_TWIN(%o1, %g2, %g3)
+	add		%o1, 16 + 32, %o1
+	LOAD(prefetch, %o1, #one_read)
+	sub		%o1, 32, %o1
+	STORE_INIT(%o5, %o0 + 0x00)		! initializes cache line
+	STORE_INIT(%g2, %o0 + 0x08)
+	LOAD_TWIN(%o1, %o4, %o5)
+	add		%o1, 16, %o1
+	STORE_INIT(%g3, %o0 + 0x10)
+	STORE_INIT(%o4, %o0 + 0x18)
+	LOAD_TWIN(%o1, %g2, %g3)
+	add		%o1, 16, %o1
+	STORE_INIT(%o5, %o0 + 0x20)
+	STORE_INIT(%g2, %o0 + 0x28)
+	LOAD_TWIN(%o1, %o4, %o5)
+	STORE_INIT(%g3, %o0 + 0x30)
+	STORE_INIT(%o4, %o0 + 0x38)
+	subcc		%g1, 64, %g1
+	bne,pt		%XCC, 1b
+	 add		%o0, 64, %o0
+
+	ba,pt		%XCC, 60f
+	 add		%o1, 0x8, %o1
+
+50:	/* Destination is 64-byte aligned, and source is 16-byte
+	 * aligned.
+	 */
+1:	LOAD_TWIN(%o1, %o4, %o5)
+	add	%o1, 16, %o1
+	LOAD_TWIN(%o1, %g2, %g3)
+	add	%o1, 16 + 32, %o1
+	LOAD(prefetch, %o1, #one_read)
+	sub	%o1, 32, %o1
+	STORE_INIT(%o4, %o0 + 0x00)		! initializes cache line
+	STORE_INIT(%o5, %o0 + 0x08)
+	LOAD_TWIN(%o1, %o4, %o5)
+	add	%o1, 16, %o1
+	STORE_INIT(%g2, %o0 + 0x10)
+	STORE_INIT(%g3, %o0 + 0x18)
+	LOAD_TWIN(%o1, %g2, %g3)
+	add	%o1, 16, %o1
+	STORE_INIT(%o4, %o0 + 0x20)
+	STORE_INIT(%o5, %o0 + 0x28)
+	STORE_INIT(%g2, %o0 + 0x30)
+	STORE_INIT(%g3, %o0 + 0x38)
+	subcc	%g1, 64, %g1
+	bne,pt	%XCC, 1b
+	 add	%o0, 64, %o0
+	/* fall through */
+
+60:
+	/* %o2 contains any final bytes still needed to be copied
+	 * over. If anything is left, we copy it one byte at a time.
+	 */
+	wr		%g0, ASI_PNF, %asi
+	brz,pt		%o2, 85f
+	 sub		%o0, %o1, %o3
+	ba,a,pt		%XCC, 90f
+
+	.align		64
+70: /* 16 <= len < 128 */
+	bne,pn		%XCC, 75f
+	 sub		%o0, %o1, %o3
+
+72:
+	andn		%o2, 0xf, %o4
+	and		%o2, 0xf, %o2
+1:	subcc		%o4, 0x10, %o4
+	LOAD(ldx, %o1, %o5)
+	add		%o1, 0x08, %o1
+	LOAD(ldx, %o1, %g1)
+	sub		%o1, 0x08, %o1
+	STORE(stx, %o5, %o1 + %o3)
+	add		%o1, 0x8, %o1
+	STORE(stx, %g1, %o1 + %o3)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 0x8, %o1
+73:	andcc		%o2, 0x8, %g0
+	be,pt		%XCC, 1f
+	 nop
+	sub		%o2, 0x8, %o2
+	LOAD(ldx, %o1, %o5)
+	STORE(stx, %o5, %o1 + %o3)
+	add		%o1, 0x8, %o1
+1:	andcc		%o2, 0x4, %g0
+	be,pt		%XCC, 1f
+	 nop
+	sub		%o2, 0x4, %o2
+	LOAD(lduw, %o1, %o5)
+	STORE(stw, %o5, %o1 + %o3)
+	add		%o1, 0x4, %o1
+1:	cmp		%o2, 0
+	be,pt		%XCC, 85f
+	 nop
+	ba,pt		%XCC, 90f
+	 nop
+
+75:
+	andcc		%o0, 0x7, %g1
+	sub		%g1, 0x8, %g1
+	be,pn		%icc, 2f
+	 sub		%g0, %g1, %g1
+	sub		%o2, %g1, %o2
+
+1:	subcc		%g1, 1, %g1
+	LOAD(ldub, %o1, %o5)
+	STORE(stb, %o5, %o1 + %o3)
+	bgu,pt		%icc, 1b
+	 add		%o1, 1, %o1
+
+2:	add		%o1, %o3, %o0
+	andcc		%o1, 0x7, %g1
+	bne,pt		%icc, 8f
+	 sll		%g1, 3, %g1
+
+	cmp		%o2, 16
+	bgeu,pt		%icc, 72b
+	 nop
+	ba,a,pt		%XCC, 73b
+
+8:	mov		64, %o3
+	andn		%o1, 0x7, %o1
+	LOAD(ldx, %o1, %g2)
+	sub		%o3, %g1, %o3
+	andn		%o2, 0x7, %o4
+	sllx		%g2, %g1, %g2
+1:	add		%o1, 0x8, %o1
+	LOAD(ldx, %o1, %g3)
+	subcc		%o4, 0x8, %o4
+	srlx		%g3, %o3, %o5
+	or		%o5, %g2, %o5
+	STORE(stx, %o5, %o0)
+	add		%o0, 0x8, %o0
+	bgu,pt		%icc, 1b
+	 sllx		%g3, %g1, %g2
+
+	srl		%g1, 3, %g1
+	andcc		%o2, 0x7, %o2
+	be,pn		%icc, 85f
+	 add		%o1, %g1, %o1
+	ba,pt		%XCC, 90f
+	 sub		%o0, %o1, %o3
+
+	.align		64
+80: /* 0 < len < 16 */
+	andcc		%o3, 0x3, %g0
+	bne,pn		%XCC, 90f
+	 sub		%o0, %o1, %o3
+
+1:
+	subcc		%o2, 4, %o2
+	LOAD(lduw, %o1, %g1)
+	STORE(stw, %g1, %o1 + %o3)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 4, %o1
+
+85:	retl
+	 mov		%g5, %o0
+
+	.align		32
+90:
+	subcc		%o2, 1, %o2
+	LOAD(ldub, %o1, %g1)
+	STORE(stb, %g1, %o1 + %o3)
+	bgu,pt		%XCC, 90b
+	 add		%o1, 1, %o1
+	retl
+	 mov		%g5, %o0
+
+END(memcpy)
+/* RMOVE_BIGCHUNK: copy the 32 bytes ending at src - offset to the 32 bytes ending at dst - offset; four ldx, each stored as two stw (upper half at the lower address).  Clobbers t0-t3.  */
+#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)	\
+	ldx		[%src - offset - 0x20], %t0; 		\
+	ldx		[%src - offset - 0x18], %t1; 		\
+	ldx		[%src - offset - 0x10], %t2; 		\
+	ldx		[%src - offset - 0x08], %t3; 		\
+	stw		%t0, [%dst - offset - 0x1c]; 		\
+	srlx		%t0, 32, %t0;				\
+	stw		%t0, [%dst - offset - 0x20]; 		\
+	stw		%t1, [%dst - offset - 0x14]; 		\
+	srlx		%t1, 32, %t1;				\
+	stw		%t1, [%dst - offset - 0x18]; 		\
+	stw		%t2, [%dst - offset - 0x0c]; 		\
+	srlx		%t2, 32, %t2;				\
+	stw		%t2, [%dst - offset - 0x10]; 		\
+	stw		%t3, [%dst - offset - 0x04];		\
+	srlx		%t3, 32, %t3;				\
+	stw		%t3, [%dst - offset - 0x08];
+/* RMOVE_BIGALIGNCHUNK: copy the 64 bytes ending at src - offset to the 64 bytes ending at dst - offset with ldx/stx, upper 32 bytes first.  Clobbers t0-t3.  */
+#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)	\
+	ldx		[%src - offset - 0x20], %t0; 		\
+	ldx		[%src - offset - 0x18], %t1; 		\
+	ldx		[%src - offset - 0x10], %t2; 		\
+	ldx		[%src - offset - 0x08], %t3; 		\
+	stx		%t0, [%dst - offset - 0x20]; 		\
+	stx		%t1, [%dst - offset - 0x18]; 		\
+	stx		%t2, [%dst - offset - 0x10]; 		\
+	stx		%t3, [%dst - offset - 0x08];		\
+	ldx		[%src - offset - 0x40], %t0; 		\
+	ldx		[%src - offset - 0x38], %t1; 		\
+	ldx		[%src - offset - 0x30], %t2; 		\
+	ldx		[%src - offset - 0x28], %t3; 		\
+	stx		%t0, [%dst - offset - 0x40]; 		\
+	stx		%t1, [%dst - offset - 0x38]; 		\
+	stx		%t2, [%dst - offset - 0x30]; 		\
+	stx		%t3, [%dst - offset - 0x28];
+/* RMOVE_LASTCHUNK: copy the 16 bytes starting at src + offset to dst + offset; two ldx stored as four stw.  Exactly 8 instructions (the jump at 279 relies on this).  Clobbers t0-t3.  */
+#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)	\
+	ldx		[%src + offset + 0x00], %t0;		\
+	ldx		[%src + offset + 0x08], %t1; 		\
+	stw		%t0, [%dst + offset + 0x04]; 		\
+	srlx		%t0, 32, %t2;				\
+	stw		%t2, [%dst + offset + 0x00]; 		\
+	stw		%t1, [%dst + offset + 0x0c]; 		\
+	srlx		%t1, 32, %t3;				\
+	stw		%t3, [%dst + offset + 0x08];
+/* RMOVE_LASTALIGNCHUNK: copy the 16 bytes starting at src + offset to dst + offset with two ldx/stx.  Exactly 4 instructions (the jump at 283 relies on this).  Clobbers t0, t1.  */
+#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)		\
+	ldx		[%src + offset + 0x00], %t0; 		\
+	ldx		[%src + offset + 0x08], %t1; 		\
+	stx		%t0, [%dst + offset + 0x00]; 		\
+	stx		%t1, [%dst + offset + 0x08];
+/* Backward byte copy used by memmove for 0 < len <= 15.  In: %o0/%o1 = one past the end of dst/src, %o2 = len, %g4 = value to return.  */
+	.align		32
+228:	andcc		%o2, 1, %g0		/* is len odd?  */
+	be,pt		%icc, 2f+4		/* even: join the two-byte loop after its first ldub  */
+1:	 ldub		[%o1 - 1], %o5		/* delay slot: same load as the instruction skipped at 2:  */
+	sub		%o1, 1, %o1		/* odd: move a single byte first  */
+	sub		%o0, 1, %o0
+	subcc		%o2, 1, %o2
+	be,pn		%xcc, 229f		/* that was the only byte  */
+	 stb		%o5, [%o0]
+2:	ldub		[%o1 - 1], %o5		/* loop: two bytes per iteration, higher address first  */
+	sub		%o0, 2, %o0
+	ldub		[%o1 - 2], %g5
+	sub		%o1, 2, %o1
+	subcc		%o2, 2, %o2
+	stb		%o5, [%o0 + 1]
+	bne,pt		%xcc, 2b
+	 stb		%g5, [%o0]
+229:	retl
+	 mov		%g4, %o0		/* delay slot: return value = %g4  */
+out:	retl				/* memmove exit for len == 0  */
+	 mov		%g5, %o0		/* delay slot: return value = %g5 (dst saved at memmove entry)  */
+/* memmove: %o0 = dst, %o1 = src, %o2 = len; returns dst.  If unsigned (dst - src) >= len control goes to label 218 earlier in this file (not shown here; presumably the forward copy), otherwise the copy runs backwards from the end.  */
+	.align		32
+ENTRY(memmove)
+	mov		%o0, %g5		/* keep dst for the len == 0 return at out:  */
+#ifndef USE_BPR
+	srl		%o2, 0, %o2		/* zero-extend len from 32 bits  */
+#endif
+	brz,pn		%o2, out		/* len == 0: nothing to copy  */
+	 sub		%o0, %o1, %o4		/* delay slot: %o4 = dst - src  */
+	cmp		%o4, %o2
+	bgeu,pt		%XCC, 218b		/* unsigned (dst - src) >= len: leave for label 218  */
+	 mov		%o0, %g4		/* delay slot: %g4 = dst, returned by every backward path  */
+	add		%o0, %o2, %o0		/* %o0 = dst + len  */
+220:	add		%o1, %o2, %o1		/* %o1 = src + len  */
+	cmp		%o2, 15
+	bleu,pn		%xcc, 228b		/* len <= 15: byte copy at 228  */
+	 andcc		%o0, 7, %g2
+	sub		%o0, %o1, %g5
+	andcc		%g5, 3, %o5		/* (dst - src) & 3  */
+	bne,pn		%xcc, 232f		/* dst and src differ modulo 4: plain byte loop  */
+	 andcc		%o1, 3, %g0		/* delay slot: is the src end 4-byte aligned?  */
+	be,a,pt		%xcc, 236f		/* yes: skip the byte/halfword fix-ups  */
+	 andcc		%o1, 4, %g0		/* annulled slot (runs only if taken): flags for the be at 236  */
+	andcc		%o1, 1, %g0
+	be,pn		%xcc, 4f		/* src end even: only a halfword is needed  */
+	 andcc		%o1, 2, %g0		/* delay slot: flags consumed by the "be 5f" below  */
+	ldub		[%o1 - 1], %g2		/* src end odd: move one byte  */
+	sub		%o1, 1, %o1
+	sub		%o0, 1, %o0
+	sub		%o2, 1, %o2
+	be,pn		%xcc, 5f		/* bit 1 of the old src end was clear: now 4-byte aligned  */
+	 stb		%g2, [%o0]
+4:	lduh		[%o1 - 2], %g2		/* move one halfword  */
+	sub		%o1, 2, %o1
+	sub		%o0, 2, %o0
+	sub		%o2, 2, %o2
+	sth		%g2, [%o0]
+5:	andcc		%o1, 4, %g0
+236:	be,a,pn		%xcc, 2f		/* src end already 8-byte aligned: skip the word move  */
+	 andcc		%o2, -128, %g6		/* annulled slot (runs only if taken): %g6 = len & ~127  */
+	lduw		[%o1 - 4], %g5		/* move one word so the src end becomes 8-byte aligned  */
+	sub		%o1, 4, %o1
+	sub		%o0, 4, %o0
+	sub		%o2, 4, %o2
+	stw		%g5, [%o0]
+	andcc		%o2, -128, %g6		/* %g6 = bytes to move in 128-byte chunks  */
+2:	be,pn		%xcc, 235f		/* no full 128-byte chunk  */
+	 andcc		%o0, 4, %g0		/* delay slot: bit 2 of the dst end  */
+	be,pn		%xcc, 282f + 4		/* bit clear: stx loop at 282, entered past its first ldx  */
+5:	RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)	/* its first ldx is the delay slot above and equals the one skipped at 282  */
+	RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
+	RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
+	RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
+	subcc		%g6, 128, %g6
+	sub		%o1, 128, %o1
+	bne,pt		%xcc, 5b
+	 sub		%o0, 128, %o0
+235:	andcc		%o2, 0x70, %g6		/* %g6 = bytes left to move in 16-byte units  */
+41:	be,pn		%xcc, 280f
+	 andcc		%o2, 8, %g0		/* delay slot: flags tested at 280  */
+	/* Computed jump into the RMOVE_LASTCHUNK table: 8 instructions (32 bytes of code) per 16 data bytes, hence %g6 * 2.  */
+279:	rd		%pc, %o5
+	sll		%g6, 1, %g5
+	sub		%o1, %g6, %o1
+	sub		%o5, %g5, %o5
+	jmpl		%o5 + %lo(280f - 279b), %g0
+	 sub		%o0, %g6, %o0
+	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
+	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
+	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
+	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
+	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
+	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
+	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
+280:	be,pt		%xcc, 281f		/* (%o2 & 8) == 0  */
+	 andcc		%o2, 4, %g0
+	ldx		[%o1 - 8], %g2		/* move 8 bytes as two 32-bit stores  */
+	sub		%o0, 8, %o0
+	stw		%g2, [%o0 + 4]
+	sub		%o1, 8, %o1
+	srlx		%g2, 32, %g2
+	stw		%g2, [%o0]
+281:	be,pt		%xcc, 1f		/* (%o2 & 4) == 0  */
+	 andcc		%o2, 2, %g0
+	lduw		[%o1 - 4], %g2		/* move 4 bytes  */
+	sub		%o1, 4, %o1
+	stw		%g2, [%o0 - 4]
+	sub		%o0, 4, %o0
+1:	be,pt		%xcc, 1f		/* (%o2 & 2) == 0  */
+	 andcc		%o2, 1, %g0
+	lduh		[%o1 - 2], %g2		/* move 2 bytes  */
+	sub		%o1, 2, %o1
+	sth		%g2, [%o0 - 2]
+	sub		%o0, 2, %o0
+1:	be,pt		%xcc, 211f		/* (%o2 & 1) == 0  */
+	 nop
+	ldub		[%o1 - 1], %g2		/* move the final byte  */
+	stb		%g2, [%o0 - 1]
+211:	retl
+	 mov		%g4, %o0		/* delay slot: return dst  */
+	/* Same scheme as above for a dst end with bit 2 clear: doubleword stores, 128 bytes per iteration.  */
+282:	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
+	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
+	subcc		%g6, 128, %g6
+	sub		%o1, 128, %o1
+	bne,pt		%xcc, 282b
+	 sub		%o0, 128, %o0
+	andcc		%o2, 0x70, %g6		/* %g6 = bytes left to move in 16-byte units  */
+	be,pn		%xcc, 284f
+	 andcc		%o2, 8, %g0		/* delay slot: flags tested at 284  */
+	/* Computed jump into the RMOVE_LASTALIGNCHUNK table: 4 instructions (16 bytes of code) per 16 data bytes, so %g6 is used unscaled.  */
+283:	rd		%pc, %o5
+	sub		%o1, %g6, %o1
+	sub		%o5, %g6, %o5
+	jmpl		%o5 + %lo(284f - 283b), %g0
+	 sub		%o0, %g6, %o0
+	RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
+	RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
+	RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
+	RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
+	RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
+	RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
+	RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
+284:	be,pt		%xcc, 285f		/* (%o2 & 8) == 0  */
+	 andcc		%o2, 4, %g0
+	ldx		[%o1 - 8], %g2		/* move 8 bytes with one stx  */
+	sub		%o0, 8, %o0
+	sub		%o1, 8, %o1
+	stx		%g2, [%o0]
+285:	be,pt		%xcc, 1f		/* (%o2 & 4) == 0  */
+	 andcc		%o2, 2, %g0
+	lduw		[%o1 - 4], %g2		/* move 4 bytes  */
+	sub		%o0, 4, %o0
+	sub		%o1, 4, %o1
+	stw		%g2, [%o0]
+1:	be,pt		%xcc, 1f		/* (%o2 & 2) == 0  */
+	 andcc		%o2, 1, %g0
+	lduh		[%o1 - 2], %g2		/* move 2 bytes  */
+	sub		%o0, 2, %o0
+	sub		%o1, 2, %o1
+	sth		%g2, [%o0]
+1:	be,pt		%xcc, 1f		/* (%o2 & 1) == 0  */
+	 nop
+	ldub		[%o1 - 1], %g2		/* move the final byte  */
+	stb		%g2, [%o0 - 1]
+1:	retl
+	 mov		%g4, %o0		/* delay slot: return dst  */
+	/* Fallback when dst and src differ modulo 4: one byte per iteration, moving down from the end.  */
+232:	ldub		[%o1 - 1], %g5
+	sub		%o1, 1, %o1
+	sub		%o0, 1, %o0
+	subcc		%o2, 1, %o2
+	bne,pt		%xcc, 232b
+	 stb		%g5, [%o0]
+234:	retl
+	 mov		%g4, %o0		/* delay slot: return dst  */
+END(memmove)
+/* Only when USE_BPR is defined: make the __align_cpy_* names weak aliases of memcpy.  */
+#ifdef USE_BPR
+weak_alias (memcpy, __align_cpy_1)
+weak_alias (memcpy, __align_cpy_2)
+weak_alias (memcpy, __align_cpy_4)
+weak_alias (memcpy, __align_cpy_8)
+weak_alias (memcpy, __align_cpy_16)
+#endif
+libc_hidden_builtin_def (memcpy)
+libc_hidden_builtin_def (memmove)
diff --git a/sysdeps/sparc/sparc64/sparcv9v/memset.S b/sysdeps/sparc/sparc64/sparcv9v/memset.S
new file mode 100644
index 0000000000..7a51ef77dc
--- /dev/null
+++ b/sysdeps/sparc/sparc64/sparcv9v/memset.S
@@ -0,0 +1,127 @@
+/* Set a block of memory to some byte value.  For SUN4V Niagara.
+   Copyright (C) 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller (davem@davemloft.net)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+/* Address space identifiers written to %asi by the code below.  */
+#define ASI_BLK_INIT_QUAD_LDD_P	0xe2
+#define ASI_P			0x80
+#define ASI_PNF			0x82
+/* Unless XCC is already defined, branches written with %XCC use %xcc and USE_BPR is defined.  */
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
+	.register	%g2,#scratch
+
+	.text
+	.align		32
+
+ENTRY(memset)
+	/* %o0=buf, %o1=pat, %o2=len.  Replicate the low byte of pat into all 8 bytes of %o2, move len into %o1, then join __bzero at 1:.  Returns buf.  */
+	and		%o1, 0xff, %o3
+	mov		%o2, %o1
+	sllx		%o3, 8, %g1
+	or		%g1, %o3, %o2
+	sllx		%o2, 16, %g1
+	or		%g1, %o2, %o2
+	sllx		%o2, 32, %g1
+	ba,pt		%XCC, 1f
+	 or		%g1, %o2, %o2
+/* __bzero: %o0=buf, %o1=len.  Shares the fill code below with a zero pattern in %o2.  */
+ENTRY(__bzero)
+	clr		%o2
+1:	brz,pn		%o1, 90f		/* len == 0.  NOTE(review): tests all 64 bits; memmove in memcpy.S zero-extends len when USE_BPR is undefined, this path does not -- confirm for builds that predefine XCC  */
+	 mov		%o0, %o3		/* delay slot: keep buf for the return value  */
+
+	wr		%g0, ASI_P, %asi
+
+	cmp		%o1, 15
+	blu,pn		%XCC, 70f		/* must be unsigned and full width: the byte loop at 70 counts on %icc only, so only a length that really is < 15 may go there  */
+	 andcc		%o0, 0x7, %g1		/* delay slot: %g1 = buf & 7  */
+	be,pt		%XCC, 2f		/* already 8-byte aligned  */
+	 mov		8, %g2
+	sub		%g2, %g1, %g1		/* %g1 = bytes up to the next 8-byte boundary  */
+	sub		%o1, %g1, %o1
+1:	stba		%o2, [%o0 + 0x00] %asi
+	subcc		%g1, 1, %g1
+	bne,pt		%XCC, 1b
+	 add		%o0, 1, %o0
+2:	cmp		%o1, 128
+	blu,pn		%XCC, 60f		/* fewer than 128 bytes left (unsigned, full width): skip the 64-byte block loop  */
+	 andcc		%o0, (64 - 1), %g1	/* delay slot: %g1 = buf & 63  */
+	be,pt		%XCC, 40f		/* already 64-byte aligned  */
+	 mov		64, %g2
+	sub		%g2, %g1, %g1		/* %g1 = bytes up to the next 64-byte boundary (a multiple of 8)  */
+	sub		%o1, %g1, %o1
+1:	stxa		%o2, [%o0 + 0x00] %asi
+	subcc		%g1, 8, %g1
+	bne,pt		%XCC, 1b
+	 add		%o0, 8, %o0
+
+40:
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi	/* the 64-byte loop stores through this ASI  */
+	andn		%o1, (64 - 1), %g1	/* %g1 = bytes to fill in whole 64-byte blocks  */
+	sub		%o1, %g1, %o1		/* %o1 = remainder, below 64  */
+50:
+	stxa		%o2, [%o0 + 0x00] %asi
+	stxa		%o2, [%o0 + 0x08] %asi
+	stxa		%o2, [%o0 + 0x10] %asi
+	stxa		%o2, [%o0 + 0x18] %asi
+	stxa		%o2, [%o0 + 0x20] %asi
+	stxa		%o2, [%o0 + 0x28] %asi
+	stxa		%o2, [%o0 + 0x30] %asi
+	stxa		%o2, [%o0 + 0x38] %asi
+	subcc		%g1, 64, %g1
+	bne,pt		%XCC, 50b
+	 add		%o0, 64, %o0
+
+	wr		%g0, ASI_P, %asi	/* back to ASI_P for the tail stores  */
+	brz,pn		%o1, 80f		/* nothing left; the andncc at 60: is this branch's delay slot  */
+60:
+	 andncc		%o1, 0x7, %g1		/* %g1 = bytes to fill in whole doublewords  */
+	be,pn		%XCC, 2f
+	 sub		%o1, %g1, %o1		/* delay slot: %o1 = remainder, below 8  */
+1:	stxa		%o2, [%o0 + 0x00] %asi
+	subcc		%g1, 8, %g1
+	bne,pt		%XCC, 1b
+	 add		%o0, 8, %o0
+2:	brz,pt		%o1, 80f
+	 nop
+
+70:
+1:	stba		%o2, [%o0 + 0x00] %asi	/* byte loop: reached only with 0 < %o1 < 15  */
+	subcc		%o1, 1, %o1
+	bne,pt		%icc, 1b
+	 add		%o0, 1, %o0
+
+	/* fallthrough */
+
+80:
+	wr		%g0, ASI_PNF, %asi	/* every path that wrote %asi leaves it as ASI_PNF  */
+
+90:
+	retl
+	 mov		%o3, %o0		/* delay slot: return the original buf  */
+END(__bzero)
+END(memset)
+
+libc_hidden_builtin_def (memset)
+weak_alias (__bzero, bzero)
diff --git a/sysdeps/unix/sysv/linux/i386/fxstatat.c b/sysdeps/unix/sysv/linux/i386/fxstatat.c
index b077435553..94f6e81186 100644
--- a/sysdeps/unix/sysv/linux/i386/fxstatat.c
+++ b/sysdeps/unix/sysv/linux/i386/fxstatat.c
@@ -172,5 +172,5 @@ libc_hidden_def (__fxstatat)
 #ifdef XSTAT_IS_XSTAT64
 # undef __fxstatat64
 strong_alias (__fxstatat, __fxstatat64);
-libc_hidden_def (__fxstatat64)
+libc_hidden_ver (__fxstatat, __fxstatat64)
 #endif
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h b/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
index fc80c9ff86..9ddec8e041 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
@@ -268,7 +268,7 @@
 	register unsigned long gpr6 asm ("6") = (unsigned long)(arg5);
 #define DECLARGS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
 	DECLARGS_5(arg1, arg2, arg3, arg4, arg5) \
-	register unsigned long gpr6 asm ("7") = (unsigned long)(arg6);
+	register unsigned long gpr7 asm ("7") = (unsigned long)(arg6);
 
 #define ASMFMT_0
 #define ASMFMT_1 , "0" (gpr2)
diff --git a/version.h b/version.h
index aec3a8f2b1..3eec34c09b 100644
--- a/version.h
+++ b/version.h
@@ -1,4 +1,4 @@
 /* This file just defines the current version number of libc.  */
 
 #define RELEASE "development"
-#define VERSION "2.3.90"
+#define VERSION "2.3.91"