about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog160
-rw-r--r--Makeconfig2
-rw-r--r--Makefile4
-rw-r--r--NEWS10
-rw-r--r--config.make.in2
-rwxr-xr-xconfigure34
-rw-r--r--configure.in17
-rw-r--r--elf/Makefile4
-rw-r--r--elf/dl-support.c7
-rw-r--r--elf/dl-sysdep.c3
-rw-r--r--elf/rtld.c2
-rw-r--r--fedora/branch.mk4
-rw-r--r--fedora/glibc.spec.in11
-rw-r--r--hurd/hurd/signal.h9
-rw-r--r--libio/fmemopen.c11
-rw-r--r--login/utmp_file.c25
-rw-r--r--manual/arith.texi6
-rw-r--r--manual/creature.texi2
-rw-r--r--manual/filesys.texi2
-rw-r--r--manual/math.texi2
-rw-r--r--manual/memory.texi2
-rw-r--r--manual/resource.texi4
-rw-r--r--manual/syslog.texi2
-rw-r--r--manual/time.texi2
-rw-r--r--nptl/ChangeLog9
-rw-r--r--nptl/allocatestack.c22
-rw-r--r--nptl/sysdeps/x86_64/pthreaddef.h10
-rw-r--r--nscd/connections.c19
-rw-r--r--resolv/res_mkquery.c9
-rw-r--r--scripts/gen-as-const.awk14
-rw-r--r--sysdeps/generic/ldsodefs.h5
-rw-r--r--sysdeps/i386/fpu/s_expm1l.S7
-rw-r--r--sysdeps/mach/i386/machine-lock.h8
-rw-r--r--sysdeps/posix/clock_getres.c4
-rw-r--r--sysdeps/powerpc/dl-procinfo.c10
-rw-r--r--sysdeps/powerpc/dl-procinfo.h10
-rw-r--r--sysdeps/powerpc/fpu/bits/mathinline.h10
-rw-r--r--sysdeps/powerpc/powerpc32/dl-machine.c5
-rw-r--r--sysdeps/powerpc/sysdep.h2
-rw-r--r--sysdeps/unix/sysv/linux/bits/in.h2
-rw-r--r--sysdeps/unix/sysv/linux/clock_getres.c6
-rw-r--r--sysdeps/unix/sysv/linux/dl-origin.c47
-rw-r--r--sysdeps/unix/sysv/linux/i386/bits/mman.h1
-rw-r--r--sysdeps/unix/sysv/linux/ifaddrs.c22
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/bits/mathinline.h132
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/bits/mman.h3
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/bits/mman.h1
-rw-r--r--sysdeps/x86_64/fpu/s_expm1l.S7
-rw-r--r--sysdeps/x86_64/memset.S868
49 files changed, 857 insertions, 703 deletions
diff --git a/ChangeLog b/ChangeLog
index bca243a964..8bb37744d6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,161 @@
+2008-08-26  Aurelien Jarno  <aurelien@aurel32.net>
+
+	[BZ #6860]
+        * hurd/hurd/signal.h (HURD_MSGPORT_RPC): Fix a typo.
+
+2008-07-18  Daniel Jacobowitz  <dan@codesourcery.com>
+
+	* Makefile (check-data): Check data directory in add-ons.
+	* elf/Makefile (check-data): Likewise.
+
+2008-08-18  Roland McGrath  <roland@redhat.com>
+
+	* configure.in (--with-cpu): Check compiler support for -march/-mcpu.
+	* configure: Regenerated.
+	* config.make.in (cflags-cpu): New substituted variable.
+	(with-cpu): Variable removed.
+	* Makeconfig (+cflags): Use $(cflags-cpu), not $(with-cpu).
+
+2008-08-14  Ryan S. Arnold  <rsa@us.ibm.com>
+
+	[BZ #6845]
+	* sysdeps/powerpc/fpu/bits/mathinline.h (__signbitl): Copy new
+	__signbitl definition and __LONG_DOUBLE_128__ guard from:
+	* sysdeps/unix/sysv/linux/powerpc/bits/mathinline.h: Remove as
+	redundant.  Functions which call floating point assembler operations
+	should go into a sysdeps powerpc/fpu directory.
+
+2008-08-15  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/x86_64/bits/mman.h: Define MAP_STACK.
+	* sysdeps/unix/sysv/linux/i386/bits/mman.h: Likewise.
+
+2008-08-14  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/x86_64/memset.S: Reduce size of tables for PIC.
+
+	* sysdeps/unix/sysv/linux/dl-origin.c (_dl_get_origin): Undo all
+	changes related to AT_EXECFN.  We cannot use that string.
+	* sysdeps/generic/ldsodefs.h (struct rtld_global_ro): Remove
+	_dl_execfn member.
+	* elf/dl-support.c: Remove _dl_execfn variable.
+	(_dl_aux_init): Remove handling of AT_EXECFN.
+	* elf/dl-sysdep.c (_dl_sysdep_start): Remove handling of AT_EXECFN.
+	* elf/rtld.c (process_envvars): Remove use of __ASSUME_AT_EXECFN.
+
+2008-08-13  Ulrich Drepper  <drepper@redhat.com>
+
+	[BZ #6544]
+	* libio/fmemopen.c: Implement binary mode.  In this mode no NUL
+	byte gets added to writes and seeks from the end use the length of
+	the buffer and not the currently terminating NUL byte.
+
+	[BZ #6634]
+	* login/utmp_file.c (getutent_r_file): Take additional parameter.
+	Set to true if locking failed.
+	(getutid_r_file): Adjust caller.
+	(pututline_file): Likewise.  Return NULL in this case.
+	Patch mostly by halesh.s@gmail.com.
+
+2008-08-12  Ulrich Drepper  <drepper@redhat.com>
+
+	[BZ #6589]
+	* sysdeps/unix/sysv/linux/ifaddrs.c (struct sockaddr_ll_max):
+	Define.
+	(struct ifaddrs_storage): Use it instead of sockaddr_ll.
+
+2008-08-12  Ryan S. Arnold  <rsa@us.ibm.com>
+
+	[BZ #6839]
+	* sysdeps/powerpc/powerpc32/dl-machine.c (CHECK_STATIC_TLS): Remove
+	macro since it is now available in elf/dynamic-link.h.
+
+2008-08-12  Roland McGrath  <roland@frob.com>
+
+	* hurd/hurd/signal.h (HURD_MSGPORT_RPC): Avoid -Wparentheses triggers,
+	in case used outside of libc.
+	Reported by Samuel Thibault <samuel.thibault@ens-lyon.org>
+
+2008-08-11  Ulrich Drepper  <drepper@redhat.com>
+
+	* resolv/res_mkquery.c (res_nmkquery): Call ns_name_compress
+	directly instead of going through dn_comp.
+
+2008-08-11  Aurelien Jarno  <aurel32@debian.org>
+
+	* sysdeps/mach/i386/machine-lock.h (__spin_unlock, __spin_try_lock):
+	Fix asm constraints.
+
+2008-08-07  Ulrich Drepper  <drepper@redhat.com>
+
+	[BZ #6790]
+	* sysdeps/unix/sysv/linux/bits/in.h (IP_PMTUDISC_PROBE): Define.
+	(IPV6_PMTUDISC_PROBE): Likewise.
+
+2008-08-07  Pete Eberlein  <eberlein@us.ibm.com>
+
+	[BZ #6791]
+	* sysdeps/unix/sysv/linux/powerpc/bits/mman.h (PROT_SAO): Define.
+
+2008-08-01  Steven Munroe  <sjmunroe@us.ibm.com>
+	    Carlos Eduardo Seo  <cseo@linux.vnet.ibm.com>
+
+	[BZ #6817]
+	* sysdeps/powerpc/dl-procinfo.c (_dl_powerpc_cap_flags):
+	Added the members 'vsx' and 'arch_2_06'.
+	(_dl_powerpc_platforms): Add the member 'power7'.
+	* sysdeps/powerpc/dl-procinfo.h: Modify _DL_HWCAP_FIRST
+	to reflect the changes required by VSX and ISA 2.06.
+	Modify _DL_PLATFORMS_COUNT to reflect the addition of
+	'power7'.
+	Defined PPC_PLATFORM_POWER7.
+	(_dl_string_platform): Add support for POWER7.
+	* sysdeps/powerpc/sysdep.h: Define bit masks for VSX
+	capability and ISA 2.06.
+
+2008-08-07  Ulrich Drepper  <drepper@redhat.com>
+
+	[BZ #6824]
+	* sysdeps/unix/sysv/linux/powerpc/bits/mathinline.h: Use correct
+	macro to detect use of 128 bit long double.
+	Patch by Ryan S. Arnold <rsa@us.ibm.com>.
+
+2008-08-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* scripts/gen-as-const.awk: Use 32-bit values on 32-bit platforms.
+
+	[BZ #5794]
+	* sysdeps/i386/fpu/s_expm1l.S: Simply use exp implementation for large
+	parameters.
+	* sysdeps/x86_64/fpu/s_expm1l.S: Likewise.
+	Patch by Denys Vlasenko <dvlasenk@redhat.com>.
+
+2008-08-03  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/connections.c (main_loop_poll): Pass a buffer which is
+	guaranteed to be large enough to read inotify event.  Ignore
+	EAGAIN error.  Better error message.  Add branch prediction.
+	(main_loop_epoll): Likewise.
+
+2008-08-02  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/i386/machine-lock.h (__spin_unlock, __spin_try_lock):
+	Add memory clobbers.
+
+2008-08-02  Ulrich Drepper  <drepper@redhat.com>
+
+	* manual/arith.texi: Avoid @strong{Note:}.
+	* manual/creature.texi: Likewise.
+	* manual/filesys.texi: Likewise.
+	* manual/math.texi: Likewise.
+	* manual/memory.texi: Likewise.
+	* manual/resource.texi: Likewise.
+	* manual/syslog.texi: Likewise.
+	* manual/time.texi: Likewise.
+
+	* sysdeps/posix/clock_getres.c (hp_timing_getres): Remove inline
+	to prevent warning.
+
 2008-08-01  Ulrich Drepper  <drepper@redhat.com>
 
 	* sysdeps/unix/sysv/linux/Versions: Fix changes due to old patch for
@@ -57,7 +215,7 @@
 	* sysdeps/posix/getaddrinfo.c (gaih_inet): Raise size of initial
 	buffer passed to NSS functions.
 
-	* nscd/connections.c (nscd_init): Type if preprocessor directive.
+	* nscd/connections.c (nscd_init): Typo in preprocessor directive.
 
 	* sysdeps/unix/sysv/linux/kernel-features.h (__ASSUME_PACCEPT):
 	Define.
diff --git a/Makeconfig b/Makeconfig
index a78615eb6a..83e14094c4 100644
--- a/Makeconfig
+++ b/Makeconfig
@@ -643,7 +643,7 @@ ifeq	"$(strip $(+cflags))" ""
 +cflags	:= $(default_cflags)
 endif	# $(+cflags) == ""
 
-+cflags += $(addprefix -mcpu=,$(with-cpu)) $(+gccwarn) $(+merge-constants)
++cflags += $(cflags-cpu) $(+gccwarn) $(+merge-constants)
 +gcc-nowarn := -w
 
 # Don't duplicate options if we inherited variables from the parent.
diff --git a/Makefile b/Makefile
index 437854ed33..8b405a4669 100644
--- a/Makefile
+++ b/Makefile
@@ -247,8 +247,8 @@ tests-clean:
 tests: $(objpfx)c++-types-check.out $(objpfx)check-local-headers.out
 ifneq ($(CXX),no)
 check-data := $(firstword $(wildcard \
-	        $(foreach D,$(add-ons) scripts/data,\
-	        	  $(patsubst %,$D/c++-types-%.data,\
+	        $(foreach D,$(add-ons) scripts,\
+	        	  $(patsubst %,$D/data/c++-types-%.data,\
 			   	     $(abi-name) \
 			   	     $(addsuffix -$(config-os),\
 				     		 $(config-machine) \
diff --git a/NEWS b/NEWS
index a8ad68bfd5..c3ceaba42e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
-GNU C Library NEWS -- history of user-visible changes.  2008-8-1
+GNU C Library NEWS -- history of user-visible changes.  2008-8-2
 Copyright (C) 1992-2007, 2008 Free Software Foundation, Inc.
 See the end for copying conditions.
 
@@ -28,8 +28,16 @@ Version 2.9
 * Implement "e" option for popen to open file descriptor with the
   close-on-exec flag set.  Implemented by Ulrich Drepper.
 
+* Implement "b" mode for fmemopen.  In this mode writes don't
+  implicitly add a NUL byte and seeks from the end of the buffer really
+  use the buffer end, not the string length as the basis.
+  Implemented by Ulrich Drepper.
+
 * Many functions, exported and internal, now atomically set the close-on-exec
   flag when run on a sufficiently new kernel.  Implemented by Ulrich Drepper.
+
+* Sorting rules for some Indian languages (Kannada, Gurumukhi, Telugu).
+  Implemented by Pravin Satpute.
 
 Version 2.8
 
diff --git a/config.make.in b/config.make.in
index 3fd0abc65c..aa73466713 100644
--- a/config.make.in
+++ b/config.make.in
@@ -31,7 +31,7 @@ base-machine = @base_machine@
 config-vendor = @host_vendor@
 config-os = @host_os@
 config-sysdirs = @sysnames@
-with-cpu = @submachine@
+cflags-cpu = @libc_cv_cc_submachine@
 
 defines = @DEFINES@
 sysincludes = @SYSINCLUDES@
diff --git a/configure b/configure
index 6ffe5e64da..73f199689e 100755
--- a/configure
+++ b/configure
@@ -730,6 +730,7 @@ libc_cv_gnu89_inline
 libc_cv_have_initfini
 no_whole_archive
 exceptions
+libc_cv_cc_submachine
 LIBGD
 have_libaudit
 have_libcap
@@ -2329,7 +2330,6 @@ echo "$as_me: error: --with-cpu requires an argument" >&2;}
 fi
 
 
-
 # An add-on can set this when it wants to disable the sanity check below.
 libc_config_ok=no
 
@@ -7097,6 +7097,35 @@ _ACEOF
   fi
 fi
 
+if test -n "$submachine"; then
+  { echo "$as_me:$LINENO: checking for compiler option for CPU variant" >&5
+echo $ECHO_N "checking for compiler option for CPU variant... $ECHO_C" >&6; }
+if test "${libc_cv_cc_submachine+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+    libc_cv_cc_submachine=no
+  for opt in "-march=$submachine" "-mcpu=$submachine"; do
+    if { ac_try='${CC-cc} $opt -xc /dev/null -S -o /dev/null'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+      libc_cv_cc_submachine="$opt"
+      break
+    fi
+  done
+fi
+{ echo "$as_me:$LINENO: result: $libc_cv_cc_submachine" >&5
+echo "${ECHO_T}$libc_cv_cc_submachine" >&6; }
+  if test "x$libc_cv_cc_submachine" = xno; then
+    { { echo "$as_me:$LINENO: error: ${CC-cc} does not support $submachine" >&5
+echo "$as_me: error: ${CC-cc} does not support $submachine" >&2;}
+   { (exit 1); exit 1; }; }
+  fi
+fi
+
+
 { echo "$as_me:$LINENO: checking for libgd" >&5
 echo $ECHO_N "checking for libgd... $ECHO_C" >&6; }
 if test "$with_gd" != "no"; then
@@ -9264,6 +9293,7 @@ libc_cv_gnu89_inline!$libc_cv_gnu89_inline$ac_delim
 libc_cv_have_initfini!$libc_cv_have_initfini$ac_delim
 no_whole_archive!$no_whole_archive$ac_delim
 exceptions!$exceptions$ac_delim
+libc_cv_cc_submachine!$libc_cv_cc_submachine$ac_delim
 LIBGD!$LIBGD$ac_delim
 have_libaudit!$have_libaudit$ac_delim
 have_libcap!$have_libcap$ac_delim
@@ -9301,7 +9331,7 @@ LIBOBJS!$LIBOBJS$ac_delim
 LTLIBOBJS!$LTLIBOBJS$ac_delim
 _ACEOF
 
-  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 52; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 53; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/configure.in b/configure.in
index cd08a23e29..1651d38c10 100644
--- a/configure.in
+++ b/configure.in
@@ -326,7 +326,6 @@ AC_ARG_WITH([cpu],
   esac
 ])
 
-
 # An add-on can set this when it wants to disable the sanity check below.
 libc_config_ok=no
 
@@ -1973,6 +1972,22 @@ EOF
   fi
 fi
 
+if test -n "$submachine"; then
+  AC_CACHE_CHECK([for compiler option for CPU variant],
+  		 libc_cv_cc_submachine, [dnl
+  libc_cv_cc_submachine=no
+  for opt in "-march=$submachine" "-mcpu=$submachine"; do
+    if AC_TRY_COMMAND([${CC-cc} $opt -xc /dev/null -S -o /dev/null]); then
+      libc_cv_cc_submachine="$opt"
+      break
+    fi
+  done])
+  if test "x$libc_cv_cc_submachine" = xno; then
+    AC_MSG_ERROR([${CC-cc} does not support $submachine])
+  fi
+fi
+AC_SUBST(libc_cv_cc_submachine)
+
 dnl Check whether we have the gd library available.
 AC_MSG_CHECKING(for libgd)
 if test "$with_gd" != "no"; then
diff --git a/elf/Makefile b/elf/Makefile
index b89528c0fa..c25a0c4b26 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -841,8 +841,8 @@ $(objpfx)tst-dlmodcount: $(libdl)
 $(objpfx)tst-dlmodcount.out: $(test-modules)
 
 check-data := $(firstword $(wildcard \
-	        $(foreach D,$(add-ons) scripts/data,\
-	        	  $(patsubst %,$(..)$D/localplt-%.data,\
+	        $(foreach D,$(add-ons) scripts,\
+	        	  $(patsubst %,$(..)$D/data/localplt-%.data,\
 			   	     $(abi-name) \
 			   	     $(addsuffix -$(config-os),\
 				     		 $(config-machine) \
diff --git a/elf/dl-support.c b/elf/dl-support.c
index e5b74fb4d6..6bd573ec57 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -62,12 +62,8 @@ struct link_map *_dl_profile_map;
 /* This is the address of the last stack address ever used.  */
 void *__libc_stack_end;
 
-#ifndef __ASSUME_AT_EXECFN
 /* Path where the binary is found.  */
 const char *_dl_origin_path;
-#endif
-/* File Name of the executable.  */
-const char *_dl_execfn;
 
 /* Nonzero if runtime lookup should not update the .got/.plt.  */
 int _dl_bind_not;
@@ -220,9 +216,6 @@ _dl_aux_init (ElfW(auxv_t) *av)
 	__libc_enable_secure = av->a_un.a_val;
 	__libc_enable_secure_decided = 1;
 	break;
-      case AT_EXECFN:
-	GLRO(dl_execfn) = (void *) av->a_un.a_val;
-	break;
 # ifdef DL_PLATFORM_AUXV
       DL_PLATFORM_AUXV
 # endif
diff --git a/elf/dl-sysdep.c b/elf/dl-sysdep.c
index 622eeb626b..e6f4272a63 100644
--- a/elf/dl-sysdep.c
+++ b/elf/dl-sysdep.c
@@ -173,9 +173,6 @@ _dl_sysdep_start (void **start_argptr,
 	GLRO(dl_sysinfo_dso) = (void *) av->a_un.a_val;
 	break;
 #endif
-      case AT_EXECFN:
-	GLRO(dl_execfn) = (void *) av->a_un.a_val;
-	break;
 #ifdef DL_PLATFORM_AUXV
       DL_PLATFORM_AUXV
 #endif
diff --git a/elf/rtld.c b/elf/rtld.c
index 3f2267af0e..46bece7fa3 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -2579,12 +2579,10 @@ process_envvars (enum mode *modep)
 	  break;
 
 	case 11:
-#ifndef __ASSUME_AT_EXECFN
 	  /* Path where the binary is found.  */
 	  if (!INTUSE(__libc_enable_secure)
 	      && memcmp (envline, "ORIGIN_PATH", 11) == 0)
 	    GLRO(dl_origin_path) = &envline[12];
-#endif
 	  break;
 
 	case 12:
diff --git a/fedora/branch.mk b/fedora/branch.mk
index 53b415d17d..dd9d94907e 100644
--- a/fedora/branch.mk
+++ b/fedora/branch.mk
@@ -3,5 +3,5 @@ glibc-branch := fedora
 glibc-base := HEAD
 DIST_BRANCH := devel
 COLLECTION := dist-f8
-fedora-sync-date := 2008-08-02 08:09 UTC
-fedora-sync-tag := fedora-glibc-20080802T0809
+fedora-sync-date := 2008-08-28 16:23 UTC
+fedora-sync-tag := fedora-glibc-20080828T1623
diff --git a/fedora/glibc.spec.in b/fedora/glibc.spec.in
index f7aaaa3689..85ed84d256 100644
--- a/fedora/glibc.spec.in
+++ b/fedora/glibc.spec.in
@@ -19,7 +19,7 @@
 Summary: The GNU libc libraries
 Name: glibc
 Version: @glibcversion@
-Release: 11
+Release: 12
 # GPLv2+ is used in a bunch of programs, LGPLv2+ is used for libraries.
 # Things that are linked directly into dynamically linked programs
 # and shared libraries (e.g. crt files, lib*_nonshared.a) have an additional
@@ -976,6 +976,15 @@ rm -f *.filelist*
 %endif
 
 %changelog
+* Fri Aug 29 2008 Jakub Jelinek <jakub@redhat.com> 2.8.90-12
+- update from trunk
+  - revert origin changes (#457849)
+  - use MAP_STACK for thread stacks
+  - misc fixes (BZ#6845, BZ#6544, BZ#6634, BZ#6589, BZ#6790, BZ#6791,
+    BZ#6824)
+  - power7 bits (BZ#6817)
+  - fix expm1 on i?86/x86_64 (#43354, BZ#5794)
+
 * Sat Aug  2 2008 Jakub Jelinek <jakub@redhat.com> 2.8.90-11
 - update from trunk
   - fix non-absolute $ORIGIN handling (#457560)
diff --git a/hurd/hurd/signal.h b/hurd/hurd/signal.h
index 39fa0f857c..21e30c5729 100644
--- a/hurd/hurd/signal.h
+++ b/hurd/hurd/signal.h
@@ -1,5 +1,6 @@
 /* Implementing POSIX.1 signals under the Hurd.
-   Copyright (C) 1993,94,95,96,98,99,2002,2007 Free Software Foundation, Inc.
+   Copyright (C) 1993,94,95,96,98,99,2002,2007,2008
+	Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -342,10 +343,12 @@ extern mach_msg_timeout_t _hurd_interrupted_rpc_timeout;
     do									      \
       {									      \
 	/* Get the message port.  */					      \
-	if (__err = (fetch_msgport_expr))				      \
+	__err = (fetch_msgport_expr);					      \
+	if (__err)							      \
 	  break;							      \
 	/* Get the reference port.  */					      \
-	if (__err = (fetch_refport_expr))				      \
+	__err = (fetch_refport_expr);					      \
+	if (__err)							      \
 	  {								      \
 	    /* Couldn't get it; deallocate MSGPORT and fail.  */	      \
 	    __mach_port_deallocate (__mach_task_self (), msgport);	      \
diff --git a/libio/fmemopen.c b/libio/fmemopen.c
index f3b280092c..b618ce585d 100644
--- a/libio/fmemopen.c
+++ b/libio/fmemopen.c
@@ -1,7 +1,7 @@
 /* Fmemopen implementation.
-   Copyright (C) 2000, 2002, 2005, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2002, 2005, 2006, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by  Hanno Mueller, kontakt@hanno.de, 2000.
+   Contributed by Hanno Mueller, kontakt@hanno.de, 2000.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -82,6 +82,7 @@ struct fmemopen_cookie_struct
 {
   char *buffer;
   int mybuffer;
+  int binmode;
   size_t size;
   _IO_off64_t pos;
   size_t maxpos;
@@ -120,7 +121,7 @@ fmemopen_write (void *cookie, const char *b, size_t s)
 
   c = (fmemopen_cookie_t *) cookie;
 
-  addnullc = s == 0 || b[s - 1] != '\0';
+  addnullc = c->binmode == 0 && (s == 0 || b[s - 1] != '\0');
 
   if (c->pos + s + addnullc > c->size)
     {
@@ -165,7 +166,7 @@ fmemopen_seek (void *cookie, _IO_off64_t *p, int w)
       break;
 
     case SEEK_END:
-      np = c->maxpos - *p;
+      np = (c->binmode ? c->size : c->maxpos) - *p;
       break;
 
     default:
@@ -248,6 +249,8 @@ fmemopen (void *buf, size_t len, const char *mode)
   else
     c->pos = 0;
 
+  c->binmode = mode[0] != '\0' && mode[1] == 'b';
+
   iof.read = fmemopen_read;
   iof.write = fmemopen_write;
   iof.seek = fmemopen_seek;
diff --git a/login/utmp_file.c b/login/utmp_file.c
index c0bd229952..9033f72a4e 100644
--- a/login/utmp_file.c
+++ b/login/utmp_file.c
@@ -22,6 +22,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
@@ -244,12 +245,16 @@ getutent_r_file (struct utmp *buffer, struct utmp **result)
 
 
 static int
-internal_getut_r (const struct utmp *id, struct utmp *buffer)
+internal_getut_r (const struct utmp *id, struct utmp *buffer,
+		  bool *lock_failed)
 {
   int result = -1;
 
   LOCK_FILE (file_fd, F_RDLCK)
-    LOCKING_FAILED ();
+    {
+      *lock_failed = true;
+      LOCKING_FAILED ();
+    }
 
 #if _HAVE_UT_TYPE - 0
   if (id->ut_type == RUN_LVL || id->ut_type == BOOT_TIME
@@ -320,7 +325,10 @@ getutid_r_file (const struct utmp *id, struct utmp *buffer,
       return -1;
     }
 
-  if (internal_getut_r (id, &last_entry) < 0)
+  /* We don't have to distinguish whether we can lock the file or
+     whether there is no entry.  */
+  bool lock_failed = false;
+  if (internal_getut_r (id, &last_entry, &lock_failed) < 0)
     {
       *result = NULL;
       return -1;
@@ -410,7 +418,16 @@ pututline_file (const struct utmp *data)
 	  __utmp_equal (&last_entry, data)))
     found = 1;
   else
-    found = internal_getut_r (data, &buffer);
+    {
+      bool lock_failed = false;
+      found = internal_getut_r (data, &buffer, &lock_failed);
+
+      if (__builtin_expect (lock_failed, false))
+	{
+	  __set_errno (EAGAIN);
+	  return NULL;
+	}
+    }
 
   LOCK_FILE (file_fd, F_WRLCK)
     {
diff --git a/manual/arith.texi b/manual/arith.texi
index 078885e86f..b1580a06f5 100644
--- a/manual/arith.texi
+++ b/manual/arith.texi
@@ -418,7 +418,7 @@ This function returns @code{-1} if @var{x} represents negative infinity,
 This function returns a nonzero value if @var{x} is a ``not a number''
 value, and zero otherwise.
 
-@strong{Note:} The @code{isnan} macro defined by @w{ISO C99} overrides
+@strong{NB:} The @code{isnan} macro defined by @w{ISO C99} overrides
 the BSD function.  This is normally not a problem, because the two
 routines behave identically.  However, if you really need to get the BSD
 function for some reason, you can write
@@ -968,7 +968,7 @@ decide whether traps will occur for each of the exceptions, by setting
 bits in the @dfn{control word}.  In C, traps result in the program
 receiving the @code{SIGFPE} signal; see @ref{Signal Handling}.
 
-@strong{Note:} @w{IEEE 754} says that trap handlers are given details of
+@strong{NB:} @w{IEEE 754} says that trap handlers are given details of
 the exceptional situation, and can set the result value.  C signals do
 not provide any mechanism to pass this information back and forth.
 Trapping exceptions in C is therefore not very useful.
@@ -1721,7 +1721,7 @@ Not all machines provide hardware support for these operations.  On
 machines that don't, the macros can be very slow.  Therefore, you should
 not use these functions when NaN is not a concern.
 
-@strong{Note:} There are no macros @code{isequal} or @code{isunequal}.
+@strong{NB:} There are no macros @code{isequal} or @code{isunequal}.
 They are unnecessary, because the @code{==} and @code{!=} operators do
 @emph{not} throw an exception if one or both of the operands are NaN.
 
diff --git a/manual/creature.texi b/manual/creature.texi
index c825edd5ae..96501568a0 100644
--- a/manual/creature.texi
+++ b/manual/creature.texi
@@ -217,7 +217,7 @@ sequence of definitions:
 
 Note that if you do this, you must link your program with the BSD
 compatibility library by passing the @samp{-lbsd-compat} option to the
-compiler or linker.  @strong{Note:} If you forget to do this, you may
+compiler or linker.  @strong{NB:} If you forget to do this, you may
 get very strange errors at run time.
 @end defvr
 
diff --git a/manual/filesys.texi b/manual/filesys.texi
index 9719d41946..a486f7de34 100644
--- a/manual/filesys.texi
+++ b/manual/filesys.texi
@@ -3239,7 +3239,7 @@ are replaced with six characters which make the whole string a unique
 file name.  Usually the template string is something like
 @samp{/tmp/@var{prefix}XXXXXX}, and each program uses a unique @var{prefix}.
 
-@strong{Note:} Because @code{mktemp} and @code{mkstemp} modify the
+@strong{NB:} Because @code{mktemp} and @code{mkstemp} modify the
 template string, you @emph{must not} pass string constants to them.
 String constants are normally in read-only storage, so your program
 would crash when @code{mktemp} or @code{mkstemp} tried to modify the
diff --git a/manual/math.texi b/manual/math.texi
index 7ebcddee3f..50e087c487 100644
--- a/manual/math.texi
+++ b/manual/math.texi
@@ -1366,7 +1366,7 @@ The prototypes for these functions are in @file{stdlib.h}.
 This function returns the next pseudo-random number in the sequence.
 The value returned ranges from @code{0} to @code{RAND_MAX}.
 
-@strong{Note:} Temporarily this function was defined to return a
+@strong{NB:} Temporarily this function was defined to return a
 @code{int32_t} value to indicate that the return value always contains
 32 bits even if @code{long int} is wider.  The standard demands it
 differently.  Users must always be aware of the 32-bit limitation,
diff --git a/manual/memory.texi b/manual/memory.texi
index cbe147b712..43afc7bf95 100644
--- a/manual/memory.texi
+++ b/manual/memory.texi
@@ -2326,7 +2326,7 @@ additional block on each iteration.  This is impossible with
 variable-sized arrays.
 @end itemize
 
-@strong{Note:} If you mix use of @code{alloca} and variable-sized arrays
+@strong{NB:} If you mix use of @code{alloca} and variable-sized arrays
 within one function, exiting a scope in which a variable-sized array was
 declared frees all blocks allocated with @code{alloca} during the
 execution of that scope.
diff --git a/manual/resource.texi b/manual/resource.texi
index aabd28976a..4a814c9e4a 100644
--- a/manual/resource.texi
+++ b/manual/resource.texi
@@ -591,7 +591,7 @@ ready to execute instructions right now.  When a process blocks to wait
 for something like I/O, its absolute priority is irrelevant.
 
 @cindex runnable process
-@strong{Note:}  The term ``runnable'' is a synonym for ``ready to run.''
+@strong{NB:}  The term ``runnable'' is a synonym for ``ready to run.''
 
 When two processes are running or ready to run and both have the same
 absolute priority, it's more interesting.  In that case, who gets the
@@ -663,7 +663,7 @@ privileged process constantly monitors the process' CPU usage and raises
 its absolute priority when the process isn't getting its entitled share
 and lowers it when the process is exceeding it.
 
-@strong{Note:}  The absolute priority is sometimes called the ``static
+@strong{NB:}  The absolute priority is sometimes called the ``static
 priority.''  We don't use that term in this manual because it misses the
 most important feature of the absolute priority:  its absoluteness.
 
diff --git a/manual/syslog.texi b/manual/syslog.texi
index 02c8e28a7a..e16b5d2475 100644
--- a/manual/syslog.texi
+++ b/manual/syslog.texi
@@ -337,7 +337,7 @@ Locally defined
 
 Results are undefined if the facility code is anything else.
 
-@strong{note:} @code{syslog} recognizes one other facility code: that of
+@strong{NB:} @code{syslog} recognizes one other facility code: that of
 the kernel.  But you can't specify that facility code with these
 functions.  If you try, it looks the same to @code{syslog} as if you are
 requesting the default facility.  But you wouldn't want to anyway,
diff --git a/manual/time.texi b/manual/time.texi
index 64763a2d21..393bccd99f 100644
--- a/manual/time.texi
+++ b/manual/time.texi
@@ -1652,7 +1652,7 @@ The seconds as a decimal number (range @code{0} through @code{60}).
 
 Leading zeroes are permitted but not required.
 
-@strong{Note:} The Unix specification says the upper bound on this value
+@strong{NB:} The Unix specification says the upper bound on this value
 is @code{61}, a result of a decision to allow double leap seconds.  You
 will not see the value @code{61} because no minute has more than one
 leap second, but the myth persists.
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 7c251a2b84..15ee177611 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,12 @@
+2008-08-15  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/x86_64/pthreaddef.h: Remove ARCH_MAP_FLAGS and
+	ARCH_RETRY_MMAP definitions.
+	* allocatestack.c: Remove definition of ARCH_MAP_FLAGS.
+	Define MAP_STACK when not defined.
+	(allocate_stack): Use MAP_STACK instead of ARCH_MAP_FLAGS.  Remove
+	handling of ARCH_RETRY_MMAP.
+
 2008-07-30  Ulrich Drepper  <drepper@redhat.com>
 
 	* tst-align2.c (f): Print message that f is reached.
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 66128e455b..9ab4d6281c 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -84,10 +84,10 @@
 #endif
 
 
-/* Let the architecture add some flags to the mmap() call used to
-   allocate stacks.  */
-#ifndef ARCH_MAP_FLAGS
-# define ARCH_MAP_FLAGS 0
+/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
+   a stack.  Use it when possible.  */
+#ifndef MAP_STACK
+# define MAP_STACK 0
 #endif
 
 /* This yields the pointer that TLS support code calls the thread pointer.  */
@@ -454,20 +454,14 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 #endif
 
 	  mem = mmap (NULL, size, prot,
-		      MAP_PRIVATE | MAP_ANONYMOUS | ARCH_MAP_FLAGS, -1, 0);
+		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
 
 	  if (__builtin_expect (mem == MAP_FAILED, 0))
 	    {
-#ifdef ARCH_RETRY_MMAP
-	      mem = ARCH_RETRY_MMAP (size, prot);
-	      if (__builtin_expect (mem == MAP_FAILED, 0))
-#endif
-		{
-		  if (errno == ENOMEM)
-		    errno = EAGAIN;
+	      if (errno == ENOMEM)
+		__set_errno (EAGAIN);
 
-		  return errno;
-		}
+	       return errno;
 	    }
 
 	  /* SIZE is guaranteed to be greater than zero.
diff --git a/nptl/sysdeps/x86_64/pthreaddef.h b/nptl/sysdeps/x86_64/pthreaddef.h
index 0195bc928f..be63a8c74a 100644
--- a/nptl/sysdeps/x86_64/pthreaddef.h
+++ b/nptl/sysdeps/x86_64/pthreaddef.h
@@ -36,16 +36,6 @@
   ({ char *frame; asm ("movq %%rsp, %0" : "=r" (frame)); frame; })
 
 
-/* We prefer to have the stack allocated in the low 4GB since this
-   allows faster context switches.  */
-#define ARCH_MAP_FLAGS MAP_32BIT
-
-/* If it is not possible to allocate memory there retry without that
-   flag.  */
-#define ARCH_RETRY_MMAP(size, prot) \
-  mmap (NULL, size, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)
-
-
 /* XXX Until we have a better place keep the definitions here.  */
 
 /* While there is no such syscall.  */
diff --git a/nscd/connections.c b/nscd/connections.c
index 8281533c3b..3395e54fa1 100644
--- a/nscd/connections.c
+++ b/nscd/connections.c
@@ -1868,8 +1868,11 @@ main_loop_poll (void)
 		  bool to_clear[lastdb] = { false, };
 		  union
 		  {
+# ifndef PATH_MAX
+#  define PATH_MAX 1024
+# endif
 		    struct inotify_event i;
-		    char buf[100];
+		    char buf[sizeof (struct inotify_event) + PATH_MAX];
 		  } inev;
 
 		  while (1)
@@ -1878,17 +1881,20 @@ main_loop_poll (void)
 							     sizeof (inev)));
 		      if (nb < (ssize_t) sizeof (struct inotify_event))
 			{
-			  if (nb == -1)
+			  if (__builtin_expect (nb == -1 && errno != EAGAIN,
+						0))
 			    {
 			      /* Something went wrong when reading the inotify
 				 data.  Better disable inotify.  */
+			      dbg_log (_("\
+disabled inotify after read error %d"),
+				       errno);
 			      conns[1].fd = -1;
 			      firstfree = 1;
 			      if (nused == 2)
 				nused = 1;
 			      close (inotify_fd);
 			      inotify_fd = -1;
-			      dbg_log (_("disabled inotify after read error"));
 			    }
 			  break;
 			}
@@ -2047,7 +2053,7 @@ main_loop_epoll (int efd)
 	    union
 	    {
 	      struct inotify_event i;
-	      char buf[100];
+	      char buf[sizeof (struct inotify_event) + PATH_MAX];
 	    } inev;
 
 	    while (1)
@@ -2056,15 +2062,16 @@ main_loop_epoll (int efd)
 				 		 sizeof (inev)));
 		if (nb < (ssize_t) sizeof (struct inotify_event))
 		  {
-		    if (nb == -1)
+		    if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
 		      {
 			/* Something went wrong when reading the inotify
 			   data.  Better disable inotify.  */
+			dbg_log (_("disabled inotify after read error %d"),
+				 errno);
 			(void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
 					  NULL);
 			close (inotify_fd);
 			inotify_fd = -1;
-			dbg_log (_("disabled inotify after read error"));
 		      }
 		    break;
 		  }
diff --git a/resolv/res_mkquery.c b/resolv/res_mkquery.c
index 416bf1c32b..ae0cdb417e 100644
--- a/resolv/res_mkquery.c
+++ b/resolv/res_mkquery.c
@@ -160,7 +160,10 @@ res_nmkquery(res_state statp,
 		if ((buflen -= QFIXEDSZ) < 0)
 			return (-1);
 	compose:
-		if ((n = dn_comp(dname, cp, buflen, dnptrs, lastdnptr)) < 0)
+		n = ns_name_compress(dname, cp, buflen,
+				     (const u_char **) dnptrs,
+				     (const u_char **) lastdnptr);
+		if (n < 0)
 			return (-1);
 		cp += n;
 		buflen -= n;
@@ -172,7 +175,9 @@ res_nmkquery(res_state statp,
 		/*
 		 * Make an additional record for completion domain.
 		 */
-		n = dn_comp((char *)data, cp, buflen, dnptrs, lastdnptr);
+		n = ns_name_compress((char *)data, cp, buflen,
+				     (const u_char **) dnptrs,
+				     (const u_char **) lastdnptr);
 		if (__builtin_expect (n < 0, 0))
 			return (-1);
 		cp += n;
diff --git a/scripts/gen-as-const.awk b/scripts/gen-as-const.awk
index 23f2f2bc9b..20d18c12af 100644
--- a/scripts/gen-as-const.awk
+++ b/scripts/gen-as-const.awk
@@ -16,12 +16,20 @@ NF >= 1 && !started {
   if (test) {
     print "\n#include <inttypes.h>";
     print "\n#include <stdio.h>";
+    print "\n#if __WORDSIZE__ == 64";
+    print "\ntypedef uint64_t c_t;";
     print "\n#define U(n) UINT64_C (n)";
+    print "\n#define PRI PRId64";
+    print "\n#else";
+    print "\ntypedef uint32_t c_t;";
+    print "\n#define U(n) UINT32_C (n)";
+    print "\n#define PRI PRId32";
+    print "\n#endif";
     print "\nstatic int do_test (void)\n{\n  int bad = 0, good = 0;\n";
     print "#define TEST(name, source, expr) \\\n" \
-      "  if (U (asconst_##name) != (uint64_t) (expr)) { ++bad;" \
-      " fprintf (stderr, \"%s: %s is %\" PRId64 \" but %s is %\"PRId64 \"\\n\"," \
-      " source, #name, U (asconst_##name), #expr, (uint64_t) (expr));" \
+      "  if (U (asconst_##name) != (c_t) (expr)) { ++bad;" \
+      " fprintf (stderr, \"%s: %s is %\" PRI \" but %s is %\"PRI \"\\n\"," \
+      " source, #name, U (asconst_##name), #expr, (c_t) (expr));" \
       " } else ++good;\n";
   }
   else
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 27c8fb620d..4d857404a3 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -593,13 +593,8 @@ struct rtld_global_ro
   /* Names of shared object for which the RPATH should be ignored.  */
   EXTERN const char *_dl_inhibit_rpath;
 
-#ifndef __ASSUME_AT_EXECFN
   /* Location of the binary.  */
   EXTERN const char *_dl_origin_path;
-#endif
-
-  /* If set, points to the file name of the executable.  */
-  EXTERN const char *_dl_execfn;
 
   /* -1 if the dynamic linker should honor library load bias,
      0 if not, -2 use the default (honor biases for normal
diff --git a/sysdeps/i386/fpu/s_expm1l.S b/sysdeps/i386/fpu/s_expm1l.S
index 2dc379b790..b69b22bc62 100644
--- a/sysdeps/i386/fpu/s_expm1l.S
+++ b/sysdeps/i386/fpu/s_expm1l.S
@@ -1,5 +1,5 @@
 /* ix87 specific implementation of exp(x)-1.
-   Copyright (C) 1996, 1997, 2002, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 2002, 2005, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
    Based on code by John C. Bowman <bowman@ipp-garching.mpg.de>.
@@ -48,6 +48,11 @@ l2e:	.tfloat 1.442695040888963407359924681002
 
 	.text
 ENTRY(__expm1l)
+	movzwl	4+8(%esp), %eax	// load sign bit and 15-bit exponent
+	xorb	$0x80, %ah	// invert sign bit (now 1 is "positive")
+	cmpl	$0xc006, %eax	// is num positive and exp >= 6 (number is >= 128.0)?
+	jae     __ieee754_expl	// (if num is denormal, it is at least >= 64.0)
+
 	fldt	4(%esp)		// x
 	fxam			// Is NaN or +-Inf?
 	fstsw	%ax
diff --git a/sysdeps/mach/i386/machine-lock.h b/sysdeps/mach/i386/machine-lock.h
index d786628170..33602f43a7 100644
--- a/sysdeps/mach/i386/machine-lock.h
+++ b/sysdeps/mach/i386/machine-lock.h
@@ -1,5 +1,5 @@
 /* Machine-specific definition for spin locks.  i386 version.
-   Copyright (C) 1994, 1997, 2007 Free Software Foundation, Inc.
+   Copyright (C) 1994,1997,2007,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -40,7 +40,8 @@ __spin_unlock (__spin_lock_t *__lock)
 {
    register int __unlocked;
    __asm__ __volatile ("xchgl %0, %1"
-		       : "=&r" (__unlocked), "=m" (*__lock) : "0" (0));
+		       : "=&r" (__unlocked), "=m" (*__lock) : "0" (0)
+		       : "memory");
 }
 
 /* Try to lock LOCK; return nonzero if we locked it, zero if another has.  */
@@ -50,7 +51,8 @@ __spin_try_lock (__spin_lock_t *__lock)
 {
   register int __locked;
   __asm__ __volatile ("xchgl %0, %1"
-		      : "=&r" (__locked), "=m" (*__lock) : "0" (1));
+		      : "=&r" (__locked), "=m" (*__lock) : "0" (1)
+		      : "memory");
   return !__locked;
 }
 
diff --git a/sysdeps/posix/clock_getres.c b/sysdeps/posix/clock_getres.c
index f4dc21f8af..7d7cd05ff9 100644
--- a/sysdeps/posix/clock_getres.c
+++ b/sysdeps/posix/clock_getres.c
@@ -1,5 +1,5 @@
 /* clock_getres -- Get the resolution of a POSIX clockid_t.
-   Copyright (C) 1999, 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1999,2000,2001,2003,2004,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -28,7 +28,7 @@
 #if HP_TIMING_AVAIL
 static long int nsec;		/* Clock frequency of the processor.  */
 
-static inline int
+static int
 hp_timing_getres (struct timespec *res)
 {
   if (__builtin_expect (nsec == 0, 0))
diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
index a732e94fa8..1c74c2a905 100644
--- a/sysdeps/powerpc/dl-procinfo.c
+++ b/sysdeps/powerpc/dl-procinfo.c
@@ -46,11 +46,12 @@
 #if !defined PROCINFO_DECL && defined SHARED
   ._dl_powerpc_cap_flags
 #else
-PROCINFO_CLASS const char _dl_powerpc_cap_flags[23][10]
+PROCINFO_CLASS const char _dl_powerpc_cap_flags[25][10]
 #endif
 #ifndef PROCINFO_DECL
 = {
-    "power6x", "dfp", "pa6t",
+    "vsx", 
+    "arch_2_06", "power6x", "dfp", "pa6t",
     "arch_2_05", "ic_snoop", "smt", "booke",
     "cellbe", "power5+", "power5", "power4",
     "notb", "efpdouble", "efpsingle", "spe",
@@ -67,7 +68,7 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[23][10]
 #if !defined PROCINFO_DECL && defined SHARED
   ._dl_powerpc_platforms
 #else
-PROCINFO_CLASS const char _dl_powerpc_platforms[7][12]
+PROCINFO_CLASS const char _dl_powerpc_platforms[8][12]
 #endif
 #ifndef PROCINFO_DECL
 = {
@@ -77,7 +78,8 @@ PROCINFO_CLASS const char _dl_powerpc_platforms[7][12]
     [PPC_PLATFORM_POWER5_PLUS] = "power5+",
     [PPC_PLATFORM_POWER6] = "power6",
     [PPC_PLATFORM_CELL_BE] = "ppc-cell-be",
-    [PPC_PLATFORM_POWER6X] = "power6x"
+    [PPC_PLATFORM_POWER6X] = "power6x",
+    [PPC_PLATFORM_POWER7] = "power7"
   }
 #endif
 #if !defined SHARED || defined PROCINFO_DECL
diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
index 0bf935385a..254195a94c 100644
--- a/sysdeps/powerpc/dl-procinfo.h
+++ b/sysdeps/powerpc/dl-procinfo.h
@@ -23,15 +23,15 @@
 #include <ldsodefs.h>
 #include <sysdep.h>		/* This defines the PPC_FEATURE_* macros.  */
 
-/* There are 20 bits used, but they are bits 12..31.  */
-#define _DL_HWCAP_FIRST		9
+/* There are 25 bits used, but they are bits 7..31.  */
+#define _DL_HWCAP_FIRST		7
 #define _DL_HWCAP_COUNT		32
 
 /* These bits influence library search.  */
 #define HWCAP_IMPORTANT		(PPC_FEATURE_HAS_ALTIVEC \
 				+ PPC_FEATURE_HAS_DFP)
 
-#define _DL_PLATFORMS_COUNT	7
+#define _DL_PLATFORMS_COUNT	8
 
 #define _DL_FIRST_PLATFORM      32
 /* Mask to filter out platforms.  */
@@ -46,6 +46,7 @@
 #define PPC_PLATFORM_POWER6		4
 #define PPC_PLATFORM_CELL_BE		5
 #define PPC_PLATFORM_POWER6X		6
+#define PPC_PLATFORM_POWER7		7
 
 static inline const char *
 __attribute__ ((unused))
@@ -103,6 +104,9 @@ _dl_string_platform (const char *str)
 	      ++str;
 	    }
 	  break;
+	case '7':
+	  ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER7;
+	  break;
 	default:
 	  return -1;
 	}
diff --git a/sysdeps/powerpc/fpu/bits/mathinline.h b/sysdeps/powerpc/fpu/bits/mathinline.h
index 4d4612dac0..4fa4bcaf16 100644
--- a/sysdeps/powerpc/fpu/bits/mathinline.h
+++ b/sysdeps/powerpc/fpu/bits/mathinline.h
@@ -1,5 +1,5 @@
 /* Inline math functions for powerpc.
-   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2004, 2006, 2007
+   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2004, 2006, 2007, 2008
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -72,6 +72,14 @@ __NTH (__signbit (double __x))
   __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
   return __u.__i[0] < 0;
 }
+#  ifdef __LONG_DOUBLE_128__
+__MATH_INLINE int
+__NTH (__signbitl (long double __x))
+{
+  __extension__ union { long double __d; int __i[4]; } __u = { __d: __x };
+  return __u.__i[0] < 0;
+}
+#  endif
 # endif
 #endif /* __USE_ISOC99 */
 
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c
index fc2ce7c1d9..731d23956d 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.c
+++ b/sysdeps/powerpc/powerpc32/dl-machine.c
@@ -557,11 +557,6 @@ __process_machine_rela (struct link_map *map,
       }
       break;
 
-#define CHECK_STATIC_TLS(map, sym_map)					      \
-    do {								      \
-      if (__builtin_expect ((sym_map)->l_tls_offset == NO_TLS_OFFSET, 0))     \
-	_dl_allocate_static_tls (sym_map);				      \
-    } while (0)
 #define DO_TLS_RELOC(suffix)						      \
     case R_PPC_DTPREL##suffix:						      \
       /* During relocation all TLS symbols are defined and used.	      \
diff --git a/sysdeps/powerpc/sysdep.h b/sysdeps/powerpc/sysdep.h
index 2ae52b78c0..43edeb71eb 100644
--- a/sysdeps/powerpc/sysdep.h
+++ b/sysdeps/powerpc/sysdep.h
@@ -44,6 +44,8 @@
 #define PPC_FEATURE_PA6T		0x00000800 /* PA Semi 6T Core */
 #define PPC_FEATURE_HAS_DFP		0x00000400 /* Decimal FP Unit */
 #define PPC_FEATURE_POWER6_EXT		0x00000200 /* P6 + mffgpr/mftgpr */
+#define PPC_FEATURE_HAS_VSX		0x00000100 /* P7 Vector Extension.  */
+#define PPC_FEATURE_ARCH_2_06	        0x00000080 /* ISA 2.06 */
 #define PPC_FEATURE_970 (PPC_FEATURE_POWER4 + PPC_FEATURE_HAS_ALTIVEC)
 
 #ifdef __ASSEMBLER__
diff --git a/sysdeps/unix/sysv/linux/bits/in.h b/sysdeps/unix/sysv/linux/bits/in.h
index 433c033c70..b457a1790f 100644
--- a/sysdeps/unix/sysv/linux/bits/in.h
+++ b/sysdeps/unix/sysv/linux/bits/in.h
@@ -70,6 +70,7 @@
 #define IP_PMTUDISC_DONT   0	/* Never send DF frames.  */
 #define IP_PMTUDISC_WANT   1	/* Use per route hints.  */
 #define IP_PMTUDISC_DO     2	/* Always DF.  */
+#define IP_PMTUDISC_PROBE  3	/* Ignore dst pmtu.  */
 
 /* To select the IP level.  */
 #define SOL_IP	0
@@ -162,6 +163,7 @@ struct in_pktinfo
 #define IPV6_PMTUDISC_DONT	0	/* Never send DF frames.  */
 #define IPV6_PMTUDISC_WANT	1	/* Use per route hints.  */
 #define IPV6_PMTUDISC_DO	2	/* Always DF.  */
+#define IPV6_PMTUDISC_PROBE	3	/* Ignore dst pmtu.  */
 
 /* Socket level values for IPv6.  */
 #define SOL_IPV6        41
diff --git a/sysdeps/unix/sysv/linux/clock_getres.c b/sysdeps/unix/sysv/linux/clock_getres.c
index 813d3ebbf9..581ff22c7c 100644
--- a/sysdeps/unix/sysv/linux/clock_getres.c
+++ b/sysdeps/unix/sysv/linux/clock_getres.c
@@ -1,5 +1,5 @@
 /* clock_getres -- Get the resolution of a POSIX clockid_t.  Linux version.
-   Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
+   Copyright (C) 2003,2004,2005,2006, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -121,7 +121,7 @@ maybe_syscall_getres_cpu (clockid_t clock_id, struct timespec *res)
 	return 0;
 
       e = INTERNAL_SYSCALL_ERRNO (r, err);
-# ifndef __ASSUME_POSIX_TIMERS
+#  ifndef __ASSUME_POSIX_TIMERS
       if (e == ENOSYS)
 	{
 	  __libc_missing_posix_timers = 1;
@@ -129,7 +129,7 @@ maybe_syscall_getres_cpu (clockid_t clock_id, struct timespec *res)
 	  e = EINVAL;
 	}
       else
-# endif
+#  endif
 	{
 	  if (e == EINVAL)
 	    {
diff --git a/sysdeps/unix/sysv/linux/dl-origin.c b/sysdeps/unix/sysv/linux/dl-origin.c
index 64e865b92a..a7fa53ea1d 100644
--- a/sysdeps/unix/sysv/linux/dl-origin.c
+++ b/sysdeps/unix/sysv/linux/dl-origin.c
@@ -1,5 +1,5 @@
 /* Find path of executable.
-   Copyright (C) 1998-2000, 2002, 2004, 2008 Free Software Foundation, Inc.
+   Copyright (C) 1998, 1999, 2000, 2002, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -36,28 +36,29 @@ const char *
 _dl_get_origin (void)
 {
   char linkval[PATH_MAX];
-  const char *str;
-  char *result = (char *) -1l;
+  char *result;
   int len;
+  INTERNAL_SYSCALL_DECL (err);
 
-  str = GLRO(dl_execfn);
-  if (str == NULL || str[0] != '/')
+  len = INTERNAL_SYSCALL (readlink, err, 3, "/proc/self/exe", linkval,
+			  sizeof (linkval));
+  if (! INTERNAL_SYSCALL_ERROR_P (len, err) && len > 0 && linkval[0] != '[')
     {
-      INTERNAL_SYSCALL_DECL (err);
-
-      len = INTERNAL_SYSCALL (readlink, err, 3, "/proc/self/exe", linkval,
-			      sizeof (linkval));
-      if (! INTERNAL_SYSCALL_ERROR_P (len, err)
-	  && len > 0 && linkval[0] != '[')
-	str = linkval;
+      /* We can use this value.  */
+      assert (linkval[0] == '/');
+      while (len > 1 && linkval[len - 1] != '/')
+	--len;
+      result = (char *) malloc (len + 1);
+      if (result == NULL)
+	result = (char *) -1;
+      else if (len == 1)
+	memcpy (result, "/", 2);
       else
-	str = NULL;
+	*((char *) __mempcpy (result, linkval, len - 1)) = '\0';
     }
   else
-    len = strlen (str);
-
-  if (str == NULL)
     {
+      result = (char *) -1;
       /* We use the environment variable LD_ORIGIN_PATH.  If it is set make
 	 a copy and strip out trailing slashes.  */
       if (GLRO(dl_origin_path) != NULL)
@@ -75,20 +76,6 @@ _dl_get_origin (void)
 	    }
 	}
     }
-  else
-    {
-      /* We can use this value.  */
-      assert (str[0] == '/');
-      while (len > 1 && str[len - 1] != '/')
-	--len;
-      result = (char *) malloc (len + 1);
-      if (result == NULL)
-	result = (char *) -1;
-      else if (len == 1)
-	memcpy (result, "/", 2);
-      else
-	*((char *) __mempcpy (result, str, len - 1)) = '\0';
-    }
 
   return result;
 }
diff --git a/sysdeps/unix/sysv/linux/i386/bits/mman.h b/sysdeps/unix/sysv/linux/i386/bits/mman.h
index 00cb982395..2b90c8d7d5 100644
--- a/sysdeps/unix/sysv/linux/i386/bits/mman.h
+++ b/sysdeps/unix/sysv/linux/i386/bits/mman.h
@@ -63,6 +63,7 @@
 # define MAP_NORESERVE	0x04000		/* Don't check for reservations.  */
 # define MAP_POPULATE	0x08000		/* Populate (prefault) pagetables.  */
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
+# define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 #endif
 
 /* Flags to `msync'.  */
diff --git a/sysdeps/unix/sysv/linux/ifaddrs.c b/sysdeps/unix/sysv/linux/ifaddrs.c
index 9aa9abac3b..150ec8a9b0 100644
--- a/sysdeps/unix/sysv/linux/ifaddrs.c
+++ b/sysdeps/unix/sysv/linux/ifaddrs.c
@@ -1,5 +1,5 @@
 /* getifaddrs -- get names and addresses of all network interfaces
-   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+   Copyright (C) 2003-2007, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -49,6 +49,22 @@ int __no_netlink_support attribute_hidden;
 #endif
 
 
+/* There is a problem with this type.  The address length for
+   Infiniband sockets is much longer than the 8 bytes allocated in the
+   sockaddr_ll definition.  Hence we use here a special
+   definition.  */
+struct sockaddr_ll_max
+  {
+    unsigned short int sll_family;
+    unsigned short int sll_protocol;
+    int sll_ifindex;
+    unsigned short int sll_hatype;
+    unsigned char sll_pkttype;
+    unsigned char sll_halen;
+    unsigned char sll_addr[24];
+  };
+
+
 /* struct to hold the data for one ifaddrs entry, so we can allocate
    everything at once.  */
 struct ifaddrs_storage
@@ -59,7 +75,7 @@ struct ifaddrs_storage
     /* Save space for the biggest of the four used sockaddr types and
        avoid a lot of casts.  */
     struct sockaddr sa;
-    struct sockaddr_ll sl;
+    struct sockaddr_ll_max sl;
     struct sockaddr_in s4;
     struct sockaddr_in6 s6;
   } addr, netmask, broadaddr;
@@ -307,7 +323,7 @@ map_newlink (int index, struct ifaddrs_storage *ifas, int *map, int max)
 
 /* Create a linked list of `struct ifaddrs' structures, one for each
    network interface on the host machine.  If successful, store the
-   list in *IFAP and return 0.  On errors, return -1 and set `errno'.  */
+   list in *IFAP and return 0.  On errors, return -1 and set `errno'.  */
 int
 getifaddrs (struct ifaddrs **ifap)
 {
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/mathinline.h b/sysdeps/unix/sysv/linux/powerpc/bits/mathinline.h
deleted file mode 100644
index 4f173912ed..0000000000
--- a/sysdeps/unix/sysv/linux/powerpc/bits/mathinline.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/* Inline math functions for powerpc.
-   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2004, 2006, 2007, 2008
-   Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _MATH_H
-# error "Never use <bits/mathinline.h> directly; include <math.h> instead."
-#endif
-
-#ifndef __extern_inline
-# define __MATH_INLINE __inline
-#else
-# define __MATH_INLINE __extern_inline
-#endif  /* __cplusplus */
-
-#if defined __GNUC__ && !defined _SOFT_FLOAT
-
-#ifdef __USE_ISOC99
-# if !__GNUC_PREREQ (2,97)
-#  define __unordered_cmp(x, y) \
-  (__extension__							      \
-   ({ __typeof__(x) __x = (x); __typeof__(y) __y = (y);			      \
-      unsigned __r;							      \
-      __asm__("fcmpu 7,%1,%2 ; mfcr %0" : "=r" (__r) : "f" (__x), "f"(__y)    \
-              : "cr7");  \
-      __r; }))
-
-#  undef isgreater
-#  undef isgreaterequal
-#  undef isless
-#  undef islessequal
-#  undef islessgreater
-#  undef isunordered
-
-#  define isgreater(x, y) (__unordered_cmp (x, y) >> 2 & 1)
-#  define isgreaterequal(x, y) ((__unordered_cmp (x, y) & 6) != 0)
-#  define isless(x, y) (__unordered_cmp (x, y) >> 3 & 1)
-#  define islessequal(x, y) ((__unordered_cmp (x, y) & 0xA) != 0)
-#  define islessgreater(x, y) ((__unordered_cmp (x, y) & 0xC) != 0)
-#  define isunordered(x, y) (__unordered_cmp (x, y) & 1)
-
-# endif /* __GNUC_PREREQ (2,97) */
-
-/* The gcc, version 2.7 or below, has problems with all this inlining
-   code.  So disable it for this version of the compiler.  */
-# if __GNUC_PREREQ (2, 8)
-/* Test for negative number.  Used in the signbit() macro.  */
-__MATH_INLINE int
-__NTH (__signbitf (float __x))
-{
-  __extension__ union { float __f; int __i; } __u = { __f: __x };
-  return __u.__i < 0;
-}
-__MATH_INLINE int
-__NTH (__signbit (double __x))
-{
-  __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
-  return __u.__i[0] < 0;
-}
-#  ifdef __LONGDOUBLE128
-__MATH_INLINE int
-__NTH (__signbitl (long double __x))
-{
-  __extension__ union { long double __d; int __i[4]; } __u = { __d: __x };
-  return __u.__i[0] < 0;
-}
-#  endif
-# endif
-#endif /* __USE_ISOC99 */
-
-#if !defined __NO_MATH_INLINES && defined __OPTIMIZE__
-
-#ifdef __USE_ISOC99
-
-# ifndef __powerpc64__
-__MATH_INLINE long int lrint (double __x) __THROW;
-__MATH_INLINE long int
-__NTH (lrint (double __x))
-{
-  union {
-    double __d;
-    int __ll[2];
-  } __u;
-  __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
-  return __u.__ll[1];
-}
-
-__MATH_INLINE long int lrintf (float __x) __THROW;
-__MATH_INLINE long int
-__NTH (lrintf (float __x))
-{
-  union {
-    double __d;
-    int __ll[2];
-  } __u;
-  __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
-  return __u.__ll[1];
-}
-# endif
-
-__MATH_INLINE double fdim (double __x, double __y) __THROW;
-__MATH_INLINE double
-__NTH (fdim (double __x, double __y))
-{
-  return __x <= __y ? 0 : __x - __y;
-}
-
-__MATH_INLINE float fdimf (float __x, float __y) __THROW;
-__MATH_INLINE float
-__NTH (fdimf (float __x, float __y))
-{
-  return __x <= __y ? 0 : __x - __y;
-}
-
-#endif /* __USE_ISOC99 */
-#endif /* !__NO_MATH_INLINES && __OPTIMIZE__ */
-#endif /* __GNUC__ && !_SOFT_FLOAT */
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/mman.h b/sysdeps/unix/sysv/linux/powerpc/bits/mman.h
index e03ab7ff80..d5729a1f1c 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/mman.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/mman.h
@@ -1,5 +1,5 @@
 /* Definitions for POSIX memory map interface.  Linux/PowerPC version.
-   Copyright (C) 1997, 2000, 2003, 2005, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1997,2000,2003,2005,2006,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -34,6 +34,7 @@
 #define PROT_WRITE	0x2		/* Page can be written.  */
 #define PROT_EXEC	0x4		/* Page can be executed.  */
 #define PROT_NONE	0x0		/* Page can not be accessed.  */
+#define PROT_SAO	0x10		/* Strong Access Ordering.  */
 #define PROT_GROWSDOWN	0x01000000	/* Extend change to start of
 					   growsdown vma (mprotect only).  */
 #define PROT_GROWSUP	0x02000000	/* Extend change to start of
diff --git a/sysdeps/unix/sysv/linux/x86_64/bits/mman.h b/sysdeps/unix/sysv/linux/x86_64/bits/mman.h
index 535c9edcf9..7810682536 100644
--- a/sysdeps/unix/sysv/linux/x86_64/bits/mman.h
+++ b/sysdeps/unix/sysv/linux/x86_64/bits/mman.h
@@ -64,6 +64,7 @@
 # define MAP_NORESERVE	0x04000		/* Don't check for reservations.  */
 # define MAP_POPULATE	0x08000		/* Populate (prefault) pagetables.  */
 # define MAP_NONBLOCK	0x10000		/* Do not block on IO.  */
+# define MAP_STACK	0x20000		/* Allocation is for a stack.  */
 #endif
 
 /* Flags to `msync'.  */
diff --git a/sysdeps/x86_64/fpu/s_expm1l.S b/sysdeps/x86_64/fpu/s_expm1l.S
index b4f5a3efda..05a1bfcce1 100644
--- a/sysdeps/x86_64/fpu/s_expm1l.S
+++ b/sysdeps/x86_64/fpu/s_expm1l.S
@@ -1,5 +1,5 @@
 /* ix87 specific implementation of exp(x)-1.
-   Copyright (C) 1996, 1997, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 2001, 2002, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
    Based on code by John C. Bowman <bowman@ipp-garching.mpg.de>.
@@ -48,6 +48,11 @@ l2e:	.tfloat 1.442695040888963407359924681002
 
 	.text
 ENTRY(__expm1l)
+	movzwl	8+8(%rsp), %eax	// load sign bit and 15-bit exponent
+	xorb	$0x80, %ah	// invert sign bit (now 1 is "positive")
+	cmpl	$0xc006, %eax	// is num positive and exp >= 6 (number is >= 128.0)?
+	jae     __ieee754_expl	// (if num is denormal, it is at least >= 64.0)
+
 	fldt	8(%rsp)		// x
 	fxam			// Is NaN or +-Inf?
 	fstsw	%ax
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index c7bf2318de..681ab870e0 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -55,12 +55,14 @@ L(now_dw_aligned):
 	cmp    $0x90,%r8
 	jg     L(ck_mem_ops_method)
 L(now_dw_aligned_small):
-	lea    L(setPxQx)(%rip),%r11
 	add    %r8,%rdi
 #ifndef PIC
+	lea    L(setPxQx)(%rip),%r11
 	jmpq   *(%r11,%r8,8)
 #else
-	movslq (%r11,%r8,4),%rcx
+	lea    L(Got0)(%rip),%r11
+	lea    L(setPxQx)(%rip),%rcx
+	movswq (%rcx,%r8,2),%rcx
 	lea    (%rcx,%r11,1),%r11
 	jmpq   *%r11
 #endif
@@ -115,177 +117,177 @@ L(setPxQx):
 # endif
 #else
 L(setPxQx):
-	.int       L(Got0)-L(setPxQx)
-	.int       L(P1Q0)-L(setPxQx)
-	.int       L(P2Q0)-L(setPxQx)
-	.int       L(P3Q0)-L(setPxQx)
-	.int       L(P4Q0)-L(setPxQx)
-	.int       L(P5Q0)-L(setPxQx)
-	.int       L(P6Q0)-L(setPxQx)
-	.int       L(P7Q0)-L(setPxQx)
-
-	.int       L(P0Q1)-L(setPxQx)
-	.int       L(P1Q1)-L(setPxQx)
-	.int       L(P2Q1)-L(setPxQx)
-	.int       L(P3Q1)-L(setPxQx)
-	.int       L(P4Q1)-L(setPxQx)
-	.int       L(P5Q1)-L(setPxQx)
-	.int       L(P6Q1)-L(setPxQx)
-	.int       L(P7Q1)-L(setPxQx)
-
-	.int       L(P0Q2)-L(setPxQx)
-	.int       L(P1Q2)-L(setPxQx)
-	.int       L(P2Q2)-L(setPxQx)
-	.int       L(P3Q2)-L(setPxQx)
-	.int       L(P4Q2)-L(setPxQx)
-	.int       L(P5Q2)-L(setPxQx)
-	.int       L(P6Q2)-L(setPxQx)
-	.int       L(P7Q2)-L(setPxQx)
-
-	.int       L(P0Q3)-L(setPxQx)
-	.int       L(P1Q3)-L(setPxQx)
-	.int       L(P2Q3)-L(setPxQx)
-	.int       L(P3Q3)-L(setPxQx)
-	.int       L(P4Q3)-L(setPxQx)
-	.int       L(P5Q3)-L(setPxQx)
-	.int       L(P6Q3)-L(setPxQx)
-	.int       L(P7Q3)-L(setPxQx)
-
-	.int       L(P0Q4)-L(setPxQx)
-	.int       L(P1Q4)-L(setPxQx)
-	.int       L(P2Q4)-L(setPxQx)
-	.int       L(P3Q4)-L(setPxQx)
-	.int       L(P4Q4)-L(setPxQx)
-	.int       L(P5Q4)-L(setPxQx)
-	.int       L(P6Q4)-L(setPxQx)
-	.int       L(P7Q4)-L(setPxQx)
-
-	.int       L(P0Q5)-L(setPxQx)
-	.int       L(P1Q5)-L(setPxQx)
-	.int       L(P2Q5)-L(setPxQx)
-	.int       L(P3Q5)-L(setPxQx)
-	.int       L(P4Q5)-L(setPxQx)
-	.int       L(P5Q5)-L(setPxQx)
-	.int       L(P6Q5)-L(setPxQx)
-	.int       L(P7Q5)-L(setPxQx)
-
-	.int       L(P0Q6)-L(setPxQx)
-	.int       L(P1Q6)-L(setPxQx)
-	.int       L(P2Q6)-L(setPxQx)
-	.int       L(P3Q6)-L(setPxQx)
-	.int       L(P4Q6)-L(setPxQx)
-	.int       L(P5Q6)-L(setPxQx)
-	.int       L(P6Q6)-L(setPxQx)
-	.int       L(P7Q6)-L(setPxQx)
-
-	.int       L(P0Q7)-L(setPxQx)
-	.int       L(P1Q7)-L(setPxQx)
-	.int       L(P2Q7)-L(setPxQx)
-	.int       L(P3Q7)-L(setPxQx)
-	.int       L(P4Q7)-L(setPxQx)
-	.int       L(P5Q7)-L(setPxQx)
-	.int       L(P6Q7)-L(setPxQx)
-	.int       L(P7Q7)-L(setPxQx)
-
-	.int       L(P0Q8)-L(setPxQx)
-	.int       L(P1Q8)-L(setPxQx)
-	.int       L(P2Q8)-L(setPxQx)
-	.int       L(P3Q8)-L(setPxQx)
-	.int       L(P4Q8)-L(setPxQx)
-	.int       L(P5Q8)-L(setPxQx)
-	.int       L(P6Q8)-L(setPxQx)
-	.int       L(P7Q8)-L(setPxQx)
-
-	.int       L(P0Q9)-L(setPxQx)
-	.int       L(P1Q9)-L(setPxQx)
-	.int       L(P2Q9)-L(setPxQx)
-	.int       L(P3Q9)-L(setPxQx)
-	.int       L(P4Q9)-L(setPxQx)
-	.int       L(P5Q9)-L(setPxQx)
-	.int       L(P6Q9)-L(setPxQx)
-	.int       L(P7Q9)-L(setPxQx)
-
-	.int       L(P0QA)-L(setPxQx)
-	.int       L(P1QA)-L(setPxQx)
-	.int       L(P2QA)-L(setPxQx)
-	.int       L(P3QA)-L(setPxQx)
-	.int       L(P4QA)-L(setPxQx)
-	.int       L(P5QA)-L(setPxQx)
-	.int       L(P6QA)-L(setPxQx)
-	.int       L(P7QA)-L(setPxQx)
-
-	.int       L(P0QB)-L(setPxQx)
-	.int       L(P1QB)-L(setPxQx)
-	.int       L(P2QB)-L(setPxQx)
-	.int       L(P3QB)-L(setPxQx)
-	.int       L(P4QB)-L(setPxQx)
-	.int       L(P5QB)-L(setPxQx)
-	.int       L(P6QB)-L(setPxQx)
-	.int       L(P7QB)-L(setPxQx)
-
-	.int       L(P0QC)-L(setPxQx)
-	.int       L(P1QC)-L(setPxQx)
-	.int       L(P2QC)-L(setPxQx)
-	.int       L(P3QC)-L(setPxQx)
-	.int       L(P4QC)-L(setPxQx)
-	.int       L(P5QC)-L(setPxQx)
-	.int       L(P6QC)-L(setPxQx)
-	.int       L(P7QC)-L(setPxQx)
-
-	.int       L(P0QD)-L(setPxQx)
-	.int       L(P1QD)-L(setPxQx)
-	.int       L(P2QD)-L(setPxQx)
-	.int       L(P3QD)-L(setPxQx)
-	.int       L(P4QD)-L(setPxQx)
-	.int       L(P5QD)-L(setPxQx)
-	.int       L(P6QD)-L(setPxQx)
-	.int       L(P7QD)-L(setPxQx)
-
-	.int       L(P0QE)-L(setPxQx)
-	.int       L(P1QE)-L(setPxQx)
-	.int       L(P2QE)-L(setPxQx)
-	.int       L(P3QE)-L(setPxQx)
-	.int       L(P4QE)-L(setPxQx)
-	.int       L(P5QE)-L(setPxQx)
-	.int       L(P6QE)-L(setPxQx)
-	.int       L(P7QE)-L(setPxQx)
-
-	.int       L(P0QF)-L(setPxQx)
-	.int       L(P1QF)-L(setPxQx)
-	.int       L(P2QF)-L(setPxQx)
-	.int       L(P3QF)-L(setPxQx)
-	.int       L(P4QF)-L(setPxQx)
-	.int       L(P5QF)-L(setPxQx)
-	.int       L(P6QF)-L(setPxQx)
-	.int       L(P7QF)-L(setPxQx)
-
-	.int       L(P0QG)-L(setPxQx)
-	.int       L(P1QG)-L(setPxQx)
-	.int       L(P2QG)-L(setPxQx)
-	.int       L(P3QG)-L(setPxQx)
-	.int       L(P4QG)-L(setPxQx)
-	.int       L(P5QG)-L(setPxQx)
-	.int       L(P6QG)-L(setPxQx)
-	.int       L(P7QG)-L(setPxQx)
-
-	.int       L(P0QH)-L(setPxQx)
-	.int       L(P1QH)-L(setPxQx)
-	.int       L(P2QH)-L(setPxQx)
-	.int       L(P3QH)-L(setPxQx)
-	.int       L(P4QH)-L(setPxQx)
-	.int       L(P5QH)-L(setPxQx)
-	.int       L(P6QH)-L(setPxQx)
-	.int       L(P7QH)-L(setPxQx)
-
-	.int       L(P0QI)-L(setPxQx)
+	.short     L(Got0)-L(Got0)
+	.short     L(P1Q0)-L(Got0)
+	.short     L(P2Q0)-L(Got0)
+	.short     L(P3Q0)-L(Got0)
+	.short     L(P4Q0)-L(Got0)
+	.short     L(P5Q0)-L(Got0)
+	.short     L(P6Q0)-L(Got0)
+	.short     L(P7Q0)-L(Got0)
+
+	.short     L(P0Q1)-L(Got0)
+	.short     L(P1Q1)-L(Got0)
+	.short     L(P2Q1)-L(Got0)
+	.short     L(P3Q1)-L(Got0)
+	.short     L(P4Q1)-L(Got0)
+	.short     L(P5Q1)-L(Got0)
+	.short     L(P6Q1)-L(Got0)
+	.short     L(P7Q1)-L(Got0)
+
+	.short     L(P0Q2)-L(Got0)
+	.short     L(P1Q2)-L(Got0)
+	.short     L(P2Q2)-L(Got0)
+	.short     L(P3Q2)-L(Got0)
+	.short     L(P4Q2)-L(Got0)
+	.short     L(P5Q2)-L(Got0)
+	.short     L(P6Q2)-L(Got0)
+	.short     L(P7Q2)-L(Got0)
+
+	.short     L(P0Q3)-L(Got0)
+	.short     L(P1Q3)-L(Got0)
+	.short     L(P2Q3)-L(Got0)
+	.short     L(P3Q3)-L(Got0)
+	.short     L(P4Q3)-L(Got0)
+	.short     L(P5Q3)-L(Got0)
+	.short     L(P6Q3)-L(Got0)
+	.short     L(P7Q3)-L(Got0)
+
+	.short     L(P0Q4)-L(Got0)
+	.short     L(P1Q4)-L(Got0)
+	.short     L(P2Q4)-L(Got0)
+	.short     L(P3Q4)-L(Got0)
+	.short     L(P4Q4)-L(Got0)
+	.short     L(P5Q4)-L(Got0)
+	.short     L(P6Q4)-L(Got0)
+	.short     L(P7Q4)-L(Got0)
+
+	.short     L(P0Q5)-L(Got0)
+	.short     L(P1Q5)-L(Got0)
+	.short     L(P2Q5)-L(Got0)
+	.short     L(P3Q5)-L(Got0)
+	.short     L(P4Q5)-L(Got0)
+	.short     L(P5Q5)-L(Got0)
+	.short     L(P6Q5)-L(Got0)
+	.short     L(P7Q5)-L(Got0)
+
+	.short     L(P0Q6)-L(Got0)
+	.short     L(P1Q6)-L(Got0)
+	.short     L(P2Q6)-L(Got0)
+	.short     L(P3Q6)-L(Got0)
+	.short     L(P4Q6)-L(Got0)
+	.short     L(P5Q6)-L(Got0)
+	.short     L(P6Q6)-L(Got0)
+	.short     L(P7Q6)-L(Got0)
+
+	.short     L(P0Q7)-L(Got0)
+	.short     L(P1Q7)-L(Got0)
+	.short     L(P2Q7)-L(Got0)
+	.short     L(P3Q7)-L(Got0)
+	.short     L(P4Q7)-L(Got0)
+	.short     L(P5Q7)-L(Got0)
+	.short     L(P6Q7)-L(Got0)
+	.short     L(P7Q7)-L(Got0)
+
+	.short     L(P0Q8)-L(Got0)
+	.short     L(P1Q8)-L(Got0)
+	.short     L(P2Q8)-L(Got0)
+	.short     L(P3Q8)-L(Got0)
+	.short     L(P4Q8)-L(Got0)
+	.short     L(P5Q8)-L(Got0)
+	.short     L(P6Q8)-L(Got0)
+	.short     L(P7Q8)-L(Got0)
+
+	.short     L(P0Q9)-L(Got0)
+	.short     L(P1Q9)-L(Got0)
+	.short     L(P2Q9)-L(Got0)
+	.short     L(P3Q9)-L(Got0)
+	.short     L(P4Q9)-L(Got0)
+	.short     L(P5Q9)-L(Got0)
+	.short     L(P6Q9)-L(Got0)
+	.short     L(P7Q9)-L(Got0)
+
+	.short     L(P0QA)-L(Got0)
+	.short     L(P1QA)-L(Got0)
+	.short     L(P2QA)-L(Got0)
+	.short     L(P3QA)-L(Got0)
+	.short     L(P4QA)-L(Got0)
+	.short     L(P5QA)-L(Got0)
+	.short     L(P6QA)-L(Got0)
+	.short     L(P7QA)-L(Got0)
+
+	.short     L(P0QB)-L(Got0)
+	.short     L(P1QB)-L(Got0)
+	.short     L(P2QB)-L(Got0)
+	.short     L(P3QB)-L(Got0)
+	.short     L(P4QB)-L(Got0)
+	.short     L(P5QB)-L(Got0)
+	.short     L(P6QB)-L(Got0)
+	.short     L(P7QB)-L(Got0)
+
+	.short     L(P0QC)-L(Got0)
+	.short     L(P1QC)-L(Got0)
+	.short     L(P2QC)-L(Got0)
+	.short     L(P3QC)-L(Got0)
+	.short     L(P4QC)-L(Got0)
+	.short     L(P5QC)-L(Got0)
+	.short     L(P6QC)-L(Got0)
+	.short     L(P7QC)-L(Got0)
+
+	.short     L(P0QD)-L(Got0)
+	.short     L(P1QD)-L(Got0)
+	.short     L(P2QD)-L(Got0)
+	.short     L(P3QD)-L(Got0)
+	.short     L(P4QD)-L(Got0)
+	.short     L(P5QD)-L(Got0)
+	.short     L(P6QD)-L(Got0)
+	.short     L(P7QD)-L(Got0)
+
+	.short     L(P0QE)-L(Got0)
+	.short     L(P1QE)-L(Got0)
+	.short     L(P2QE)-L(Got0)
+	.short     L(P3QE)-L(Got0)
+	.short     L(P4QE)-L(Got0)
+	.short     L(P5QE)-L(Got0)
+	.short     L(P6QE)-L(Got0)
+	.short     L(P7QE)-L(Got0)
+
+	.short     L(P0QF)-L(Got0)
+	.short     L(P1QF)-L(Got0)
+	.short     L(P2QF)-L(Got0)
+	.short     L(P3QF)-L(Got0)
+	.short     L(P4QF)-L(Got0)
+	.short     L(P5QF)-L(Got0)
+	.short     L(P6QF)-L(Got0)
+	.short     L(P7QF)-L(Got0)
+
+	.short     L(P0QG)-L(Got0)
+	.short     L(P1QG)-L(Got0)
+	.short     L(P2QG)-L(Got0)
+	.short     L(P3QG)-L(Got0)
+	.short     L(P4QG)-L(Got0)
+	.short     L(P5QG)-L(Got0)
+	.short     L(P6QG)-L(Got0)
+	.short     L(P7QG)-L(Got0)
+
+	.short     L(P0QH)-L(Got0)
+	.short     L(P1QH)-L(Got0)
+	.short     L(P2QH)-L(Got0)
+	.short     L(P3QH)-L(Got0)
+	.short     L(P4QH)-L(Got0)
+	.short     L(P5QH)-L(Got0)
+	.short     L(P6QH)-L(Got0)
+	.short     L(P7QH)-L(Got0)
+
+	.short     L(P0QI)-L(Got0)
 # ifdef USE_EXTRA_TABLE
-	.int       L(P1QI)-L(setPxQx)
-	.int       L(P2QI)-L(setPxQx)
-	.int       L(P3QI)-L(setPxQx)
-	.int       L(P4QI)-L(setPxQx)
-	.int       L(P5QI)-L(setPxQx)
-	.int       L(P6QI)-L(setPxQx)
-	.int       L(P7QI)-L(setPxQx)
+	.short     L(P1QI)-L(Got0)
+	.short     L(P2QI)-L(Got0)
+	.short     L(P3QI)-L(Got0)
+	.short     L(P4QI)-L(Got0)
+	.short     L(P5QI)-L(Got0)
+	.short     L(P6QI)-L(Got0)
+	.short     L(P7QI)-L(Got0)
 # endif
 #endif
 	.popsection
@@ -499,18 +501,20 @@ L(ck_mem_ops_method):
 # align to 16 byte boundary first
 	#test $0xf,%rdi
 	#jz L(aligned_now)
-	 lea    L(AliPxQx)(%rip),%r11
-	 mov    $0x10,%r10
-	 mov    %rdi,%r9
-	 and    $0xf,%r9
-	 sub    %r9,%r10
-	 and    $0xf,%r10
-	 add    %r10,%rdi
-	 sub    %r10,%r8
+	mov    $0x10,%r10
+	mov    %rdi,%r9
+	and    $0xf,%r9 # r9 = rdi & 15
+	sub    %r9,%r10
+	and    $0xf,%r10 # r10 = (16 - r9) & 15, used below as the table index
+	add    %r10,%rdi # advance rdi by r10
+	sub    %r10,%r8 # r8 -= r10 (presumably the remaining byte count; confirm)
 #ifndef PIC
+	lea    L(AliPxQx)(%rip),%r11 # nonPIC: table of 8-byte jump targets
 	jmpq   *(%r11,%r10,8)
 #else
-	movslq (%r11,%r10,4),%rcx
+	lea    L(aligned_now)(%rip), %r11 # r11 = base address that the table entries are added to
+	lea    L(AliPxQx)(%rip),%rcx # rcx = address of the table of 2-byte entries
+	movswq (%rcx,%r10,2),%rcx # rcx = sign-extended 16-bit entry number r10
 	lea    (%rcx,%r11,1),%r11
 	jmpq   *%r11
 #endif
@@ -525,23 +529,23 @@ L(AliPxQx):
 	.quad       L(A4Q1), L(A5Q1), L(A6Q1), L(A7Q1)
 #else
 L(AliPxQx):
-	.int       L(aligned_now)-L(AliPxQx)
-	.int       L(A1Q0)-L(AliPxQx)
-	.int       L(A2Q0)-L(AliPxQx)
-	.int       L(A3Q0)-L(AliPxQx)
-	.int       L(A4Q0)-L(AliPxQx)
-	.int       L(A5Q0)-L(AliPxQx)
-	.int       L(A6Q0)-L(AliPxQx)
-	.int       L(A7Q0)-L(AliPxQx)
-
-	.int       L(A0Q1)-L(AliPxQx)
-	.int       L(A1Q1)-L(AliPxQx)
-	.int       L(A2Q1)-L(AliPxQx)
-	.int       L(A3Q1)-L(AliPxQx)
-	.int       L(A4Q1)-L(AliPxQx)
-	.int       L(A5Q1)-L(AliPxQx)
-	.int       L(A6Q1)-L(AliPxQx)
-	.int       L(A7Q1)-L(AliPxQx)
+	.short     L(aligned_now)-L(aligned_now)
+	.short     L(A1Q0)-L(aligned_now)
+	.short     L(A2Q0)-L(aligned_now)
+	.short     L(A3Q0)-L(aligned_now)
+	.short     L(A4Q0)-L(aligned_now)
+	.short     L(A5Q0)-L(aligned_now)
+	.short     L(A6Q0)-L(aligned_now)
+	.short     L(A7Q0)-L(aligned_now)
+
+	.short     L(A0Q1)-L(aligned_now)
+	.short     L(A1Q1)-L(aligned_now)
+	.short     L(A2Q1)-L(aligned_now)
+	.short     L(A3Q1)-L(aligned_now)
+	.short     L(A4Q1)-L(aligned_now)
+	.short     L(A5Q1)-L(aligned_now)
+	.short     L(A6Q1)-L(aligned_now)
+	.short     L(A7Q1)-L(aligned_now)
 #endif
 	.popsection
 
@@ -634,12 +638,14 @@ L(8byte_move_loop):
 L(8byte_move_skip):
 	andl	$127,%r8d
 	lea    	(%rdi,%r8,1),%rdi
-	lea    	L(setPxQx)(%rip),%r11
 
 #ifndef PIC
+	lea    	L(setPxQx)(%rip),%r11 # nonPIC: table of 8-byte jump targets indexed by r8
 	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
 #else
-	movslq	(%r11,%r8,4),%rcx
+	lea    	L(Got0)(%rip),%r11 # r11 = base address that the table entry is added to
+	lea	L(setPxQx)(%rip),%rcx # rcx = address of the table of 2-byte entries
+	movswq	(%rcx,%r8,2),%rcx # rcx = sign-extended 16-bit entry number r8
 	lea    	(%rcx,%r11,1),%r11
 	jmpq   	*%r11
 #endif
@@ -676,11 +682,13 @@ L(8byte_stos_skip):
 
 	andl	$7,%r8d
 	lea    	(%rdi,%r8,1),%rdi
-	lea    	L(setPxQx)(%rip),%r11
 #ifndef PIC
+	lea    	L(setPxQx)(%rip),%r11 # nonPIC: table of 8-byte jump targets indexed by r8
 	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
 #else
-	movslq	(%r11,%r8,4),%rcx
+	lea    	L(Got0)(%rip),%r11 # r11 = base address that the table entry is added to
+	lea     L(setPxQx)(%rip),%rcx # rcx = address of the table of 2-byte entries
+	movswq	(%rcx,%r8,2),%rcx # rcx = sign-extended 16-bit entry number r8
 	lea	(%rcx,%r11,1),%r11
 	jmpq   	*%r11
 #endif
@@ -722,11 +730,13 @@ L(8byte_nt_move_skip):
 	andl	$127,%r8d
 
 	lea    	(%rdi,%r8,1),%rdi
-	lea    	L(setPxQx)(%rip),%r11
 #ifndef PIC
+	lea    	L(setPxQx)(%rip),%r11 # nonPIC: table of 8-byte jump targets indexed by r8
 	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
 #else
-	movslq	(%r11,%r8,4),%rcx
+	lea    	L(Got0)(%rip),%r11 # r11 = base address that the table entry is added to
+	lea     L(setPxQx)(%rip),%rcx # rcx = address of the table of 2-byte entries
+	movswq	(%rcx,%r8,2),%rcx # rcx = sign-extended 16-bit entry number r8
 	lea    	(%rcx,%r11,1),%r11
 	jmpq   	*%r11
 #endif
@@ -736,15 +746,17 @@ L(SSE_pre):
 	 movd   %rdx,%xmm0
 	 punpcklqdq %xmm0,%xmm0
 
-	 lea    L(SSExDx)(%rip),%r9        # for later after the alignment
 	 cmp    $0xb0,%r8 # 176
 	 jge    L(byte32sse2_pre)
 
 	 add    %r8,%rdi
 #ifndef PIC
+	 lea    L(SSExDx)(%rip),%r9 # nonPIC: table of 8-byte jump targets indexed by r8
 	 jmpq   *(%r9,%r8,8)
 #else
-	 movslq    (%r9,%r8,4),%rcx
+	 lea    L(SSE0Q0)(%rip),%r9 # r9 = base address that the table entry is added to
+	 lea    L(SSExDx)(%rip),%rcx # rcx = address of the table of 2-byte entries
+	 movswq (%rcx,%r8,2),%rcx # rcx = sign-extended 16-bit entry number r8
 	 lea    (%rcx,%r9,1),%r9
 	 jmpq   *%r9
 #endif
@@ -1012,13 +1024,15 @@ L(byte32sse2):
 
 	lea    0x80(%rdi),%rdi
 	jge    L(byte32sse2)
-	lea    L(SSExDx)(%rip),%r11
 	add    %r8,%rdi
 #ifndef PIC
+	lea    L(SSExDx)(%rip),%r11 # nonPIC: table of 8-byte jump targets indexed by r8
 	jmpq   *(%r11,%r8,8)
 #else
-	movslq    (%r11,%r8,4),%rcx
-	lea   (%rcx,%r11,1),%r11
+	lea    L(SSE0Q0)(%rip),%r11 # r11 = base address that the table entry is added to
+	lea    L(SSExDx)(%rip),%rcx # rcx = address of the table of 2-byte entries
+	movswq (%rcx,%r8,2),%rcx # rcx = sign-extended 16-bit entry number r8
+	lea    (%rcx,%r11,1),%r11 # r11 = base + entry, the jump target
 	jmpq   *%r11
 #endif
 
@@ -1044,13 +1058,15 @@ L(sse2_nt_move):
 
 	lea    0x80(%rdi),%rdi
 	jge    L(sse2_nt_move)
-	lea    L(SSExDx)(%rip),%r11
 	sfence
 	add    %r8,%rdi
 #ifndef PIC
+	lea    L(SSExDx)(%rip),%r11 # nonPIC: table of 8-byte jump targets indexed by r8
 	jmpq   *(%r11,%r8,8)
 #else
-	movslq (%r11,%r8,4),%rcx
+	lea    L(SSE0Q0)(%rip),%r11 # r11 = base address that the table entry is added to
+	lea    L(SSExDx)(%rip),%rcx # rcx = address of the table of 2-byte entries
+	movswq (%rcx,%r8,2),%rcx # rcx = sign-extended 16-bit entry number r8
 	lea   (%rcx,%r11,1),%r11
 	jmpq   *%r11
 #endif
@@ -1109,221 +1125,221 @@ L(SSExDx):
 	.quad       L(SSE12QB), L(SSE13QB), L(SSE14QB), L(SSE15QB)
 #else
 L(SSExDx):
-	.int       L(SSE0Q0) -L(SSExDx)
-	.int       L(SSE1Q0) -L(SSExDx)
-	.int       L(SSE2Q0) -L(SSExDx)
-	.int       L(SSE3Q0) -L(SSExDx)
-	.int       L(SSE4Q0) -L(SSExDx)
-	.int       L(SSE5Q0) -L(SSExDx)
-	.int       L(SSE6Q0) -L(SSExDx)
-	.int       L(SSE7Q0) -L(SSExDx)
-
-	.int       L(SSE8Q0) -L(SSExDx)
-	.int       L(SSE9Q0) -L(SSExDx)
-	.int       L(SSE10Q0)-L(SSExDx)
-	.int       L(SSE11Q0)-L(SSExDx)
-	.int       L(SSE12Q0)-L(SSExDx)
-	.int       L(SSE13Q0)-L(SSExDx)
-	.int       L(SSE14Q0)-L(SSExDx)
-	.int       L(SSE15Q0)-L(SSExDx)
-
-	.int       L(SSE0Q1) -L(SSExDx)
-	.int       L(SSE1Q1) -L(SSExDx)
-	.int       L(SSE2Q1) -L(SSExDx)
-	.int       L(SSE3Q1) -L(SSExDx)
-	.int       L(SSE4Q1) -L(SSExDx)
-	.int       L(SSE5Q1) -L(SSExDx)
-	.int       L(SSE6Q1) -L(SSExDx)
-	.int       L(SSE7Q1) -L(SSExDx)
-
-	.int       L(SSE8Q1) -L(SSExDx)
-	.int       L(SSE9Q1) -L(SSExDx)
-	.int       L(SSE10Q1)-L(SSExDx)
-	.int       L(SSE11Q1)-L(SSExDx)
-	.int       L(SSE12Q1)-L(SSExDx)
-	.int       L(SSE13Q1)-L(SSExDx)
-	.int       L(SSE14Q1)-L(SSExDx)
-	.int       L(SSE15Q1)-L(SSExDx)
-
-	.int       L(SSE0Q2) -L(SSExDx)
-	.int       L(SSE1Q2) -L(SSExDx)
-	.int       L(SSE2Q2) -L(SSExDx)
-	.int       L(SSE3Q2) -L(SSExDx)
-	.int       L(SSE4Q2) -L(SSExDx)
-	.int       L(SSE5Q2) -L(SSExDx)
-	.int       L(SSE6Q2) -L(SSExDx)
-	.int       L(SSE7Q2) -L(SSExDx)
-
-	.int       L(SSE8Q2) -L(SSExDx)
-	.int       L(SSE9Q2) -L(SSExDx)
-	.int       L(SSE10Q2)-L(SSExDx)
-	.int       L(SSE11Q2)-L(SSExDx)
-	.int       L(SSE12Q2)-L(SSExDx)
-	.int       L(SSE13Q2)-L(SSExDx)
-	.int       L(SSE14Q2)-L(SSExDx)
-	.int       L(SSE15Q2)-L(SSExDx)
-
-	.int       L(SSE0Q3) -L(SSExDx)
-	.int       L(SSE1Q3) -L(SSExDx)
-	.int       L(SSE2Q3) -L(SSExDx)
-	.int       L(SSE3Q3) -L(SSExDx)
-	.int       L(SSE4Q3) -L(SSExDx)
-	.int       L(SSE5Q3) -L(SSExDx)
-	.int       L(SSE6Q3) -L(SSExDx)
-	.int       L(SSE7Q3) -L(SSExDx)
-
-	.int       L(SSE8Q3) -L(SSExDx)
-	.int       L(SSE9Q3) -L(SSExDx)
-	.int       L(SSE10Q3)-L(SSExDx)
-	.int       L(SSE11Q3)-L(SSExDx)
-	.int       L(SSE12Q3)-L(SSExDx)
-	.int       L(SSE13Q3)-L(SSExDx)
-	.int       L(SSE14Q3)-L(SSExDx)
-	.int       L(SSE15Q3)-L(SSExDx)
-
-	.int       L(SSE0Q4) -L(SSExDx)
-	.int       L(SSE1Q4) -L(SSExDx)
-	.int       L(SSE2Q4) -L(SSExDx)
-	.int       L(SSE3Q4) -L(SSExDx)
-	.int       L(SSE4Q4) -L(SSExDx)
-	.int       L(SSE5Q4) -L(SSExDx)
-	.int       L(SSE6Q4) -L(SSExDx)
-	.int       L(SSE7Q4) -L(SSExDx)
-
-	.int       L(SSE8Q4) -L(SSExDx)
-	.int       L(SSE9Q4) -L(SSExDx)
-	.int       L(SSE10Q4)-L(SSExDx)
-	.int       L(SSE11Q4)-L(SSExDx)
-	.int       L(SSE12Q4)-L(SSExDx)
-	.int       L(SSE13Q4)-L(SSExDx)
-	.int       L(SSE14Q4)-L(SSExDx)
-	.int       L(SSE15Q4)-L(SSExDx)
-
-	.int       L(SSE0Q5) -L(SSExDx)
-	.int       L(SSE1Q5) -L(SSExDx)
-	.int       L(SSE2Q5) -L(SSExDx)
-	.int       L(SSE3Q5) -L(SSExDx)
-	.int       L(SSE4Q5) -L(SSExDx)
-	.int       L(SSE5Q5) -L(SSExDx)
-	.int       L(SSE6Q5) -L(SSExDx)
-	.int       L(SSE7Q5) -L(SSExDx)
-
-	.int       L(SSE8Q5) -L(SSExDx)
-	.int       L(SSE9Q5) -L(SSExDx)
-	.int       L(SSE10Q5)-L(SSExDx)
-	.int       L(SSE11Q5)-L(SSExDx)
-	.int       L(SSE12Q5)-L(SSExDx)
-	.int       L(SSE13Q5)-L(SSExDx)
-	.int       L(SSE14Q5)-L(SSExDx)
-	.int       L(SSE15Q5)-L(SSExDx)
-
-	.int       L(SSE0Q6) -L(SSExDx)
-	.int       L(SSE1Q6) -L(SSExDx)
-	.int       L(SSE2Q6) -L(SSExDx)
-	.int       L(SSE3Q6) -L(SSExDx)
-	.int       L(SSE4Q6) -L(SSExDx)
-	.int       L(SSE5Q6) -L(SSExDx)
-	.int       L(SSE6Q6) -L(SSExDx)
-	.int       L(SSE7Q6) -L(SSExDx)
-
-	.int       L(SSE8Q6) -L(SSExDx)
-	.int       L(SSE9Q6) -L(SSExDx)
-	.int       L(SSE10Q6)-L(SSExDx)
-	.int       L(SSE11Q6)-L(SSExDx)
-	.int       L(SSE12Q6)-L(SSExDx)
-	.int       L(SSE13Q6)-L(SSExDx)
-	.int       L(SSE14Q6)-L(SSExDx)
-	.int       L(SSE15Q6)-L(SSExDx)
-
-	.int       L(SSE0Q7) -L(SSExDx)
-	.int       L(SSE1Q7) -L(SSExDx)
-	.int       L(SSE2Q7) -L(SSExDx)
-	.int       L(SSE3Q7) -L(SSExDx)
-	.int       L(SSE4Q7) -L(SSExDx)
-	.int       L(SSE5Q7) -L(SSExDx)
-	.int       L(SSE6Q7) -L(SSExDx)
-	.int       L(SSE7Q7) -L(SSExDx)
-
-	.int       L(SSE8Q7) -L(SSExDx)
-	.int       L(SSE9Q7) -L(SSExDx)
-	.int       L(SSE10Q7)-L(SSExDx)
-	.int       L(SSE11Q7)-L(SSExDx)
-	.int       L(SSE12Q7)-L(SSExDx)
-	.int       L(SSE13Q7)-L(SSExDx)
-	.int       L(SSE14Q7)-L(SSExDx)
-	.int       L(SSE15Q7)-L(SSExDx)
-
-	.int       L(SSE0Q8) -L(SSExDx)
-	.int       L(SSE1Q8) -L(SSExDx)
-	.int       L(SSE2Q8) -L(SSExDx)
-	.int       L(SSE3Q8) -L(SSExDx)
-	.int       L(SSE4Q8) -L(SSExDx)
-	.int       L(SSE5Q8) -L(SSExDx)
-	.int       L(SSE6Q8) -L(SSExDx)
-	.int       L(SSE7Q8) -L(SSExDx)
-
-	.int       L(SSE8Q8) -L(SSExDx)
-	.int       L(SSE9Q8) -L(SSExDx)
-	.int       L(SSE10Q8)-L(SSExDx)
-	.int       L(SSE11Q8)-L(SSExDx)
-	.int       L(SSE12Q8)-L(SSExDx)
-	.int       L(SSE13Q8)-L(SSExDx)
-	.int       L(SSE14Q8)-L(SSExDx)
-	.int       L(SSE15Q8)-L(SSExDx)
-
-	.int       L(SSE0Q9) -L(SSExDx)
-	.int       L(SSE1Q9) -L(SSExDx)
-	.int       L(SSE2Q9) -L(SSExDx)
-	.int       L(SSE3Q9) -L(SSExDx)
-	.int       L(SSE4Q9) -L(SSExDx)
-	.int       L(SSE5Q9) -L(SSExDx)
-	.int       L(SSE6Q9) -L(SSExDx)
-	.int       L(SSE7Q9) -L(SSExDx)
-
-	.int       L(SSE8Q9) -L(SSExDx)
-	.int       L(SSE9Q9) -L(SSExDx)
-	.int       L(SSE10Q9)-L(SSExDx)
-	.int       L(SSE11Q9)-L(SSExDx)
-	.int       L(SSE12Q9)-L(SSExDx)
-	.int       L(SSE13Q9)-L(SSExDx)
-	.int       L(SSE14Q9)-L(SSExDx)
-	.int       L(SSE15Q9)-L(SSExDx)
-
-	.int       L(SSE0QA) -L(SSExDx)
-	.int       L(SSE1QA) -L(SSExDx)
-	.int       L(SSE2QA) -L(SSExDx)
-	.int       L(SSE3QA) -L(SSExDx)
-	.int       L(SSE4QA) -L(SSExDx)
-	.int       L(SSE5QA) -L(SSExDx)
-	.int       L(SSE6QA) -L(SSExDx)
-	.int       L(SSE7QA) -L(SSExDx)
-
-	.int       L(SSE8QA) -L(SSExDx)
-	.int       L(SSE9QA) -L(SSExDx)
-	.int       L(SSE10QA)-L(SSExDx)
-	.int       L(SSE11QA)-L(SSExDx)
-	.int       L(SSE12QA)-L(SSExDx)
-	.int       L(SSE13QA)-L(SSExDx)
-	.int       L(SSE14QA)-L(SSExDx)
-	.int       L(SSE15QA)-L(SSExDx)
-
-	.int       L(SSE0QB) -L(SSExDx)
-	.int       L(SSE1QB) -L(SSExDx)
-	.int       L(SSE2QB) -L(SSExDx)
-	.int       L(SSE3QB) -L(SSExDx)
-	.int       L(SSE4QB) -L(SSExDx)
-	.int       L(SSE5QB) -L(SSExDx)
-	.int       L(SSE6QB) -L(SSExDx)
-	.int       L(SSE7QB) -L(SSExDx)
-
-	.int       L(SSE8QB) -L(SSExDx)
-	.int       L(SSE9QB) -L(SSExDx)
-	.int       L(SSE10QB)-L(SSExDx)
-	.int       L(SSE11QB)-L(SSExDx)
-	.int       L(SSE12QB)-L(SSExDx)
-	.int       L(SSE13QB)-L(SSExDx)
-	.int       L(SSE14QB)-L(SSExDx)
-	.int       L(SSE15QB)-L(SSExDx)
+	.short     L(SSE0Q0) -L(SSE0Q0)
+	.short     L(SSE1Q0) -L(SSE0Q0)
+	.short     L(SSE2Q0) -L(SSE0Q0)
+	.short     L(SSE3Q0) -L(SSE0Q0)
+	.short     L(SSE4Q0) -L(SSE0Q0)
+	.short     L(SSE5Q0) -L(SSE0Q0)
+	.short     L(SSE6Q0) -L(SSE0Q0)
+	.short     L(SSE7Q0) -L(SSE0Q0)
+
+	.short     L(SSE8Q0) -L(SSE0Q0)
+	.short     L(SSE9Q0) -L(SSE0Q0)
+	.short     L(SSE10Q0)-L(SSE0Q0)
+	.short     L(SSE11Q0)-L(SSE0Q0)
+	.short     L(SSE12Q0)-L(SSE0Q0)
+	.short     L(SSE13Q0)-L(SSE0Q0)
+	.short     L(SSE14Q0)-L(SSE0Q0)
+	.short     L(SSE15Q0)-L(SSE0Q0)
+
+	.short     L(SSE0Q1) -L(SSE0Q0)
+	.short     L(SSE1Q1) -L(SSE0Q0)
+	.short     L(SSE2Q1) -L(SSE0Q0)
+	.short     L(SSE3Q1) -L(SSE0Q0)
+	.short     L(SSE4Q1) -L(SSE0Q0)
+	.short     L(SSE5Q1) -L(SSE0Q0)
+	.short     L(SSE6Q1) -L(SSE0Q0)
+	.short     L(SSE7Q1) -L(SSE0Q0)
+
+	.short     L(SSE8Q1) -L(SSE0Q0)
+	.short     L(SSE9Q1) -L(SSE0Q0)
+	.short     L(SSE10Q1)-L(SSE0Q0)
+	.short     L(SSE11Q1)-L(SSE0Q0)
+	.short     L(SSE12Q1)-L(SSE0Q0)
+	.short     L(SSE13Q1)-L(SSE0Q0)
+	.short     L(SSE14Q1)-L(SSE0Q0)
+	.short     L(SSE15Q1)-L(SSE0Q0)
+
+	.short     L(SSE0Q2) -L(SSE0Q0)
+	.short     L(SSE1Q2) -L(SSE0Q0)
+	.short     L(SSE2Q2) -L(SSE0Q0)
+	.short     L(SSE3Q2) -L(SSE0Q0)
+	.short     L(SSE4Q2) -L(SSE0Q0)
+	.short     L(SSE5Q2) -L(SSE0Q0)
+	.short     L(SSE6Q2) -L(SSE0Q0)
+	.short     L(SSE7Q2) -L(SSE0Q0)
+
+	.short     L(SSE8Q2) -L(SSE0Q0)
+	.short     L(SSE9Q2) -L(SSE0Q0)
+	.short     L(SSE10Q2)-L(SSE0Q0)
+	.short     L(SSE11Q2)-L(SSE0Q0)
+	.short     L(SSE12Q2)-L(SSE0Q0)
+	.short     L(SSE13Q2)-L(SSE0Q0)
+	.short     L(SSE14Q2)-L(SSE0Q0)
+	.short     L(SSE15Q2)-L(SSE0Q0)
+
+	.short     L(SSE0Q3) -L(SSE0Q0)
+	.short     L(SSE1Q3) -L(SSE0Q0)
+	.short     L(SSE2Q3) -L(SSE0Q0)
+	.short     L(SSE3Q3) -L(SSE0Q0)
+	.short     L(SSE4Q3) -L(SSE0Q0)
+	.short     L(SSE5Q3) -L(SSE0Q0)
+	.short     L(SSE6Q3) -L(SSE0Q0)
+	.short     L(SSE7Q3) -L(SSE0Q0)
+
+	.short     L(SSE8Q3) -L(SSE0Q0)
+	.short     L(SSE9Q3) -L(SSE0Q0)
+	.short     L(SSE10Q3)-L(SSE0Q0)
+	.short     L(SSE11Q3)-L(SSE0Q0)
+	.short     L(SSE12Q3)-L(SSE0Q0)
+	.short     L(SSE13Q3)-L(SSE0Q0)
+	.short     L(SSE14Q3)-L(SSE0Q0)
+	.short     L(SSE15Q3)-L(SSE0Q0)
+
+	.short     L(SSE0Q4) -L(SSE0Q0)
+	.short     L(SSE1Q4) -L(SSE0Q0)
+	.short     L(SSE2Q4) -L(SSE0Q0)
+	.short     L(SSE3Q4) -L(SSE0Q0)
+	.short     L(SSE4Q4) -L(SSE0Q0)
+	.short     L(SSE5Q4) -L(SSE0Q0)
+	.short     L(SSE6Q4) -L(SSE0Q0)
+	.short     L(SSE7Q4) -L(SSE0Q0)
+
+	.short     L(SSE8Q4) -L(SSE0Q0)
+	.short     L(SSE9Q4) -L(SSE0Q0)
+	.short     L(SSE10Q4)-L(SSE0Q0)
+	.short     L(SSE11Q4)-L(SSE0Q0)
+	.short     L(SSE12Q4)-L(SSE0Q0)
+	.short     L(SSE13Q4)-L(SSE0Q0)
+	.short     L(SSE14Q4)-L(SSE0Q0)
+	.short     L(SSE15Q4)-L(SSE0Q0)
+
+	.short     L(SSE0Q5) -L(SSE0Q0)
+	.short     L(SSE1Q5) -L(SSE0Q0)
+	.short     L(SSE2Q5) -L(SSE0Q0)
+	.short     L(SSE3Q5) -L(SSE0Q0)
+	.short     L(SSE4Q5) -L(SSE0Q0)
+	.short     L(SSE5Q5) -L(SSE0Q0)
+	.short     L(SSE6Q5) -L(SSE0Q0)
+	.short     L(SSE7Q5) -L(SSE0Q0)
+
+	.short     L(SSE8Q5) -L(SSE0Q0)
+	.short     L(SSE9Q5) -L(SSE0Q0)
+	.short     L(SSE10Q5)-L(SSE0Q0)
+	.short     L(SSE11Q5)-L(SSE0Q0)
+	.short     L(SSE12Q5)-L(SSE0Q0)
+	.short     L(SSE13Q5)-L(SSE0Q0)
+	.short     L(SSE14Q5)-L(SSE0Q0)
+	.short     L(SSE15Q5)-L(SSE0Q0)
+
+	.short     L(SSE0Q6) -L(SSE0Q0)
+	.short     L(SSE1Q6) -L(SSE0Q0)
+	.short     L(SSE2Q6) -L(SSE0Q0)
+	.short     L(SSE3Q6) -L(SSE0Q0)
+	.short     L(SSE4Q6) -L(SSE0Q0)
+	.short     L(SSE5Q6) -L(SSE0Q0)
+	.short     L(SSE6Q6) -L(SSE0Q0)
+	.short     L(SSE7Q6) -L(SSE0Q0)
+
+	.short     L(SSE8Q6) -L(SSE0Q0)
+	.short     L(SSE9Q6) -L(SSE0Q0)
+	.short     L(SSE10Q6)-L(SSE0Q0)
+	.short     L(SSE11Q6)-L(SSE0Q0)
+	.short     L(SSE12Q6)-L(SSE0Q0)
+	.short     L(SSE13Q6)-L(SSE0Q0)
+	.short     L(SSE14Q6)-L(SSE0Q0)
+	.short     L(SSE15Q6)-L(SSE0Q0)
+
+	.short     L(SSE0Q7) -L(SSE0Q0)
+	.short     L(SSE1Q7) -L(SSE0Q0)
+	.short     L(SSE2Q7) -L(SSE0Q0)
+	.short     L(SSE3Q7) -L(SSE0Q0)
+	.short     L(SSE4Q7) -L(SSE0Q0)
+	.short     L(SSE5Q7) -L(SSE0Q0)
+	.short     L(SSE6Q7) -L(SSE0Q0)
+	.short     L(SSE7Q7) -L(SSE0Q0)
+
+	.short     L(SSE8Q7) -L(SSE0Q0)
+	.short     L(SSE9Q7) -L(SSE0Q0)
+	.short     L(SSE10Q7)-L(SSE0Q0)
+	.short     L(SSE11Q7)-L(SSE0Q0)
+	.short     L(SSE12Q7)-L(SSE0Q0)
+	.short     L(SSE13Q7)-L(SSE0Q0)
+	.short     L(SSE14Q7)-L(SSE0Q0)
+	.short     L(SSE15Q7)-L(SSE0Q0)
+
+	.short     L(SSE0Q8) -L(SSE0Q0)
+	.short     L(SSE1Q8) -L(SSE0Q0)
+	.short     L(SSE2Q8) -L(SSE0Q0)
+	.short     L(SSE3Q8) -L(SSE0Q0)
+	.short     L(SSE4Q8) -L(SSE0Q0)
+	.short     L(SSE5Q8) -L(SSE0Q0)
+	.short     L(SSE6Q8) -L(SSE0Q0)
+	.short     L(SSE7Q8) -L(SSE0Q0)
+
+	.short     L(SSE8Q8) -L(SSE0Q0)
+	.short     L(SSE9Q8) -L(SSE0Q0)
+	.short     L(SSE10Q8)-L(SSE0Q0)
+	.short     L(SSE11Q8)-L(SSE0Q0)
+	.short     L(SSE12Q8)-L(SSE0Q0)
+	.short     L(SSE13Q8)-L(SSE0Q0)
+	.short     L(SSE14Q8)-L(SSE0Q0)
+	.short     L(SSE15Q8)-L(SSE0Q0)
+
+	.short     L(SSE0Q9) -L(SSE0Q0)
+	.short     L(SSE1Q9) -L(SSE0Q0)
+	.short     L(SSE2Q9) -L(SSE0Q0)
+	.short     L(SSE3Q9) -L(SSE0Q0)
+	.short     L(SSE4Q9) -L(SSE0Q0)
+	.short     L(SSE5Q9) -L(SSE0Q0)
+	.short     L(SSE6Q9) -L(SSE0Q0)
+	.short     L(SSE7Q9) -L(SSE0Q0)
+
+	.short     L(SSE8Q9) -L(SSE0Q0)
+	.short     L(SSE9Q9) -L(SSE0Q0)
+	.short     L(SSE10Q9)-L(SSE0Q0)
+	.short     L(SSE11Q9)-L(SSE0Q0)
+	.short     L(SSE12Q9)-L(SSE0Q0)
+	.short     L(SSE13Q9)-L(SSE0Q0)
+	.short     L(SSE14Q9)-L(SSE0Q0)
+	.short     L(SSE15Q9)-L(SSE0Q0)
+
+	.short     L(SSE0QA) -L(SSE0Q0)
+	.short     L(SSE1QA) -L(SSE0Q0)
+	.short     L(SSE2QA) -L(SSE0Q0)
+	.short     L(SSE3QA) -L(SSE0Q0)
+	.short     L(SSE4QA) -L(SSE0Q0)
+	.short     L(SSE5QA) -L(SSE0Q0)
+	.short     L(SSE6QA) -L(SSE0Q0)
+	.short     L(SSE7QA) -L(SSE0Q0)
+
+	.short     L(SSE8QA) -L(SSE0Q0)
+	.short     L(SSE9QA) -L(SSE0Q0)
+	.short     L(SSE10QA)-L(SSE0Q0)
+	.short     L(SSE11QA)-L(SSE0Q0)
+	.short     L(SSE12QA)-L(SSE0Q0)
+	.short     L(SSE13QA)-L(SSE0Q0)
+	.short     L(SSE14QA)-L(SSE0Q0)
+	.short     L(SSE15QA)-L(SSE0Q0)
+
+	.short     L(SSE0QB) -L(SSE0Q0)
+	.short     L(SSE1QB) -L(SSE0Q0)
+	.short     L(SSE2QB) -L(SSE0Q0)
+	.short     L(SSE3QB) -L(SSE0Q0)
+	.short     L(SSE4QB) -L(SSE0Q0)
+	.short     L(SSE5QB) -L(SSE0Q0)
+	.short     L(SSE6QB) -L(SSE0Q0)
+	.short     L(SSE7QB) -L(SSE0Q0)
+
+	.short     L(SSE8QB) -L(SSE0Q0)
+	.short     L(SSE9QB) -L(SSE0Q0)
+	.short     L(SSE10QB)-L(SSE0Q0)
+	.short     L(SSE11QB)-L(SSE0Q0)
+	.short     L(SSE12QB)-L(SSE0Q0)
+	.short     L(SSE13QB)-L(SSE0Q0)
+	.short     L(SSE14QB)-L(SSE0Q0)
+	.short     L(SSE15QB)-L(SSE0Q0)
 #endif
 	.popsection