-rw-r--r--  ChangeLog                             15
-rw-r--r--  elf/dl-lookup.c                       13
-rw-r--r--  elf/dl-runtime.c                       8
-rw-r--r--  nptl/ChangeLog                         8
-rw-r--r--  nptl/sysdeps/x86_64/tcb-offsets.sym    1
-rw-r--r--  nptl/sysdeps/x86_64/tls.h             73
-rw-r--r--  stdio-common/scanf15.c                 1
-rw-r--r--  stdio-common/scanf17.c                 1
-rw-r--r--  sysdeps/x86_64/dl-trampoline.S        82
-rwxr-xr-x  sysdeps/x86_64/tst-xmmymm.sh           7
10 files changed, 188 insertions, 21 deletions
diff --git a/ChangeLog b/ChangeLog
index ff34e5f5d5..23e6906d06 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2009-07-29  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-runtime.c (_dl_fixup): Indicate before _dl_lookup_symbol_x
+	call that registers used in calling conventions need to be preserved.
+	* elf/dl-lookup.c (do_lookup_x): Use RTLD_*_FOREIGN_CALL macros
+	to preserve register content if necessary.
+	* sysdeps/x86_64/dl-trampoline.S (_dl_x86_64_save_sse): New function.
+	(_dl_x86_64_restore_sse): New function.
+	* sysdeps/x86_64/tst-xmmymm.sh: There is now one more function that
+	is allowed to modify xmm/ymm registers.
+
+	* stdio-common/scanf15.c: Undefine _LIBC.  We want to test from an
+	application's perspective.
+	* stdio-common/scanf17.c: Likewise.
+
 2009-07-28  Ulrich Drepper  <drepper@redhat.com>
 
 	* csu/libc-tls.c (__libc_setup_tls) [TLS_TCB_AT_TP]: Don't add TCB
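The flag-and-save-area protocol described in the entry above can be followed in isolation.  The standalone C sketch below uses made-up names (must_save, savespace, regs) and a plain thread-local array in place of the TCB fields and the xmm/ymm registers, but it mirrors the enable/prepare/finalize control flow this patch adds: _dl_fixup raises the flag before the symbol lookup, any call site inside the lookup that may run foreign code (such as malloc) saves the registers once and clears the flag, and _dl_fixup restores the registers afterwards only if something was actually saved.

#include <stdio.h>
#include <string.h>

static __thread int must_save;         /* models header.rtld_must_xmm_save */
static __thread double savespace[8];   /* models rtld_savespace_sse */
static double regs[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };   /* models xmm0-xmm7 */

static void save_regs (void)    { memcpy (savespace, regs, sizeof regs); }
static void restore_regs (void) { memcpy (regs, savespace, sizeof regs); }

/* A call site inside the lookup code that is about to run foreign code,
   e.g. the malloc call when the dependency list grows.  The callee may
   clobber the registers, so save them first -- but only once, and only
   if the resolver asked for it.  Models RTLD_PREPARE_FOREIGN_CALL.  */
static void
prepare_foreign_call (void)
{
  if (must_save)
    {
      save_regs ();
      must_save = 0;    /* saved; later call sites need not repeat it */
    }
}

/* Stands in for an interposed allocator compiled to use SSE.  */
static void
foreign_callee (void)
{
  regs[0] = -1.0;       /* clobbers a parameter-passing register */
}

/* Models _dl_lookup_symbol_x; it only sometimes needs to allocate.  */
static void
lookup (int needs_allocation)
{
  if (needs_allocation)
    {
      prepare_foreign_call ();
      foreign_callee ();
    }
}

/* Models _dl_fixup bracketing the lookup with RTLD_ENABLE_FOREIGN_CALL
   and RTLD_FINALIZE_FOREIGN_CALL.  */
static void
fixup (int needs_allocation)
{
  must_save = 1;
  lookup (needs_allocation);
  if (must_save == 0)   /* something was saved: bring the registers back */
    restore_regs ();
  must_save = 0;
}

int
main (void)
{
  fixup (1);
  printf ("reg0 after fixup with allocation:    %g\n", regs[0]);   /* 1 */
  fixup (0);
  printf ("reg0 after fixup without allocation: %g\n", regs[0]);   /* 1 */
  return 0;
}

Both lines print 1: the clobber done by the foreign callee is undone before fixup returns, and nothing is saved or restored when no foreign call happens.
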
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 1d68d67a35..56724c9b4d 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -380,6 +380,10 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 		  if (size * 3 <= tab->n_elements * 4)
 		    {
 		      /* Expand the table.  */
+#ifdef RTLD_CHECK_FOREIGN_CALL
+		      /* This must not happen during runtime relocations.  */
+		      assert (!RTLD_CHECK_FOREIGN_CALL);
+#endif
 		      size_t newsize = _dl_higher_prime_number (size + 1);
 		      struct unique_sym *newentries
 			= calloc (sizeof (struct unique_sym), newsize);
@@ -405,6 +409,11 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 		}
 	      else
 		{
+#ifdef RTLD_CHECK_FOREIGN_CALL
+		  /* This must not happen during runtime relocations.  */
+		  assert (!RTLD_CHECK_FOREIGN_CALL);
+#endif
+
 #define INITIAL_NUNIQUE_SYM_TABLE 31
 		  size = INITIAL_NUNIQUE_SYM_TABLE;
 		  entries = calloc (sizeof (struct unique_sym), size);
@@ -600,6 +609,10 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
 	  unsigned int max
 	    = undef_map->l_reldepsmax ? undef_map->l_reldepsmax * 2 : 10;
 
+#ifdef RTLD_PREPARE_FOREIGN_CALL
+	  RTLD_PREPARE_FOREIGN_CALL;
+#endif
+
 	  newp = malloc (sizeof (*newp) + max * sizeof (struct link_map *));
 	  if (newp == NULL)
 	    {
diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c
index 0eb7d4e3b9..a52120d121 100644
--- a/elf/dl-runtime.c
+++ b/elf/dl-runtime.c
@@ -111,6 +111,10 @@ _dl_fixup (
 	  flags |= DL_LOOKUP_GSCOPE_LOCK;
 	}
 
+#ifdef RTLD_ENABLE_FOREIGN_CALL
+      RTLD_ENABLE_FOREIGN_CALL;
+#endif
+
       result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, l->l_scope,
 				    version, ELF_RTYPE_CLASS_PLT, flags, NULL);
 
@@ -118,6 +122,10 @@ _dl_fixup (
       if (!RTLD_SINGLE_THREAD_P)
 	THREAD_GSCOPE_RESET_FLAG ();
 
+#ifdef RTLD_FINALIZE_FOREIGN_CALL
+      RTLD_FINALIZE_FOREIGN_CALL;
+#endif
+
       /* Currently result contains the base load address (or link map)
 	 of the object that defines sym.  Now add in the symbol
 	 offset.  */
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 8f37da7936..24fd28a0dc 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,11 @@
+2009-07-29  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/x86_64/tls.h (tcbhead_t): Add room for SSE registers the
+	dynamic linker might have to save.  Define RTLD_CHECK_FOREIGN_CALL,
+	RTLD_ENABLE_FOREIGN_CALL, RTLD_PREPARE_FOREIGN_CALL, and
+	RTLD_FINALIZE_FOREIGN_CALL.  Pretty printing.
+	* sysdeps/x86_64/tcb-offsets.sym: Add RTLD_SAVESPACE_SSE.
+
 2009-07-28  Ulrich Drepper  <drepper@redhat.com>
 
 	* pthread_mutex_lock.c [NO_INCR] (__pthread_mutex_cond_lock_adjust):
diff --git a/nptl/sysdeps/x86_64/tcb-offsets.sym b/nptl/sysdeps/x86_64/tcb-offsets.sym
index 1c70c6bde7..51f35c61cf 100644
--- a/nptl/sysdeps/x86_64/tcb-offsets.sym
+++ b/nptl/sysdeps/x86_64/tcb-offsets.sym
@@ -15,3 +15,4 @@ VGETCPU_CACHE_OFFSET	offsetof (tcbhead_t, vgetcpu_cache)
 #ifndef __ASSUME_PRIVATE_FUTEX
 PRIVATE_FUTEX		offsetof (tcbhead_t, private_futex)
 #endif
+RTLD_SAVESPACE_SSE	offsetof (tcbhead_t, rtld_savespace_sse)
diff --git a/nptl/sysdeps/x86_64/tls.h b/nptl/sysdeps/x86_64/tls.h
index ea89f3b1a2..a51b77052a 100644
--- a/nptl/sysdeps/x86_64/tls.h
+++ b/nptl/sysdeps/x86_64/tls.h
@@ -29,6 +29,7 @@
 # include <sysdep.h>
 # include <kernel-features.h>
 # include <bits/wordsize.h>
+# include <xmmintrin.h>
 
 
 /* Type for the dtv.  */
@@ -55,16 +56,23 @@ typedef struct
   uintptr_t stack_guard;
   uintptr_t pointer_guard;
   unsigned long int vgetcpu_cache[2];
-#ifndef __ASSUME_PRIVATE_FUTEX
+# ifndef __ASSUME_PRIVATE_FUTEX
   int private_futex;
-#else
+# else
   int __unused1;
-#endif
-#if __WORDSIZE == 64
-  int __pad1;
-#endif
+# endif
+# if __WORDSIZE == 64
+  int rtld_must_xmm_save;
+# endif
   /* Reservation of some values for the TM ABI.  */
   void *__private_tm[5];
+# if __WORDSIZE == 64
+  long int __unused2;
+  /* Have space for the post-AVX register size.  */
+  __m128 rtld_savespace_sse[8][4];
+
+  void *__padding[8];
+# endif
 } tcbhead_t;
 
 #else /* __ASSEMBLER__ */
@@ -298,7 +306,7 @@ typedef struct
 
 
 /* Atomic compare and exchange on TLS, returning old value.  */
-#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
+# define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
   ({ __typeof (descr->member) __ret;					      \
      __typeof (oldval) __old = (oldval);				      \
      if (sizeof (descr->member) == 4)					      \
@@ -313,7 +321,7 @@ typedef struct
 
 
 /* Atomic logical and.  */
-#define THREAD_ATOMIC_AND(descr, member, val) \
+# define THREAD_ATOMIC_AND(descr, member, val) \
   (void) ({ if (sizeof ((descr)->member) == 4)				      \
 	      asm volatile (LOCK_PREFIX "andl %1, %%fs:%P0"		      \
 			    :: "i" (offsetof (struct pthread, member)),	      \
@@ -324,7 +332,7 @@ typedef struct
 
 
 /* Atomic set bit.  */
-#define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
+# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
   (void) ({ if (sizeof ((descr)->member) == 4)				      \
 	      asm volatile (LOCK_PREFIX "orl %1, %%fs:%P0"		      \
 			    :: "i" (offsetof (struct pthread, member)),	      \
@@ -334,7 +342,7 @@ typedef struct
 	      abort (); })
 
 
-#define CALL_THREAD_FCT(descr) \
+# define CALL_THREAD_FCT(descr) \
   ({ void *__res;							      \
      asm volatile ("movq %%fs:%P2, %%rdi\n\t"				      \
 		   "callq *%%fs:%P1"					      \
@@ -355,18 +363,18 @@ typedef struct
 
 
 /* Set the pointer guard field in the TCB head.  */
-#define THREAD_SET_POINTER_GUARD(value) \
+# define THREAD_SET_POINTER_GUARD(value) \
   THREAD_SETMEM (THREAD_SELF, header.pointer_guard, value)
-#define THREAD_COPY_POINTER_GUARD(descr) \
+# define THREAD_COPY_POINTER_GUARD(descr) \
   ((descr)->header.pointer_guard					      \
    = THREAD_GETMEM (THREAD_SELF, header.pointer_guard))
 
 
 /* Get and set the global scope generation counter in the TCB head.  */
-#define THREAD_GSCOPE_FLAG_UNUSED 0
-#define THREAD_GSCOPE_FLAG_USED   1
-#define THREAD_GSCOPE_FLAG_WAIT   2
-#define THREAD_GSCOPE_RESET_FLAG() \
+# define THREAD_GSCOPE_FLAG_UNUSED 0
+# define THREAD_GSCOPE_FLAG_USED   1
+# define THREAD_GSCOPE_FLAG_WAIT   2
+# define THREAD_GSCOPE_RESET_FLAG() \
   do									      \
     { int __res;							      \
       asm volatile ("xchgl %0, %%fs:%P1"				      \
@@ -377,11 +385,40 @@ typedef struct
 	lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE);    \
     }									      \
   while (0)
-#define THREAD_GSCOPE_SET_FLAG() \
+# define THREAD_GSCOPE_SET_FLAG() \
   THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
-#define THREAD_GSCOPE_WAIT() \
+# define THREAD_GSCOPE_WAIT() \
   GL(dl_wait_lookup_done) ()
 
+
+# ifdef SHARED
+/* Defined in dl-trampoline.S.  */
+extern void _dl_x86_64_save_sse (void);
+extern void _dl_x86_64_restore_sse (void);
+
+#  define RTLD_CHECK_FOREIGN_CALL \
+  (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0)
+
+#  define RTLD_ENABLE_FOREIGN_CALL \
+  THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1)
+
+#  define RTLD_PREPARE_FOREIGN_CALL \
+  do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save))	      \
+    {									      \
+      _dl_x86_64_save_sse ();						      \
+      THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0);	      \
+    }									      \
+  while (0)
+
+#  define RTLD_FINALIZE_FOREIGN_CALL \
+  do {									      \
+    if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0)	      \
+      _dl_x86_64_restore_sse ();					      \
+    THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0);		      \
+  } while (0)
+# endif
+
+
 #endif /* __ASSEMBLER__ */
 
 #endif	/* tls.h */
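
A quick, standalone sanity check of the new save area (a sketch, not glibc code): the [8][4] array of __m128 reserves 8 * 4 * 16 = 512 bytes of 16-byte-aligned space, which dl-trampoline.S indexes as eight consecutive slots of 32 bytes (%ymm) or 16 bytes (%xmm), so even the 8 * 32 = 256 bytes of %ymm state fit with room left for a future doubling of the vector width.

#include <assert.h>
#include <xmmintrin.h>

/* Stand-in for the tail of tcbhead_t; only the save area matters here.  */
struct savearea
{
  __m128 rtld_savespace_sse[8][4];
};

int
main (void)
{
  /* Total reserved space: 8 registers x 64 bytes each.  */
  assert (sizeof (struct savearea) == 8 * 4 * sizeof (__m128));
  /* Enough for eight 32-byte %ymm stores at offsets i * 32 ...  */
  assert (sizeof (struct savearea) >= 8 * 32);
  /* ... and for eight 16-byte %xmm stores at offsets i * 16.  */
  assert (sizeof (struct savearea) >= 8 * 16);
  return 0;
}
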
diff --git a/stdio-common/scanf15.c b/stdio-common/scanf15.c
index c56715c486..851466b3a9 100644
--- a/stdio-common/scanf15.c
+++ b/stdio-common/scanf15.c
@@ -1,5 +1,6 @@
 #undef _GNU_SOURCE
 #define _XOPEN_SOURCE 600
+#undef _LIBC
 /* The following macro definitions are a hack.  They work around disabling
    the GNU extension while still using a few internal headers.  */
 #define u_char unsigned char
diff --git a/stdio-common/scanf17.c b/stdio-common/scanf17.c
index ee9024f9b7..4478a7022f 100644
--- a/stdio-common/scanf17.c
+++ b/stdio-common/scanf17.c
@@ -1,5 +1,6 @@
 #undef _GNU_SOURCE
 #define _XOPEN_SOURCE 600
+#undef _LIBC
 /* The following macro definitions are a hack.  They work around disabling
    the GNU extension while still using a few internal headers.  */
 #define u_char unsigned char
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 49d239f075..7ecf1b0c64 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -390,3 +390,85 @@ L(no_avx4):
 	cfi_endproc
 	.size _dl_runtime_profile, .-_dl_runtime_profile
 #endif
+
+
+#ifdef SHARED
+	.globl _dl_x86_64_save_sse
+	.type _dl_x86_64_save_sse, @function
+	.align 16
+	cfi_startproc
+_dl_x86_64_save_sse:
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)
+	jne	1f
+	movq	%rbx, %r11		# Save rbx
+	movl	$1, %eax
+	cpuid
+	movq	%r11,%rbx		# Restore rbx
+	movl	$1, %eax
+	testl	$(1 << 28), %ecx
+	jne	2f
+	negl	%eax
+2:	movl	%eax, L(have_avx)(%rip)
+	cmpl	$0, %eax
+
+1:	js	L(no_avx5)
+
+#  define YMM_SIZE 32
+	vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
+	vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
+	vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
+	vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
+	vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
+	vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
+	vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
+	vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
+	ret
+L(no_avx5):
+# endif
+# define XMM_SIZE 16
+	movdqa	%xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
+	movdqa	%xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
+	movdqa	%xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
+	movdqa	%xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
+	movdqa	%xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
+	movdqa	%xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
+	movdqa	%xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
+	movdqa	%xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
+	ret
+	cfi_endproc
+	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
+
+
+	.globl _dl_x86_64_restore_sse
+	.type _dl_x86_64_restore_sse, @function
+	.align 16
+	cfi_startproc
+_dl_x86_64_restore_sse:
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)
+	js	L(no_avx6)
+
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
+	vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
+	ret
+L(no_avx6):
+# endif
+	movdqa	%fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
+	movdqa	%fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
+	movdqa	%fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
+	movdqa	%fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
+	movdqa	%fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
+	movdqa	%fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
+	movdqa	%fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
+	movdqa	%fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
+	ret
+	cfi_endproc
+	.size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
+#endif
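
The AVX probe the two new functions share reduces to CPUID leaf 1, ECX bit 28; the assembly caches the result as +1/-1 in L(have_avx) so cpuid runs at most once, and it saves %rbx around the instruction because cpuid clobbers it and the ABI requires it to be preserved.  A standalone C sketch of just the detection step, assuming GCC's <cpuid.h> wrapper (an assumption about the toolchain, not part of the patch):

#include <cpuid.h>
#include <stdio.h>

/* Returns nonzero if the CPU advertises AVX, mirroring the lazy
   L(have_avx) check in _dl_x86_64_save_sse/_dl_x86_64_restore_sse.  */
static int
have_avx (void)
{
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;
  return (ecx & (1u << 28)) != 0;
}

int
main (void)
{
  printf ("AVX %s: save/restore would use vmovdqa on %%ymm0-%%ymm7,"
          " otherwise movdqa on %%xmm0-%%xmm7\n",
          have_avx () ? "available" : "unavailable");
  return 0;
}
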
diff --git a/sysdeps/x86_64/tst-xmmymm.sh b/sysdeps/x86_64/tst-xmmymm.sh
index a576e7da0d..da8af7e686 100755
--- a/sysdeps/x86_64/tst-xmmymm.sh
+++ b/sysdeps/x86_64/tst-xmmymm.sh
@@ -59,10 +59,11 @@ for f in $tocheck; do
   objdump -d "$objpfx"../*/"$f" |
   awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
   while read fct; do
-    if test "$fct" != "_dl_runtime_profile"; then
-      echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
-      result=1
+    if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
+      continue;
     fi
+    echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
+    result=1
   done
 done