about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 16
-rw-r--r-- crypt/crypt_util.c 24
-rw-r--r-- nptl/ChangeLog 8
-rw-r--r-- nptl/sysdeps/pthread/unwind-forcedunwind.c 14
-rw-r--r-- sysdeps/x86_64/dl-trampoline.S 25
5 files changed, 71 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 9e03d17257..d149838948 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2011-07-22  Ulrich Drepper  <drepper@gmail.com>
+
+	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix test for
+	OSXSAVE.
+	(_dl_x86_64_save_sse): Likewise.
+
+	* crypt/crypt_util.c (__init_des_r): Optimize memset calls.
+
+	* crypt/crypt_util.c (__init_des_r): Add read barrier as well.
+
+2011-07-21  Andreas Schwab  <schwab@redhat.com>
+
+	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix last
+	change.
+	(_dl_x86_64_save_sse): Use correct AVX check.
+
 2011-07-21  Liubov Dmitrieva  <liubov.dmitrieva@gmail.com>
 
 	* sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S: Fix overfow
diff --git a/crypt/crypt_util.c b/crypt/crypt_util.c
index 7999a30672..04b262d9c8 100644
--- a/crypt/crypt_util.c
+++ b/crypt/crypt_util.c
@@ -1,7 +1,8 @@
 /*
  * UFC-crypt: ultra fast crypt(3) implementation
  *
- * Copyright (C) 1991-1993,1996-1998,2000,2010 Free Software Foundation, Inc.
+ * Copyright (C) 1991-1993,1996-1998,2000,2010,2011
+ * Free Software Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -475,7 +476,8 @@ __init_des_r(__data)
 small_tables_done:
     __libc_lock_unlock(_ufc_tables_lock);
 #endif
-  }
+  } else
+    atomic_read_barrier ();
 
   /*
    * Create the sb tables:
@@ -490,10 +492,20 @@ small_tables_done:
    *
    */
 
-  _ufc_clearmem((char*)__data->sb0, (int)sizeof(__data->sb0));
-  _ufc_clearmem((char*)__data->sb1, (int)sizeof(__data->sb1));
-  _ufc_clearmem((char*)__data->sb2, (int)sizeof(__data->sb2));
-  _ufc_clearmem((char*)__data->sb3, (int)sizeof(__data->sb3));
+  if (__data->sb0 + sizeof (__data->sb0) == __data->sb1
+      && __data->sb1 + sizeof (__data->sb1) == __data->sb2
+      && __data->sb2 + sizeof (__data->sb2) == __data->sb3)
+    _ufc_clearmem(__data->sb0,
+		  (int)sizeof(__data->sb0)
+		  + (int)sizeof(__data->sb1)
+		  + (int)sizeof(__data->sb2)
+		  + (int)sizeof(__data->sb3));
+  else {
+    _ufc_clearmem(__data->sb0, (int)sizeof(__data->sb0));
+    _ufc_clearmem(__data->sb1, (int)sizeof(__data->sb1));
+    _ufc_clearmem(__data->sb2, (int)sizeof(__data->sb2));
+    _ufc_clearmem(__data->sb3, (int)sizeof(__data->sb3));
+  }
 
   for(sg = 0; sg < 4; sg++) {
     int j1, j2;
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 2acc676fd8..5920800a9f 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,11 @@
+2011-07-22  Ulrich Drepper  <drepper@gmail.com>
+
+	* sysdeps/pthread/unwind-forcedunwind.c (_Unwind_Resume): Add read
+	barrier.
+	(__gcc_personality_v0): Likewise.
+	(_Unwind_ForcedUnwind): Likewise.
+	(_Unwind_GetCFA): Likewise.
+
 2011-07-14  Roland McGrath  <roland@hack.frob.com>
 
 	* allocatestack.c (__reclaim_stacks): Use uintptr_t cast rather than
diff --git a/nptl/sysdeps/pthread/unwind-forcedunwind.c b/nptl/sysdeps/pthread/unwind-forcedunwind.c
index ae43f335e7..3b27827c6b 100644
--- a/nptl/sysdeps/pthread/unwind-forcedunwind.c
+++ b/nptl/sysdeps/pthread/unwind-forcedunwind.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2005, 2006, 2009, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Jakub Jelinek <jakub@redhat.com>.
 
@@ -96,6 +96,8 @@ _Unwind_Resume (struct _Unwind_Exception *exc)
 {
   if (__builtin_expect (libgcc_s_handle == NULL, 0))
     pthread_cancel_init ();
+  else
+    atomic_read_barrier ();
 
   void (*resume) (struct _Unwind_Exception *exc) = libgcc_s_resume;
   PTR_DEMANGLE (resume);
@@ -105,11 +107,13 @@ _Unwind_Resume (struct _Unwind_Exception *exc)
 _Unwind_Reason_Code
 __gcc_personality_v0 (int version, _Unwind_Action actions,
 		      _Unwind_Exception_Class exception_class,
-                      struct _Unwind_Exception *ue_header,
-                      struct _Unwind_Context *context)
+		      struct _Unwind_Exception *ue_header,
+		      struct _Unwind_Context *context)
 {
   if (__builtin_expect (libgcc_s_handle == NULL, 0))
     pthread_cancel_init ();
+  else
+    atomic_read_barrier ();
 
   _Unwind_Reason_Code (*personality)
     (int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *,
@@ -124,6 +128,8 @@ _Unwind_ForcedUnwind (struct _Unwind_Exception *exc, _Unwind_Stop_Fn stop,
 {
   if (__builtin_expect (libgcc_s_handle == NULL, 0))
     pthread_cancel_init ();
+  else
+    atomic_read_barrier ();
 
   _Unwind_Reason_Code (*forcedunwind)
     (struct _Unwind_Exception *, _Unwind_Stop_Fn, void *)
@@ -137,6 +143,8 @@ _Unwind_GetCFA (struct _Unwind_Context *context)
 {
   if (__builtin_expect (libgcc_s_handle == NULL, 0))
     pthread_cancel_init ();
+  else
+    atomic_read_barrier ();
 
   _Unwind_Word (*getcfa) (struct _Unwind_Context *) = libgcc_s_getcfa;
   PTR_DEMANGLE (getcfa);
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 1b97929aaa..4aa63ee8d5 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -141,7 +141,8 @@ L(have_avx):
 	movq	%r11,%rbx		# Restore rbx
 	xorl	%eax, %eax
 	// AVX and XSAVE supported?
-	testl	$((1 << 28) | (1 << 27)), %ecx
+	andl	$((1 << 28) | (1 << 27)), %ecx
+	cmpl	$((1 << 28) | (1 << 27)), %ecx
 	je	2f
 	xorl	%ecx, %ecx
 	// Get XFEATURE_ENABLED_MASK
@@ -150,7 +151,8 @@ L(have_avx):
 	cmpl	$0x6, %eax
 	// Nonzero if SSE and AVX state saving is enabled.
 	sete	%al
-2:	movl	%eax, L(have_avx)(%rip)
+2:	leal	-1(%eax,%eax), %eax
+	movl	%eax, L(have_avx)(%rip)
 	cmpl	$0, %eax
 
 1:	js	L(no_avx)
@@ -183,11 +185,20 @@ _dl_x86_64_save_sse:
 	movl	$1, %eax
 	cpuid
 	movq	%r11,%rbx		# Restore rbx
-	movl	$1, %eax
-	testl	$(1 << 28), %ecx
-	jne	2f
-	negl	%eax
-2:	movl	%eax, L(have_avx)(%rip)
+	xorl	%eax, %eax
+	// AVX and OSXSAVE supported?
+	andl	$((1 << 28) | (1 << 27)), %ecx
+	cmpl	$((1 << 28) | (1 << 27)), %ecx
+	je	2f
+	xorl	%ecx, %ecx
+	// Get XFEATURE_ENABLED_MASK
+	xgetbv
+	andl	$0x6, %eax
+	cmpl	$0x6, %eax
+	// Nonzero if SSE and AVX state saving is enabled.
+	sete	%al
+2:	leal	-1(%eax,%eax), %eax
+	movl	%eax, L(have_avx)(%rip)
 	cmpl	$0, %eax
 
 1:	js	L(no_avx5)