about summary refs log tree commit diff
path: root/sysdeps/x86_64/nptl
diff options
context:
space:
mode:
author: Roland McGrath <roland@hack.frob.com> 2014-06-11 21:33:32 -0700
committer: Roland McGrath <roland@hack.frob.com> 2014-06-11 21:33:32 -0700
commit: 14642b8511ed1d6ecfd326de8688853118308082 (patch)
tree: d07bc0da0c2a988b094dbe73b6b1df5e1296447a /sysdeps/x86_64/nptl
parent: 31c44fea314ea227356b72c3ec29616b9692e39d (diff)
download: glibc-14642b8511ed1d6ecfd326de8688853118308082.tar.gz
glibc-14642b8511ed1d6ecfd326de8688853118308082.tar.xz
glibc-14642b8511ed1d6ecfd326de8688853118308082.zip
Move x86_64 code out of nptl/ subdirectory.
Diffstat (limited to 'sysdeps/x86_64/nptl')
-rw-r--r-- sysdeps/x86_64/nptl/Makefile 27
-rw-r--r-- sysdeps/x86_64/nptl/configure 34
-rw-r--r-- sysdeps/x86_64/nptl/configure.ac 23
-rw-r--r-- sysdeps/x86_64/nptl/pthread_spin_init.c 1
-rw-r--r-- sysdeps/x86_64/nptl/pthread_spin_lock.S 36
-rw-r--r-- sysdeps/x86_64/nptl/pthread_spin_trylock.S 39
-rw-r--r-- sysdeps/x86_64/nptl/pthread_spin_unlock.S 30
-rw-r--r-- sysdeps/x86_64/nptl/pthreaddef.h 44
-rw-r--r-- sysdeps/x86_64/nptl/tcb-offsets.sym 28
-rw-r--r-- sysdeps/x86_64/nptl/tls.h 424
10 files changed, 686 insertions, 0 deletions
diff --git a/sysdeps/x86_64/nptl/Makefile b/sysdeps/x86_64/nptl/Makefile
new file mode 100644
index 0000000000..de43419bc9
--- /dev/null
+++ b/sysdeps/x86_64/nptl/Makefile
@@ -0,0 +1,27 @@
+# Copyright (C) 2002-2014 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += tcb-offsets.sym
+endif
+
+ifeq ($(subdir),nptl)
+# P4s have problems with 4M aliasing.  We disturb the allocation of stacks
+# just enough so the subsequent allocations do not use stack address
+# (mod 4M) == 0.
+CFLAGS-pthread_create.c += -DMULTI_PAGE_ALIASING=65536
+endif
diff --git a/sysdeps/x86_64/nptl/configure b/sysdeps/x86_64/nptl/configure
new file mode 100644
index 0000000000..c9dea5a7a1
--- /dev/null
+++ b/sysdeps/x86_64/nptl/configure
@@ -0,0 +1,34 @@
+# This file is generated from configure.ac by Autoconf.  DO NOT EDIT!
+ # Local configure fragment for sysdeps/i386.
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for .cfi_personality and .cfi_lsda pseudo-ops" >&5
+$as_echo_n "checking for .cfi_personality and .cfi_lsda pseudo-ops... " >&6; }
+if ${libc_cv_asm_cfi_personality+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+    cat > conftest.s <<EOF
+${libc_cv_dot_text}
+foo:
+	.cfi_startproc
+	.cfi_personality 0, foo
+	.cfi_lsda 0, foo
+	.cfi_endproc
+EOF
+  if { ac_try='${CC-cc} $ASFLAGS -c conftest.s 1>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+    libc_cv_asm_cfi_personality=yes
+  else
+    libc_cv_asm_cfi_personality=no
+  fi
+  rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_cfi_personality" >&5
+$as_echo "$libc_cv_asm_cfi_personality" >&6; }
+if test x"$libc_cv_asm_cfi_personality" != xyes; then
+  as_fn_error $? "assembler too old, .cfi_personality support missing" "$LINENO" 5
+fi
diff --git a/sysdeps/x86_64/nptl/configure.ac b/sysdeps/x86_64/nptl/configure.ac
new file mode 100644
index 0000000000..0ba0cc3726
--- /dev/null
+++ b/sysdeps/x86_64/nptl/configure.ac
@@ -0,0 +1,23 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/x86_64/nptl.
+
+AC_CACHE_CHECK([for .cfi_personality and .cfi_lsda pseudo-ops],
+	       libc_cv_asm_cfi_personality, [dnl
+  cat > conftest.s <<EOF
+${libc_cv_dot_text}
+foo:
+	.cfi_startproc
+	.cfi_personality 0, foo
+	.cfi_lsda 0, foo
+	.cfi_endproc
+EOF
+  if AC_TRY_COMMAND(${CC-cc} $ASFLAGS -c conftest.s 1>&AS_MESSAGE_LOG_FD); then
+    libc_cv_asm_cfi_personality=yes
+  else
+    libc_cv_asm_cfi_personality=no
+  fi
+  rm -f conftest*
+])
+if test x"$libc_cv_asm_cfi_personality" != xyes; then
+  AC_MSG_ERROR([assembler too old, .cfi_personality support missing])
+fi
diff --git a/sysdeps/x86_64/nptl/pthread_spin_init.c b/sysdeps/x86_64/nptl/pthread_spin_init.c
new file mode 100644
index 0000000000..55696204c1
--- /dev/null
+++ b/sysdeps/x86_64/nptl/pthread_spin_init.c
@@ -0,0 +1 @@
+#include "../i386/pthread_spin_init.c"
diff --git a/sysdeps/x86_64/nptl/pthread_spin_lock.S b/sysdeps/x86_64/nptl/pthread_spin_lock.S
new file mode 100644
index 0000000000..71c93a4cec
--- /dev/null
+++ b/sysdeps/x86_64/nptl/pthread_spin_lock.S
@@ -0,0 +1,36 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <lowlevellock.h>
+
+	.globl	pthread_spin_lock
+	.type	pthread_spin_lock,@function
+	.align	16
+pthread_spin_lock:
+1:	LOCK
+	decl	0(%rdi)
+	jne	2f
+	xor	%eax, %eax
+	ret
+
+	.align	16
+2:	rep
+	nop
+	cmpl	$0, 0(%rdi)
+	jg	1b
+	jmp	2b
+	.size	pthread_spin_lock,.-pthread_spin_lock
diff --git a/sysdeps/x86_64/nptl/pthread_spin_trylock.S b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
new file mode 100644
index 0000000000..ba1e8b51e5
--- /dev/null
+++ b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
@@ -0,0 +1,39 @@
+/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <pthread-errnos.h>
+
+
+#ifdef UP
+# define LOCK
+#else
+# define LOCK lock
+#endif
+
+	.globl	pthread_spin_trylock
+	.type	pthread_spin_trylock,@function
+	.align	16
+pthread_spin_trylock:
+	movl	$1, %eax
+	xorl	%ecx, %ecx
+	LOCK
+	cmpxchgl %ecx, (%rdi)
+	movl	$EBUSY, %eax
+	cmovel	%ecx, %eax
+	retq
+	.size	pthread_spin_trylock,.-pthread_spin_trylock
diff --git a/sysdeps/x86_64/nptl/pthread_spin_unlock.S b/sysdeps/x86_64/nptl/pthread_spin_unlock.S
new file mode 100644
index 0000000000..6cd4feaf2a
--- /dev/null
+++ b/sysdeps/x86_64/nptl/pthread_spin_unlock.S
@@ -0,0 +1,30 @@
+/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+	.globl	pthread_spin_unlock
+	.type	pthread_spin_unlock,@function
+	.align	16
+pthread_spin_unlock:
+	movl	$1, (%rdi)
+	xorl	%eax, %eax
+	retq
+	.size	pthread_spin_unlock,.-pthread_spin_unlock
+
+	/* The implementation of pthread_spin_init is identical.  */
+	.globl	pthread_spin_init
+pthread_spin_init = pthread_spin_unlock
diff --git a/sysdeps/x86_64/nptl/pthreaddef.h b/sysdeps/x86_64/nptl/pthreaddef.h
new file mode 100644
index 0000000000..485a6252df
--- /dev/null
+++ b/sysdeps/x86_64/nptl/pthreaddef.h
@@ -0,0 +1,44 @@
+/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Default stack size.  */
+#define ARCH_STACK_DEFAULT_SIZE	(2 * 1024 * 1024)
+
+/* Required stack pointer alignment at beginning.  SSE requires 16
+   bytes.  */
+#define STACK_ALIGN		16
+
+/* Minimal stack size after allocating thread descriptor and guard size.  */
+#define MINIMAL_REST_STACK	2048
+
+/* Alignment requirement for TCB.
+
+   We need to store post-AVX vector registers in the TCB and we want the
+   storage to be aligned to at least 32 bytes.
+
+   Some processors such as Intel Atom pay a big penalty on every
+   access using a segment override if that segment's base is not
+   aligned to the size of a cache line.  (See Intel 64 and IA-32
+   Architectures Optimization Reference Manual, section 13.3.3.3,
+   "Segment Base".)  On such machines, a cache line is 64 bytes.  */
+#define TCB_ALIGNMENT		64
+
+
+/* Location of current stack frame.  The frame pointer is not usable.  */
+#define CURRENT_STACK_FRAME \
+  ({ register char *frame __asm__("rsp"); frame; })
diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym
new file mode 100644
index 0000000000..cf863752ee
--- /dev/null
+++ b/sysdeps/x86_64/nptl/tcb-offsets.sym
@@ -0,0 +1,28 @@
+#include <sysdep.h>
+#include <tls.h>
+
+RESULT			offsetof (struct pthread, result)
+TID			offsetof (struct pthread, tid)
+PID			offsetof (struct pthread, pid)
+CANCELHANDLING		offsetof (struct pthread, cancelhandling)
+CLEANUP_JMP_BUF		offsetof (struct pthread, cleanup_jmp_buf)
+CLEANUP			offsetof (struct pthread, cleanup)
+CLEANUP_PREV		offsetof (struct _pthread_cleanup_buffer, __prev)
+MUTEX_FUTEX		offsetof (pthread_mutex_t, __data.__lock)
+MULTIPLE_THREADS_OFFSET	offsetof (tcbhead_t, multiple_threads)
+POINTER_GUARD		offsetof (tcbhead_t, pointer_guard)
+VGETCPU_CACHE_OFFSET	offsetof (tcbhead_t, vgetcpu_cache)
+#ifndef __ASSUME_PRIVATE_FUTEX
+PRIVATE_FUTEX		offsetof (tcbhead_t, private_futex)
+#endif
+RTLD_SAVESPACE_SSE	offsetof (tcbhead_t, rtld_savespace_sse)
+
+-- Not strictly offsets, but these values are also used in the TCB.
+TCB_CANCELSTATE_BITMASK	 CANCELSTATE_BITMASK
+TCB_CANCELTYPE_BITMASK	 CANCELTYPE_BITMASK
+TCB_CANCELING_BITMASK	 CANCELING_BITMASK
+TCB_CANCELED_BITMASK	 CANCELED_BITMASK
+TCB_EXITING_BITMASK	 EXITING_BITMASK
+TCB_CANCEL_RESTMASK	 CANCEL_RESTMASK
+TCB_TERMINATED_BITMASK	 TERMINATED_BITMASK
+TCB_PTHREAD_CANCELED	 PTHREAD_CANCELED
diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h
new file mode 100644
index 0000000000..3e811b6aea
--- /dev/null
+++ b/sysdeps/x86_64/nptl/tls.h
@@ -0,0 +1,424 @@
+/* Definition for thread-local data handling.  nptl/x86_64 version.
+   Copyright (C) 2002-2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _TLS_H
+#define _TLS_H	1
+
+#ifndef __ASSEMBLER__
+# include <asm/prctl.h>	/* For ARCH_SET_FS.  */
+# include <stdbool.h>
+# include <stddef.h>
+# include <stdint.h>
+# include <stdlib.h>
+# include <sysdep.h>
+# include <libc-internal.h>
+# include <kernel-features.h>
+
+/* Replacement type for __m128 since this file is included by ld.so,
+   which is compiled with -mno-sse.  It must not change the alignment
+   of rtld_savespace_sse.  */
+typedef struct
+{
+  int i[4];
+} __128bits;
+
+
+/* Type for the dtv.  */
+typedef union dtv
+{
+  size_t counter;
+  struct
+  {
+    void *val;
+    bool is_static;
+  } pointer;
+} dtv_t;
+
+
+typedef struct
+{
+  void *tcb;		/* Pointer to the TCB.  Not necessarily the
+			   thread descriptor used by libpthread.  */
+  dtv_t *dtv;
+  void *self;		/* Pointer to the thread descriptor.  */
+  int multiple_threads;
+  int gscope_flag;
+  uintptr_t sysinfo;
+  uintptr_t stack_guard;
+  uintptr_t pointer_guard;
+  unsigned long int vgetcpu_cache[2];
+# ifndef __ASSUME_PRIVATE_FUTEX
+  int private_futex;
+# else
+  int __glibc_reserved1;
+# endif
+  int rtld_must_xmm_save;
+  /* Reservation of some values for the TM ABI.  */
+  void *__private_tm[4];
+  /* GCC split stack support.  */
+  void *__private_ss;
+  long int __glibc_reserved2;
+  /* Have space for the post-AVX register size.  */
+  __128bits rtld_savespace_sse[8][4] __attribute__ ((aligned (32)));
+
+  void *__padding[8];
+} tcbhead_t;
+
+#else /* __ASSEMBLER__ */
+# include <tcb-offsets.h>
+#endif
+
+
+/* Alignment requirement for the stack.  */
+#define STACK_ALIGN	16
+
+
+#ifndef __ASSEMBLER__
+/* Get system call information.  */
+# include <sysdep.h>
+
+#ifndef LOCK_PREFIX
+# ifdef UP
+#  define LOCK_PREFIX	/* nothing */
+# else
+#  define LOCK_PREFIX	"lock;"
+# endif
+#endif
+
+/* This is the size of the initial TCB.  Can't be just sizeof (tcbhead_t),
+   because NPTL getpid, __libc_alloca_cutoff etc. need (almost) the whole
+   struct pthread even when not linked with -lpthread.  */
+# define TLS_INIT_TCB_SIZE sizeof (struct pthread)
+
+/* Alignment requirements for the initial TCB.  */
+# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread)
+
+/* This is the size of the TCB.  */
+# define TLS_TCB_SIZE sizeof (struct pthread)
+
+/* Alignment requirements for the TCB.  */
+# define TLS_TCB_ALIGN __alignof__ (struct pthread)
+
+/* The TCB can have any size and the memory following the address the
+   thread pointer points to is unspecified.  Allocate the TCB there.  */
+# define TLS_TCB_AT_TP	1
+# define TLS_DTV_AT_TP	0
+
+/* Get the thread descriptor definition.  */
+# include <nptl/descr.h>
+
+
+/* Install the dtv pointer.  The pointer passed is to the element with
+   index -1, which contains the length.  */
+# define INSTALL_DTV(descr, dtvp) \
+  ((tcbhead_t *) (descr))->dtv = (dtvp) + 1
+
+/* Install new dtv for current thread.  */
+# define INSTALL_NEW_DTV(dtvp) \
+  ({ struct pthread *__pd;						      \
+     THREAD_SETMEM (__pd, header.dtv, (dtvp)); })
+
+/* Return dtv of given thread descriptor.  */
+# define GET_DTV(descr) \
+  (((tcbhead_t *) (descr))->dtv)
+
+
+/* Code to initially initialize the thread pointer.  This might need
+   special attention since 'errno' is not yet available and if the
+   operation can cause a failure 'errno' must not be touched.
+
+   We have to make the syscall for both uses of the macro since the
+   address might be (and probably is) different.  */
+# define TLS_INIT_TP(thrdescr) \
+  ({ void *_thrdescr = (thrdescr);					      \
+     tcbhead_t *_head = _thrdescr;					      \
+     int _result;							      \
+									      \
+     _head->tcb = _thrdescr;						      \
+     /* For now the thread descriptor is at the same address.  */	      \
+     _head->self = _thrdescr;						      \
+									      \
+     /* It is a simple syscall to set the %fs value for the thread.  */	      \
+     asm volatile ("syscall"						      \
+		   : "=a" (_result)					      \
+		   : "0" ((unsigned long int) __NR_arch_prctl),		      \
+		     "D" ((unsigned long int) ARCH_SET_FS),		      \
+		     "S" (_thrdescr)					      \
+		   : "memory", "cc", "r11", "cx");			      \
+									      \
+    _result ? "cannot set %fs base address for thread-local storage" : 0;     \
+  })
+
+# define TLS_DEFINE_INIT_TP(tp, pd) void *tp = (pd)
+
+
+/* Return the address of the dtv for the current thread.  */
+# define THREAD_DTV() \
+  ({ struct pthread *__pd;						      \
+     THREAD_GETMEM (__pd, header.dtv); })
+
+
+/* Return the thread descriptor for the current thread.
+
+   The contained asm must *not* be marked volatile since otherwise
+   assignments like
+	pthread_descr self = thread_self();
+   do not get optimized away.  */
+# define THREAD_SELF \
+  ({ struct pthread *__self;						      \
+     asm ("mov %%fs:%c1,%0" : "=r" (__self)				      \
+	  : "i" (offsetof (struct pthread, header.self)));	 	      \
+     __self;})
+
+/* Magic for libthread_db to know how to do THREAD_SELF.  */
+# define DB_THREAD_SELF_INCLUDE  <sys/reg.h> /* For the FS constant.  */
+# define DB_THREAD_SELF CONST_THREAD_AREA (64, FS)
+
+/* Read member of the thread descriptor directly.  */
+# define THREAD_GETMEM(descr, member) \
+  ({ __typeof (descr->member) __value;					      \
+     if (sizeof (__value) == 1)						      \
+       asm volatile ("movb %%fs:%P2,%b0"				      \
+		     : "=q" (__value)					      \
+		     : "0" (0), "i" (offsetof (struct pthread, member)));     \
+     else if (sizeof (__value) == 4)					      \
+       asm volatile ("movl %%fs:%P1,%0"					      \
+		     : "=r" (__value)					      \
+		     : "i" (offsetof (struct pthread, member)));	      \
+     else								      \
+       {								      \
+	 if (sizeof (__value) != 8)					      \
+	   /* There should not be any value with a size other than 1,	      \
+	      4 or 8.  */						      \
+	   abort ();							      \
+									      \
+	 asm volatile ("movq %%fs:%P1,%q0"				      \
+		       : "=r" (__value)					      \
+		       : "i" (offsetof (struct pthread, member)));	      \
+       }								      \
+     __value; })
+
+
+/* Same as THREAD_GETMEM, but the member offset can be non-constant.  */
+# define THREAD_GETMEM_NC(descr, member, idx) \
+  ({ __typeof (descr->member[0]) __value;				      \
+     if (sizeof (__value) == 1)						      \
+       asm volatile ("movb %%fs:%P2(%q3),%b0"				      \
+		     : "=q" (__value)					      \
+		     : "0" (0), "i" (offsetof (struct pthread, member[0])),   \
+		       "r" (idx));					      \
+     else if (sizeof (__value) == 4)					      \
+       asm volatile ("movl %%fs:%P1(,%q2,4),%0"				      \
+		     : "=r" (__value)					      \
+		     : "i" (offsetof (struct pthread, member[0])), "r" (idx));\
+     else								      \
+       {								      \
+	 if (sizeof (__value) != 8)					      \
+	   /* There should not be any value with a size other than 1,	      \
+	      4 or 8.  */						      \
+	   abort ();							      \
+									      \
+	 asm volatile ("movq %%fs:%P1(,%q2,8),%q0"			      \
+		       : "=r" (__value)					      \
+		       : "i" (offsetof (struct pthread, member[0])),	      \
+			 "r" (idx));					      \
+       }								      \
+     __value; })
+
+
+/* Loading addresses of objects on x86-64 needs to be treated special
+   when generating PIC code.  */
+#ifdef __pic__
+# define IMM_MODE "nr"
+#else
+# define IMM_MODE "ir"
+#endif
+
+
+/* Set member of the thread descriptor directly.  */
+# define THREAD_SETMEM(descr, member, value) \
+  ({ if (sizeof (descr->member) == 1)					      \
+       asm volatile ("movb %b0,%%fs:%P1" :				      \
+		     : "iq" (value),					      \
+		       "i" (offsetof (struct pthread, member)));	      \
+     else if (sizeof (descr->member) == 4)				      \
+       asm volatile ("movl %0,%%fs:%P1" :				      \
+		     : IMM_MODE (value),				      \
+		       "i" (offsetof (struct pthread, member)));	      \
+     else								      \
+       {								      \
+	 if (sizeof (descr->member) != 8)				      \
+	   /* There should not be any value with a size other than 1,	      \
+	      4 or 8.  */						      \
+	   abort ();							      \
+									      \
+	 asm volatile ("movq %q0,%%fs:%P1" :				      \
+		       : IMM_MODE ((uint64_t) cast_to_integer (value)),	      \
+			 "i" (offsetof (struct pthread, member)));	      \
+       }})
+
+
+/* Same as THREAD_SETMEM, but the member offset can be non-constant.  */
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
+  ({ if (sizeof (descr->member[0]) == 1)				      \
+       asm volatile ("movb %b0,%%fs:%P1(%q2)" :				      \
+		     : "iq" (value),					      \
+		       "i" (offsetof (struct pthread, member[0])),	      \
+		       "r" (idx));					      \
+     else if (sizeof (descr->member[0]) == 4)				      \
+       asm volatile ("movl %0,%%fs:%P1(,%q2,4)" :			      \
+		     : IMM_MODE (value),				      \
+		       "i" (offsetof (struct pthread, member[0])),	      \
+		       "r" (idx));					      \
+     else								      \
+       {								      \
+	 if (sizeof (descr->member[0]) != 8)				      \
+	   /* There should not be any value with a size other than 1,	      \
+	      4 or 8.  */						      \
+	   abort ();							      \
+									      \
+	 asm volatile ("movq %q0,%%fs:%P1(,%q2,8)" :			      \
+		       : IMM_MODE ((uint64_t) cast_to_integer (value)),	      \
+			 "i" (offsetof (struct pthread, member[0])),	      \
+			 "r" (idx));					      \
+       }})
+
+
+/* Atomic compare and exchange on TLS, returning old value.  */
+# define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
+  ({ __typeof (descr->member) __ret;					      \
+     __typeof (oldval) __old = (oldval);				      \
+     if (sizeof (descr->member) == 4)					      \
+       asm volatile (LOCK_PREFIX "cmpxchgl %2, %%fs:%P3"		      \
+		     : "=a" (__ret)					      \
+		     : "0" (__old), "r" (newval),			      \
+		       "i" (offsetof (struct pthread, member)));	      \
+     else								      \
+       /* Not necessary for other sizes in the moment.  */		      \
+       abort ();							      \
+     __ret; })
+
+
+/* Atomic logical and.  */
+# define THREAD_ATOMIC_AND(descr, member, val) \
+  (void) ({ if (sizeof ((descr)->member) == 4)				      \
+	      asm volatile (LOCK_PREFIX "andl %1, %%fs:%P0"		      \
+			    :: "i" (offsetof (struct pthread, member)),	      \
+			       "ir" (val));				      \
+	    else							      \
+	      /* Not necessary for other sizes in the moment.  */	      \
+	      abort (); })
+
+
+/* Atomic set bit.  */
+# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
+  (void) ({ if (sizeof ((descr)->member) == 4)				      \
+	      asm volatile (LOCK_PREFIX "orl %1, %%fs:%P0"		      \
+			    :: "i" (offsetof (struct pthread, member)),	      \
+			       "ir" (1 << (bit)));			      \
+	    else							      \
+	      /* Not necessary for other sizes in the moment.  */	      \
+	      abort (); })
+
+
+# define CALL_THREAD_FCT(descr) \
+  ({ void *__res;							      \
+     asm volatile ("movq %%fs:%P2, %%rdi\n\t"				      \
+		   "callq *%%fs:%P1"					      \
+		   : "=a" (__res)					      \
+		   : "i" (offsetof (struct pthread, start_routine)),	      \
+		     "i" (offsetof (struct pthread, arg))		      \
+		   : "di", "si", "cx", "dx", "r8", "r9", "r10", "r11",	      \
+		     "memory", "cc");					      \
+     __res; })
+
+
+/* Set the stack guard field in TCB head.  */
+# define THREAD_SET_STACK_GUARD(value) \
+    THREAD_SETMEM (THREAD_SELF, header.stack_guard, value)
+# define THREAD_COPY_STACK_GUARD(descr) \
+    ((descr)->header.stack_guard					      \
+     = THREAD_GETMEM (THREAD_SELF, header.stack_guard))
+
+
+/* Set the pointer guard field in the TCB head.  */
+# define THREAD_SET_POINTER_GUARD(value) \
+  THREAD_SETMEM (THREAD_SELF, header.pointer_guard, value)
+# define THREAD_COPY_POINTER_GUARD(descr) \
+  ((descr)->header.pointer_guard					      \
+   = THREAD_GETMEM (THREAD_SELF, header.pointer_guard))
+
+
+/* Get and set the global scope flag in the TCB head.  */
+# define THREAD_GSCOPE_FLAG_UNUSED 0
+# define THREAD_GSCOPE_FLAG_USED   1
+# define THREAD_GSCOPE_FLAG_WAIT   2
+# define THREAD_GSCOPE_RESET_FLAG() \
+  do									      \
+    { int __res;							      \
+      asm volatile ("xchgl %0, %%fs:%P1"				      \
+		    : "=r" (__res)					      \
+		    : "i" (offsetof (struct pthread, header.gscope_flag)),    \
+		      "0" (THREAD_GSCOPE_FLAG_UNUSED));			      \
+      if (__res == THREAD_GSCOPE_FLAG_WAIT)				      \
+	lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE);    \
+    }									      \
+  while (0)
+# define THREAD_GSCOPE_SET_FLAG() \
+  THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
+# define THREAD_GSCOPE_WAIT() \
+  GL(dl_wait_lookup_done) ()
+
+
+# ifdef SHARED
+/* Defined in dl-trampoline.S.  */
+extern void _dl_x86_64_save_sse (void);
+extern void _dl_x86_64_restore_sse (void);
+
+# define RTLD_CHECK_FOREIGN_CALL \
+  (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0)
+
+/* NB: Don't use the xchg operation because that would imply a lock
+   prefix which is expensive and unnecessary.  The cache line is also
+   not contested at all.  */
+#  define RTLD_ENABLE_FOREIGN_CALL \
+  int old_rtld_must_xmm_save = THREAD_GETMEM (THREAD_SELF,		      \
+					      header.rtld_must_xmm_save);     \
+  THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1)
+
+#  define RTLD_PREPARE_FOREIGN_CALL \
+  do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save))	      \
+    {									      \
+      _dl_x86_64_save_sse ();						      \
+      THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0);	      \
+    }									      \
+  while (0)
+
+#  define RTLD_FINALIZE_FOREIGN_CALL \
+  do {									      \
+    if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0)	      \
+      _dl_x86_64_restore_sse ();					      \
+    THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save,		      \
+		   old_rtld_must_xmm_save);				      \
+  } while (0)
+# endif
+
+
+#endif /* __ASSEMBLER__ */
+
+#endif	/* tls.h */