summary refs log tree commit diff
path: root/nptl
diff options
context:
space:
mode:
authorStefan Liebler <stli@linux.ibm.com>2018-10-17 12:23:04 +0200
committerStefan Liebler <stli@linux.ibm.com>2018-10-17 12:23:04 +0200
commit403b4feb22dcbc85ace72a361d2a951380372471 (patch)
treefe2235a8bce38cad1bbef3b4f28da82dc353e138 /nptl
parentce5a7de6cd1479a1e78fda0db023bd4effa072a4 (diff)
downloadglibc-403b4feb22dcbc85ace72a361d2a951380372471.tar.gz
glibc-403b4feb22dcbc85ace72a361d2a951380372471.tar.xz
glibc-403b4feb22dcbc85ace72a361d2a951380372471.zip
Fix race in pthread_mutex_lock while promoting to PTHREAD_MUTEX_ELISION_NP [BZ #23275]
The race leads either to pthread_mutex_destroy returning EBUSY
or triggering an assertion (See description in bugzilla).

This patch is fixing the race by ensuring that the elision path is
used in all cases if elision is enabled by the GLIBC_TUNABLES framework.

The __kind variable in struct __pthread_mutex_s is accessed concurrently.
Therefore we are now using the atomic macros.

The new testcase tst-mutex10 is triggering the race on s390x and intel.
Presumably also on power, but I don't have access to a power machine
with lock-elision. At least the code for power is the same as on the other
two architectures.

ChangeLog:

	[BZ #23275]
	* nptl/tst-mutex10.c: New File.
	* nptl/Makefile (tests): Add tst-mutex10.
	(tst-mutex10-ENV): New variable.
	* sysdeps/unix/sysv/linux/s390/force-elision.h: (FORCE_ELISION):
	Ensure that elision path is used if elision is available.
	* sysdeps/unix/sysv/linux/powerpc/force-elision.h (FORCE_ELISION):
	Likewise.
	* sysdeps/unix/sysv/linux/x86/force-elision.h: (FORCE_ELISION):
	Likewise.
	* nptl/pthreadP.h (PTHREAD_MUTEX_TYPE, PTHREAD_MUTEX_TYPE_ELISION)
	(PTHREAD_MUTEX_PSHARED): Use atomic_load_relaxed.
	* nptl/pthread_mutex_consistent.c (pthread_mutex_consistent): Likewise.
	* nptl/pthread_mutex_getprioceiling.c (pthread_mutex_getprioceiling):
	Likewise.
	* nptl/pthread_mutex_lock.c (__pthread_mutex_lock_full)
	(__pthread_mutex_cond_lock_adjust): Likewise.
	* nptl/pthread_mutex_setprioceiling.c (pthread_mutex_setprioceiling):
	Likewise.
	* nptl/pthread_mutex_timedlock.c (__pthread_mutex_timedlock): Likewise.
	* nptl/pthread_mutex_trylock.c (__pthread_mutex_trylock): Likewise.
	* nptl/pthread_mutex_unlock.c (__pthread_mutex_unlock_full): Likewise.
	* sysdeps/nptl/bits/thread-shared-types.h (struct __pthread_mutex_s):
	Add comments.
	* nptl/pthread_mutex_destroy.c (__pthread_mutex_destroy):
	Use atomic_load_relaxed and atomic_store_relaxed.
	* nptl/pthread_mutex_init.c (__pthread_mutex_init):
	Use atomic_store_relaxed.
Diffstat (limited to 'nptl')
-rw-r--r--nptl/Makefile8
-rw-r--r--nptl/pthreadP.h12
-rw-r--r--nptl/pthread_mutex_consistent.c7
-rw-r--r--nptl/pthread_mutex_destroy.c11
-rw-r--r--nptl/pthread_mutex_getprioceiling.c4
-rw-r--r--nptl/pthread_mutex_init.c14
-rw-r--r--nptl/pthread_mutex_lock.c28
-rw-r--r--nptl/pthread_mutex_setprioceiling.c7
-rw-r--r--nptl/pthread_mutex_timedlock.c17
-rw-r--r--nptl/pthread_mutex_trylock.c17
-rw-r--r--nptl/pthread_mutex_unlock.c17
-rw-r--r--nptl/tst-mutex10.c109
12 files changed, 214 insertions, 37 deletions
diff --git a/nptl/Makefile b/nptl/Makefile
index be8066524c..49b6faa330 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -241,9 +241,9 @@ LDLIBS-tst-minstack-throw = -lstdc++
 
 tests = tst-attr1 tst-attr2 tst-attr3 tst-default-attr \
 	tst-mutex1 tst-mutex2 tst-mutex3 tst-mutex4 tst-mutex5 tst-mutex6 \
-	tst-mutex7 tst-mutex9 tst-mutex5a tst-mutex7a tst-mutex7robust \
-	tst-mutexpi1 tst-mutexpi2 tst-mutexpi3 tst-mutexpi4 tst-mutexpi5 \
-	tst-mutexpi5a tst-mutexpi6 tst-mutexpi7 tst-mutexpi7a \
+	tst-mutex7 tst-mutex9 tst-mutex10 tst-mutex5a tst-mutex7a \
+	tst-mutex7robust tst-mutexpi1 tst-mutexpi2 tst-mutexpi3 tst-mutexpi4 \
+	tst-mutexpi5 tst-mutexpi5a tst-mutexpi6 tst-mutexpi7 tst-mutexpi7a \
 	tst-mutexpi9 \
 	tst-spin1 tst-spin2 tst-spin3 tst-spin4 \
 	tst-cond1 tst-cond2 tst-cond3 tst-cond4 tst-cond5 tst-cond6 tst-cond7 \
@@ -709,6 +709,8 @@ endif
 
 $(objpfx)tst-compat-forwarder: $(objpfx)tst-compat-forwarder-mod.so
 
+tst-mutex10-ENV = GLIBC_TUNABLES=glibc.elision.enable=1
+
 # The tests here better do not run in parallel
 ifneq ($(filter %tests,$(MAKECMDGOALS)),)
 .NOTPARALLEL:
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index 13bdb11133..19efe1e35f 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -110,19 +110,23 @@ enum
 };
 #define PTHREAD_MUTEX_PSHARED_BIT 128
 
+/* See concurrency notes regarding __kind in struct __pthread_mutex_s
+   in sysdeps/nptl/bits/thread-shared-types.h.  */
 #define PTHREAD_MUTEX_TYPE(m) \
-  ((m)->__data.__kind & 127)
+  (atomic_load_relaxed (&((m)->__data.__kind)) & 127)
 /* Don't include NO_ELISION, as that type is always the same
    as the underlying lock type.  */
 #define PTHREAD_MUTEX_TYPE_ELISION(m) \
-  ((m)->__data.__kind & (127|PTHREAD_MUTEX_ELISION_NP))
+  (atomic_load_relaxed (&((m)->__data.__kind))	\
+   & (127 | PTHREAD_MUTEX_ELISION_NP))
 
 #if LLL_PRIVATE == 0 && LLL_SHARED == 128
 # define PTHREAD_MUTEX_PSHARED(m) \
-  ((m)->__data.__kind & 128)
+  (atomic_load_relaxed (&((m)->__data.__kind)) & 128)
 #else
 # define PTHREAD_MUTEX_PSHARED(m) \
-  (((m)->__data.__kind & 128) ? LLL_SHARED : LLL_PRIVATE)
+  ((atomic_load_relaxed (&((m)->__data.__kind)) & 128)	\
+   ? LLL_SHARED : LLL_PRIVATE)
 #endif
 
 /* The kernel when waking robust mutexes on exit never uses
diff --git a/nptl/pthread_mutex_consistent.c b/nptl/pthread_mutex_consistent.c
index 85b8e1a6cb..4fbd875430 100644
--- a/nptl/pthread_mutex_consistent.c
+++ b/nptl/pthread_mutex_consistent.c
@@ -23,8 +23,11 @@
 int
 pthread_mutex_consistent (pthread_mutex_t *mutex)
 {
-  /* Test whether this is a robust mutex with a dead owner.  */
-  if ((mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP) == 0
+  /* Test whether this is a robust mutex with a dead owner.
+     See concurrency notes regarding __kind in struct __pthread_mutex_s
+     in sysdeps/nptl/bits/thread-shared-types.h.  */
+  if ((atomic_load_relaxed (&(mutex->__data.__kind))
+       & PTHREAD_MUTEX_ROBUST_NORMAL_NP) == 0
       || mutex->__data.__owner != PTHREAD_MUTEX_INCONSISTENT)
     return EINVAL;
 
diff --git a/nptl/pthread_mutex_destroy.c b/nptl/pthread_mutex_destroy.c
index 5a22611541..713ea68496 100644
--- a/nptl/pthread_mutex_destroy.c
+++ b/nptl/pthread_mutex_destroy.c
@@ -27,12 +27,17 @@ __pthread_mutex_destroy (pthread_mutex_t *mutex)
 {
   LIBC_PROBE (mutex_destroy, 1, mutex);
 
-  if ((mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP) == 0
+  /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+     in sysdeps/nptl/bits/thread-shared-types.h.  */
+  if ((atomic_load_relaxed (&(mutex->__data.__kind))
+       & PTHREAD_MUTEX_ROBUST_NORMAL_NP) == 0
       && mutex->__data.__nusers != 0)
     return EBUSY;
 
-  /* Set to an invalid value.  */
-  mutex->__data.__kind = -1;
+  /* Set to an invalid value.  Relaxed MO is enough as it is undefined behavior
+     if the mutex is used after it has been destroyed.  But you can reinitialize
+     it with pthread_mutex_init.  */
+  atomic_store_relaxed (&(mutex->__data.__kind), -1);
 
   return 0;
 }
diff --git a/nptl/pthread_mutex_getprioceiling.c b/nptl/pthread_mutex_getprioceiling.c
index efa37b0d99..ee85949578 100644
--- a/nptl/pthread_mutex_getprioceiling.c
+++ b/nptl/pthread_mutex_getprioceiling.c
@@ -24,7 +24,9 @@
 int
 pthread_mutex_getprioceiling (const pthread_mutex_t *mutex, int *prioceiling)
 {
-  if (__builtin_expect ((mutex->__data.__kind
+  /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+     in sysdeps/nptl/bits/thread-shared-types.h.  */
+  if (__builtin_expect ((atomic_load_relaxed (&(mutex->__data.__kind))
 			 & PTHREAD_MUTEX_PRIO_PROTECT_NP) == 0, 0))
     return EINVAL;
 
diff --git a/nptl/pthread_mutex_init.c b/nptl/pthread_mutex_init.c
index d8fe473728..5cf290c272 100644
--- a/nptl/pthread_mutex_init.c
+++ b/nptl/pthread_mutex_init.c
@@ -101,7 +101,7 @@ __pthread_mutex_init (pthread_mutex_t *mutex,
   memset (mutex, '\0', __SIZEOF_PTHREAD_MUTEX_T);
 
   /* Copy the values from the attribute.  */
-  mutex->__data.__kind = imutexattr->mutexkind & ~PTHREAD_MUTEXATTR_FLAG_BITS;
+  int mutex_kind = imutexattr->mutexkind & ~PTHREAD_MUTEXATTR_FLAG_BITS;
 
   if ((imutexattr->mutexkind & PTHREAD_MUTEXATTR_FLAG_ROBUST) != 0)
     {
@@ -111,17 +111,17 @@ __pthread_mutex_init (pthread_mutex_t *mutex,
 	return ENOTSUP;
 #endif
 
-      mutex->__data.__kind |= PTHREAD_MUTEX_ROBUST_NORMAL_NP;
+      mutex_kind |= PTHREAD_MUTEX_ROBUST_NORMAL_NP;
     }
 
   switch (imutexattr->mutexkind & PTHREAD_MUTEXATTR_PROTOCOL_MASK)
     {
     case PTHREAD_PRIO_INHERIT << PTHREAD_MUTEXATTR_PROTOCOL_SHIFT:
-      mutex->__data.__kind |= PTHREAD_MUTEX_PRIO_INHERIT_NP;
+      mutex_kind |= PTHREAD_MUTEX_PRIO_INHERIT_NP;
       break;
 
     case PTHREAD_PRIO_PROTECT << PTHREAD_MUTEXATTR_PROTOCOL_SHIFT:
-      mutex->__data.__kind |= PTHREAD_MUTEX_PRIO_PROTECT_NP;
+      mutex_kind |= PTHREAD_MUTEX_PRIO_PROTECT_NP;
 
       int ceiling = (imutexattr->mutexkind
 		     & PTHREAD_MUTEXATTR_PRIO_CEILING_MASK)
@@ -145,7 +145,11 @@ __pthread_mutex_init (pthread_mutex_t *mutex,
      FUTEX_PRIVATE_FLAG FUTEX_WAKE.  */
   if ((imutexattr->mutexkind & (PTHREAD_MUTEXATTR_FLAG_PSHARED
 				| PTHREAD_MUTEXATTR_FLAG_ROBUST)) != 0)
-    mutex->__data.__kind |= PTHREAD_MUTEX_PSHARED_BIT;
+    mutex_kind |= PTHREAD_MUTEX_PSHARED_BIT;
+
+  /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+     in sysdeps/nptl/bits/thread-shared-types.h.  */
+  atomic_store_relaxed (&(mutex->__data.__kind), mutex_kind);
 
   /* Default values: mutex not used yet.  */
   // mutex->__count = 0;	already done by memset
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index 1519c142bd..29cc143e6c 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -62,6 +62,8 @@ static int __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 int
 __pthread_mutex_lock (pthread_mutex_t *mutex)
 {
+  /* See concurrency notes regarding mutex type which is loaded from __kind
+     in struct __pthread_mutex_s in sysdeps/nptl/bits/thread-shared-types.h.  */
   unsigned int type = PTHREAD_MUTEX_TYPE_ELISION (mutex);
 
   LIBC_PROBE (mutex_entry, 1, mutex);
@@ -350,8 +352,14 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
     case PTHREAD_MUTEX_PI_ROBUST_NORMAL_NP:
     case PTHREAD_MUTEX_PI_ROBUST_ADAPTIVE_NP:
       {
-	int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
-	int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
+	int kind, robust;
+	{
+	  /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+	     in sysdeps/nptl/bits/thread-shared-types.h.  */
+	  int mutex_kind = atomic_load_relaxed (&(mutex->__data.__kind));
+	  kind = mutex_kind & PTHREAD_MUTEX_KIND_MASK_NP;
+	  robust = mutex_kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
+	}
 
 	if (robust)
 	  {
@@ -502,7 +510,10 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
     case PTHREAD_MUTEX_PP_NORMAL_NP:
     case PTHREAD_MUTEX_PP_ADAPTIVE_NP:
       {
-	int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
+	/* See concurrency notes regarding __kind in struct __pthread_mutex_s
+	   in sysdeps/nptl/bits/thread-shared-types.h.  */
+	int kind = atomic_load_relaxed (&(mutex->__data.__kind))
+	  & PTHREAD_MUTEX_KIND_MASK_NP;
 
 	oldval = mutex->__data.__lock;
 
@@ -607,15 +618,18 @@ hidden_def (__pthread_mutex_lock)
 void
 __pthread_mutex_cond_lock_adjust (pthread_mutex_t *mutex)
 {
-  assert ((mutex->__data.__kind & PTHREAD_MUTEX_PRIO_INHERIT_NP) != 0);
-  assert ((mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP) == 0);
-  assert ((mutex->__data.__kind & PTHREAD_MUTEX_PSHARED_BIT) == 0);
+  /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+     in sysdeps/nptl/bits/thread-shared-types.h.  */
+  int mutex_kind = atomic_load_relaxed (&(mutex->__data.__kind));
+  assert ((mutex_kind & PTHREAD_MUTEX_PRIO_INHERIT_NP) != 0);
+  assert ((mutex_kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP) == 0);
+  assert ((mutex_kind & PTHREAD_MUTEX_PSHARED_BIT) == 0);
 
   /* Record the ownership.  */
   pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
   mutex->__data.__owner = id;
 
-  if (mutex->__data.__kind == PTHREAD_MUTEX_PI_RECURSIVE_NP)
+  if (mutex_kind == PTHREAD_MUTEX_PI_RECURSIVE_NP)
     ++mutex->__data.__count;
 }
 #endif
diff --git a/nptl/pthread_mutex_setprioceiling.c b/nptl/pthread_mutex_setprioceiling.c
index 8594874f85..8306cabcf4 100644
--- a/nptl/pthread_mutex_setprioceiling.c
+++ b/nptl/pthread_mutex_setprioceiling.c
@@ -27,9 +27,10 @@ int
 pthread_mutex_setprioceiling (pthread_mutex_t *mutex, int prioceiling,
 			      int *old_ceiling)
 {
-  /* The low bits of __kind aren't ever changed after pthread_mutex_init,
-     so we don't need a lock yet.  */
-  if ((mutex->__data.__kind & PTHREAD_MUTEX_PRIO_PROTECT_NP) == 0)
+  /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+     in sysdeps/nptl/bits/thread-shared-types.h.  */
+  if ((atomic_load_relaxed (&(mutex->__data.__kind))
+       & PTHREAD_MUTEX_PRIO_PROTECT_NP) == 0)
     return EINVAL;
 
   /* See __init_sched_fifo_prio.  */
diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
index 28237b0e58..888c12fe28 100644
--- a/nptl/pthread_mutex_timedlock.c
+++ b/nptl/pthread_mutex_timedlock.c
@@ -53,6 +53,8 @@ __pthread_mutex_timedlock (pthread_mutex_t *mutex,
   /* We must not check ABSTIME here.  If the thread does not block
      abstime must not be checked for a valid value.  */
 
+  /* See concurrency notes regarding mutex type which is loaded from __kind
+     in struct __pthread_mutex_s in sysdeps/nptl/bits/thread-shared-types.h.  */
   switch (__builtin_expect (PTHREAD_MUTEX_TYPE_ELISION (mutex),
 			    PTHREAD_MUTEX_TIMED_NP))
     {
@@ -338,8 +340,14 @@ __pthread_mutex_timedlock (pthread_mutex_t *mutex,
     case PTHREAD_MUTEX_PI_ROBUST_NORMAL_NP:
     case PTHREAD_MUTEX_PI_ROBUST_ADAPTIVE_NP:
       {
-	int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
-	int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
+	int kind, robust;
+	{
+	  /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+	     in sysdeps/nptl/bits/thread-shared-types.h.  */
+	  int mutex_kind = atomic_load_relaxed (&(mutex->__data.__kind));
+	  kind = mutex_kind & PTHREAD_MUTEX_KIND_MASK_NP;
+	  robust = mutex_kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
+	}
 
 	if (robust)
 	  {
@@ -509,7 +517,10 @@ __pthread_mutex_timedlock (pthread_mutex_t *mutex,
     case PTHREAD_MUTEX_PP_NORMAL_NP:
     case PTHREAD_MUTEX_PP_ADAPTIVE_NP:
       {
-	int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
+	/* See concurrency notes regarding __kind in struct __pthread_mutex_s
+	   in sysdeps/nptl/bits/thread-shared-types.h.  */
+	int kind = atomic_load_relaxed (&(mutex->__data.__kind))
+	  & PTHREAD_MUTEX_KIND_MASK_NP;
 
 	oldval = mutex->__data.__lock;
 
diff --git a/nptl/pthread_mutex_trylock.c b/nptl/pthread_mutex_trylock.c
index 7de61f4f68..fa90c1d1e6 100644
--- a/nptl/pthread_mutex_trylock.c
+++ b/nptl/pthread_mutex_trylock.c
@@ -36,6 +36,8 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex)
   int oldval;
   pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
 
+  /* See concurrency notes regarding mutex type which is loaded from __kind
+     in struct __pthread_mutex_s in sysdeps/nptl/bits/thread-shared-types.h.  */
   switch (__builtin_expect (PTHREAD_MUTEX_TYPE_ELISION (mutex),
 			    PTHREAD_MUTEX_TIMED_NP))
     {
@@ -199,8 +201,14 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex)
     case PTHREAD_MUTEX_PI_ROBUST_NORMAL_NP:
     case PTHREAD_MUTEX_PI_ROBUST_ADAPTIVE_NP:
       {
-	int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
-	int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
+	int kind, robust;
+	{
+	  /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+	     in sysdeps/nptl/bits/thread-shared-types.h.  */
+	  int mutex_kind = atomic_load_relaxed (&(mutex->__data.__kind));
+	  kind = mutex_kind & PTHREAD_MUTEX_KIND_MASK_NP;
+	  robust = mutex_kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
+	}
 
 	if (robust)
 	  /* Note: robust PI futexes are signaled by setting bit 0.  */
@@ -325,7 +333,10 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex)
     case PTHREAD_MUTEX_PP_NORMAL_NP:
     case PTHREAD_MUTEX_PP_ADAPTIVE_NP:
       {
-	int kind = mutex->__data.__kind & PTHREAD_MUTEX_KIND_MASK_NP;
+	/* See concurrency notes regarding __kind in struct __pthread_mutex_s
+	   in sysdeps/nptl/bits/thread-shared-types.h.  */
+	int kind = atomic_load_relaxed (&(mutex->__data.__kind))
+	  & PTHREAD_MUTEX_KIND_MASK_NP;
 
 	oldval = mutex->__data.__lock;
 
diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c
index 9ea62943b7..68d04d5395 100644
--- a/nptl/pthread_mutex_unlock.c
+++ b/nptl/pthread_mutex_unlock.c
@@ -35,6 +35,8 @@ int
 attribute_hidden
 __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr)
 {
+  /* See concurrency notes regarding mutex type which is loaded from __kind
+     in struct __pthread_mutex_s in sysdeps/nptl/bits/thread-shared-types.h.  */
   int type = PTHREAD_MUTEX_TYPE_ELISION (mutex);
   if (__builtin_expect (type &
 		~(PTHREAD_MUTEX_KIND_MASK_NP|PTHREAD_MUTEX_ELISION_FLAGS_NP), 0))
@@ -222,13 +224,19 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
       /* If the previous owner died and the caller did not succeed in
 	 making the state consistent, mark the mutex as unrecoverable
 	 and make all waiters.  */
-      if ((mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP) != 0
+      /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+	 in sysdeps/nptl/bits/thread-shared-types.h.  */
+      if ((atomic_load_relaxed (&(mutex->__data.__kind))
+	   & PTHREAD_MUTEX_ROBUST_NORMAL_NP) != 0
 	  && __builtin_expect (mutex->__data.__owner
 			       == PTHREAD_MUTEX_INCONSISTENT, 0))
       pi_notrecoverable:
        newowner = PTHREAD_MUTEX_NOTRECOVERABLE;
 
-      if ((mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP) != 0)
+      /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+	 in sysdeps/nptl/bits/thread-shared-types.h.  */
+      if ((atomic_load_relaxed (&(mutex->__data.__kind))
+	   & PTHREAD_MUTEX_ROBUST_NORMAL_NP) != 0)
 	{
 	continue_pi_robust:
 	  /* Remove mutex from the list.
@@ -251,7 +259,10 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
       /* Unlock.  Load all necessary mutex data before releasing the mutex
 	 to not violate the mutex destruction requirements (see
 	 lll_unlock).  */
-      int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
+      /* See concurrency notes regarding __kind in struct __pthread_mutex_s
+	 in sysdeps/nptl/bits/thread-shared-types.h.  */
+      int robust = atomic_load_relaxed (&(mutex->__data.__kind))
+	& PTHREAD_MUTEX_ROBUST_NORMAL_NP;
       private = (robust
 		 ? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
 		 : PTHREAD_MUTEX_PSHARED (mutex));
diff --git a/nptl/tst-mutex10.c b/nptl/tst-mutex10.c
new file mode 100644
index 0000000000..e1113ca60a
--- /dev/null
+++ b/nptl/tst-mutex10.c
@@ -0,0 +1,109 @@
+/* Testing race while enabling lock elision.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <support/support.h>
+#include <support/xthread.h>
+
+static pthread_barrier_t barrier;
+static pthread_mutex_t mutex;
+static long long int iteration_count = 1000000;
+static unsigned int thread_count = 3;
+
+static void *
+thr_func (void *arg)
+{
+  long long int i;
+  for (i = 0; i < iteration_count; i++)
+    {
+      if ((uintptr_t) arg == 0)
+	{
+	  xpthread_mutex_destroy (&mutex);
+	  xpthread_mutex_init (&mutex, NULL);
+	}
+
+      xpthread_barrier_wait (&barrier);
+
+      /* Test if enabling lock elision works if it is enabled concurrently.
+	 There was a race in FORCE_ELISION macro which leads to either
+	 pthread_mutex_destroy returning EBUSY as the owner was recorded
+	 by pthread_mutex_lock - in "normal mutex" code path - but was not
+	 resetted in pthread_mutex_unlock - in "elision" code path.
+	 Or it leads to the assertion in nptl/pthread_mutex_lock.c:
+	 assert (mutex->__data.__owner == 0);
+	 Please ensure that the test is run with lock elision:
+	 export GLIBC_TUNABLES=glibc.elision.enable=1  */
+      xpthread_mutex_lock (&mutex);
+      xpthread_mutex_unlock (&mutex);
+
+      xpthread_barrier_wait (&barrier);
+    }
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  unsigned int i;
+  printf ("Starting %d threads to run %lld iterations.\n",
+	  thread_count, iteration_count);
+
+  pthread_t *threads = xmalloc (thread_count * sizeof (pthread_t));
+  xpthread_barrier_init (&barrier, NULL, thread_count);
+  xpthread_mutex_init (&mutex, NULL);
+
+  for (i = 0; i < thread_count; i++)
+    threads[i] = xpthread_create (NULL, thr_func, (void *) (uintptr_t) i);
+
+  for (i = 0; i < thread_count; i++)
+    xpthread_join (threads[i]);
+
+  xpthread_barrier_destroy (&barrier);
+  free (threads);
+
+  return EXIT_SUCCESS;
+}
+
+#define OPT_ITERATIONS	10000
+#define OPT_THREADS	10001
+#define CMDLINE_OPTIONS						\
+  { "iterations", required_argument, NULL, OPT_ITERATIONS },	\
+  { "threads", required_argument, NULL, OPT_THREADS },
+static void
+cmdline_process (int c)
+{
+  long long int arg = strtoll (optarg, NULL, 0);
+  switch (c)
+    {
+    case OPT_ITERATIONS:
+      if (arg > 0)
+	iteration_count = arg;
+      break;
+    case OPT_THREADS:
+      if (arg > 0 && arg < 100)
+	thread_count = arg;
+      break;
+    }
+}
+#define CMDLINE_PROCESS cmdline_process
+#define TIMEOUT 50
+#include <support/test-driver.c>