1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
/* Elided pthread mutex lock.
Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <pthread.h>
#include <pthreadP.h>
#include <lowlevellock.h>
#include <htmintrin.h>
#include <elision-conf.h>
#include <stdint.h>
#if !defined(LLL_LOCK) && !defined(EXTRAARG)
/* Make sure the configuration code is always linked in for static
libraries. */
#include "elision-conf.c"
#endif
#ifndef EXTRAARG
#define EXTRAARG
#endif
#ifndef LLL_LOCK
#define LLL_LOCK(a,b) lll_lock(a,b), 0
#endif
#define aconf __elision_aconf
/* Adaptive lock using transactions.
By default the lock region is run as a transaction, and when it
aborts or the lock is busy the lock adapts itself. */
int
__lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
{
/* adapt_count can be accessed concurrently; these accesses can be both
inside of transactions (if critical sections are nested and the outer
critical section uses lock elision) and outside of transactions. Thus,
we need to use atomic accesses to avoid data races. However, the
value of adapt_count is just a hint, so relaxed MO accesses are
sufficient. */
if (atomic_load_relaxed (adapt_count) > 0)
{
/* Lost updates are possible, but harmless. Due to races this might lead
to *adapt_count becoming less than zero. */
atomic_store_relaxed (adapt_count,
atomic_load_relaxed (adapt_count) - 1);
goto use_lock;
}
__asm__ volatile (".machinemode \"zarch_nohighgprs\"\n\t"
".machine \"all\""
: : : "memory");
int try_tbegin;
for (try_tbegin = aconf.try_tbegin;
try_tbegin > 0;
try_tbegin--)
{
unsigned status;
if (__builtin_expect
((status = __builtin_tbegin((void *)0)) == _HTM_TBEGIN_STARTED, 1))
{
if (*futex == 0)
return 0;
/* Lock was busy. Fall back to normal locking. */
if (__builtin_expect (__builtin_tx_nesting_depth (), 1))
{
/* In a non-nested transaction there is no need to abort,
which is expensive. */
__builtin_tend ();
/* Don't try to use transactions for the next couple of times.
See above for why relaxed MO is sufficient. */
if (aconf.skip_lock_busy > 0)
atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
goto use_lock;
}
else /* nesting depth is > 1 */
{
/* A nested transaction will abort eventually because it
cannot make any progress before *futex changes back to 0.
So we may as well abort immediately.
This persistently aborts the outer transaction to force
the outer mutex use the default lock instead of retrying
with transactions until the try_tbegin of the outer mutex
is zero.
The adapt_count of this inner mutex is not changed,
because using the default lock with the inner mutex
would abort the outer transaction.
*/
__builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
}
}
else
{
if (status != _HTM_TBEGIN_TRANSIENT)
{
/* A persistent abort (cc 1 or 3) indicates that a retry is
probably futile. Use the normal locking now and for the
next couple of calls.
Be careful to avoid writing to the lock. See above for why
relaxed MO is sufficient. */
if (aconf.skip_lock_internal_abort > 0)
atomic_store_relaxed (adapt_count,
aconf.skip_lock_internal_abort);
goto use_lock;
}
}
}
/* Same logic as above, but for for a number of temporary failures in a
row. See above for why relaxed MO is sufficient. */
if (aconf.skip_lock_out_of_tbegin_retries > 0 && aconf.try_tbegin > 0)
atomic_store_relaxed (adapt_count, aconf.skip_lock_out_of_tbegin_retries);
use_lock:
return LLL_LOCK ((*futex), private);
}
|