libitm: Add missing undo-logging of RfW src regions in gl_wt memtransfer.
[gcc.git] / libitm / method-gl.cc
/* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
   Contributed by Torvald Riegel <triegel@redhat.com>.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "libitm_i.h"

using namespace GTM;

namespace {

// This group consists of all TM methods that synchronize via just a single
// global lock (or ownership record).
struct gl_mg : public method_group
{
  static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1;
  // We can't use the full bitrange because ~0 in gtm_thread::shared_state has
  // special meaning.
  static const gtm_word VERSION_MAX = (~(gtm_word)0 >> 1) - 1;
  static bool is_locked(gtm_word l) { return l & LOCK_BIT; }
  static gtm_word set_locked(gtm_word l) { return l | LOCK_BIT; }
  static gtm_word clear_locked(gtm_word l) { return l & ~LOCK_BIT; }
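  // For illustration (assuming, e.g., a 64-bit gtm_word): LOCK_BIT is then
  // 0x8000000000000000, so the most significant bit marks the orec as locked
  // and the remaining bits hold the version number; VERSION_MAX is
  // 0x7ffffffffffffffe, and ~(gtm_word)0 stays reserved because it denotes
  // an inactive transaction in gtm_thread::shared_state.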

  // The global ownership record.
  atomic<gtm_word> orec;

  virtual void init()
  {
    // This store is only executed while holding the serial lock, so relaxed
    // memory order is sufficient here.
    orec.store(0, memory_order_relaxed);
  }
  virtual void fini() { }
};

// TODO cacheline padding
static gl_mg o_gl_mg;


// The global lock, write-through TM method.
// Acquires the orec eagerly before the first write, and then writes through.
// Reads abort if the global orec's version number changed or if it is locked.
// Currently, writes require undo-logging to prevent deadlock between the
// serial lock and the global orec (writer txn acquires the orec, reader txn
// upgrades to serial and waits for all other txns, writer tries to upgrade to
// serial too but cannot, writer cannot abort either, deadlock; see the
// timeline sketch below).  We could avoid this if the serial lock allowed us
// to prevent other threads from going to serial mode, but this is probably
// too much additional complexity just to optimize this TM method.
// gtm_thread::shared_state is used to store a transaction's current
// snapshot time (or commit time).  The serial lock uses ~0 for inactive
// transactions and 0 for active ones.  Thus, we always have a meaningful
// timestamp in shared_state that can be used to implement quiescence-based
// privatization safety.  This even holds if a writing transaction has the
// lock bit set in its shared_state because this is fine for both the serial
// lock (the value will be smaller than ~0) and privatization safety (we
// validate that no other update transaction committed before we acquired the
// orec, so we have the most recent timestamp and no other transaction can
// commit until we have committed).
// However, we therefore cannot use this method for a serial transaction
// (because shared_state needs to remain at ~0), and we have to be careful
// when switching to serial mode (see the special handling in trycommit() and
// rollback()).
// ??? This sharing adds some complexity wrt. serial mode.  Just use a
// separate state variable?
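// Restated as a timeline, assuming one updating and one reading transaction
// (a sketch of the deadlock described above, not an additional mechanism):
//   writer: acquires the global orec and starts writing through in place
//   reader: upgrades to serial mode and waits for all other txns to finish
//   writer: also needs serial mode and waits for the serial lock
// Without undo logs the writer's in-place writes could not be reverted, so
// it could not abort and neither transaction would make progress; with undo
// logs the writer can be rolled back, which lets the reader's upgrade
// complete.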
class gl_wt_dispatch : public abi_dispatch
{
protected:
  static void pre_write(const void *addr, size_t len,
                        gtm_thread *tx = gtm_thr())
  {
    gtm_word v = tx->shared_state.load(memory_order_relaxed);
    if (unlikely(!gl_mg::is_locked(v)))
      {
        // Check for and handle version number overflow.
        if (unlikely(v >= gl_mg::VERSION_MAX))
          tx->restart(RESTART_INIT_METHOD_GROUP);

        // This validates that we have a consistent snapshot, which is also
        // necessary for making privatization safety work (see the class'
        // comments).  Note that this check here will be performed by the
        // subsequent CAS again, so relaxed memory order is fine.
        gtm_word now = o_gl_mg.orec.load(memory_order_relaxed);
        if (now != v)
          tx->restart(RESTART_VALIDATE_WRITE);

        // CAS global orec from our snapshot time to the locked state.
        // We need acq_rel memory order here to synchronize with other loads
        // and modifications of orec.
        if (!o_gl_mg.orec.compare_exchange_strong (now, gl_mg::set_locked(now),
                                                   memory_order_acq_rel))
          tx->restart(RESTART_LOCKED_WRITE);

        // We use an explicit fence here to avoid having to use release
        // memory order for all subsequent data stores.  This fence will
        // synchronize with loads of the data with acquire memory order.  See
        // validate() for why this is necessary.
        atomic_thread_fence(memory_order_release);

        // Set shared_state to the new value.
        tx->shared_state.store(gl_mg::set_locked(now), memory_order_release);
      }

    tx->undolog.log(addr, len);
  }

  static void validate(gtm_thread *tx = gtm_thr())
  {
    // Check that the snapshot is consistent.  We expect the previous data
    // load to have acquire memory order, or to be atomic and followed by an
    // acquire fence.
    // As a result, the data load will synchronize with the release fence
    // issued by the transaction whose data updates the data load has read
    // from.  This forces the orec load to read from a visible sequence of
    // side effects that starts with the other updating transaction's store
    // that acquired the orec and set it to locked.
    // We therefore either read a value with the locked bit set (and restart)
    // or read an orec value that was written after the data had been
    // written.  Either way we can detect inconsistent reads because the orec
    // will then hold a higher/different value than our snapshot.
    gtm_word l = o_gl_mg.orec.load(memory_order_relaxed);
    if (l != tx->shared_state.load(memory_order_relaxed))
      tx->restart(RESTART_VALIDATE_READ);
  }
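
  // Informal sketch of the pairing between pre_write() and validate()
  // (an illustration of the comments above; the nonatomic-data caveats in
  // load() and store() still apply):
  //   updating transaction                 reading transaction
  //     CAS orec -> locked (acq_rel)
  //     fence(release)
  //     *addr = value (write-through)        v = *addr
  //                                          fence(acquire)
  //                                          l = orec.load(relaxed)
  //   If the data load reads from the updater's write-through store, the
  //   acquire fence synchronizes with the updater's release fence, so the
  //   orec load observes the locked value or a later version, the comparison
  //   against our snapshot in shared_state fails, and we restart with
  //   RESTART_VALIDATE_READ.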

  template <typename V> static V load(const V* addr, ls_modifier mod)
  {
    // Read-for-write should be unlikely, but we need to handle it or we will
    // break later WaW optimizations.
    if (unlikely(mod == RfW))
      {
        pre_write(addr, sizeof(V));
        return *addr;
      }
    if (unlikely(mod == RaW))
      return *addr;

    // We have not acquired the orec, so we need to load a value and then
    // validate that this was consistent.
    // This needs to have acquire memory order (see validate()).
    // Alternatively, we can put an acquire fence after the data load, but
    // this is probably less efficient.
    // FIXME We would need an atomic load with acquire memory order here, but
    // we can't just forge an atomic load for nonatomic data because this
    // might not work on all implementations of atomics.  However, we need
    // the acquire memory order, and we can only establish this if we link
    // it to the matching release using a reads-from relation between atomic
    // loads.  Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the load would be moved to
    // after the fence, we potentially don't synchronize properly anymore).
    // Instead of the following, just use an ordinary load followed by an
    // acquire fence, and hope that this is good enough for now:
    // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
    V v = *addr;
    atomic_thread_fence(memory_order_acquire);
    validate();
    return v;
  }

  template <typename V> static void store(V* addr, const V value,
                                           ls_modifier mod)
  {
    if (likely(mod != WaW))
      pre_write(addr, sizeof(V));
    // FIXME We would need an atomic store here, but we can't just forge an
    // atomic store for nonatomic data because this might not work on all
    // implementations of atomics.  However, we need this store to link the
    // release fence in pre_write() to the acquire operation in load, which
    // is only guaranteed if we have a reads-from relation between atomic
    // accesses.  Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the store would be moved
    // to before the release fence in pre_write(), things could go wrong).
    // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
    *addr = value;
  }

public:
  static void memtransfer_static(void *dst, const void* src, size_t size,
                                 bool may_overlap, ls_modifier dst_mod,
                                 ls_modifier src_mod)
  {
    gtm_thread *tx = gtm_thr();
    if (dst_mod != WaW && dst_mod != NONTXNAL)
      pre_write(dst, size, tx);
    // We need at least undo-logging for an RfW src region because we might
    // subsequently write there with WaW.
    if (src_mod == RfW)
      pre_write(src, size, tx);
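    // For example, the transaction may first copy out of a src region that
    // it accesses read-for-write and then update that same region with
    // stores marked WaW; those stores skip pre_write() (see store()), so
    // without the undo-log entry taken here, rollback could not restore the
    // region's original contents.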

    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memcpy/memmove are good enough.
    if (!may_overlap)
      ::memcpy(dst, src, size);
    else
      ::memmove(dst, src, size);

    if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL
        && dst_mod != WaW)
      validate(tx);
  }

  static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
  {
    if (mod != WaW)
      pre_write(dst, size);
    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memset is good enough.
    ::memset(dst, c, size);
  }

  virtual gtm_restart_reason begin_or_restart()
  {
    // We don't need to do anything for nested transactions.
    gtm_thread *tx = gtm_thr();
    if (tx->parent_txns.size() > 0)
      return NO_RESTART;

    // Spin until the global orec is not locked.
    // TODO This is not necessary if there are no pure loads (check txn props).
    unsigned i = 0;
    gtm_word v;
    while (1)
      {
        // We need acquire memory order here so that this load will
        // synchronize with the store that releases the orec in trycommit().
        // In turn, this makes sure that subsequent data loads will read from
        // a visible sequence of side effects that starts with the most recent
        // store to the data right before the release of the orec.
        v = o_gl_mg.orec.load(memory_order_acquire);
        if (!gl_mg::is_locked(v))
          break;
        // TODO need method-specific max spin count
        if (++i > gtm_spin_count_var)
          return RESTART_VALIDATE_READ;
        cpu_relax();
      }

    // Everything is okay; we have a snapshot time.
    // We don't need to enforce any ordering for the following store.  There
    // are no earlier data loads in this transaction, so the store cannot
    // become visible before those (which could violate privatization
    // safety).  The store can become visible after later loads, but this
    // does not matter because the previous value will have been smaller or
    // equal (the serial lock will set shared_state to zero when marking the
    // transaction as active, and restarts enforce immediate visibility of a
    // smaller or equal value with a barrier (see rollback())).
    tx->shared_state.store(v, memory_order_relaxed);
    return NO_RESTART;
  }

  virtual bool trycommit(gtm_word& priv_time)
  {
    gtm_thread* tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);

    // Special case: If shared_state is ~0, then we have acquired the
    // serial lock (tx->state is not updated yet).  In this case, the previous
    // value isn't available anymore, so grab it from the global lock, which
    // must have a meaningful value because no other transactions are active
    // anymore.  In particular, if it is locked, then we are an update
    // transaction, which is all we care about for commit.
    if (v == ~(typeof v)0)
      v = o_gl_mg.orec.load(memory_order_relaxed);

    // Release the orec but do not reset shared_state, which will be modified
    // by the serial lock right after our commit anyway.  Also, resetting
    // shared_state here would interfere with the serial lock's use of this
    // location.
    if (gl_mg::is_locked(v))
      {
        // Release the global orec, increasing its version number / timestamp.
        // See begin_or_restart() for why we need release memory order here.
        v = gl_mg::clear_locked(v) + 1;
        o_gl_mg.orec.store(v, memory_order_release);

        // Need to ensure privatization safety.  Every other transaction must
        // have a snapshot time that is at least as high as our commit time
        // (i.e., our commit must be visible to them).
        priv_time = v;
      }
    return true;
  }

  virtual void rollback(gtm_transaction_cp *cp)
  {
    // We don't do anything for rollbacks of nested transactions.
    if (cp != 0)
      return;

    gtm_thread *tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);
    // Special case: If shared_state is ~0, then we have acquired the
    // serial lock (tx->state is not updated yet).  In this case, the previous
    // value isn't available anymore, so grab it from the global lock, which
    // must have a meaningful value because no other transactions are active
    // anymore.  In particular, if it is locked, then we are an update
    // transaction, which is all we care about for rollback.
    bool is_serial = v == ~(typeof v)0;
    if (is_serial)
      v = o_gl_mg.orec.load(memory_order_relaxed);

    // Release lock and increment version number to prevent dirty reads.
    // Also reset shared_state here, so that begin_or_restart() can expect a
    // value that is correct wrt. privatization safety.
    if (gl_mg::is_locked(v))
      {
        // Release the global orec, increasing its version number / timestamp.
        // See begin_or_restart() for why we need release memory order here.
        v = gl_mg::clear_locked(v) + 1;
        o_gl_mg.orec.store(v, memory_order_release);

        // Also reset the timestamp published via shared_state.
        // Special case: Only do this if we are not a serial transaction
        // because otherwise, we would interfere with the serial lock.
        if (!is_serial)
          tx->shared_state.store(v, memory_order_release);

        // We need a store-load barrier after this store to prevent it
        // from becoming visible after later data loads because the
        // previous value of shared_state has been higher than the actual
        // snapshot time (the lock bit had been set), which could break
        // privatization safety.  We do not need a barrier before this
        // store (see pre_write() for an explanation).
        // ??? What is the precise reasoning in the C++11 model?
        atomic_thread_fence(memory_order_seq_cst);
      }
  }

  CREATE_DISPATCH_METHODS(virtual, )
  CREATE_DISPATCH_METHODS_MEM()

  gl_wt_dispatch() : abi_dispatch(false, true, false, false, &o_gl_mg)
  { }
};

} // anon namespace

static const gl_wt_dispatch o_gl_wt_dispatch;

abi_dispatch *
GTM::dispatch_gl_wt ()
{
  return const_cast<gl_wt_dispatch *>(&o_gl_wt_dispatch);
}