// Merge from transactional-memory branch.
// [gcc.git] / libitm / memcpy.cc
/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "libitm_i.h"

using namespace GTM;

29 static void
30 do_memcpy (uintptr_t idst, uintptr_t isrc, size_t size,
31 abi_dispatch::lock_type W, abi_dispatch::lock_type R)
32 {
33 abi_dispatch *disp = abi_disp();
34 // The position in the destination cacheline where *IDST starts.
35 uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
36 // The position in the source cacheline where *ISRC starts.
37 uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
38 const gtm_cacheline *src
39 = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
40 gtm_cacheline *dst
41 = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
42 const gtm_cacheline *sline;
43 abi_dispatch::mask_pair dpair;
44
45 if (size == 0)
46 return;
47
48 // If both SRC and DST data start at the same position in the cachelines,
49 // we can easily copy the data in tandem, cacheline by cacheline...
50 if (dofs == sofs)
51 {
52 // We copy the data in three stages:
53
54 // (a) Copy stray bytes at the beginning that are smaller than a
55 // cacheline.
56 if (sofs != 0)
57 {
58 size_t sleft = CACHELINE_SIZE - sofs;
59 size_t min = (size <= sleft ? size : sleft);
60
61 dpair = disp->write_lock(dst, W);
62 sline = disp->read_lock(src, R);
63 *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << sofs;
64 memcpy (&dpair.line->b[sofs], &sline->b[sofs], min);
65 dst++;
66 src++;
67 size -= min;
68 }
69
70 // (b) Copy subsequent cacheline sized chunks.
71 while (size >= CACHELINE_SIZE)
72 {
73 dpair = disp->write_lock(dst, W);
74 sline = disp->read_lock(src, R);
75 *dpair.mask = -1;
76 *dpair.line = *sline;
77 dst++;
78 src++;
79 size -= CACHELINE_SIZE;
80 }
81
82 // (c) Copy anything left over.
83 if (size != 0)
84 {
85 dpair = disp->write_lock(dst, W);
86 sline = disp->read_lock(src, R);
87 *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
88 memcpy (dpair.line, sline, size);
89 }
90 }
91 // ... otherwise, we must copy the data in disparate hunks using
92 // temporary storage.
93 else
94 {
95 gtm_cacheline c;
96 size_t sleft = CACHELINE_SIZE - sofs;
97
98 sline = disp->read_lock(src, R);
99
100 // As above, we copy the data in three stages:
101
102 // (a) Copy stray bytes at the beginning that are smaller than a
103 // cacheline.
104 if (dofs != 0)
105 {
106 size_t dleft = CACHELINE_SIZE - dofs;
107 size_t min = (size <= dleft ? size : dleft);
108
109 dpair = disp->write_lock(dst, W);
110 *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
111
112 // If what's left in the source cacheline will fit in the
113 // rest of the destination cacheline, straight up copy it.
114 if (min <= sleft)
115 {
116 memcpy (&dpair.line->b[dofs], &sline->b[sofs], min);
117 sofs += min;
118 }
119 // Otherwise, we need more bits from the source cacheline
120 // that are available. Piece together what we need from
121 // contiguous (source) cachelines, into temp space, and copy
122 // it over.
123 else
124 {
125 memcpy (&c, &sline->b[sofs], sleft);
126 sline = disp->read_lock(++src, R);
127 sofs = min - sleft;
128 memcpy (&c.b[sleft], sline, sofs);
129 memcpy (&dpair.line->b[dofs], &c, min);
130 }
131 sleft = CACHELINE_SIZE - sofs;
132
133 dst++;
134 size -= min;
135 }
136
137 // (b) Copy subsequent cacheline sized chunks.
138 while (size >= CACHELINE_SIZE)
139 {
140 // We have a full (destination) cacheline where to put the
141 // data, but to get to the corresponding cacheline sized
142 // chunk in the source, we have to piece together two
143 // contiguous source cachelines.
144
145 memcpy (&c, &sline->b[sofs], sleft);
146 sline = disp->read_lock(++src, R);
147 memcpy (&c.b[sleft], sline, sofs);
148
149 dpair = disp->write_lock(dst, W);
150 *dpair.mask = -1;
151 *dpair.line = c;
152
153 dst++;
154 size -= CACHELINE_SIZE;
155 }
156
157 // (c) Copy anything left over.
158 if (size != 0)
159 {
160 dpair = disp->write_lock(dst, W);
161 *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
162 // If what's left to copy is entirely in the remaining
163 // source cacheline, do it.
164 if (size <= sleft)
165 memcpy (dpair.line, &sline->b[sofs], size);
166 // Otherwise, piece together the remaining bits, and copy.
167 else
168 {
169 memcpy (&c, &sline->b[sofs], sleft);
170 sline = disp->read_lock(++src, R);
171 memcpy (&c.b[sleft], sline, size - sleft);
172 memcpy (dpair.line, &c, size);
173 }
174 }
175 }
176 }
177
178 static void
179 do_memmove (uintptr_t idst, uintptr_t isrc, size_t size,
180 abi_dispatch::lock_type W, abi_dispatch::lock_type R)
181 {
182 abi_dispatch *disp = abi_disp();
183 uintptr_t dleft, sleft, sofs, dofs;
184 const gtm_cacheline *sline;
185 abi_dispatch::mask_pair dpair;
186
187 if (size == 0)
188 return;
189
190 /* The co-aligned memmove below doesn't work for DST == SRC, so filter
191 that out. It's tempting to just return here, as this is a no-op move.
192 However, our caller has the right to expect the locks to be acquired
193 as advertized. */
194 if (__builtin_expect (idst == isrc, 0))
195 {
196 /* If the write lock is already acquired, nothing to do. */
197 if (W == abi_dispatch::WaW)
198 return;
199 /* If the destination is protected, acquire a write lock. */
200 if (W != abi_dispatch::NOLOCK)
201 R = abi_dispatch::RfW;
202 /* Notice serial mode, where we don't acquire locks at all. */
203 if (R == abi_dispatch::NOLOCK)
204 return;
205
206 idst = isrc + size;
207 for (isrc &= -CACHELINE_SIZE; isrc < idst; isrc += CACHELINE_SIZE)
208 disp->read_lock(reinterpret_cast<const gtm_cacheline *>(isrc), R);
209 return;
210 }
211
212 /* Fall back to memcpy if the implementation above can handle it. */
213 if (idst < isrc || isrc + size <= idst)
214 {
215 do_memcpy (idst, isrc, size, W, R);
216 return;
217 }
218
219 /* What remains requires a backward copy from the end of the blocks. */
220 idst += size;
221 isrc += size;
222 dofs = idst & (CACHELINE_SIZE - 1);
223 sofs = isrc & (CACHELINE_SIZE - 1);
224 dleft = CACHELINE_SIZE - dofs;
225 sleft = CACHELINE_SIZE - sofs;
226
227 gtm_cacheline *dst
228 = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
229 const gtm_cacheline *src
230 = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
231 if (dofs == 0)
232 dst--;
233 if (sofs == 0)
234 src--;
235
236 if (dofs == sofs)
237 {
238 /* Since DST and SRC are co-aligned, and we didn't use the memcpy
239 optimization above, that implies that SIZE > CACHELINE_SIZE. */
240 if (sofs != 0)
241 {
242 dpair = disp->write_lock(dst, W);
243 sline = disp->read_lock(src, R);
244 *dpair.mask |= ((gtm_cacheline_mask)1 << sleft) - 1;
245 memcpy (dpair.line, sline, sleft);
246 dst--;
247 src--;
248 size -= sleft;
249 }
250
251 while (size >= CACHELINE_SIZE)
252 {
253 dpair = disp->write_lock(dst, W);
254 sline = disp->read_lock(src, R);
255 *dpair.mask = -1;
256 *dpair.line = *sline;
257 dst--;
258 src--;
259 size -= CACHELINE_SIZE;
260 }
261
262 if (size != 0)
263 {
264 size_t ofs = CACHELINE_SIZE - size;
265 dpair = disp->write_lock(dst, W);
266 sline = disp->read_lock(src, R);
267 *dpair.mask |= (((gtm_cacheline_mask)1 << size) - 1) << ofs;
268 memcpy (&dpair.line->b[ofs], &sline->b[ofs], size);
269 }
270 }
271 else
272 {
273 gtm_cacheline c;
274
275 sline = disp->read_lock(src, R);
276 if (dofs != 0)
277 {
278 size_t min = (size <= dofs ? size : dofs);
279
280 if (min <= sofs)
281 {
282 sofs -= min;
283 memcpy (&c, &sline->b[sofs], min);
284 }
285 else
286 {
287 size_t min_ofs = min - sofs;
288 memcpy (&c.b[min_ofs], sline, sofs);
289 sline = disp->read_lock(--src, R);
290 sofs = CACHELINE_SIZE - min_ofs;
291 memcpy (&c, &sline->b[sofs], min_ofs);
292 }
293
294 dofs = dleft - min;
295 dpair = disp->write_lock(dst, W);
296 *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
297 memcpy (&dpair.line->b[dofs], &c, min);
298
299 sleft = CACHELINE_SIZE - sofs;
300 dst--;
301 size -= min;
302 }
303
304 while (size >= CACHELINE_SIZE)
305 {
306 memcpy (&c.b[sleft], sline, sofs);
307 sline = disp->read_lock(--src, R);
308 memcpy (&c, &sline->b[sofs], sleft);
309
310 dpair = disp->write_lock(dst, W);
311 *dpair.mask = -1;
312 *dpair.line = c;
313
314 dst--;
315 size -= CACHELINE_SIZE;
316 }
317
318 if (size != 0)
319 {
320 dofs = CACHELINE_SIZE - size;
321
322 memcpy (&c.b[sleft], sline, sofs);
323 if (sleft > dofs)
324 {
325 sline = disp->read_lock(--src, R);
326 memcpy (&c, &sline->b[sofs], sleft);
327 }
328
329 dpair = disp->write_lock(dst, W);
330 *dpair.mask |= (gtm_cacheline_mask)-1 << dofs;
331 memcpy (&dpair.line->b[dofs], &c.b[dofs], size);
332 }
333 }
334 }
335
336 #define ITM_MEM_DEF(NAME, READ, WRITE) \
337 void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size) \
338 { \
339 do_memcpy ((uintptr_t)dst, (uintptr_t)src, size, \
340 abi_dispatch::WRITE, abi_dispatch::READ); \
341 } \
342 void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) \
343 { \
344 do_memmove ((uintptr_t)dst, (uintptr_t)src, size, \
345 abi_dispatch::WRITE, abi_dispatch::READ); \
346 }
347
348 ITM_MEM_DEF(RnWt, NOLOCK, W)
349 ITM_MEM_DEF(RnWtaR, NOLOCK, WaR)
350 ITM_MEM_DEF(RnWtaW, NOLOCK, WaW)
351
352 ITM_MEM_DEF(RtWn, R, NOLOCK)
353 ITM_MEM_DEF(RtWt, R, W)
354 ITM_MEM_DEF(RtWtaR, R, WaR)
355 ITM_MEM_DEF(RtWtaW, R, WaW)
356
357 ITM_MEM_DEF(RtaRWn, RaR, NOLOCK)
358 ITM_MEM_DEF(RtaRWt, RaR, W)
359 ITM_MEM_DEF(RtaRWtaR, RaR, WaR)
360 ITM_MEM_DEF(RtaRWtaW, RaR, WaW)
361
362 ITM_MEM_DEF(RtaWWn, RaW, NOLOCK)
363 ITM_MEM_DEF(RtaWWt, RaW, W)
364 ITM_MEM_DEF(RtaWWtaR, RaW, WaR)
365 ITM_MEM_DEF(RtaWWtaW, RaW, WaW)