/* Provenance: gcc.git, libgomp/loop.c (gitweb extraction header removed).  */
1 /* Copyright (C) 2005-2015 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the LOOP (FOR/DO) construct. */
27
28 #include <limits.h>
29 #include <stdlib.h>
30 #include "libgomp.h"
31
32
/* Initialize the given work share construct WS from the given arguments:
   loop bounds START and END, increment INCR, schedule kind SCHED and
   schedule chunk size CHUNK_SIZE.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
		enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
	    ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      /* Pre-scale the chunk size by the increment so that claiming a
	 chunk is a single addition to ->next.  */
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  ws->mode != 0 means the fast path in the iterators
	   may be used: it certifies that ->next can be advanced by a
	   full chunk per thread without overflowing a long.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (incr > 0, 1))
	  {
	    /* Cheap overflow protection: if either operand uses the
	       upper half of a long's bits, the product below could
	       overflow, so fall back to the slow path.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size)
				  >= 1UL << (sizeof (long)
					     * __CHAR_BIT__ / 2 - 1), 0))
	      ws->mode = 0;
	    else
	      ws->mode = ws->end < (LONG_MAX
				    - (nthreads + 1) * ws->chunk_size);
	  }
	/* Cheap overflow protection (negative-increment mirror case).  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size)
				   >= 1UL << (sizeof (long)
					      * __CHAR_BIT__ / 2 - 1), 0))
	  ws->mode = 0;
	else
	  ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}
80
81 /* The *_start routines are called when first encountering a loop construct
82 that is not bound directly to a parallel construct. The first thread
83 that arrives will create the work-share construct; subsequent threads
84 will see the construct exists and allocate work from it.
85
86 START, END, INCR are the bounds of the loop; due to the restrictions of
87 OpenMP, these values must be the same in every thread. This is not
88 verified (nor is it entirely verifiable, since START is not necessarily
89 retained intact in the work-share data structure). CHUNK_SIZE is the
90 scheduling parameter; again this must be identical in all threads.
91
92 Returns true if there's any work for this thread to perform. If so,
93 *ISTART and *IEND are filled with the bounds of the iteration block
94 allocated to this thread. Returns false if all work was assigned to
95 other threads prior to this thread's arrival. */
96
97 static bool
98 gomp_loop_static_start (long start, long end, long incr, long chunk_size,
99 long *istart, long *iend)
100 {
101 struct gomp_thread *thr = gomp_thread ();
102
103 thr->ts.static_trip = 0;
104 if (gomp_work_share_start (false))
105 {
106 gomp_loop_init (thr->ts.work_share, start, end, incr,
107 GFS_STATIC, chunk_size);
108 gomp_work_share_init_done ();
109 }
110
111 return !gomp_iter_static_next (istart, iend);
112 }
113
114 static bool
115 gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
116 long *istart, long *iend)
117 {
118 struct gomp_thread *thr = gomp_thread ();
119 bool ret;
120
121 if (gomp_work_share_start (false))
122 {
123 gomp_loop_init (thr->ts.work_share, start, end, incr,
124 GFS_DYNAMIC, chunk_size);
125 gomp_work_share_init_done ();
126 }
127
128 #ifdef HAVE_SYNC_BUILTINS
129 ret = gomp_iter_dynamic_next (istart, iend);
130 #else
131 gomp_mutex_lock (&thr->ts.work_share->lock);
132 ret = gomp_iter_dynamic_next_locked (istart, iend);
133 gomp_mutex_unlock (&thr->ts.work_share->lock);
134 #endif
135
136 return ret;
137 }
138
139 static bool
140 gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
141 long *istart, long *iend)
142 {
143 struct gomp_thread *thr = gomp_thread ();
144 bool ret;
145
146 if (gomp_work_share_start (false))
147 {
148 gomp_loop_init (thr->ts.work_share, start, end, incr,
149 GFS_GUIDED, chunk_size);
150 gomp_work_share_init_done ();
151 }
152
153 #ifdef HAVE_SYNC_BUILTINS
154 ret = gomp_iter_guided_next (istart, iend);
155 #else
156 gomp_mutex_lock (&thr->ts.work_share->lock);
157 ret = gomp_iter_guided_next_locked (istart, iend);
158 gomp_mutex_unlock (&thr->ts.work_share->lock);
159 #endif
160
161 return ret;
162 }
163
164 bool
165 GOMP_loop_runtime_start (long start, long end, long incr,
166 long *istart, long *iend)
167 {
168 struct gomp_task_icv *icv = gomp_icv (false);
169 switch (icv->run_sched_var)
170 {
171 case GFS_STATIC:
172 return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier,
173 istart, iend);
174 case GFS_DYNAMIC:
175 return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier,
176 istart, iend);
177 case GFS_GUIDED:
178 return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier,
179 istart, iend);
180 case GFS_AUTO:
181 /* For now map to schedule(static), later on we could play with feedback
182 driven choice. */
183 return gomp_loop_static_start (start, end, incr, 0, istart, iend);
184 default:
185 abort ();
186 }
187 }
188
189 /* The *_ordered_*_start routines are similar. The only difference is that
190 this work-share construct is initialized to expect an ORDERED section. */
191
192 static bool
193 gomp_loop_ordered_static_start (long start, long end, long incr,
194 long chunk_size, long *istart, long *iend)
195 {
196 struct gomp_thread *thr = gomp_thread ();
197
198 thr->ts.static_trip = 0;
199 if (gomp_work_share_start (true))
200 {
201 gomp_loop_init (thr->ts.work_share, start, end, incr,
202 GFS_STATIC, chunk_size);
203 gomp_ordered_static_init ();
204 gomp_work_share_init_done ();
205 }
206
207 return !gomp_iter_static_next (istart, iend);
208 }
209
/* Begin an ORDERED DYNAMIC schedule loop.  Returns true with
   *ISTART/*IEND set when this thread claimed the first chunk available
   to it; the thread that claims a chunk first also becomes the head of
   the ordered queue (gomp_ordered_first).  */

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      /* Take the work-share lock BEFORE releasing the other threads via
	 gomp_work_share_init_done, so the creator claims its chunk under
	 the same lock and no other thread can race ahead of it.  */
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
234
/* Begin an ORDERED GUIDED schedule loop.  Mirror of
   gomp_loop_ordered_dynamic_start with the guided iterator.  */

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      /* Lock before init_done so the creator's first claim cannot be
	 raced by threads released from the init barrier.  */
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
259
260 bool
261 GOMP_loop_ordered_runtime_start (long start, long end, long incr,
262 long *istart, long *iend)
263 {
264 struct gomp_task_icv *icv = gomp_icv (false);
265 switch (icv->run_sched_var)
266 {
267 case GFS_STATIC:
268 return gomp_loop_ordered_static_start (start, end, incr,
269 icv->run_sched_modifier,
270 istart, iend);
271 case GFS_DYNAMIC:
272 return gomp_loop_ordered_dynamic_start (start, end, incr,
273 icv->run_sched_modifier,
274 istart, iend);
275 case GFS_GUIDED:
276 return gomp_loop_ordered_guided_start (start, end, incr,
277 icv->run_sched_modifier,
278 istart, iend);
279 case GFS_AUTO:
280 /* For now map to schedule(static), later on we could play with feedback
281 driven choice. */
282 return gomp_loop_ordered_static_start (start, end, incr,
283 0, istart, iend);
284 default:
285 abort ();
286 }
287 }
288
289 /* The *_next routines are called when the thread completes processing of
290 the iteration block currently assigned to it. If the work-share
291 construct is bound directly to a parallel construct, then the iteration
292 bounds may have been set up before the parallel. In which case, this
293 may be the first iteration for the thread.
294
295 Returns true if there is work remaining to be performed; *ISTART and
296 *IEND are filled with a new iteration block. Returns false if all work
297 has been assigned. */
298
299 static bool
300 gomp_loop_static_next (long *istart, long *iend)
301 {
302 return !gomp_iter_static_next (istart, iend);
303 }
304
305 static bool
306 gomp_loop_dynamic_next (long *istart, long *iend)
307 {
308 bool ret;
309
310 #ifdef HAVE_SYNC_BUILTINS
311 ret = gomp_iter_dynamic_next (istart, iend);
312 #else
313 struct gomp_thread *thr = gomp_thread ();
314 gomp_mutex_lock (&thr->ts.work_share->lock);
315 ret = gomp_iter_dynamic_next_locked (istart, iend);
316 gomp_mutex_unlock (&thr->ts.work_share->lock);
317 #endif
318
319 return ret;
320 }
321
322 static bool
323 gomp_loop_guided_next (long *istart, long *iend)
324 {
325 bool ret;
326
327 #ifdef HAVE_SYNC_BUILTINS
328 ret = gomp_iter_guided_next (istart, iend);
329 #else
330 struct gomp_thread *thr = gomp_thread ();
331 gomp_mutex_lock (&thr->ts.work_share->lock);
332 ret = gomp_iter_guided_next_locked (istart, iend);
333 gomp_mutex_unlock (&thr->ts.work_share->lock);
334 #endif
335
336 return ret;
337 }
338
339 bool
340 GOMP_loop_runtime_next (long *istart, long *iend)
341 {
342 struct gomp_thread *thr = gomp_thread ();
343
344 switch (thr->ts.work_share->sched)
345 {
346 case GFS_STATIC:
347 case GFS_AUTO:
348 return gomp_loop_static_next (istart, iend);
349 case GFS_DYNAMIC:
350 return gomp_loop_dynamic_next (istart, iend);
351 case GFS_GUIDED:
352 return gomp_loop_guided_next (istart, iend);
353 default:
354 abort ();
355 }
356 }
357
358 /* The *_ordered_*_next routines are called when the thread completes
359 processing of the iteration block currently assigned to it.
360
361 Returns true if there is work remaining to be performed; *ISTART and
362 *IEND are filled with a new iteration block. Returns false if all work
363 has been assigned. */
364
/* Fetch the next ordered static-schedule iteration block, after waiting
   for the thread's previous ordered section to retire.  Returns true
   while work remains for this thread.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  /* gomp_iter_static_next returns a tri-state: 0 = got a block,
     positive = this thread is done but the loop is not, negative = the
     whole iteration space is exhausted.  Only advance the ordered
     bookkeeping while this thread is still participating.  */
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}
380
381 static bool
382 gomp_loop_ordered_dynamic_next (long *istart, long *iend)
383 {
384 struct gomp_thread *thr = gomp_thread ();
385 bool ret;
386
387 gomp_ordered_sync ();
388 gomp_mutex_lock (&thr->ts.work_share->lock);
389 ret = gomp_iter_dynamic_next_locked (istart, iend);
390 if (ret)
391 gomp_ordered_next ();
392 else
393 gomp_ordered_last ();
394 gomp_mutex_unlock (&thr->ts.work_share->lock);
395
396 return ret;
397 }
398
399 static bool
400 gomp_loop_ordered_guided_next (long *istart, long *iend)
401 {
402 struct gomp_thread *thr = gomp_thread ();
403 bool ret;
404
405 gomp_ordered_sync ();
406 gomp_mutex_lock (&thr->ts.work_share->lock);
407 ret = gomp_iter_guided_next_locked (istart, iend);
408 if (ret)
409 gomp_ordered_next ();
410 else
411 gomp_ordered_last ();
412 gomp_mutex_unlock (&thr->ts.work_share->lock);
413
414 return ret;
415 }
416
417 bool
418 GOMP_loop_ordered_runtime_next (long *istart, long *iend)
419 {
420 struct gomp_thread *thr = gomp_thread ();
421
422 switch (thr->ts.work_share->sched)
423 {
424 case GFS_STATIC:
425 case GFS_AUTO:
426 return gomp_loop_ordered_static_next (istart, iend);
427 case GFS_DYNAMIC:
428 return gomp_loop_ordered_dynamic_next (istart, iend);
429 case GFS_GUIDED:
430 return gomp_loop_ordered_guided_next (istart, iend);
431 default:
432 abort ();
433 }
434 }
435
436 /* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
437 to avoid one synchronization once we get into the loop. */
438
439 static void
440 gomp_parallel_loop_start (void (*fn) (void *), void *data,
441 unsigned num_threads, long start, long end,
442 long incr, enum gomp_schedule_type sched,
443 long chunk_size, unsigned int flags)
444 {
445 struct gomp_team *team;
446
447 num_threads = gomp_resolve_num_threads (num_threads, 0);
448 team = gomp_new_team (num_threads);
449 gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
450 gomp_team_start (fn, data, num_threads, flags, team);
451 }
452
/* Entry point: combined parallel + static-schedule loop start
   (OpenMP 2.5 style; the region is joined by GOMP_parallel_end).  */
void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, 0);
}
461
/* Entry point: combined parallel + dynamic-schedule loop start
   (OpenMP 2.5 style; the region is joined by GOMP_parallel_end).  */
void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, 0);
}
470
/* Entry point: combined parallel + guided-schedule loop start
   (OpenMP 2.5 style; the region is joined by GOMP_parallel_end).  */
void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, 0);
}
479
/* Entry point: combined parallel + schedule(runtime) loop start; the
   schedule kind and chunk size come from the current task's ICVs.  */
void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_modifier, 0);
}
489
490 ialias_redirect (GOMP_parallel_end)
491
/* Entry point: combined parallel + static-schedule loop (OpenMP 4.0
   style) — starts the team, runs FN in the master thread too, then
   joins via GOMP_parallel_end.  */
void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
502
/* Entry point: combined parallel + dynamic-schedule loop (OpenMP 4.0
   style) — starts the team, runs FN in the master thread too, then
   joins via GOMP_parallel_end.  */
void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
513
/* Entry point: combined parallel + guided-schedule loop (OpenMP 4.0
   style) — starts the team, runs FN in the master thread too, then
   joins via GOMP_parallel_end.  */
void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
			  unsigned num_threads, long start, long end,
			  long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
524
/* Entry point: combined parallel + schedule(runtime) loop (OpenMP 4.0
   style); schedule kind and chunk size come from the task's ICVs.  */
void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_modifier,
			    flags);
  fn (data);
  GOMP_parallel_end ();
}
537
538 /* The GOMP_loop_end* routines are called after the thread is told that
539 all loop iterations are complete. The first two versions synchronize
540 all threads; the nowait version does not. */
541
/* End of a loop construct: release the work share and synchronize all
   threads of the team (implicit barrier).  */
void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}
547
/* End of a cancellable loop construct: barrier-synchronizing release
   whose return value reports whether the construct was cancelled.  */
bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}
553
/* End of a nowait loop construct: release the work share without a
   barrier.  */
void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}
559
560
561 /* We use static functions above so that we're sure that the "runtime"
562 function can defer to the proper routine without interposition. We
563 export the static function with a strong alias when possible, or with
564 a wrapper function otherwise. */
565
#ifdef HAVE_ATTRIBUTE_ALIAS
/* Export the internal entry points directly under their public GOMP_*
   names via strong aliases — zero call overhead.  */
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
	__attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
	__attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
	__attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
	__attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
	__attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
	__attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
/* Fallback for targets without attribute alias support: thin wrapper
   functions with the public names.  */
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif