1 /* Copyright (C) 2005-2020 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of threads in response to team
27 creation and termination. */
/* File-scope state for thread/team management (pthreads builds).
   NOTE(review): this extraction is lossy -- original line numbers are
   embedded in the text and some lines are missing.  */
34 #ifdef LIBGOMP_USE_PTHREADS
/* Attribute block used for threads this file creates (see its use with
   pthread_create/pthread_attr_getstacksize in gomp_team_start).  */
35 pthread_attr_t gomp_thread_attr
;
37 /* This key is for the thread destructor. */
38 pthread_key_t gomp_thread_destructor
;
41 /* This is the libgomp per-thread data structure. */
42 #if defined HAVE_TLS || defined USE_EMUTLS
43 __thread
struct gomp_thread gomp_tls_data
;
/* NOTE(review): an #else presumably belongs before this key -- it is the
   non-TLS fallback used by initialize_team/gomp_thread_start; the
   directive appears lost in extraction.  Confirm against upstream.  */
45 pthread_key_t gomp_tls_key
;
49 /* This structure is used to communicate across pthread_create. */
51 struct gomp_thread_start_data
/* Team state the new thread adopts on startup.  */
55 struct gomp_team_state ts
;
/* Implicit task assigned to the new thread.  */
56 struct gomp_task
*task
;
/* Pool the new thread belongs to.  */
57 struct gomp_thread_pool
*thread_pool
;
/* NOTE(review): extraction appears lossy -- fields referenced by
   gomp_thread_start (fn_data, place, handle, nested, and the thread
   function pointer) are not visible in this struct.  Confirm against
   upstream.  */
64 /* This function is a pthread_create entry point. This contains the idle
65 loop in which a thread waits to be called up to become part of a team. */
/* NOTE(review): lossy extraction -- the return type line, opening brace,
   the declaration of `local_data' and several control-flow lines are not
   visible; comments below annotate only what is visible.  */
68 gomp_thread_start (void *xdata
)
70 struct gomp_thread_start_data
*data
= xdata
;
71 struct gomp_thread
*thr
;
72 struct gomp_thread_pool
*pool
;
73 void (*local_fn
) (void *);
76 #if defined HAVE_TLS || defined USE_EMUTLS
79 struct gomp_thread local_thr
;
/* Register this thread's gomp_thread structure under the TLS key and
   create the semaphore other threads use to release it.  */
81 pthread_setspecific (gomp_tls_key
, thr
);
83 gomp_sem_init (&thr
->release
, 0);
85 /* Extract what we need from data. */
87 local_data
= data
->fn_data
;
88 thr
->thread_pool
= data
->thread_pool
;
90 thr
->task
= data
->task
;
91 thr
->place
= data
->place
;
92 #ifdef GOMP_NEEDS_THREAD_HANDLE
93 thr
->handle
= data
->handle
;
/* Publish our release semaphore in the team's ordered_release slot.  */
96 thr
->ts
.team
->ordered_release
[thr
->ts
.team_id
] = &thr
->release
;
98 /* Make thread pool local. */
99 pool
= thr
->thread_pool
;
/* NOTE(review): the branch condition selecting between the two paths
   below is not visible; presumably data->nested vs. pool-docked
   startup -- confirm against upstream.  */
103 struct gomp_team
*team
= thr
->ts
.team
;
104 struct gomp_task
*task
= thr
->task
;
106 gomp_barrier_wait (&team
->barrier
);
/* Run the team body once, then finish the implicit task and wait out
   the end-of-team barriers.  */
108 local_fn (local_data
);
109 gomp_team_barrier_wait_final (&team
->barrier
);
110 gomp_finish_task (task
);
111 gomp_barrier_wait_last (&team
->barrier
);
/* Pool path: record ourselves in the pool's thread array, then dock.  */
115 pool
->threads
[thr
->ts
.team_id
] = thr
;
/* Idle loop: dock on the pool barrier until handed new work (this is
   the idle loop referred to by the header comment).  */
117 gomp_simple_barrier_wait (&pool
->threads_dock
);
120 struct gomp_team
*team
= thr
->ts
.team
;
121 struct gomp_task
*task
= thr
->task
;
123 local_fn (local_data
);
124 gomp_team_barrier_wait_final (&team
->barrier
);
125 gomp_finish_task (task
);
127 gomp_simple_barrier_wait (&pool
->threads_dock
);
130 local_data
= thr
->data
;
/* Thread is exiting: release its semaphore, detach, and drop the pool
   reference.  */
136 gomp_sem_destroy (&thr
->release
);
137 pthread_detach (pthread_self ());
138 thr
->thread_pool
= NULL
;
/* Return the pool's cached last_team if it was built for the same
   NTHREADS, clearing the cache slot; only taken on the non-nested path
   (thr->ts.team == NULL).  NOTE(review): the return statements and
   closing braces are not visible in this extraction.  */
144 static inline struct gomp_team
*
145 get_last_team (unsigned nthreads
)
147 struct gomp_thread
*thr
= gomp_thread ();
148 if (thr
->ts
.team
== NULL
)
150 struct gomp_thread_pool
*pool
= gomp_get_thread_pool (thr
, nthreads
);
151 struct gomp_team
*last_team
= pool
->last_team
;
/* Reuse only when the cached team matches the requested size.  */
152 if (last_team
!= NULL
&& last_team
->nthreads
== nthreads
)
154 pool
->last_team
= NULL
;
161 /* Create a new team data structure. */
/* Tries get_last_team first; otherwise allocates the team plus trailing
   per-thread implicit_task and ordered_release arrays in one block, then
   initializes barrier, locks, work shares and the task queue.
   NOTE(review): lossy extraction -- the declaration of `i', the function
   signature's return-type line and the return statement are not
   visible.  */
164 gomp_new_team (unsigned nthreads
)
166 struct gomp_team
*team
;
169 team
= get_last_team (nthreads
);
/* Per-thread trailing storage: one ordered_release slot and one
   implicit task per thread.  */
172 size_t extra
= sizeof (team
->ordered_release
[0])
173 + sizeof (team
->implicit_task
[0]);
174 team
= team_malloc (sizeof (*team
) + nthreads
* extra
);
176 #ifndef HAVE_SYNC_BUILTINS
177 gomp_mutex_init (&team
->work_share_list_free_lock
);
179 gomp_barrier_init (&team
->barrier
, nthreads
);
180 gomp_mutex_init (&team
->task_lock
);
182 team
->nthreads
= nthreads
;
185 team
->work_share_chunk
= 8;
186 #ifdef HAVE_SYNC_BUILTINS
187 team
->single_count
= 0;
/* Seed the inline work-share array: slot 0 is live, slots 1..7 form the
   free list (see work_share_chunk = 8 above).  */
189 team
->work_shares_to_free
= &team
->work_shares
[0];
190 gomp_init_work_share (&team
->work_shares
[0], 0, nthreads
);
191 team
->work_shares
[0].next_alloc
= NULL
;
192 team
->work_share_list_free
= NULL
;
193 team
->work_share_list_alloc
= &team
->work_shares
[1];
194 for (i
= 1; i
< 7; i
++)
195 team
->work_shares
[i
].next_free
= &team
->work_shares
[i
+ 1];
196 team
->work_shares
[i
].next_free
= NULL
;
/* ordered_release lives just past the implicit_task array; slot 0 is
   the master's release semaphore.  */
198 gomp_sem_init (&team
->master_release
, 0);
199 team
->ordered_release
= (void *) &team
->implicit_task
[nthreads
];
200 team
->ordered_release
[0] = &team
->master_release
;
202 priority_queue_init (&team
->task_queue
);
203 team
->task_count
= 0;
204 team
->task_queued_count
= 0;
205 team
->task_running_count
= 0;
206 team
->work_share_cancelled
= 0;
207 team
->team_cancelled
= 0;
213 /* Free a team data structure. */
/* Destroys the team's synchronization objects and its task queue.
   NOTE(review): the deallocation of TEAM itself is not visible in this
   extraction -- confirm against upstream.  */
216 free_team (struct gomp_team
*team
)
218 #ifndef HAVE_SYNC_BUILTINS
219 gomp_mutex_destroy (&team
->work_share_list_free_lock
);
221 gomp_barrier_destroy (&team
->barrier
);
222 gomp_mutex_destroy (&team
->task_lock
);
223 priority_queue_free (&team
->task_queue
);
/* Run by a pool thread that is being shut down: wait last on the pool
   dock so the releasing thread can proceed, tear down this thread's
   state, then terminate.  The #if chain picks the platform-specific way
   to end the thread (pthread_detach on pthreads, target-specific code on
   nvptx/amdgcn); otherwise compilation fails with #error.  */
228 gomp_free_pool_helper (void *thread_pool
)
230 struct gomp_thread
*thr
= gomp_thread ();
231 struct gomp_thread_pool
*pool
232 = (struct gomp_thread_pool
*) thread_pool
;
233 gomp_simple_barrier_wait_last (&pool
->threads_dock
);
234 gomp_sem_destroy (&thr
->release
);
235 thr
->thread_pool
= NULL
;
237 #ifdef LIBGOMP_USE_PTHREADS
238 pthread_detach (pthread_self ());
240 #elif defined(__nvptx__)
242 #elif defined(__AMDGCN__)
/* Write back the data cache before this lane stops.  */
243 asm ("s_dcache_wb\n\t"
246 #error gomp_free_pool_helper must terminate the thread
250 /* Free a thread pool and release its threads. */
/* Pool teardown (also registered as the gomp_thread_destructor callback
   in initialize_team): points every docked worker at
   gomp_free_pool_helper, releases them through two dock barriers, then
   destroys the dock, adjusts gomp_managed_threads and frees pool memory.
   NOTE(review): lossy extraction -- the declaration of `i' and several
   braces/statements are not visible.  */
253 gomp_free_thread (void *arg
__attribute__((unused
)))
255 struct gomp_thread
*thr
= gomp_thread ();
256 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
259 if (pool
->threads_used
> 0)
/* Hand each worker the shutdown helper as its next function.  */
262 for (i
= 1; i
< pool
->threads_used
; i
++)
264 struct gomp_thread
*nthr
= pool
->threads
[i
];
265 nthr
->fn
= gomp_free_pool_helper
;
268 /* This barrier undocks threads docked on pool->threads_dock. */
269 gomp_simple_barrier_wait (&pool
->threads_dock
);
270 /* And this waits till all threads have called gomp_barrier_wait_last
271 in gomp_free_pool_helper. */
272 gomp_simple_barrier_wait (&pool
->threads_dock
);
273 /* Now it is safe to destroy the barrier and free the pool. */
274 gomp_simple_barrier_destroy (&pool
->threads_dock
);
/* Account for the exiting workers in the global thread count.  */
276 #ifdef HAVE_SYNC_BUILTINS
277 __sync_fetch_and_add (&gomp_managed_threads
,
278 1L - pool
->threads_used
);
280 gomp_mutex_lock (&gomp_managed_threads_lock
);
281 gomp_managed_threads
-= pool
->threads_used
- 1L;
282 gomp_mutex_unlock (&gomp_managed_threads_lock
);
286 free_team (pool
->last_team
);
288 team_free (pool
->threads
);
291 thr
->thread_pool
= NULL
;
/* NOTE(review): the condition below suggests cleanup of an initial
   thread that still has team/task state; the branch bodies are largely
   missing from the visible text.  */
293 if (thr
->ts
.level
== 0 && __builtin_expect (thr
->ts
.team
!= NULL
, 0))
295 if (thr
->task
!= NULL
)
297 struct gomp_task
*task
= thr
->task
;
305 #ifdef LIBGOMP_USE_PTHREADS
/* Start a new team of NTHREADS threads running FN (DATA), reusing docked
   idle threads from the pool where possible and pthread_create'ing the
   rest.  Handles OMP_PLACES/proc-bind place partitioning, pool growth,
   gomp_managed_threads accounting and optional affinity display.
   NOTE(review): this extraction is lossy -- braces, blank lines and a
   number of statements (e.g. the declarations of `nested', `bind',
   `bind_var', `stacksize', `err', `j', `l', the switch headers and
   several if/else bodies) are missing from the visible text; comments
   below annotate only what is visible.  */
307 gomp_team_start (void (*fn
) (void *), void *data
, unsigned nthreads
,
308 unsigned flags
, struct gomp_team
*team
,
309 struct gomp_taskgroup
*taskgroup
)
311 struct gomp_thread_start_data
*start_data
;
312 struct gomp_thread
*thr
, *nthr
;
313 struct gomp_task
*task
;
314 struct gomp_task_icv
*icv
;
316 struct gomp_thread_pool
*pool
;
317 unsigned i
, n
, old_threads_used
= 0;
318 pthread_attr_t thread_attr
, *attr
;
319 unsigned long nthreads_var
;
321 unsigned int s
= 0, rest
= 0, p
= 0, k
= 0;
322 unsigned int affinity_count
= 0;
323 struct gomp_thread
**affinity_thr
= NULL
;
324 bool force_display
= false;
/* Cache the current thread, its nesting level and its thread pool.  */
326 thr
= gomp_thread ();
327 nested
= thr
->ts
.level
;
328 pool
= thr
->thread_pool
;
330 icv
= task
? &task
->icv
: &gomp_global_icv
;
331 if (__builtin_expect (gomp_places_list
!= NULL
, 0) && thr
->place
== 0)
333 gomp_init_affinity ();
334 if (__builtin_expect (gomp_display_affinity_var
, 0) && nthreads
== 1)
335 gomp_display_affinity_thread (gomp_thread_self (), &thr
->ts
,
339 /* Always save the previous state, even if this isn't a nested team.
340 In particular, we should save any work share state from an outer
341 orphaned work share construct. */
342 team
->prev_ts
= thr
->ts
;
348 ++thr
->ts
.active_level
;
349 thr
->ts
.work_share
= &team
->work_shares
[0];
350 thr
->ts
.last_work_share
= NULL
;
351 #ifdef HAVE_SYNC_BUILTINS
352 thr
->ts
.single_count
= 0;
354 thr
->ts
.static_trip
= 0;
355 thr
->task
= &team
->implicit_task
[0];
356 #ifdef GOMP_NEEDS_THREAD_HANDLE
357 thr
->handle
= pthread_self ();
/* Resolve ICVs for the new team: nthreads-var and proc-bind-var, with
   optional per-nesting-level overrides from the env-var lists.  */
359 nthreads_var
= icv
->nthreads_var
;
360 if (__builtin_expect (gomp_nthreads_var_list
!= NULL
, 0)
361 && thr
->ts
.level
< gomp_nthreads_var_list_len
)
362 nthreads_var
= gomp_nthreads_var_list
[thr
->ts
.level
];
363 bind_var
= icv
->bind_var
;
364 if (bind_var
!= omp_proc_bind_false
&& (flags
& 7) != omp_proc_bind_false
)
365 bind_var
= flags
& 7;
367 if (__builtin_expect (gomp_bind_var_list
!= NULL
, 0)
368 && thr
->ts
.level
< gomp_bind_var_list_len
)
369 bind_var
= gomp_bind_var_list
[thr
->ts
.level
];
370 gomp_init_task (thr
->task
, task
, icv
);
371 thr
->task
->taskgroup
= taskgroup
;
372 team
->implicit_task
[0].icv
.nthreads_var
= nthreads_var
;
373 team
->implicit_task
[0].icv
.bind_var
= bind_var
;
380 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
382 /* Depending on chosen proc_bind model, set subpartition
383 for the master thread and initialize helper variables
384 P and optionally S, K and/or REST used by later place
385 computation for each additional thread. */
389 case omp_proc_bind_true
:
390 case omp_proc_bind_close
:
391 if (nthreads
> thr
->ts
.place_partition_len
)
393 /* T > P. S threads will be placed in each place,
394 and the final REM threads placed one by one
395 into the already occupied places. */
396 s
= nthreads
/ thr
->ts
.place_partition_len
;
397 rest
= nthreads
% thr
->ts
.place_partition_len
;
403 case omp_proc_bind_master
:
404 /* Each thread will be bound to master's place. */
406 case omp_proc_bind_spread
:
407 if (nthreads
<= thr
->ts
.place_partition_len
)
409 /* T <= P. Each subpartition will have in between s
410 and s+1 places (subpartitions starting at or
411 after rest will have s places, earlier s+1 places),
412 each thread will be bound to the first place in
413 its subpartition (except for the master thread
414 that can be bound to another place in its
416 s
= thr
->ts
.place_partition_len
/ nthreads
;
417 rest
= thr
->ts
.place_partition_len
% nthreads
;
418 rest
= (s
+ 1) * rest
+ thr
->ts
.place_partition_off
;
421 p
-= (p
- thr
->ts
.place_partition_off
) % (s
+ 1);
422 thr
->ts
.place_partition_len
= s
+ 1;
427 thr
->ts
.place_partition_len
= s
;
429 thr
->ts
.place_partition_off
= p
;
433 /* T > P. Each subpartition will have just a single
434 place and we'll place between s and s+1
435 threads into each subpartition. */
436 s
= nthreads
/ thr
->ts
.place_partition_len
;
437 rest
= nthreads
% thr
->ts
.place_partition_len
;
438 thr
->ts
.place_partition_off
= p
;
439 thr
->ts
.place_partition_len
= 1;
446 bind
= omp_proc_bind_false
;
448 /* We only allow the reuse of idle threads for non-nested PARALLEL
449 regions. This appears to be implied by the semantics of
450 threadprivate variables, but perhaps that's reading too much into
451 things. Certainly it does prevent any locking problems, since
452 only the initial program thread will modify gomp_threads. */
455 old_threads_used
= pool
->threads_used
;
457 if (nthreads
<= old_threads_used
)
459 else if (old_threads_used
== 0)
462 gomp_simple_barrier_init (&pool
->threads_dock
, nthreads
);
466 n
= old_threads_used
;
468 /* Increase the barrier threshold to make sure all new
469 threads arrive before the team is released. */
470 gomp_simple_barrier_reinit (&pool
->threads_dock
, nthreads
);
473 /* Not true yet, but soon will be. We're going to release all
474 threads from the dock, and those that aren't part of the
476 pool
->threads_used
= nthreads
;
478 /* If necessary, expand the size of the gomp_threads array. It is
479 expected that changes in the number of threads are rare, thus we
480 make no effort to expand gomp_threads_size geometrically. */
481 if (nthreads
>= pool
->threads_size
)
483 pool
->threads_size
= nthreads
+ 1;
485 = gomp_realloc (pool
->threads
,
487 * sizeof (struct gomp_thread
*));
488 /* Add current (master) thread to threads[]. */
489 pool
->threads
[0] = thr
;
492 /* Release existing idle threads. */
495 unsigned int place_partition_off
= thr
->ts
.place_partition_off
;
496 unsigned int place_partition_len
= thr
->ts
.place_partition_len
;
497 unsigned int place
= 0;
498 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
502 case omp_proc_bind_true
:
503 case omp_proc_bind_close
:
507 if (p
== (team
->prev_ts
.place_partition_off
508 + team
->prev_ts
.place_partition_len
))
509 p
= team
->prev_ts
.place_partition_off
;
511 if (i
== nthreads
- rest
)
517 case omp_proc_bind_master
:
519 case omp_proc_bind_spread
:
527 if (p
== (team
->prev_ts
.place_partition_off
528 + team
->prev_ts
.place_partition_len
))
529 p
= team
->prev_ts
.place_partition_off
;
530 place_partition_off
= p
;
532 place_partition_len
= s
+ 1;
534 place_partition_len
= s
;
542 if (p
== (team
->prev_ts
.place_partition_off
543 + team
->prev_ts
.place_partition_len
))
544 p
= team
->prev_ts
.place_partition_off
;
546 if (i
== nthreads
- rest
)
551 place_partition_off
= p
;
552 place_partition_len
= 1;
/* If the docked thread at slot I does not already sit on an acceptable
   place, fall into the affinity-permutation path below.  */
556 if (affinity_thr
!= NULL
557 || (bind
!= omp_proc_bind_true
558 && pool
->threads
[i
]->place
!= p
+ 1)
559 || pool
->threads
[i
]->place
<= place_partition_off
560 || pool
->threads
[i
]->place
> (place_partition_off
561 + place_partition_len
))
564 force_display
= true;
565 if (affinity_thr
== NULL
)
/* Lazily build a per-place scratch array of old threads, heap
   allocated only for large partitions (> 64 places).  */
569 if (team
->prev_ts
.place_partition_len
> 64)
571 = gomp_malloc (team
->prev_ts
.place_partition_len
572 * sizeof (struct gomp_thread
*));
575 = gomp_alloca (team
->prev_ts
.place_partition_len
576 * sizeof (struct gomp_thread
*));
577 memset (affinity_thr
, '\0',
578 team
->prev_ts
.place_partition_len
579 * sizeof (struct gomp_thread
*));
580 for (j
= i
; j
< old_threads_used
; j
++)
582 if (pool
->threads
[j
]->place
583 > team
->prev_ts
.place_partition_off
584 && (pool
->threads
[j
]->place
585 <= (team
->prev_ts
.place_partition_off
586 + team
->prev_ts
.place_partition_len
)))
/* Chain old threads into per-place buckets via their ->data field.  */
588 l
= pool
->threads
[j
]->place
- 1
589 - team
->prev_ts
.place_partition_off
;
590 pool
->threads
[j
]->data
= affinity_thr
[l
];
591 affinity_thr
[l
] = pool
->threads
[j
];
593 pool
->threads
[j
] = NULL
;
595 if (nthreads
> old_threads_used
)
596 memset (&pool
->threads
[old_threads_used
],
597 '\0', ((nthreads
- old_threads_used
)
598 * sizeof (struct gomp_thread
*)));
600 affinity_count
= old_threads_used
- i
;
602 if (affinity_count
== 0)
605 if (affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
608 if (bind
!= omp_proc_bind_true
)
610 for (l
= place_partition_off
;
611 l
< place_partition_off
+ place_partition_len
;
613 if (affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
616 if (l
== place_partition_off
+ place_partition_len
)
/* Pop a matching old thread from its place bucket.  */
619 nthr
= affinity_thr
[l
- team
->prev_ts
.place_partition_off
];
620 affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
621 = (struct gomp_thread
*) nthr
->data
;
623 pool
->threads
[i
] = nthr
;
626 nthr
= pool
->threads
[i
];
/* Re-seed the reused thread's team state for the new team.  */
630 nthr
= pool
->threads
[i
];
631 nthr
->ts
.team
= team
;
632 nthr
->ts
.work_share
= &team
->work_shares
[0];
633 nthr
->ts
.last_work_share
= NULL
;
634 nthr
->ts
.team_id
= i
;
635 nthr
->ts
.level
= team
->prev_ts
.level
+ 1;
636 nthr
->ts
.active_level
= thr
->ts
.active_level
;
637 nthr
->ts
.place_partition_off
= place_partition_off
;
638 nthr
->ts
.place_partition_len
= place_partition_len
;
639 nthr
->ts
.def_allocator
= thr
->ts
.def_allocator
;
640 #ifdef HAVE_SYNC_BUILTINS
641 nthr
->ts
.single_count
= 0;
643 nthr
->ts
.static_trip
= 0;
644 nthr
->task
= &team
->implicit_task
[i
];
646 gomp_init_task (nthr
->task
, task
, icv
);
647 team
->implicit_task
[i
].icv
.nthreads_var
= nthreads_var
;
648 team
->implicit_task
[i
].icv
.bind_var
= bind_var
;
649 nthr
->task
->taskgroup
= taskgroup
;
652 team
->ordered_release
[i
] = &nthr
->release
;
655 if (__builtin_expect (affinity_thr
!= NULL
, 0))
657 /* If AFFINITY_THR is non-NULL just because we had to
658 permute some threads in the pool, but we've managed
659 to find exactly as many old threads as we'd find
660 without affinity, we don't need to handle this
661 specially anymore. */
662 if (nthreads
<= old_threads_used
663 ? (affinity_count
== old_threads_used
- nthreads
)
664 : (i
== old_threads_used
))
666 if (team
->prev_ts
.place_partition_len
> 64)
674 /* We are going to compute the places/subpartitions
675 again from the beginning. So, we need to reinitialize
676 vars modified by the switch (bind) above inside
677 of the loop, to the state they had after the initial
681 case omp_proc_bind_true
:
682 case omp_proc_bind_close
:
683 if (nthreads
> thr
->ts
.place_partition_len
)
684 /* T > P. S has been changed, so needs
686 s
= nthreads
/ thr
->ts
.place_partition_len
;
690 case omp_proc_bind_master
:
691 /* No vars have been changed. */
693 case omp_proc_bind_spread
:
694 p
= thr
->ts
.place_partition_off
;
698 s
= nthreads
/ team
->prev_ts
.place_partition_len
;
704 /* Increase the barrier threshold to make sure all new
705 threads and all the threads we're going to let die
706 arrive before the team is released. */
708 gomp_simple_barrier_reinit (&pool
->threads_dock
,
709 nthreads
+ affinity_count
);
/* Account for newly-created threads (and doomed extras) globally.  */
718 if (__builtin_expect (nthreads
+ affinity_count
> old_threads_used
, 0))
720 long diff
= (long) (nthreads
+ affinity_count
) - (long) old_threads_used
;
722 if (old_threads_used
== 0)
725 #ifdef HAVE_SYNC_BUILTINS
726 __sync_fetch_and_add (&gomp_managed_threads
, diff
);
728 gomp_mutex_lock (&gomp_managed_threads_lock
);
729 gomp_managed_threads
+= diff
;
730 gomp_mutex_unlock (&gomp_managed_threads_lock
);
/* Pick thread attributes: the global gomp_thread_attr, or a local copy
   (with inherited stack size) when places are in use.  */
734 attr
= &gomp_thread_attr
;
735 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
738 pthread_attr_init (&thread_attr
);
739 if (! pthread_attr_getstacksize (&gomp_thread_attr
, &stacksize
))
740 pthread_attr_setstacksize (&thread_attr
, stacksize
);
744 start_data
= gomp_alloca (sizeof (struct gomp_thread_start_data
)
747 /* Launch new threads. */
748 for (; i
< nthreads
; ++i
)
752 start_data
->ts
.place_partition_off
= thr
->ts
.place_partition_off
;
753 start_data
->ts
.place_partition_len
= thr
->ts
.place_partition_len
;
754 start_data
->place
= 0;
755 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
759 case omp_proc_bind_true
:
760 case omp_proc_bind_close
:
764 if (p
== (team
->prev_ts
.place_partition_off
765 + team
->prev_ts
.place_partition_len
))
766 p
= team
->prev_ts
.place_partition_off
;
768 if (i
== nthreads
- rest
)
774 case omp_proc_bind_master
:
776 case omp_proc_bind_spread
:
784 if (p
== (team
->prev_ts
.place_partition_off
785 + team
->prev_ts
.place_partition_len
))
786 p
= team
->prev_ts
.place_partition_off
;
787 start_data
->ts
.place_partition_off
= p
;
789 start_data
->ts
.place_partition_len
= s
+ 1;
791 start_data
->ts
.place_partition_len
= s
;
799 if (p
== (team
->prev_ts
.place_partition_off
800 + team
->prev_ts
.place_partition_len
))
801 p
= team
->prev_ts
.place_partition_off
;
803 if (i
== nthreads
- rest
)
808 start_data
->ts
.place_partition_off
= p
;
809 start_data
->ts
.place_partition_len
= 1;
813 start_data
->place
= p
+ 1;
814 if (affinity_thr
!= NULL
&& pool
->threads
[i
] != NULL
)
816 gomp_init_thread_affinity (attr
, p
);
/* Fill the start_data handed across pthread_create (consumed by
   gomp_thread_start).  */
820 start_data
->fn_data
= data
;
821 start_data
->ts
.team
= team
;
822 start_data
->ts
.work_share
= &team
->work_shares
[0];
823 start_data
->ts
.last_work_share
= NULL
;
824 start_data
->ts
.team_id
= i
;
825 start_data
->ts
.level
= team
->prev_ts
.level
+ 1;
826 start_data
->ts
.active_level
= thr
->ts
.active_level
;
827 start_data
->ts
.def_allocator
= thr
->ts
.def_allocator
;
828 #ifdef HAVE_SYNC_BUILTINS
829 start_data
->ts
.single_count
= 0;
831 start_data
->ts
.static_trip
= 0;
832 start_data
->task
= &team
->implicit_task
[i
];
833 gomp_init_task (start_data
->task
, task
, icv
);
834 team
->implicit_task
[i
].icv
.nthreads_var
= nthreads_var
;
835 team
->implicit_task
[i
].icv
.bind_var
= bind_var
;
836 start_data
->task
->taskgroup
= taskgroup
;
837 start_data
->thread_pool
= pool
;
838 start_data
->nested
= nested
;
840 attr
= gomp_adjust_thread_attr (attr
, &thread_attr
);
841 err
= pthread_create (&start_data
->handle
, attr
, gomp_thread_start
,
845 gomp_fatal ("Thread creation failed: %s", strerror (err
));
848 if (__builtin_expect (attr
== &thread_attr
, 0))
849 pthread_attr_destroy (&thread_attr
);
/* Synchronize with the released/created threads before returning.  */
853 gomp_barrier_wait (&team
->barrier
);
855 gomp_simple_barrier_wait (&pool
->threads_dock
);
857 /* Decrease the barrier threshold to match the number of threads
858 that should arrive back at the end of this team. The extra
859 threads should be exiting. Note that we arrange for this test
860 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
861 the barrier as well as gomp_managed_threads was temporarily
862 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_COUNT,
863 AFFINITY_COUNT if non-zero will be always at least
864 OLD_THREADS_COUNT - NTHREADS. */
865 if (__builtin_expect (nthreads
< old_threads_used
, 0)
866 || __builtin_expect (affinity_count
, 0))
868 long diff
= (long) nthreads
- (long) old_threads_used
;
871 diff
= -affinity_count
;
873 gomp_simple_barrier_reinit (&pool
->threads_dock
, nthreads
);
875 #ifdef HAVE_SYNC_BUILTINS
876 __sync_fetch_and_add (&gomp_managed_threads
, diff
);
878 gomp_mutex_lock (&gomp_managed_threads_lock
);
879 gomp_managed_threads
+= diff
;
880 gomp_mutex_unlock (&gomp_managed_threads_lock
);
/* Optionally report thread/place assignments (OMP_DISPLAY_AFFINITY).  */
883 if (__builtin_expect (gomp_display_affinity_var
, 0))
886 || nthreads
!= old_threads_used
889 gomp_display_affinity_thread (gomp_thread_self (), &thr
->ts
,
893 start_data
-= nthreads
- 1;
894 for (i
= 1; i
< nthreads
; ++i
)
896 gomp_display_affinity_thread (
897 #ifdef LIBGOMP_USE_PTHREADS
909 for (i
= 1; i
< nthreads
; ++i
)
911 gomp_thread_handle handle
912 = gomp_thread_to_pthread_t (pool
->threads
[i
]);
913 gomp_display_affinity_thread (handle
, &pool
->threads
[i
]->ts
,
914 pool
->threads
[i
]->place
);
/* Free the heap-allocated affinity scratch array, if it was used.  */
919 if (__builtin_expect (affinity_thr
!= NULL
, 0)
920 && team
->prev_ts
.place_partition_len
> 64)
926 /* Terminate the current team. This is only to be called by the master
927 thread. We assume that we must wait for the other threads. */
/* NOTE(review): lossy extraction -- the signature line itself
   (presumably `void gomp_team_end (void)') and a number of braces and
   statements are not visible; confirm against upstream.  */
932 struct gomp_thread
*thr
= gomp_thread ();
933 struct gomp_team
*team
= thr
->ts
.team
;
935 /* This barrier handles all pending explicit threads.
936 As #pragma omp cancel parallel might get awaited count in
937 team->barrier in a inconsistent state, we need to use a different
939 gomp_team_barrier_wait_final (&team
->barrier
);
/* On cancellation, walk work_shares_to_free and finalize each share.  */
940 if (__builtin_expect (team
->team_cancelled
, 0))
942 struct gomp_work_share
*ws
= team
->work_shares_to_free
;
945 struct gomp_work_share
*next_ws
= gomp_ptrlock_get (&ws
->next_ws
);
947 gomp_ptrlock_set (&ws
->next_ws
, ws
);
948 gomp_fini_work_share (ws
);
954 gomp_fini_work_share (thr
->ts
.work_share
);
/* Restore the master's pre-team state saved in gomp_team_start.  */
957 thr
->ts
= team
->prev_ts
;
/* Nested team ending: drop its workers from the global count.  */
959 if (__builtin_expect (thr
->ts
.level
!= 0, 0))
961 #ifdef HAVE_SYNC_BUILTINS
962 __sync_fetch_and_add (&gomp_managed_threads
, 1L - team
->nthreads
);
964 gomp_mutex_lock (&gomp_managed_threads_lock
);
965 gomp_managed_threads
-= team
->nthreads
- 1L;
966 gomp_mutex_unlock (&gomp_managed_threads_lock
);
968 /* This barrier has gomp_barrier_wait_last counterparts
969 and ensures the team can be safely destroyed. */
970 gomp_barrier_wait (&team
->barrier
);
/* Free any overflow work shares allocated beyond the inline array.  */
973 if (__builtin_expect (team
->work_shares
[0].next_alloc
!= NULL
, 0))
975 struct gomp_work_share
*ws
= team
->work_shares
[0].next_alloc
;
978 struct gomp_work_share
*next_ws
= ws
->next_alloc
;
984 gomp_sem_destroy (&team
->master_release
);
/* Either free the team now, or cache it as the pool's last_team for
   reuse by a subsequent gomp_new_team of the same size.  */
986 if (__builtin_expect (thr
->ts
.team
!= NULL
, 0)
987 || __builtin_expect (team
->nthreads
== 1, 0))
991 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
993 free_team (pool
->last_team
);
994 pool
->last_team
= team
;
995 gomp_release_thread_pool (pool
);
1001 /* Constructors for this file. */
1003 static void __attribute__((constructor
))
1004 initialize_team (void)
1006 #if !defined HAVE_TLS && !defined USE_EMUTLS
1007 static struct gomp_thread initial_thread_tls_data
;
1009 pthread_key_create (&gomp_tls_key
, NULL
);
1010 pthread_setspecific (gomp_tls_key
, &initial_thread_tls_data
);
1013 if (pthread_key_create (&gomp_thread_destructor
, gomp_free_thread
) != 0)
1014 gomp_fatal ("could not create thread pool destructor.");
/* Library destructor: delete the thread-destructor key.  The original
   rationale comment (dlclose of libgomp otherwise misbehaving) is
   truncated in this extraction.  */
1017 static void __attribute__((destructor
))
1018 team_destructor (void)
1020 /* Without this dlclose on libgomp could lead to subsequent
1022 pthread_key_delete (gomp_thread_destructor
);
1025 /* Similar to gomp_free_pool_helper, but don't detach itself,
1026 gomp_pause_host will pthread_join those threads. */
/* Joinable shutdown path: wait last on the pool dock, tear down this
   thread's state, then terminate via pthread_exit so the joiner can
   reap it.  */
1029 gomp_pause_pool_helper (void *thread_pool
)
1031 struct gomp_thread
*thr
= gomp_thread ();
1032 struct gomp_thread_pool
*pool
1033 = (struct gomp_thread_pool
*) thread_pool
;
1034 gomp_simple_barrier_wait_last (&pool
->threads_dock
);
1035 gomp_sem_destroy (&thr
->release
);
1036 thr
->thread_pool
= NULL
;
1038 pthread_exit (NULL
);
1041 /* Free a thread pool and release its threads. Return non-zero on
/* Like the gomp_free_thread teardown, but collects each worker's
   pthread_t and joins the workers (via gomp_pause_pool_helper) instead
   of detaching them.  NOTE(review): lossy extraction -- the declarations
   of `i' and `thrs', the return statements and several braces are not
   visible.  */
1045 gomp_pause_host (void)
1047 struct gomp_thread
*thr
= gomp_thread ();
1048 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
1053 if (pool
->threads_used
> 0)
/* Stack-allocate the array of handles to join later.  */
1057 = gomp_alloca (sizeof (pthread_t
) * pool
->threads_used
);
1058 for (i
= 1; i
< pool
->threads_used
; i
++)
1060 struct gomp_thread
*nthr
= pool
->threads
[i
];
1061 nthr
->fn
= gomp_pause_pool_helper
;
1063 thrs
[i
] = gomp_thread_to_pthread_t (nthr
);
1065 /* This barrier undocks threads docked on pool->threads_dock. */
1066 gomp_simple_barrier_wait (&pool
->threads_dock
);
1067 /* And this waits till all threads have called gomp_barrier_wait_last
1068 in gomp_pause_pool_helper. */
1069 gomp_simple_barrier_wait (&pool
->threads_dock
);
1070 /* Now it is safe to destroy the barrier and free the pool. */
1071 gomp_simple_barrier_destroy (&pool
->threads_dock
);
1073 #ifdef HAVE_SYNC_BUILTINS
1074 __sync_fetch_and_add (&gomp_managed_threads
,
1075 1L - pool
->threads_used
);
1077 gomp_mutex_lock (&gomp_managed_threads_lock
);
1078 gomp_managed_threads
-= pool
->threads_used
- 1L;
1079 gomp_mutex_unlock (&gomp_managed_threads_lock
);
/* Reap every worker before freeing pool memory.  */
1081 for (i
= 1; i
< pool
->threads_used
; i
++)
1082 pthread_join (thrs
[i
], NULL
);
1084 if (pool
->last_team
)
1085 free_team (pool
->last_team
);
1087 team_free (pool
->threads
);
1090 thr
->thread_pool
= NULL
;
1096 struct gomp_task_icv
*
1099 struct gomp_thread
*thr
= gomp_thread ();
1100 struct gomp_task
*task
= gomp_malloc (sizeof (struct gomp_task
));
1101 gomp_init_task (task
, NULL
, &gomp_global_icv
);
1103 #ifdef LIBGOMP_USE_PTHREADS
1104 pthread_setspecific (gomp_thread_destructor
, thr
);