1 /* Copyright (C) 2002-2013 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <errno.h>
21 #include <signal.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <sys/mman.h>
26 #include <sys/param.h>
27 #include <dl-sysdep.h>
28 #include <dl-tls.h>
29 #include <tls.h>
30 #include <list.h>
31 #include <lowlevellock.h>
32 #include <kernel-features.h>
33 #include <stack-aliasing.h>
34
35
36 #if !(defined(NEED_SEPARATE_REGISTER_STACK) || defined(NEED_STACK_SIZE_FOR_PTH_CREATE))
37
38 /* Most architectures have exactly one stack pointer. Some have more. */
39 # define STACK_VARIABLES void *stackaddr = NULL
40
41 /* How to pass the values to the 'create_thread' function. */
42 # define STACK_VARIABLES_ARGS stackaddr
43
44 /* How to declare the function which receives these parameters. */
45 # define STACK_VARIABLES_PARMS void *stackaddr
46
47 /* How to declare allocate_stack. */
48 # define ALLOCATE_STACK_PARMS void **stack
49
50 /* This is how the function is called. We do it this way to allow
51 other variants of the function to have more parameters. */
52 # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
53
54 #else
55
56 /* We need two stacks. The kernel will place them but we have to tell
57 the kernel about the size of the reserved address space. */
58 # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
59
60 /* How to pass the values to the 'create_thread' function. */
61 # define STACK_VARIABLES_ARGS stackaddr, stacksize
62
63 /* How to declare the function which receives these parameters. */
64 # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
65
66 /* How to declare allocate_stack. */
67 # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
68
69 /* This is how the function is called. We do it this way to allow
70 other variants of the function to have more parameters. */
71 # define ALLOCATE_STACK(attr, pd) \
72 allocate_stack (attr, pd, &stackaddr, &stacksize)
73
74 #endif
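/* A sketch (illustrative only; the real call site lives in the
   pthread_create implementation, and 'iattr'/'create_thread' are just the
   names used there) of how these macros are meant to be combined:

     STACK_VARIABLES;
     struct pthread *pd;
     int err = ALLOCATE_STACK (iattr, &pd);
     if (err == 0)
       err = create_thread (pd, iattr, STACK_VARIABLES_ARGS);

   This keeps the call sites identical for the one- and two-stack
   variants. */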
75
76
77 /* Default alignment of stack. */
78 #ifndef STACK_ALIGN
79 # define STACK_ALIGN __alignof__ (long double)
80 #endif
81
82 /* Default value for minimal stack size after allocating thread
83 descriptor and guard. */
84 #ifndef MINIMAL_REST_STACK
85 # define MINIMAL_REST_STACK 4096
86 #endif
87
88 /*
89 Unfortunately, on FreeBSD mmap fails when called with addr == NULL and
90 MAP_STACK among the flags.
91 
92 See http://www.freebsd.org/cgi/query-pr.cgi?pr=158755
93 
94 Therefore we do not use MAP_STACK at all. */
95
96 #undef MAP_STACK
97
98
99 /* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
100 a stack. It is disabled above, so define it to 0 and use it unconditionally. */
101 #ifndef MAP_STACK
102 # define MAP_STACK 0
103 #endif
104
105 /* This yields the pointer that TLS support code calls the thread pointer. */
106 #if TLS_TCB_AT_TP
107 # define TLS_TPADJ(pd) (pd)
108 #elif TLS_DTV_AT_TP
109 # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
110 #endif
111
112 /* Cache handling for not-yet free stacks. */
113
114 /* Maximum size in bytes of the stack cache. */
115 static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default. */
116 static size_t stack_cache_actsize;
117
118 /* Mutex protecting this variable. */
119 static int stack_cache_lock = LLL_LOCK_INITIALIZER;
120
121 /* List of queued stack frames. */
122 static LIST_HEAD (stack_cache);
123
124 /* List of the stacks in use. */
125 static LIST_HEAD (stack_used);
126
127 /* We need to record which list operation is in progress so that, in
128 case of an asynchronous interruption due to a fork() call, we can
129 repair the lists afterwards. */
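/* The recorded value is the address of the list element being
   manipulated; the low bit is set while an add operation is in flight and
   clear while a delete is in flight. __reclaim_stacks inspects this after
   fork to replay or complete the interrupted operation. */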
130 static uintptr_t in_flight_stack;
131
132 /* List of the threads with user provided stacks in use. No need to
133 initialize this, since it's done in __pthread_initialize_minimal. */
134 list_t __stack_user __attribute__ ((nocommon));
135 hidden_data_def (__stack_user)
136
137 #if COLORING_INCREMENT != 0
138 /* Number of threads created. */
139 static unsigned int nptl_ncreated;
140 #endif
141
142
143 /* Check whether the stack is no longer in use. */
144 #define FREE_P(descr) ((descr)->tid <= KTID_TERMINATED)
145
146
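/* Unlink ELEM from whichever list it is on. The operation is recorded
   in in_flight_stack around the actual list manipulation, with write
   barriers ordering the stores, so that __reclaim_stacks can repair the
   list if a fork() interrupts us half-way through. */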
147 static void
148 stack_list_del (list_t *elem)
149 {
150 in_flight_stack = (uintptr_t) elem;
151
152 atomic_write_barrier ();
153
154 list_del (elem);
155
156 atomic_write_barrier ();
157
158 in_flight_stack = 0;
159 }
160
161
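/* Add ELEM to LIST, again recording the operation in in_flight_stack;
   the low bit is set so that an interrupted add can be distinguished
   from an interrupted delete when replayed after a fork(). */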
162 static void
163 stack_list_add (list_t *elem, list_t *list)
164 {
165 in_flight_stack = (uintptr_t) elem | 1;
166
167 atomic_write_barrier ();
168
169 list_add (elem, list);
170
171 atomic_write_barrier ();
172
173 in_flight_stack = 0;
174 }
175
176
177 /* We create a doubly linked list of all cache entries. Doubly linked
178 because this allows removing entries from the end. */
179
180
181 /* Get a stack from the cache. We have to match by size since some
182 blocks might be too small or far too large. */
183 static struct pthread *
184 get_cached_stack (size_t *sizep, void **memp)
185 {
186 size_t size = *sizep;
187 struct pthread *result = NULL;
188 list_t *entry;
189
190 lll_lock (stack_cache_lock, LLL_PRIVATE);
191
192 /* Search the cache for a matching entry. We search for the
193 smallest stack which has at least the required size. Note that
194 in normal situations the size of all allocated stacks is the
195 same. At the very least there are only a few different sizes.
196 Therefore this loop will exit early most of the time with an
197 exact match. */
198 list_for_each (entry, &stack_cache)
199 {
200 struct pthread *curr;
201
202 curr = list_entry (entry, struct pthread, list);
203 if (FREE_P (curr) && curr->stackblock_size >= size)
204 {
205 if (curr->stackblock_size == size)
206 {
207 result = curr;
208 break;
209 }
210
211 if (result == NULL
212 || result->stackblock_size > curr->stackblock_size)
213 result = curr;
214 }
215 }
216
217 if (__builtin_expect (result == NULL, 0)
218 /* Make sure the size difference is not excessive. In that
219 case we do not use the block. */
220 || __builtin_expect (result->stackblock_size > 4 * size, 0))
221 {
222 /* Release the lock. */
223 lll_unlock (stack_cache_lock, LLL_PRIVATE);
224
225 return NULL;
226 }
227
228 /* Don't allow setxid until cloned. */
229 result->setxid_futex = -1;
230
231 /* Dequeue the entry. */
232 stack_list_del (&result->list);
233
234 /* And add to the list of stacks in use. */
235 stack_list_add (&result->list, &stack_used);
236
237 /* And decrease the cache size. */
238 stack_cache_actsize -= result->stackblock_size;
239
240 /* Release the lock early. */
241 lll_unlock (stack_cache_lock, LLL_PRIVATE);
242
243 /* Report size and location of the stack to the caller. */
244 *sizep = result->stackblock_size;
245 *memp = result->stackblock;
246
247 /* Cancellation handling is back to the default. */
248 result->cancelhandling = 0;
249 result->cleanup = NULL;
250
251 /* No pending event. */
252 result->nextevent = NULL;
253
254 /* Clear the DTV. */
255 dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
256 for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
257 free (dtv[1 + cnt].pointer.to_free);
258 memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
259
260 /* Re-initialize the TLS. */
261 _dl_allocate_tls_init (TLS_TPADJ (result));
262
263 return result;
264 }
265
266
267 /* Free stacks until cache size is lower than LIMIT. */
268 void
269 __free_stacks (size_t limit)
270 {
271 /* We reduce the size of the cache. Remove the last entries until
272 the size is below the limit. */
273 list_t *entry;
274 list_t *prev;
275
276 /* Search from the end of the list. */
277 list_for_each_prev_safe (entry, prev, &stack_cache)
278 {
279 struct pthread *curr;
280
281 curr = list_entry (entry, struct pthread, list);
282 if (FREE_P (curr))
283 {
284 /* Unlink the block. */
285 stack_list_del (entry);
286
287 /* Account for the freed memory. */
288 stack_cache_actsize -= curr->stackblock_size;
289
290 /* Free the memory associated with the ELF TLS. */
291 _dl_deallocate_tls (TLS_TPADJ (curr), false);
292
293 /* Remove this block. This should never fail. If it does
294 something is really wrong. */
295 if (munmap (curr->stackblock, curr->stackblock_size) != 0)
296 abort ();
297
298 /* Maybe we have freed enough. */
299 if (stack_cache_actsize <= limit)
300 break;
301 }
302 }
303 }
304
305
306 /* Add a stack which is not used anymore to the cache. Must be
307 called with the cache lock held. */
308 static inline void
309 __attribute ((always_inline))
310 queue_stack (struct pthread *stack)
311 {
312 /* We unconditionally add the stack to the list. The memory may
313 still be in use but it will not be reused until the kernel marks
314 the stack as not used anymore. */
315 stack_list_add (&stack->list, &stack_cache);
316
317 stack_cache_actsize += stack->stackblock_size;
318 if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
319 __free_stacks (stack_cache_maxsize);
320 }
321
322
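/* Make the usable part of the stack described by PD executable in
   addition to readable and writable. Returns 0 on success or an errno
   value if mprotect fails. */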
323 static int
324 internal_function
325 change_stack_perm (struct pthread *pd
326 #ifdef NEED_SEPARATE_REGISTER_STACK
327 , size_t pagemask
328 #endif
329 )
330 {
331 #ifdef NEED_SEPARATE_REGISTER_STACK
332 void *stack = (pd->stackblock
333 + (((((pd->stackblock_size - pd->guardsize) / 2)
334 & pagemask) + pd->guardsize) & pagemask));
335 size_t len = pd->stackblock + pd->stackblock_size - stack;
336 #elif _STACK_GROWS_DOWN
337 void *stack = pd->stackblock + pd->guardsize;
338 size_t len = pd->stackblock_size - pd->guardsize;
339 #elif _STACK_GROWS_UP
340 void *stack = pd->stackblock;
341 size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
342 #else
343 # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
344 #endif
345 if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
346 return errno;
347
348 return 0;
349 }
350
351
352 /* Returns a usable stack for a new thread either by allocating a
353 new stack or reusing a cached stack of sufficient size.
354 ATTR must be non-NULL and point to a valid pthread_attr.
355 PDP must be non-NULL. */
356 static int
357 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
358 ALLOCATE_STACK_PARMS)
359 {
360 struct pthread *pd;
361 size_t size;
362 size_t pagesize_m1 = __getpagesize () - 1;
363 void *stacktop;
364
365 assert (powerof2 (pagesize_m1 + 1));
366 assert (TCB_ALIGNMENT >= STACK_ALIGN);
367
368 /* Get the stack size from the attribute if it is set. Otherwise we
369 use the default we determined at start time. */
370 if (attr->stacksize != 0)
371 size = attr->stacksize;
372 else
373 {
374 lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
375 size = __default_pthread_attr.stacksize;
376 lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
377 }
378
379 /* Get memory for the stack. */
380 if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
381 {
382 uintptr_t adj;
383
384 /* If the user also specified the size of the stack make sure it
385 is large enough. */
386 if (attr->stacksize != 0
387 && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
388 return EINVAL;
389
390 /* Adjust stack size for alignment of the TLS block. */
391 #if TLS_TCB_AT_TP
392 adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
393 & __static_tls_align_m1;
394 assert (size > adj + TLS_TCB_SIZE);
395 #elif TLS_DTV_AT_TP
396 adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
397 & __static_tls_align_m1;
398 assert (size > adj);
399 #endif
400
401 /* The user provided some memory. Let's hope it matches the
402 size... We do not allocate guard pages if the user provided
403 the stack. It is the user's responsibility to do this if it
404 is wanted. */
405 #if TLS_TCB_AT_TP
406 pd = (struct pthread *) ((uintptr_t) attr->stackaddr
407 - TLS_TCB_SIZE - adj);
408 #elif TLS_DTV_AT_TP
409 pd = (struct pthread *) (((uintptr_t) attr->stackaddr
410 - __static_tls_size - adj)
411 - TLS_PRE_TCB_SIZE);
412 #endif
413
414 /* The thread descriptor placed in the user-provided stack needs to be cleared. */
415 memset (pd, '\0', sizeof (struct pthread));
416
417 /* The first TSD block is included in the TCB. */
418 pd->specific[0] = pd->specific_1stblock;
419
420 /* Remember the stack-related values. */
421 pd->stackblock = (char *) attr->stackaddr - size;
422 pd->stackblock_size = size;
423
424 /* This is a user-provided stack. It will not be queued in the
425 stack cache nor will the memory (except the TLS memory) be freed. */
426 pd->user_stack = true;
427
428 /* This is at least the second thread. */
429 pd->header.multiple_threads = 1;
430 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
431 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
432 #endif
433
434 #ifndef __ASSUME_PRIVATE_FUTEX
435 /* The thread must know when private futexes are supported. */
436 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
437 header.private_futex);
438 #endif
439
440 #ifdef NEED_DL_SYSINFO
441 /* Copy the sysinfo value from the parent. */
442 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
443 #endif
444
445 /* Don't allow setxid until cloned. */
446 pd->setxid_futex = -1;
447
448 /* Allocate the DTV for this thread. */
449 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
450 {
451 /* Something went wrong. */
452 assert (errno == ENOMEM);
453 return errno;
454 }
455
456
457 /* Prepare to modify global data. */
458 lll_lock (stack_cache_lock, LLL_PRIVATE);
459
460 /* And add to the list of stacks in use. */
461 list_add (&pd->list, &__stack_user);
462
463 lll_unlock (stack_cache_lock, LLL_PRIVATE);
464 }
465 else
466 {
467 /* Allocate some anonymous memory. If possible use the cache. */
468 size_t guardsize;
469 size_t reqsize;
470 void *mem;
471 const int prot = (PROT_READ | PROT_WRITE
472 | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
473
474 #if COLORING_INCREMENT != 0
475 /* Add one more page for stack coloring. Don't do it for stacks
476 with 16 times pagesize or larger. This might just cause
477 unnecessary misalignment. */
478 if (size <= 16 * pagesize_m1)
479 size += pagesize_m1 + 1;
480 #endif
481
482 /* Adjust the stack size for alignment. */
483 size &= ~__static_tls_align_m1;
484 assert (size != 0);
485
486 /* Make sure the size of the stack is enough for the guard and
487 the thread descriptor. */
488 guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
489 if (__builtin_expect (size < ((guardsize + __static_tls_size
490 + MINIMAL_REST_STACK + pagesize_m1)
491 & ~pagesize_m1),
492 0))
493 /* The stack is too small (or the guard too large). */
494 return EINVAL;
495
496 /* Try to get a stack from the cache. */
497 reqsize = size;
498 pd = get_cached_stack (&size, &mem);
499 if (pd == NULL)
500 {
501 /* To avoid aliasing effects on a larger scale than pages we
502 adjust the allocated stack size if necessary. This way
503 allocations directly following each other will not have
504 aliasing problems. */
505 #if MULTI_PAGE_ALIASING != 0
506 if ((size % MULTI_PAGE_ALIASING) == 0)
507 size += pagesize_m1 + 1;
508 #endif
509
510 mem = mmap (NULL, size, prot,
511 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
512
513 if (__glibc_unlikely (mem == MAP_FAILED))
514 return errno;
515
516 /* SIZE is guaranteed to be greater than zero.
517 So we can never get a null pointer back from mmap. */
518 assert (mem != NULL);
519
520 #if COLORING_INCREMENT != 0
521 /* Atomically increment NCREATED. */
522 unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
523
524 /* We choose the offset for coloring by incrementing it for
525 every new thread by a fixed amount. The offset is used
526 modulo the page size. Even if coloring would be better
527 relative to higher alignment values it makes no sense to
528 do it since the mmap() interface does not allow us to
529 specify any alignment for the returned memory block. */
530 size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
531
532 /* Make sure the coloring offset does not disturb the alignment
533 of the TCB and static TLS block. */
534 if (__glibc_unlikely ((coloring & __static_tls_align_m1) != 0))
535 coloring = (((coloring + __static_tls_align_m1)
536 & ~(__static_tls_align_m1))
537 & ~pagesize_m1);
538 #else
539 /* Unless specified we do not make any adjustments. */
540 # define coloring 0
541 #endif
542
543 /* Place the thread descriptor at the end of the stack. */
544 #if TLS_TCB_AT_TP
545 pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
546 #elif TLS_DTV_AT_TP
547 pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
548 - __static_tls_size)
549 & ~__static_tls_align_m1)
550 - TLS_PRE_TCB_SIZE);
551 #endif
552
553 /* Remember the stack-related values. */
554 pd->stackblock = mem;
555 pd->stackblock_size = size;
556
557 /* The first block of the thread-specific data array is part of
558 the descriptor. This address will not change for the lifetime
559 of this descriptor. */
560 pd->specific[0] = pd->specific_1stblock;
561
562 /* This is at least the second thread. */
563 pd->header.multiple_threads = 1;
564 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
565 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
566 #endif
567
568 #ifndef __ASSUME_PRIVATE_FUTEX
569 /* The thread must know when private futexes are supported. */
570 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
571 header.private_futex);
572 #endif
573
574 #ifdef NEED_DL_SYSINFO
575 /* Copy the sysinfo value from the parent. */
576 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
577 #endif
578
579 /* Don't allow setxid until cloned. */
580 pd->setxid_futex = -1;
581
582 /* Allocate the DTV for this thread. */
583 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
584 {
585 /* Something went wrong. */
586 assert (errno == ENOMEM);
587
588 /* Free the stack memory we just allocated. */
589 (void) munmap (mem, size);
590
591 return errno;
592 }
593
594
595 /* Prepare to modify global data. */
596 lll_lock (stack_cache_lock, LLL_PRIVATE);
597
598 /* And add to the list of stacks in use. */
599 stack_list_add (&pd->list, &stack_used);
600
601 lll_unlock (stack_cache_lock, LLL_PRIVATE);
602
603
604 /* There might have been a race. Another thread might have
605 caused the stacks to get exec permission while this new
606 stack was prepared. Detect if this was possible and
607 change the permission if necessary. */
608 if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
609 && (prot & PROT_EXEC) == 0, 0))
610 {
611 int err = change_stack_perm (pd
612 #ifdef NEED_SEPARATE_REGISTER_STACK
613 , ~pagesize_m1
614 #endif
615 );
616 if (err != 0)
617 {
618 /* Free the stack memory we just allocated. */
619 (void) munmap (mem, size);
620
621 return err;
622 }
623 }
624
625
626 /* Note that all of the stack and the thread descriptor are
627 zeroed. This means we do not have to initialize fields
628 with initial value zero. This is specifically true for
629 the 'tid' field which is always set back to zero once the
630 stack is not used anymore and for the 'guardsize' field
631 which will be read next. */
632 }
633
634 /* Create or resize the guard area if necessary. */
635 if (__glibc_unlikely (guardsize > pd->guardsize))
636 {
637 #ifdef NEED_SEPARATE_REGISTER_STACK
638 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
639 #elif _STACK_GROWS_DOWN
640 char *guard = mem;
641 #elif _STACK_GROWS_UP
642 char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
643 #endif
644 if (mprotect (guard, guardsize, PROT_NONE) != 0)
645 {
646 mprot_error:
647 lll_lock (stack_cache_lock, LLL_PRIVATE);
648
649 /* Remove the thread from the list. */
650 stack_list_del (&pd->list);
651
652 lll_unlock (stack_cache_lock, LLL_PRIVATE);
653
654 /* Get rid of the TLS block we allocated. */
655 _dl_deallocate_tls (TLS_TPADJ (pd), false);
656
657 /* Free the stack memory regardless of whether the size
658 of the cache is over the limit or not. If this piece
659 of memory caused problems we had better not use it
660 anymore. We also ignore possible errors here; there
661 is nothing we could do about them. */
662 (void) munmap (mem, size);
663
664 return errno;
665 }
666
667 pd->guardsize = guardsize;
668 }
669 else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
670 0))
671 {
672 /* The old guard area is too large. */
673
674 #ifdef NEED_SEPARATE_REGISTER_STACK
675 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
676 char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
677
678 if (oldguard < guard
679 && mprotect (oldguard, guard - oldguard, prot) != 0)
680 goto mprot_error;
681
682 if (mprotect (guard + guardsize,
683 oldguard + pd->guardsize - guard - guardsize,
684 prot) != 0)
685 goto mprot_error;
686 #elif _STACK_GROWS_DOWN
687 if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
688 prot) != 0)
689 goto mprot_error;
690 #elif _STACK_GROWS_UP
691 if (mprotect ((char *) pd - pd->guardsize,
692 pd->guardsize - guardsize, prot) != 0)
693 goto mprot_error;
694 #endif
695
696 pd->guardsize = guardsize;
697 }
698 /* pthread_getattr_np() must report the guard size requested in
699 the attribute, regardless of how large the guard area actually
700 in use is. */
701 pd->reported_guardsize = guardsize;
702 }
703
704 /* Initialize the lock. We have to do this unconditionally since the
705 stillborn thread could be canceled while the lock is taken. */
706 pd->lock = LLL_LOCK_INITIALIZER;
707
708 /* The robust mutex lists also need to be initialized
709 unconditionally because the cleanup for the previous stack owner
710 might have happened in the kernel. */
711 pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
712 - offsetof (pthread_mutex_t,
713 __data.__list.__next));
714 pd->robust_head.list_op_pending = NULL;
715 #ifdef __PTHREAD_MUTEX_HAVE_PREV
716 pd->robust_prev = &pd->robust_head;
717 #endif
718 pd->robust_head.list = &pd->robust_head;
719
720 /* Report the thread descriptor, which lives at the end of the stack block. */
721 *pdp = pd;
722
723 #if TLS_TCB_AT_TP
724 /* The stack begins before the TCB and the static TLS block. */
725 stacktop = ((char *) (pd + 1) - __static_tls_size);
726 #elif TLS_DTV_AT_TP
727 stacktop = (char *) (pd - 1);
728 #endif
729
730 #if defined(NEED_SEPARATE_REGISTER_STACK) || defined(NEED_STACK_SIZE_FOR_PTH_CREATE)
731 *stack = pd->stackblock;
732 *stacksize = stacktop - *stack;
733 #elif _STACK_GROWS_DOWN
734 *stack = stacktop;
735 #elif _STACK_GROWS_UP
736 *stack = pd->stackblock;
737 assert (*stack != NULL);
738 #endif
739
740 return 0;
741 }
742
743
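/* Return the stack of the thread described by PD: stacks we allocated
   go back into the cache of free stacks, while user-supplied stacks
   only have their TLS data released. */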
744 void
745 internal_function
746 __deallocate_stack (struct pthread *pd)
747 {
748 lll_lock (stack_cache_lock, LLL_PRIVATE);
749
750 /* Remove the thread from the list of threads with user defined
751 stacks. */
752 stack_list_del (&pd->list);
753
754 /* Not much to do. Just free the mmap()ed memory. Note that we do
755 not reset the 'used' flag in the 'tid' field. This is done by
756 the kernel. If no thread has been created yet this field is
757 still zero. */
758 if (__glibc_likely (! pd->user_stack))
759 (void) queue_stack (pd);
760 else
761 /* Free the memory associated with the ELF TLS. */
762 _dl_deallocate_tls (TLS_TPADJ (pd), false);
763
764 lll_unlock (stack_cache_lock, LLL_PRIVATE);
765 }
766
767
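/* Make the main thread's stack and all thread stacks, both in use and
   cached, executable, typically because a loaded object requires an
   executable stack. Returns 0 or an errno value. */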
768 int
769 internal_function
770 __make_stacks_executable (void **stack_endp)
771 {
772 /* First the main thread's stack. */
773 int err = _dl_make_stack_executable (stack_endp);
774 if (err != 0)
775 return err;
776
777 #ifdef NEED_SEPARATE_REGISTER_STACK
778 const size_t pagemask = ~(__getpagesize () - 1);
779 #endif
780
781 lll_lock (stack_cache_lock, LLL_PRIVATE);
782
783 list_t *runp;
784 list_for_each (runp, &stack_used)
785 {
786 err = change_stack_perm (list_entry (runp, struct pthread, list)
787 #ifdef NEED_SEPARATE_REGISTER_STACK
788 , pagemask
789 #endif
790 );
791 if (err != 0)
792 break;
793 }
794
795 /* Also change the permission for the currently unused stacks. This
796 might be wasted time, but it is better spent here than adding a check
797 in the fast path. */
798 if (err == 0)
799 list_for_each (runp, &stack_cache)
800 {
801 err = change_stack_perm (list_entry (runp, struct pthread, list)
802 #ifdef NEED_SEPARATE_REGISTER_STACK
803 , pagemask
804 #endif
805 );
806 if (err != 0)
807 break;
808 }
809
810 lll_unlock (stack_cache_lock, LLL_PRIVATE);
811
812 return err;
813 }
814
815
816 /* In case of a fork() call the memory allocation in the child will be
817 the same but only one thread is running. All stacks except that of
818 the one running thread are not used anymore. We have to recycle
819 them. */
820 void
821 __reclaim_stacks (void)
822 {
823 struct pthread *self = (struct pthread *) THREAD_SELF;
824
825 /* No locking necessary. The calling thread is the only one running. But
826 we have to be aware that we might have interrupted a list
827 operation. */
828
829 if (in_flight_stack != 0)
830 {
831 bool add_p = in_flight_stack & 1;
832 list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);
833
834 if (add_p)
835 {
836 /* We always add at the beginning of the list. So in this
837 case we only need to check the beginning of these lists. */
838 int check_list (list_t *l)
839 {
840 if (l->next->prev != l)
841 {
842 assert (l->next->prev == elem);
843
844 elem->next = l->next;
845 elem->prev = l;
846 l->next = elem;
847
848 return 1;
849 }
850
851 return 0;
852 }
853
854 if (check_list (&stack_used) == 0)
855 (void) check_list (&stack_cache);
856 }
857 else
858 {
859 /* We can simply always replay the delete operation. */
860 elem->next->prev = elem->prev;
861 elem->prev->next = elem->next;
862 }
863 }
864
865 /* Mark all stacks except the still running one as free. */
866 list_t *runp;
867 list_for_each (runp, &stack_used)
868 {
869 struct pthread *curp = list_entry (runp, struct pthread, list);
870 if (curp != self)
871 {
872 /* This marks the stack as free. */
873 curp->tid = 0;
874
875 /* Account for the size of the stack. */
876 stack_cache_actsize += curp->stackblock_size;
877
878 if (curp->specific_used)
879 {
880 /* Clear the thread-specific data. */
881 memset (curp->specific_1stblock, '\0',
882 sizeof (curp->specific_1stblock));
883
884 curp->specific_used = false;
885
886 for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
887 if (curp->specific[cnt] != NULL)
888 {
889 memset (curp->specific[cnt], '\0',
890 sizeof (curp->specific_1stblock));
891
892 /* We have allocated the block which we do not
893 free here so re-set the bit. */
894 curp->specific_used = true;
895 }
896 }
897 }
898 }
899
900 /* Add the stacks of all formerly running threads to the cache. */
901 list_splice (&stack_used, &stack_cache);
902
903 /* Remove the entry for the current thread from the cache list
904 and add it to the list of running threads. Which of the two
905 lists is decided by the user_stack flag. */
906 stack_list_del (&self->list);
907
908 /* Re-initialize the lists for all the threads. */
909 INIT_LIST_HEAD (&stack_used);
910 INIT_LIST_HEAD (&__stack_user);
911
912 if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
913 list_add (&self->list, &__stack_user);
914 else
915 list_add (&self->list, &stack_used);
916
917 /* There is one thread running. */
918 __nptl_nthreads = 1;
919
920 in_flight_stack = 0;
921
922 /* Initialize locks. */
923 stack_cache_lock = LLL_LOCK_INITIALIZER;
924 __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
925 }
926
927
928 #if HP_TIMING_AVAIL
929 # undef __find_thread_by_id
930 /* Find a thread given the thread ID. */
931 attribute_hidden
932 struct pthread *
933 __find_thread_by_id (pid_t tid)
934 {
935 struct pthread *result = NULL;
936
937 lll_lock (stack_cache_lock, LLL_PRIVATE);
938
939 /* Iterate over the list with system-allocated threads first. */
940 list_t *runp;
941 list_for_each (runp, &stack_used)
942 {
943 struct pthread *curp;
944
945 curp = list_entry (runp, struct pthread, list);
946
947 if (curp->tid == tid)
948 {
949 result = curp;
950 goto out;
951 }
952 }
953
954 /* Now the list with threads using user-allocated stacks. */
955 list_for_each (runp, &__stack_user)
956 {
957 struct pthread *curp;
958
959 curp = list_entry (runp, struct pthread, list);
960
961 if (curp->tid == tid)
962 {
963 result = curp;
964 goto out;
965 }
966 }
967
968 out:
969 lll_unlock (stack_cache_lock, LLL_PRIVATE);
970
971 return result;
972 }
973 #endif
974
975
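/* Prevent thread T from exiting before the setxid handler has run: wait
   until T is fully cloned, then set SETXID_BITMASK in its cancellation
   word unless T is already exiting. */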
976 static void
977 internal_function
978 setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
979 {
980 int ch;
981
982 /* Wait until this thread is cloned. */
983 if (t->setxid_futex == -1
984 && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
985 do
986 lll_futex_wait (&t->setxid_futex, -2, LLL_PRIVATE);
987 while (t->setxid_futex == -2);
988
989 /* Don't let the thread exit before the setxid handler runs. */
990 t->setxid_futex = 0;
991
992 do
993 {
994 ch = t->cancelhandling;
995
996 /* If the thread is exiting right now, ignore it. */
997 if ((ch & EXITING_BITMASK) != 0)
998 {
999 /* Release the futex if there is no other setxid in
1000 progress. */
1001 if ((ch & SETXID_BITMASK) == 0)
1002 {
1003 t->setxid_futex = 1;
1004 lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
1005 }
1006 return;
1007 }
1008 }
1009 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1010 ch | SETXID_BITMASK, ch));
1011 }
1012
1013
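/* Undo setxid_mark_thread: clear SETXID_BITMASK again and wake T in
   case it is blocked on its setxid futex. */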
1014 static void
1015 internal_function
1016 setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
1017 {
1018 int ch;
1019
1020 do
1021 {
1022 ch = t->cancelhandling;
1023 if ((ch & SETXID_BITMASK) == 0)
1024 return;
1025 }
1026 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1027 ch & ~SETXID_BITMASK, ch));
1028
1029 /* Release the futex just in case. */
1030 t->setxid_futex = 1;
1031 lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
1032 }
1033
1034
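/* Signal thread T to run the setxid handler if it is still marked.
   Returns 1 if a signal was sent and the caller must wait for the
   acknowledgement, 0 otherwise. The actual signalling is currently
   disabled here, pending the setxid fixup flagged below. */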
1035 static int
1036 internal_function
1037 setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
1038 {
1039 if ((t->cancelhandling & SETXID_BITMASK) == 0)
1040 return 0;
1041
1042 #warning setxid fixup needed
1043 #if 0
1044 int val;
1045 pid_t pid = __getpid ();
1046 INTERNAL_SYSCALL_DECL (err);
1047 val = INTERNAL_SYSCALL (tgkill, err, pid, t->tid, SIGSETXID);
1048
1049 /* If this failed, the thread must have not started yet or have already exited. */
1050 if (!INTERNAL_SYSCALL_ERROR_P (val, err))
1051 {
1052 atomic_increment (&cmdp->cntr);
1053 return 1;
1054 }
1055 else
1056 #endif
1057 return 0;
1058 }
1059
1060
1061 int
1062 attribute_hidden
1063 __nptl_setxid (struct xid_command *cmdp)
1064 {
1065 #warning setxid fixup needed
1066 #if 0
1067 int signalled;
1068 int result;
1069 lll_lock (stack_cache_lock, LLL_PRIVATE);
1070
1071 __xidcmd = cmdp;
1072 cmdp->cntr = 0;
1073
1074 struct pthread *self = THREAD_SELF;
1075
1076 /* Iterate over the list with system-allocated threads first. */
1077 list_t *runp;
1078 list_for_each (runp, &stack_used)
1079 {
1080 struct pthread *t = list_entry (runp, struct pthread, list);
1081 if (t == self)
1082 continue;
1083
1084 setxid_mark_thread (cmdp, t);
1085 }
1086
1087 /* Now the list with threads using user-allocated stacks. */
1088 list_for_each (runp, &__stack_user)
1089 {
1090 struct pthread *t = list_entry (runp, struct pthread, list);
1091 if (t == self)
1092 continue;
1093
1094 setxid_mark_thread (cmdp, t);
1095 }
1096
1097 /* Iterate until we don't succeed in signalling anyone. That means
1098 we have gotten all running threads, and their children will be
1099 automatically correct once started. */
1100 do
1101 {
1102 signalled = 0;
1103
1104 list_for_each (runp, &stack_used)
1105 {
1106 struct pthread *t = list_entry (runp, struct pthread, list);
1107 if (t == self)
1108 continue;
1109
1110 signalled += setxid_signal_thread (cmdp, t);
1111 }
1112
1113 list_for_each (runp, &__stack_user)
1114 {
1115 struct pthread *t = list_entry (runp, struct pthread, list);
1116 if (t == self)
1117 continue;
1118
1119 signalled += setxid_signal_thread (cmdp, t);
1120 }
1121
1122 int cur = cmdp->cntr;
1123 while (cur != 0)
1124 {
1125 lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
1126 cur = cmdp->cntr;
1127 }
1128 }
1129 while (signalled != 0);
1130
1131 /* Clean up flags, so that no thread blocks during exit waiting
1132 for a signal which will never come. */
1133 list_for_each (runp, &stack_used)
1134 {
1135 struct pthread *t = list_entry (runp, struct pthread, list);
1136 if (t == self)
1137 continue;
1138
1139 setxid_unmark_thread (cmdp, t);
1140 }
1141
1142 list_for_each (runp, &__stack_user)
1143 {
1144 struct pthread *t = list_entry (runp, struct pthread, list);
1145 if (t == self)
1146 continue;
1147
1148 setxid_unmark_thread (cmdp, t);
1149 }
1150
1151 /* This must be last, otherwise the current thread might not have
1152 permission anymore to send SIGSETXID to the other threads. */
1153 INTERNAL_SYSCALL_DECL (err);
1154 result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
1155 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
1156 if (INTERNAL_SYSCALL_ERROR_P (result, err))
1157 {
1158 __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
1159 result = -1;
1160 }
1161
1162 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1163 return result;
1164 #endif
/* While the real implementation above is disabled pending the setxid
   fixup, report failure so that callers do not assume the IDs were
   changed. */
__set_errno (ENOSYS);
return -1;
1165 }
1166
1167 static inline void __attribute__((always_inline))
1168 init_one_static_tls (struct pthread *curp, struct link_map *map)
1169 {
1170 # if TLS_TCB_AT_TP
1171 void *dest = (char *) curp - map->l_tls_offset;
1172 # elif TLS_DTV_AT_TP
1173 void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1174 # else
1175 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1176 # endif
1177
1178 /* We cannot delay the initialization of the static TLS area, since
1179 it can be accessed with the LE or IE models, but since the DTV is only
1180 used by GD and LD, we can delay its update to avoid a race. */
1181 memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1182 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1183 }
1184
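/* Initialize the static TLS block of MAP in every existing thread,
   typically after an object using static TLS has been dlopen'ed. */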
1185 void
1186 attribute_hidden
1187 __pthread_init_static_tls (struct link_map *map)
1188 {
1189 lll_lock (stack_cache_lock, LLL_PRIVATE);
1190
1191 /* Iterate over the list with system-allocated threads first. */
1192 list_t *runp;
1193 list_for_each (runp, &stack_used)
1194 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1195
1196 /* Now the list with threads using user-allocated stacks. */
1197 list_for_each (runp, &__stack_user)
1198 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1199
1200 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1201 }
1202
1203
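/* Wait until no thread is inside a global scope (GSCOPE) critical
   section anymore, so that the caller may safely modify the search
   scopes. */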
1204 void
1205 attribute_hidden
1206 __wait_lookup_done (void)
1207 {
1208 lll_lock (stack_cache_lock, LLL_PRIVATE);
1209
1210 struct pthread *self = THREAD_SELF;
1211
1212 /* Iterate over the list with system-allocated threads first. */
1213 list_t *runp;
1214 list_for_each (runp, &stack_used)
1215 {
1216 struct pthread *t = list_entry (runp, struct pthread, list);
1217 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1218 continue;
1219
1220 int *const gscope_flagp = &t->header.gscope_flag;
1221
1222 /* We have to wait until this thread is done with the global
1223 scope. First tell the thread that we are waiting and
1224 possibly have to be woken. */
1225 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1226 THREAD_GSCOPE_FLAG_WAIT,
1227 THREAD_GSCOPE_FLAG_USED))
1228 continue;
1229
1230 do
1231 lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1232 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1233 }
1234
1235 /* Now the list with threads using user-allocated stacks. */
1236 list_for_each (runp, &__stack_user)
1237 {
1238 struct pthread *t = list_entry (runp, struct pthread, list);
1239 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1240 continue;
1241
1242 int *const gscope_flagp = &t->header.gscope_flag;
1243
1244 /* We have to wait until this thread is done with the global
1245 scope. First tell the thread that we are waiting and
1246 possibly have to be woken. */
1247 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1248 THREAD_GSCOPE_FLAG_WAIT,
1249 THREAD_GSCOPE_FLAG_USED))
1250 continue;
1251
1252 do
1253 lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1254 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1255 }
1256
1257 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1258 }