1 /* Copyright (C) 2002-2013 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <errno.h>
21 #include <signal.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <sys/mman.h>
26 #include <sys/param.h>
27 #include <dl-sysdep.h>
28 #include <dl-tls.h>
29 #include <tls.h>
30 #include <list.h>
31 #include <lowlevellock.h>
32 #include <kernel-features.h>
33 #include <stack-aliasing.h>
34
35
36 #if !(defined(NEED_SEPARATE_REGISTER_STACK) || defined(NEED_STACK_SIZE_FOR_PTH_CREATE))
37
38 /* Most architectures have exactly one stack pointer. Some have more. */
39 # define STACK_VARIABLES void *stackaddr = NULL
40
41 /* How to pass the values to the 'create_thread' function. */
42 # define STACK_VARIABLES_ARGS stackaddr
43
44 /* How to declare the function which receives these parameters. */
45 # define STACK_VARIABLES_PARMS void *stackaddr
46
47 /* How to declare allocate_stack. */
48 # define ALLOCATE_STACK_PARMS void **stack
49
50 /* This is how the function is called. We do it this way to allow
51 other variants of the function to have more parameters. */
52 # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
53
54 #else
55
56 /* We need two stacks. The kernel will place them but we have to tell
57 the kernel about the size of the reserved address space. */
58 # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
59
60 /* How to pass the values to the 'create_thread' function. */
61 # define STACK_VARIABLES_ARGS stackaddr, stacksize
62
63 /* How to declare the function which receives these parameters. */
64 # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
65
66 /* How to declare allocate_stack. */
67 # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
68
69 /* This is how the function is called. We do it this way to allow
70 other variants of the function to have more parameters. */
71 # define ALLOCATE_STACK(attr, pd) \
72 allocate_stack (attr, pd, &stackaddr, &stacksize)
73
74 #endif
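/* A sketch (illustrative only; the real call site lives in the
   pthread_create implementation, and 'iattr'/'create_thread' are just the
   names used there) of how these macros are meant to be combined:

     STACK_VARIABLES;
     struct pthread *pd;
     int err = ALLOCATE_STACK (iattr, &pd);
     if (err == 0)
       err = create_thread (pd, iattr, STACK_VARIABLES_ARGS);

   This keeps the call sites identical for the one- and two-stack
   variants. */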
75
76
77 /* Default alignment of stack. */
78 #ifndef STACK_ALIGN
79 # define STACK_ALIGN __alignof__ (long double)
80 #endif
81
82 /* Default value for minimal stack size after allocating thread
83 descriptor and guard. */
84 #ifndef MINIMAL_REST_STACK
85 # define MINIMAL_REST_STACK 4096
86 #endif
87
88 /*
89 Unfortunately, on FreeBSD mmap fails when called with addr == NULL and
90 MAP_STACK among the flags.
91 
92 See http://www.freebsd.org/cgi/query-pr.cgi?pr=158755
93 
94 Therefore we do not use MAP_STACK at all. */
95
96 #undef MAP_STACK
97
98
99 /* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
100 a stack. It is disabled above, so define it to 0 and use it unconditionally. */
101 #ifndef MAP_STACK
102 # define MAP_STACK 0
103 #endif
104
105 /* This yields the pointer that TLS support code calls the thread pointer. */
106 #if TLS_TCB_AT_TP
107 # define TLS_TPADJ(pd) (pd)
108 #elif TLS_DTV_AT_TP
109 # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
110 #endif
111
112 /* Cache handling for not-yet free stacks. */
113
114 /* Maximum size in bytes of the stack cache. */
115 static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default. */
116 static size_t stack_cache_actsize;
117
118 /* Mutex protecting this variable. */
119 static int stack_cache_lock = LLL_LOCK_INITIALIZER;
120
121 /* List of queued stack frames. */
122 static LIST_HEAD (stack_cache);
123
124 /* List of the stacks in use. */
125 static LIST_HEAD (stack_used);
126
127 /* We need to record which list operation is in progress so that, in
128 case of an asynchronous interruption due to a fork() call, we can
129 repair the lists afterwards. */
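/* The recorded value is the address of the list element being
   manipulated; the low bit is set while an add operation is in flight and
   clear while a delete is in flight. __reclaim_stacks inspects this after
   fork to replay or complete the interrupted operation. */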
130 static uintptr_t in_flight_stack;
131
132 /* List of the threads with user provided stacks in use. No need to
133 initialize this, since it's done in __pthread_initialize_minimal. */
134 list_t __stack_user __attribute__ ((nocommon));
135 hidden_data_def (__stack_user)
136
137 #if COLORING_INCREMENT != 0
138 /* Number of threads created. */
139 static unsigned int nptl_ncreated;
140 #endif
141
142
143 /* Check whether the stack is no longer in use. */
144 #define FREE_P(descr) ((descr)->tid <= KTID_TERMINATED)
145
146
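/* Unlink ELEM from whichever list it is on. The operation is recorded
   in in_flight_stack around the actual list manipulation, with write
   barriers ordering the stores, so that __reclaim_stacks can repair the
   list if a fork() interrupts us half-way through. */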
147 static void
148 stack_list_del (list_t *elem)
149 {
150 in_flight_stack = (uintptr_t) elem;
151
152 atomic_write_barrier ();
153
154 list_del (elem);
155
156 atomic_write_barrier ();
157
158 in_flight_stack = 0;
159 }
160
161
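/* Add ELEM to LIST, again recording the operation in in_flight_stack;
   the low bit is set so that an interrupted add can be distinguished
   from an interrupted delete when replayed after a fork(). */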
162 static void
163 stack_list_add (list_t *elem, list_t *list)
164 {
165 in_flight_stack = (uintptr_t) elem | 1;
166
167 atomic_write_barrier ();
168
169 list_add (elem, list);
170
171 atomic_write_barrier ();
172
173 in_flight_stack = 0;
174 }
175
176
177 /* We create a doubly linked list of all cache entries. Doubly linked
178 because this allows removing entries from the end. */
179
180
181 /* Get a stack from the cache. We have to match by size since some
182 blocks might be too small or far too large. */
183 static struct pthread *
184 get_cached_stack (size_t *sizep, void **memp)
185 {
186 size_t size = *sizep;
187 struct pthread *result = NULL;
188 list_t *entry;
189
190 lll_lock (stack_cache_lock, LLL_PRIVATE);
191
192 /* Search the cache for a matching entry. We search for the
193 smallest stack which has at least the required size. Note that
194 in normal situations the size of all allocated stacks is the
195 same. At the very least there are only a few different sizes.
196 Therefore this loop will exit early most of the time with an
197 exact match. */
198 list_for_each (entry, &stack_cache)
199 {
200 struct pthread *curr;
201
202 curr = list_entry (entry, struct pthread, list);
203 if (FREE_P (curr) && curr->stackblock_size >= size)
204 {
205 if (curr->stackblock_size == size)
206 {
207 result = curr;
208 break;
209 }
210
211 if (result == NULL
212 || result->stackblock_size > curr->stackblock_size)
213 result = curr;
214 }
215 }
216
217 if (__builtin_expect (result == NULL, 0)
218 /* Make sure the size difference is not excessive. In that
219 case we do not use the block. */
220 || __builtin_expect (result->stackblock_size > 4 * size, 0))
221 {
222 /* Release the lock. */
223 lll_unlock (stack_cache_lock, LLL_PRIVATE);
224
225 return NULL;
226 }
227
228 /* Don't allow setxid until cloned. */
229 result->setxid_futex = -1;
230
231 /* Dequeue the entry. */
232 stack_list_del (&result->list);
233
234 /* And add to the list of stacks in use. */
235 stack_list_add (&result->list, &stack_used);
236
237 /* And decrease the cache size. */
238 stack_cache_actsize -= result->stackblock_size;
239
240 /* Release the lock early. */
241 lll_unlock (stack_cache_lock, LLL_PRIVATE);
242
243 /* Report size and location of the stack to the caller. */
244 *sizep = result->stackblock_size;
245 *memp = result->stackblock;
246
247 /* Cancellation handling is back to the default. */
248 result->cancelhandling = 0;
249 result->cleanup = NULL;
250
251 /* No pending event. */
252 result->nextevent = NULL;
253
254 /* Clear the DTV. */
255 dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
256 for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
257 free (dtv[1 + cnt].pointer.to_free);
258 memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
259
260 /* Re-initialize the TLS. */
261 _dl_allocate_tls_init (TLS_TPADJ (result));
262
263 return result;
264 }
265
266
267 /* Free stacks until cache size is lower than LIMIT. */
268 void
269 __free_stacks (size_t limit)
270 {
271 /* We reduce the size of the cache. Remove the last entries until
272 the size is below the limit. */
273 list_t *entry;
274 list_t *prev;
275
276 /* Search from the end of the list. */
277 list_for_each_prev_safe (entry, prev, &stack_cache)
278 {
279 struct pthread *curr;
280
281 curr = list_entry (entry, struct pthread, list);
282 if (FREE_P (curr))
283 {
284 /* Unlink the block. */
285 stack_list_del (entry);
286
287 /* Account for the freed memory. */
288 stack_cache_actsize -= curr->stackblock_size;
289
290 /* Free the memory associated with the ELF TLS. */
291 _dl_deallocate_tls (TLS_TPADJ (curr), false);
292
293 /* Remove this block. This should never fail. If it does
294 something is really wrong. */
295 if (munmap (curr->stackblock, curr->stackblock_size) != 0)
296 abort ();
297
298 /* Maybe we have freed enough. */
299 if (stack_cache_actsize <= limit)
300 break;
301 }
302 }
303 }
304
305
306 /* Add a stack which is not used anymore to the cache. Must be
307 called with the cache lock held. */
308 static inline void
309 __attribute ((always_inline))
310 queue_stack (struct pthread *stack)
311 {
312 /* We unconditionally add the stack to the list. The memory may
313 still be in use but it will not be reused until the kernel marks
314 the stack as not used anymore. */
315 stack_list_add (&stack->list, &stack_cache);
316
317 stack_cache_actsize += stack->stackblock_size;
318 if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
319 __free_stacks (stack_cache_maxsize);
320 }
321
322
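/* Make the usable part of the stack described by PD executable in
   addition to readable and writable. Returns 0 on success or an errno
   value if mprotect fails. */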
323 static int
324 internal_function
325 change_stack_perm (struct pthread *pd
326 #ifdef NEED_SEPARATE_REGISTER_STACK
327 , size_t pagemask
328 #endif
329 )
330 {
331 #ifdef NEED_SEPARATE_REGISTER_STACK
332 void *stack = (pd->stackblock
333 + (((((pd->stackblock_size - pd->guardsize) / 2)
334 & pagemask) + pd->guardsize) & pagemask));
335 size_t len = pd->stackblock + pd->stackblock_size - stack;
336 #elif _STACK_GROWS_DOWN
337 void *stack = pd->stackblock + pd->guardsize;
338 size_t len = pd->stackblock_size - pd->guardsize;
339 #elif _STACK_GROWS_UP
340 void *stack = pd->stackblock;
341 size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
342 #else
343 # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
344 #endif
345 if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
346 return errno;
347
348 return 0;
349 }
350
351
352 /* Returns a usable stack for a new thread either by allocating a
353 new stack or reusing a cached stack of sufficient size.
354 ATTR must be non-NULL and point to a valid pthread_attr.
355 PDP must be non-NULL. */
356 static int
357 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
358 ALLOCATE_STACK_PARMS)
359 {
360 struct pthread *pd;
361 size_t size;
362 size_t pagesize_m1 = __getpagesize () - 1;
363 void *stacktop;
364
365 assert (powerof2 (pagesize_m1 + 1));
366 assert (TCB_ALIGNMENT >= STACK_ALIGN);
367
368 /* Get the stack size from the attribute if it is set. Otherwise we
369 use the default we determined at start time. */
370 if (attr->stacksize != 0)
371 size = attr->stacksize;
372 else
373 {
374 lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
375 size = __default_pthread_attr.stacksize;
376 lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
377 }
378
379 /* Get memory for the stack. */
380 if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
381 {
382 uintptr_t adj;
383
384 /* If the user also specified the size of the stack make sure it
385 is large enough. */
386 if (attr->stacksize != 0
387 && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
388 return EINVAL;
389
390 /* Adjust stack size for alignment of the TLS block. */
391 #if TLS_TCB_AT_TP
392 adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
393 & __static_tls_align_m1;
394 assert (size > adj + TLS_TCB_SIZE);
395 #elif TLS_DTV_AT_TP
396 adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
397 & __static_tls_align_m1;
398 assert (size > adj);
399 #endif
400
401 /* The user provided some memory. Let's hope it matches the
402 size... We do not allocate guard pages if the user provided
403 the stack. It is the user's responsibility to do this if it
404 is wanted. */
405 #if TLS_TCB_AT_TP
406 pd = (struct pthread *) ((uintptr_t) attr->stackaddr
407 - TLS_TCB_SIZE - adj);
408 #elif TLS_DTV_AT_TP
409 pd = (struct pthread *) (((uintptr_t) attr->stackaddr
410 - __static_tls_size - adj)
411 - TLS_PRE_TCB_SIZE);
412 #endif
413
414 /* The thread descriptor placed in the user-provided stack needs to be cleared. */
415 memset (pd, '\0', sizeof (struct pthread));
416
417 /* The first TSD block is included in the TCB. */
418 pd->specific[0] = pd->specific_1stblock;
419
420 /* Remember the stack-related values. */
421 pd->stackblock = (char *) attr->stackaddr - size;
422 pd->stackblock_size = size;
423
424 /* This is a user-provided stack. It will not be queued in the
425 stack cache nor will the memory (except the TLS memory) be freed. */
426 pd->user_stack = true;
427
428 /* This is at least the second thread. */
429 pd->header.multiple_threads = 1;
430 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
431 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
432 #endif
433
434 #ifndef __ASSUME_PRIVATE_FUTEX
435 /* The thread must know when private futexes are supported. */
436 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
437 header.private_futex);
438 #endif
439
440 #ifdef NEED_DL_SYSINFO
441 /* Copy the sysinfo value from the parent. */
442 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
443 #endif
444
445 /* Don't allow setxid until cloned. */
446 pd->setxid_futex = -1;
447
448 /* Allocate the DTV for this thread. */
449 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
450 {
451 /* Something went wrong. */
452 assert (errno == ENOMEM);
453 return errno;
454 }
455
456
457 /* Prepare to modify global data. */
458 lll_lock (stack_cache_lock, LLL_PRIVATE);
459
460 /* And add to the list of stacks in use. */
461 list_add (&pd->list, &__stack_user);
462
463 lll_unlock (stack_cache_lock, LLL_PRIVATE);
464 }
465 else
466 {
467 /* Allocate some anonymous memory. If possible use the cache. */
468 size_t guardsize;
469 size_t reqsize;
470 void *mem;
471 const int prot = (PROT_READ | PROT_WRITE
472 | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
473
474 #if COLORING_INCREMENT != 0
475 /* Add one more page for stack coloring. Don't do it for stacks
476 with 16 times pagesize or larger. This might just cause
477 unnecessary misalignment. */
478 if (size <= 16 * pagesize_m1)
479 size += pagesize_m1 + 1;
480 #endif
481
482 /* Adjust the stack size for alignment. */
483 size &= ~__static_tls_align_m1;
484 assert (size != 0);
485
486 /* Make sure the size of the stack is enough for the guard and
487 the thread descriptor. */
488 guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
489 if (__builtin_expect (size < ((guardsize + __static_tls_size
490 + MINIMAL_REST_STACK + pagesize_m1)
491 & ~pagesize_m1),
492 0))
493 /* The stack is too small (or the guard too large). */
494 return EINVAL;
495
496 /* Try to get a stack from the cache. */
497 reqsize = size;
498 pd = get_cached_stack (&size, &mem);
499 if (pd == NULL)
500 {
501 /* To avoid aliasing effects on a larger scale than pages we
502 adjust the allocated stack size if necessary. This way
503 allocations directly following each other will not have
504 aliasing problems. */
505 #if MULTI_PAGE_ALIASING != 0
506 if ((size % MULTI_PAGE_ALIASING) == 0)
507 size += pagesize_m1 + 1;
508 #endif
509
510 mem = mmap (NULL, size, prot,
511 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
512
513 if (__glibc_unlikely (mem == MAP_FAILED))
514 return errno;
515
516 /* SIZE is guaranteed to be greater than zero.
517 So we can never get a null pointer back from mmap. */
518 assert (mem != NULL);
519
520 #if COLORING_INCREMENT != 0
521 /* Atomically increment NCREATED. */
522 unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
523
524 /* We choose the offset for coloring by incrementing it for
525 every new thread by a fixed amount. The offset is used
526 modulo the page size. Even if coloring would be better
527 relative to higher alignment values it makes no sense to
528 do it since the mmap() interface does not allow us to
529 specify any alignment for the returned memory block. */
530 size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
531
532 /* Make sure the coloring offset does not disturb the alignment
533 of the TCB and static TLS block. */
534 if (__glibc_unlikely ((coloring & __static_tls_align_m1) != 0))
535 coloring = (((coloring + __static_tls_align_m1)
536 & ~(__static_tls_align_m1))
537 & ~pagesize_m1);
538 #else
539 /* Unless specified we do not make any adjustments. */
540 # define coloring 0
541 #endif
542
543 /* Place the thread descriptor at the end of the stack. */
544 #if TLS_TCB_AT_TP
545 pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
546 #elif TLS_DTV_AT_TP
547 pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
548 - __static_tls_size)
549 & ~__static_tls_align_m1)
550 - TLS_PRE_TCB_SIZE);
551 #endif
552
553 /* Remember the stack-related values. */
554 pd->stackblock = mem;
555 pd->stackblock_size = size;
556
557 /* The first block of the thread-specific data array is part of
558 the descriptor. This address will not change for the lifetime
559 of this descriptor. */
560 pd->specific[0] = pd->specific_1stblock;
561
562 /* This is at least the second thread. */
563 pd->header.multiple_threads = 1;
564 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
565 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
566 #endif
567
568 #ifndef __ASSUME_PRIVATE_FUTEX
569 /* The thread must know when private futexes are supported. */
570 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
571 header.private_futex);
572 #endif
573
574 #ifdef NEED_DL_SYSINFO
575 /* Copy the sysinfo value from the parent. */
576 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
577 #endif
578
579 /* Don't allow setxid until cloned. */
580 pd->setxid_futex = -1;
581
582 /* Allocate the DTV for this thread. */
583 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
584 {
585 /* Something went wrong. */
586 assert (errno == ENOMEM);
587
588 /* Free the stack memory we just allocated. */
589 (void) munmap (mem, size);
590
591 return errno;
592 }
593
594
595 /* Prepare to modify global data. */
596 lll_lock (stack_cache_lock, LLL_PRIVATE);
597
598 /* And add to the list of stacks in use. */
599 stack_list_add (&pd->list, &stack_used);
600
601 lll_unlock (stack_cache_lock, LLL_PRIVATE);
602
603
604 /* There might have been a race. Another thread might have
605 caused the stacks to get exec permission while this new
606 stack was prepared. Detect if this was possible and
607 change the permission if necessary. */
608 if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
609 && (prot & PROT_EXEC) == 0, 0))
610 {
611 int err = change_stack_perm (pd
612 #ifdef NEED_SEPARATE_REGISTER_STACK
613 , ~pagesize_m1
614 #endif
615 );
616 if (err != 0)
617 {
618 /* Free the stack memory we just allocated. */
619 (void) munmap (mem, size);
620
621 return err;
622 }
623 }
624
625
626 /* Note that all of the stack and the thread descriptor are
627 zeroed. This means we do not have to initialize fields
628 with initial value zero. This is specifically true for
629 the 'tid' field which is always set back to zero once the
630 stack is not used anymore and for the 'guardsize' field
631 which will be read next. */
632 }
633
634 /* Create or resize the guard area if necessary. */
635 if (__glibc_unlikely (guardsize > pd->guardsize))
636 {
637 #ifdef NEED_SEPARATE_REGISTER_STACK
638 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
639 #elif _STACK_GROWS_DOWN
640 char *guard = mem;
641 #elif _STACK_GROWS_UP
642 char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
643 #endif
644 if (mprotect (guard, guardsize, PROT_NONE) != 0)
645 {
646 mprot_error:
647 lll_lock (stack_cache_lock, LLL_PRIVATE);
648
649 /* Remove the thread from the list. */
650 stack_list_del (&pd->list);
651
652 lll_unlock (stack_cache_lock, LLL_PRIVATE);
653
654 /* Get rid of the TLS block we allocated. */
655 _dl_deallocate_tls (TLS_TPADJ (pd), false);
656
657 /* Free the stack memory regardless of whether the size
658 of the cache is over the limit or not. If this piece
659 of memory caused problems we had better not use it
660 anymore. We also ignore possible errors here; there
661 is nothing we could do about them. */
662 (void) munmap (mem, size);
663
664 return errno;
665 }
666
667 pd->guardsize = guardsize;
668 }
669 else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
670 0))
671 {
672 /* The old guard area is too large. */
673
674 #ifdef NEED_SEPARATE_REGISTER_STACK
675 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
676 char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
677
678 if (oldguard < guard
679 && mprotect (oldguard, guard - oldguard, prot) != 0)
680 goto mprot_error;
681
682 if (mprotect (guard + guardsize,
683 oldguard + pd->guardsize - guard - guardsize,
684 prot) != 0)
685 goto mprot_error;
686 #elif _STACK_GROWS_DOWN
687 if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
688 prot) != 0)
689 goto mprot_error;
690 #elif _STACK_GROWS_UP
691 if (mprotect ((char *) pd - pd->guardsize,
692 pd->guardsize - guardsize, prot) != 0)
693 goto mprot_error;
694 #endif
695
696 pd->guardsize = guardsize;
697 }
698 /* pthread_getattr_np() must report the guard size requested in
699 the attribute, regardless of how large the guard area actually
700 in use is. */
701 pd->reported_guardsize = guardsize;
702 }
703
704 /* Initialize the lock. We have to do this unconditionally since the
705 stillborn thread could be canceled while the lock is taken. */
706 pd->lock = LLL_LOCK_INITIALIZER;
707
708 /* The robust mutex lists also need to be initialized
709 unconditionally because the cleanup for the previous stack owner
710 might have happened in the kernel. */
711 pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
712 - offsetof (pthread_mutex_t,
713 __data.__list.__next));
714 pd->robust_head.list_op_pending = NULL;
715 #ifdef __PTHREAD_MUTEX_HAVE_PREV
716 pd->robust_prev = &pd->robust_head;
717 #endif
718 pd->robust_head.list = &pd->robust_head;
719
720 /* Report the thread descriptor, which lives at the end of the stack block. */
721 *pdp = pd;
722
723 #if TLS_TCB_AT_TP
724 /* The stack begins before the TCB and the static TLS block. */
725 stacktop = ((char *) (pd + 1) - __static_tls_size);
726 #elif TLS_DTV_AT_TP
727 stacktop = (char *) (pd - 1);
728 #endif
729
730 #if defined(NEED_SEPARATE_REGISTER_STACK) || defined(NEED_STACK_SIZE_FOR_PTH_CREATE)
731 *stack = pd->stackblock;
732 *stacksize = stacktop - *stack;
733 #elif _STACK_GROWS_DOWN
734 *stack = stacktop;
735 #elif _STACK_GROWS_UP
736 *stack = pd->stackblock;
737 assert (*stack != NULL);
738 #endif
739
740 return 0;
741 }
742
743
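/* Return the stack of the thread described by PD: stacks we allocated
   go back into the cache of free stacks, while user-supplied stacks
   only have their TLS data released. */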
744 void
745 internal_function
746 __deallocate_stack (struct pthread *pd)
747 {
748 lll_lock (stack_cache_lock, LLL_PRIVATE);
749
750 /* Remove the thread from the list of threads with user defined
751 stacks. */
752 stack_list_del (&pd->list);
753
754 /* Not much to do. Just free the mmap()ed memory. Note that we do
755 not reset the 'used' flag in the 'tid' field. This is done by
756 the kernel. If no thread has been created yet this field is
757 still zero. */
758 if (__glibc_likely (! pd->user_stack))
759 (void) queue_stack (pd);
760 else
761 /* Free the memory associated with the ELF TLS. */
762 _dl_deallocate_tls (TLS_TPADJ (pd), false);
763
764 lll_unlock (stack_cache_lock, LLL_PRIVATE);
765 }
766
767
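/* Make the main thread's stack and all thread stacks, both in use and
   cached, executable, typically because a loaded object requires an
   executable stack. Returns 0 or an errno value. */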
768 int
769 internal_function
770 __make_stacks_executable (void **stack_endp)
771 {
772 /* First the main thread's stack. */
773 int err = _dl_make_stack_executable (stack_endp);
774 if (err != 0)
775 return err;
776
777 #ifdef NEED_SEPARATE_REGISTER_STACK
778 const size_t pagemask = ~(__getpagesize () - 1);
779 #endif
780
781 lll_lock (stack_cache_lock, LLL_PRIVATE);
782
783 list_t *runp;
784 list_for_each (runp, &stack_used)
785 {
786 err = change_stack_perm (list_entry (runp, struct pthread, list)
787 #ifdef NEED_SEPARATE_REGISTER_STACK
788 , pagemask
789 #endif
790 );
791 if (err != 0)
792 break;
793 }
794
795 /* Also change the permission for the currently unused stacks. This
796 might be wasted time, but it is better spent here than adding a check
797 in the fast path. */
798 if (err == 0)
799 list_for_each (runp, &stack_cache)
800 {
801 err = change_stack_perm (list_entry (runp, struct pthread, list)
802 #ifdef NEED_SEPARATE_REGISTER_STACK
803 , pagemask
804 #endif
805 );
806 if (err != 0)
807 break;
808 }
809
810 lll_unlock (stack_cache_lock, LLL_PRIVATE);
811
812 return err;
813 }
814
815
816 /* In case of a fork() call the memory allocation in the child will be
817 the same but only one thread is running. All stacks except that of
818 the one running thread are not used anymore. We have to recycle
819 them. */
820 void
821 __reclaim_stacks (void)
822 {
823 struct pthread *self = (struct pthread *) THREAD_SELF;
824
825 /* No locking necessary. The calling thread is the only one running. But
826 we have to be aware that we might have interrupted a list
827 operation. */
828
829 if (in_flight_stack != 0)
830 {
831 bool add_p = in_flight_stack & 1;
832 list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);
833
834 if (add_p)
835 {
836 /* We always add at the beginning of the list. So in this
837 case we only need to check the beginning of these lists. */
838 int check_list (list_t *l)
839 {
840 if (l->next->prev != l)
841 {
842 assert (l->next->prev == elem);
843
844 elem->next = l->next;
845 elem->prev = l;
846 l->next = elem;
847
848 return 1;
849 }
850
851 return 0;
852 }
853
854 if (check_list (&stack_used) == 0)
855 (void) check_list (&stack_cache);
856 }
857 else
858 {
859 /* We can simply always replay the delete operation. */
860 elem->next->prev = elem->prev;
861 elem->prev->next = elem->next;
862 }
863 }
864
865 /* Mark all stacks except the still running one as free. */
866 list_t *runp;
867 list_for_each (runp, &stack_used)
868 {
869 struct pthread *curp = list_entry (runp, struct pthread, list);
870 if (curp != self)
871 {
872 /* This marks the stack as free. */
873 curp->tid = 0;
874
875 /* Account for the size of the stack. */
876 stack_cache_actsize += curp->stackblock_size;
877
878 if (curp->specific_used)
879 {
880 /* Clear the thread-specific data. */
881 memset (curp->specific_1stblock, '\0',
882 sizeof (curp->specific_1stblock));
883
884 curp->specific_used = false;
885
886 for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
887 if (curp->specific[cnt] != NULL)
888 {
889 memset (curp->specific[cnt], '\0',
890 sizeof (curp->specific_1stblock));
891
892 /* We have allocated the block which we do not
893 free here so re-set the bit. */
894 curp->specific_used = true;
895 }
896 }
897 }
898 }
899
900 /* Add the stacks of all formerly running threads to the cache. */
901 list_splice (&stack_used, &stack_cache);
902
903 /* Remove the entry for the current thread from the cache list
904 and add it to the list of running threads. Which of the two
905 lists is decided by the user_stack flag. */
906 stack_list_del (&self->list);
907
908 /* Re-initialize the lists for all the threads. */
909 INIT_LIST_HEAD (&stack_used);
910 INIT_LIST_HEAD (&__stack_user);
911
912 if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
913 list_add (&self->list, &__stack_user);
914 else
915 list_add (&self->list, &stack_used);
916
917 /* There is one thread running. */
918 __nptl_nthreads = 1;
919
920 in_flight_stack = 0;
921
922 /* Initialize locks. */
923 stack_cache_lock = LLL_LOCK_INITIALIZER;
924 __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
925 }
926
927
928 #if HP_TIMING_AVAIL
929 # undef __find_thread_by_id
930 /* Find a thread given the thread ID. */
931 attribute_hidden
932 struct pthread *
933 __find_thread_by_id (pid_t tid)
934 {
935 struct pthread *result = NULL;
936
937 lll_lock (stack_cache_lock, LLL_PRIVATE);
938
939 /* Iterate over the list with system-allocated threads first. */
940 list_t *runp;
941 list_for_each (runp, &stack_used)
942 {
943 struct pthread *curp;
944
945 curp = list_entry (runp, struct pthread, list);
946
947 if (curp->tid == tid)
948 {
949 result = curp;
950 goto out;
951 }
952 }
953
954 /* Now the list with threads using user-allocated stacks. */
955 list_for_each (runp, &__stack_user)
956 {
957 struct pthread *curp;
958
959 curp = list_entry (runp, struct pthread, list);
960
961 if (curp->tid == tid)
962 {
963 result = curp;
964 goto out;
965 }
966 }
967
968 out:
969 lll_unlock (stack_cache_lock, LLL_PRIVATE);
970
971 return result;
972 }
973 #endif
974
975
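/* Prevent thread T from exiting before the setxid handler has run: wait
   until T is fully cloned, then set SETXID_BITMASK in its cancellation
   word unless T is already exiting. */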
976 static void
977 internal_function
978 setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
979 {
980 int ch;
981
982 /* Wait until this thread is cloned. */
983 if (t->setxid_futex == -1
984 && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
985 do
986 lll_futex_wait (&t->setxid_futex, -2, LLL_PRIVATE);
987 while (t->setxid_futex == -2);
988
989 /* Don't let the thread exit before the setxid handler runs. */
990 t->setxid_futex = 0;
991
992 do
993 {
994 ch = t->cancelhandling;
995
996 /* If the thread is exiting right now, ignore it. */
997 if ((ch & EXITING_BITMASK) != 0)
998 {
999 /* Release the futex if there is no other setxid in
1000 progress. */
1001 if ((ch & SETXID_BITMASK) == 0)
1002 {
1003 t->setxid_futex = 1;
1004 lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
1005 }
1006 return;
1007 }
1008 }
1009 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1010 ch | SETXID_BITMASK, ch));
1011 }
1012
1013
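/* Undo setxid_mark_thread: clear SETXID_BITMASK again and wake T in
   case it is blocked on its setxid futex. */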
1014 static void
1015 internal_function
1016 setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
1017 {
1018 int ch;
1019
1020 do
1021 {
1022 ch = t->cancelhandling;
1023 if ((ch & SETXID_BITMASK) == 0)
1024 return;
1025 }
1026 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1027 ch & ~SETXID_BITMASK, ch));
1028
1029 /* Release the futex just in case. */
1030 t->setxid_futex = 1;
1031 lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
1032 }
1033
1034
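/* Signal thread T to run the setxid handler if it is still marked.
   Returns 1 if a signal was sent and the caller must wait for the
   acknowledgement, 0 otherwise. The actual signalling is currently
   disabled here, pending the setxid fixup flagged below. */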
1035 static int
1036 internal_function
1037 setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
1038 {
1039 if ((t->cancelhandling & SETXID_BITMASK) == 0)
1040 return 0;
1041
1042 #warning setxid fixup needed
1043 #if 0
1044 int val;
1045 pid_t pid = __getpid ();
1046 INTERNAL_SYSCALL_DECL (err);
1047 val = INTERNAL_SYSCALL (tgkill, err, pid, t->tid, SIGSETXID);
1048
1049 /* If this failed, the thread must have not started yet or have already exited. */
1050 if (!INTERNAL_SYSCALL_ERROR_P (val, err))
1051 {
1052 atomic_increment (&cmdp->cntr);
1053 return 1;
1054 }
1055 else
1056 #endif
1057 return 0;
1058 }
1059
1060
1061 int
1062 attribute_hidden
1063 __nptl_setxid (struct xid_command *cmdp)
1064 {
1065 #warning setxid fixup needed
1066 #if 0
1067 int signalled;
1068 int result;
1069 lll_lock (stack_cache_lock, LLL_PRIVATE);
1070
1071 __xidcmd = cmdp;
1072 cmdp->cntr = 0;
1073
1074 struct pthread *self = THREAD_SELF;
1075
1076 /* Iterate over the list with system-allocated threads first. */
1077 list_t *runp;
1078 list_for_each (runp, &stack_used)
1079 {
1080 struct pthread *t = list_entry (runp, struct pthread, list);
1081 if (t == self)
1082 continue;
1083
1084 setxid_mark_thread (cmdp, t);
1085 }
1086
1087 /* Now the list with threads using user-allocated stacks. */
1088 list_for_each (runp, &__stack_user)
1089 {
1090 struct pthread *t = list_entry (runp, struct pthread, list);
1091 if (t == self)
1092 continue;
1093
1094 setxid_mark_thread (cmdp, t);
1095 }
1096
1097 /* Iterate until we don't succeed in signalling anyone. That means
1098 we have gotten all running threads, and their children will be
1099 automatically correct once started. */
1100 do
1101 {
1102 signalled = 0;
1103
1104 list_for_each (runp, &stack_used)
1105 {
1106 struct pthread *t = list_entry (runp, struct pthread, list);
1107 if (t == self)
1108 continue;
1109
1110 signalled += setxid_signal_thread (cmdp, t);
1111 }
1112
1113 list_for_each (runp, &__stack_user)
1114 {
1115 struct pthread *t = list_entry (runp, struct pthread, list);
1116 if (t == self)
1117 continue;
1118
1119 signalled += setxid_signal_thread (cmdp, t);
1120 }
1121
1122 int cur = cmdp->cntr;
1123 while (cur != 0)
1124 {
1125 lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
1126 cur = cmdp->cntr;
1127 }
1128 }
1129 while (signalled != 0);
1130
1131 /* Clean up flags, so that no thread blocks during exit waiting
1132 for a signal which will never come. */
1133 list_for_each (runp, &stack_used)
1134 {
1135 struct pthread *t = list_entry (runp, struct pthread, list);
1136 if (t == self)
1137 continue;
1138
1139 setxid_unmark_thread (cmdp, t);
1140 }
1141
1142 list_for_each (runp, &__stack_user)
1143 {
1144 struct pthread *t = list_entry (runp, struct pthread, list);
1145 if (t == self)
1146 continue;
1147
1148 setxid_unmark_thread (cmdp, t);
1149 }
1150
1151 /* This must be last, otherwise the current thread might not have
1152 permission anymore to send SIGSETXID to the other threads. */
1153 INTERNAL_SYSCALL_DECL (err);
1154 result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
1155 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
1156 if (INTERNAL_SYSCALL_ERROR_P (result, err))
1157 {
1158 __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
1159 result = -1;
1160 }
1161
1162 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1163 return result;
1164 #endif
/* While the real implementation above is disabled pending the setxid
   fixup, report failure so that callers do not assume the IDs were
   changed. */
__set_errno (ENOSYS);
return -1;
1165 }
1166
1167 static inline void __attribute__((always_inline))
1168 init_one_static_tls (struct pthread *curp, struct link_map *map)
1169 {
1170 # if TLS_TCB_AT_TP
1171 void *dest = (char *) curp - map->l_tls_offset;
1172 # elif TLS_DTV_AT_TP
1173 void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1174 # else
1175 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1176 # endif
1177
1178 /* We cannot delay the initialization of the static TLS area, since
1179 it can be accessed with the LE or IE models, but since the DTV is only
1180 used by GD and LD, we can delay its update to avoid a race. */
1181 memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1182 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1183 }
1184
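/* Initialize the static TLS block of MAP in every existing thread,
   typically after an object using static TLS has been dlopen'ed. */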
1185 void
1186 attribute_hidden
1187 __pthread_init_static_tls (struct link_map *map)
1188 {
1189 lll_lock (stack_cache_lock, LLL_PRIVATE);
1190
1191 /* Iterate over the list with system-allocated threads first. */
1192 list_t *runp;
1193 list_for_each (runp, &stack_used)
1194 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1195
1196 /* Now the list with threads using user-allocated stacks. */
1197 list_for_each (runp, &__stack_user)
1198 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1199
1200 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1201 }
1202
1203
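/* Wait until no thread is inside a global scope (GSCOPE) critical
   section anymore, so that the caller may safely modify the search
   scopes. */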
1204 void
1205 attribute_hidden
1206 __wait_lookup_done (void)
1207 {
1208 lll_lock (stack_cache_lock, LLL_PRIVATE);
1209
1210 struct pthread *self = THREAD_SELF;
1211
1212 /* Iterate over the list with system-allocated threads first. */
1213 list_t *runp;
1214 list_for_each (runp, &stack_used)
1215 {
1216 struct pthread *t = list_entry (runp, struct pthread, list);
1217 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1218 continue;
1219
1220 int *const gscope_flagp = &t->header.gscope_flag;
1221
1222 /* We have to wait until this thread is done with the global
1223 scope. First tell the thread that we are waiting and
1224 possibly have to be woken. */
1225 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1226 THREAD_GSCOPE_FLAG_WAIT,
1227 THREAD_GSCOPE_FLAG_USED))
1228 continue;
1229
1230 do
1231 lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1232 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1233 }
1234
1235 /* Now the list with threads using user-allocated stacks. */
1236 list_for_each (runp, &__stack_user)
1237 {
1238 struct pthread *t = list_entry (runp, struct pthread, list);
1239 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1240 continue;
1241
1242 int *const gscope_flagp = &t->header.gscope_flag;
1243
1244 /* We have to wait until this thread is done with the global
1245 scope. First tell the thread that we are waiting and
1246 possibly have to be woken. */
1247 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1248 THREAD_GSCOPE_FLAG_WAIT,
1249 THREAD_GSCOPE_FLAG_USED))
1250 continue;
1251
1252 do
1253 lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1254 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1255 }
1256
1257 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1258 }