libgo: Update to weekly.2011-12-22.
[gcc.git] / libgo / runtime / proc.c
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include <limits.h>
6 #include <stdlib.h>
7 #include <pthread.h>
8 #include <unistd.h>
9
10 #include "config.h"
11 #include "runtime.h"
12 #include "arch.h"
13 #include "defs.h"
14 #include "malloc.h"
15 #include "go-defer.h"
16
17 #ifdef USING_SPLIT_STACK
18
19 /* FIXME: These are not declared anywhere. */
20
21 extern void __splitstack_getcontext(void *context[10]);
22
23 extern void __splitstack_setcontext(void *context[10]);
24
25 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
26
27 extern void * __splitstack_resetcontext(void *context[10], size_t *);
28
29 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
30 void **);
31
32 extern void __splitstack_block_signals (int *, int *);
33
34 extern void __splitstack_block_signals_context (void *context[10], int *,
35 int *);
36
37 #endif
38
39 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
40 # ifdef PTHREAD_STACK_MIN
41 # define StackMin PTHREAD_STACK_MIN
42 # else
43 # define StackMin 8192
44 # endif
45 #else
46 # define StackMin 2 * 1024 * 1024
47 #endif
48
49 static void schedule(G*);
50
51 typedef struct Sched Sched;
52
53 M runtime_m0;
54 G runtime_g0; // idle goroutine for m0
55
56 #ifdef __rtems__
57 #define __thread
58 #endif
59
60 static __thread G *g;
61 static __thread M *m;
62
63 // We cannot always refer to the TLS variables directly. The
64 // compiler will call tls_get_addr to get the address of the variable,
65 // and it may hold it in a register across a call to schedule. When
66 // we get back from the call we may be running in a different thread,
67 // in which case the register now points to the TLS variable for a
68 // different thread. We use non-inlinable functions to avoid this
69 // when necessary.
70
71 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
72
73 G*
74 runtime_g(void)
75 {
76 return g;
77 }
78
79 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
80
81 M*
82 runtime_m(void)
83 {
84 return m;
85 }
86
87 int32 runtime_gcwaiting;
88
89 // Go scheduler
90 //
91 // The Go scheduler's job is to match ready-to-run goroutines (`g's)
92 // with waiting-for-work schedulers (`m's). If there are ready g's
93 // and no waiting m's, ready() will start a new m running in a new
94 // OS thread, so that all ready g's can run simultaneously, up to a limit.
95 // For now, m's never go away.
96 //
97 // By default, Go keeps only one kernel thread (m) running user code
98 // at a time; other threads may be blocked in the operating system.
99 // Setting the environment variable $GOMAXPROCS or calling
100 // runtime.GOMAXPROCS() will change the number of user threads
101 // allowed to execute simultaneously. $GOMAXPROCS is thus an
102 // approximation of the maximum number of cores to use.
103 //
104 // Even a program that can run without deadlock in a single process
105 // might use more m's if given the chance. For example, the prime
106 // sieve will use as many m's as there are primes,
107 // allowing different stages of the pipeline to execute in parallel.
108 // We could revisit this choice, only kicking off new m's for blocking
109 // system calls, but that would limit the amount of parallel computation
110 // that Go would try to do.
111 //
112 // In general, one could imagine all sorts of refinements to the
113 // scheduler, but the goal now is just to get something working on
114 // Linux and OS X.
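//
// As a concrete illustration of the above: with $GOMAXPROCS=4, at most
// four m's execute user Go code at any moment; any number of additional
// m's may still exist, blocked in system calls or parked waiting for work.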
115
116 struct Sched {
117 Lock;
118
119 G *gfree; // available g's (status == Gdead)
120 int32 goidgen;
121
122 G *ghead; // g's waiting to run
123 G *gtail;
124 int32 gwait; // number of g's waiting to run
125 int32 gcount; // number of g's that are alive
126 int32 grunning; // number of g's running on cpu or in syscall
127
128 M *mhead; // m's waiting for work
129 int32 mwait; // number of m's waiting for work
130 int32 mcount; // number of m's that have been created
131
132 volatile uint32 atomic; // atomic scheduling word (see below)
133
134 int32 profilehz; // cpu profiling rate
135
136 bool init; // running initialization
137 bool lockmain; // init called runtime.LockOSThread
138
139 Note stopped; // one g can set waitstop and wait here for m's to stop
140 };
141
142 // The atomic word in sched is an atomic uint32 that
143 // holds these fields.
144 //
145 // [15 bits] mcpu number of m's executing on cpu
146 // [15 bits] mcpumax max number of m's allowed on cpu
147 // [1 bit] waitstop some g is waiting on stopped
148 // [1 bit] gwaiting gwait != 0
149 //
150 // These fields are the information needed by entersyscall
151 // and exitsyscall to decide whether to coordinate with the
152 // scheduler. Packing them into a single machine word lets
153 // them use a fast path with a single atomic read/write and
154 // no lock/unlock. This greatly reduces contention in
155 // syscall- or cgo-heavy multithreaded programs.
156 //
157 // Except for entersyscall and exitsyscall, the manipulations
158 // to these fields only happen while holding the schedlock,
159 // so the routines holding schedlock only need to worry about
160 // what entersyscall and exitsyscall do, not the other routines
161 // (which also use the schedlock).
162 //
163 // In particular, entersyscall and exitsyscall only read mcpumax,
164 // waitstop, and gwaiting. They never write them. Thus, writes to those
165 // fields can be done (holding schedlock) without fear of write conflicts.
166 // There may still be logic conflicts: for example, the setting of waitstop must
167 // be conditioned on mcpu >= mcpumax or else the wait may be a
168 // spurious sleep. The Promela model in proc.p verifies these accesses.
169 enum {
170 mcpuWidth = 15,
171 mcpuMask = (1<<mcpuWidth) - 1,
172 mcpuShift = 0,
173 mcpumaxShift = mcpuShift + mcpuWidth,
174 waitstopShift = mcpumaxShift + mcpuWidth,
175 gwaitingShift = waitstopShift+1,
176
177 // The max value of GOMAXPROCS is constrained
178 // by the max value we can store in the bit fields
179 // of the atomic word. Reserve a few high values
180 // so that we can detect accidental decrement
181 // beyond zero.
182 maxgomaxprocs = mcpuMask - 10,
183 };
184
185 #define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask)
186 #define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask)
187 #define atomic_waitstop(v) (((v)>>waitstopShift)&1)
188 #define atomic_gwaiting(v) (((v)>>gwaitingShift)&1)
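
// Worked example of the packing above (illustrative only): with
// mcpuWidth == 15, a word built as
//	v = (2<<mcpuShift) | (4<<mcpumaxShift) | (1<<waitstopShift)
// decodes as atomic_mcpu(v) == 2, atomic_mcpumax(v) == 4,
// atomic_waitstop(v) == 1 and atomic_gwaiting(v) == 0: two of the four
// allowed m's are executing Go code and some g is waiting on stopped.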
189
190 Sched runtime_sched;
191 int32 runtime_gomaxprocs;
192 bool runtime_singleproc;
193
194 static bool canaddmcpu(void);
195
196 // An m that is waiting for notewakeup(&m->havenextg). This may
197 // only be accessed while the scheduler lock is held. This is used to
198 // minimize the number of times we call notewakeup while the scheduler
199 // lock is held, since the m will normally move quickly to lock the
200 // scheduler itself, producing lock contention.
201 static M* mwakeup;
202
203 // Scheduling helpers. Sched must be locked.
204 static void gput(G*); // put/get on ghead/gtail
205 static G* gget(void);
206 static void mput(M*); // put/get on mhead
207 static M* mget(G*);
208 static void gfput(G*); // put/get on gfree
209 static G* gfget(void);
210 static void matchmg(void); // match m's to g's
211 static void readylocked(G*); // ready, but sched is locked
212 static void mnextg(M*, G*);
213 static void mcommoninit(M*);
214
215 void
216 setmcpumax(uint32 n)
217 {
218 uint32 v, w;
219
220 for(;;) {
221 v = runtime_sched.atomic;
222 w = v;
223 w &= ~(mcpuMask<<mcpumaxShift);
224 w |= n<<mcpumaxShift;
225 if(runtime_cas(&runtime_sched.atomic, v, w))
226 break;
227 }
228 }
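
// setmcpumax is used by schedinit and runtime_gomaxprocsfunc to install a
// new GOMAXPROCS value, and by runtime_stoptheworld/runtime_starttheworld
// to clamp mcpumax to 1 around garbage collection and restore it
// afterwards.  Apart from the single-threaded bootstrap in schedinit,
// callers hold schedlock; the CAS loop is still needed because
// entersyscall and exitsyscall update other fields of the same atomic
// word without taking schedlock.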
229
230 // First function run by a new goroutine. This replaces gogocall.
231 static void
232 kickoff(void)
233 {
234 void (*fn)(void*);
235
236 fn = (void (*)(void*))(g->entry);
237 fn(g->param);
238 runtime_goexit();
239 }
240
241 // Switch context to a different goroutine. This is like longjmp.
242 static void runtime_gogo(G*) __attribute__ ((noinline));
243 static void
244 runtime_gogo(G* newg)
245 {
246 #ifdef USING_SPLIT_STACK
247 __splitstack_setcontext(&newg->stack_context[0]);
248 #endif
249 g = newg;
250 newg->fromgogo = true;
251 setcontext(&newg->context);
252 }
253
254 // Save context and call fn passing g as a parameter. This is like
255 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
256 // g->fromgogo as a flag. It will be true if we got here via
257 // setcontext. g == nil the first time this is called in a new m.
258 static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
259 static void
260 runtime_mcall(void (*pfn)(G*))
261 {
262 #ifndef USING_SPLIT_STACK
263 int i;
264 #endif
265
266 // Ensure that all registers are on the stack for the garbage
267 // collector.
268 __builtin_unwind_init();
269
270 if(g == m->g0)
271 runtime_throw("runtime: mcall called on m->g0 stack");
272
273 if(g != nil) {
274
275 #ifdef USING_SPLIT_STACK
276 __splitstack_getcontext(&g->stack_context[0]);
277 #else
278 g->gcnext_sp = &i;
279 #endif
280 g->fromgogo = false;
281 getcontext(&g->context);
282 }
283 if (g == nil || !g->fromgogo) {
284 #ifdef USING_SPLIT_STACK
285 __splitstack_setcontext(&m->g0->stack_context[0]);
286 #endif
287 m->g0->entry = (byte*)pfn;
288 m->g0->param = g;
289 g = m->g0;
290 setcontext(&m->g0->context);
291 runtime_throw("runtime: mcall function returned");
292 }
293 }
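
// The getcontext/flag dance above can be hard to follow.  The stand-alone
// sketch below (illustrative only; the example_* names are not part of the
// runtime, and the block is wrapped in #if 0 so it is never compiled)
// shows the same "returns twice" idiom: getcontext returns 0 both when the
// context is first saved and when it is later resumed via setcontext, so a
// flag kept in memory, like g->fromgogo above, is needed to tell the two
// returns apart.
#if 0
#include <stdio.h>
#include <ucontext.h>

static ucontext_t example_ctx;
static volatile int example_resumed;

static void
example_returns_twice(void)
{
	example_resumed = 0;
	getcontext(&example_ctx);
	if(!example_resumed) {
		// First return: the context was just saved.
		example_resumed = 1;
		// Jump back to the getcontext call above.
		setcontext(&example_ctx);
	}
	// Second return: we arrived here via setcontext.
	printf("resumed via setcontext\n");
}
#endif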
294
295 // The bootstrap sequence is:
296 //
297 // call osinit
298 // call schedinit
299 // make & queue new G
300 // call runtime_mstart
301 //
302 // The new G calls runtime_main.
303 void
304 runtime_schedinit(void)
305 {
306 int32 n;
307 const byte *p;
308
309 m = &runtime_m0;
310 g = &runtime_g0;
311 m->g0 = g;
312 m->curg = g;
313 g->m = m;
314
315 m->nomemprof++;
316 runtime_mallocinit();
317 mcommoninit(m);
318
319 runtime_goargs();
320 runtime_goenvs();
321
322 // For debugging:
323 // Allocate internal symbol table representation now,
324 // so that we don't need to call malloc when we crash.
325 // runtime_findfunc(0);
326
327 runtime_gomaxprocs = 1;
328 p = runtime_getenv("GOMAXPROCS");
329 if(p != nil && (n = runtime_atoi(p)) != 0) {
330 if(n > maxgomaxprocs)
331 n = maxgomaxprocs;
332 runtime_gomaxprocs = n;
333 }
334 setmcpumax(runtime_gomaxprocs);
335 runtime_singleproc = runtime_gomaxprocs == 1;
336
337 canaddmcpu(); // mcpu++ to account for bootstrap m
338 m->helpgc = 1; // flag to tell schedule() to mcpu--
339 runtime_sched.grunning++;
340
341 // Cannot enable GC until all roots are registered.
342 // mstats.enablegc = 1;
343 m->nomemprof--;
344 }
345
346 extern void main_init(void) __asm__ ("__go_init_main");
347 extern void main_main(void) __asm__ ("main.main");
348
349 // The main goroutine.
350 void
351 runtime_main(void)
352 {
353 // Lock the main goroutine onto this, the main OS thread,
354 // during initialization. Most programs won't care, but a few
355 // do require certain calls to be made by the main thread.
356 // Those can arrange for main.main to run in the main thread
357 // by calling runtime.LockOSThread during initialization
358 // to preserve the lock.
359 runtime_LockOSThread();
360 runtime_sched.init = true;
361 main_init();
362 runtime_sched.init = false;
363 if(!runtime_sched.lockmain)
364 runtime_UnlockOSThread();
365
366 // For gccgo we have to wait until after main is initialized
367 // to enable GC, because initializing main registers the GC
368 // roots.
369 mstats.enablegc = 1;
370
371 main_main();
372 runtime_exit(0);
373 for(;;)
374 *(int32*)0 = 0;
375 }
376
377 // Lock the scheduler.
378 static void
379 schedlock(void)
380 {
381 runtime_lock(&runtime_sched);
382 }
383
384 // Unlock the scheduler.
385 static void
386 schedunlock(void)
387 {
388 M *m;
389
390 m = mwakeup;
391 mwakeup = nil;
392 runtime_unlock(&runtime_sched);
393 if(m != nil)
394 runtime_notewakeup(&m->havenextg);
395 }
396
397 void
398 runtime_goexit(void)
399 {
400 g->status = Gmoribund;
401 runtime_gosched();
402 }
403
404 void
405 runtime_goroutineheader(G *g)
406 {
407 const char *status;
408
409 switch(g->status) {
410 case Gidle:
411 status = "idle";
412 break;
413 case Grunnable:
414 status = "runnable";
415 break;
416 case Grunning:
417 status = "running";
418 break;
419 case Gsyscall:
420 status = "syscall";
421 break;
422 case Gwaiting:
423 if(g->waitreason)
424 status = g->waitreason;
425 else
426 status = "waiting";
427 break;
428 case Gmoribund:
429 status = "moribund";
430 break;
431 default:
432 status = "???";
433 break;
434 }
435 runtime_printf("goroutine %d [%s]:\n", g->goid, status);
436 }
437
438 void
439 runtime_tracebackothers(G *me)
440 {
441 G *g;
442
443 for(g = runtime_allg; g != nil; g = g->alllink) {
444 if(g == me || g->status == Gdead)
445 continue;
446 runtime_printf("\n");
447 runtime_goroutineheader(g);
448 // runtime_traceback(g->sched.pc, g->sched.sp, 0, g);
449 }
450 }
451
452 // Mark this g as m's idle goroutine.
453 // This functionality might be used in environments where programs
454 // are limited to a single thread, to simulate a select-driven
455 // network server. It is not exposed via the standard runtime API.
456 void
457 runtime_idlegoroutine(void)
458 {
459 if(g->idlem != nil)
460 runtime_throw("g is already an idle goroutine");
461 g->idlem = m;
462 }
463
464 static void
465 mcommoninit(M *m)
466 {
467 // Add to runtime_allm so garbage collector doesn't free m
468 // when it is just in a register or thread-local storage.
469 m->alllink = runtime_allm;
470 // runtime_Cgocalls() iterates over allm w/o schedlock,
471 // so we need to publish it safely.
472 runtime_atomicstorep((void**)&runtime_allm, m);
473
474 m->id = runtime_sched.mcount++;
475 m->fastrand = 0x49f6428aUL + m->id;
476
477 if(m->mcache == nil)
478 m->mcache = runtime_allocmcache();
479 }
480
481 // Try to increment mcpu. Report whether succeeded.
482 static bool
483 canaddmcpu(void)
484 {
485 uint32 v;
486
487 for(;;) {
488 v = runtime_sched.atomic;
489 if(atomic_mcpu(v) >= atomic_mcpumax(v))
490 return 0;
491 if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
492 return 1;
493 }
494 }
495
496 // Put on `g' queue. Sched must be locked.
497 static void
498 gput(G *g)
499 {
500 M *m;
501
502 // If g is wired, hand it off directly.
503 if((m = g->lockedm) != nil && canaddmcpu()) {
504 mnextg(m, g);
505 return;
506 }
507
508 // If g is the idle goroutine for an m, hand it off.
509 if(g->idlem != nil) {
510 if(g->idlem->idleg != nil) {
511 runtime_printf("m%d idle out of sync: g%d g%d\n",
512 g->idlem->id,
513 g->idlem->idleg->goid, g->goid);
514 runtime_throw("runtime: double idle");
515 }
516 g->idlem->idleg = g;
517 return;
518 }
519
520 g->schedlink = nil;
521 if(runtime_sched.ghead == nil)
522 runtime_sched.ghead = g;
523 else
524 runtime_sched.gtail->schedlink = g;
525 runtime_sched.gtail = g;
526
527 // increment gwait.
528 // if it transitions to nonzero, set atomic gwaiting bit.
529 if(runtime_sched.gwait++ == 0)
530 runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
531 }
532
533 // Report whether gget would return something.
534 static bool
535 haveg(void)
536 {
537 return runtime_sched.ghead != nil || m->idleg != nil;
538 }
539
540 // Get from `g' queue. Sched must be locked.
541 static G*
542 gget(void)
543 {
544 G *g;
545
546 g = runtime_sched.ghead;
547 if(g){
548 runtime_sched.ghead = g->schedlink;
549 if(runtime_sched.ghead == nil)
550 runtime_sched.gtail = nil;
551 // decrement gwait.
552 // if it transitions to zero, clear atomic gwaiting bit.
553 if(--runtime_sched.gwait == 0)
554 runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
555 } else if(m->idleg != nil) {
556 g = m->idleg;
557 m->idleg = nil;
558 }
559 return g;
560 }
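
// Together, gput and gget keep the atomic gwaiting bit equal to
// (gwait != 0), i.e. set exactly while the global run queue is non-empty;
// an m's private idleg is not reflected in the bit.  This is what lets
// entersyscall notice queued work (and call matchmg on its slow path)
// without taking schedlock in the common case.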
561
562 // Put on `m' list. Sched must be locked.
563 static void
564 mput(M *m)
565 {
566 m->schedlink = runtime_sched.mhead;
567 runtime_sched.mhead = m;
568 runtime_sched.mwait++;
569 }
570
571 // Get an `m' to run `g'. Sched must be locked.
572 static M*
573 mget(G *g)
574 {
575 M *m;
576
577 // if g has its own m, use it.
578 if(g && (m = g->lockedm) != nil)
579 return m;
580
581 // otherwise use general m pool.
582 if((m = runtime_sched.mhead) != nil){
583 runtime_sched.mhead = m->schedlink;
584 runtime_sched.mwait--;
585 }
586 return m;
587 }
588
589 // Mark g ready to run.
590 void
591 runtime_ready(G *g)
592 {
593 schedlock();
594 readylocked(g);
595 schedunlock();
596 }
597
598 // Mark g ready to run. Sched is already locked.
599 // G might be running already and about to stop.
600 // The sched lock protects g->status from changing underfoot.
601 static void
602 readylocked(G *g)
603 {
604 if(g->m){
605 // Running on another machine.
606 // Ready it when it stops.
607 g->readyonstop = 1;
608 return;
609 }
610
611 // Mark runnable.
612 if(g->status == Grunnable || g->status == Grunning) {
613 runtime_printf("goroutine %d has status %d\n", g->goid, g->status);
614 runtime_throw("bad g->status in ready");
615 }
616 g->status = Grunnable;
617
618 gput(g);
619 matchmg();
620 }
621
622 // Same as readylocked but a different symbol so that
623 // debuggers can set a breakpoint here and catch all
624 // new goroutines.
625 static void
626 newprocreadylocked(G *g)
627 {
628 readylocked(g);
629 }
630
631 // Pass g to m for running.
632 // Caller has already incremented mcpu.
633 static void
634 mnextg(M *m, G *g)
635 {
636 runtime_sched.grunning++;
637 m->nextg = g;
638 if(m->waitnextg) {
639 m->waitnextg = 0;
640 if(mwakeup != nil)
641 runtime_notewakeup(&mwakeup->havenextg);
642 mwakeup = m;
643 }
644 }
645
646 // Get the next goroutine that m should run.
647 // Sched must be locked on entry, is unlocked on exit.
648 // Makes sure that at most $GOMAXPROCS g's are
649 // running on cpus (not in system calls) at any given time.
650 static G*
651 nextgandunlock(void)
652 {
653 G *gp;
654 uint32 v;
655
656 top:
657 if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
658 runtime_throw("negative mcpu");
659
660 // If there is a g waiting as m->nextg, the mcpu++
661 // happened before it was passed to mnextg.
662 if(m->nextg != nil) {
663 gp = m->nextg;
664 m->nextg = nil;
665 schedunlock();
666 return gp;
667 }
668
669 if(m->lockedg != nil) {
670 // We can only run one g, and it's not available.
671 // Make sure some other cpu is running to handle
672 // the ordinary run queue.
673 if(runtime_sched.gwait != 0) {
674 matchmg();
675 // m->lockedg might have been on the queue.
676 if(m->nextg != nil) {
677 gp = m->nextg;
678 m->nextg = nil;
679 schedunlock();
680 return gp;
681 }
682 }
683 } else {
684 // Look for work on global queue.
685 while(haveg() && canaddmcpu()) {
686 gp = gget();
687 if(gp == nil)
688 runtime_throw("gget inconsistency");
689
690 if(gp->lockedm) {
691 mnextg(gp->lockedm, gp);
692 continue;
693 }
694 runtime_sched.grunning++;
695 schedunlock();
696 return gp;
697 }
698
699 // The while loop ended either because the g queue is empty
700 // or because we have maxed out our m procs running go
701 // code (mcpu >= mcpumax). We need to check that
702 // concurrent actions by entersyscall/exitsyscall cannot
703 // invalidate the decision to end the loop.
704 //
705 // We hold the sched lock, so no one else is manipulating the
706 // g queue or changing mcpumax. Entersyscall can decrement
707 // mcpu, but if it does so when there is something on the g queue,
708 // the gwait bit will be set, so entersyscall will take the slow path
709 // and use the sched lock. So it cannot invalidate our decision.
710 //
711 // Wait on global m queue.
712 mput(m);
713 }
714
715 v = runtime_atomicload(&runtime_sched.atomic);
716 if(runtime_sched.grunning == 0)
717 runtime_throw("all goroutines are asleep - deadlock!");
718 m->nextg = nil;
719 m->waitnextg = 1;
720 runtime_noteclear(&m->havenextg);
721
722 // Stoptheworld is waiting for all but its cpu to go to stop.
723 // Entersyscall might have decremented mcpu too, but if so
724 // it will see the waitstop and take the slow path.
725 // Exitsyscall never increments mcpu beyond mcpumax.
726 if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
727 // set waitstop = 0 (known to be 1)
728 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
729 runtime_notewakeup(&runtime_sched.stopped);
730 }
731 schedunlock();
732
733 runtime_notesleep(&m->havenextg);
734 if(m->helpgc) {
735 runtime_gchelper();
736 m->helpgc = 0;
737 runtime_lock(&runtime_sched);
738 goto top;
739 }
740 if((gp = m->nextg) == nil)
741 runtime_throw("bad m->nextg in nextgoroutine");
742 m->nextg = nil;
743 return gp;
744 }
745
746 int32
747 runtime_helpgc(bool *extra)
748 {
749 M *mp;
750 int32 n, max;
751
752 // Figure out how many CPUs to use.
753 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
754 max = runtime_gomaxprocs;
755 if(max > runtime_ncpu)
756 max = runtime_ncpu > 0 ? runtime_ncpu : 1;
757 if(max > MaxGcproc)
758 max = MaxGcproc;
759
760 // We're going to use one CPU no matter what.
761 // Figure out the max number of additional CPUs.
762 max--;
763
764 runtime_lock(&runtime_sched);
765 n = 0;
766 while(n < max && (mp = mget(nil)) != nil) {
767 n++;
768 mp->helpgc = 1;
769 mp->waitnextg = 0;
770 runtime_notewakeup(&mp->havenextg);
771 }
772 runtime_unlock(&runtime_sched);
773 if(extra)
774 *extra = n != max;
775 return n;
776 }
777
778 void
779 runtime_stoptheworld(void)
780 {
781 uint32 v;
782
783 schedlock();
784 runtime_gcwaiting = 1;
785
786 setmcpumax(1);
787
788 // while mcpu > 1
789 for(;;) {
790 v = runtime_sched.atomic;
791 if(atomic_mcpu(v) <= 1)
792 break;
793
794 // It would be unsafe for multiple threads to be using
795 // the stopped note at once, but there is only
796 // ever one thread doing garbage collection.
797 runtime_noteclear(&runtime_sched.stopped);
798 if(atomic_waitstop(v))
799 runtime_throw("invalid waitstop");
800
801 // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
802 // still being true.
803 if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
804 continue;
805
806 schedunlock();
807 runtime_notesleep(&runtime_sched.stopped);
808 schedlock();
809 }
810 runtime_singleproc = runtime_gomaxprocs == 1;
811 schedunlock();
812 }
813
814 void
815 runtime_starttheworld(bool extra)
816 {
817 M *m;
818
819 schedlock();
820 runtime_gcwaiting = 0;
821 setmcpumax(runtime_gomaxprocs);
822 matchmg();
823 if(extra && canaddmcpu()) {
824 // Start a new m that will (we hope) be idle
825 // and so available to help when the next
826 // garbage collection happens.
827 // canaddmcpu above did mcpu++
828 // (necessary, because m will be doing various
829 // initialization work so is definitely running),
830 // but m is not running a specific goroutine,
831 // so set the helpgc flag as a signal to m's
832 // first schedule(nil) to mcpu-- and grunning--.
833 m = runtime_newm();
834 m->helpgc = 1;
835 runtime_sched.grunning++;
836 }
837 schedunlock();
838 }
839
840 // Called to start an M.
841 void*
842 runtime_mstart(void* mp)
843 {
844 m = (M*)mp;
845 g = m->g0;
846
847 g->entry = nil;
848 g->param = nil;
849
850 // Record top of stack for use by mcall.
851 // Once we call schedule we're never coming back,
852 // so other calls can reuse this stack space.
853 #ifdef USING_SPLIT_STACK
854 __splitstack_getcontext(&g->stack_context[0]);
855 #else
856 g->gcinitial_sp = &mp;
857 g->gcstack_size = StackMin;
858 g->gcnext_sp = &mp;
859 #endif
860 getcontext(&g->context);
861
862 if(g->entry != nil) {
863 // Got here from mcall.
864 void (*pfn)(G*) = (void (*)(G*))g->entry;
865 G* gp = (G*)g->param;
866 pfn(gp);
867 *(int*)0x21 = 0x21;
868 }
869 runtime_minit();
870
871 #ifdef USING_SPLIT_STACK
872 {
873 int dont_block_signals = 0;
874 __splitstack_block_signals(&dont_block_signals, nil);
875 }
876 #endif
877
878 schedule(nil);
879 return nil;
880 }
881
882 typedef struct CgoThreadStart CgoThreadStart;
883 struct CgoThreadStart
884 {
885 M *m;
886 G *g;
887 void (*fn)(void);
888 };
889
890 // Kick off new m's as needed (up to mcpumax).
891 // Sched is locked.
892 static void
893 matchmg(void)
894 {
895 G *gp;
896 M *mp;
897
898 if(m->mallocing || m->gcing)
899 return;
900
901 while(haveg() && canaddmcpu()) {
902 gp = gget();
903 if(gp == nil)
904 runtime_throw("gget inconsistency");
905
906 // Find the m that will run gp.
907 if((mp = mget(gp)) == nil)
908 mp = runtime_newm();
909 mnextg(mp, gp);
910 }
911 }
912
913 // Create a new m. It will start off with a call to runtime_mstart.
914 M*
915 runtime_newm(void)
916 {
917 M *m;
918 pthread_attr_t attr;
919 pthread_t tid;
920
921 m = runtime_malloc(sizeof(M));
922 mcommoninit(m);
923 m->g0 = runtime_malg(-1, nil, nil);
924
925 if(pthread_attr_init(&attr) != 0)
926 runtime_throw("pthread_attr_init");
927 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
928 runtime_throw("pthread_attr_setdetachstate");
929
930 #ifndef PTHREAD_STACK_MIN
931 #define PTHREAD_STACK_MIN 8192
932 #endif
933 if(pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
934 runtime_throw("pthread_attr_setstacksize");
935
936 if(pthread_create(&tid, &attr, runtime_mstart, m) != 0)
937 runtime_throw("pthread_create");
938
939 return m;
940 }
941
942 // One round of scheduler: find a goroutine and run it.
943 // The argument is the goroutine that was running before
944 // schedule was called, or nil if this is the first call.
945 // Never returns.
946 static void
947 schedule(G *gp)
948 {
949 int32 hz;
950 uint32 v;
951
952 schedlock();
953 if(gp != nil) {
954 // Just finished running gp.
955 gp->m = nil;
956 runtime_sched.grunning--;
957
958 // atomic { mcpu-- }
959 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
960 if(atomic_mcpu(v) > maxgomaxprocs)
961 runtime_throw("negative mcpu in scheduler");
962
963 switch(gp->status){
964 case Grunnable:
965 case Gdead:
966 // Shouldn't have been running!
967 runtime_throw("bad gp->status in sched");
968 case Grunning:
969 gp->status = Grunnable;
970 gput(gp);
971 break;
972 case Gmoribund:
973 gp->status = Gdead;
974 if(gp->lockedm) {
975 gp->lockedm = nil;
976 m->lockedg = nil;
977 }
978 gp->idlem = nil;
979 gfput(gp);
980 if(--runtime_sched.gcount == 0)
981 runtime_exit(0);
982 break;
983 }
984 if(gp->readyonstop){
985 gp->readyonstop = 0;
986 readylocked(gp);
987 }
988 } else if(m->helpgc) {
989 // Bootstrap m or new m started by starttheworld.
990 // atomic { mcpu-- }
991 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
992 if(atomic_mcpu(v) > maxgomaxprocs)
993 runtime_throw("negative mcpu in scheduler");
994 // Compensate for increment in starttheworld().
995 runtime_sched.grunning--;
996 m->helpgc = 0;
997 } else if(m->nextg != nil) {
998 // New m started by matchmg.
999 } else {
1000 runtime_throw("invalid m state in scheduler");
1001 }
1002
1003 // Find (or wait for) g to run. Unlocks runtime_sched.
1004 gp = nextgandunlock();
1005 gp->readyonstop = 0;
1006 gp->status = Grunning;
1007 m->curg = gp;
1008 gp->m = m;
1009
1010 // Check whether the profiler needs to be turned on or off.
1011 hz = runtime_sched.profilehz;
1012 if(m->profilehz != hz)
1013 runtime_resetcpuprofiler(hz);
1014
1015 runtime_gogo(gp);
1016 }
1017
1018 // Enter scheduler. If g->status is Grunning,
1019 // re-queues g and runs everyone else who is waiting
1020 // before running g again. If g->status is Gmoribund,
1021 // kills off g.
1022 void
1023 runtime_gosched(void)
1024 {
1025 if(m->locks != 0)
1026 runtime_throw("gosched holding locks");
1027 if(g == m->g0)
1028 runtime_throw("gosched of g0");
1029 runtime_mcall(schedule);
1030 }
1031
1032 // The goroutine g is about to enter a system call.
1033 // Record that it's not using the cpu anymore.
1034 // This is called only from the Go syscall library and cgocall,
1035 // not from the low-level system calls used by the runtime.
1036 //
1037 // Entersyscall cannot split the stack: the stack information recorded
1038 // for the garbage collector must refer to the caller's stack segment,
1039 // because entersyscall is going to return immediately after.
1040 // It's okay to call matchmg and notewakeup even after
1041 // decrementing mcpu, because we haven't released the
1042 // sched lock yet, so the garbage collector cannot be running.
1043
1044 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1045
1046 void
1047 runtime_entersyscall(void)
1048 {
1049 uint32 v;
1050
1051 // Leave SP around for gc and traceback.
1052 #ifdef USING_SPLIT_STACK
1053 g->gcstack = __splitstack_find(NULL, NULL, &g->gcstack_size,
1054 &g->gcnext_segment, &g->gcnext_sp,
1055 &g->gcinitial_sp);
1056 #else
1057 g->gcnext_sp = (byte *) &v;
1058 #endif
1059
1060 // Save the registers in the g structure so that any pointers
1061 // held in registers will be seen by the garbage collector.
1062 // We could use getcontext here, but setjmp is more efficient
1063 // because it doesn't need to save the signal mask.
1064 setjmp(g->gcregs);
1065
1066 g->status = Gsyscall;
1067
1068 // Fast path.
1069 // The slow path inside the schedlock/schedunlock will get
1070 // through without stopping if it does:
1071 // mcpu--
1072 // gwait not true
1073 // waitstop && mcpu <= mcpumax not true
1074 // If we can do the same with a single atomic add,
1075 // then we can skip the locks.
1076 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1077 if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
1078 return;
1079
1080 schedlock();
1081 v = runtime_atomicload(&runtime_sched.atomic);
1082 if(atomic_gwaiting(v)) {
1083 matchmg();
1084 v = runtime_atomicload(&runtime_sched.atomic);
1085 }
1086 if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1087 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
1088 runtime_notewakeup(&runtime_sched.stopped);
1089 }
1090
1091 schedunlock();
1092 }
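
// Worked example of the fast path above (illustrative only): suppose
// GOMAXPROCS is 4, three m's are running Go code, the run queue is empty
// and no one is waiting in runtime_stoptheworld.  The xadd then leaves
// mcpu == 2 with gwaiting == 0 and waitstop == 0, so the test succeeds
// and the system call proceeds without touching schedlock.  If waitstop
// were set and mcpu had dropped to at most mcpumax, the slow path would
// run so that the thread waiting in runtime_stoptheworld could be woken.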
1093
1094 // The goroutine g exited its system call.
1095 // Arrange for it to run on a cpu again.
1096 // This is called only from the Go syscall library, not
1097 // from the low-level system calls used by the runtime.
1098 void
1099 runtime_exitsyscall(void)
1100 {
1101 G *gp;
1102 uint32 v;
1103
1104 // Fast path.
1105 // If we can do the mcpu++ bookkeeping and
1106 // find that we still have mcpu <= mcpumax, then we can
1107 // start executing Go code immediately, without having to
1108 // schedlock/schedunlock.
1109 gp = g;
1110 v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
1111 if(m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1112 // There's a cpu for us, so we can run.
1113 gp->status = Grunning;
1114 // Garbage collector isn't running (since we are),
1115 // so okay to clear gcstack.
1116 #ifdef USING_SPLIT_STACK
1117 gp->gcstack = nil;
1118 #endif
1119 gp->gcnext_sp = nil;
1120 runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1121 return;
1122 }
1123
1124 // Tell scheduler to put g back on the run queue:
1125 // mostly equivalent to g->status = Grunning,
1126 // but keeps the garbage collector from thinking
1127 // that g is running right now, which it's not.
1128 gp->readyonstop = 1;
1129
1130 // All the cpus are taken.
1131 // The scheduler will ready g and put this m to sleep.
1132 // When the scheduler takes g away from m,
1133 // it will undo the mcpu++ done above.
1134 runtime_gosched();
1135
1136 // Gosched returned, so we're allowed to run now.
1137 // Delete the gcstack information that we left for
1138 // the garbage collector during the system call.
1139 // Must wait until now because until gosched returns
1140 // we don't know for sure that the garbage collector
1141 // is not running.
1142 #ifdef USING_SPLIT_STACK
1143 gp->gcstack = nil;
1144 #endif
1145 gp->gcnext_sp = nil;
1146 runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1147 }
1148
1149 // Allocate a new g, with a stack big enough for stacksize bytes.
1150 G*
1151 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1152 {
1153 G *newg;
1154
1155 newg = runtime_malloc(sizeof(G));
1156 if(stacksize >= 0) {
1157 #if USING_SPLIT_STACK
1158 int dont_block_signals = 0;
1159
1160 *ret_stack = __splitstack_makecontext(stacksize,
1161 &newg->stack_context[0],
1162 ret_stacksize);
1163 __splitstack_block_signals_context(&newg->stack_context[0],
1164 &dont_block_signals, nil);
1165 #else
1166 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1167 *ret_stacksize = stacksize;
1168 newg->gcinitial_sp = *ret_stack;
1169 newg->gcstack_size = stacksize;
1170 #endif
1171 }
1172 return newg;
1173 }
1174
1175 /* For runtime package testing. */
1176
1177 void runtime_testing_entersyscall(void)
1178 __asm__("libgo_runtime.runtime.entersyscall");
1179
1180 void
1181 runtime_testing_entersyscall()
1182 {
1183 runtime_entersyscall();
1184 }
1185
1186 void runtime_testing_exitsyscall(void)
1187 __asm__("libgo_runtime.runtime.exitsyscall");
1188
1189 void
1190 runtime_testing_exitsyscall()
1191 {
1192 runtime_exitsyscall();
1193 }
1194
1195 G*
1196 __go_go(void (*fn)(void*), void* arg)
1197 {
1198 byte *sp;
1199 size_t spsize;
1200 G * volatile newg; // volatile to avoid longjmp warning
1201
1202 schedlock();
1203
1204 if((newg = gfget()) != nil){
1205 #ifdef USING_SPLIT_STACK
1206 int dont_block_signals = 0;
1207
1208 sp = __splitstack_resetcontext(&newg->stack_context[0],
1209 &spsize);
1210 __splitstack_block_signals_context(&newg->stack_context[0],
1211 &dont_block_signals, nil);
1212 #else
1213 sp = newg->gcinitial_sp;
1214 spsize = newg->gcstack_size;
1215 newg->gcnext_sp = sp;
1216 #endif
1217 } else {
1218 newg = runtime_malg(StackMin, &sp, &spsize);
1219 if(runtime_lastg == nil)
1220 runtime_allg = newg;
1221 else
1222 runtime_lastg->alllink = newg;
1223 runtime_lastg = newg;
1224 }
1225 newg->status = Gwaiting;
1226 newg->waitreason = "new goroutine";
1227
1228 newg->entry = (byte*)fn;
1229 newg->param = arg;
1230 newg->gopc = (uintptr)__builtin_return_address(0);
1231
1232 runtime_sched.gcount++;
1233 runtime_sched.goidgen++;
1234 newg->goid = runtime_sched.goidgen;
1235
1236 if(sp == nil)
1237 runtime_throw("nil g->stack0");
1238
1239 getcontext(&newg->context);
1240 newg->context.uc_stack.ss_sp = sp;
1241 newg->context.uc_stack.ss_size = spsize;
1242 makecontext(&newg->context, kickoff, 0);
1243
1244 newprocreadylocked(newg);
1245 schedunlock();
1246
1247 return newg;
1248 //printf(" goid=%d\n", newg->goid);
1249 }
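
// How __go_go is reached (informal sketch of gccgo's lowering, not
// something visible in this file): a statement such as
//
//	go f(x, y)
//
// is compiled, roughly, into code that evaluates x and y, stores them in
// a heap-allocated argument block, and calls __go_go(thunk, block), where
// thunk is a compiler-generated function that takes the block as its
// void* parameter and calls f with the unpacked arguments.  kickoff above
// then runs thunk(block) on the new goroutine's stack.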
1250
1251 // Put on gfree list. Sched must be locked.
1252 static void
1253 gfput(G *g)
1254 {
1255 g->schedlink = runtime_sched.gfree;
1256 runtime_sched.gfree = g;
1257 }
1258
1259 // Get from gfree list. Sched must be locked.
1260 static G*
1261 gfget(void)
1262 {
1263 G *g;
1264
1265 g = runtime_sched.gfree;
1266 if(g)
1267 runtime_sched.gfree = g->schedlink;
1268 return g;
1269 }
1270
1271 // Run all deferred functions for the current goroutine.
1272 static void
1273 rundefer(void)
1274 {
1275 Defer *d;
1276
1277 while((d = g->defer) != nil) {
1278 void (*pfn)(void*);
1279
1280 pfn = d->__pfn;
1281 d->__pfn = nil;
1282 if (pfn != nil)
1283 (*pfn)(d->__arg);
1284 g->defer = d->__next;
1285 runtime_free(d);
1286 }
1287 }
1288
1289 void runtime_Goexit (void) asm ("libgo_runtime.runtime.Goexit");
1290
1291 void
1292 runtime_Goexit(void)
1293 {
1294 rundefer();
1295 runtime_goexit();
1296 }
1297
1298 void runtime_Gosched (void) asm ("libgo_runtime.runtime.Gosched");
1299
1300 void
1301 runtime_Gosched(void)
1302 {
1303 runtime_gosched();
1304 }
1305
1306 // Implementation of runtime.GOMAXPROCS.
1307 // delete when scheduler is stronger
1308 int32
1309 runtime_gomaxprocsfunc(int32 n)
1310 {
1311 int32 ret;
1312 uint32 v;
1313
1314 schedlock();
1315 ret = runtime_gomaxprocs;
1316 if(n <= 0)
1317 n = ret;
1318 if(n > maxgomaxprocs)
1319 n = maxgomaxprocs;
1320 runtime_gomaxprocs = n;
1321 if(runtime_gomaxprocs > 1)
1322 runtime_singleproc = false;
1323 if(runtime_gcwaiting != 0) {
1324 if(atomic_mcpumax(runtime_sched.atomic) != 1)
1325 runtime_throw("invalid mcpumax during gc");
1326 schedunlock();
1327 return ret;
1328 }
1329
1330 setmcpumax(n);
1331
1332 // If there are now fewer allowed procs
1333 // than procs running, stop.
1334 v = runtime_atomicload(&runtime_sched.atomic);
1335 if((int32)atomic_mcpu(v) > n) {
1336 schedunlock();
1337 runtime_gosched();
1338 return ret;
1339 }
1340 // handle more procs
1341 matchmg();
1342 schedunlock();
1343 return ret;
1344 }
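
// Note that a non-positive argument (e.g. runtime.GOMAXPROCS(0)) only
// reports the current setting: n <= 0 is replaced by the old value above,
// so the limit is left as it was.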
1345
1346 void
1347 runtime_LockOSThread(void)
1348 {
1349 if(m == &runtime_m0 && runtime_sched.init) {
1350 runtime_sched.lockmain = true;
1351 return;
1352 }
1353 m->lockedg = g;
1354 g->lockedm = m;
1355 }
1356
1357 void
1358 runtime_UnlockOSThread(void)
1359 {
1360 if(m == &runtime_m0 && runtime_sched.init) {
1361 runtime_sched.lockmain = false;
1362 return;
1363 }
1364 m->lockedg = nil;
1365 g->lockedm = nil;
1366 }
1367
1368 bool
1369 runtime_lockedOSThread(void)
1370 {
1371 return g->lockedm != nil && m->lockedg != nil;
1372 }
1373
1374 // for testing of callbacks
1375
1376 _Bool runtime_golockedOSThread(void)
1377 asm("libgo_runtime.runtime.golockedOSThread");
1378
1379 _Bool
1380 runtime_golockedOSThread(void)
1381 {
1382 return runtime_lockedOSThread();
1383 }
1384
1385 // for testing of wire, unwire
1386 uint32
1387 runtime_mid()
1388 {
1389 return m->id;
1390 }
1391
1392 int32 runtime_Goroutines (void)
1393 __asm__ ("libgo_runtime.runtime.Goroutines");
1394
1395 int32
1396 runtime_Goroutines()
1397 {
1398 return runtime_sched.gcount;
1399 }
1400
1401 int32
1402 runtime_mcount(void)
1403 {
1404 return runtime_sched.mcount;
1405 }
1406
1407 static struct {
1408 Lock;
1409 void (*fn)(uintptr*, int32);
1410 int32 hz;
1411 uintptr pcbuf[100];
1412 } prof;
1413
1414 // Called if we receive a SIGPROF signal.
1415 void
1416 runtime_sigprof(uint8 *pc __attribute__ ((unused)),
1417 uint8 *sp __attribute__ ((unused)),
1418 uint8 *lr __attribute__ ((unused)),
1419 G *gp __attribute__ ((unused)))
1420 {
1421 // int32 n;
1422
1423 if(prof.fn == nil || prof.hz == 0)
1424 return;
1425
1426 runtime_lock(&prof);
1427 if(prof.fn == nil) {
1428 runtime_unlock(&prof);
1429 return;
1430 }
1431 // n = runtime_gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
1432 // if(n > 0)
1433 // prof.fn(prof.pcbuf, n);
1434 runtime_unlock(&prof);
1435 }
1436
1437 // Arrange to call fn with a traceback hz times a second.
1438 void
1439 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
1440 {
1441 // Force sane arguments.
1442 if(hz < 0)
1443 hz = 0;
1444 if(hz == 0)
1445 fn = nil;
1446 if(fn == nil)
1447 hz = 0;
1448
1449 // Stop profiler on this cpu so that it is safe to lock prof.
1450 // If a profiling signal came in while we had prof locked,
1451 // it would deadlock.
1452 runtime_resetcpuprofiler(0);
1453
1454 runtime_lock(&prof);
1455 prof.fn = fn;
1456 prof.hz = hz;
1457 runtime_unlock(&prof);
1458 runtime_lock(&runtime_sched);
1459 runtime_sched.profilehz = hz;
1460 runtime_unlock(&runtime_sched);
1461
1462 if(hz != 0)
1463 runtime_resetcpuprofiler(hz);
1464 }