runtime: Use runtime_m to get m value after call to runtime_mcall.
[gcc.git] / libgo / runtime / proc.c
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
10
11 #include "config.h"
12
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
16
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "race.h"
22 #include "go-type.h"
23 #include "go-defer.h"
24
25 #ifdef USING_SPLIT_STACK
26
27 /* FIXME: These are not declared anywhere. */
28
29 extern void __splitstack_getcontext(void *context[10]);
30
31 extern void __splitstack_setcontext(void *context[10]);
32
33 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
34
35 extern void * __splitstack_resetcontext(void *context[10], size_t *);
36
37 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
38 void **);
39
40 extern void __splitstack_block_signals (int *, int *);
41
42 extern void __splitstack_block_signals_context (void *context[10], int *,
43 int *);
44
45 #endif
46
47 #ifndef PTHREAD_STACK_MIN
48 # define PTHREAD_STACK_MIN 8192
49 #endif
50
51 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
52 # define StackMin PTHREAD_STACK_MIN
53 #else
54 # define StackMin (2 * 1024 * 1024)
55 #endif
56
57 uintptr runtime_stacks_sys;
58
59 static void gtraceback(G*);
60
61 #ifdef __rtems__
62 #define __thread
63 #endif
64
65 static __thread G *g;
66 static __thread M *m;
67
68 #ifndef SETCONTEXT_CLOBBERS_TLS
69
70 static inline void
71 initcontext(void)
72 {
73 }
74
75 static inline void
76 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 {
78 }
79
80 #else
81
82 # if defined(__x86_64__) && defined(__sun__)
83
84 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
85 // register to that of the thread which called getcontext. The effect
86 // is that the address of all __thread variables changes. This bug
87 // also affects pthread_self() and pthread_getspecific. We work
88 // around it by clobbering the context field directly to keep %fs the
89 // same.
90
91 static __thread greg_t fs;
92
93 static inline void
94 initcontext(void)
95 {
96 ucontext_t c;
97
98 getcontext(&c);
99 fs = c.uc_mcontext.gregs[REG_FSBASE];
100 }
101
102 static inline void
103 fixcontext(ucontext_t* c)
104 {
105 c->uc_mcontext.gregs[REG_FSBASE] = fs;
106 }
107
108 # elif defined(__NetBSD__)
109
110 // NetBSD has a bug: setcontext clobbers tlsbase, so we need to save
111 // and restore it ourselves.
112
113 static __thread __greg_t tlsbase;
114
115 static inline void
116 initcontext(void)
117 {
118 ucontext_t c;
119
120 getcontext(&c);
121 tlsbase = c.uc_mcontext._mc_tlsbase;
122 }
123
124 static inline void
125 fixcontext(ucontext_t* c)
126 {
127 c->uc_mcontext._mc_tlsbase = tlsbase;
128 }
129
130 # else
131
132 # error unknown case for SETCONTEXT_CLOBBERS_TLS
133
134 # endif
135
136 #endif
137
138 // We can not always refer to the TLS variables directly. The
139 // compiler will call tls_get_addr to get the address of the variable,
140 // and it may hold it in a register across a call to schedule. When
141 // we get back from the call we may be running in a different thread,
142 // in which case the register now points to the TLS variable for a
143 // different thread. We use non-inlinable functions to avoid this
144 // when necessary.
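//
// For example, runtime_exitsyscall below must write
//	runtime_m()->p->syscalltick++;
// rather than m->p->syscalltick++ after its call to runtime_mcall: the
// compiler may have cached the TLS address of m in a register before the
// call, and after runtime_mcall the goroutine may be running on a
// different thread, where that cached address would name the wrong M.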
145
146 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
147
148 G*
149 runtime_g(void)
150 {
151 return g;
152 }
153
154 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
155
156 M*
157 runtime_m(void)
158 {
159 return m;
160 }
161
162 // Set m and g.
163 void
164 runtime_setmg(M* mp, G* gp)
165 {
166 m = mp;
167 g = gp;
168 }
169
170 // The static TLS size. See runtime_newm.
171 static int tlssize;
172
173 // Start a new thread.
174 static void
175 runtime_newosproc(M *mp)
176 {
177 pthread_attr_t attr;
178 size_t stacksize;
179 sigset_t clear, old;
180 pthread_t tid;
181 int ret;
182
183 if(pthread_attr_init(&attr) != 0)
184 runtime_throw("pthread_attr_init");
185 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
186 runtime_throw("pthread_attr_setdetachstate");
187
188 stacksize = PTHREAD_STACK_MIN;
189
190 // With glibc before version 2.16 the static TLS size is taken
191 // out of the stack size, and we get an error or a crash if
192 // there is not enough stack space left. Add it back in if we
193 // can, in case the program uses a lot of TLS space. FIXME:
194 // This can be disabled in glibc 2.16 and later, if the bug is
195 // indeed fixed then.
196 stacksize += tlssize;
197
198 if(pthread_attr_setstacksize(&attr, stacksize) != 0)
199 runtime_throw("pthread_attr_setstacksize");
200
201 // Block signals during pthread_create so that the new thread
202 // starts with signals disabled. It will enable them in minit.
203 sigfillset(&clear);
204
205 #ifdef SIGTRAP
206 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
207 sigdelset(&clear, SIGTRAP);
208 #endif
209
210 sigemptyset(&old);
211 sigprocmask(SIG_BLOCK, &clear, &old);
212 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
213 sigprocmask(SIG_SETMASK, &old, nil);
214
215 if (ret != 0)
216 runtime_throw("pthread_create");
217 }
218
219 // First function run by a new goroutine. This replaces gogocall.
220 static void
221 kickoff(void)
222 {
223 void (*fn)(void*);
224
225 if(g->traceback != nil)
226 gtraceback(g);
227
228 fn = (void (*)(void*))(g->entry);
229 fn(g->param);
230 runtime_goexit();
231 }
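// kickoff is installed as the context entry point by makecontext (see
// runtime_newextram below; the ordinary goroutine creation path in
// __go_go presumably does the same), so the first setcontext switch into
// a newly created g lands here and calls the goroutine's entry function.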
232
233 // Switch context to a different goroutine. This is like longjmp.
234 void runtime_gogo(G*) __attribute__ ((noinline));
235 void
236 runtime_gogo(G* newg)
237 {
238 #ifdef USING_SPLIT_STACK
239 __splitstack_setcontext(&newg->stack_context[0]);
240 #endif
241 g = newg;
242 newg->fromgogo = true;
243 fixcontext(&newg->context);
244 setcontext(&newg->context);
245 runtime_throw("gogo setcontext returned");
246 }
247
248 // Save context and call fn passing g as a parameter. This is like
249 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
250 // g->fromgogo as a flag. It will be true if we got here via
251 // setcontext. g is nil the first time this is called in a new m.
252 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
253 void
254 runtime_mcall(void (*pfn)(G*))
255 {
256 M *mp;
257 G *gp;
258 #ifndef USING_SPLIT_STACK
259 int i;
260 #endif
261
262 // Ensure that all registers are on the stack for the garbage
263 // collector.
264 __builtin_unwind_init();
265
266 mp = m;
267 gp = g;
268 if(gp == mp->g0)
269 runtime_throw("runtime: mcall called on m->g0 stack");
270
271 if(gp != nil) {
272
273 #ifdef USING_SPLIT_STACK
274 __splitstack_getcontext(&g->stack_context[0]);
275 #else
276 gp->gcnext_sp = &i;
277 #endif
278 gp->fromgogo = false;
279 getcontext(&gp->context);
280
281 // When we return from getcontext, we may be running
282 // in a new thread. That means that m and g may have
283 // changed. They are global variables so we will
284 // reload them, but the addresses of m and g may be
285 // cached in our local stack frame, and those
286 // addresses may be wrong. Call functions to reload
287 // the values for this thread.
288 mp = runtime_m();
289 gp = runtime_g();
290
291 if(gp->traceback != nil)
292 gtraceback(gp);
293 }
294 if (gp == nil || !gp->fromgogo) {
295 #ifdef USING_SPLIT_STACK
296 __splitstack_setcontext(&mp->g0->stack_context[0]);
297 #endif
298 mp->g0->entry = (byte*)pfn;
299 mp->g0->param = gp;
300
301 // It's OK to set g directly here because this case
302 // can not occur if we got here via a setcontext to
303 // the getcontext call just above.
304 g = mp->g0;
305
306 fixcontext(&mp->g0->context);
307 setcontext(&mp->g0->context);
308 runtime_throw("runtime: mcall function returned");
309 }
310 }
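// Note that there are two ways out of the getcontext call above: the
// direct return (fromgogo is false), which switches to g0 and runs pfn,
// and the return reached later via runtime_gogo/setcontext (fromgogo is
// true), which simply falls out of runtime_mcall back to the original
// goroutine, possibly on a different thread.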
311
312 #ifdef HAVE_DL_ITERATE_PHDR
313
314 // Called via dl_iterate_phdr.
315
316 static int
317 addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data)
318 {
319 size_t *total = (size_t *)data;
320 unsigned int i;
321
322 for(i = 0; i < info->dlpi_phnum; ++i) {
323 if(info->dlpi_phdr[i].p_type == PT_TLS)
324 *total += info->dlpi_phdr[i].p_memsz;
325 }
326 return 0;
327 }
328
329 // Set the total TLS size.
330
331 static void
332 inittlssize()
333 {
334 size_t total = 0;
335
336 dl_iterate_phdr(addtls, (void *)&total);
337 tlssize = total;
338 }
339
340 #else
341
342 static void
343 inittlssize()
344 {
345 }
346
347 #endif
348
349 // Goroutine scheduler
350 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
351 //
352 // The main concepts are:
353 // G - goroutine.
354 // M - worker thread, or machine.
355 // P - processor, a resource that is required to execute Go code.
356 // An M must have an associated P to execute Go code; however, it can be
357 // blocked or in a syscall without an associated P.
358 //
359 // Design doc at http://golang.org/s/go11sched.
360
361 typedef struct Sched Sched;
362 struct Sched {
363 Lock;
364
365 uint64 goidgen;
366 M* midle; // idle m's waiting for work
367 int32 nmidle; // number of idle m's waiting for work
368 int32 nmidlelocked; // number of locked m's waiting for work
369 int32 mcount; // number of m's that have been created
370 int32 maxmcount; // maximum number of m's allowed (or die)
371
372 P* pidle; // idle P's
373 uint32 npidle;
374 uint32 nmspinning;
375
376 // Global runnable queue.
377 G* runqhead;
378 G* runqtail;
379 int32 runqsize;
380
381 // Global cache of dead G's.
382 Lock gflock;
383 G* gfree;
384
385 uint32 gcwaiting; // gc is waiting to run
386 int32 stopwait;
387 Note stopnote;
388 uint32 sysmonwait;
389 Note sysmonnote;
390 uint64 lastpoll;
391
392 int32 profilehz; // cpu profiling rate
393 };
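// Each P also has its own local run queue (p->runqhead/p->runqtail,
// managed by runqput/runqget/runqsteal below); the runqhead/runqtail
// fields above form the global queue, protected by the Sched lock and
// accessed through globrunqput/globrunqget.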
394
395 // The max value of GOMAXPROCS.
396 // There are no fundamental restrictions on the value.
397 enum { MaxGomaxprocs = 1<<8 };
398
399 Sched runtime_sched;
400 int32 runtime_gomaxprocs;
401 uint32 runtime_needextram = 1;
402 bool runtime_iscgo = true;
403 M runtime_m0;
404 G runtime_g0; // idle goroutine for m0
405 G* runtime_allg;
406 G* runtime_lastg;
407 M* runtime_allm;
408 P** runtime_allp;
409 M* runtime_extram;
410 int8* runtime_goos;
411 int32 runtime_ncpu;
412 bool runtime_precisestack;
413 static int32 newprocs;
414
415 void* runtime_mstart(void*);
416 static void runqput(P*, G*);
417 static G* runqget(P*);
418 static void runqgrow(P*);
419 static G* runqsteal(P*, P*);
420 static void mput(M*);
421 static M* mget(void);
422 static void mcommoninit(M*);
423 static void schedule(void);
424 static void procresize(int32);
425 static void acquirep(P*);
426 static P* releasep(void);
427 static void newm(void(*)(void), P*);
428 static void stopm(void);
429 static void startm(P*, bool);
430 static void handoffp(P*);
431 static void wakep(void);
432 static void stoplockedm(void);
433 static void startlockedm(G*);
434 static void sysmon(void);
435 static uint32 retake(int64);
436 static void incidlelocked(int32);
437 static void checkdead(void);
438 static void exitsyscall0(G*);
439 static void park0(G*);
440 static void goexit0(G*);
441 static void gfput(P*, G*);
442 static G* gfget(P*);
443 static void gfpurge(P*);
444 static void globrunqput(G*);
445 static G* globrunqget(P*, int32);
446 static P* pidleget(void);
447 static void pidleput(P*);
448 static void injectglist(G*);
449 static bool preemptall(void);
450 static bool exitsyscallfast(void);
451
452 // The bootstrap sequence is:
453 //
454 // call osinit
455 // call schedinit
456 // make & queue new G
457 // call runtime_mstart
458 //
459 // The new G calls runtime_main.
460 void
461 runtime_schedinit(void)
462 {
463 int32 n, procs;
464 const byte *p;
465 Eface i;
466
467 m = &runtime_m0;
468 g = &runtime_g0;
469 m->g0 = g;
470 m->curg = g;
471 g->m = m;
472
473 initcontext();
474 inittlssize();
475
476 runtime_sched.maxmcount = 10000;
477 runtime_precisestack = 0;
478
479 runtime_mprofinit();
480 runtime_mallocinit();
481 mcommoninit(m);
482
483 // Initialize the itable value for newErrorCString,
484 // so that the next time it gets called, possibly
485 // in a fault during a garbage collection, it will not
486 // need to allocate memory.
487 runtime_newErrorCString(0, &i);
488
489 runtime_goargs();
490 runtime_goenvs();
491 runtime_parsedebugvars();
492
493 runtime_sched.lastpoll = runtime_nanotime();
494 procs = 1;
495 p = runtime_getenv("GOMAXPROCS");
496 if(p != nil && (n = runtime_atoi(p)) > 0) {
497 if(n > MaxGomaxprocs)
498 n = MaxGomaxprocs;
499 procs = n;
500 }
501 runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
502 procresize(procs);
503
504 // Can not enable GC until all roots are registered.
505 // mstats.enablegc = 1;
506
507 // if(raceenabled)
508 // g->racectx = runtime_raceinit();
509 }
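// For example, running with GOMAXPROCS=4 in the environment gives
// procs = 4 above; a value larger than MaxGomaxprocs (1<<8 = 256) is
// clamped to 256, and an unset or non-positive value leaves the
// default of 1.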
510
511 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
512 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
513
514 static void
515 initDone(void *arg __attribute__ ((unused))) {
516 runtime_unlockOSThread();
517 };
518
519 // The main goroutine.
520 void
521 runtime_main(void* dummy __attribute__((unused)))
522 {
523 Defer d;
524 _Bool frame;
525
526 newm(sysmon, nil);
527
528 // Lock the main goroutine onto this, the main OS thread,
529 // during initialization. Most programs won't care, but a few
530 // do require certain calls to be made by the main thread.
531 // Those can arrange for main.main to run in the main thread
532 // by calling runtime.LockOSThread during initialization
533 // to preserve the lock.
534 runtime_lockOSThread();
535
536 // Defer unlock so that runtime.Goexit during init does the unlock too.
537 d.__pfn = initDone;
538 d.__next = g->defer;
539 d.__arg = (void*)-1;
540 d.__panic = g->panic;
541 d.__retaddr = nil;
542 d.__frame = &frame;
543 g->defer = &d;
544
545 if(m != &runtime_m0)
546 runtime_throw("runtime_main not on m0");
547 __go_go(runtime_MHeap_Scavenger, nil);
548 main_init();
549
550 if(g->defer != &d || d.__pfn != initDone)
551 runtime_throw("runtime: bad defer entry after init");
552 g->defer = d.__next;
553 runtime_unlockOSThread();
554
555 // For gccgo we have to wait until after main is initialized
556 // to enable GC, because initializing main registers the GC
557 // roots.
558 mstats.enablegc = 1;
559
560 main_main();
561 if(raceenabled)
562 runtime_racefini();
563
564 // Make racy client program work: if panicking on
565 // another goroutine at the same time as main returns,
566 // let the other goroutine finish printing the panic trace.
567 // Once it does, it will exit. See issue 3934.
568 if(runtime_panicking)
569 runtime_park(nil, nil, "panicwait");
570
571 runtime_exit(0);
572 for(;;)
573 *(int32*)0 = 0;
574 }
575
576 void
577 runtime_goroutineheader(G *gp)
578 {
579 const char *status;
580
581 switch(gp->status) {
582 case Gidle:
583 status = "idle";
584 break;
585 case Grunnable:
586 status = "runnable";
587 break;
588 case Grunning:
589 status = "running";
590 break;
591 case Gsyscall:
592 status = "syscall";
593 break;
594 case Gwaiting:
595 if(gp->waitreason)
596 status = gp->waitreason;
597 else
598 status = "waiting";
599 break;
600 default:
601 status = "???";
602 break;
603 }
604 runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
605 }
606
607 void
608 runtime_printcreatedby(G *g)
609 {
610 if(g != nil && g->gopc != 0 && g->goid != 1) {
611 String fn;
612 String file;
613 intgo line;
614
615 if(__go_file_line(g->gopc - 1, &fn, &file, &line)) {
616 runtime_printf("created by %S\n", fn);
617 runtime_printf("\t%S:%D\n", file, (int64) line);
618 }
619 }
620 }
621
622 struct Traceback
623 {
624 G* gp;
625 Location locbuf[100];
626 int32 c;
627 };
628
629 void
630 runtime_tracebackothers(G * volatile me)
631 {
632 G * volatile gp;
633 Traceback tb;
634 int32 traceback;
635
636 tb.gp = me;
637 traceback = runtime_gotraceback(nil);
638
639 // Show the current goroutine first, if we haven't already.
640 if((gp = m->curg) != nil && gp != me) {
641 runtime_printf("\n");
642 runtime_goroutineheader(gp);
643 gp->traceback = &tb;
644
645 #ifdef USING_SPLIT_STACK
646 __splitstack_getcontext(&me->stack_context[0]);
647 #endif
648 getcontext(&me->context);
649
650 if(gp->traceback != nil) {
651 runtime_gogo(gp);
652 }
653
654 runtime_printtrace(tb.locbuf, tb.c, false);
655 runtime_printcreatedby(gp);
656 }
657
658 for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
659 if(gp == me || gp == m->curg || gp->status == Gdead)
660 continue;
661 if(gp->issystem && traceback < 2)
662 continue;
663 runtime_printf("\n");
664 runtime_goroutineheader(gp);
665
666 // Our only mechanism for doing a stack trace is
667 // _Unwind_Backtrace. And that only works for the
668 // current thread, not for other random goroutines.
669 // So we need to switch context to the goroutine, get
670 // the backtrace, and then switch back.
671
672 // This means that if g is running or in a syscall, we
673 // can't reliably print a stack trace. FIXME.
674
675 if(gp->status == Grunning) {
676 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
677 runtime_printcreatedby(gp);
678 } else if(gp->status == Gsyscall) {
679 runtime_printf("\tgoroutine in C code; stack unavailable\n");
680 runtime_printcreatedby(gp);
681 } else {
682 gp->traceback = &tb;
683
684 #ifdef USING_SPLIT_STACK
685 __splitstack_getcontext(&me->stack_context[0]);
686 #endif
687 getcontext(&me->context);
688
689 if(gp->traceback != nil) {
690 runtime_gogo(gp);
691 }
692
693 runtime_printtrace(tb.locbuf, tb.c, false);
694 runtime_printcreatedby(gp);
695 }
696 }
697 }
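// The context dance above works as follows: we save our own context,
// set gp->traceback, and gogo to gp.  Wherever gp resumes (kickoff, or a
// getcontext return in mcall/mstart), it sees traceback != nil and calls
// gtraceback, which fills in the Location buffer and gogo's back to us
// (traceback->gp).  Since gtraceback clears gp->traceback, our second
// pass through the check above falls through and prints the trace.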
698
699 static void
700 checkmcount(void)
701 {
702 // sched lock is held
703 if(runtime_sched.mcount > runtime_sched.maxmcount) {
704 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
705 runtime_throw("thread exhaustion");
706 }
707 }
708
709 // Do a stack trace of gp, and then restore the context to the
710 // goroutine that requested it (gp->traceback->gp).
711
712 static void
713 gtraceback(G* gp)
714 {
715 Traceback* traceback;
716
717 traceback = gp->traceback;
718 gp->traceback = nil;
719 traceback->c = runtime_callers(1, traceback->locbuf,
720 sizeof traceback->locbuf / sizeof traceback->locbuf[0]);
721 runtime_gogo(traceback->gp);
722 }
723
724 static void
725 mcommoninit(M *mp)
726 {
727 // If there is no mcache runtime_callers() will crash,
728 // and we are most likely in the sysmon thread, so the stack is senseless anyway.
729 if(m->mcache)
730 runtime_callers(1, mp->createstack, nelem(mp->createstack));
731
732 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
733
734 runtime_lock(&runtime_sched);
735 mp->id = runtime_sched.mcount++;
736 checkmcount();
737 runtime_mpreinit(mp);
738
739 // Add to runtime_allm so garbage collector doesn't free m
740 // when it is just in a register or thread-local storage.
741 mp->alllink = runtime_allm;
742 // runtime_NumCgoCall() iterates over allm w/o schedlock,
743 // so we need to publish it safely.
744 runtime_atomicstorep(&runtime_allm, mp);
745 runtime_unlock(&runtime_sched);
746 }
747
748 // Mark gp ready to run.
749 void
750 runtime_ready(G *gp)
751 {
752 // Mark runnable.
753 m->locks++; // disable preemption because it can be holding p in a local var
754 if(gp->status != Gwaiting) {
755 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
756 runtime_throw("bad g->status in ready");
757 }
758 gp->status = Grunnable;
759 runqput(m->p, gp);
760 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
761 wakep();
762 m->locks--;
763 }
764
765 int32
766 runtime_gcprocs(void)
767 {
768 int32 n;
769
770 // Figure out how many CPUs to use during GC.
771 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
772 runtime_lock(&runtime_sched);
773 n = runtime_gomaxprocs;
774 if(n > runtime_ncpu)
775 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
776 if(n > MaxGcproc)
777 n = MaxGcproc;
778 if(n > runtime_sched.nmidle+1) // one M is currently running
779 n = runtime_sched.nmidle+1;
780 runtime_unlock(&runtime_sched);
781 return n;
782 }
783
784 static bool
785 needaddgcproc(void)
786 {
787 int32 n;
788
789 runtime_lock(&runtime_sched);
790 n = runtime_gomaxprocs;
791 if(n > runtime_ncpu)
792 n = runtime_ncpu;
793 if(n > MaxGcproc)
794 n = MaxGcproc;
795 n -= runtime_sched.nmidle+1; // one M is currently running
796 runtime_unlock(&runtime_sched);
797 return n > 0;
798 }
799
800 void
801 runtime_helpgc(int32 nproc)
802 {
803 M *mp;
804 int32 n, pos;
805
806 runtime_lock(&runtime_sched);
807 pos = 0;
808 for(n = 1; n < nproc; n++) { // one M is currently running
809 if(runtime_allp[pos]->mcache == m->mcache)
810 pos++;
811 mp = mget();
812 if(mp == nil)
813 runtime_throw("runtime_gcprocs inconsistency");
814 mp->helpgc = n;
815 mp->mcache = runtime_allp[pos]->mcache;
816 pos++;
817 runtime_notewakeup(&mp->park);
818 }
819 runtime_unlock(&runtime_sched);
820 }
821
822 // Similar to stoptheworld but best-effort and can be called several times.
823 // There is no reverse operation; it is used only while crashing.
824 // This function must not lock any mutexes.
825 void
826 runtime_freezetheworld(void)
827 {
828 int32 i;
829
830 if(runtime_gomaxprocs == 1)
831 return;
832 // stopwait and preemption requests can be lost
833 // due to races with concurrently executing threads,
834 // so try several times
835 for(i = 0; i < 5; i++) {
836 // this should tell the scheduler to not start any new goroutines
837 runtime_sched.stopwait = 0x7fffffff;
838 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
839 // this should stop running goroutines
840 if(!preemptall())
841 break; // no running goroutines
842 runtime_usleep(1000);
843 }
844 // to be sure
845 runtime_usleep(1000);
846 preemptall();
847 runtime_usleep(1000);
848 }
849
850 void
851 runtime_stoptheworld(void)
852 {
853 int32 i;
854 uint32 s;
855 P *p;
856 bool wait;
857
858 runtime_lock(&runtime_sched);
859 runtime_sched.stopwait = runtime_gomaxprocs;
860 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
861 preemptall();
862 // stop current P
863 m->p->status = Pgcstop;
864 runtime_sched.stopwait--;
865 // try to retake all P's in Psyscall status
866 for(i = 0; i < runtime_gomaxprocs; i++) {
867 p = runtime_allp[i];
868 s = p->status;
869 if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop))
870 runtime_sched.stopwait--;
871 }
872 // stop idle P's
873 while((p = pidleget()) != nil) {
874 p->status = Pgcstop;
875 runtime_sched.stopwait--;
876 }
877 wait = runtime_sched.stopwait > 0;
878 runtime_unlock(&runtime_sched);
879
880 // wait for remaining P's to stop voluntarily
881 if(wait) {
882 runtime_notesleep(&runtime_sched.stopnote);
883 runtime_noteclear(&runtime_sched.stopnote);
884 }
885 if(runtime_sched.stopwait)
886 runtime_throw("stoptheworld: not stopped");
887 for(i = 0; i < runtime_gomaxprocs; i++) {
888 p = runtime_allp[i];
889 if(p->status != Pgcstop)
890 runtime_throw("stoptheworld: not stopped");
891 }
892 }
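// stopwait starts at runtime_gomaxprocs and is decremented once per P
// that reaches Pgcstop: the current P, P's retaken from Psyscall above,
// idle P's, and P's that stop voluntarily in gcstopm, handoffp or
// entersyscall.  The decrement that brings it to zero wakes stopnote,
// which is what the runtime_notesleep above is waiting for.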
893
894 static void
895 mhelpgc(void)
896 {
897 m->helpgc = -1;
898 }
899
900 void
901 runtime_starttheworld(void)
902 {
903 P *p, *p1;
904 M *mp;
905 G *gp;
906 bool add;
907
908 m->locks++; // disable preemption because it can be holding p in a local var
909 gp = runtime_netpoll(false); // non-blocking
910 injectglist(gp);
911 add = needaddgcproc();
912 runtime_lock(&runtime_sched);
913 if(newprocs) {
914 procresize(newprocs);
915 newprocs = 0;
916 } else
917 procresize(runtime_gomaxprocs);
918 runtime_sched.gcwaiting = 0;
919
920 p1 = nil;
921 while((p = pidleget()) != nil) {
922 // procresize() puts p's with work at the beginning of the list.
923 // Once we reach a p without a run queue, the rest don't have one either.
924 if(p->runqhead == p->runqtail) {
925 pidleput(p);
926 break;
927 }
928 p->m = mget();
929 p->link = p1;
930 p1 = p;
931 }
932 if(runtime_sched.sysmonwait) {
933 runtime_sched.sysmonwait = false;
934 runtime_notewakeup(&runtime_sched.sysmonnote);
935 }
936 runtime_unlock(&runtime_sched);
937
938 while(p1) {
939 p = p1;
940 p1 = p1->link;
941 if(p->m) {
942 mp = p->m;
943 p->m = nil;
944 if(mp->nextp)
945 runtime_throw("starttheworld: inconsistent mp->nextp");
946 mp->nextp = p;
947 runtime_notewakeup(&mp->park);
948 } else {
949 // Start M to run P. Do not start another M below.
950 newm(nil, p);
951 add = false;
952 }
953 }
954
955 if(add) {
956 // If GC could have used another helper proc, start one now,
957 // in the hope that it will be available next time.
958 // It would have been even better to start it before the collection,
959 // but doing so requires allocating memory, so it's tricky to
960 // coordinate. This lazy approach works out in practice:
961 // we don't mind if the first couple of gc rounds don't have quite
962 // the maximum number of procs.
963 newm(mhelpgc, nil);
964 }
965 m->locks--;
966 }
967
968 // Called to start an M.
969 void*
970 runtime_mstart(void* mp)
971 {
972 m = (M*)mp;
973 g = m->g0;
974
975 initcontext();
976
977 g->entry = nil;
978 g->param = nil;
979
980 // Record top of stack for use by mcall.
981 // Once we call schedule we're never coming back,
982 // so other calls can reuse this stack space.
983 #ifdef USING_SPLIT_STACK
984 __splitstack_getcontext(&g->stack_context[0]);
985 #else
986 g->gcinitial_sp = &mp;
987 // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
988 // is the top of the stack, not the bottom.
989 g->gcstack_size = 0;
990 g->gcnext_sp = &mp;
991 #endif
992 getcontext(&g->context);
993
994 if(g->entry != nil) {
995 // Got here from mcall.
996 void (*pfn)(G*) = (void (*)(G*))g->entry;
997 G* gp = (G*)g->param;
998 pfn(gp);
999 *(int*)0x21 = 0x21;
1000 }
1001 runtime_minit();
1002
1003 #ifdef USING_SPLIT_STACK
1004 {
1005 int dont_block_signals = 0;
1006 __splitstack_block_signals(&dont_block_signals, nil);
1007 }
1008 #endif
1009
1010 // Install signal handlers; after minit so that minit can
1011 // prepare the thread to be able to handle the signals.
1012 if(m == &runtime_m0)
1013 runtime_initsig();
1014
1015 if(m->mstartfn)
1016 m->mstartfn();
1017
1018 if(m->helpgc) {
1019 m->helpgc = 0;
1020 stopm();
1021 } else if(m != &runtime_m0) {
1022 acquirep(m->nextp);
1023 m->nextp = nil;
1024 }
1025 schedule();
1026
1027 // TODO(brainman): This point is never reached, because scheduler
1028 // does not release os threads at the moment. But once this path
1029 // is enabled, we must remove our seh here.
1030
1031 return nil;
1032 }
1033
1034 typedef struct CgoThreadStart CgoThreadStart;
1035 struct CgoThreadStart
1036 {
1037 M *m;
1038 G *g;
1039 void (*fn)(void);
1040 };
1041
1042 // Allocate a new m unassociated with any thread.
1043 // Can use p for allocation context if needed.
1044 M*
1045 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacksize)
1046 {
1047 M *mp;
1048
1049 m->locks++; // disable GC because it can be called from sysmon
1050 if(m->p == nil)
1051 acquirep(p); // temporarily borrow p for mallocs in this function
1052 #if 0
1053 if(mtype == nil) {
1054 Eface e;
1055 runtime_gc_m_ptr(&e);
1056 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1057 }
1058 #endif
1059
1060 mp = runtime_mal(sizeof *mp);
1061 mcommoninit(mp);
1062 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1063
1064 if(p == m->p)
1065 releasep();
1066 m->locks--;
1067
1068 return mp;
1069 }
1070
1071 static M* lockextra(bool nilokay);
1072 static void unlockextra(M*);
1073
1074 // needm is called when a cgo callback happens on a
1075 // thread without an m (a thread not created by Go).
1076 // In this case, needm is expected to find an m to use
1077 // and return with m, g initialized correctly.
1078 // Since m and g are not set now (likely nil, but see below)
1079 // needm is limited in what routines it can call. In particular
1080 // it can only call nosplit functions (textflag 7) and cannot
1081 // do any scheduling that requires an m.
1082 //
1083 // In order to avoid needing heavy lifting here, we adopt
1084 // the following strategy: there is a stack of available m's
1085 // that can be stolen. Using compare-and-swap
1086 // to pop from the stack has ABA races, so we simulate
1087 // a lock by doing an exchange (via casp) to steal the stack
1088 // head and replace the top pointer with MLOCKED (1).
1089 // This serves as a simple spin lock that we can use even
1090 // without an m. The thread that locks the stack in this way
1091 // unlocks the stack by storing a valid stack head pointer.
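// Schematically (a sketch of lockextra/unlockextra below, ignoring the
// nilokay wait):
//	for(;;) {
//		mp = runtime_atomicloadp(&runtime_extram);
//		if(mp == MLOCKED) { runtime_osyield(); continue; }
//		if(runtime_casp(&runtime_extram, mp, MLOCKED))
//			break;		// we now hold the "lock"
//	}
//	... pop or push an M at the head ...
//	runtime_atomicstorep(&runtime_extram, newhead);	// unlock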
1092 //
1093 // In order to make sure that there is always an m structure
1094 // available to be stolen, we maintain the invariant that there
1095 // is always one more than needed. At the beginning of the
1096 // program (if cgo is in use) the list is seeded with a single m.
1097 // If needm finds that it has taken the last m off the list, its job
1098 // is - once it has installed its own m so that it can do things like
1099 // allocate memory - to create a spare m and put it on the list.
1100 //
1101 // Each of these extra m's also has a g0 and a curg that are
1102 // pressed into service as the scheduling stack and current
1103 // goroutine for the duration of the cgo callback.
1104 //
1105 // When the callback is done with the m, it calls dropm to
1106 // put the m back on the list.
1107 //
1108 // Unlike the gc toolchain, we start running on curg, since we are
1109 // just going to return and let the caller continue.
1110 void
1111 runtime_needm(void)
1112 {
1113 M *mp;
1114
1115 if(runtime_needextram) {
1116 // Can happen if C/C++ code calls Go from a global ctor.
1117 // Can not throw, because scheduler is not initialized yet.
1118 runtime_write(2, "fatal error: cgo callback before cgo call\n",
1119 sizeof("fatal error: cgo callback before cgo call\n")-1);
1120 runtime_exit(1);
1121 }
1122
1123 // Lock extra list, take head, unlock popped list.
1124 // nilokay=false is safe here because of the invariant above,
1125 // that the extra list always contains or will soon contain
1126 // at least one m.
1127 mp = lockextra(false);
1128
1129 // Set needextram when we've just emptied the list,
1130 // so that the eventual call into cgocallbackg will
1131 // allocate a new m for the extra list. We delay the
1132 // allocation until then so that it can be done
1133 // after exitsyscall makes sure it is okay to be
1134 // running at all (that is, there's no garbage collection
1135 // running right now).
1136 mp->needextram = mp->schedlink == nil;
1137 unlockextra(mp->schedlink);
1138
1139 // Install m and g (= m->curg).
1140 runtime_setmg(mp, mp->curg);
1141
1142 // Initialize g's context as in mstart.
1143 initcontext();
1144 g->status = Gsyscall;
1145 g->entry = nil;
1146 g->param = nil;
1147 #ifdef USING_SPLIT_STACK
1148 __splitstack_getcontext(&g->stack_context[0]);
1149 #else
1150 g->gcinitial_sp = &mp;
1151 g->gcstack_size = 0;
1152 g->gcnext_sp = &mp;
1153 #endif
1154 getcontext(&g->context);
1155
1156 if(g->entry != nil) {
1157 // Got here from mcall.
1158 void (*pfn)(G*) = (void (*)(G*))g->entry;
1159 G* gp = (G*)g->param;
1160 pfn(gp);
1161 *(int*)0x22 = 0x22;
1162 }
1163
1164 // Initialize this thread to use the m.
1165 runtime_minit();
1166
1167 #ifdef USING_SPLIT_STACK
1168 {
1169 int dont_block_signals = 0;
1170 __splitstack_block_signals(&dont_block_signals, nil);
1171 }
1172 #endif
1173 }
1174
1175 // newextram allocates an m and puts it on the extra list.
1176 // It is called with a working local m, so that it can do things
1177 // like call schedlock and allocate.
1178 void
1179 runtime_newextram(void)
1180 {
1181 M *mp, *mnext;
1182 G *gp;
1183 byte *g0_sp, *sp;
1184 size_t g0_spsize, spsize;
1185
1186 // Create extra goroutine locked to extra m.
1187 // The goroutine is the context in which the cgo callback will run.
1188 // The sched.pc will never be returned to, but setting it to
1189 // runtime.goexit makes clear to the traceback routines where
1190 // the goroutine stack ends.
1191 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1192 gp = runtime_malg(StackMin, &sp, &spsize);
1193 gp->status = Gdead;
1194 mp->curg = gp;
1195 mp->locked = LockInternal;
1196 mp->lockedg = gp;
1197 gp->lockedm = mp;
1198 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1199 // put on allg for garbage collector
1200 runtime_lock(&runtime_sched);
1201 if(runtime_lastg == nil)
1202 runtime_allg = gp;
1203 else
1204 runtime_lastg->alllink = gp;
1205 runtime_lastg = gp;
1206 runtime_unlock(&runtime_sched);
1207 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1208
1209 // The context for gp will be set up in runtime_needm. But
1210 // here we need to set up the context for g0.
1211 getcontext(&mp->g0->context);
1212 mp->g0->context.uc_stack.ss_sp = g0_sp;
1213 #ifdef MAKECONTEXT_STACK_TOP
1214 mp->g0->context.uc_stack.ss_sp += g0_spsize;
1215 #endif
1216 mp->g0->context.uc_stack.ss_size = g0_spsize;
1217 makecontext(&mp->g0->context, kickoff, 0);
1218
1219 // Add m to the extra list.
1220 mnext = lockextra(true);
1221 mp->schedlink = mnext;
1222 unlockextra(mp);
1223 }
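// The MAKECONTEXT_STACK_TOP adjustment above (and again in __go_go
// below) is for configurations where makecontext apparently expects
// uc_stack.ss_sp to point at the top of the stack rather than at its
// base, which is why ss_sp is advanced by the stack size there.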
1224
1225 // dropm is called when a cgo callback has called needm but is now
1226 // done with the callback and is returning into the non-Go thread.
1227 // It puts the current m back onto the extra list.
1228 //
1229 // The main expense here is the call to signalstack to release the
1230 // m's signal stack, and then the call to needm on the next callback
1231 // from this thread. It is tempting to try to save the m for next time,
1232 // which would eliminate both these costs, but there might not be
1233 // a next time: the current thread (which Go does not control) might exit.
1234 // If we saved the m for that thread, there would be an m leak each time
1235 // such a thread exited. Instead, we acquire and release an m on each
1236 // call. These should typically not be scheduling operations, just a few
1237 // atomics, so the cost should be small.
1238 //
1239 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1240 // variable using pthread_key_create. Unlike the pthread keys we already use
1241 // on OS X, this dummy key would never be read by Go code. It would exist
1242 // only so that we could register a thread-exit-time destructor.
1243 // That destructor would put the m back onto the extra list.
1244 // This is purely a performance optimization. The current version,
1245 // in which dropm happens on each cgo call, is still correct too.
1246 // We may have to keep the current version on systems with cgo
1247 // but without pthreads, like Windows.
1248 void
1249 runtime_dropm(void)
1250 {
1251 M *mp, *mnext;
1252
1253 // Undo whatever initialization minit did during needm.
1254 runtime_unminit();
1255
1256 // Clear m and g, and return m to the extra list.
1257 // After the call to setmg we can only call nosplit functions.
1258 mp = m;
1259 runtime_setmg(nil, nil);
1260
1261 mp->curg->status = Gdead;
1262
1263 mnext = lockextra(true);
1264 mp->schedlink = mnext;
1265 unlockextra(mp);
1266 }
1267
1268 #define MLOCKED ((M*)1)
1269
1270 // lockextra locks the extra list and returns the list head.
1271 // The caller must unlock the list by storing a new list head
1272 // to runtime.extram. If nilokay is true, then lockextra will
1273 // return a nil list head if that's what it finds. If nilokay is false,
1274 // lockextra will keep waiting until the list head is no longer nil.
1275 static M*
1276 lockextra(bool nilokay)
1277 {
1278 M *mp;
1279 void (*yield)(void);
1280
1281 for(;;) {
1282 mp = runtime_atomicloadp(&runtime_extram);
1283 if(mp == MLOCKED) {
1284 yield = runtime_osyield;
1285 yield();
1286 continue;
1287 }
1288 if(mp == nil && !nilokay) {
1289 runtime_usleep(1);
1290 continue;
1291 }
1292 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1293 yield = runtime_osyield;
1294 yield();
1295 continue;
1296 }
1297 break;
1298 }
1299 return mp;
1300 }
1301
1302 static void
1303 unlockextra(M *mp)
1304 {
1305 runtime_atomicstorep(&runtime_extram, mp);
1306 }
1307
1308 static int32
1309 countextra()
1310 {
1311 M *mp, *mc;
1312 int32 c;
1313
1314 for(;;) {
1315 mp = runtime_atomicloadp(&runtime_extram);
1316 if(mp == MLOCKED) {
1317 runtime_osyield();
1318 continue;
1319 }
1320 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1321 runtime_osyield();
1322 continue;
1323 }
1324 c = 0;
1325 for(mc = mp; mc != nil; mc = mc->schedlink)
1326 c++;
1327 runtime_atomicstorep(&runtime_extram, mp);
1328 return c;
1329 }
1330 }
1331
1332 // Create a new m. It will start off with a call to fn, or else the scheduler.
1333 static void
1334 newm(void(*fn)(void), P *p)
1335 {
1336 M *mp;
1337
1338 mp = runtime_allocm(p, -1, nil, nil);
1339 mp->nextp = p;
1340 mp->mstartfn = fn;
1341
1342 runtime_newosproc(mp);
1343 }
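// The stacksize of -1 passed to runtime_allocm means runtime_malg will
// not allocate a stack for the new M's g0 (note the stacksize >= 0 check
// there); g0 simply runs on the thread stack that pthread_create sets up
// in runtime_newosproc.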
1344
1345 // Stops execution of the current m until new work is available.
1346 // Returns with acquired P.
1347 static void
1348 stopm(void)
1349 {
1350 if(m->locks)
1351 runtime_throw("stopm holding locks");
1352 if(m->p)
1353 runtime_throw("stopm holding p");
1354 if(m->spinning) {
1355 m->spinning = false;
1356 runtime_xadd(&runtime_sched.nmspinning, -1);
1357 }
1358
1359 retry:
1360 runtime_lock(&runtime_sched);
1361 mput(m);
1362 runtime_unlock(&runtime_sched);
1363 runtime_notesleep(&m->park);
1364 runtime_noteclear(&m->park);
1365 if(m->helpgc) {
1366 runtime_gchelper();
1367 m->helpgc = 0;
1368 m->mcache = nil;
1369 goto retry;
1370 }
1371 acquirep(m->nextp);
1372 m->nextp = nil;
1373 }
1374
1375 static void
1376 mspinning(void)
1377 {
1378 m->spinning = true;
1379 }
1380
1381 // Schedules some M to run the p (creates an M if necessary).
1382 // If p==nil, tries to get an idle P; if there are no idle P's it does nothing.
1383 static void
1384 startm(P *p, bool spinning)
1385 {
1386 M *mp;
1387 void (*fn)(void);
1388
1389 runtime_lock(&runtime_sched);
1390 if(p == nil) {
1391 p = pidleget();
1392 if(p == nil) {
1393 runtime_unlock(&runtime_sched);
1394 if(spinning)
1395 runtime_xadd(&runtime_sched.nmspinning, -1);
1396 return;
1397 }
1398 }
1399 mp = mget();
1400 runtime_unlock(&runtime_sched);
1401 if(mp == nil) {
1402 fn = nil;
1403 if(spinning)
1404 fn = mspinning;
1405 newm(fn, p);
1406 return;
1407 }
1408 if(mp->spinning)
1409 runtime_throw("startm: m is spinning");
1410 if(mp->nextp)
1411 runtime_throw("startm: m has p");
1412 mp->spinning = spinning;
1413 mp->nextp = p;
1414 runtime_notewakeup(&mp->park);
1415 }
1416
1417 // Hands off P from syscall or locked M.
1418 static void
1419 handoffp(P *p)
1420 {
1421 // if it has local work, start it straight away
1422 if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1423 startm(p, false);
1424 return;
1425 }
1426 // no local work, check that there are no spinning/idle M's,
1427 // otherwise our help is not required
1428 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
1429 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1430 startm(p, true);
1431 return;
1432 }
1433 runtime_lock(&runtime_sched);
1434 if(runtime_sched.gcwaiting) {
1435 p->status = Pgcstop;
1436 if(--runtime_sched.stopwait == 0)
1437 runtime_notewakeup(&runtime_sched.stopnote);
1438 runtime_unlock(&runtime_sched);
1439 return;
1440 }
1441 if(runtime_sched.runqsize) {
1442 runtime_unlock(&runtime_sched);
1443 startm(p, false);
1444 return;
1445 }
1446 // If this is the last running P and nobody is polling network,
1447 // need to wakeup another M to poll network.
1448 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1449 runtime_unlock(&runtime_sched);
1450 startm(p, false);
1451 return;
1452 }
1453 pidleput(p);
1454 runtime_unlock(&runtime_sched);
1455 }
1456
1457 // Tries to add one more P to execute G's.
1458 // Called when a G is made runnable (newproc, ready).
1459 static void
1460 wakep(void)
1461 {
1462 // be conservative about spinning threads
1463 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1464 return;
1465 startm(nil, true);
1466 }
1467
1468 // Stops execution of the current m that is locked to a g until the g is runnable again.
1469 // Returns with acquired P.
1470 static void
1471 stoplockedm(void)
1472 {
1473 P *p;
1474
1475 if(m->lockedg == nil || m->lockedg->lockedm != m)
1476 runtime_throw("stoplockedm: inconsistent locking");
1477 if(m->p) {
1478 // Schedule another M to run this p.
1479 p = releasep();
1480 handoffp(p);
1481 }
1482 incidlelocked(1);
1483 // Wait until another thread schedules lockedg again.
1484 runtime_notesleep(&m->park);
1485 runtime_noteclear(&m->park);
1486 if(m->lockedg->status != Grunnable)
1487 runtime_throw("stoplockedm: not runnable");
1488 acquirep(m->nextp);
1489 m->nextp = nil;
1490 }
1491
1492 // Schedules the locked m to run the locked gp.
1493 static void
1494 startlockedm(G *gp)
1495 {
1496 M *mp;
1497 P *p;
1498
1499 mp = gp->lockedm;
1500 if(mp == m)
1501 runtime_throw("startlockedm: locked to me");
1502 if(mp->nextp)
1503 runtime_throw("startlockedm: m has p");
1504 // directly handoff current P to the locked m
1505 incidlelocked(-1);
1506 p = releasep();
1507 mp->nextp = p;
1508 runtime_notewakeup(&mp->park);
1509 stopm();
1510 }
1511
1512 // Stops the current m for stoptheworld.
1513 // Returns when the world is restarted.
1514 static void
1515 gcstopm(void)
1516 {
1517 P *p;
1518
1519 if(!runtime_sched.gcwaiting)
1520 runtime_throw("gcstopm: not waiting for gc");
1521 if(m->spinning) {
1522 m->spinning = false;
1523 runtime_xadd(&runtime_sched.nmspinning, -1);
1524 }
1525 p = releasep();
1526 runtime_lock(&runtime_sched);
1527 p->status = Pgcstop;
1528 if(--runtime_sched.stopwait == 0)
1529 runtime_notewakeup(&runtime_sched.stopnote);
1530 runtime_unlock(&runtime_sched);
1531 stopm();
1532 }
1533
1534 // Schedules gp to run on the current M.
1535 // Never returns.
1536 static void
1537 execute(G *gp)
1538 {
1539 int32 hz;
1540
1541 if(gp->status != Grunnable) {
1542 runtime_printf("execute: bad g status %d\n", gp->status);
1543 runtime_throw("execute: bad g status");
1544 }
1545 gp->status = Grunning;
1546 m->p->schedtick++;
1547 m->curg = gp;
1548 gp->m = m;
1549
1550 // Check whether the profiler needs to be turned on or off.
1551 hz = runtime_sched.profilehz;
1552 if(m->profilehz != hz)
1553 runtime_resetcpuprofiler(hz);
1554
1555 runtime_gogo(gp);
1556 }
1557
1558 // Finds a runnable goroutine to execute.
1559 // Tries to steal from other P's, get g from global queue, poll network.
1560 static G*
1561 findrunnable(void)
1562 {
1563 G *gp;
1564 P *p;
1565 int32 i;
1566
1567 top:
1568 if(runtime_sched.gcwaiting) {
1569 gcstopm();
1570 goto top;
1571 }
1572 // local runq
1573 gp = runqget(m->p);
1574 if(gp)
1575 return gp;
1576 // global runq
1577 if(runtime_sched.runqsize) {
1578 runtime_lock(&runtime_sched);
1579 gp = globrunqget(m->p, 0);
1580 runtime_unlock(&runtime_sched);
1581 if(gp)
1582 return gp;
1583 }
1584 // poll network
1585 gp = runtime_netpoll(false); // non-blocking
1586 if(gp) {
1587 injectglist(gp->schedlink);
1588 gp->status = Grunnable;
1589 return gp;
1590 }
1591 // If number of spinning M's >= number of busy P's, block.
1592 // This is necessary to prevent excessive CPU consumption
1593 // when GOMAXPROCS>>1 but the program parallelism is low.
1594 if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
1595 goto stop;
1596 if(!m->spinning) {
1597 m->spinning = true;
1598 runtime_xadd(&runtime_sched.nmspinning, 1);
1599 }
1600 // random steal from other P's
1601 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1602 if(runtime_sched.gcwaiting)
1603 goto top;
1604 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1605 if(p == m->p)
1606 gp = runqget(p);
1607 else
1608 gp = runqsteal(m->p, p);
1609 if(gp)
1610 return gp;
1611 }
1612 stop:
1613 // return P and block
1614 runtime_lock(&runtime_sched);
1615 if(runtime_sched.gcwaiting) {
1616 runtime_unlock(&runtime_sched);
1617 goto top;
1618 }
1619 if(runtime_sched.runqsize) {
1620 gp = globrunqget(m->p, 0);
1621 runtime_unlock(&runtime_sched);
1622 return gp;
1623 }
1624 p = releasep();
1625 pidleput(p);
1626 runtime_unlock(&runtime_sched);
1627 if(m->spinning) {
1628 m->spinning = false;
1629 runtime_xadd(&runtime_sched.nmspinning, -1);
1630 }
1631 // check all runqueues once again
1632 for(i = 0; i < runtime_gomaxprocs; i++) {
1633 p = runtime_allp[i];
1634 if(p && p->runqhead != p->runqtail) {
1635 runtime_lock(&runtime_sched);
1636 p = pidleget();
1637 runtime_unlock(&runtime_sched);
1638 if(p) {
1639 acquirep(p);
1640 goto top;
1641 }
1642 break;
1643 }
1644 }
1645 // poll network
1646 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1647 if(m->p)
1648 runtime_throw("findrunnable: netpoll with p");
1649 if(m->spinning)
1650 runtime_throw("findrunnable: netpoll with spinning");
1651 gp = runtime_netpoll(true); // block until new work is available
1652 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1653 if(gp) {
1654 runtime_lock(&runtime_sched);
1655 p = pidleget();
1656 runtime_unlock(&runtime_sched);
1657 if(p) {
1658 acquirep(p);
1659 injectglist(gp->schedlink);
1660 gp->status = Grunnable;
1661 return gp;
1662 }
1663 injectglist(gp);
1664 }
1665 }
1666 stopm();
1667 goto top;
1668 }
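// The search order above is: the local run queue, the global run queue,
// a non-blocking netpoll, random work stealing from other P's, then
// (after releasing the P) one more sweep of all run queues and a
// blocking netpoll before parking this M in stopm and retrying.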
1669
1670 static void
1671 resetspinning(void)
1672 {
1673 int32 nmspinning;
1674
1675 if(m->spinning) {
1676 m->spinning = false;
1677 nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
1678 if(nmspinning < 0)
1679 runtime_throw("findrunnable: negative nmspinning");
1680 } else
1681 nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
1682
1683 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1684 // so see if we need to wakeup another P here.
1685 if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
1686 wakep();
1687 }
1688
1689 // Injects the list of runnable G's into the scheduler.
1690 // Can run concurrently with GC.
1691 static void
1692 injectglist(G *glist)
1693 {
1694 int32 n;
1695 G *gp;
1696
1697 if(glist == nil)
1698 return;
1699 runtime_lock(&runtime_sched);
1700 for(n = 0; glist; n++) {
1701 gp = glist;
1702 glist = gp->schedlink;
1703 gp->status = Grunnable;
1704 globrunqput(gp);
1705 }
1706 runtime_unlock(&runtime_sched);
1707
1708 for(; n && runtime_sched.npidle; n--)
1709 startm(nil, false);
1710 }
1711
1712 // One round of scheduler: find a runnable goroutine and execute it.
1713 // Never returns.
1714 static void
1715 schedule(void)
1716 {
1717 G *gp;
1718 uint32 tick;
1719
1720 if(m->locks)
1721 runtime_throw("schedule: holding locks");
1722
1723 top:
1724 if(runtime_sched.gcwaiting) {
1725 gcstopm();
1726 goto top;
1727 }
1728
1729 gp = nil;
1730 // Check the global runnable queue once in a while to ensure fairness.
1731 // Otherwise two goroutines can completely occupy the local runqueue
1732 // by constantly respawning each other.
1733 tick = m->p->schedtick;
1734 // This is a fancy way to say tick%61==0. It uses 2 MUL instructions (0x4325c53f ~= 2^36/61)
1735 // instead of a single DIV and so is faster on modern processors.
1736 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
1737 runtime_lock(&runtime_sched);
1738 gp = globrunqget(m->p, 1);
1739 runtime_unlock(&runtime_sched);
1740 if(gp)
1741 resetspinning();
1742 }
1743 if(gp == nil) {
1744 gp = runqget(m->p);
1745 if(gp && m->spinning)
1746 runtime_throw("schedule: spinning with local work");
1747 }
1748 if(gp == nil) {
1749 gp = findrunnable(); // blocks until work is available
1750 resetspinning();
1751 }
1752
1753 if(gp->lockedm) {
1754 // Hands off own p to the locked m,
1755 // then blocks waiting for a new p.
1756 startlockedm(gp);
1757 goto top;
1758 }
1759
1760 execute(gp);
1761 }
1762
1763 // Puts the current goroutine into a waiting state and unlocks the lock.
1764 // The goroutine can be made runnable again by calling runtime_ready(gp).
1765 void
1766 runtime_park(void(*unlockf)(Lock*), Lock *lock, const char *reason)
1767 {
1768 m->waitlock = lock;
1769 m->waitunlockf = unlockf;
1770 g->waitreason = reason;
1771 runtime_mcall(park0);
1772 }
1773
1774 // runtime_park continuation on g0.
1775 static void
1776 park0(G *gp)
1777 {
1778 gp->status = Gwaiting;
1779 gp->m = nil;
1780 m->curg = nil;
1781 if(m->waitunlockf) {
1782 m->waitunlockf(m->waitlock);
1783 m->waitunlockf = nil;
1784 m->waitlock = nil;
1785 }
1786 if(m->lockedg) {
1787 stoplockedm();
1788 execute(gp); // Never returns.
1789 }
1790 schedule();
1791 }
1792
1793 // Scheduler yield.
1794 void
1795 runtime_gosched(void)
1796 {
1797 runtime_mcall(runtime_gosched0);
1798 }
1799
1800 // runtime_gosched continuation on g0.
1801 void
1802 runtime_gosched0(G *gp)
1803 {
1804 gp->status = Grunnable;
1805 gp->m = nil;
1806 m->curg = nil;
1807 runtime_lock(&runtime_sched);
1808 globrunqput(gp);
1809 runtime_unlock(&runtime_sched);
1810 if(m->lockedg) {
1811 stoplockedm();
1812 execute(gp); // Never returns.
1813 }
1814 schedule();
1815 }
1816
1817 // Finishes execution of the current goroutine.
1818 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1819 // Since it does not return it does not matter. But if it is preempted
1820 // at the split stack check, GC will complain about inconsistent sp.
1821 void
1822 runtime_goexit(void)
1823 {
1824 if(raceenabled)
1825 runtime_racegoend();
1826 runtime_mcall(goexit0);
1827 }
1828
1829 // runtime_goexit continuation on g0.
1830 static void
1831 goexit0(G *gp)
1832 {
1833 gp->status = Gdead;
1834 gp->entry = nil;
1835 gp->m = nil;
1836 gp->lockedm = nil;
1837 m->curg = nil;
1838 m->lockedg = nil;
1839 if(m->locked & ~LockExternal) {
1840 runtime_printf("invalid m->locked = %d\n", m->locked);
1841 runtime_throw("internal lockOSThread error");
1842 }
1843 m->locked = 0;
1844 gfput(m->p, gp);
1845 schedule();
1846 }
1847
1848 // The goroutine g is about to enter a system call.
1849 // Record that it's not using the cpu anymore.
1850 // This is called only from the go syscall library and cgocall,
1851 // not from the low-level system calls used by the runtime.
1852 //
1853 // Entersyscall cannot split the stack: the stack state saved here
1854 // must refer to the caller's stack segment, because entersyscall is
1855 // going to return immediately after.
1856
1857 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1858
1859 void
1860 runtime_entersyscall()
1861 {
1862 // Disable preemption because during this function g is in Gsyscall status,
1863 // but can have inconsistent g->sched, do not let GC observe it.
1864 m->locks++;
1865
1866 // Leave SP around for GC and traceback.
1867 #ifdef USING_SPLIT_STACK
1868 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1869 &g->gcnext_segment, &g->gcnext_sp,
1870 &g->gcinitial_sp);
1871 #else
1872 {
1873 uint32 v;
1874
1875 g->gcnext_sp = (byte *) &v;
1876 }
1877 #endif
1878
1879 // Save the registers in the g structure so that any pointers
1880 // held in registers will be seen by the garbage collector.
1881 getcontext(&g->gcregs);
1882
1883 g->status = Gsyscall;
1884
1885 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
1886 runtime_lock(&runtime_sched);
1887 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
1888 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
1889 runtime_notewakeup(&runtime_sched.sysmonnote);
1890 }
1891 runtime_unlock(&runtime_sched);
1892 }
1893
1894 m->mcache = nil;
1895 m->p->m = nil;
1896 runtime_atomicstore(&m->p->status, Psyscall);
1897 if(runtime_sched.gcwaiting) {
1898 runtime_lock(&runtime_sched);
1899 if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) {
1900 if(--runtime_sched.stopwait == 0)
1901 runtime_notewakeup(&runtime_sched.stopnote);
1902 }
1903 runtime_unlock(&runtime_sched);
1904 }
1905
1906 m->locks--;
1907 }
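// While the goroutine sits in the syscall, the P left in Psyscall above
// can be retaken (see retake, driven by sysmon) and handed to another M.
// On return, runtime_exitsyscall first tries exitsyscallfast, which
// attempts to CAS that same P from Psyscall back to Prunning before
// falling back to the scheduler.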
1908
1909 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
1910 void
1911 runtime_entersyscallblock(void)
1912 {
1913 P *p;
1914
1915 m->locks++; // see comment in entersyscall
1916
1917 // Leave SP around for GC and traceback.
1918 #ifdef USING_SPLIT_STACK
1919 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1920 &g->gcnext_segment, &g->gcnext_sp,
1921 &g->gcinitial_sp);
1922 #else
1923 g->gcnext_sp = (byte *) &p;
1924 #endif
1925
1926 // Save the registers in the g structure so that any pointers
1927 // held in registers will be seen by the garbage collector.
1928 getcontext(&g->gcregs);
1929
1930 g->status = Gsyscall;
1931
1932 p = releasep();
1933 handoffp(p);
1934 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
1935 incidlelocked(1);
1936
1937 m->locks--;
1938 }
1939
1940 // The goroutine g exited its system call.
1941 // Arrange for it to run on a cpu again.
1942 // This is called only from the go syscall library, not
1943 // from the low-level system calls used by the runtime.
1944 void
1945 runtime_exitsyscall(void)
1946 {
1947 G *gp;
1948
1949 m->locks++; // see comment in entersyscall
1950
1951 gp = g;
1952 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
1953 incidlelocked(-1);
1954
1955 if(exitsyscallfast()) {
1956 // There's a cpu for us, so we can run.
1957 m->p->syscalltick++;
1958 gp->status = Grunning;
1959 // Garbage collector isn't running (since we are),
1960 // so okay to clear gcstack and gcsp.
1961 #ifdef USING_SPLIT_STACK
1962 gp->gcstack = nil;
1963 #endif
1964 gp->gcnext_sp = nil;
1965 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1966 m->locks--;
1967 return;
1968 }
1969
1970 m->locks--;
1971
1972 // Call the scheduler.
1973 runtime_mcall(exitsyscall0);
1974
1975 // Scheduler returned, so we're allowed to run now.
1976 // Delete the gcstack information that we left for
1977 // the garbage collector during the system call.
1978 // Must wait until now because until gosched returns
1979 // we don't know for sure that the garbage collector
1980 // is not running.
1981 #ifdef USING_SPLIT_STACK
1982 gp->gcstack = nil;
1983 #endif
1984 gp->gcnext_sp = nil;
1985 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1986
1987 // Don't refer to m again, we might be running on a different
1988 // thread after returning from runtime_mcall.
1989 runtime_m()->p->syscalltick++;
1990 }
1991
1992 static bool
1993 exitsyscallfast(void)
1994 {
1995 P *p;
1996
1997 // Freezetheworld sets stopwait but does not retake P's.
1998 if(runtime_sched.stopwait) {
1999 m->p = nil;
2000 return false;
2001 }
2002
2003 // Try to re-acquire the last P.
2004 if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
2005 // There's a cpu for us, so we can run.
2006 m->mcache = m->p->mcache;
2007 m->p->m = m;
2008 return true;
2009 }
2010 // Try to get any other idle P.
2011 m->p = nil;
2012 if(runtime_sched.pidle) {
2013 runtime_lock(&runtime_sched);
2014 p = pidleget();
2015 if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
2016 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2017 runtime_notewakeup(&runtime_sched.sysmonnote);
2018 }
2019 runtime_unlock(&runtime_sched);
2020 if(p) {
2021 acquirep(p);
2022 return true;
2023 }
2024 }
2025 return false;
2026 }
2027
2028 // runtime_exitsyscall slow path on g0.
2029 // Failed to acquire P, enqueue gp as runnable.
2030 static void
2031 exitsyscall0(G *gp)
2032 {
2033 P *p;
2034
2035 gp->status = Grunnable;
2036 gp->m = nil;
2037 m->curg = nil;
2038 runtime_lock(&runtime_sched);
2039 p = pidleget();
2040 if(p == nil)
2041 globrunqput(gp);
2042 else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2043 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2044 runtime_notewakeup(&runtime_sched.sysmonnote);
2045 }
2046 runtime_unlock(&runtime_sched);
2047 if(p) {
2048 acquirep(p);
2049 execute(gp); // Never returns.
2050 }
2051 if(m->lockedg) {
2052 // Wait until another thread schedules gp and so m again.
2053 stoplockedm();
2054 execute(gp); // Never returns.
2055 }
2056 stopm();
2057 schedule(); // Never returns.
2058 }
2059
2060 // Called from syscall package before fork.
2061 void syscall_runtime_BeforeFork(void)
2062 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2063 void
2064 syscall_runtime_BeforeFork(void)
2065 {
2066 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2067 // Ensure that we stay on the same M where we disable profiling.
2068 m->locks++;
2069 if(m->profilehz != 0)
2070 runtime_resetcpuprofiler(0);
2071 }
2072
2073 // Called from syscall package after fork in parent.
2074 void syscall_runtime_AfterFork(void)
2075 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2076 void
2077 syscall_runtime_AfterFork(void)
2078 {
2079 int32 hz;
2080
2081 hz = runtime_sched.profilehz;
2082 if(hz != 0)
2083 runtime_resetcpuprofiler(hz);
2084 m->locks--;
2085 }
2086
2087 // Allocate a new g, with a stack big enough for stacksize bytes.
2088 G*
2089 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
2090 {
2091 G *newg;
2092
2093 newg = runtime_malloc(sizeof(G));
2094 if(stacksize >= 0) {
2095 #if USING_SPLIT_STACK
2096 int dont_block_signals = 0;
2097
2098 *ret_stack = __splitstack_makecontext(stacksize,
2099 &newg->stack_context[0],
2100 ret_stacksize);
2101 __splitstack_block_signals_context(&newg->stack_context[0],
2102 &dont_block_signals, nil);
2103 #else
2104 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2105 *ret_stacksize = stacksize;
2106 newg->gcinitial_sp = *ret_stack;
2107 newg->gcstack_size = stacksize;
2108 runtime_xadd(&runtime_stacks_sys, stacksize);
2109 #endif
2110 }
2111 return newg;
2112 }
2113
2114 /* For runtime package testing. */
2115
2116
2117 // Create a new g running fn with siz bytes of arguments.
2118 // Put it on the queue of g's waiting to run.
2119 // The compiler turns a go statement into a call to this.
2120 // Cannot split the stack because it assumes that the arguments
2121 // are available sequentially after &fn; they would not be
2122 // copied if a stack split occurred. It's OK for this to call
2123 // functions that split the stack.
2124 void runtime_testing_entersyscall(void)
2125 __asm__ (GOSYM_PREFIX "runtime.entersyscall");
2126 void
2127 runtime_testing_entersyscall()
2128 {
2129 runtime_entersyscall();
2130 }
2131
2132 void runtime_testing_exitsyscall(void)
2133 __asm__ (GOSYM_PREFIX "runtime.exitsyscall");
2134
2135 void
2136 runtime_testing_exitsyscall()
2137 {
2138 runtime_exitsyscall();
2139 }
2140
2141 G*
2142 __go_go(void (*fn)(void*), void* arg)
2143 {
2144 byte *sp;
2145 size_t spsize;
2146 G *newg;
2147
2148 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2149 m->locks++; // disable preemption because it can be holding p in a local var
2150
2151 if((newg = gfget(m->p)) != nil) {
2152 #ifdef USING_SPLIT_STACK
2153 int dont_block_signals = 0;
2154
2155 sp = __splitstack_resetcontext(&newg->stack_context[0],
2156 &spsize);
2157 __splitstack_block_signals_context(&newg->stack_context[0],
2158 &dont_block_signals, nil);
2159 #else
2160 sp = newg->gcinitial_sp;
2161 spsize = newg->gcstack_size;
2162 if(spsize == 0)
2163 runtime_throw("bad spsize in __go_go");
2164 newg->gcnext_sp = sp;
2165 #endif
2166 } else {
2167 newg = runtime_malg(StackMin, &sp, &spsize);
2168 runtime_lock(&runtime_sched);
2169 if(runtime_lastg == nil)
2170 runtime_allg = newg;
2171 else
2172 runtime_lastg->alllink = newg;
2173 runtime_lastg = newg;
2174 runtime_unlock(&runtime_sched);
2175 }
2176
2177 newg->entry = (byte*)fn;
2178 newg->param = arg;
2179 newg->gopc = (uintptr)__builtin_return_address(0);
2180 newg->status = Grunnable;
2181 newg->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
2182
2183 {
2184 // Avoid warnings about variables clobbered by
2185 // longjmp.
2186 byte * volatile vsp = sp;
2187 size_t volatile vspsize = spsize;
2188 G * volatile vnewg = newg;
2189
2190 getcontext(&vnewg->context);
2191 vnewg->context.uc_stack.ss_sp = vsp;
2192 #ifdef MAKECONTEXT_STACK_TOP
2193 vnewg->context.uc_stack.ss_sp += vspsize;
2194 #endif
2195 vnewg->context.uc_stack.ss_size = vspsize;
2196 makecontext(&vnewg->context, kickoff, 0);
2197
2198 runqput(m->p, vnewg);
2199
2200 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2201 wakep();
2202 m->locks--;
2203 return vnewg;
2204 }
2205 }
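
// A rough sketch (illustrative only, all names hypothetical) of the call
// the compiler emits for a go statement, given the signature above: the
// argument values are packaged into an allocated struct and a thunk
// unpacks them in the new goroutine.
//
//	// go f(a, b)  becomes, roughly:
//	//	struct go_f_args *p = <allocate argument struct>;
//	//	p->a = a; p->b = b;
//	//	__go_go(go_f_thunk, p);   // go_f_thunk(void *arg) calls f(p->a, p->b)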
2206
2207 // Put on gfree list.
2208 // If local list is too long, transfer a batch to the global list.
2209 static void
2210 gfput(P *p, G *gp)
2211 {
2212 gp->schedlink = p->gfree;
2213 p->gfree = gp;
2214 p->gfreecnt++;
2215 if(p->gfreecnt >= 64) {
2216 runtime_lock(&runtime_sched.gflock);
2217 while(p->gfreecnt >= 32) {
2218 p->gfreecnt--;
2219 gp = p->gfree;
2220 p->gfree = gp->schedlink;
2221 gp->schedlink = runtime_sched.gfree;
2222 runtime_sched.gfree = gp;
2223 }
2224 runtime_unlock(&runtime_sched.gflock);
2225 }
2226 }
2227
2228 // Get from gfree list.
2229 // If local list is empty, grab a batch from global list.
2230 static G*
2231 gfget(P *p)
2232 {
2233 G *gp;
2234
2235 retry:
2236 gp = p->gfree;
2237 if(gp == nil && runtime_sched.gfree) {
2238 runtime_lock(&runtime_sched.gflock);
2239 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2240 p->gfreecnt++;
2241 gp = runtime_sched.gfree;
2242 runtime_sched.gfree = gp->schedlink;
2243 gp->schedlink = p->gfree;
2244 p->gfree = gp;
2245 }
2246 runtime_unlock(&runtime_sched.gflock);
2247 goto retry;
2248 }
2249 if(gp) {
2250 p->gfree = gp->schedlink;
2251 p->gfreecnt--;
2252 }
2253 return gp;
2254 }
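
// Note on the thresholds above: gfput starts draining a P's free list to
// the global list once it holds 64 G's and stops at 32, while gfget
// refills an empty local list up to 32 from the global list, so free G's
// migrate between P's in batches rather than one at a time.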
2255
2256 // Purge all cached G's from gfree list to the global list.
2257 static void
2258 gfpurge(P *p)
2259 {
2260 G *gp;
2261
2262 runtime_lock(&runtime_sched.gflock);
2263 while(p->gfreecnt) {
2264 p->gfreecnt--;
2265 gp = p->gfree;
2266 p->gfree = gp->schedlink;
2267 gp->schedlink = runtime_sched.gfree;
2268 runtime_sched.gfree = gp;
2269 }
2270 runtime_unlock(&runtime_sched.gflock);
2271 }
2272
2273 void
2274 runtime_Breakpoint(void)
2275 {
2276 runtime_breakpoint();
2277 }
2278
2279 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2280
2281 void
2282 runtime_Gosched(void)
2283 {
2284 runtime_gosched();
2285 }
2286
2287 // Implementation of runtime.GOMAXPROCS.
2288 // TODO: delete when the scheduler is even stronger.
2289 int32
2290 runtime_gomaxprocsfunc(int32 n)
2291 {
2292 int32 ret;
2293
2294 if(n > MaxGomaxprocs)
2295 n = MaxGomaxprocs;
2296 runtime_lock(&runtime_sched);
2297 ret = runtime_gomaxprocs;
2298 if(n <= 0 || n == ret) {
2299 runtime_unlock(&runtime_sched);
2300 return ret;
2301 }
2302 runtime_unlock(&runtime_sched);
2303
2304 runtime_semacquire(&runtime_worldsema, false);
2305 m->gcing = 1;
2306 runtime_stoptheworld();
2307 newprocs = n;
2308 m->gcing = 0;
2309 runtime_semrelease(&runtime_worldsema);
2310 runtime_starttheworld();
2311
2312 return ret;
2313 }
2314
2315 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2316 // after they modify m->locked. Do not allow preemption during this call,
2317 // or else the m might be different in this function than in the caller.
2318 static void
2319 lockOSThread(void)
2320 {
2321 m->lockedg = g;
2322 g->lockedm = m;
2323 }
2324
2325 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2326 void
2327 runtime_LockOSThread(void)
2328 {
2329 m->locked |= LockExternal;
2330 lockOSThread();
2331 }
2332
2333 void
2334 runtime_lockOSThread(void)
2335 {
2336 m->locked += LockInternal;
2337 lockOSThread();
2338 }
2339
2340
2341 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2342 // after they update m->locked. Do not allow preemption during this call,
2343 // or else the m might be different in this function than in the caller.
2344 static void
2345 unlockOSThread(void)
2346 {
2347 if(m->locked != 0)
2348 return;
2349 m->lockedg = nil;
2350 g->lockedm = nil;
2351 }
2352
2353 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2354
2355 void
2356 runtime_UnlockOSThread(void)
2357 {
2358 m->locked &= ~LockExternal;
2359 unlockOSThread();
2360 }
2361
2362 void
2363 runtime_unlockOSThread(void)
2364 {
2365 if(m->locked < LockInternal)
2366 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2367 m->locked -= LockInternal;
2368 unlockOSThread();
2369 }
2370
2371 bool
2372 runtime_lockedOSThread(void)
2373 {
2374 return g->lockedm != nil && m->lockedg != nil;
2375 }
2376
2377 // for testing of callbacks
2378
2379 _Bool runtime_golockedOSThread(void)
2380 __asm__ (GOSYM_PREFIX "runtime.golockedOSThread");
2381
2382 _Bool
2383 runtime_golockedOSThread(void)
2384 {
2385 return runtime_lockedOSThread();
2386 }
2387
2388 intgo runtime_NumGoroutine (void)
2389 __asm__ (GOSYM_PREFIX "runtime.NumGoroutine");
2390
2391 intgo
2392 runtime_NumGoroutine()
2393 {
2394 return runtime_gcount();
2395 }
2396
2397 int32
2398 runtime_gcount(void)
2399 {
2400 G *gp;
2401 int32 n, s;
2402
2403 n = 0;
2404 runtime_lock(&runtime_sched);
2405 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2406 // We do not want to increment/decrement centralized counter in newproc/goexit,
2407 // just to make runtime.NumGoroutine() faster.
2408 // Compromise solution is to introduce per-P counters of active goroutines.
2409 for(gp = runtime_allg; gp; gp = gp->alllink) {
2410 s = gp->status;
2411 if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
2412 n++;
2413 }
2414 runtime_unlock(&runtime_sched);
2415 return n;
2416 }
2417
2418 int32
2419 runtime_mcount(void)
2420 {
2421 return runtime_sched.mcount;
2422 }
2423
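// CPU profiling state shared with the SIGPROF handler below. The embedded
// Lock serializes runtime_sigprof against runtime_setcpuprofilerate, and
// pcbuf/locbuf hold the traceback captured for each profiling signal.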
2424 static struct {
2425 Lock;
2426 void (*fn)(uintptr*, int32);
2427 int32 hz;
2428 uintptr pcbuf[100];
2429 Location locbuf[100];
2430 } prof;
2431
2432 static void
2433 System(void)
2434 {
2435 }
2436
2437 // Called if we receive a SIGPROF signal.
2438 void
2439 runtime_sigprof()
2440 {
2441 int32 n, i;
2442 bool traceback;
2443
2444 if(prof.fn == nil || prof.hz == 0)
2445 return;
2446 traceback = true;
2447 // Windows does profiling in a dedicated thread w/o m.
2448 if(!Windows && (m == nil || m->mcache == nil))
2449 traceback = false;
2450
2451 runtime_lock(&prof);
2452 if(prof.fn == nil) {
2453 runtime_unlock(&prof);
2454 return;
2455 }
2456 n = 0;
2457 if(traceback) {
2458 n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf));
2459 for(i = 0; i < n; i++)
2460 prof.pcbuf[i] = prof.locbuf[i].pc;
2461 }
2462 if (!traceback || n <= 0) {
2463 n = 2;
2464 prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2465 prof.pcbuf[1] = (uintptr)System + 1;
2466 }
2467 prof.fn(prof.pcbuf, n);
2468 runtime_unlock(&prof);
2469 }
2470
2471 // Arrange to call fn with a traceback hz times a second.
2472 void
2473 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
2474 {
2475 // Force sane arguments.
2476 if(hz < 0)
2477 hz = 0;
2478 if(hz == 0)
2479 fn = nil;
2480 if(fn == nil)
2481 hz = 0;
2482
2483 // Disable preemption, otherwise we can be rescheduled to another thread
2484 // that has profiling enabled.
2485 m->locks++;
2486
2487 // Stop the profiler on this thread so that it is safe to lock prof:
2488 // if a profiling signal came in while we held prof locked,
2489 // it would deadlock.
2490 runtime_resetcpuprofiler(0);
2491
2492 runtime_lock(&prof);
2493 prof.fn = fn;
2494 prof.hz = hz;
2495 runtime_unlock(&prof);
2496 runtime_lock(&runtime_sched);
2497 runtime_sched.profilehz = hz;
2498 runtime_unlock(&runtime_sched);
2499
2500 if(hz != 0)
2501 runtime_resetcpuprofiler(hz);
2502
2503 m->locks--;
2504 }
2505
2506 // Change number of processors. The world is stopped, sched is locked.
2507 static void
2508 procresize(int32 new)
2509 {
2510 int32 i, old;
2511 G *gp;
2512 P *p;
2513
2514 old = runtime_gomaxprocs;
2515 if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
2516 runtime_throw("procresize: invalid arg");
2517 // initialize new P's
2518 for(i = 0; i < new; i++) {
2519 p = runtime_allp[i];
2520 if(p == nil) {
2521 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2522 p->id = i;
2523 p->status = Pgcstop;
2524 runtime_atomicstorep(&runtime_allp[i], p);
2525 }
2526 if(p->mcache == nil) {
2527 if(old==0 && i==0)
2528 p->mcache = m->mcache; // bootstrap
2529 else
2530 p->mcache = runtime_allocmcache();
2531 }
2532 if(p->runq == nil) {
2533 p->runqsize = 128;
2534 p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC);
2535 }
2536 }
2537
2538 // redistribute runnable G's evenly
2539 for(i = 0; i < old; i++) {
2540 p = runtime_allp[i];
2541 while((gp = runqget(p)) != nil)
2542 globrunqput(gp);
2543 }
2544 // start at 1 because current M already executes some G and will acquire allp[0] below,
2545 // so if we have a spare G we want to put it into allp[1].
2546 for(i = 1; runtime_sched.runqhead; i++) {
2547 gp = runtime_sched.runqhead;
2548 runtime_sched.runqhead = gp->schedlink;
2549 runqput(runtime_allp[i%new], gp);
2550 }
2551 runtime_sched.runqtail = nil;
2552 runtime_sched.runqsize = 0;
2553
2554 // free unused P's
2555 for(i = new; i < old; i++) {
2556 p = runtime_allp[i];
2557 runtime_freemcache(p->mcache);
2558 p->mcache = nil;
2559 gfpurge(p);
2560 p->status = Pdead;
2561 // can't free P itself because it can be referenced by an M in syscall
2562 }
2563
2564 if(m->p)
2565 m->p->m = nil;
2566 m->p = nil;
2567 m->mcache = nil;
2568 p = runtime_allp[0];
2569 p->m = nil;
2570 p->status = Pidle;
2571 acquirep(p);
2572 for(i = new-1; i > 0; i--) {
2573 p = runtime_allp[i];
2574 p->status = Pidle;
2575 pidleput(p);
2576 }
2577 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2578 }
2579
2580 // Associate p and the current m.
2581 static void
2582 acquirep(P *p)
2583 {
2584 if(m->p || m->mcache)
2585 runtime_throw("acquirep: already in go");
2586 if(p->m || p->status != Pidle) {
2587 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
2588 runtime_throw("acquirep: invalid p state");
2589 }
2590 m->mcache = p->mcache;
2591 m->p = p;
2592 p->m = m;
2593 p->status = Prunning;
2594 }
2595
2596 // Disassociate p and the current m.
2597 static P*
2598 releasep(void)
2599 {
2600 P *p;
2601
2602 if(m->p == nil || m->mcache == nil)
2603 runtime_throw("releasep: invalid arg");
2604 p = m->p;
2605 if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
2606 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2607 m, m->p, p->m, m->mcache, p->mcache, p->status);
2608 runtime_throw("releasep: invalid p state");
2609 }
2610 m->p = nil;
2611 m->mcache = nil;
2612 p->m = nil;
2613 p->status = Pidle;
2614 return p;
2615 }
2616
2617 static void
2618 incidlelocked(int32 v)
2619 {
2620 runtime_lock(&runtime_sched);
2621 runtime_sched.nmidlelocked += v;
2622 if(v > 0)
2623 checkdead();
2624 runtime_unlock(&runtime_sched);
2625 }
2626
2627 // Check for deadlock situation.
2628 // The check is based on the number of running M's; if it is 0, we have a deadlock.
2629 static void
2630 checkdead(void)
2631 {
2632 G *gp;
2633 int32 run, grunning, s;
2634
2635 // -1 for sysmon
2636 run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
2637 if(run > 0)
2638 return;
2639 if(run < 0) {
2640 runtime_printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2641 runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
2642 runtime_throw("checkdead: inconsistent counts");
2643 }
2644 grunning = 0;
2645 for(gp = runtime_allg; gp; gp = gp->alllink) {
2646 if(gp->isbackground)
2647 continue;
2648 s = gp->status;
2649 if(s == Gwaiting)
2650 grunning++;
2651 else if(s == Grunnable || s == Grunning || s == Gsyscall) {
2652 runtime_printf("checkdead: found g %D in status %d\n", gp->goid, s);
2653 runtime_throw("checkdead: runnable g");
2654 }
2655 }
2656 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2657 runtime_exit(0);
2658 m->throwing = -1; // do not dump full stacks
2659 runtime_throw("all goroutines are asleep - deadlock!");
2660 }
2661
2662 static void
2663 sysmon(void)
2664 {
2665 uint32 idle, delay;
2666 int64 now, lastpoll, lasttrace;
2667 G *gp;
2668
2669 lasttrace = 0;
2670 idle = 0; // how many cycles in succession we have not woken anybody up
2671 delay = 0;
2672 for(;;) {
2673 if(idle == 0) // start with 20us sleep...
2674 delay = 20;
2675 else if(idle > 50) // start doubling the sleep after 1ms...
2676 delay *= 2;
2677 if(delay > 10*1000) // up to 10ms
2678 delay = 10*1000;
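// So while nothing interesting happens the sleep pattern is roughly:
// ~50 cycles at 20us (about 1ms total), then the delay doubles on every
// further idle cycle (40us, 80us, ...) until it is capped at 10ms.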
2679 runtime_usleep(delay);
2680 if(runtime_debug.schedtrace <= 0 &&
2681 (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2682 runtime_lock(&runtime_sched);
2683 if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
2684 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
2685 runtime_unlock(&runtime_sched);
2686 runtime_notesleep(&runtime_sched.sysmonnote);
2687 runtime_noteclear(&runtime_sched.sysmonnote);
2688 idle = 0;
2689 delay = 20;
2690 } else
2691 runtime_unlock(&runtime_sched);
2692 }
2693 // poll network if not polled for more than 10ms
2694 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
2695 now = runtime_nanotime();
2696 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2697 runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
2698 gp = runtime_netpoll(false); // non-blocking
2699 if(gp) {
2700 // Need to decrement number of idle locked M's
2701 // (pretending that one more is running) before injectglist.
2702 // Otherwise it can lead to the following situation:
2703 // injectglist grabs all P's but before it starts M's to run the P's,
2704 // another M returns from syscall, finishes running its G,
2705 // observes that there is no work to do and no other running M's
2706 // and reports deadlock.
2707 incidlelocked(-1);
2708 injectglist(gp);
2709 incidlelocked(1);
2710 }
2711 }
2712 // retake P's blocked in syscalls
2713 // and preempt long running G's
2714 if(retake(now))
2715 idle = 0;
2716 else
2717 idle++;
2718
2719 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
2720 lasttrace = now;
2721 runtime_schedtrace(runtime_debug.scheddetail);
2722 }
2723 }
2724 }
2725
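// Per-P bookkeeping used only by retake below: the scheduler and syscall
// ticks last observed for each P, and when they were observed, so sysmon
// can tell how long a P has been stuck in the same syscall or running the
// same G.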
2726 typedef struct Pdesc Pdesc;
2727 struct Pdesc
2728 {
2729 uint32 schedtick;
2730 int64 schedwhen;
2731 uint32 syscalltick;
2732 int64 syscallwhen;
2733 };
2734 static Pdesc pdesc[MaxGomaxprocs];
2735
2736 static uint32
2737 retake(int64 now)
2738 {
2739 uint32 i, s, n;
2740 int64 t;
2741 P *p;
2742 Pdesc *pd;
2743
2744 n = 0;
2745 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
2746 p = runtime_allp[i];
2747 if(p==nil)
2748 continue;
2749 pd = &pdesc[i];
2750 s = p->status;
2751 if(s == Psyscall) {
2752 // Retake P from syscall if it's there for more than 1 sysmon tick (20us).
2753 // But only if there is other work to do.
2754 t = p->syscalltick;
2755 if(pd->syscalltick != t) {
2756 pd->syscalltick = t;
2757 pd->syscallwhen = now;
2758 continue;
2759 }
2760 if(p->runqhead == p->runqtail &&
2761 runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0)
2762 continue;
2763 // Need to decrement number of idle locked M's
2764 // (pretending that one more is running) before the CAS.
2765 // Otherwise the M from which we retake can exit the syscall,
2766 // increment nmidle and report deadlock.
2767 incidlelocked(-1);
2768 if(runtime_cas(&p->status, s, Pidle)) {
2769 n++;
2770 handoffp(p);
2771 }
2772 incidlelocked(1);
2773 } else if(s == Prunning) {
2774 // Preempt G if it's running for more than 10ms.
2775 t = p->schedtick;
2776 if(pd->schedtick != t) {
2777 pd->schedtick = t;
2778 pd->schedwhen = now;
2779 continue;
2780 }
2781 if(pd->schedwhen + 10*1000*1000 > now)
2782 continue;
2783 // preemptone(p);
2784 }
2785 }
2786 return n;
2787 }
2788
2789 // Tell all goroutines that they have been preempted and they should stop.
2790 // This function is purely best-effort. It can fail to inform a goroutine if a
2791 // processor just started running it.
2792 // No locks need to be held.
2793 // Returns true if preemption request was issued to at least one goroutine.
2794 static bool
2795 preemptall(void)
2796 {
2797 return false;
2798 }
2799
2800 void
2801 runtime_schedtrace(bool detailed)
2802 {
2803 static int64 starttime;
2804 int64 now;
2805 int64 id1, id2, id3;
2806 int32 i, q, t, h, s;
2807 const char *fmt;
2808 M *mp, *lockedm;
2809 G *gp, *lockedg;
2810 P *p;
2811
2812 now = runtime_nanotime();
2813 if(starttime == 0)
2814 starttime = now;
2815
2816 runtime_lock(&runtime_sched);
2817 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
2818 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
2819 runtime_sched.nmidle, runtime_sched.runqsize);
2820 if(detailed) {
2821 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
2822 runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
2823 runtime_sched.stopwait, runtime_sched.sysmonwait);
2824 }
2825 // We must be careful while reading data from P's, M's and G's.
2826 // Even if we hold schedlock, most data can be changed concurrently.
2827 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
2828 for(i = 0; i < runtime_gomaxprocs; i++) {
2829 p = runtime_allp[i];
2830 if(p == nil)
2831 continue;
2832 mp = p->m;
2833 t = p->runqtail;
2834 h = p->runqhead;
2835 s = p->runqsize;
2836 q = t - h;
2837 if(q < 0)
2838 q += s;
2839 if(detailed)
2840 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d/%d gfreecnt=%d\n",
2841 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt);
2842 else {
2843 // In non-detailed mode format lengths of per-P run queues as:
2844 // [len1 len2 len3 len4]
2845 fmt = " %d";
2846 if(runtime_gomaxprocs == 1)
2847 fmt = " [%d]\n";
2848 else if(i == 0)
2849 fmt = " [%d";
2850 else if(i == runtime_gomaxprocs-1)
2851 fmt = " %d]\n";
2852 runtime_printf(fmt, q);
2853 }
2854 }
2855 if(!detailed) {
2856 runtime_unlock(&runtime_sched);
2857 return;
2858 }
2859 for(mp = runtime_allm; mp; mp = mp->alllink) {
2860 p = mp->p;
2861 gp = mp->curg;
2862 lockedg = mp->lockedg;
2863 id1 = -1;
2864 if(p)
2865 id1 = p->id;
2866 id2 = -1;
2867 if(gp)
2868 id2 = gp->goid;
2869 id3 = -1;
2870 if(lockedg)
2871 id3 = lockedg->goid;
2872 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
2873 " locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
2874 mp->id, id1, id2,
2875 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
2876 mp->spinning, id3);
2877 }
2878 for(gp = runtime_allg; gp; gp = gp->alllink) {
2879 mp = gp->m;
2880 lockedm = gp->lockedm;
2881 runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
2882 gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
2883 lockedm ? lockedm->id : -1);
2884 }
2885 runtime_unlock(&runtime_sched);
2886 }
2887
2888 // Put mp on midle list.
2889 // Sched must be locked.
2890 static void
2891 mput(M *mp)
2892 {
2893 mp->schedlink = runtime_sched.midle;
2894 runtime_sched.midle = mp;
2895 runtime_sched.nmidle++;
2896 checkdead();
2897 }
2898
2899 // Try to get an m from midle list.
2900 // Sched must be locked.
2901 static M*
2902 mget(void)
2903 {
2904 M *mp;
2905
2906 if((mp = runtime_sched.midle) != nil){
2907 runtime_sched.midle = mp->schedlink;
2908 runtime_sched.nmidle--;
2909 }
2910 return mp;
2911 }
2912
2913 // Put gp on the global runnable queue.
2914 // Sched must be locked.
2915 static void
2916 globrunqput(G *gp)
2917 {
2918 gp->schedlink = nil;
2919 if(runtime_sched.runqtail)
2920 runtime_sched.runqtail->schedlink = gp;
2921 else
2922 runtime_sched.runqhead = gp;
2923 runtime_sched.runqtail = gp;
2924 runtime_sched.runqsize++;
2925 }
2926
2927 // Try to get a batch of G's from the global runnable queue.
2928 // Sched must be locked.
2929 static G*
2930 globrunqget(P *p, int32 max)
2931 {
2932 G *gp, *gp1;
2933 int32 n;
2934
2935 if(runtime_sched.runqsize == 0)
2936 return nil;
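// Take an even per-P share of the global queue (plus one), so that a
// single P cannot drain the whole queue by itself.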
2937 n = runtime_sched.runqsize/runtime_gomaxprocs+1;
2938 if(n > runtime_sched.runqsize)
2939 n = runtime_sched.runqsize;
2940 if(max > 0 && n > max)
2941 n = max;
2942 runtime_sched.runqsize -= n;
2943 if(runtime_sched.runqsize == 0)
2944 runtime_sched.runqtail = nil;
2945 gp = runtime_sched.runqhead;
2946 runtime_sched.runqhead = gp->schedlink;
2947 n--;
2948 while(n--) {
2949 gp1 = runtime_sched.runqhead;
2950 runtime_sched.runqhead = gp1->schedlink;
2951 runqput(p, gp1);
2952 }
2953 return gp;
2954 }
2955
2956 // Put p on the pidle list.
2957 // Sched must be locked.
2958 static void
2959 pidleput(P *p)
2960 {
2961 p->link = runtime_sched.pidle;
2962 runtime_sched.pidle = p;
2963 runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
2964 }
2965
2966 // Try to get a p from the pidle list.
2967 // Sched must be locked.
2968 static P*
2969 pidleget(void)
2970 {
2971 P *p;
2972
2973 p = runtime_sched.pidle;
2974 if(p) {
2975 runtime_sched.pidle = p->link;
2976 runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
2977 }
2978 return p;
2979 }
2980
2981 // Put g on local runnable queue.
2982 // TODO(dvyukov): consider using lock-free queue.
2983 static void
2984 runqput(P *p, G *gp)
2985 {
2986 int32 h, t, s;
2987
2988 runtime_lock(p);
2989 retry:
2990 h = p->runqhead;
2991 t = p->runqtail;
2992 s = p->runqsize;
2993 if(t == h-1 || (h == 0 && t == s-1)) {
2994 runqgrow(p);
2995 goto retry;
2996 }
2997 p->runq[t++] = gp;
2998 if(t == s)
2999 t = 0;
3000 p->runqtail = t;
3001 runtime_unlock(p);
3002 }
3003
3004 // Get g from local runnable queue.
3005 static G*
3006 runqget(P *p)
3007 {
3008 G *gp;
3009 int32 t, h, s;
3010
3011 if(p->runqhead == p->runqtail)
3012 return nil;
3013 runtime_lock(p);
3014 h = p->runqhead;
3015 t = p->runqtail;
3016 s = p->runqsize;
3017 if(t == h) {
3018 runtime_unlock(p);
3019 return nil;
3020 }
3021 gp = p->runq[h++];
3022 if(h == s)
3023 h = 0;
3024 p->runqhead = h;
3025 runtime_unlock(p);
3026 return gp;
3027 }
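
// The local run queue is a circular buffer guarded by the P's lock:
// runqhead == runqtail means empty, and runqput always leaves one slot
// unused so a full queue can be distinguished from an empty one. Below is
// a minimal sketch of how the indices map to a queue length under that
// convention; example_runqlen is hypothetical and not used by the
// scheduler (callers would need to hold p's lock for an exact answer).
static int32 example_runqlen(P*) __attribute__ ((unused));

static int32
example_runqlen(P *p)
{
	int32 n;

	n = p->runqtail - p->runqhead;
	if(n < 0)
		n += p->runqsize;
	return n;
}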
3028
3029 // Grow local runnable queue.
3030 // TODO(dvyukov): consider using fixed-size array
3031 // and transfer excess to the global list (local queue can grow way too big).
3032 static void
3033 runqgrow(P *p)
3034 {
3035 G **q;
3036 int32 s, t, h, t2;
3037
3038 h = p->runqhead;
3039 t = p->runqtail;
3040 s = p->runqsize;
3041 t2 = 0;
3042 q = runtime_malloc(2*s*sizeof(*q));
3043 while(t != h) {
3044 q[t2++] = p->runq[h++];
3045 if(h == s)
3046 h = 0;
3047 }
3048 runtime_free(p->runq);
3049 p->runq = q;
3050 p->runqhead = 0;
3051 p->runqtail = t2;
3052 p->runqsize = 2*s;
3053 }
3054
3055 // Steal half of the elements from the local runnable queue of p2
3056 // and put them onto the local runnable queue of p.
3057 // Returns one of the stolen elements (or nil if it failed).
3058 static G*
3059 runqsteal(P *p, P *p2)
3060 {
3061 G *gp, *gp1;
3062 int32 t, h, s, t2, h2, s2, c, i;
3063
3064 if(p2->runqhead == p2->runqtail)
3065 return nil;
3066 // sort locks to prevent deadlocks
3067 if(p < p2)
3068 runtime_lock(p);
3069 runtime_lock(p2);
3070 if(p2->runqhead == p2->runqtail) {
3071 runtime_unlock(p2);
3072 if(p < p2)
3073 runtime_unlock(p);
3074 return nil;
3075 }
3076 if(p >= p2)
3077 runtime_lock(p);
3078 // now we've locked both queues and know the victim is not empty
3079 h = p->runqhead;
3080 t = p->runqtail;
3081 s = p->runqsize;
3082 h2 = p2->runqhead;
3083 t2 = p2->runqtail;
3084 s2 = p2->runqsize;
3085 gp = p2->runq[h2++]; // return value
3086 if(h2 == s2)
3087 h2 = 0;
3088 // steal roughly half
3089 if(t2 > h2)
3090 c = (t2 - h2) / 2;
3091 else
3092 c = (s2 - h2 + t2) / 2;
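// c is half (rounded down) of what is left after taking gp above, so a
// victim queue holding 4 G's, for example, loses 2 in total: gp plus one
// more copied in the loop below.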
3093 // copy
3094 for(i = 0; i != c; i++) {
3095 // is the target queue full?
3096 if(t == h-1 || (h == 0 && t == s-1))
3097 break;
3098 // is the victim queue empty?
3099 if(t2 == h2)
3100 break;
3101 gp1 = p2->runq[h2++];
3102 if(h2 == s2)
3103 h2 = 0;
3104 p->runq[t++] = gp1;
3105 if(t == s)
3106 t = 0;
3107 }
3108 p->runqtail = t;
3109 p2->runqhead = h2;
3110 runtime_unlock(p2);
3111 runtime_unlock(p);
3112 return gp;
3113 }
3114
3115 void runtime_testSchedLocalQueue(void)
3116 __asm__(GOSYM_PREFIX "runtime.testSchedLocalQueue");
3117
3118 void
3119 runtime_testSchedLocalQueue(void)
3120 {
3121 P p;
3122 G gs[1000];
3123 int32 i, j;
3124
3125 runtime_memclr((byte*)&p, sizeof(p));
3126 p.runqsize = 1;
3127 p.runqhead = 0;
3128 p.runqtail = 0;
3129 p.runq = runtime_malloc(p.runqsize*sizeof(*p.runq));
3130
3131 for(i = 0; i < (int32)nelem(gs); i++) {
3132 if(runqget(&p) != nil)
3133 runtime_throw("runq is not empty initially");
3134 for(j = 0; j < i; j++)
3135 runqput(&p, &gs[i]);
3136 for(j = 0; j < i; j++) {
3137 if(runqget(&p) != &gs[i]) {
3138 runtime_printf("bad element at iter %d/%d\n", i, j);
3139 runtime_throw("bad element");
3140 }
3141 }
3142 if(runqget(&p) != nil)
3143 runtime_throw("runq is not empty afterwards");
3144 }
3145 }
3146
3147 void runtime_testSchedLocalQueueSteal(void)
3148 __asm__(GOSYM_PREFIX "runtime.testSchedLocalQueueSteal");
3149
3150 void
3151 runtime_testSchedLocalQueueSteal(void)
3152 {
3153 P p1, p2;
3154 G gs[1000], *gp;
3155 int32 i, j, s;
3156
3157 runtime_memclr((byte*)&p1, sizeof(p1));
3158 p1.runqsize = 1;
3159 p1.runqhead = 0;
3160 p1.runqtail = 0;
3161 p1.runq = runtime_malloc(p1.runqsize*sizeof(*p1.runq));
3162
3163 runtime_memclr((byte*)&p2, sizeof(p2));
3164 p2.runqsize = nelem(gs);
3165 p2.runqhead = 0;
3166 p2.runqtail = 0;
3167 p2.runq = runtime_malloc(p2.runqsize*sizeof(*p2.runq));
3168
3169 for(i = 0; i < (int32)nelem(gs); i++) {
3170 for(j = 0; j < i; j++) {
3171 gs[j].sig = 0;
3172 runqput(&p1, &gs[j]);
3173 }
3174 gp = runqsteal(&p2, &p1);
3175 s = 0;
3176 if(gp) {
3177 s++;
3178 gp->sig++;
3179 }
3180 while((gp = runqget(&p2)) != nil) {
3181 s++;
3182 gp->sig++;
3183 }
3184 while((gp = runqget(&p1)) != nil)
3185 gp->sig++;
3186 for(j = 0; j < i; j++) {
3187 if(gs[j].sig != 1) {
3188 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3189 runtime_throw("bad element");
3190 }
3191 }
3192 if(s != i/2 && s != i/2+1) {
3193 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3194 s, i/2, i/2+1, i);
3195 runtime_throw("bad steal");
3196 }
3197 }
3198 }
3199
3200 intgo runtime_debug_setMaxThreads(intgo)
3201 __asm__(GOSYM_PREFIX "runtime_debug.setMaxThreads");
3202
3203 intgo
3204 runtime_debug_setMaxThreads(intgo in)
3205 {
3206 intgo out;
3207
3208 runtime_lock(&runtime_sched);
3209 out = runtime_sched.maxmcount;
3210 runtime_sched.maxmcount = in;
3211 checkmcount();
3212 runtime_unlock(&runtime_sched);
3213 return out;
3214 }
3215
3216 void
3217 runtime_proc_scan(void (*addroot)(Obj))
3218 {
3219 addroot((Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
3220 }
3221
3222 // When a function calls a closure, it passes the closure value to
3223 // __go_set_closure immediately before the function call. When a
3224 // function uses a closure, it calls __go_get_closure immediately on
3225 // function entry. This is a hack, but it will work on any system.
3226 // It would be better to use the static chain register when there is
3227 // one. It is also worth considering expanding these functions
3228 // directly in the compiler.
3229
3230 void
3231 __go_set_closure(void* v)
3232 {
3233 g->closure = v;
3234 }
3235
3236 void *
3237 __go_get_closure(void)
3238 {
3239 return g->closure;
3240 }
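
// A rough sketch (illustrative only, names hypothetical) of the calling
// sequence this supports: for an indirect call through a closure value c,
// the caller does
//
//	__go_set_closure(c);
//	(*c->fn)(arg1, arg2);
//
// and the callee, if it actually needs its closed-over variables, starts
// with
//
//	struct closure *c = (struct closure *)__go_get_closure();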
3241
3242 // Return whether we are waiting for a GC. The gc toolchain uses
3243 // preemption instead.
3244 bool
3245 runtime_gcwaiting(void)
3246 {
3247 return runtime_sched.gcwaiting;
3248 }