libgo/runtime/proc.c

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 #include <limits.h>
   6 #include <stdlib.h>
   7 #include <pthread.h>
   8 #include <unistd.h>
   9
  10 #include "config.h"
  11 #include "runtime.h"
  12 #include "arch.h"
  13 #include "defs.h"
  14 #include "malloc.h"
  15 #include "go-defer.h"
  16
  17 #ifdef USING_SPLIT_STACK
  18
  19 /* FIXME: These are not declared anywhere.  */
  20
  21 extern void __splitstack_getcontext(void *context[10]);
  22
  23 extern void __splitstack_setcontext(void *context[10]);
  24
  25 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
  26
  27 extern void * __splitstack_resetcontext(void *context[10], size_t *);
  28
  29 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
  30                                void **);
  31
  32 extern void __splitstack_block_signals (int *, int *);
  33
  34 extern void __splitstack_block_signals_context (void *context[10], int *,
  35                                                 int *);
  36
  37 #endif
  38
  39 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
  40 # ifdef PTHREAD_STACK_MIN
  41 #  define StackMin PTHREAD_STACK_MIN
  42 # else
  43 #  define StackMin 8192
  44 # endif
  45 #else
  46 # define StackMin 2 * 1024 * 1024
  47 #endif
  48
  49 static void schedule(G*);
  50
  51 typedef struct Sched Sched;
  52
  53 M       runtime_m0;
  54 G       runtime_g0;     // idle goroutine for m0
  55
  56 #ifdef __rtems__
  57 #define __thread
  58 #endif
  59
  60 static __thread G *g;
  61 static __thread M *m;
  62
  63 #ifndef SETCONTEXT_CLOBBERS_TLS
  64
  65 static inline void
  66 initcontext(void)
  67 {
  68 }
  69
  70 static inline void
  71 fixcontext(ucontext_t *c __attribute__ ((unused)))
  72 {
  73 }
  74
  75 # else
  76
  77 # if defined(__x86_64__) && defined(__sun__)
  78
  79 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
  80 // register to that of the thread which called getcontext.  The effect
  81 // is that the address of all __thread variables changes.  This bug
  82 // also affects pthread_self() and pthread_getspecific.  We work
  83 // around it by clobbering the context field directly to keep %fs the
  84 // same.
  85
  86 static __thread greg_t fs;
  87
  88 static inline void
  89 initcontext(void)
  90 {
  91         ucontext_t c;
  92
  93         getcontext(&c);
  94         fs = c.uc_mcontext.gregs[REG_FSBASE];
  95 }
  96
  97 static inline void
  98 fixcontext(ucontext_t* c)
  99 {
 100         c->uc_mcontext.gregs[REG_FSBASE] = fs;
 101 }
 102
 103 # else
 104
 105 #  error unknown case for SETCONTEXT_CLOBBERS_TLS
 106
 107 # endif
 108
 109 #endif
 110
 111 // We can not always refer to the TLS variables directly.  The
 112 // compiler will call tls_get_addr to get the address of the variable,
 113 // and it may hold it in a register across a call to schedule.  When
 114 // we get back from the call we may be running in a different thread,
 115 // in which case the register now points to the TLS variable for a
 116 // different thread.  We use non-inlinable functions to avoid this
 117 // when necessary.
 118
 119 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
 120
 121 G*
 122 runtime_g(void)
 123 {
 124         return g;
 125 }
 126
 127 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
 128
 129 M*
 130 runtime_m(void)
 131 {
 132         return m;
 133 }
 134
 135 int32   runtime_gcwaiting;
 136
 137 // Go scheduler
 138 //
 139 // The go scheduler's job is to match ready-to-run goroutines (`g's)
 140 // with waiting-for-work schedulers (`m's).  If there are ready g's
 141 // and no waiting m's, ready() will start a new m running in a new
 142 // OS thread, so that all ready g's can run simultaneously, up to a limit.
 143 // For now, m's never go away.
 144 //
 145 // By default, Go keeps only one kernel thread (m) running user code
 146 // at a single time; other threads may be blocked in the operating system.
 147 // Setting the environment variable $GOMAXPROCS or calling
 148 // runtime.GOMAXPROCS() will change the number of user threads
 149 // allowed to execute simultaneously.  $GOMAXPROCS is thus an
 150 // approximation of the maximum number of cores to use.
 151 //
 152 // Even a program that can run without deadlock in a single process
 153 // might use more m's if given the chance.  For example, the prime
 154 // sieve will use as many m's as there are primes (up to runtime_sched.mmax),
 155 // allowing different stages of the pipeline to execute in parallel.
 156 // We could revisit this choice, only kicking off new m's for blocking
 157 // system calls, but that would limit the amount of parallel computation
 158 // that go would try to do.
 159 //
 160 // In general, one could imagine all sorts of refinements to the
 161 // scheduler, but the goal now is just to get something working on
 162 // Linux and OS X.
 163
 164 struct Sched {
 165         Lock;
 166
 167         G *gfree;       // available g's (status == Gdead)
 168         int32 goidgen;
 169
 170         G *ghead;       // g's waiting to run
 171         G *gtail;
 172         int32 gwait;    // number of g's waiting to run
 173         int32 gcount;   // number of g's that are alive
 174         int32 grunning; // number of g's running on cpu or in syscall
 175
 176         M *mhead;       // m's waiting for work
 177         int32 mwait;    // number of m's waiting for work
 178         int32 mcount;   // number of m's that have been created
 179
 180         volatile uint32 atomic; // atomic scheduling word (see below)
 181
 182         int32 profilehz;        // cpu profiling rate
 183
 184         bool init;  // running initialization
 185         bool lockmain;  // init called runtime.LockOSThread
 186
 187         Note    stopped;        // one g can set waitstop and wait here for m's to stop
 188 };
 189
 190 // The atomic word in sched is an atomic uint32 that
 191 // holds these fields.
 192 //
 193 //      [15 bits] mcpu          number of m's executing on cpu
 194 //      [15 bits] mcpumax       max number of m's allowed on cpu
 195 //      [1 bit] waitstop        some g is waiting on stopped
 196 //      [1 bit] gwaiting        gwait != 0
 197 //
 198 // These fields are the information needed by entersyscall
 199 // and exitsyscall to decide whether to coordinate with the
 200 // scheduler.  Packing them into a single machine word lets
 201 // them use a fast path with a single atomic read/write and
 202 // no lock/unlock.  This greatly reduces contention in
 203 // syscall- or cgo-heavy multithreaded programs.
 204 //
 205 // Except for entersyscall and exitsyscall, the manipulations
 206 // to these fields only happen while holding the schedlock,
 207 // so the routines holding schedlock only need to worry about
 208 // what entersyscall and exitsyscall do, not the other routines
 209 // (which also use the schedlock).
 210 //
 211 // In particular, entersyscall and exitsyscall only read mcpumax,
 212 // waitstop, and gwaiting.  They never write them.  Thus, writes to those
 213 // fields can be done (holding schedlock) without fear of write conflicts.
 214 // There may still be logic conflicts: for example, the set of waitstop must
 215 // be conditioned on mcpu >= mcpumax or else the wait may be a
 216 // spurious sleep.  The Promela model in proc.p verifies these accesses.
 217 enum {
 218         mcpuWidth = 15,
 219         mcpuMask = (1<<mcpuWidth) - 1,
 220         mcpuShift = 0,
 221         mcpumaxShift = mcpuShift + mcpuWidth,
 222         waitstopShift = mcpumaxShift + mcpuWidth,
 223         gwaitingShift = waitstopShift+1,
 224
 225         // The max value of GOMAXPROCS is constrained
 226         // by the max value we can store in the bit fields
 227         // of the atomic word.  Reserve a few high values
 228         // so that we can detect accidental decrement
 229         // beyond zero.
 230         maxgomaxprocs = mcpuMask - 10,
 231 };
 232
 233 #define atomic_mcpu(v)          (((v)>>mcpuShift)&mcpuMask)
 234 #define atomic_mcpumax(v)       (((v)>>mcpumaxShift)&mcpuMask)
 235 #define atomic_waitstop(v)      (((v)>>waitstopShift)&1)
 236 #define atomic_gwaiting(v)      (((v)>>gwaitingShift)&1)
 237
 238 Sched runtime_sched;
 239 int32 runtime_gomaxprocs;
 240 bool runtime_singleproc;
 241
 242 static bool canaddmcpu(void);
 243
 244 // An m that is waiting for notewakeup(&m->havenextg).  This may
 245 // only be accessed while the scheduler lock is held.  This is used to
 246 // minimize the number of times we call notewakeup while the scheduler
 247 // lock is held, since the m will normally move quickly to lock the
 248 // scheduler itself, producing lock contention.
 249 static M* mwakeup;
 250
 251 // Scheduling helpers.  Sched must be locked.
 252 static void gput(G*);   // put/get on ghead/gtail
 253 static G* gget(void);
 254 static void mput(M*);   // put/get on mhead
 255 static M* mget(G*);
 256 static void gfput(G*);  // put/get on gfree
 257 static G* gfget(void);
 258 static void matchmg(void);      // match m's to g's
 259 static void readylocked(G*);    // ready, but sched is locked
 260 static void mnextg(M*, G*);
 261 static void mcommoninit(M*);
 262
 263 void
 264 setmcpumax(uint32 n)
 265 {
 266         uint32 v, w;
 267
 268         for(;;) {
 269                 v = runtime_sched.atomic;
 270                 w = v;
 271                 w &= ~(mcpuMask<<mcpumaxShift);
 272                 w |= n<<mcpumaxShift;
 273                 if(runtime_cas(&runtime_sched.atomic, v, w))
 274                         break;
 275         }
 276 }
 277
 278 // First function run by a new goroutine.  This replaces gogocall.
 279 static void
 280 kickoff(void)
 281 {
 282         void (*fn)(void*);
 283
 284         fn = (void (*)(void*))(g->entry);
 285         fn(g->param);
 286         runtime_goexit();
 287 }
 288
 289 // Switch context to a different goroutine.  This is like longjmp.
 290 static void runtime_gogo(G*) __attribute__ ((noinline));
 291 static void
 292 runtime_gogo(G* newg)
 293 {
 294 #ifdef USING_SPLIT_STACK
 295         __splitstack_setcontext(&newg->stack_context[0]);
 296 #endif
 297         g = newg;
 298         newg->fromgogo = true;
 299         fixcontext(&newg->context);
 300         setcontext(&newg->context);
 301         runtime_throw("gogo setcontext returned");
 302 }
 303
 304 // Save context and call fn passing g as a parameter.  This is like
 305 // setjmp.  Because getcontext always returns 0, unlike setjmp, we use
 306 // g->fromgogo as a code.  It will be true if we got here via
 307 // setcontext.  g == nil the first time this is called in a new m.
 308 static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
 309 static void
 310 runtime_mcall(void (*pfn)(G*))
 311 {
 312         M *mp;
 313         G *gp;
 314 #ifndef USING_SPLIT_STACK
 315         int i;
 316 #endif
 317
 318         // Ensure that all registers are on the stack for the garbage
 319         // collector.
 320         __builtin_unwind_init();
 321
 322         mp = m;
 323         gp = g;
 324         if(gp == mp->g0)
 325                 runtime_throw("runtime: mcall called on m->g0 stack");
 326
 327         if(gp != nil) {
 328
 329 #ifdef USING_SPLIT_STACK
 330                 __splitstack_getcontext(&g->stack_context[0]);
 331 #else
 332                 gp->gcnext_sp = &i;
 333 #endif
 334                 gp->fromgogo = false;
 335                 getcontext(&gp->context);
 336
 337                 // When we return from getcontext, we may be running
 338                 // in a new thread.  That means that m and g may have
 339                 // changed.  They are global variables so we will
 340                 // reload them, but the addresses of m and g may be
 341                 // cached in our local stack frame, and those
 342                 // addresses may be wrong.  Call functions to reload
 343                 // the values for this thread.
 344                 mp = runtime_m();
 345                 gp = runtime_g();
 346         }
 347         if (gp == nil || !gp->fromgogo) {
 348 #ifdef USING_SPLIT_STACK
 349                 __splitstack_setcontext(&mp->g0->stack_context[0]);
 350 #endif
 351                 mp->g0->entry = (byte*)pfn;
 352                 mp->g0->param = gp;
 353
 354                 // It's OK to set g directly here because this case
 355                 // can not occur if we got here via a setcontext to
 356                 // the getcontext call just above.
 357                 g = mp->g0;
 358
 359                 fixcontext(&mp->g0->context);
 360                 setcontext(&mp->g0->context);
 361                 runtime_throw("runtime: mcall function returned");
 362         }
 363 }
 364
 365 // Keep trace of scavenger's goroutine for deadlock detection.
 366 static G *scvg;
 367
 368 // The bootstrap sequence is:
 369 //
 370 //      call osinit
 371 //      call schedinit
 372 //      make & queue new G
 373 //      call runtime_mstart
 374 //
 375 // The new G calls runtime_main.
 376 void
 377 runtime_schedinit(void)
 378 {
 379         int32 n;
 380         const byte *p;
 381
 382         m = &runtime_m0;
 383         g = &runtime_g0;
 384         m->g0 = g;
 385         m->curg = g;
 386         g->m = m;
 387
 388         initcontext();
 389
 390         m->nomemprof++;
 391         runtime_mallocinit();
 392         mcommoninit(m);
 393
 394         runtime_goargs();
 395         runtime_goenvs();
 396
 397         // For debugging:
 398         // Allocate internal symbol table representation now,
 399         // so that we don't need to call malloc when we crash.
 400         // runtime_findfunc(0);
 401
 402         runtime_gomaxprocs = 1;
 403         p = runtime_getenv("GOMAXPROCS");
 404         if(p != nil && (n = runtime_atoi(p)) != 0) {
 405                 if(n > maxgomaxprocs)
 406                         n = maxgomaxprocs;
 407                 runtime_gomaxprocs = n;
 408         }
 409         // wait for the main goroutine to start before taking
 410         // GOMAXPROCS into account.
 411         setmcpumax(1);
 412         runtime_singleproc = runtime_gomaxprocs == 1;
 413
 414         canaddmcpu();   // mcpu++ to account for bootstrap m
 415         m->helpgc = 1;  // flag to tell schedule() to mcpu--
 416         runtime_sched.grunning++;
 417
 418         // Can not enable GC until all roots are registered.
 419         // mstats.enablegc = 1;
 420         m->nomemprof--;
 421 }
 422
 423 extern void main_init(void) __asm__ ("__go_init_main");
 424 extern void main_main(void) __asm__ ("main.main");
 425
 426 // The main goroutine.
 427 void
 428 runtime_main(void)
 429 {
 430         // Lock the main goroutine onto this, the main OS thread,
 431         // during initialization.  Most programs won't care, but a few
 432         // do require certain calls to be made by the main thread.
 433         // Those can arrange for main.main to run in the main thread
 434         // by calling runtime.LockOSThread during initialization
 435         // to preserve the lock.
 436         runtime_LockOSThread();
 437         // From now on, newgoroutines may use non-main threads.
 438         setmcpumax(runtime_gomaxprocs);
 439         runtime_sched.init = true;
 440         scvg = __go_go(runtime_MHeap_Scavenger, nil);
 441         main_init();
 442         runtime_sched.init = false;
 443         if(!runtime_sched.lockmain)
 444                 runtime_UnlockOSThread();
 445
 446         // For gccgo we have to wait until after main is initialized
 447         // to enable GC, because initializing main registers the GC
 448         // roots.
 449         mstats.enablegc = 1;
 450
 451         // The deadlock detection has false negatives.
 452         // Let scvg start up, to eliminate the false negative
 453         // for the trivial program func main() { select{} }.
 454         runtime_gosched();
 455
 456         main_main();
 457         runtime_exit(0);
 458         for(;;)
 459                 *(int32*)0 = 0;
 460 }
 461
 462 // Lock the scheduler.
 463 static void
 464 schedlock(void)
 465 {
 466         runtime_lock(&runtime_sched);
 467 }
 468
 469 // Unlock the scheduler.
 470 static void
 471 schedunlock(void)
 472 {
 473         M *m;
 474
 475         m = mwakeup;
 476         mwakeup = nil;
 477         runtime_unlock(&runtime_sched);
 478         if(m != nil)
 479                 runtime_notewakeup(&m->havenextg);
 480 }
 481
 482 void
 483 runtime_goexit(void)
 484 {
 485         g->status = Gmoribund;
 486         runtime_gosched();
 487 }
 488
 489 void
 490 runtime_goroutineheader(G *g)
 491 {
 492         const char *status;
 493
 494         switch(g->status) {
 495         case Gidle:
 496                 status = "idle";
 497                 break;
 498         case Grunnable:
 499                 status = "runnable";
 500                 break;
 501         case Grunning:
 502                 status = "running";
 503                 break;
 504         case Gsyscall:
 505                 status = "syscall";
 506                 break;
 507         case Gwaiting:
 508                 if(g->waitreason)
 509                         status = g->waitreason;
 510                 else
 511                         status = "waiting";
 512                 break;
 513         case Gmoribund:
 514                 status = "moribund";
 515                 break;
 516         default:
 517                 status = "???";
 518                 break;
 519         }
 520         runtime_printf("goroutine %d [%s]:\n", g->goid, status);
 521 }
 522
 523 void
 524 runtime_tracebackothers(G *me)
 525 {
 526         G *g;
 527
 528         for(g = runtime_allg; g != nil; g = g->alllink) {
 529                 if(g == me || g->status == Gdead)
 530                         continue;
 531                 runtime_printf("\n");
 532                 runtime_goroutineheader(g);
 533                 // runtime_traceback(g->sched.pc, g->sched.sp, 0, g);
 534         }
 535 }
 536
 537 // Mark this g as m's idle goroutine.
 538 // This functionality might be used in environments where programs
 539 // are limited to a single thread, to simulate a select-driven
 540 // network server.  It is not exposed via the standard runtime API.
 541 void
 542 runtime_idlegoroutine(void)
 543 {
 544         if(g->idlem != nil)
 545                 runtime_throw("g is already an idle goroutine");
 546         g->idlem = m;
 547 }
 548
 549 static void
 550 mcommoninit(M *m)
 551 {
 552         m->id = runtime_sched.mcount++;
 553         m->fastrand = 0x49f6428aUL + m->id + runtime_cputicks();
 554
 555         if(m->mcache == nil)
 556                 m->mcache = runtime_allocmcache();
 557
 558         runtime_callers(1, m->createstack, nelem(m->createstack));
 559
 560         // Add to runtime_allm so garbage collector doesn't free m
 561         // when it is just in a register or thread-local storage.
 562         m->alllink = runtime_allm;
 563         // runtime_NumCgoCall() iterates over allm w/o schedlock,
 564         // so we need to publish it safely.
 565         runtime_atomicstorep(&runtime_allm, m);
 566 }
 567
 568 // Try to increment mcpu.  Report whether succeeded.
 569 static bool
 570 canaddmcpu(void)
 571 {
 572         uint32 v;
 573
 574         for(;;) {
 575                 v = runtime_sched.atomic;
 576                 if(atomic_mcpu(v) >= atomic_mcpumax(v))
 577                         return 0;
 578                 if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
 579                         return 1;
 580         }
 581 }
 582
 583 // Put on `g' queue.  Sched must be locked.
 584 static void
 585 gput(G *g)
 586 {
 587         M *m;
 588
 589         // If g is wired, hand it off directly.
 590         if((m = g->lockedm) != nil && canaddmcpu()) {
 591                 mnextg(m, g);
 592                 return;
 593         }
 594
 595         // If g is the idle goroutine for an m, hand it off.
 596         if(g->idlem != nil) {
 597                 if(g->idlem->idleg != nil) {
 598                         runtime_printf("m%d idle out of sync: g%d g%d\n",
 599                                 g->idlem->id,
 600                                 g->idlem->idleg->goid, g->goid);
 601                         runtime_throw("runtime: double idle");
 602                 }
 603                 g->idlem->idleg = g;
 604                 return;
 605         }
 606
 607         g->schedlink = nil;
 608         if(runtime_sched.ghead == nil)
 609                 runtime_sched.ghead = g;
 610         else
 611                 runtime_sched.gtail->schedlink = g;
 612         runtime_sched.gtail = g;
 613
 614         // increment gwait.
 615         // if it transitions to nonzero, set atomic gwaiting bit.
 616         if(runtime_sched.gwait++ == 0)
 617                 runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
 618 }
 619
 620 // Report whether gget would return something.
 621 static bool
 622 haveg(void)
 623 {
 624         return runtime_sched.ghead != nil || m->idleg != nil;
 625 }
 626
 627 // Get from `g' queue.  Sched must be locked.
 628 static G*
 629 gget(void)
 630 {
 631         G *g;
 632
 633         g = runtime_sched.ghead;
 634         if(g){
 635                 runtime_sched.ghead = g->schedlink;
 636                 if(runtime_sched.ghead == nil)
 637                         runtime_sched.gtail = nil;
 638                 // decrement gwait.
 639                 // if it transitions to zero, clear atomic gwaiting bit.
 640                 if(--runtime_sched.gwait == 0)
 641                         runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
 642         } else if(m->idleg != nil) {
 643                 g = m->idleg;
 644                 m->idleg = nil;
 645         }
 646         return g;
 647 }
 648
 649 // Put on `m' list.  Sched must be locked.
 650 static void
 651 mput(M *m)
 652 {
 653         m->schedlink = runtime_sched.mhead;
 654         runtime_sched.mhead = m;
 655         runtime_sched.mwait++;
 656 }
 657
 658 // Get an `m' to run `g'.  Sched must be locked.
 659 static M*
 660 mget(G *g)
 661 {
 662         M *m;
 663
 664         // if g has its own m, use it.
 665         if(g && (m = g->lockedm) != nil)
 666                 return m;
 667
 668         // otherwise use general m pool.
 669         if((m = runtime_sched.mhead) != nil){
 670                 runtime_sched.mhead = m->schedlink;
 671                 runtime_sched.mwait--;
 672         }
 673         return m;
 674 }
 675
 676 // Mark g ready to run.
 677 void
 678 runtime_ready(G *g)
 679 {
 680         schedlock();
 681         readylocked(g);
 682         schedunlock();
 683 }
 684
 685 // Mark g ready to run.  Sched is already locked.
 686 // G might be running already and about to stop.
 687 // The sched lock protects g->status from changing underfoot.
 688 static void
 689 readylocked(G *g)
 690 {
 691         if(g->m){
 692                 // Running on another machine.
 693                 // Ready it when it stops.
 694                 g->readyonstop = 1;
 695                 return;
 696         }
 697
 698         // Mark runnable.
 699         if(g->status == Grunnable || g->status == Grunning) {
 700                 runtime_printf("goroutine %d has status %d\n", g->goid, g->status);
 701                 runtime_throw("bad g->status in ready");
 702         }
 703         g->status = Grunnable;
 704
 705         gput(g);
 706         matchmg();
 707 }
 708
 709 // Same as readylocked but a different symbol so that
 710 // debuggers can set a breakpoint here and catch all
 711 // new goroutines.
 712 static void
 713 newprocreadylocked(G *g)
 714 {
 715         readylocked(g);
 716 }
 717
 718 // Pass g to m for running.
 719 // Caller has already incremented mcpu.
 720 static void
 721 mnextg(M *m, G *g)
 722 {
 723         runtime_sched.grunning++;
 724         m->nextg = g;
 725         if(m->waitnextg) {
 726                 m->waitnextg = 0;
 727                 if(mwakeup != nil)
 728                         runtime_notewakeup(&mwakeup->havenextg);
 729                 mwakeup = m;
 730         }
 731 }
 732
 733 // Get the next goroutine that m should run.
 734 // Sched must be locked on entry, is unlocked on exit.
 735 // Makes sure that at most $GOMAXPROCS g's are
 736 // running on cpus (not in system calls) at any given time.
 737 static G*
 738 nextgandunlock(void)
 739 {
 740         G *gp;
 741         uint32 v;
 742
 743 top:
 744         if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
 745                 runtime_throw("negative mcpu");
 746
 747         // If there is a g waiting as m->nextg, the mcpu++
 748         // happened before it was passed to mnextg.
 749         if(m->nextg != nil) {
 750                 gp = m->nextg;
 751                 m->nextg = nil;
 752                 schedunlock();
 753                 return gp;
 754         }
 755
 756         if(m->lockedg != nil) {
 757                 // We can only run one g, and it's not available.
 758                 // Make sure some other cpu is running to handle
 759                 // the ordinary run queue.
 760                 if(runtime_sched.gwait != 0) {
 761                         matchmg();
 762                         // m->lockedg might have been on the queue.
 763                         if(m->nextg != nil) {
 764                                 gp = m->nextg;
 765                                 m->nextg = nil;
 766                                 schedunlock();
 767                                 return gp;
 768                         }
 769                 }
 770         } else {
 771                 // Look for work on global queue.
 772                 while(haveg() && canaddmcpu()) {
 773                         gp = gget();
 774                         if(gp == nil)
 775                                 runtime_throw("gget inconsistency");
 776
 777                         if(gp->lockedm) {
 778                                 mnextg(gp->lockedm, gp);
 779                                 continue;
 780                         }
 781                         runtime_sched.grunning++;
 782                         schedunlock();
 783                         return gp;
 784                 }
 785
 786                 // The while loop ended either because the g queue is empty
 787                 // or because we have maxed out our m procs running go
 788                 // code (mcpu >= mcpumax).  We need to check that
 789                 // concurrent actions by entersyscall/exitsyscall cannot
 790                 // invalidate the decision to end the loop.
 791                 //
 792                 // We hold the sched lock, so no one else is manipulating the
 793                 // g queue or changing mcpumax.  Entersyscall can decrement
 794                 // mcpu, but if does so when there is something on the g queue,
 795                 // the gwait bit will be set, so entersyscall will take the slow path
 796                 // and use the sched lock.  So it cannot invalidate our decision.
 797                 //
 798                 // Wait on global m queue.
 799                 mput(m);
 800         }
 801
 802         // Look for deadlock situation.
 803         // There is a race with the scavenger that causes false negatives:
 804         // if the scavenger is just starting, then we have
 805         //      scvg != nil && grunning == 0 && gwait == 0
 806         // and we do not detect a deadlock.  It is possible that we should
 807         // add that case to the if statement here, but it is too close to Go 1
 808         // to make such a subtle change.  Instead, we work around the
 809         // false negative in trivial programs by calling runtime.gosched
 810         // from the main goroutine just before main.main.
 811         // See runtime_main above.
 812         //
 813         // On a related note, it is also possible that the scvg == nil case is
 814         // wrong and should include gwait, but that does not happen in
 815         // standard Go programs, which all start the scavenger.
 816         //
 817         if((scvg == nil && runtime_sched.grunning == 0) ||
 818            (scvg != nil && runtime_sched.grunning == 1 && runtime_sched.gwait == 0 &&
 819             (scvg->status == Grunning || scvg->status == Gsyscall))) {
 820                 runtime_throw("all goroutines are asleep - deadlock!");
 821         }
 822
 823         m->nextg = nil;
 824         m->waitnextg = 1;
 825         runtime_noteclear(&m->havenextg);
 826
 827         // Stoptheworld is waiting for all but its cpu to go to stop.
 828         // Entersyscall might have decremented mcpu too, but if so
 829         // it will see the waitstop and take the slow path.
 830         // Exitsyscall never increments mcpu beyond mcpumax.
 831         v = runtime_atomicload(&runtime_sched.atomic);
 832         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
 833                 // set waitstop = 0 (known to be 1)
 834                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
 835                 runtime_notewakeup(&runtime_sched.stopped);
 836         }
 837         schedunlock();
 838
 839         runtime_notesleep(&m->havenextg);
 840         if(m->helpgc) {
 841                 runtime_gchelper();
 842                 m->helpgc = 0;
 843                 runtime_lock(&runtime_sched);
 844                 goto top;
 845         }
 846         if((gp = m->nextg) == nil)
 847                 runtime_throw("bad m->nextg in nextgoroutine");
 848         m->nextg = nil;
 849         return gp;
 850 }
 851
 852 int32
 853 runtime_helpgc(bool *extra)
 854 {
 855         M *mp;
 856         int32 n, max;
 857
 858         // Figure out how many CPUs to use.
 859         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
 860         max = runtime_gomaxprocs;
 861         if(max > runtime_ncpu)
 862                 max = runtime_ncpu > 0 ? runtime_ncpu : 1;
 863         if(max > MaxGcproc)
 864                 max = MaxGcproc;
 865
 866         // We're going to use one CPU no matter what.
 867         // Figure out the max number of additional CPUs.
 868         max--;
 869
 870         runtime_lock(&runtime_sched);
 871         n = 0;
 872         while(n < max && (mp = mget(nil)) != nil) {
 873                 n++;
 874                 mp->helpgc = 1;
 875                 mp->waitnextg = 0;
 876                 runtime_notewakeup(&mp->havenextg);
 877         }
 878         runtime_unlock(&runtime_sched);
 879         if(extra)
 880                 *extra = n != max;
 881         return n;
 882 }
 883
 884 void
 885 runtime_stoptheworld(void)
 886 {
 887         uint32 v;
 888
 889         schedlock();
 890         runtime_gcwaiting = 1;
 891
 892         setmcpumax(1);
 893
 894         // while mcpu > 1
 895         for(;;) {
 896                 v = runtime_sched.atomic;
 897                 if(atomic_mcpu(v) <= 1)
 898                         break;
 899
 900                 // It would be unsafe for multiple threads to be using
 901                 // the stopped note at once, but there is only
 902                 // ever one thread doing garbage collection.
 903                 runtime_noteclear(&runtime_sched.stopped);
 904                 if(atomic_waitstop(v))
 905                         runtime_throw("invalid waitstop");
 906
 907                 // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
 908                 // still being true.
 909                 if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
 910                         continue;
 911
 912                 schedunlock();
 913                 runtime_notesleep(&runtime_sched.stopped);
 914                 schedlock();
 915         }
 916         runtime_singleproc = runtime_gomaxprocs == 1;
 917         schedunlock();
 918 }
 919
 920 void
 921 runtime_starttheworld(bool extra)
 922 {
 923         M *m;
 924
 925         schedlock();
 926         runtime_gcwaiting = 0;
 927         setmcpumax(runtime_gomaxprocs);
 928         matchmg();
 929         if(extra && canaddmcpu()) {
 930                 // Start a new m that will (we hope) be idle
 931                 // and so available to help when the next
 932                 // garbage collection happens.
 933                 // canaddmcpu above did mcpu++
 934                 // (necessary, because m will be doing various
 935                 // initialization work so is definitely running),
 936                 // but m is not running a specific goroutine,
 937                 // so set the helpgc flag as a signal to m's
 938                 // first schedule(nil) to mcpu-- and grunning--.
 939                 m = runtime_newm();
 940                 m->helpgc = 1;
 941                 runtime_sched.grunning++;
 942         }
 943         schedunlock();
 944 }
 945
 946 // Called to start an M.
 947 void*
 948 runtime_mstart(void* mp)
 949 {
 950         m = (M*)mp;
 951         g = m->g0;
 952
 953         initcontext();
 954
 955         g->entry = nil;
 956         g->param = nil;
 957
 958         // Record top of stack for use by mcall.
 959         // Once we call schedule we're never coming back,
 960         // so other calls can reuse this stack space.
 961 #ifdef USING_SPLIT_STACK
 962         __splitstack_getcontext(&g->stack_context[0]);
 963 #else
 964         g->gcinitial_sp = &mp;
 965         // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
 966         // is the top of the stack, not the bottom.
 967         g->gcstack_size = 0;
 968         g->gcnext_sp = &mp;
 969 #endif
 970         getcontext(&g->context);
 971
 972         if(g->entry != nil) {
 973                 // Got here from mcall.
 974                 void (*pfn)(G*) = (void (*)(G*))g->entry;
 975                 G* gp = (G*)g->param;
 976                 pfn(gp);
 977                 *(int*)0x21 = 0x21;
 978         }
 979         runtime_minit();
 980
 981 #ifdef USING_SPLIT_STACK
 982         {
 983           int dont_block_signals = 0;
 984           __splitstack_block_signals(&dont_block_signals, nil);
 985         }
 986 #endif
 987
 988         // Install signal handlers; after minit so that minit can
 989         // prepare the thread to be able to handle the signals.
 990         if(m == &runtime_m0)
 991                 runtime_initsig();
 992
 993         schedule(nil);
 994         return nil;
 995 }
 996
 997 typedef struct CgoThreadStart CgoThreadStart;
 998 struct CgoThreadStart
 999 {
1000         M *m;
1001         G *g;
1002         void (*fn)(void);
1003 };
1004
1005 // Kick off new m's as needed (up to mcpumax).
1006 // Sched is locked.
1007 static void
1008 matchmg(void)
1009 {
1010         G *gp;
1011         M *mp;
1012
1013         if(m->mallocing || m->gcing)
1014                 return;
1015
1016         while(haveg() && canaddmcpu()) {
1017                 gp = gget();
1018                 if(gp == nil)
1019                         runtime_throw("gget inconsistency");
1020
1021                 // Find the m that will run gp.
1022                 if((mp = mget(gp)) == nil)
1023                         mp = runtime_newm();
1024                 mnextg(mp, gp);
1025         }
1026 }
1027
1028 // Create a new m.  It will start off with a call to runtime_mstart.
1029 M*
1030 runtime_newm(void)
1031 {
1032         M *m;
1033         pthread_attr_t attr;
1034         pthread_t tid;
1035
1036         m = runtime_malloc(sizeof(M));
1037         mcommoninit(m);
1038         m->g0 = runtime_malg(-1, nil, nil);
1039
1040         if(pthread_attr_init(&attr) != 0)
1041                 runtime_throw("pthread_attr_init");
1042         if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
1043                 runtime_throw("pthread_attr_setdetachstate");
1044
1045 #ifndef PTHREAD_STACK_MIN
1046 #define PTHREAD_STACK_MIN 8192
1047 #endif
1048         if(pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
1049                 runtime_throw("pthread_attr_setstacksize");
1050
1051         if(pthread_create(&tid, &attr, runtime_mstart, m) != 0)
1052                 runtime_throw("pthread_create");
1053
1054         return m;
1055 }
1056
1057 // One round of scheduler: find a goroutine and run it.
1058 // The argument is the goroutine that was running before
1059 // schedule was called, or nil if this is the first call.
1060 // Never returns.
1061 static void
1062 schedule(G *gp)
1063 {
1064         int32 hz;
1065         uint32 v;
1066
1067         schedlock();
1068         if(gp != nil) {
1069                 // Just finished running gp.
1070                 gp->m = nil;
1071                 runtime_sched.grunning--;
1072
1073                 // atomic { mcpu-- }
1074                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1075                 if(atomic_mcpu(v) > maxgomaxprocs)
1076                         runtime_throw("negative mcpu in scheduler");
1077
1078                 switch(gp->status){
1079                 case Grunnable:
1080                 case Gdead:
1081                         // Shouldn't have been running!
1082                         runtime_throw("bad gp->status in sched");
1083                 case Grunning:
1084                         gp->status = Grunnable;
1085                         gput(gp);
1086                         break;
1087                 case Gmoribund:
1088                         gp->status = Gdead;
1089                         if(gp->lockedm) {
1090                                 gp->lockedm = nil;
1091                                 m->lockedg = nil;
1092                         }
1093                         gp->idlem = nil;
1094                         gfput(gp);
1095                         if(--runtime_sched.gcount == 0)
1096                                 runtime_exit(0);
1097                         break;
1098                 }
1099                 if(gp->readyonstop){
1100                         gp->readyonstop = 0;
1101                         readylocked(gp);
1102                 }
1103         } else if(m->helpgc) {
1104                 // Bootstrap m or new m started by starttheworld.
1105                 // atomic { mcpu-- }
1106                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1107                 if(atomic_mcpu(v) > maxgomaxprocs)
1108                         runtime_throw("negative mcpu in scheduler");
1109                 // Compensate for increment in starttheworld().
1110                 runtime_sched.grunning--;
1111                 m->helpgc = 0;
1112         } else if(m->nextg != nil) {
1113                 // New m started by matchmg.
1114         } else {
1115                 runtime_throw("invalid m state in scheduler");
1116         }
1117
1118         // Find (or wait for) g to run.  Unlocks runtime_sched.
1119         gp = nextgandunlock();
1120         gp->readyonstop = 0;
1121         gp->status = Grunning;
1122         m->curg = gp;
1123         gp->m = m;
1124
1125         // Check whether the profiler needs to be turned on or off.
1126         hz = runtime_sched.profilehz;
1127         if(m->profilehz != hz)
1128                 runtime_resetcpuprofiler(hz);
1129
1130         runtime_gogo(gp);
1131 }
1132
1133 // Enter scheduler.  If g->status is Grunning,
1134 // re-queues g and runs everyone else who is waiting
1135 // before running g again.  If g->status is Gmoribund,
1136 // kills off g.
1137 void
1138 runtime_gosched(void)
1139 {
1140         if(m->locks != 0)
1141                 runtime_throw("gosched holding locks");
1142         if(g == m->g0)
1143                 runtime_throw("gosched of g0");
1144         runtime_mcall(schedule);
1145 }
1146
1147 // The goroutine g is about to enter a system call.
1148 // Record that it's not using the cpu anymore.
1149 // This is called only from the go syscall library and cgocall,
1150 // not from the low-level system calls used by the runtime.
1151 //
1152 // Entersyscall cannot split the stack: the runtime_gosave must
1153 // make g->sched refer to the caller's stack segment, because
1154 // entersyscall is going to return immediately after.
1155 // It's okay to call matchmg and notewakeup even after
1156 // decrementing mcpu, because we haven't released the
1157 // sched lock yet, so the garbage collector cannot be running.
1158
1159 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1160
1161 void
1162 runtime_entersyscall(void)
1163 {
1164         uint32 v;
1165
1166         if(m->profilehz > 0)
1167                 runtime_setprof(false);
1168
1169         // Leave SP around for gc and traceback.
1170 #ifdef USING_SPLIT_STACK
1171         g->gcstack = __splitstack_find(NULL, NULL, &g->gcstack_size,
1172                                        &g->gcnext_segment, &g->gcnext_sp,
1173                                        &g->gcinitial_sp);
1174 #else
1175         g->gcnext_sp = (byte *) &v;
1176 #endif
1177
1178         // Save the registers in the g structure so that any pointers
1179         // held in registers will be seen by the garbage collector.
1180         // We could use getcontext here, but setjmp is more efficient
1181         // because it doesn't need to save the signal mask.
1182         setjmp(g->gcregs);
1183
1184         g->status = Gsyscall;
1185
1186         // Fast path.
1187         // The slow path inside the schedlock/schedunlock will get
1188         // through without stopping if it does:
1189         //      mcpu--
1190         //      gwait not true
1191         //      waitstop && mcpu <= mcpumax not true
1192         // If we can do the same with a single atomic add,
1193         // then we can skip the locks.
1194         v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1195         if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
1196                 return;
1197
1198         schedlock();
1199         v = runtime_atomicload(&runtime_sched.atomic);
1200         if(atomic_gwaiting(v)) {
1201                 matchmg();
1202                 v = runtime_atomicload(&runtime_sched.atomic);
1203         }
1204         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1205                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
1206                 runtime_notewakeup(&runtime_sched.stopped);
1207         }
1208
1209         schedunlock();
1210 }
1211
1212 // The goroutine g exited its system call.
1213 // Arrange for it to run on a cpu again.
1214 // This is called only from the go syscall library, not
1215 // from the low-level system calls used by the runtime.
1216 void
1217 runtime_exitsyscall(void)
1218 {
1219         G *gp;
1220         uint32 v;
1221
1222         // Fast path.
1223         // If we can do the mcpu++ bookkeeping and
1224         // find that we still have mcpu <= mcpumax, then we can
1225         // start executing Go code immediately, without having to
1226         // schedlock/schedunlock.
1227         gp = g;
1228         v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
1229         if(m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1230                 // There's a cpu for us, so we can run.
1231                 gp->status = Grunning;
1232                 // Garbage collector isn't running (since we are),
1233                 // so okay to clear gcstack.
1234 #ifdef USING_SPLIT_STACK
1235                 gp->gcstack = nil;
1236 #endif
1237                 gp->gcnext_sp = nil;
1238                 runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1239
1240                 if(m->profilehz > 0)
1241                         runtime_setprof(true);
1242                 return;
1243         }
1244
1245         // Tell scheduler to put g back on the run queue:
1246         // mostly equivalent to g->status = Grunning,
1247         // but keeps the garbage collector from thinking
1248         // that g is running right now, which it's not.
1249         gp->readyonstop = 1;
1250
1251         // All the cpus are taken.
1252         // The scheduler will ready g and put this m to sleep.
1253         // When the scheduler takes g away from m,
1254         // it will undo the runtime_sched.mcpu++ above.
1255         runtime_gosched();
1256
1257         // Gosched returned, so we're allowed to run now.
1258         // Delete the gcstack information that we left for
1259         // the garbage collector during the system call.
1260         // Must wait until now because until gosched returns
1261         // we don't know for sure that the garbage collector
1262         // is not running.
1263 #ifdef USING_SPLIT_STACK
1264         gp->gcstack = nil;
1265 #endif
1266         gp->gcnext_sp = nil;
1267         runtime_memclr(gp->gcregs, sizeof gp->gcregs);
1268 }
1269
1270 // Allocate a new g, with a stack big enough for stacksize bytes.
1271 G*
1272 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1273 {
1274         G *newg;
1275
1276         newg = runtime_malloc(sizeof(G));
1277         if(stacksize >= 0) {
1278 #if USING_SPLIT_STACK
1279                 int dont_block_signals = 0;
1280
1281                 *ret_stack = __splitstack_makecontext(stacksize,
1282                                                       &newg->stack_context[0],
1283                                                       ret_stacksize);
1284                 __splitstack_block_signals_context(&newg->stack_context[0],
1285                                                    &dont_block_signals, nil);
1286 #else
1287                 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1288                 *ret_stacksize = stacksize;
1289                 newg->gcinitial_sp = *ret_stack;
1290                 newg->gcstack_size = stacksize;
1291 #endif
1292         }
1293         return newg;
1294 }
1295
1296 /* For runtime package testing.  */
1297
1298 void runtime_testing_entersyscall(void)
1299   __asm__("libgo_runtime.runtime.entersyscall");
1300
1301 void
1302 runtime_testing_entersyscall()
1303 {
1304         runtime_entersyscall();
1305 }
1306
1307 void runtime_testing_exitsyscall(void)
1308   __asm__("libgo_runtime.runtime.exitsyscall");
1309
1310 void
1311 runtime_testing_exitsyscall()
1312 {
1313         runtime_exitsyscall();
1314 }
1315
1316 G*
1317 __go_go(void (*fn)(void*), void* arg)
1318 {
1319         byte *sp;
1320         size_t spsize;
1321         G * volatile newg;      // volatile to avoid longjmp warning
1322
1323         schedlock();
1324
1325         if((newg = gfget()) != nil){
1326 #ifdef USING_SPLIT_STACK
1327                 int dont_block_signals = 0;
1328
1329                 sp = __splitstack_resetcontext(&newg->stack_context[0],
1330                                                &spsize);
1331                 __splitstack_block_signals_context(&newg->stack_context[0],
1332                                                    &dont_block_signals, nil);
1333 #else
1334                 sp = newg->gcinitial_sp;
1335                 spsize = newg->gcstack_size;
1336                 if(spsize == 0)
1337                         runtime_throw("bad spsize in __go_go");
1338                 newg->gcnext_sp = sp;
1339 #endif
1340         } else {
1341                 newg = runtime_malg(StackMin, &sp, &spsize);
1342                 if(runtime_lastg == nil)
1343                         runtime_allg = newg;
1344                 else
1345                         runtime_lastg->alllink = newg;
1346                 runtime_lastg = newg;
1347         }
1348         newg->status = Gwaiting;
1349         newg->waitreason = "new goroutine";
1350
1351         newg->entry = (byte*)fn;
1352         newg->param = arg;
1353         newg->gopc = (uintptr)__builtin_return_address(0);
1354
1355         runtime_sched.gcount++;
1356         runtime_sched.goidgen++;
1357         newg->goid = runtime_sched.goidgen;
1358
1359         if(sp == nil)
1360                 runtime_throw("nil g->stack0");
1361
1362         getcontext(&newg->context);
1363         newg->context.uc_stack.ss_sp = sp;
1364 #ifdef MAKECONTEXT_STACK_TOP
1365         newg->context.uc_stack.ss_sp += spsize;
1366 #endif
1367         newg->context.uc_stack.ss_size = spsize;
1368         makecontext(&newg->context, kickoff, 0);
1369
1370         newprocreadylocked(newg);
1371         schedunlock();
1372
1373         return newg;
1374 //printf(" goid=%d\n", newg->goid);
1375 }
1376
1377 // Put on gfree list.  Sched must be locked.
1378 static void
1379 gfput(G *g)
1380 {
1381         g->schedlink = runtime_sched.gfree;
1382         runtime_sched.gfree = g;
1383 }
1384
1385 // Get from gfree list.  Sched must be locked.
1386 static G*
1387 gfget(void)
1388 {
1389         G *g;
1390
1391         g = runtime_sched.gfree;
1392         if(g)
1393                 runtime_sched.gfree = g->schedlink;
1394         return g;
1395 }
1396
1397 // Run all deferred functions for the current goroutine.
1398 static void
1399 rundefer(void)
1400 {
1401         Defer *d;
1402
1403         while((d = g->defer) != nil) {
1404                 void (*pfn)(void*);
1405
1406                 pfn = d->__pfn;
1407                 d->__pfn = nil;
1408                 if (pfn != nil)
1409                         (*pfn)(d->__arg);
1410                 g->defer = d->__next;
1411                 runtime_free(d);
1412         }
1413 }
1414
1415 void runtime_Goexit (void) asm ("libgo_runtime.runtime.Goexit");
1416
1417 void
1418 runtime_Goexit(void)
1419 {
1420         rundefer();
1421         runtime_goexit();
1422 }
1423
1424 void runtime_Gosched (void) asm ("libgo_runtime.runtime.Gosched");
1425
1426 void
1427 runtime_Gosched(void)
1428 {
1429         runtime_gosched();
1430 }
1431
1432 // Implementation of runtime.GOMAXPROCS.
1433 // delete when scheduler is stronger
1434 int32
1435 runtime_gomaxprocsfunc(int32 n)
1436 {
1437         int32 ret;
1438         uint32 v;
1439
1440         schedlock();
1441         ret = runtime_gomaxprocs;
1442         if(n <= 0)
1443                 n = ret;
1444         if(n > maxgomaxprocs)
1445                 n = maxgomaxprocs;
1446         runtime_gomaxprocs = n;
1447         if(runtime_gomaxprocs > 1)
1448                 runtime_singleproc = false;
1449         if(runtime_gcwaiting != 0) {
1450                 if(atomic_mcpumax(runtime_sched.atomic) != 1)
1451                         runtime_throw("invalid mcpumax during gc");
1452                 schedunlock();
1453                 return ret;
1454         }
1455
1456         setmcpumax(n);
1457
1458         // If there are now fewer allowed procs
1459         // than procs running, stop.
1460         v = runtime_atomicload(&runtime_sched.atomic);
1461         if((int32)atomic_mcpu(v) > n) {
1462                 schedunlock();
1463                 runtime_gosched();
1464                 return ret;
1465         }
1466         // handle more procs
1467         matchmg();
1468         schedunlock();
1469         return ret;
1470 }
1471
1472 void
1473 runtime_LockOSThread(void)
1474 {
1475         if(m == &runtime_m0 && runtime_sched.init) {
1476                 runtime_sched.lockmain = true;
1477                 return;
1478         }
1479         m->lockedg = g;
1480         g->lockedm = m;
1481 }
1482
1483 void
1484 runtime_UnlockOSThread(void)
1485 {
1486         if(m == &runtime_m0 && runtime_sched.init) {
1487                 runtime_sched.lockmain = false;
1488                 return;
1489         }
1490         m->lockedg = nil;
1491         g->lockedm = nil;
1492 }
1493
1494 bool
1495 runtime_lockedOSThread(void)
1496 {
1497         return g->lockedm != nil && m->lockedg != nil;
1498 }
1499
1500 // for testing of callbacks
1501
1502 _Bool runtime_golockedOSThread(void)
1503   asm("libgo_runtime.runtime.golockedOSThread");
1504
1505 _Bool
1506 runtime_golockedOSThread(void)
1507 {
1508         return runtime_lockedOSThread();
1509 }
1510
1511 // for testing of wire, unwire
1512 uint32
1513 runtime_mid()
1514 {
1515         return m->id;
1516 }
1517
1518 int32 runtime_NumGoroutine (void)
1519   __asm__ ("libgo_runtime.runtime.NumGoroutine");
1520
1521 int32
1522 runtime_NumGoroutine()
1523 {
1524         return runtime_sched.gcount;
1525 }
1526
1527 int32
1528 runtime_gcount(void)
1529 {
1530         return runtime_sched.gcount;
1531 }
1532
1533 int32
1534 runtime_mcount(void)
1535 {
1536         return runtime_sched.mcount;
1537 }
1538
1539 static struct {
1540         Lock;
1541         void (*fn)(uintptr*, int32);
1542         int32 hz;
1543         uintptr pcbuf[100];
1544 } prof;
1545
1546 // Called if we receive a SIGPROF signal.
1547 void
1548 runtime_sigprof(uint8 *pc __attribute__ ((unused)),
1549                 uint8 *sp __attribute__ ((unused)),
1550                 uint8 *lr __attribute__ ((unused)),
1551                 G *gp __attribute__ ((unused)))
1552 {
1553         // int32 n;
1554
1555         if(prof.fn == nil || prof.hz == 0)
1556                 return;
1557
1558         runtime_lock(&prof);
1559         if(prof.fn == nil) {
1560                 runtime_unlock(&prof);
1561                 return;
1562         }
1563         // n = runtime_gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
1564         // if(n > 0)
1565         //      prof.fn(prof.pcbuf, n);
1566         runtime_unlock(&prof);
1567 }
1568
1569 // Arrange to call fn with a traceback hz times a second.
1570 void
1571 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
1572 {
1573         // Force sane arguments.
1574         if(hz < 0)
1575                 hz = 0;
1576         if(hz == 0)
1577                 fn = nil;
1578         if(fn == nil)
1579                 hz = 0;
1580
1581         // Stop profiler on this cpu so that it is safe to lock prof.
1582         // if a profiling signal came in while we had prof locked,
1583         // it would deadlock.
1584         runtime_resetcpuprofiler(0);
1585
1586         runtime_lock(&prof);
1587         prof.fn = fn;
1588         prof.hz = hz;
1589         runtime_unlock(&prof);
1590         runtime_lock(&runtime_sched);
1591         runtime_sched.profilehz = hz;
1592         runtime_unlock(&runtime_sched);
1593
1594         if(hz != 0)
1595                 runtime_resetcpuprofiler(hz);
1596 }