runtime: Copy runtime_printf from other Go library.
libgo/runtime/mgc0.c
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Garbage collector.
6
7 #include <unistd.h>
8
9 #include "runtime.h"
10 #include "arch.h"
11 #include "malloc.h"
12
13 #ifdef USING_SPLIT_STACK
14
15 extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
16 void **);
17
18 extern void * __splitstack_find_context (void *context[10], size_t *, void **,
19 void **, void **);
20
21 #endif
22
23 enum {
24 Debug = 0,
25 PtrSize = sizeof(void*),
26 DebugMark = 0, // run second pass to check mark
27
28 // Four bits per word (see #defines below).
29 wordsPerBitmapWord = sizeof(void*)*8/4,
30 bitShift = sizeof(void*)*8/4,
31 };
32
33 // Bits in per-word bitmap.
34 // #defines because enum might not be able to hold the values.
35 //
36 // Each word in the bitmap describes wordsPerBitmapWord words
37 // of heap memory. There are 4 bitmap bits dedicated to each heap word,
38 // so on a 64-bit system there is one bitmap word per 16 heap words.
39 // The bits in the word are packed together by type first, then by
40 // heap location, so each 64-bit bitmap word consists of, from top to bottom,
41 // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
42 // then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
43 // This layout makes it easier to iterate over the bits of a given type.
44 //
45 // The bitmap starts at mheap.arena_start and extends *backward* from
46 // there. On a 64-bit system the off'th word in the arena is tracked by
47 // the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
48 // the only difference is that the divisor is 8.)
49 //
50 // To pull out the bits corresponding to a given pointer p, we use:
51 //
52 // off = p - (uintptr*)mheap.arena_start; // word offset
53 // b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
54 // shift = off % wordsPerBitmapWord
55 // bits = *b >> shift;
56 // /* then test bits & bitAllocated, bits & bitMarked, etc. */
57 //
58 #define bitAllocated ((uintptr)1<<(bitShift*0))
59 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
60 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
61 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
62 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
63
64 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
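// Worked example (added for clarity, not part of the original comment):
// on a 64-bit system wordsPerBitmapWord == 16 and bitShift == 16.
// For the heap word at word offset off == 37 from mheap.arena_start:
//
//	b     = (uintptr*)mheap.arena_start - 37/16 - 1;  // 3rd word before arena_start
//	shift = 37 % 16;                                  // == 5
//
// so in *b, bit 5 is bitAllocated<<shift, bit 21 is bitNoPointers<<shift,
// bit 37 is bitMarked<<shift, and bit 53 is bitSpecial<<shift for that word.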
65
66 // Holding worldsema grants an M the right to try to stop the world.
67 // The procedure is:
68 //
69 // runtime_semacquire(&runtime_worldsema);
70 // m->gcing = 1;
71 // runtime_stoptheworld();
72 //
73 // ... do stuff ...
74 //
75 // m->gcing = 0;
76 // runtime_semrelease(&runtime_worldsema);
77 // runtime_starttheworld();
78 //
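// runtime_gc and runtime_ReadMemStats below both follow this sequence.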
79 uint32 runtime_worldsema = 1;
80
81 // TODO: Make these per-M.
82 static uint64 nhandoff;
83
84 static int32 gctrace;
85
86 typedef struct Workbuf Workbuf;
87 struct Workbuf
88 {
89 Workbuf *next;
90 uintptr nobj;
91 byte *obj[512-2];
92 };
93
94 typedef struct Finalizer Finalizer;
95 struct Finalizer
96 {
97 void (*fn)(void*);
98 void *arg;
99 const struct __go_func_type *ft;
100 };
101
102 typedef struct FinBlock FinBlock;
103 struct FinBlock
104 {
105 FinBlock *alllink;
106 FinBlock *next;
107 int32 cnt;
108 int32 cap;
109 Finalizer fin[1];
110 };
111
112 static G *fing;
113 static FinBlock *finq; // list of finalizers that are to be executed
114 static FinBlock *finc; // cache of free blocks
115 static FinBlock *allfin; // list of all blocks
116 static Lock finlock;
117 static int32 fingwait;
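// Note on the finalizer machinery above (added; see handlespecial,
// runfinq, and mark below): handlespecial appends queued finalizers to
// finq, the fing goroutine runs runfinq to drain finq and recycles
// spent blocks onto finc, fingwait records that fing is blocked waiting
// for work, and allfin links every block ever allocated so that mark
// can scan the queued finalizer arguments.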
118
119 static void runfinq(void*);
120 static Workbuf* getempty(Workbuf*);
121 static Workbuf* getfull(Workbuf*);
122 static void putempty(Workbuf*);
123 static Workbuf* handoff(Workbuf*);
124
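// Field notes for the work struct below (added, based on the code in
// this file): fmu guards full and emu guards empty, the lists of full
// and empty Workbufs; nproc, nwait, ndone, and alldone coordinate the
// helper procs; markgate and sweepgate hold runtime_gchelper back until
// the main proc is ready for each phase; spans is the list consumed by
// sweep; and chunk/nchunk, protected by the anonymous Lock, are the raw
// memory that getempty carves new Workbufs from.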
125 static struct {
126 Lock fmu;
127 Workbuf *full;
128 Lock emu;
129 Workbuf *empty;
130 uint32 nproc;
131 volatile uint32 nwait;
132 volatile uint32 ndone;
133 Note alldone;
134 Lock markgate;
135 Lock sweepgate;
136 MSpan *spans;
137
138 Lock;
139 byte *chunk;
140 uintptr nchunk;
141 } work;
142
143 // scanblock scans a block of n bytes starting at pointer b for references
144 // to other objects, scanning any it finds recursively until there are no
145 // unscanned objects left. Instead of using an explicit recursion, it keeps
146 // a work list in the Workbuf* structures and loops in the main function
147 // body. Keeping an explicit work list is easier on the stack allocator and
148 // more efficient.
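// In outline (sketch added for clarity; the loop below also handles
// the bitmap and span lookups, marking, and handoff to other procs):
// each pass scans [b, b+n) one word at a time, pushing every plausible
// heap pointer that is allocated but not yet marked onto wbuf; when the
// current block is done it pops the next object into b, looks up its
// size n from the object's span, and repeats, refilling wbuf from
// work.full via getfull when it runs dry.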
149 static void
150 scanblock(byte *b, int64 n)
151 {
152 byte *obj, *arena_start, *arena_used, *p;
153 void **vp;
154 uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
155 MSpan *s;
156 PageID k;
157 void **wp;
158 Workbuf *wbuf;
159 bool keepworking;
160
161 if((int64)(uintptr)n != n || n < 0) {
162 runtime_printf("scanblock %p %D\n", b, n);
163 runtime_throw("scanblock");
164 }
165
166 // Memory arena parameters.
167 arena_start = runtime_mheap.arena_start;
168 arena_used = runtime_mheap.arena_used;
169 nproc = work.nproc;
170
171 wbuf = nil; // current work buffer
172 wp = nil; // storage for next queued pointer (write pointer)
173 nobj = 0; // number of queued objects
174
175 // Scanblock helpers pass b==nil.
176 // The main proc needs to return to make more
177 // calls to scanblock. But if work.nproc==1 then
178 // we might as well process blocks as soon as we
179 // have them.
180 keepworking = b == nil || work.nproc == 1;
181
182 // Align b to a word boundary.
183 off = (uintptr)b & (PtrSize-1);
184 if(off != 0) {
185 b += PtrSize - off;
186 n -= PtrSize - off;
187 }
188
189 for(;;) {
190 // Each iteration scans the block b of length n, queueing pointers in
191 // the work buffer.
192 if(Debug > 1)
193 runtime_printf("scanblock %p %D\n", b, n);
194
195 vp = (void**)b;
196 n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
197 for(i=0; i<(uintptr)n; i++) {
198 obj = (byte*)vp[i];
199
200 // Words outside the arena cannot be pointers.
201 if((byte*)obj < arena_start || (byte*)obj >= arena_used)
202 continue;
203
204 // obj may be a pointer to a live object.
205 // Try to find the beginning of the object.
206
207 // Round down to word boundary.
208 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
209
210 // Find bits for this word.
211 off = (uintptr*)obj - (uintptr*)arena_start;
212 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
213 shift = off % wordsPerBitmapWord;
214 xbits = *bitp;
215 bits = xbits >> shift;
216
217 // Pointing at the beginning of a block?
218 if((bits & (bitAllocated|bitBlockBoundary)) != 0)
219 goto found;
220
221 // Pointing just past the beginning?
222 // Scan backward a little to find a block boundary.
223 for(j=shift; j-->0; ) {
224 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
225 obj = (byte*)obj - (shift-j)*PtrSize;
226 shift = j;
227 bits = xbits>>shift;
228 goto found;
229 }
230 }
231
232 // Otherwise consult span table to find beginning.
233 // (Manually inlined copy of MHeap_LookupMaybe.)
234 k = (uintptr)obj>>PageShift;
235 x = k;
236 if(sizeof(void*) == 8)
237 x -= (uintptr)arena_start>>PageShift;
238 s = runtime_mheap.map[x];
239 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
240 continue;
241 p = (byte*)((uintptr)s->start<<PageShift);
242 if(s->sizeclass == 0) {
243 obj = p;
244 } else {
245 if((byte*)obj >= (byte*)s->limit)
246 continue;
247 size = runtime_class_to_size[s->sizeclass];
248 int32 i = ((byte*)obj - p)/size;
249 obj = p+i*size;
250 }
251
252 // Now that we know the object header, reload bits.
253 off = (uintptr*)obj - (uintptr*)arena_start;
254 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
255 shift = off % wordsPerBitmapWord;
256 xbits = *bitp;
257 bits = xbits >> shift;
258
259 found:
260 // Now we have bits, bitp, and shift correct for
261 // obj pointing at the base of the object.
262 // Only care about allocated and not marked.
263 if((bits & (bitAllocated|bitMarked)) != bitAllocated)
264 continue;
265 if(nproc == 1)
266 *bitp |= bitMarked<<shift;
267 else {
268 for(;;) {
269 x = *bitp;
270 if(x & (bitMarked<<shift))
271 goto continue_obj;
272 if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
273 break;
274 }
275 }
276
277 // If object has no pointers, don't need to scan further.
278 if((bits & bitNoPointers) != 0)
279 continue;
280
281 // If another proc wants a pointer, give it some.
282 if(nobj > 4 && work.nwait > 0 && work.full == nil) {
283 wbuf->nobj = nobj;
284 wbuf = handoff(wbuf);
285 nobj = wbuf->nobj;
286 wp = (void**)(wbuf->obj + nobj);
287 }
288
289 // If buffer is full, get a new one.
290 if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
291 if(wbuf != nil)
292 wbuf->nobj = nobj;
293 wbuf = getempty(wbuf);
294 wp = (void**)(wbuf->obj);
295 nobj = 0;
296 }
297 *wp++ = obj;
298 nobj++;
299 continue_obj:;
300 }
301
302 // Done scanning [b, b+n). Prepare for the next iteration of
303 // the loop by setting b and n to the parameters for the next block.
304
305 // Fetch b from the work buffer.
306 if(nobj == 0) {
307 if(!keepworking) {
308 putempty(wbuf);
309 return;
310 }
311 // Emptied our buffer: refill.
312 wbuf = getfull(wbuf);
313 if(wbuf == nil)
314 return;
315 nobj = wbuf->nobj;
316 wp = (void**)(wbuf->obj + wbuf->nobj);
317 }
318 b = *--wp;
319 nobj--;
320
321 // Ask span about size class.
322 // (Manually inlined copy of MHeap_Lookup.)
323 x = (uintptr)b>>PageShift;
324 if(sizeof(void*) == 8)
325 x -= (uintptr)arena_start>>PageShift;
326 s = runtime_mheap.map[x];
327 if(s->sizeclass == 0)
328 n = s->npages<<PageShift;
329 else
330 n = runtime_class_to_size[s->sizeclass];
331 }
332 }
333
334 // debug_scanblock is the debug copy of scanblock.
335 // It is simpler, slower, single-threaded, recursive,
336 // and uses bitSpecial as the mark bit.
337 static void
338 debug_scanblock(byte *b, int64 n)
339 {
340 byte *obj, *p;
341 void **vp;
342 uintptr size, *bitp, bits, shift, i, xbits, off;
343 MSpan *s;
344
345 if(!DebugMark)
346 runtime_throw("debug_scanblock without DebugMark");
347
348 if((int64)(uintptr)n != n || n < 0) {
349 runtime_printf("debug_scanblock %p %D\n", b, n);
350 runtime_throw("debug_scanblock");
351 }
352
353 // Align b to a word boundary.
354 off = (uintptr)b & (PtrSize-1);
355 if(off != 0) {
356 b += PtrSize - off;
357 n -= PtrSize - off;
358 }
359
360 vp = (void**)b;
361 n /= PtrSize;
362 for(i=0; i<(uintptr)n; i++) {
363 obj = (byte*)vp[i];
364
365 // Words outside the arena cannot be pointers.
366 if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
367 continue;
368
369 // Round down to word boundary.
370 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
371
372 // Consult span table to find beginning.
373 s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
374 if(s == nil)
375 continue;
376
377
378 p = (byte*)((uintptr)s->start<<PageShift);
379 if(s->sizeclass == 0) {
380 obj = p;
381 size = (uintptr)s->npages<<PageShift;
382 } else {
383 if((byte*)obj >= (byte*)s->limit)
384 continue;
385 size = runtime_class_to_size[s->sizeclass];
386 int32 i = ((byte*)obj - p)/size;
387 obj = p+i*size;
388 }
389
390 // Now that we know the object header, reload bits.
391 off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
392 bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
393 shift = off % wordsPerBitmapWord;
394 xbits = *bitp;
395 bits = xbits >> shift;
396
397 // Now we have bits, bitp, and shift correct for
398 // obj pointing at the base of the object.
399 // If not allocated or already marked, done.
400 if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
401 continue;
402 *bitp |= bitSpecial<<shift;
403 if(!(bits & bitMarked))
404 runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
405
406 // If object has no pointers, don't need to scan further.
407 if((bits & bitNoPointers) != 0)
408 continue;
409
410 debug_scanblock(obj, size);
411 }
412 }
413
414 // Get an empty work buffer off the work.empty list,
415 // allocating new buffers as needed.
416 static Workbuf*
417 getempty(Workbuf *b)
418 {
419 if(work.nproc == 1) {
420 // Put b on full list.
421 if(b != nil) {
422 b->next = work.full;
423 work.full = b;
424 }
425 // Grab from empty list if possible.
426 b = work.empty;
427 if(b != nil) {
428 work.empty = b->next;
429 goto haveb;
430 }
431 } else {
432 // Put b on full list.
433 if(b != nil) {
434 runtime_lock(&work.fmu);
435 b->next = work.full;
436 work.full = b;
437 runtime_unlock(&work.fmu);
438 }
439 // Grab from empty list if possible.
440 runtime_lock(&work.emu);
441 b = work.empty;
442 if(b != nil)
443 work.empty = b->next;
444 runtime_unlock(&work.emu);
445 if(b != nil)
446 goto haveb;
447 }
448
449 // Need to allocate.
450 runtime_lock(&work);
451 if(work.nchunk < sizeof *b) {
452 work.nchunk = 1<<20;
453 work.chunk = runtime_SysAlloc(work.nchunk);
454 }
455 b = (Workbuf*)work.chunk;
456 work.chunk += sizeof *b;
457 work.nchunk -= sizeof *b;
458 runtime_unlock(&work);
459
460 haveb:
461 b->nobj = 0;
462 return b;
463 }
464
465 static void
466 putempty(Workbuf *b)
467 {
468 if(b == nil)
469 return;
470
471 if(work.nproc == 1) {
472 b->next = work.empty;
473 work.empty = b;
474 return;
475 }
476
477 runtime_lock(&work.emu);
478 b->next = work.empty;
479 work.empty = b;
480 runtime_unlock(&work.emu);
481 }
482
483 // Get a full work buffer off the work.full list, or return nil.
484 static Workbuf*
485 getfull(Workbuf *b)
486 {
487 int32 i;
488 Workbuf *b1;
489
490 if(work.nproc == 1) {
491 // Put b on empty list.
492 if(b != nil) {
493 b->next = work.empty;
494 work.empty = b;
495 }
496 // Grab from full list if possible.
497 // Since work.nproc==1, no one else is
498 // going to give us work.
499 b = work.full;
500 if(b != nil)
501 work.full = b->next;
502 return b;
503 }
504
505 putempty(b);
506
507 // Grab buffer from full list if possible.
508 for(;;) {
509 b1 = work.full;
510 if(b1 == nil)
511 break;
512 runtime_lock(&work.fmu);
513 if(work.full != nil) {
514 b1 = work.full;
515 work.full = b1->next;
516 runtime_unlock(&work.fmu);
517 return b1;
518 }
519 runtime_unlock(&work.fmu);
520 }
521
522 runtime_xadd(&work.nwait, +1);
523 for(i=0;; i++) {
524 b1 = work.full;
525 if(b1 != nil) {
526 runtime_lock(&work.fmu);
527 if(work.full != nil) {
528 runtime_xadd(&work.nwait, -1);
529 b1 = work.full;
530 work.full = b1->next;
531 runtime_unlock(&work.fmu);
532 return b1;
533 }
534 runtime_unlock(&work.fmu);
535 continue;
536 }
537 if(work.nwait == work.nproc)
538 return nil;
539 if(i < 10)
540 runtime_procyield(20);
541 else if(i < 20)
542 runtime_osyield();
543 else
544 runtime_usleep(100);
545 }
546 }
547
548 static Workbuf*
549 handoff(Workbuf *b)
550 {
551 int32 n;
552 Workbuf *b1;
553
554 // Make new buffer with half of b's pointers.
555 b1 = getempty(nil);
556 n = b->nobj/2;
557 b->nobj -= n;
558 b1->nobj = n;
559 runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
560 nhandoff += n;
561
562 // Put b on full list - let first half of b get stolen.
563 runtime_lock(&work.fmu);
564 b->next = work.full;
565 work.full = b;
566 runtime_unlock(&work.fmu);
567
568 return b1;
569 }
570
571 // Scanstack calls scanblock on each of gp's stack segments.
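// When libgo is built with split stacks the segments are walked with
// __splitstack_find/__splitstack_find_context; otherwise the single
// stack is scanned between gp->gcnext_sp (or the current stack pointer
// for the running goroutine) and gp->gcinitial_sp + gp->gcstack_size.
// (Note added; see the two branches of the #ifdef below.)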
572 static void
573 scanstack(void (*scanblock)(byte*, int64), G *gp)
574 {
575 #ifdef USING_SPLIT_STACK
576 M *mp;
577 void* sp;
578 size_t spsize;
579 void* next_segment;
580 void* next_sp;
581 void* initial_sp;
582
583 if(gp == runtime_g()) {
584 // Scanning our own stack.
585 sp = __splitstack_find(nil, nil, &spsize, &next_segment,
586 &next_sp, &initial_sp);
587 } else if((mp = gp->m) != nil && mp->helpgc) {
588 // gchelper's stack is in active use and has no interesting pointers.
589 return;
590 } else {
591 // Scanning another goroutine's stack.
592 // The goroutine is usually asleep (the world is stopped).
593
594 // The exception is that if the goroutine is about to enter or might
595 // have just exited a system call, it may be executing code such
596 // as schedlock and may have needed to start a new stack segment.
597 // Use the stack segment and stack pointer at the time of
598 // the system call instead, since that won't change underfoot.
599 if(gp->gcstack != nil) {
600 sp = gp->gcstack;
601 spsize = gp->gcstack_size;
602 next_segment = gp->gcnext_segment;
603 next_sp = gp->gcnext_sp;
604 initial_sp = gp->gcinitial_sp;
605 } else {
606 sp = __splitstack_find_context(&gp->stack_context[0],
607 &spsize, &next_segment,
608 &next_sp, &initial_sp);
609 }
610 }
611 if(sp != nil) {
612 scanblock(sp, spsize);
613 while((sp = __splitstack_find(next_segment, next_sp,
614 &spsize, &next_segment,
615 &next_sp, &initial_sp)) != nil)
616 scanblock(sp, spsize);
617 }
618 #else
619 M *mp;
620 byte* bottom;
621 byte* top;
622
623 if(gp == runtime_g()) {
624 // Scanning our own stack.
625 bottom = (byte*)&gp;
626 } else if((mp = gp->m) != nil && mp->helpgc) {
627 // gchelper's stack is in active use and has no interesting pointers.
628 return;
629 } else {
630 // Scanning another goroutine's stack.
631 // The goroutine is usually asleep (the world is stopped).
632 bottom = (byte*)gp->gcnext_sp;
633 if(bottom == nil)
634 return;
635 }
636 top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
637 if(top > bottom)
638 scanblock(bottom, top - bottom);
639 else
640 scanblock(top, bottom - top);
641 #endif
642 }
643
644 // Markfin calls scanblock on the blocks that have finalizers:
645 // the things pointed at cannot be freed until the finalizers have run.
646 static void
647 markfin(void *v)
648 {
649 uintptr size;
650
651 size = 0;
652 if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
653 runtime_throw("mark - finalizer inconsistency");
654
655 // do not mark the finalizer block itself; just mark the things it points at.
656 scanblock(v, size);
657 }
658
659 static struct root_list* roots;
660
661 void
662 __go_register_gc_roots (struct root_list* r)
663 {
664 // FIXME: This needs locking if multiple goroutines can call
665 // dlopen simultaneously.
666 r->next = roots;
667 roots = r;
668 }
669
670 static void
671 debug_markfin(void *v)
672 {
673 uintptr size;
674
675 if(!runtime_mlookup(v, (byte**)&v, &size, nil))
676 runtime_throw("debug_mark - finalizer inconsistency");
677 debug_scanblock(v, size);
678 }
679
680 // Mark
681 static void
682 mark(void (*scan)(byte*, int64))
683 {
684 struct root_list *pl;
685 G *gp;
686 FinBlock *fb;
687
688 // mark data+bss.
689 for(pl = roots; pl != nil; pl = pl->next) {
690 struct root* pr = &pl->roots[0];
691 while(1) {
692 void *decl = pr->decl;
693 if(decl == nil)
694 break;
695 scanblock(decl, pr->size);
696 pr++;
697 }
698 }
699
700 scan((byte*)&runtime_m0, sizeof runtime_m0);
701 scan((byte*)&runtime_g0, sizeof runtime_g0);
702 scan((byte*)&runtime_allg, sizeof runtime_allg);
703 scan((byte*)&runtime_allm, sizeof runtime_allm);
704 runtime_MProf_Mark(scan);
705 runtime_time_scan(scan);
706
707 // mark stacks
708 for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
709 switch(gp->status){
710 default:
711 runtime_printf("unexpected G.status %d\n", gp->status);
712 runtime_throw("mark - bad status");
713 case Gdead:
714 break;
715 case Grunning:
716 if(gp != runtime_g())
717 runtime_throw("mark - world not stopped");
718 scanstack(scan, gp);
719 break;
720 case Grunnable:
721 case Gsyscall:
722 case Gwaiting:
723 scanstack(scan, gp);
724 break;
725 }
726 }
727
728 // mark things pointed at by objects with finalizers
729 if(scan == debug_scanblock)
730 runtime_walkfintab(debug_markfin, scan);
731 else
732 runtime_walkfintab(markfin, scan);
733
734 for(fb=allfin; fb; fb=fb->alllink)
735 scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
736
737 // in multiproc mode, join in the queued work.
738 scan(nil, 0);
739 }
740
741 static bool
742 handlespecial(byte *p, uintptr size)
743 {
744 void (*fn)(void*);
745 const struct __go_func_type *ft;
746 FinBlock *block;
747 Finalizer *f;
748
749 if(!runtime_getfinalizer(p, true, &fn, &ft)) {
750 runtime_setblockspecial(p, false);
751 runtime_MProf_Free(p, size);
752 return false;
753 }
754
755 runtime_lock(&finlock);
756 if(finq == nil || finq->cnt == finq->cap) {
757 if(finc == nil) {
758 finc = runtime_SysAlloc(PageSize);
759 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
760 finc->alllink = allfin;
761 allfin = finc;
762 }
763 block = finc;
764 finc = block->next;
765 block->next = finq;
766 finq = block;
767 }
768 f = &finq->fin[finq->cnt];
769 finq->cnt++;
770 f->fn = fn;
771 f->ft = ft;
772 f->arg = p;
773 runtime_unlock(&finlock);
774 return true;
775 }
776
777 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
778 // It clears the mark bits in preparation for the next GC round.
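// For each object in an in-use span the loop below does one of the
// following (sketch added for clarity):
//
//	not allocated        -> skip
//	allocated and marked -> clear bitMarked and keep the object
//	unmarked but special -> handlespecial (queue finalizer / release
//	                        profiling record); keep the object only if
//	                        a finalizer was queued
//	otherwise            -> clear its bits and return the block to the
//	                        MCache (small objects) or MHeap (large spans)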
779 static void
780 sweep(void)
781 {
782 M *m;
783 MSpan *s;
784 int32 cl, n, npages;
785 uintptr size;
786 byte *p;
787 MCache *c;
788 byte *arena_start;
789 int64 now;
790
791 m = runtime_m();
792 arena_start = runtime_mheap.arena_start;
793 now = runtime_nanotime();
794
795 for(;;) {
796 s = work.spans;
797 if(s == nil)
798 break;
799 if(!runtime_casp(&work.spans, s, s->allnext))
800 continue;
801
802 // Stamp newly unused spans. The scavenger will use that
803 // info to potentially give back some pages to the OS.
804 if(s->state == MSpanFree && s->unusedsince == 0)
805 s->unusedsince = now;
806
807 if(s->state != MSpanInUse)
808 continue;
809
810 p = (byte*)(s->start << PageShift);
811 cl = s->sizeclass;
812 if(cl == 0) {
813 size = s->npages<<PageShift;
814 n = 1;
815 } else {
816 // Chunk full of small blocks.
817 size = runtime_class_to_size[cl];
818 npages = runtime_class_to_allocnpages[cl];
819 n = (npages << PageShift) / size;
820 }
821
822 // Sweep through n objects of given size starting at p.
823 // This thread owns the span now, so it can manipulate
824 // the block bitmap without atomic operations.
825 for(; n > 0; n--, p += size) {
826 uintptr off, *bitp, shift, bits;
827
828 off = (uintptr*)p - (uintptr*)arena_start;
829 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
830 shift = off % wordsPerBitmapWord;
831 bits = *bitp>>shift;
832
833 if((bits & bitAllocated) == 0)
834 continue;
835
836 if((bits & bitMarked) != 0) {
837 if(DebugMark) {
838 if(!(bits & bitSpecial))
839 runtime_printf("found spurious mark on %p\n", p);
840 *bitp &= ~(bitSpecial<<shift);
841 }
842 *bitp &= ~(bitMarked<<shift);
843 continue;
844 }
845
846 // Special means it has a finalizer or is being profiled.
847 // In DebugMark mode, the bit has been coopted so
848 // we have to assume all blocks are special.
849 if(DebugMark || (bits & bitSpecial) != 0) {
850 if(handlespecial(p, size))
851 continue;
852 }
853
854 // Mark freed; restore block boundary bit.
855 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
856
857 c = m->mcache;
858 if(s->sizeclass == 0) {
859 // Free large span.
860 runtime_unmarkspan(p, 1<<PageShift);
861 *(uintptr*)p = 1; // needs zeroing
862 runtime_MHeap_Free(&runtime_mheap, s, 1);
863 } else {
864 // Free small object.
865 if(size > sizeof(uintptr))
866 ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
867 c->local_by_size[s->sizeclass].nfree++;
868 runtime_MCache_Free(c, p, s->sizeclass, size);
869 }
870 c->local_alloc -= size;
871 c->local_nfree++;
872 }
873 }
874 }
875
876 void
877 runtime_gchelper(void)
878 {
879 // Wait until main proc is ready for mark help.
880 runtime_lock(&work.markgate);
881 runtime_unlock(&work.markgate);
882 scanblock(nil, 0);
883
884 // Wait until main proc is ready for sweep help.
885 runtime_lock(&work.sweepgate);
886 runtime_unlock(&work.sweepgate);
887 sweep();
888
889 if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
890 runtime_notewakeup(&work.alldone);
891 }
892
893 // Initialized from $GOGC. GOGC=off means no gc.
894 //
895 // Next gc is after we've allocated an extra amount of
896 // memory proportional to the amount already in use.
897 // If gcpercent=100 and we're using 4M, we'll gc again
898 // when we get to 8M. This keeps the gc cost in linear
899 // proportion to the allocation cost. Adjusting gcpercent
900 // just changes the linear constant (and also the amount of
901 // extra memory used).
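// For example (arithmetic added; see the next_gc computation in
// runtime_gc below): with gcpercent=100, heap_alloc=4MB and negligible
// stacks_sys,
//
//	next_gc = heap_alloc + (heap_alloc - stacks_sys)*gcpercent/100
//	        ~ 4MB + 4MB*100/100 = 8MB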
902 static int32 gcpercent = -2;
903
904 static void
905 stealcache(void)
906 {
907 M *m;
908
909 for(m=runtime_allm; m; m=m->alllink)
910 runtime_MCache_ReleaseAll(m->mcache);
911 }
912
913 static void
914 cachestats(void)
915 {
916 M *m;
917 MCache *c;
918 uint32 i;
919 uint64 stacks_inuse;
920 uint64 stacks_sys;
921
922 stacks_inuse = 0;
923 stacks_sys = runtime_stacks_sys;
924 for(m=runtime_allm; m; m=m->alllink) {
925 runtime_purgecachedstats(m);
926 // stacks_inuse += m->stackalloc->inuse;
927 // stacks_sys += m->stackalloc->sys;
928 c = m->mcache;
929 for(i=0; i<nelem(c->local_by_size); i++) {
930 mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
931 c->local_by_size[i].nmalloc = 0;
932 mstats.by_size[i].nfree += c->local_by_size[i].nfree;
933 c->local_by_size[i].nfree = 0;
934 }
935 }
936 mstats.stacks_inuse = stacks_inuse;
937 mstats.stacks_sys = stacks_sys;
938 }
939
940 void
941 runtime_gc(int32 force)
942 {
943 M *m;
944 int64 t0, t1, t2, t3;
945 uint64 heap0, heap1, obj0, obj1;
946 const byte *p;
947 bool extra;
948
949 // Make sure all registers are saved on stack so that
950 // scanstack sees them.
951 __builtin_unwind_init();
952
953 // The gc is turned off (via enablegc) until
954 // the bootstrap has completed.
955 // Also, malloc gets called in the guts
956 // of a number of libraries that might be
957 // holding locks. To avoid priority inversion
958 // problems, don't bother trying to run gc
959 // while holding a lock. The next mallocgc
960 // without a lock will do the gc instead.
961 m = runtime_m();
962 if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
963 return;
964
965 if(gcpercent == -2) { // first time through
966 p = runtime_getenv("GOGC");
967 if(p == nil || p[0] == '\0')
968 gcpercent = 100;
969 else if(runtime_strcmp((const char*)p, "off") == 0)
970 gcpercent = -1;
971 else
972 gcpercent = runtime_atoi(p);
973
974 p = runtime_getenv("GOGCTRACE");
975 if(p != nil)
976 gctrace = runtime_atoi(p);
977 }
978 if(gcpercent < 0)
979 return;
980
981 runtime_semacquire(&runtime_worldsema);
982 if(!force && mstats.heap_alloc < mstats.next_gc) {
983 runtime_semrelease(&runtime_worldsema);
984 return;
985 }
986
987 t0 = runtime_nanotime();
988 nhandoff = 0;
989
990 m->gcing = 1;
991 runtime_stoptheworld();
992
993 cachestats();
994 heap0 = mstats.heap_alloc;
995 obj0 = mstats.nmalloc - mstats.nfree;
996
997 runtime_lock(&work.markgate);
998 runtime_lock(&work.sweepgate);
999
1000 extra = false;
1001 work.nproc = 1;
1002 if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
1003 runtime_noteclear(&work.alldone);
1004 work.nproc += runtime_helpgc(&extra);
1005 }
1006 work.nwait = 0;
1007 work.ndone = 0;
1008
1009 runtime_unlock(&work.markgate); // let the helpers in
1010 mark(scanblock);
1011 if(DebugMark)
1012 mark(debug_scanblock);
1013 t1 = runtime_nanotime();
1014
1015 work.spans = runtime_mheap.allspans;
1016 runtime_unlock(&work.sweepgate); // let the helpers in
1017 sweep();
1018 if(work.nproc > 1)
1019 runtime_notesleep(&work.alldone);
1020 t2 = runtime_nanotime();
1021
1022 stealcache();
1023 cachestats();
1024
1025 mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
1026 m->gcing = 0;
1027
1028 m->locks++; // disable gc during the mallocs in newproc
1029 if(finq != nil) {
1030 // kick off or wake up goroutine to run queued finalizers
1031 if(fing == nil)
1032 fing = __go_go(runfinq, nil);
1033 else if(fingwait) {
1034 fingwait = 0;
1035 runtime_ready(fing);
1036 }
1037 }
1038 m->locks--;
1039
1040 cachestats();
1041 heap1 = mstats.heap_alloc;
1042 obj1 = mstats.nmalloc - mstats.nfree;
1043
1044 t3 = runtime_nanotime();
1045 mstats.last_gc = t3;
1046 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
1047 mstats.pause_total_ns += t3 - t0;
1048 mstats.numgc++;
1049 if(mstats.debuggc)
1050 runtime_printf("pause %D\n", t3-t0);
1051
1052 if(gctrace) {
1053 runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects\n",
1054 mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
1055 heap0>>20, heap1>>20, obj0, obj1,
1056 mstats.nmalloc, mstats.nfree);
1057 }
1058
1059 runtime_MProf_GC();
1060 runtime_semrelease(&runtime_worldsema);
1061
1062 // If we could have used another helper proc, start one now,
1063 // in the hope that it will be available next time.
1064 // It would have been even better to start it before the collection,
1065 // but doing so requires allocating memory, so it's tricky to
1066 // coordinate. This lazy approach works out in practice:
1067 // we don't mind if the first couple gc rounds don't have quite
1068 // the maximum number of procs.
1069 runtime_starttheworld(extra);
1070
1071 // give the queued finalizers, if any, a chance to run
1072 if(finq != nil)
1073 runtime_gosched();
1074
1075 if(gctrace > 1 && !force)
1076 runtime_gc(1);
1077 }
1078
1079 void runtime_ReadMemStats(MStats *)
1080 __asm__("runtime.ReadMemStats");
1081
1082 void
1083 runtime_ReadMemStats(MStats *stats)
1084 {
1085 M *m;
1086
1087 // Have to acquire worldsema to stop the world,
1088 // because stoptheworld can only be used by
1089 // one goroutine at a time, and there might be
1090 // a pending garbage collection already calling it.
1091 runtime_semacquire(&runtime_worldsema);
1092 m = runtime_m();
1093 m->gcing = 1;
1094 runtime_stoptheworld();
1095 cachestats();
1096 *stats = mstats;
1097 m->gcing = 0;
1098 runtime_semrelease(&runtime_worldsema);
1099 runtime_starttheworld(false);
1100 }
1101
1102 static void
1103 runfinq(void* dummy __attribute__ ((unused)))
1104 {
1105 G* gp;
1106 Finalizer *f;
1107 FinBlock *fb, *next;
1108 uint32 i;
1109
1110 gp = runtime_g();
1111 for(;;) {
1112 // There's no need for a lock in this section
1113 // because it only conflicts with the garbage
1114 // collector, and the garbage collector only
1115 // runs when everyone else is stopped, and
1116 // runfinq only stops at the gosched() or
1117 // during the calls in the for loop.
1118 fb = finq;
1119 finq = nil;
1120 if(fb == nil) {
1121 fingwait = 1;
1122 gp->status = Gwaiting;
1123 gp->waitreason = "finalizer wait";
1124 runtime_gosched();
1125 continue;
1126 }
1127 for(; fb; fb=next) {
1128 next = fb->next;
1129 for(i=0; i<(uint32)fb->cnt; i++) {
1130 void *params[1];
1131
1132 f = &fb->fin[i];
1133 params[0] = &f->arg;
1134 runtime_setblockspecial(f->arg, false);
1135 reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
1136 f->fn = nil;
1137 f->arg = nil;
1138 }
1139 fb->cnt = 0;
1140 fb->next = finc;
1141 finc = fb;
1142 }
1143 runtime_gc(1); // trigger another gc to clean up the finalized objects, if possible
1144 }
1145 }
1146
1147 // mark the block at v of size n as allocated.
1148 // If noptr is true, mark it as having no pointers.
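// The bitmap update below follows the pattern shared with markfreed and
// setblockspecial: read the bitmap word, compute the new value, and
// either store it directly when runtime_singleproc, or publish it with
// a runtime_casp retry loop, since other goroutines may be updating
// neighboring bits in the same word. (Note added for clarity.)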
1149 void
1150 runtime_markallocated(void *v, uintptr n, bool noptr)
1151 {
1152 uintptr *b, obits, bits, off, shift;
1153
1154 if(0)
1155 runtime_printf("markallocated %p+%p\n", v, n);
1156
1157 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1158 runtime_throw("markallocated: bad pointer");
1159
1160 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
1161 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1162 shift = off % wordsPerBitmapWord;
1163
1164 for(;;) {
1165 obits = *b;
1166 bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
1167 if(noptr)
1168 bits |= bitNoPointers<<shift;
1169 if(runtime_singleproc) {
1170 *b = bits;
1171 break;
1172 } else {
1173 // more than one goroutine is potentially running: use atomic op
1174 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
1175 break;
1176 }
1177 }
1178 }
1179
1180 // mark the block at v of size n as freed.
1181 void
1182 runtime_markfreed(void *v, uintptr n)
1183 {
1184 uintptr *b, obits, bits, off, shift;
1185
1186 if(0)
1187 runtime_printf("markfreed %p+%p\n", v, n);
1188
1189 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1190 runtime_throw("markfreed: bad pointer");
1191
1192 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
1193 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1194 shift = off % wordsPerBitmapWord;
1195
1196 for(;;) {
1197 obits = *b;
1198 bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
1199 if(runtime_singleproc) {
1200 *b = bits;
1201 break;
1202 } else {
1203 // more than one goroutine is potentially running: use atomic op
1204 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
1205 break;
1206 }
1207 }
1208 }
1209
1210 // check that the block at v of size n is marked freed.
1211 void
1212 runtime_checkfreed(void *v, uintptr n)
1213 {
1214 uintptr *b, bits, off, shift;
1215
1216 if(!runtime_checking)
1217 return;
1218
1219 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1220 return; // not allocated, so okay
1221
1222 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
1223 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1224 shift = off % wordsPerBitmapWord;
1225
1226 bits = *b>>shift;
1227 if((bits & bitAllocated) != 0) {
1228 runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
1229 v, n, off, bits & bitMask);
1230 runtime_throw("checkfreed: not freed");
1231 }
1232 }
1233
1234 // mark the span of memory at v as having n blocks of the given size.
1235 // if leftover is true, there is leftover space at the end of the span.
1236 void
1237 runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
1238 {
1239 uintptr *b, off, shift;
1240 byte *p;
1241
1242 if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1243 runtime_throw("markspan: bad pointer");
1244
1245 p = v;
1246 if(leftover) // mark a boundary just past end of last block too
1247 n++;
1248 for(; n-- > 0; p += size) {
1249 // Okay to use non-atomic ops here, because we control
1250 // the entire span, and each bitmap word has bits for only
1251 // one span, so no other goroutines are changing these
1252 // bitmap words.
1253 off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; // word offset
1254 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1255 shift = off % wordsPerBitmapWord;
1256 *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
1257 }
1258 }
1259
1260 // unmark the span of memory at v of length n bytes.
1261 void
1262 runtime_unmarkspan(void *v, uintptr n)
1263 {
1264 uintptr *p, *b, off;
1265
1266 if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
1267 runtime_throw("unmarkspan: bad pointer");
1268
1269 p = v;
1270 off = p - (uintptr*)runtime_mheap.arena_start; // word offset
1271 if(off % wordsPerBitmapWord != 0)
1272 runtime_throw("unmarkspan: unaligned pointer");
1273 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1274 n /= PtrSize;
1275 if(n%wordsPerBitmapWord != 0)
1276 runtime_throw("unmarkspan: unaligned length");
1277 // Okay to use non-atomic ops here, because we control
1278 // the entire span, and each bitmap word has bits for only
1279 // one span, so no other goroutines are changing these
1280 // bitmap words.
1281 n /= wordsPerBitmapWord;
1282 while(n-- > 0)
1283 *b-- = 0;
1284 }
1285
1286 bool
1287 runtime_blockspecial(void *v)
1288 {
1289 uintptr *b, off, shift;
1290
1291 if(DebugMark)
1292 return true;
1293
1294 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
1295 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1296 shift = off % wordsPerBitmapWord;
1297
1298 return (*b & (bitSpecial<<shift)) != 0;
1299 }
1300
1301 void
1302 runtime_setblockspecial(void *v, bool s)
1303 {
1304 uintptr *b, off, shift, bits, obits;
1305
1306 if(DebugMark)
1307 return;
1308
1309 off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
1310 b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
1311 shift = off % wordsPerBitmapWord;
1312
1313 for(;;) {
1314 obits = *b;
1315 if(s)
1316 bits = obits | (bitSpecial<<shift);
1317 else
1318 bits = obits & ~(bitSpecial<<shift);
1319 if(runtime_singleproc) {
1320 *b = bits;
1321 break;
1322 } else {
1323 // more than one goroutine is potentially running: use atomic op
1324 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
1325 break;
1326 }
1327 }
1328 }
1329
1330 void
1331 runtime_MHeap_MapBits(MHeap *h)
1332 {
1333 size_t page_size;
1334
1335 // Caller has added extra mappings to the arena.
1336 // Add extra mappings of bitmap words as needed.
1337 // We allocate extra bitmap pieces in chunks of bitmapChunk.
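// Because the bitmap grows downward from arena_start (see the layout
// comment at the top of this file), the new pages are mapped at
// arena_start - n and the previously mapped region ends at
// arena_start - bitmap_mapped. (Note added for clarity.)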
1338 enum {
1339 bitmapChunk = 8192
1340 };
1341 uintptr n;
1342
1343 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
1344 n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
1345 if(h->bitmap_mapped >= n)
1346 return;
1347
1348 page_size = getpagesize();
1349 n = (n+page_size-1) & ~(page_size-1);
1350
1351 runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
1352 h->bitmap_mapped = n;
1353 }