// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#ifdef USING_SPLIT_STACK

extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
				 void **);

extern void * __splitstack_find_context (void *context[10], size_t *, void **,
					 void **, void **);
	PtrSize = sizeof(void*),
	DebugMark = 0,  // run second pass to check mark

	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,
// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory.  There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there.  On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
// the only difference is that the divisor is 8.)
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
#define bitAllocated		((uintptr)1<<(bitShift*0))
#define bitNoPointers		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
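
/*
 * A worked example of the formulas above (a sketch, assuming a 64-bit
 * system, so wordsPerBitmapWord = 16 and bitShift = 16): for the word
 * at offset 35 in the arena,
 *
 *	off   = 35
 *	b     = (uintptr*)mheap.arena_start - 35/16 - 1   // 3 words back
 *	shift = 35 % 16 = 3
 *	bits  = *b >> 3
 *
 * and bits&bitAllocated, bits&bitNoPointers, bits&bitMarked, and
 * bits&bitSpecial test bits 0, 16, 32, and 48 of bits, which are bits
 * 3, 19, 35, and 51 of the original bitmap word *b.
 */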
// Holding worldsema grants an M the right to try to stop the world.
// The procedure is:
//
//	runtime_semacquire(&runtime_worldsema);
//	runtime_stoptheworld();
//
//	... do stuff ...
//
//	runtime_semrelease(&runtime_worldsema);
//	runtime_starttheworld();

uint32 runtime_worldsema = 1;
// TODO: Make these per-M.
static uint64 nhandoff;
typedef struct Workbuf Workbuf;

typedef struct Finalizer Finalizer;
struct Finalizer
{
	// ...
	const struct __go_func_type *ft;
};

typedef struct FinBlock FinBlock;

static FinBlock	*finq;		// list of finalizers that are to be executed
static FinBlock	*finc;		// cache of free blocks
static FinBlock	*allfin;	// list of all blocks
static int32	fingwait;

static void	runfinq(void*);
static Workbuf*	getempty(Workbuf*);
static Workbuf*	getfull(Workbuf*);
static void	putempty(Workbuf*);
static Workbuf*	handoff(Workbuf*);

static struct {
	Lock	fmu;
	Workbuf	*full;
	Lock	emu;
	Workbuf	*empty;
	uint32	nproc;
	volatile uint32	nwait;
	volatile uint32	ndone;
	Note	alldone;
	Lock	markgate;
	Lock	sweepgate;
	MSpan	*spans;

	Lock;
	byte	*chunk;
	uintptr	nchunk;
} work;
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left.  Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body.  Keeping an explicit work list is easier on the stack allocator and
// more efficient.
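//
// A small sketch of the same explicit-work-list idea in isolation
// (hypothetical names, not part of the runtime): instead of recursing,
// newly discovered items are pushed onto a buffer and processed in a
// loop until the buffer drains.
//
//	void
//	processall(void *root)
//	{
//		void *list[128];	// explicit work list
//		int32 n;
//
//		n = 0;
//		list[n++] = root;
//		while(n > 0) {
//			void *p = list[--n];
//			// examine p; for each pointer c found in p:
//			//	list[n++] = c;
//		}
//	}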
static void
scanblock(byte *b, int64 n)
{
	byte *obj, *arena_start, *arena_used, *p;
	uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
	// ...

	if((int64)(uintptr)n != n || n < 0) {
		runtime_printf("scanblock %p %D\n", b, n);
		runtime_throw("scanblock");
	}
	// Memory arena parameters.
	arena_start = runtime_mheap.arena_start;
	arena_used = runtime_mheap.arena_used;

	wbuf = nil;	// current work buffer
	wp = nil;	// storage for next queued pointer (write pointer)
	nobj = 0;	// number of queued objects
	// Scanblock helpers pass b==nil.
	// The main proc needs to return to make more
	// calls to scanblock.  But if work.nproc==1 then
	// might as well process blocks as soon as we
	// have them.
	keepworking = b == nil || work.nproc == 1;

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);
	if(off != 0) {
		b += PtrSize - off;
		n -= PtrSize - off;
	}
	for(;;) {
		// Each iteration scans the block b of length n, queueing pointers in
		// the work buffer.
		if(Debug > 1)
			runtime_printf("scanblock %p %D\n", b, n);

		vp = (void**)b;
		n >>= (2+PtrSize/8);	/* n /= PtrSize (4 or 8) */
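		// (Worked check of the shift trick: PtrSize is 4 or 8, so
		// 2+PtrSize/8 is 2 or 3, and n>>=2 or n>>=3 divides n by 4
		// or 8 respectively, matching n /= PtrSize without a divide.)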
		for(i=0; i<(uintptr)n; i++) {
			obj = (byte*)vp[i];

			// Words outside the arena cannot be pointers.
			if((byte*)obj < arena_start || (byte*)obj >= arena_used)
				continue;
			// obj may be a pointer to a live object.
			// Try to find the beginning of the object.

			// Round down to word boundary.
			obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

			// Find bits for this word.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			xbits = *bitp;
			bits = xbits >> shift;
			// Pointing at the beginning of a block?
			if((bits & (bitAllocated|bitBlockBoundary)) != 0)
				goto found;

			// Pointing just past the beginning?
			// Scan backward a little to find a block boundary.
			for(j=shift; j-->0; ) {
				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
					obj = (byte*)obj - (shift-j)*PtrSize;
					shift = j;
					bits = xbits>>shift;
					goto found;
				}
			}
			// Otherwise consult span table to find beginning.
			// (Manually inlined copy of MHeap_LookupMaybe.)
			k = (uintptr)obj>>PageShift;
			x = k;
			if(sizeof(void*) == 8)
				x -= (uintptr)arena_start>>PageShift;
			s = runtime_mheap.map[x];
			if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
				continue;
			p = (byte*)((uintptr)s->start<<PageShift);
			if(s->sizeclass == 0) {
				obj = p;
			} else {
				if((byte*)obj >= (byte*)s->limit)
					continue;
				size = runtime_class_to_size[s->sizeclass];
				int32 i = ((byte*)obj - p)/size;
				obj = p+i*size;
			}
			// Now that we know the object header, reload bits.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			xbits = *bitp;
			bits = xbits >> shift;

		found:
			// Now we have bits, bitp, and shift correct for
			// obj pointing at the base of the object.
			// Only care about allocated and not marked.
			if((bits & (bitAllocated|bitMarked)) != bitAllocated)
				continue;
			if(work.nproc == 1)
				*bitp |= bitMarked<<shift;
			else {
				for(;;) {
					x = *bitp;
					if(x & (bitMarked<<shift))
						goto continue_obj;
					if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
						break;
				}
			}

			// If object has no pointers, don't need to scan further.
			if((bits & bitNoPointers) != 0)
				continue;
			// If another proc wants a pointer, give it some.
			if(nobj > 4 && work.nwait > 0 && work.full == nil) {
				wbuf->nobj = nobj;
				wbuf = handoff(wbuf);
				nobj = wbuf->nobj;
				wp = (void**)(wbuf->obj + nobj);
			}

			// If buffer is full, get a new one.
			if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
				if(wbuf != nil)
					wbuf->nobj = nobj;
				wbuf = getempty(wbuf);
				wp = (void**)(wbuf->obj);
				nobj = 0;
			}
			*wp++ = obj;
			nobj++;
		continue_obj:;
		}
		// Done scanning [b, b+n).  Prepare for the next iteration of
		// the loop by setting b and n to the parameters for the next block.

		// Fetch b from the work buffer.
		if(nobj == 0) {
			if(!keepworking) {
				if(wbuf)
					putempty(wbuf);
				return;
			}
			// Emptied our buffer: refill.
			wbuf = getfull(wbuf);
			if(wbuf == nil)
				return;
			nobj = wbuf->nobj;
			wp = (void**)(wbuf->obj + wbuf->nobj);
		}
		b = *--wp;
		nobj--;
		// Ask span about size class.
		// (Manually inlined copy of MHeap_Lookup.)
		x = (uintptr)b>>PageShift;
		if(sizeof(void*) == 8)
			x -= (uintptr)arena_start>>PageShift;
		s = runtime_mheap.map[x];
		if(s->sizeclass == 0)
			n = s->npages<<PageShift;
		else
			n = runtime_class_to_size[s->sizeclass];
	}
}
// debug_scanblock is the debug copy of scanblock.
// it is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
debug_scanblock(byte *b, int64 n)
{
	byte *obj, *p;
	void **vp;
	uintptr size, *bitp, bits, shift, i, xbits, off;
	MSpan *s;

	if(!DebugMark)
		runtime_throw("debug_scanblock without DebugMark");
	if((int64)(uintptr)n != n || n < 0) {
		runtime_printf("debug_scanblock %p %D\n", b, n);
		runtime_throw("debug_scanblock");
	}

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);
	if(off != 0) {
		b += PtrSize - off;
		n -= PtrSize - off;
	}
	vp = (void**)b;
	n /= PtrSize;
	for(i=0; i<(uintptr)n; i++) {
		obj = (byte*)vp[i];

		// Words outside the arena cannot be pointers.
		if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
			continue;

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

		// Consult span table to find beginning.
		s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
		if(s == nil)
			continue;
		p = (byte*)((uintptr)s->start<<PageShift);
		if(s->sizeclass == 0) {
			obj = p;
			size = (uintptr)s->npages<<PageShift;
		} else {
			if((byte*)obj >= (byte*)s->limit)
				continue;
			size = runtime_class_to_size[s->sizeclass];
			int32 i = ((byte*)obj - p)/size;
			obj = p+i*size;
		}
		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
		bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;

		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// If not allocated or already marked, done.
		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)  // NOTE: bitSpecial not bitMarked
			continue;
		*bitp |= bitSpecial<<shift;
		if(!(bits & bitMarked))
			runtime_printf("found unmarked block %p in %p\n", obj, vp+i);

		// If object has no pointers, don't need to scan further.
		if((bits & bitNoPointers) != 0)
			continue;

		debug_scanblock(obj, size);
	}
}
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
	if(work.nproc == 1) {
		// Put b on full list.
		if(b != nil) {
			b->next = work.full;
			work.full = b;
		}
		// Grab from empty list if possible.
		b = work.empty;
		if(b != nil)
			work.empty = b->next;
	} else {
		// Put b on full list.
		if(b != nil) {
			runtime_lock(&work.fmu);
			b->next = work.full;
			work.full = b;
			runtime_unlock(&work.fmu);
		}
		// Grab from empty list if possible.
		runtime_lock(&work.emu);
		b = work.empty;
		if(b != nil)
			work.empty = b->next;
		runtime_unlock(&work.emu);
	}
	if(b != nil)
		return b;

	// Need to allocate.
	runtime_lock(&work);
	if(work.nchunk < sizeof *b) {
		work.nchunk = 1<<20;
		work.chunk = runtime_SysAlloc(work.nchunk);
	}
	b = (Workbuf*)work.chunk;
	work.chunk += sizeof *b;
	work.nchunk -= sizeof *b;
	runtime_unlock(&work);
	return b;
}

// Put a work buffer back on the empty list.
static void
putempty(Workbuf *b)
{
	if(work.nproc == 1) {
		b->next = work.empty;
		work.empty = b;
		return;
	}
	runtime_lock(&work.emu);
	b->next = work.empty;
	work.empty = b;
	runtime_unlock(&work.emu);
}
// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
	Workbuf *b1;

	if(work.nproc == 1) {
		// Put b on empty list.
		if(b != nil) {
			b->next = work.empty;
			work.empty = b;
		}
		// Grab from full list if possible.
		// Since work.nproc==1, no one else is
		// going to give us work.
		b = work.full;
		if(b != nil)
			work.full = b->next;
		return b;
	}

	putempty(b);

	// Grab buffer from full list if possible.
	runtime_lock(&work.fmu);
	if(work.full != nil) {
		b1 = work.full;
		work.full = b1->next;
		runtime_unlock(&work.fmu);
		return b1;
	}
	runtime_unlock(&work.fmu);

	// Otherwise wait, rechecking the full list as work appears.
	runtime_xadd(&work.nwait, +1);
	for(;;) {
		if(work.full != nil) {
			runtime_lock(&work.fmu);
			if(work.full != nil) {
				runtime_xadd(&work.nwait, -1);
				b1 = work.full;
				work.full = b1->next;
				runtime_unlock(&work.fmu);
				return b1;
			}
			runtime_unlock(&work.fmu);
			continue;
		}
		if(work.nwait == work.nproc)
			return nil;
		runtime_procyield(20);
	}
}
// Hand off half of b's pointers to another proc that wants work.
static Workbuf*
handoff(Workbuf *b)
{
	int32 n;
	Workbuf *b1;

	// Make new buffer with half of b's pointers.
	b1 = getempty(nil);
	n = b->nobj/2;
	b->nobj -= n;
	b1->nobj = n;
	runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
	nhandoff += n;

	// Put b on full list - let first half of b get stolen.
	runtime_lock(&work.fmu);
	b->next = work.full;
	work.full = b;
	runtime_unlock(&work.fmu);
	return b1;
}
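
// (A worked example of handoff: with b->nobj == 10 on entry, n == 5,
// b keeps b->obj[0..4] and is queued on work.full for stealing, while
// the memmove copies b->obj[5..9] into b1, which is returned to the
// caller.)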
// Scanstack calls scanblock on each of gp's stack segments.
static void
scanstack(void (*scanblock)(byte*, int64), G *gp)
{
#ifdef USING_SPLIT_STACK
	M *mp;
	void* sp;
	size_t spsize;
	void* next_segment;
	void* next_sp;
	void* initial_sp;

	if(gp == runtime_g()) {
		// Scanning our own stack.
		sp = __splitstack_find(nil, nil, &spsize, &next_segment,
				       &next_sp, &initial_sp);
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.
		return;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).

		// The exception is that if the goroutine is about to enter or might
		// have just exited a system call, it may be executing code such
		// as schedlock and may have needed to start a new stack segment.
		// Use the stack segment and stack pointer at the time of
		// the system call instead, since that won't change underfoot.
		if(gp->gcstack != nil) {
			sp = gp->gcstack;
			spsize = gp->gcstack_size;
			next_segment = gp->gcnext_segment;
			next_sp = gp->gcnext_sp;
			initial_sp = gp->gcinitial_sp;
		} else {
			sp = __splitstack_find_context(&gp->stack_context[0],
						       &spsize, &next_segment,
						       &next_sp, &initial_sp);
		}
	}
	if(sp != nil) {
		scanblock(sp, spsize);
		while((sp = __splitstack_find(next_segment, next_sp,
					      &spsize, &next_segment,
					      &next_sp, &initial_sp)) != nil)
			scanblock(sp, spsize);
	}
#else
	M *mp;
	byte* bottom;
	byte* top;

	if(gp == runtime_g()) {
		// Scanning our own stack.
		bottom = (byte*)&gp;
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.
		return;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		bottom = (byte*)gp->gcnext_sp;
		if(bottom == nil)
			return;
	}
	top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
	if(top > bottom)
		scanblock(bottom, top - bottom);
	else
		scanblock(top, bottom - top);
#endif
}
// Markfin calls scanblock on the blocks that have finalizers:
// the things pointed at cannot be freed until the finalizers have run.
static void
markfin(void *v)
{
	uintptr size;

	size = 0;
	if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
		runtime_throw("mark - finalizer inconsistency");

	// do not mark the finalizer block itself.  just mark the things it points at.
	scanblock(v, size);
}
static struct root_list *roots;

void
__go_register_gc_roots (struct root_list* r)
{
	// FIXME: This needs locking if multiple goroutines can call
	// dlopen simultaneously.
	r->next = roots;
	roots = r;
}
static void
debug_markfin(void *v)
{
	uintptr size;

	if(!runtime_mlookup(v, (byte**)&v, &size, nil))
		runtime_throw("debug_mark - finalizer inconsistency");
	debug_scanblock(v, size);
}
static void
mark(void (*scan)(byte*, int64))
{
	G *gp;
	FinBlock *fb;
	struct root_list *pl;

	for(pl = roots; pl != nil; pl = pl->next) {
		struct root* pr = &pl->roots[0];
		while(1) {
			void *decl = pr->decl;
			if(decl == nil)
				break;
			scanblock(decl, pr->size);
			pr++;
		}
	}

	scan((byte*)&runtime_m0, sizeof runtime_m0);
	scan((byte*)&runtime_g0, sizeof runtime_g0);
	scan((byte*)&runtime_allg, sizeof runtime_allg);
	scan((byte*)&runtime_allm, sizeof runtime_allm);
	runtime_MProf_Mark(scan);
	runtime_time_scan(scan);
	runtime_trampoline_scan(scan);
	// mark stacks
	for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
		switch(gp->status){
		default:
			runtime_printf("unexpected G.status %d\n", gp->status);
			runtime_throw("mark - bad status");
		// ...
		case Grunning:
			if(gp != runtime_g())
				runtime_throw("mark - world not stopped");
			scanstack(scan, gp);
			break;
		// ...
		}
	}
	// mark things pointed at by objects with finalizers
	if(scan == debug_scanblock)
		runtime_walkfintab(debug_markfin, scan);
	else
		runtime_walkfintab(markfin, scan);

	for(fb=allfin; fb; fb=fb->alllink)
		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));

	// in multiproc mode, join in the queued work.
	scanblock(nil, 0);
}
static bool
handlespecial(byte *p, uintptr size)
{
	void (*fn)(void*);
	const struct __go_func_type *ft;
	Finalizer *f;

	if(!runtime_getfinalizer(p, true, &fn, &ft)) {
		runtime_setblockspecial(p, false);
		runtime_MProf_Free(p, size);
		return false;
	}

	runtime_lock(&finlock);
	if(finq == nil || finq->cnt == finq->cap) {
		if(finc == nil) {
			finc = runtime_SysAlloc(PageSize);
			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
			finc->alllink = allfin;
			allfin = finc;
		}
		// ...
	}
	f = &finq->fin[finq->cnt];
	// ...
	runtime_unlock(&finlock);
	return true;
}
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
static void
sweep(void)
{
	MSpan *s;
	// ...

	arena_start = runtime_mheap.arena_start;
	now = runtime_nanotime();

	for(;;) {
		s = work.spans;
		if(s == nil)
			break;
		if(!runtime_casp(&work.spans, s, s->allnext))
			continue;

		// Stamp newly unused spans. The scavenger will use that
		// info to potentially give back some pages to the OS.
		if(s->state == MSpanFree && s->unusedsince == 0)
			s->unusedsince = now;

		if(s->state != MSpanInUse)
			continue;

		p = (byte*)(s->start << PageShift);
		cl = s->sizeclass;
		if(cl == 0) {
			size = s->npages<<PageShift;
			n = 1;
		} else {
			// Chunk full of small blocks.
			size = runtime_class_to_size[cl];
			npages = runtime_class_to_allocnpages[cl];
			n = (npages << PageShift) / size;
		}
		// Sweep through n objects of given size starting at p.
		// This thread owns the span now, so it can manipulate
		// the block bitmap without atomic operations.
		for(; n > 0; n--, p += size) {
			uintptr off, *bitp, shift, bits;

			off = (uintptr*)p - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			bits = *bitp>>shift;

			if((bits & bitAllocated) == 0)
				continue;

			if((bits & bitMarked) != 0) {
				if(DebugMark) {
					if(!(bits & bitSpecial))
						runtime_printf("found spurious mark on %p\n", p);
					*bitp &= ~(bitSpecial<<shift);
				}
				*bitp &= ~(bitMarked<<shift);
				continue;
			}

			// Special means it has a finalizer or is being profiled.
			// In DebugMark mode, the bit has been coopted so
			// we have to assume all blocks are special.
			if(DebugMark || (bits & bitSpecial) != 0) {
				if(handlespecial(p, size))
					continue;
			}

			// Mark freed; restore block boundary bit.
			*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
			if(s->sizeclass == 0) {
				// Free large span.
				runtime_unmarkspan(p, 1<<PageShift);
				*(uintptr*)p = 1;	// needs zeroing
				runtime_MHeap_Free(&runtime_mheap, s, 1);
			} else {
				// Free small object.
				if(size > sizeof(uintptr))
					((uintptr*)p)[1] = 1;	// mark as "needs to be zeroed"
				c->local_by_size[s->sizeclass].nfree++;
				runtime_MCache_Free(c, p, s->sizeclass, size);
			}
			c->local_alloc -= size;
		}
	}
}
void
runtime_gchelper(void)
{
	// Wait until main proc is ready for mark help.
	runtime_lock(&work.markgate);
	runtime_unlock(&work.markgate);
	scanblock(nil, 0);

	// Wait until main proc is ready for sweep help.
	runtime_lock(&work.sweepgate);
	runtime_unlock(&work.sweepgate);
	sweep();

	if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
		runtime_notewakeup(&work.alldone);
}
// Initialized from $GOGC.  GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
// memory proportional to the amount already in use.
// If gcpercent=100 and we're using 4M, we'll gc again
// when we get to 8M.  This keeps the gc cost in linear
// proportion to the allocation cost.  Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
static int32 gcpercent = -2;
static int32 gctrace;
static void
stealcache(void)
{
	M *m;

	for(m=runtime_allm; m; m=m->alllink)
		runtime_MCache_ReleaseAll(m->mcache);
}
static void
cachestats(void)
{
	M *m;
	MCache *c;
	uint32 i;
	uint64 stacks_inuse;
	uint64 stacks_sys;

	stacks_inuse = 0;
	stacks_sys = runtime_stacks_sys;
	for(m=runtime_allm; m; m=m->alllink) {
		runtime_purgecachedstats(m);
		// stacks_inuse += m->stackalloc->inuse;
		// stacks_sys += m->stackalloc->sys;
		c = m->mcache;
		for(i=0; i<nelem(c->local_by_size); i++) {
			mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
			c->local_by_size[i].nmalloc = 0;
			mstats.by_size[i].nfree += c->local_by_size[i].nfree;
			c->local_by_size[i].nfree = 0;
		}
	}
	mstats.stacks_inuse = stacks_inuse;
	mstats.stacks_sys = stacks_sys;
}
void
runtime_gc(int32 force)
{
	M *m;
	int64 t0, t1, t2, t3;
	uint64 heap0, heap1, obj0, obj1;
	const byte *p;
	bool extra;

	// Make sure all registers are saved on stack so that
	// scanstack sees them.
	__builtin_unwind_init();

	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks.  To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock.  The next mallocgc
	// without a lock will do the gc instead.
	m = runtime_m();
	if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
		return;

	if(gcpercent == -2) {	// first time through
		p = runtime_getenv("GOGC");
		if(p == nil || p[0] == '\0')
			gcpercent = 100;
		else if(runtime_strcmp((const char*)p, "off") == 0)
			gcpercent = -1;
		else
			gcpercent = runtime_atoi(p);

		p = runtime_getenv("GOGCTRACE");
		if(p != nil)
			gctrace = runtime_atoi(p);
	}
	if(gcpercent < 0)
		return;

	runtime_semacquire(&runtime_worldsema);
	if(!force && mstats.heap_alloc < mstats.next_gc) {
		runtime_semrelease(&runtime_worldsema);
		return;
	}

	t0 = runtime_nanotime();
	// ...
	runtime_stoptheworld();
	heap0 = mstats.heap_alloc;
	obj0 = mstats.nmalloc - mstats.nfree;

	runtime_lock(&work.markgate);
	runtime_lock(&work.sweepgate);

	extra = false;
	work.nproc = 1;
	if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
		runtime_noteclear(&work.alldone);
		work.nproc += runtime_helpgc(&extra);
	}
	// ...

	runtime_unlock(&work.markgate);	// let the helpers in
	mark(scanblock);
	if(DebugMark)
		mark(debug_scanblock);
	t1 = runtime_nanotime();

	work.spans = runtime_mheap.allspans;
	runtime_unlock(&work.sweepgate);	// let the helpers in
	sweep();
	if(work.nproc > 1)
		runtime_notesleep(&work.alldone);
	t2 = runtime_nanotime();
	stealcache();
	cachestats();

	mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
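	// (Worked example, matching the GOGC comment above: with
	// gcpercent=100, 4 MB of live heap after collection, and ignoring
	// runtime_stacks_sys, next_gc = 4M + 4M*100/100 = 8M.)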
	m->locks++;	// disable gc during the mallocs in newproc

	// kick off or wake up goroutine to run queued finalizers
	if(finq != nil) {
		if(fing == nil)
			fing = __go_go(runfinq, nil);
		else if(fingwait) {
			fingwait = 0;
			runtime_ready(fing);
		}
	}
	m->locks--;
	heap1 = mstats.heap_alloc;
	obj1 = mstats.nmalloc - mstats.nfree;

	t3 = runtime_nanotime();
	mstats.last_gc = t3;
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
	mstats.pause_total_ns += t3 - t0;
	mstats.numgc++;
	if(mstats.debuggc)
		runtime_printf("pause %D\n", t3-t0);

	if(gctrace) {
		runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects\n",
			mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
			heap0>>20, heap1>>20, obj0, obj1,
			mstats.nmalloc, mstats.nfree);
	}
	runtime_semrelease(&runtime_worldsema);

	// If we could have used another helper proc, start one now,
	// in the hope that it will be available next time.
	// It would have been even better to start it before the collection,
	// but doing so requires allocating memory, so it's tricky to
	// coordinate.  This lazy approach works out in practice:
	// we don't mind if the first couple gc rounds don't have quite
	// the maximum number of procs.
	runtime_starttheworld(extra);

	// give the queued finalizers, if any, a chance to run
	if(!DebugMark)
		runtime_gosched();

	if(gctrace > 1 && !force)
		runtime_gc(1);
}
void runtime_ReadMemStats(MStats *)
  __asm__("runtime.ReadMemStats");

void
runtime_ReadMemStats(MStats *stats)
{
	// Have to acquire worldsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	runtime_semacquire(&runtime_worldsema);
	// ...
	runtime_stoptheworld();
	cachestats();
	*stats = mstats;
	// ...
	runtime_semrelease(&runtime_worldsema);
	runtime_starttheworld(false);
}
static void
runfinq(void* dummy __attribute__ ((unused)))
{
	G *gp;
	Finalizer *f;
	FinBlock *fb, *next;
	uint32 i;

	gp = runtime_g();
	for(;;) {
		// There's no need for a lock in this section
		// because it only conflicts with the garbage
		// collector, and the garbage collector only
		// runs when everyone else is stopped, and
		// runfinq only stops at the gosched() or
		// during the calls in the for loop.
		fb = finq;
		finq = nil;
		if(fb == nil) {
			fingwait = 1;
			gp->status = Gwaiting;
			gp->waitreason = "finalizer wait";
			runtime_gosched();
			continue;
		}
		for(; fb; fb=next) {
			next = fb->next;
			for(i=0; i<(uint32)fb->cnt; i++) {
				void *params[1];

				f = &fb->fin[i];
				params[0] = &f->arg;
				reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
				// ...
			}
			// ...
		}
		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
	}
}
// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
void
runtime_markallocated(void *v, uintptr n, bool noptr)
{
	uintptr *b, obits, bits, off, shift;

	if(0)
		runtime_printf("markallocated %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markallocated: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
		if(noptr)
			bits |= bitNoPointers<<shift;
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
// mark the block at v of size n as freed.
void
runtime_markfreed(void *v, uintptr n)
{
	uintptr *b, obits, bits, off, shift;

	if(0)
		runtime_printf("markfreed %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markfreed: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
// check that the block at v of size n is marked freed.
void
runtime_checkfreed(void *v, uintptr n)
{
	uintptr *b, bits, off, shift;

	if(!runtime_checking)
		return;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		return;	// not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = *b>>shift;
	if((bits & bitAllocated) != 0) {
		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
			v, n, off, bits & bitMask);
		runtime_throw("checkfreed: not freed");
	}
}
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
{
	uintptr *b, off, shift;
	byte *p;

	if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markspan: bad pointer");

	p = v;
	if(leftover)	// mark a boundary just past end of last block too
		n++;
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;  // word offset
		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
	}
}
// unmark the span of memory at v of length n bytes.
void
runtime_unmarkspan(void *v, uintptr n)
{
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("unmarkspan: bad pointer");

	p = v;
	off = p - (uintptr*)runtime_mheap.arena_start;  // word offset
	if(off % wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	n /= PtrSize;
	if(n%wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
	while(n-- > 0)
		*b-- = 0;
}
bool
runtime_blockspecial(void *v)
{
	uintptr *b, off, shift;

	if(DebugMark)
		return true;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	return (*b & (bitSpecial<<shift)) != 0;
}
void
runtime_setblockspecial(void *v, bool s)
{
	uintptr *b, off, shift, bits, obits;

	if(DebugMark)
		return;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		if(s)
			bits = obits | (bitSpecial<<shift);
		else
			bits = obits & ~(bitSpecial<<shift);
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
void
runtime_MHeap_MapBits(MHeap *h)
{
	size_t page_size;

	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	enum {
		bitmapChunk = 8192
	};
	uintptr n;

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
	if(h->bitmap_mapped >= n)
		return;

	page_size = getpagesize();
	n = (n+page_size-1) & ~(page_size-1);

	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
	h->bitmap_mapped = n;
}