1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
6 // Patterned after tcmalloc's algorithms; shorter code.
15 // NOTE(rsc): Everything here could use cas if contention became an issue.
18 // Per-call-stack allocation information.
19 // Lookup by hashing call stack into a linked-list hash table.
20 typedef struct Bucket Bucket;
23 Bucket *next; // next in hash list
24 Bucket *allnext; // next in list of all buckets
// Counts accumulated since the last garbage collection; per the comment
// on runtime_MProf_GC below, they become "real" (are folded into the
// cumulative totals) only once a GC has run.
29 uintptr recent_allocs; // since last gc
31 uintptr recent_alloc_bytes;
32 uintptr recent_free_bytes;
// Size of the top-level hash table of bucket chains.
38 BuckHashSize = 179999,
40 static Bucket **buckhash; // hash table; allocated lazily in stkbucket
41 static Bucket *buckets; // list of all buckets, chained through allnext
42 static uintptr bucketmem; // bytes consumed by buckets (accounting only)
44 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
46 stkbucket(uintptr *stk, int32 nstk, bool alloc)
// First call: allocate the hash table straight from the OS (SysAlloc),
// not via mallocgc — consistent with the runtime_MProf_Mark note that
// buckhash is not GC-scanned.
53 buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
54 mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
// Hash the raw call-stack words (hash-mixing steps elided from this view).
59 for(i=0; i<nstk; i++) {
// Look for an existing bucket with the same hash, depth, and exact stack.
68 for(b = buckhash[i]; b; b=b->next)
69 if(b->hash == h && b->nstk == (uintptr)nstk &&
70 runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
// Not found: allocate a new bucket with the stack stored inline
// immediately after the struct, and account for its memory.
76 b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
77 bucketmem += sizeof *b + nstk*sizeof stk[0];
78 runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
// Push onto the front of the hash chain.
81 b->next = buckhash[i];
88 // Record that a gc just happened: all the 'recent' statistics are now real.
90 runtime_MProf_GC(void)
94 runtime_lock(&proflock);
// Fold every bucket's 'recent' counters into the cumulative totals
// and reset them for the next GC cycle, all under proflock.
95 for(b=buckets; b; b=b->allnext) {
96 b->allocs += b->recent_allocs;
97 b->frees += b->recent_frees;
98 b->alloc_bytes += b->recent_alloc_bytes;
99 b->free_bytes += b->recent_free_bytes;
100 b->recent_allocs = 0;
102 b->recent_alloc_bytes = 0;
103 b->recent_free_bytes = 0;
105 runtime_unlock(&proflock);
108 // Map from pointer to Bucket* that allocated it.
110 // Linked-list hash table for top N-20 bits.
111 // Array index for next 13 bits.
112 // Linked list for next 7 bits.
113 // This is more efficient than using a general map,
114 // because of the typical clustering of the pointer keys.
116 typedef struct AddrHash AddrHash;
117 typedef struct AddrEntry AddrEntry;
// Top-level node: one AddrHash covers one 1MB region (keyed by addr>>20).
121 AddrHash *next; // next in top-level hash table linked list
122 uintptr addr; // addr>>20
123 AddrEntry *dense[1<<13]; // chains indexed by bits 7..19 of the address
// Bottom-level node: one entry per recorded pointer within the region.
128 AddrEntry *next; // next in bottom-level linked list
134 AddrHashBits = 12 // 1MB per entry, so good for 4GB of used address space
136 static AddrHash *addrhash[1<<AddrHashBits]; // top-level table
137 static AddrEntry *addrfree; // free list of entries, refilled 64 at a time
138 static uintptr addrmem; // bytes used by this map (accounting only)
140 // Multiplicative hash function:
141 // hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
142 // This is a good multiplier as suggested in CLR, Knuth. The hash
143 // value is taken to be the top AddrHashBits bits of the bottom 32 bits
144 // of the multiplied value.
146 HashMultiplier = 2654435769U
149 // Set the bucket associated with addr to b.
151 setaddrbucket(uintptr addr, Bucket *b)
// Multiplicative hash of the region key (addr>>20) picks the
// top-level chain; search it for this region's AddrHash.
158 h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
159 for(ah=addrhash[h]; ah; ah=ah->next)
160 if(ah->addr == (addr>>20))
// No AddrHash for this 1MB region yet: allocate one and link it in.
163 ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
164 addrmem += sizeof *ah;
165 ah->next = addrhash[h];
// Take an AddrEntry from the free list, refilling with a batch of 64
// when empty (chaining of the batch onto addrfree is elided here).
170 if((e = addrfree) == nil) {
171 e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
172 addrmem += 64*sizeof *e;
173 for(i=0; i+1<64; i++)
// The low 20 bits of addr are stored complemented; getaddrbucket
// compares against the same complemented form.
178 e->addr = (uint32)~(addr & ((1<<20)-1));
180 h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
// Push onto the front of the per-slot chain.
181 e->next = ah->dense[h];
185 // Get the bucket associated with addr and clear the association.
187 getaddrbucket(uintptr addr)
// Same two-level lookup as setaddrbucket: region hash first.
194 h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
195 for(ah=addrhash[h]; ah; ah=ah->next)
196 if(ah->addr == (addr>>20))
201 h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
// Walk the chain keeping a pointer to the link field, so the matching
// entry can be unlinked in place (unlink + freelist return elided here).
202 for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
// Keys are stored as the complement of the low 20 bits (see setaddrbucket).
203 if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
214 // Called by malloc to record a profiled block.
216 runtime_MProf_Malloc(void *p, uintptr size)
// Capture up to 32 caller PCs, skipping this frame.
228 nstk = runtime_callers(1, stk, 32);
229 runtime_lock(&proflock);
// Find or create the bucket for this call stack (alloc=true).
230 b = stkbucket(stk, nstk, true);
232 b->recent_alloc_bytes += size;
// Remember which bucket owns p so runtime_MProf_Free can credit the free.
233 setaddrbucket((uintptr)p, b);
234 runtime_unlock(&proflock);
239 // Called when freeing a profiled block.
241 runtime_MProf_Free(void *p, uintptr size)
251 runtime_lock(&proflock);
// Look up — and clear — the bucket recorded by runtime_MProf_Malloc.
// NOTE(review): the nil-check on b is elided from this view; confirm
// the original guards the update below.
252 b = getaddrbucket((uintptr)p);
255 b->recent_free_bytes += size;
257 runtime_unlock(&proflock);
263 // Go interface to profile data. (Declared in extern.go)
264 // Assumes Go sizeof(int) == sizeof(int32)
266 // Must match MemProfileRecord in debug.go.
267 typedef struct Record Record;
// Cumulative byte and object counts copied out of a Bucket.
269 int64 alloc_bytes, free_bytes;
270 int64 alloc_objects, free_objects;
274 // Write b's data to r.
276 record(Record *r, Bucket *b)
// Copy the cumulative totals (recent_* counters are deliberately
// excluded — they are not yet "real"; see runtime_MProf_GC).
280 r->alloc_bytes = b->alloc_bytes;
281 r->free_bytes = b->free_bytes;
282 r->alloc_objects = b->allocs;
283 r->free_objects = b->frees;
// Copy as much of the stack as fits, then pad the remainder
// (the padding assignment itself is elided from this view).
284 for(i=0; i<b->nstk && i<nelem(r->stk); i++)
285 r->stk[i] = b->stk[i];
286 for(; i<nelem(r->stk); i++)
290 func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
294 runtime_lock(&proflock);
// First pass: count buckets passing the filter so the caller can size p.
// include_inuse_zero also reports buckets whose bytes have all been freed
// (alloc_bytes == free_bytes).
296 for(b=buckets; b; b=b->allnext)
297 if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
// Second pass: fill the caller-supplied slice with matching records.
302 r = (Record*)p.__values;
303 for(b=buckets; b; b=b->allnext)
304 if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
307 runtime_unlock(&proflock);
311 runtime_MProf_Mark(void (*scan)(byte *, int64))
// Report the profiler's GC roots so its heap structures stay live.
313 // buckhash is not allocated via mallocgc.
314 scan((byte*)&buckets, sizeof buckets);
315 scan((byte*)&addrhash, sizeof addrhash);
316 scan((byte*)&addrfree, sizeof addrfree);
319 // Must match StackRecord in debug.go.
320 typedef struct TRecord TRecord;
325 func ThreadCreateProfile(p Slice) (n int32, ok bool) {
// Snapshot the head of the m list atomically; NOTE(review): the walk
// itself is unlocked — presumably allm is append-only, confirm.
329 first = runtime_atomicloadp(&runtime_allm);
// Count m's so the caller knows how many records are needed.
331 for(m=first; m; m=m->alllink)
336 r = (TRecord*)p.__values;
337 for(m=first; m; m=m->alllink) {
// Each m carries the stack captured at its creation (createstack).
338 runtime_memmove(r->stk, m->createstack, sizeof r->stk);
344 func Stack(b Slice, all bool) (n int32) {
347 sp = runtime_getcallersp(&b);
348 pc = runtime_getcallerpc(&b);
// Stop the world so stacks are stable while being formatted.
351 runtime_semacquire(&runtime_worldsema);
352 runtime_m()->gcing = 1;
353 runtime_stoptheworld();
// Redirect the runtime's print output into the caller's buffer.
360 g->writebuf = (byte*)b.__values;
361 g->writenbuf = b.__count;
// NOTE(review): traceback printing appears disabled in this port —
// the calls below are commented out in the original.
364 // runtime_goroutineheader(g);
365 // runtime_traceback(pc, sp, 0, g);
367 // runtime_tracebackothers(g);
// Bytes written = original capacity minus remaining buffer space.
368 n = b.__count - g->writenbuf;
// Restart the world.
374 runtime_m()->gcing = 0;
375 runtime_semrelease(&runtime_worldsema);
376 runtime_starttheworld(false);
// Save g's stack trace into r, zero-padding unused slots.
381 saveg(byte *pc, byte *sp, G *g, TRecord *r)
// NOTE(review): the traceback call is commented out in the original,
// so the source of n is unclear from this view; confirm upstream.
388 // n = runtime_gentraceback(pc, sp, 0, g, 0, r->stk, nelem(r->stk));
// Pad the remainder of r->stk past the n recorded entries (elided).
390 if((size_t)n < nelem(r->stk))
394 func GoroutineProfile(b Slice) (n int32, ok bool) {
399 sp = runtime_getcallersp(&b);
400 pc = runtime_getcallerpc(&b);
// Preliminary goroutine count, taken before stopping the world.
403 n = runtime_gcount();
// Stop the world so the goroutine list and stacks are stable.
405 runtime_semacquire(&runtime_worldsema);
406 runtime_m()->gcing = 1;
407 runtime_stoptheworld();
// Re-count with the world stopped: the earlier count may be stale.
409 n = runtime_gcount();
413 r = (TRecord*)b.__values;
// Record the current goroutine first, then walk all others,
// skipping the current one and dead goroutines.
414 saveg(pc, sp, g, r++);
415 for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
416 if(gp == g || gp->status == Gdead)
// NOTE(review): saving other goroutines' stacks is disabled in this port.
418 //saveg(gp->sched.pc, gp->sched.sp, gp, r++);
// Restart the world.
423 runtime_m()->gcing = 0;
424 runtime_semrelease(&runtime_worldsema);
425 runtime_starttheworld(false);