runtime: correct a logic error in hashmap growth.
[gcc.git] / libgo / runtime / mprof.goc
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Malloc profiling.
6 // Patterned after tcmalloc's algorithms; shorter code.
7
8 package runtime
9 #include "runtime.h"
10 #include "arch.h"
11 #include "malloc.h"
12 #include "defs.h"
13 #include "go-type.h"
14
15 // NOTE(rsc): Everything here could use cas if contention became an issue.
16 static Lock proflock;
17
18 // Per-call-stack allocation information.
19 // Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list (chain within buckhash[i])
	Bucket	*allnext;	// next in list of all buckets (head is 'buckets')
	uintptr	allocs;	// allocation count, stable as of last GC
	uintptr	frees;	// free count, stable as of last GC
	uintptr	alloc_bytes;	// bytes allocated, stable as of last GC
	uintptr	free_bytes;	// bytes freed, stable as of last GC
	uintptr	recent_allocs;	// since last gc; folded into allocs by runtime_MProf_GC
	uintptr	recent_frees;	// since last gc; folded into frees
	uintptr	recent_alloc_bytes;	// since last gc; folded into alloc_bytes
	uintptr	recent_free_bytes;	// since last gc; folded into free_bytes
	uintptr	hash;	// cached stack hash; checked before the full stack compare
	uintptr	nstk;	// number of PCs stored in stk
	uintptr	stk[1];	// call-stack PCs; really nstk entries (bucket allocated oversize)
};
enum {
	BuckHashSize = 179999,	// number of chains in the bucket hash table
};
static Bucket **buckhash;	// lazily allocated in stkbucket via runtime_SysAlloc
static Bucket *buckets;	// list of all buckets, linked through allnext
static uintptr bucketmem;	// total bytes allocated for Bucket structures
43
44 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
45 static Bucket*
46 stkbucket(uintptr *stk, int32 nstk, bool alloc)
47 {
48 int32 i;
49 uintptr h;
50 Bucket *b;
51
52 if(buckhash == nil) {
53 buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
54 mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
55 }
56
57 // Hash stack.
58 h = 0;
59 for(i=0; i<nstk; i++) {
60 h += stk[i];
61 h += h<<10;
62 h ^= h>>6;
63 }
64 h += h<<3;
65 h ^= h>>11;
66
67 i = h%BuckHashSize;
68 for(b = buckhash[i]; b; b=b->next)
69 if(b->hash == h && b->nstk == (uintptr)nstk &&
70 runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
71 return b;
72
73 if(!alloc)
74 return nil;
75
76 b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
77 bucketmem += sizeof *b + nstk*sizeof stk[0];
78 runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
79 b->hash = h;
80 b->nstk = nstk;
81 b->next = buckhash[i];
82 buckhash[i] = b;
83 b->allnext = buckets;
84 buckets = b;
85 return b;
86 }
87
88 // Record that a gc just happened: all the 'recent' statistics are now real.
89 void
90 runtime_MProf_GC(void)
91 {
92 Bucket *b;
93
94 runtime_lock(&proflock);
95 for(b=buckets; b; b=b->allnext) {
96 b->allocs += b->recent_allocs;
97 b->frees += b->recent_frees;
98 b->alloc_bytes += b->recent_alloc_bytes;
99 b->free_bytes += b->recent_free_bytes;
100 b->recent_allocs = 0;
101 b->recent_frees = 0;
102 b->recent_alloc_bytes = 0;
103 b->recent_free_bytes = 0;
104 }
105 runtime_unlock(&proflock);
106 }
107
108 // Map from pointer to Bucket* that allocated it.
109 // Three levels:
110 // Linked-list hash table for top N-20 bits.
111 // Array index for next 13 bits.
112 // Linked list for next 7 bits.
113 // This is more efficient than using a general map,
114 // because of the typical clustering of the pointer keys.
115
typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

// Top level of the address map: one AddrHash per distinct addr>>20
// (1MB region), found via a linked-list hash table.
struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<13];	// second level, indexed by bits 7..19 of addr
};

// Bottom level: one AddrEntry per recorded allocation, in a linked
// list hanging off dense[]; entries are recycled through addrfree.
struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;	// stores ~(addr & ((1<<20)-1)); see setaddrbucket
	Bucket *b;	// bucket that allocated this address
};

enum {
	AddrHashBits = 12	// 1MB per entry, so good for 4GB of used address space
};
static AddrHash *addrhash[1<<AddrHashBits];	// top-level table, statically sized
static AddrEntry *addrfree;	// free list of recycled AddrEntries
static uintptr addrmem;	// total bytes allocated for the address map
148
149 // Set the bucket associated with addr to b.
150 static void
151 setaddrbucket(uintptr addr, Bucket *b)
152 {
153 int32 i;
154 uint32 h;
155 AddrHash *ah;
156 AddrEntry *e;
157
158 h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
159 for(ah=addrhash[h]; ah; ah=ah->next)
160 if(ah->addr == (addr>>20))
161 goto found;
162
163 ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
164 addrmem += sizeof *ah;
165 ah->next = addrhash[h];
166 ah->addr = addr>>20;
167 addrhash[h] = ah;
168
169 found:
170 if((e = addrfree) == nil) {
171 e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
172 addrmem += 64*sizeof *e;
173 for(i=0; i+1<64; i++)
174 e[i].next = &e[i+1];
175 e[63].next = nil;
176 }
177 addrfree = e->next;
178 e->addr = (uint32)~(addr & ((1<<20)-1));
179 e->b = b;
180 h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
181 e->next = ah->dense[h];
182 ah->dense[h] = e;
183 }
184
185 // Get the bucket associated with addr and clear the association.
186 static Bucket*
187 getaddrbucket(uintptr addr)
188 {
189 uint32 h;
190 AddrHash *ah;
191 AddrEntry *e, **l;
192 Bucket *b;
193
194 h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
195 for(ah=addrhash[h]; ah; ah=ah->next)
196 if(ah->addr == (addr>>20))
197 goto found;
198 return nil;
199
200 found:
201 h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
202 for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
203 if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
204 *l = e->next;
205 b = e->b;
206 e->next = addrfree;
207 addrfree = e;
208 return b;
209 }
210 }
211 return nil;
212 }
213
214 // Called by malloc to record a profiled block.
215 void
216 runtime_MProf_Malloc(void *p, uintptr size)
217 {
218 M *m;
219 int32 nstk;
220 uintptr stk[32];
221 Bucket *b;
222
223 m = runtime_m();
224 if(m->nomemprof > 0)
225 return;
226
227 m->nomemprof++;
228 nstk = runtime_callers(1, stk, 32);
229 runtime_lock(&proflock);
230 b = stkbucket(stk, nstk, true);
231 b->recent_allocs++;
232 b->recent_alloc_bytes += size;
233 setaddrbucket((uintptr)p, b);
234 runtime_unlock(&proflock);
235 m = runtime_m();
236 m->nomemprof--;
237 }
238
239 // Called when freeing a profiled block.
240 void
241 runtime_MProf_Free(void *p, uintptr size)
242 {
243 M *m;
244 Bucket *b;
245
246 m = runtime_m();
247 if(m->nomemprof > 0)
248 return;
249
250 m->nomemprof++;
251 runtime_lock(&proflock);
252 b = getaddrbucket((uintptr)p);
253 if(b != nil) {
254 b->recent_frees++;
255 b->recent_free_bytes += size;
256 }
257 runtime_unlock(&proflock);
258 m = runtime_m();
259 m->nomemprof--;
260 }
261
262
263 // Go interface to profile data. (Declared in extern.go)
264 // Assumes Go sizeof(int) == sizeof(int32)
265
266 // Must match MemProfileRecord in debug.go.
// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;	// totals copied from a Bucket
	int64 alloc_objects, free_objects;	// counts copied from a Bucket
	uintptr stk[32];	// allocation stack, zero-padded past nstk entries
};
273
274 // Write b's data to r.
275 static void
276 record(Record *r, Bucket *b)
277 {
278 uint32 i;
279
280 r->alloc_bytes = b->alloc_bytes;
281 r->free_bytes = b->free_bytes;
282 r->alloc_objects = b->allocs;
283 r->free_objects = b->frees;
284 for(i=0; i<b->nstk && i<nelem(r->stk); i++)
285 r->stk[i] = b->stk[i];
286 for(; i<nelem(r->stk); i++)
287 r->stk[i] = 0;
288 }
289
290 func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
291 Bucket *b;
292 Record *r;
293
294 runtime_lock(&proflock);
295 n = 0;
296 for(b=buckets; b; b=b->allnext)
297 if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
298 n++;
299 ok = false;
300 if(n <= p.__count) {
301 ok = true;
302 r = (Record*)p.__values;
303 for(b=buckets; b; b=b->allnext)
304 if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
305 record(r++, b);
306 }
307 runtime_unlock(&proflock);
308 }
309
// Report the profiler's pointer roots to the given scan callback:
// the all-buckets list and the address map roots.
void
runtime_MProf_Mark(void (*scan)(byte *, int64))
{
	// buckhash is not allocated via mallocgc.
	scan((byte*)&buckets, sizeof buckets);
	scan((byte*)&addrhash, sizeof addrhash);
	scan((byte*)&addrfree, sizeof addrfree);
}
318
319 // Must match StackRecord in debug.go.
// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];	// stack trace PCs; 0 terminates if shorter than 32
};
324
325 func ThreadCreateProfile(p Slice) (n int32, ok bool) {
326 TRecord *r;
327 M *first, *m;
328
329 first = runtime_atomicloadp(&runtime_allm);
330 n = 0;
331 for(m=first; m; m=m->alllink)
332 n++;
333 ok = false;
334 if(n <= p.__count) {
335 ok = true;
336 r = (TRecord*)p.__values;
337 for(m=first; m; m=m->alllink) {
338 runtime_memmove(r->stk, m->createstack, sizeof r->stk);
339 r++;
340 }
341 }
342 }
343
func Stack(b Slice, all bool) (n int32) {
	byte *pc, *sp;
	bool enablegc;

	sp = runtime_getcallersp(&b);
	pc = runtime_getcallerpc(&b);

	// When dumping all goroutines, stop the world first and disable
	// GC so the traceback sees a consistent snapshot.
	if(all) {
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();
		enablegc = mstats.enablegc;
		mstats.enablegc = false;
	}

	if(b.__count == 0)
		n = 0;
	else{
		// Redirect the traceback machinery's output into the
		// caller-provided buffer via g->writebuf.
		G* g = runtime_g();
		g->writebuf = (byte*)b.__values;
		g->writenbuf = b.__count;
		USED(pc);
		USED(sp);
		runtime_goroutineheader(g);
		runtime_traceback();
		runtime_goroutinetrailer(g);
		if(all)
			runtime_tracebackothers(g);
		// Bytes written = original capacity minus remaining space.
		n = b.__count - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	// Undo the stop-the-world state in reverse order.
	if(all) {
		runtime_m()->gcing = 0;
		mstats.enablegc = enablegc;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld(false);
	}
}
384
385 static void
386 saveg(G *g, TRecord *r)
387 {
388 int32 n;
389
390 if(g == runtime_g())
391 n = runtime_callers(0, r->stk, nelem(r->stk));
392 else {
393 // FIXME: Not implemented.
394 n = 0;
395 }
396 if((size_t)n < nelem(r->stk))
397 r->stk[n] = 0;
398 }
399
func GoroutineProfile(b Slice) (n int32, ok bool) {
	TRecord *r;
	G *gp;

	ok = false;
	// Cheap pre-check before stopping the world; the count is
	// re-read below because goroutines may appear in between.
	n = runtime_gcount();
	if(n <= b.__count) {
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();

		// Re-check under the stopped world, where the count is stable.
		n = runtime_gcount();
		if(n <= b.__count) {
			G* g = runtime_g();
			ok = true;
			r = (TRecord*)b.__values;
			// The current goroutine first, then every other live one.
			saveg(g, r++);
			for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp, r++);
			}
		}

		runtime_m()->gcing = 0;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld(false);
	}
}
429