runtime: fix context used by getTraceback
[gcc.git] / libgo / runtime / parfor.c
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Parallel for algorithm.
6
7 #include "runtime.h"
8 #include "malloc.h"
9 #include "arch.h"
10
11 struct ParForThread
12 {
13 // the thread's iteration space [32lsb, 32msb)
14 uint64 pos __attribute__((aligned(8)));
15 // stats
16 uint64 nsteal;
17 uint64 nstealcnt;
18 uint64 nprocyield;
19 uint64 nosyield;
20 uint64 nsleep;
21 byte pad[CacheLineSize];
22 };
23
24 ParFor*
25 runtime_parforalloc(uint32 nthrmax)
26 {
27 ParFor *desc;
28
29 // The ParFor object is followed by CacheLineSize padding
30 // and then nthrmax ParForThread.
31 desc = (ParFor*)runtime_mallocgc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread), 0, FlagNoInvokeGC);
32 desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
33 desc->nthrmax = nthrmax;
34 return desc;
35 }
36
37 void
38 runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, bool wait, const FuncVal *body)
39 {
40 uint32 i, begin, end;
41 uint64 *pos;
42
43 if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
44 runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
45 runtime_throw("parfor: invalid args");
46 }
47
48 desc->body = body;
49 desc->done = 0;
50 desc->nthr = nthr;
51 desc->thrseq = 0;
52 desc->cnt = n;
53 desc->wait = wait;
54 desc->nsteal = 0;
55 desc->nstealcnt = 0;
56 desc->nprocyield = 0;
57 desc->nosyield = 0;
58 desc->nsleep = 0;
59 for(i=0; i<nthr; i++) {
60 begin = (uint64)n*i / nthr;
61 end = (uint64)n*(i+1) / nthr;
62 pos = &desc->thr[i].pos;
63 if(((uintptr)pos & 7) != 0)
64 runtime_throw("parforsetup: pos is not aligned");
65 *pos = (uint64)begin | (((uint64)end)<<32);
66 }
67 }
68
69 void
70 runtime_parfordo(ParFor *desc)
71 {
72 ParForThread *me;
73 uint32 tid, begin, end, begin2, try, victim, i;
74 uint64 *mypos, *victimpos, pos, newpos;
75 const FuncVal *body;
76 void (*bodyfn)(ParFor*, uint32);
77 bool idle;
78
79 // Obtain 0-based thread index.
80 tid = runtime_xadd(&desc->thrseq, 1) - 1;
81 if(tid >= desc->nthr) {
82 runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
83 runtime_throw("parfor: invalid tid");
84 }
85
86 body = desc->body;
87 bodyfn = (void (*)(ParFor*, uint32))(void*)body->fn;
88
89 // If single-threaded, just execute the for serially.
90 if(desc->nthr==1) {
91 for(i=0; i<desc->cnt; i++)
92 __builtin_call_with_static_chain (bodyfn(desc, i), body);
93 return;
94 }
95
96 me = &desc->thr[tid];
97 mypos = &me->pos;
98 for(;;) {
99 for(;;) {
100 // While there is local work,
101 // bump low index and execute the iteration.
102 pos = runtime_xadd64(mypos, 1);
103 begin = (uint32)pos-1;
104 end = (uint32)(pos>>32);
105 if(begin < end) {
106 __builtin_call_with_static_chain(bodyfn(desc, begin), body);
107 continue;
108 }
109 break;
110 }
111
112 // Out of work, need to steal something.
113 idle = false;
114 for(try=0;; try++) {
115 // If we don't see any work for long enough,
116 // increment the done counter...
117 if(try > desc->nthr*4 && !idle) {
118 idle = true;
119 runtime_xadd(&desc->done, 1);
120 }
121 // ...if all threads have incremented the counter,
122 // we are done.
123 if(desc->done + !idle == desc->nthr) {
124 if(!idle)
125 runtime_xadd(&desc->done, 1);
126 goto exit;
127 }
128 // Choose a random victim for stealing.
129 victim = runtime_fastrand() % (desc->nthr-1);
130 if(victim >= tid)
131 victim++;
132 victimpos = &desc->thr[victim].pos;
133 for(;;) {
134 // See if it has any work.
135 pos = runtime_atomicload64(victimpos);
136 begin = (uint32)pos;
137 end = (uint32)(pos>>32);
138 if(begin+1 >= end) {
139 begin = end = 0;
140 break;
141 }
142 if(idle) {
143 runtime_xadd(&desc->done, -1);
144 idle = false;
145 }
146 begin2 = begin + (end-begin)/2;
147 newpos = (uint64)begin | (uint64)begin2<<32;
148 if(runtime_cas64(victimpos, pos, newpos)) {
149 begin = begin2;
150 break;
151 }
152 }
153 if(begin < end) {
154 // Has successfully stolen some work.
155 if(idle)
156 runtime_throw("parfor: should not be idle");
157 runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
158 me->nsteal++;
159 me->nstealcnt += end-begin;
160 break;
161 }
162 // Backoff.
163 if(try < desc->nthr) {
164 // nothing
165 } else if (try < 4*desc->nthr) {
166 me->nprocyield++;
167 runtime_procyield(20);
168 // If a caller asked not to wait for the others, exit now
169 // (assume that most work is already done at this point).
170 } else if (!desc->wait) {
171 if(!idle)
172 runtime_xadd(&desc->done, 1);
173 goto exit;
174 } else if (try < 6*desc->nthr) {
175 me->nosyield++;
176 runtime_osyield();
177 } else {
178 me->nsleep++;
179 runtime_usleep(1);
180 }
181 }
182 }
183 exit:
184 runtime_xadd64(&desc->nsteal, me->nsteal);
185 runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
186 runtime_xadd64(&desc->nprocyield, me->nprocyield);
187 runtime_xadd64(&desc->nosyield, me->nosyield);
188 runtime_xadd64(&desc->nsleep, me->nsleep);
189 me->nsteal = 0;
190 me->nstealcnt = 0;
191 me->nprocyield = 0;
192 me->nosyield = 0;
193 me->nsleep = 0;
194 }
195
196 // For testing from Go.
197 void
198 runtime_parforiters(ParFor *desc, uintptr tid, uintptr *start, uintptr *end)
199 {
200 *start = (uint32)desc->thr[tid].pos;
201 *end = (uint32)(desc->thr[tid].pos>>32);
202 }