r600g/sb: fix allocation of indirectly addressed input arrays
[mesa.git] / src / gallium / drivers / r600 / sb / sb_ra_coalesce.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define RA_DEBUG 0
28
29 #if RA_DEBUG
30 #define RA_DUMP(q) do { q } while (0)
31 #else
32 #define RA_DUMP(q)
33 #endif
34
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37
38 namespace r600_sb {
39
40 using std::cerr;
41
// Pass entry point: all actual coalescing work is delegated to the
// shader's coalescer instance (sh.coal).
int ra_coalesce::run() {
	return sh.coal.run();
}
45
// Record an affinity edge between two values - the allocator will try to
// place both in the same register. The edge is consumed later by
// build_chunks(); higher cost means stronger affinity.
void coalescer::add_edge(value* a, value* b, unsigned cost) {
	// Coalescing only applies to values living in scalar GPRs.
	assert(a->is_sgpr() && b->is_sgpr());
	edges.insert(new ra_edge(a,b, cost));
}
50
51 void coalescer::create_chunk(value *v) {
52
53 assert(v->is_sgpr());
54
55 ra_chunk *c = new ra_chunk();
56
57 c->values.push_back(v);
58
59 if (v->is_chan_pinned())
60 c->flags |= RCF_PIN_CHAN;
61 if (v->is_reg_pinned()) {
62 c->flags |= RCF_PIN_REG;
63 }
64
65 c->pin = v->pin_gpr;
66
67 RA_DUMP(
68 cerr << "create_chunk: ";
69 dump_chunk(c);
70 );
71
72 all_chunks.push_back(c);
73 v->chunk = c;
74
75 }
76
77 void coalescer::unify_chunks(ra_edge *e) {
78 ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
79
80 RA_DUMP(
81 cerr << "unify_chunks: ";
82 dump_chunk(c1);
83 dump_chunk(c2);
84 );
85
86 if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
87 c1->flags |= RCF_PIN_CHAN;
88 c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
89 }
90
91 if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
92 c1->flags |= RCF_PIN_REG;
93 c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
94 }
95
96 c1->values.reserve(c1->values.size() + c2->values.size());
97
98 for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
99 ++I) {
100 (*I)->chunk = c1;
101 c1->values.push_back(*I);
102 }
103
104 chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
105 assert(F != all_chunks.end());
106
107 all_chunks.erase(F);
108
109 c1->cost += c2->cost + e->cost;
110 delete c2;
111 }
112
113 bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
114 unsigned pin_flags = (c1->flags & c2->flags) &
115 (RCF_PIN_CHAN | RCF_PIN_REG);
116
117 if ((pin_flags & RCF_PIN_CHAN) &&
118 c1->pin.chan() != c2->pin.chan())
119 return true;
120
121 if ((pin_flags & RCF_PIN_REG) &&
122 c1->pin.sel() != c2->pin.sel())
123 return true;
124
125 for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
126 ++I) {
127 value *v1 = *I;
128
129 for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
130 ++I) {
131 value *v2 = *I;
132
133 if (!v1->v_equal(v2) && v1->interferences.contains(v2))
134 return true;
135 }
136 }
137 return false;
138 }
139
140 void coalescer::build_chunks() {
141
142 for (edge_queue::iterator I = edges.begin(), E = edges.end();
143 I != E; ++I) {
144
145 ra_edge *e = *I;
146
147 if (!e->a->chunk)
148 create_chunk(e->a);
149
150 if (!e->b->chunk)
151 create_chunk(e->b);
152
153 ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
154
155 if (c1 == c2) {
156 c1->cost += e->cost;
157 } else if (!chunks_interference(c1, c2))
158 unify_chunks(e);
159 }
160 }
161
// Allocate a new constraint of the given kind and register it in
// all_constraints (the coalescer owns it; freed in the destructor).
ra_constraint* coalescer::create_constraint(constraint_kind kind) {
	ra_constraint *c = new ra_constraint(kind);
	all_constraints.push_back(c);
	return c;
}
167
168 void coalescer::dump_edges() {
169 cerr << "######## affinity edges\n";
170
171 for (edge_queue::iterator I = edges.begin(), E = edges.end();
172 I != E; ++I) {
173 ra_edge* e = *I;
174 cerr << " ra_edge ";
175 dump::dump_val(e->a);
176 cerr << " <-> ";
177 dump::dump_val(e->b);
178 cerr << " cost = " << e->cost << "\n";
179 }
180 }
181
182 void coalescer::dump_chunks() {
183 cerr << "######## chunks\n";
184
185 for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
186 I != E; ++I) {
187 ra_chunk* c = *I;
188 dump_chunk(c);
189 }
190 }
191
192
193 void coalescer::dump_constraint_queue() {
194 cerr << "######## constraints\n";
195
196 for (constraint_queue::iterator I = constraints.begin(),
197 E = constraints.end(); I != E; ++I) {
198 ra_constraint* c = *I;
199 dump_constraint(c);
200 }
201 }
202
// Debug helper: print one chunk - its cost, member values, and any
// pin/global flags that are set.
void coalescer::dump_chunk(ra_chunk* c) {
	cerr << " ra_chunk cost = " << c->cost << " : ";
	dump::dump_vec(c->values);

	if (c->flags & RCF_PIN_REG)
		cerr << " REG = " << c->pin.sel();

	if (c->flags & RCF_PIN_CHAN)
		cerr << " CHAN = " << c->pin.chan();

	cerr << (c->flags & RCF_GLOBAL ? " GLOBAL" : "");

	cerr << "\n";
}
217
// Debug helper: print one constraint - its kind, cost, and values.
void coalescer::dump_constraint(ra_constraint* c) {
	cerr << " ra_constraint: ";
	switch (c->kind) {
	case CK_PACKED_BS: cerr << "PACKED_BS"; break;
	case CK_PHI: cerr << "PHI"; break;
	case CK_SAME_REG: cerr << "SAME_REG"; break;
	// Unknown kinds indicate a programming error upstream.
	default: cerr << "UNKNOWN_KIND"; assert(0); break;
	}

	cerr << " cost = " << c->cost << " : ";
	dump::dump_vec(c->values);

	cerr << "\n";
}
232
233 void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
234
235 for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
236 ++I) {
237 value *v = *I;
238 s.add_set(v->interferences);
239 }
240 s.remove_vec(c->values);
241 }
242
243 void coalescer::build_chunk_queue() {
244 for (chunk_vec::iterator I = all_chunks.begin(),
245 E = all_chunks.end(); I != E; ++I) {
246 ra_chunk *c = *I;
247
248 if (!c->is_fixed())
249 chunks.insert(c);
250 }
251 }
252
253 void coalescer::build_constraint_queue() {
254 for (constraint_vec::iterator I = all_constraints.begin(),
255 E = all_constraints.end(); I != E; ++I) {
256 ra_constraint *c = *I;
257 unsigned cost = 0;
258
259 if (c->values.empty() || !c->values.front()->is_sgpr())
260 continue;
261
262 if (c->kind != CK_SAME_REG)
263 continue;
264
265 for (vvec::iterator I = c->values.begin(), E = c->values.end();
266 I != E; ++I) {
267 value *v = *I;
268 if (!v->chunk)
269 create_chunk(v);
270 else
271 cost += v->chunk->cost;
272 }
273 c->cost = cost;
274 constraints.insert(c);
275 }
276 }
277
// Assign a color (sel_chan-encoded register+channel) to every queued
// chunk. Reg-pinned chunks first try only their pinned register (pass 0),
// then fall back to scanning the whole non-temp gpr range (pass 1).
void coalescer::color_chunks() {

	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
			I != E; ++I) {
		ra_chunk *c = *I;
		// Skip chunks already fixed, and singleton chunks (presumably
		// colored later outside the coalescer - TODO confirm).
		if (c->is_fixed() || c->values.size() == 1)
			continue;

		sb_bitset rb;
		val_set interf;

		// Build the set of register slots this chunk must avoid.
		get_chunk_interferences(c, interf);

		RA_DUMP(
			cerr << "color_chunks: ";
			dump_chunk(c);
			cerr << "\n interferences: ";
			dump::dump_set(sh,interf);
			cerr << "\n";
		);

		init_reg_bitset(rb, interf);

		// pass 0 = pinned register only; unpinned chunks start at pass 1.
		unsigned pass = c->is_reg_pinned() ? 0 : 1;

		// Channel search range: just the pinned channel, or all four.
		unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
		unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;

		// 0 doubles as the "no color found yet" sentinel - this relies
		// on sel_chan never encoding a valid (reg, chan) as 0
		// (NOTE(review): confirm against sel_chan in sb_ir.h).
		unsigned color = 0;

		while (pass < 2) {

			unsigned rs, re;

			if (pass == 0) {
				rs = c->pin.sel();
				re = rs + 1;
			} else {
				rs = 0;
				re = sh.num_nontemp_gpr();
			}

			// Take the first free (reg, chan) slot in the range; bits
			// beyond the bitset size were never marked as used.
			for (unsigned reg = rs; reg < re; ++reg) {
				for (unsigned chan = cs; chan < ce; ++chan) {
					unsigned bit = sel_chan(reg, chan);
					if (bit >= rb.size() || !rb.get(bit)) {
						color = bit;
						break;
					}
				}
				if (color)
					break;
			}

			if (color)
				break;

			++pass;
		}

		assert(color);
		color_chunk(c, color);
	}
}
342
343 void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
344
345 for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
346 value *v = *I;
347
348 if (!v->is_any_gpr())
349 continue;
350
351 unsigned gpr = v->get_final_gpr();
352 if (!gpr)
353 continue;
354
355 if (gpr) {
356 if (gpr >= bs.size())
357 bs.resize(gpr + 64);
358 bs.set(gpr, 1);
359 }
360 }
361 }
362
// Assign 'color' to every value of chunk 'c'. Values whose own pinning
// conflicts with the chosen color are detached into fresh chunks instead
// of being recolored.
void coalescer::color_chunk(ra_chunk *c, sel_chan color) {

	// Iterate over a copy: detach_value() erases entries from c->values.
	vvec vv = c->values;

	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
			++I) {
		value *v = *I;

		// Reg-pinned value that disagrees with the color: split it off.
		if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
			detach_value(v);
			continue;
		}

		// Likewise for channel pinning.
		if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
			detach_value(v);
			continue;
		}

		v->gpr = color;

		// PHI-constrained values are fixed as soon as they are colored
		// (presumably so later passes keep this assignment - confirm).
		if (v->constraint && v->constraint->kind == CK_PHI)
			v->fix();


		RA_DUMP(
			cerr << " assigned " << color << " to ";
			dump::dump_val(v);
			cerr << "\n";
		);
	}

	c->pin = color;

	if (c->is_reg_pinned()) {
		c->fix();
	}
}
400
401 coalescer::~coalescer() {
402
403 // FIXME use pool allocator ??
404
405 for (constraint_vec::iterator I = all_constraints.begin(),
406 E = all_constraints.end(); I != E; ++I) {
407 delete (*I);
408 }
409
410 for (chunk_vec::iterator I = all_chunks.begin(),
411 E = all_chunks.end(); I != E; ++I) {
412 delete (*I);
413 }
414
415 for (edge_queue::iterator I = edges.begin(), E = edges.end();
416 I != E; ++I) {
417 delete (*I);
418 }
419 }
420
421 int coalescer::run() {
422 int r;
423
424 RA_DUMP( dump_edges(); );
425
426 build_chunks();
427 RA_DUMP( dump_chunks(); );
428
429 build_constraint_queue();
430 RA_DUMP( dump_constraint_queue(); );
431
432 if ((r = color_constraints()))
433 return r;
434
435 build_chunk_queue();
436 color_chunks();
437
438 return 0;
439 }
440
// Intentionally a no-op: PHI-constrained values appear to be fixed as
// they are colored in color_chunk(), so there is nothing left to do here
// (NOTE(review): confirm the empty body is intentional).
void coalescer::color_phi_constraint(ra_constraint* c) {
}
443
// Remove value 'v' from its current chunk and place it into a fresh
// singleton chunk. Returns the new chunk.
ra_chunk* coalescer::detach_value(value *v) {

	vvec::iterator F = std::find(v->chunk->values.begin(),
			v->chunk->values.end(), v);

	assert(F != v->chunk->values.end());
	v->chunk->values.erase(F);
	// After this call v->chunk points to the newly created chunk, so the
	// statements below operate on the NEW chunk, not the old one.
	create_chunk(v);

	if (v->is_reg_pinned()) {
		v->chunk->fix();
	}

	RA_DUMP(
		cerr << " detached : ";
		dump_chunk(v->chunk);
	);

	return v->chunk;

}
465
// Color a CK_SAME_REG constraint: all constrained values must land in the
// same register (sel), each in a distinct channel. Searches channel
// permutations and register numbers for the lowest register that fits.
// Returns 0 on success, -1 if no register is free.
int coalescer::color_reg_constraint(ra_constraint *c) {
	unsigned k, cnt = c->values.size();
	vvec & cv = c->values;

	// Per-value working state. The fixed size 4 assumes at most one value
	// per channel (TODO confirm the caller guarantees cnt <= 4).
	ra_chunk *ch[4];                  // chunk of each constrained value
	unsigned swz[4] = {0, 1, 2, 3};   // candidate channel assignment
	val_set interf[4];                // interferences per chunk
	sb_bitset rb[4];                  // occupied slots per chunk

	bool reg_pinned = false;
	unsigned pin_reg = ~0;

	unsigned chan_mask = 0;           // channels claimed by chan-pinned chunks

	k = 0;
	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
		value *v = *I;

		if (!v->chunk)
			create_chunk(v);

		ch[k] = v->chunk;

		if (v->chunk->is_chan_pinned()) {
			unsigned chan = 1 << v->chunk->pin.chan();

			if (chan & chan_mask) { // channel already in use
				// Two values pinned to the same channel can't share a
				// register - detach this one into an unpinned chunk.
				ch[k] = detach_value(v);
				assert(!ch[k]->is_chan_pinned());
			} else {
				chan_mask |= chan;
			}
		}

		// The first reg-pinned chunk picks the register tried in pass 0.
		if (v->chunk->is_reg_pinned()) {
			if (!reg_pinned) {
				reg_pinned = true;
				pin_reg = v->chunk->pin.sel();
			}
		}

		get_chunk_interferences(ch[k], interf[k]);
		init_reg_bitset(rb[k], interf[k]);
	}

	unsigned start_reg, end_reg;

	start_reg = 0;
	end_reg = sh.num_nontemp_gpr();

	unsigned min_reg = end_reg;
	unsigned min_swz[4];
	// pass 0: pinned register only; pass 1: full register scan.
	unsigned i, pass = reg_pinned ? 0 : 1;

	bool done = false;

	while (pass < 2) {

		unsigned rs, re;

		if (pass == 0) {
			re = pin_reg + 1;
			rs = pin_reg;
		} else {
			re = end_reg;
			rs = start_reg;
		}

		min_reg = re;

		// cycle on swizzle combinations
		do {
			// Reject permutations that contradict channel pinning.
			for (i = 0; i < cnt; ++i) {
				if (ch[i]->flags & RCF_PIN_CHAN)
					if (ch[i]->pin.chan() != swz[i])
						break;
			}
			if (i != cnt)
				continue;

			// looking for minimal reg number such that the constrained chunks
			// may be colored with the current swizzle combination
			for (unsigned reg = rs; reg < min_reg; ++reg) {
				for (i = 0; i < cnt; ++i) {
					unsigned bit = sel_chan(reg, swz[i]);
					// Slot occupied by an interfering value?
					if (bit < rb[i].size() && rb[i].get(bit))
						break;
				}
				if (i == cnt) {
					// All values fit: remember this (reg, swizzle) pair.
					done = true;
					min_reg = reg;
					std::copy(swz, swz + 4, min_swz);
					break;
				}
			}

			// In pass 0 the register is fixed, so the first fit wins.
			if (pass == 0 && done)
				break;

		} while (std::next_permutation(swz, swz + 4));

		// Pass 1 exhausted without success: out of registers.
		if (!done && pass) {
			cerr << "sb: ra_coalesce - out of registers\n";
			return -1;
		}

		if (pass == 0 && done)
			break;

		++pass;
	};

	assert(done);

	RA_DUMP(
		cerr << "min reg = " << min_reg << " min_swz = "
				<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
	);

	// Apply the chosen coloring to every chunk.
	for (i = 0; i < cnt; ++i) {
		sel_chan color(min_reg, min_swz[i]);
		ra_chunk *cc = ch[i];

		if (cc->is_fixed()) {
			// Fixed chunk with a different color: split the constrained
			// value out rather than recoloring the whole chunk.
			if (cc->pin != color)
				cc = detach_value(cv[i]);
			else
				continue;
		}

		color_chunk(cc, color);
		cc->fix();
	}

	return 0;
}
602
603 int coalescer::color_constraints() {
604 int r;
605
606 for (constraint_queue::iterator I = constraints.begin(),
607 E = constraints.end(); I != E; ++I) {
608
609 ra_constraint *c = *I;
610
611 RA_DUMP(
612 cerr << "color_constraints: ";
613 dump_constraint(c);
614 );
615
616 if (c->kind == CK_SAME_REG) {
617 if ((r = color_reg_constraint(c)))
618 return r;
619 } else if (c->kind == CK_PHI)
620 color_phi_constraint(c);
621 }
622 return 0;
623 }
624
625 } // namespace r600_sb