r600g/sb: Enable SB for geometry shaders
src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

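// Insert an ALU group containing a single NOP before group 'b4'.  Used below
// to separate a group that accesses a GPR through relative (AR-indexed)
// addressing from a preceding group that writes the same GPR, which apparently
// hazards on r6xx (see finalize_alu_group() and finalize_alu_src()).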
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

    alu_group_node *g = sh.create_alu_group();
    alu_node *a = sh.create_alu();

    a->bc.set_op(ALU_OP0_NOP);
    a->bc.last = 1;

    g->push_back(a);
    b4->insert_before(g);
}

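// Entry point of the finalizer: lowers the structured IR (regions, departs,
// repeats) to flat CF instructions, finalizes ALU/fetch/CF operands, and
// records the final GPR count and stack size in the shader.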
int bc_finalizer::run() {

    run_on(sh.root);

    regions_vec &rv = sh.get_regions();
    for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
            ++I) {
        region_node *r = *I;

        assert(r);

        bool loop = r->is_loop();

        if (loop)
            finalize_loop(r);
        else
            finalize_if(r);

        r->expand();
    }

    cf_peephole();

    // workaround for some problems on r6xx/7xx
    // add ALU NOP to each vertex shader
    if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
        cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

        alu_group_node *g = sh.create_alu_group();

        alu_node *a = sh.create_alu();
        a->bc.set_op(ALU_OP0_NOP);
        a->bc.last = 1;

        g->push_back(a);
        c->push_back(g);

        sh.root->push_back(c);

        c = sh.create_cf(CF_OP_NOP);
        sh.root->push_back(c);

        last_cf = c;
    }

    if (!ctx.is_cayman() && (last_cf->bc.op_ptr->flags & CF_ALU)) {
        last_cf = sh.create_cf(CF_OP_NOP);
        sh.root->push_back(last_cf);
    }

    if (ctx.is_cayman()) {
        if (!last_cf) {
            cf_node *c = sh.create_cf(CF_OP_CF_END);
            sh.root->push_back(c);
        } else
            last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
    } else
        last_cf->bc.end_of_program = 1;

    for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
        cf_node *le = last_export[t];
        if (le)
            le->bc.set_op(CF_OP_EXPORT_DONE);
    }

    sh.ngpr = ngpr;
    sh.nstack = nstack;
    return 0;
}

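// Lower a loop region to LOOP_START_DX10/LOOP_END, turning each 'depart'
// into LOOP_BREAK and each non-trivial 'repeat' into LOOP_CONTINUE.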
void bc_finalizer::finalize_loop(region_node* r) {

    update_nstack(r);

    cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
    cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

    // Update last_cf, but don't overwrite it if it's outside the current loop
    // nest since it may point to a cf that is later in program order.
    // The single parent level check is sufficient since finalize_loop() is
    // processed in reverse order from innermost to outermost loop nest level.
    if (!last_cf || last_cf->get_parent_region() == r) {
        last_cf = loop_end;
    }

    loop_start->jump_after(loop_end);
    loop_end->jump_after(loop_start);

    for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
            I != E; ++I) {
        depart_node *dep = *I;
        cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
        loop_break->jump(loop_end);
        dep->push_back(loop_break);
        dep->expand();
    }

    // FIXME produces unnecessary LOOP_CONTINUE
    for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
            I != E; ++I) {
        repeat_node *rep = *I;
        if (!(rep->parent == r && rep->prev == NULL)) {
            cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
            loop_cont->jump(loop_end);
            rep->push_back(loop_cont);
        }
        rep->expand();
    }

    r->push_front(loop_start);
    r->push_back(loop_end);
}

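// Lower an if/else region to JUMP [+ ELSE] + POP; see the comment below for
// the control flow shape this expects.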
void bc_finalizer::finalize_if(region_node* r) {

    update_nstack(r);

    // expecting the following control flow structure here:
    //   - region
    //     {
    //       - depart/repeat 1 (it may be depart/repeat for some outer region)
    //         {
    //           - if
    //             {
    //               - depart/repeat 2 (possibly for outer region)
    //                 {
    //                   - some optional code
    //                 }
    //             }
    //           - optional <else> code ...
    //         }
    //     }

    container_node *repdep1 = static_cast<container_node*>(r->first);
    assert(repdep1->is_depart() || repdep1->is_repeat());

    if_node *n_if = static_cast<if_node*>(repdep1->first);

    if (n_if) {

        assert(n_if->is_if());

        container_node *repdep2 = static_cast<container_node*>(n_if->first);
        assert(repdep2->is_depart() || repdep2->is_repeat());

        cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
        cf_node *if_pop = sh.create_cf(CF_OP_POP);

        if_pop->bc.pop_count = 1;
        if_pop->jump_after(if_pop);

        r->push_front(if_jump);
        r->push_back(if_pop);

        bool has_else = n_if->next;

        if (has_else) {
            cf_node *nelse = sh.create_cf(CF_OP_ELSE);
            n_if->insert_after(nelse);
            if_jump->jump(nelse);
            nelse->jump_after(if_pop);
            nelse->bc.pop_count = 1;

        } else {
            if_jump->jump_after(if_pop);
            if_jump->bc.pop_count = 1;
        }

        n_if->expand();
    }

    for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
            I != E; ++I) {
        (*I)->expand();
    }
    r->departs.clear();
    assert(r->repeats.empty());
}

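// Recursive walk over the program: finalizes ALU groups, fetch and CF
// instructions, and flags ALU_PUSH_BEFORE clauses that need the 8xx/9xx stack
// workarounds (NF_ALU_STACK_WORKAROUND); cf_peephole() later splits the
// flagged clauses into an explicit PUSH + ALU pair.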
void bc_finalizer::run_on(container_node* c) {
    node *prev_node = NULL;
    for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
        node *n = *I;

        if (n->is_alu_group()) {
            finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
        } else {
            if (n->is_alu_clause()) {
                cf_node *c = static_cast<cf_node*>(n);

                if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
                    if (ctx.stack_workaround_8xx) {
                        region_node *r = c->get_parent_region();
                        if (r) {
                            unsigned ifs, loops;
                            unsigned elems = get_stack_depth(r, loops, ifs);
                            unsigned dmod1 = elems % ctx.stack_entry_size;
                            unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

                            if (elems && (!dmod1 || !dmod2))
                                c->flags |= NF_ALU_STACK_WORKAROUND;
                        }
                    } else if (ctx.stack_workaround_9xx) {
                        region_node *r = c->get_parent_region();
                        if (r) {
                            unsigned ifs, loops;
                            get_stack_depth(r, loops, ifs);
                            if (loops >= 2)
                                c->flags |= NF_ALU_STACK_WORKAROUND;
                        }
                    }
                }
            } else if (n->is_fetch_inst()) {
                finalize_fetch(static_cast<fetch_node*>(n));
            } else if (n->is_cf_inst()) {
                finalize_cf(static_cast<cf_node*>(n));
            }
            if (n->is_container())
                run_on(static_cast<container_node*>(n));
        }
        prev_node = n;
    }
}

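// Finalize one ALU group: assign the final dst GPR/channel for each slot, set
// the write_mask/last bits, and, if any source in this group conflicts with
// the previous group through relative addressing, insert the r6xx NOP
// workaround group before it.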
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

    alu_node *last = NULL;
    alu_group_node *prev_g = NULL;
    bool add_nop = false;
    if (prev_node && prev_node->is_alu_group()) {
        prev_g = static_cast<alu_group_node*>(prev_node);
    }

    for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
        alu_node *n = static_cast<alu_node*>(*I);
        unsigned slot = n->bc.slot;
        value *d = n->dst.empty() ? NULL : n->dst[0];

        if (d && d->is_special_reg()) {
            assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
            d = NULL;
        }

        sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

        if (d) {
            assert(fdst.chan() == slot || slot == SLOT_TRANS);
        }

        n->bc.dst_gpr = fdst.sel();
        n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

        if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
            n->bc.dst_rel = 1;
            update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
        } else {
            n->bc.dst_rel = 0;
        }

        n->bc.write_mask = d != NULL;
        n->bc.last = 0;

        if (n->bc.op_ptr->flags & AF_PRED) {
            n->bc.update_pred = (n->dst[1] != NULL);
            n->bc.update_exec_mask = (n->dst[2] != NULL);
        }

        // FIXME handle predication here
        n->bc.pred_sel = PRED_SEL_OFF;

        update_ngpr(n->bc.dst_gpr);

        add_nop |= finalize_alu_src(g, n, prev_g);

        last = n;
    }

    if (add_nop) {
        if (sh.get_ctx().r6xx_gpr_index_workaround) {
            insert_rv6xx_load_ar_workaround(g);
        }
    }
    last->bc.last = 1;
}

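// Encode the source operands of one ALU instruction (GPRs, relative GPRs,
// inline constants, literals, kcache and special constants).  Returns true
// when this instruction reads, through relative addressing, a GPR written by
// the previous group, or reads a GPR that the previous group wrote through
// relative addressing, i.e. the caller must insert the r6xx NOP group.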
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
    vvec &sv = a->src;
    bool add_nop = false;
    FBC_DUMP(
        sblog << "finalize_alu_src: ";
        dump::dump_op(a);
        sblog << "\n";
    );

    unsigned si = 0;

    for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
        value *v = *I;
        assert(v);

        bc_alu_src &src = a->bc.src[si];
        sel_chan sc;
        src.rel = 0;

        sel_chan gpr;

        switch (v->kind) {
        case VLK_REL_REG:
            sc = v->get_final_gpr();
            src.sel = sc.sel();
            src.chan = sc.chan();
            if (!v->rel->is_const()) {
                src.rel = 1;
                update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
                if (prev && !add_nop) {
                    for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
                        alu_node *pn = static_cast<alu_node*>(*pI);
                        if (pn->bc.dst_gpr == src.sel) {
                            add_nop = true;
                            break;
                        }
                    }
                }
            } else
                src.rel = 0;

            break;
        case VLK_REG:
            gpr = v->get_final_gpr();
            src.sel = gpr.sel();
            src.chan = gpr.chan();
            update_ngpr(src.sel);
            break;
        case VLK_TEMP:
            src.sel = v->gpr.sel();
            src.chan = v->gpr.chan();
            update_ngpr(src.sel);
            break;
        case VLK_UNDEF:
        case VLK_CONST: {
            literal lv = v->literal_value;
            src.chan = 0;

            if (lv == literal(0))
                src.sel = ALU_SRC_0;
            else if (lv == literal(0.5f))
                src.sel = ALU_SRC_0_5;
            else if (lv == literal(1.0f))
                src.sel = ALU_SRC_1;
            else if (lv == literal(1))
                src.sel = ALU_SRC_1_INT;
            else if (lv == literal(-1))
                src.sel = ALU_SRC_M_1_INT;
            else {
                src.sel = ALU_SRC_LITERAL;
                src.chan = g->literal_chan(lv);
                src.value = lv;
            }
            break;
        }
        case VLK_KCACHE: {
            cf_node *clause = static_cast<cf_node*>(g->parent);
            assert(clause->is_alu_clause());
            sel_chan k = translate_kcache(clause, v);

            assert(k && "kcache translation failed");

            src.sel = k.sel();
            src.chan = k.chan();
            break;
        }
        case VLK_PARAM:
        case VLK_SPECIAL_CONST:
            src.sel = v->select.sel();
            src.chan = v->select.chan();
            break;
        default:
            assert(!"unknown value kind");
            break;
        }
        if (prev && !add_nop) {
            for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
                alu_node *pn = static_cast<alu_node*>(*pI);
                if (pn->bc.dst_rel) {
                    if (pn->bc.dst_gpr == src.sel) {
                        add_nop = true;
                        break;
                    }
                }
            }
        }
    }

    while (si < 3) {
        a->bc.src[si++].sel = 0;
    }
    return add_nop;
}

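// Copy four source operands, src[arg_start..arg_start+3], of 'src' into the
// src_sel/src_gpr fields of the helper fetch 'dst', mapping undef to SEL_MASK
// and 0/1.0 constants to SEL_0/SEL_1.  Used by the gradient and texture-offset
// helpers below.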
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
    int reg = -1;

    for (unsigned chan = 0; chan < 4; ++chan) {

        dst.bc.dst_sel[chan] = SEL_MASK;

        unsigned sel = SEL_MASK;

        value *v = src.src[arg_start + chan];

        if (!v || v->is_undef()) {
            sel = SEL_MASK;
        } else if (v->is_const()) {
            literal l = v->literal_value;
            if (l == literal(0))
                sel = SEL_0;
            else if (l == literal(1.0f))
                sel = SEL_1;
            else {
                sblog << "invalid fetch constant operand " << chan << " ";
                dump::dump_op(&src);
                sblog << "\n";
                abort();
            }

        } else if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch source operand " << chan << " ";
                dump::dump_op(&src);
                sblog << "\n";
                abort();
            }

            sel = vchan;

        } else {
            sblog << "invalid fetch source operand " << chan << " ";
            dump::dump_op(&src);
            sblog << "\n";
            abort();
        }

        dst.bc.src_sel[chan] = sel;
    }

    if (reg >= 0)
        update_ngpr(reg);

    dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

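// Fetches that use explicit gradients (FF_USEGRAD) carry the gradient vectors
// in src[4..11]; emit SET_GRADIENTS_V and SET_GRADIENTS_H before the fetch to
// load them.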
void bc_finalizer::emit_set_grad(fetch_node* f) {

    assert(f->src.size() == 12);
    unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

    unsigned arg_start = 0;

    for (unsigned op = 0; op < 2; ++op) {
        fetch_node *n = sh.create_fetch();
        n->bc.set_op(ops[op]);

        arg_start += 4;

        copy_fetch_src(*n, *f, arg_start);

        f->insert_before(n);
    }
}

void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
    assert(f.src.size() == 8);

    fetch_node *n = sh.create_fetch();

    n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

    copy_fetch_src(*n, f, 4);

    f.insert_before(n);
}

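// Finalize a texture/vertex fetch: emit any helper instructions (gradients,
// texture offsets), then collapse the source and destination values into a
// single src/dst GPR with per-channel selectors and swizzles, aborting on
// operands that cannot be encoded.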
void bc_finalizer::finalize_fetch(fetch_node* f) {

    int reg = -1;

    // src

    unsigned src_count = 4;

    unsigned flags = f->bc.op_ptr->flags;

    if (flags & FF_VTX) {
        src_count = 1;
    } else if (flags & FF_USEGRAD) {
        emit_set_grad(f);
    } else if (flags & FF_USE_TEXTURE_OFFSETS) {
        emit_set_texture_offsets(*f);
    }

    for (unsigned chan = 0; chan < src_count; ++chan) {

        unsigned sel = f->bc.src_sel[chan];

        if (sel > SEL_W)
            continue;

        value *v = f->src[chan];

        if (v->is_undef()) {
            sel = SEL_MASK;
        } else if (v->is_const()) {
            literal l = v->literal_value;
            if (l == literal(0))
                sel = SEL_0;
            else if (l == literal(1.0f))
                sel = SEL_1;
            else {
                sblog << "invalid fetch constant operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

        } else if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch source operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

            sel = vchan;

        } else {
            sblog << "invalid fetch source operand " << chan << " ";
            dump::dump_op(f);
            sblog << "\n";
            abort();
        }

        f->bc.src_sel[chan] = sel;
    }

    if (reg >= 0)
        update_ngpr(reg);

    f->bc.src_gpr = reg >= 0 ? reg : 0;

    // dst

    reg = -1;

    unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

    for (unsigned chan = 0; chan < 4; ++chan) {

        unsigned sel = f->bc.dst_sel[chan];

        if (sel == SEL_MASK)
            continue;

        value *v = f->dst[chan];
        if (!v)
            continue;

        if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch dst operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

            dst_swz[vchan] = sel;

        } else {
            sblog << "invalid fetch dst operand " << chan << " ";
            dump::dump_op(f);
            sblog << "\n";
            abort();
        }
    }

    for (unsigned i = 0; i < 4; ++i)
        f->bc.dst_sel[i] = dst_swz[i];

    assert(reg >= 0);

    if (reg >= 0)
        update_ngpr(reg);

    f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

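// Finalize a CF instruction.  Exports get their final swizzle and rw_gpr and
// are recorded in last_export[] (run() turns the last export of each type into
// EXPORT_DONE); memory exports also get a component mask and, where the export
// type needs it, an index GPR; CALL reserves extra stack entries for the
// callee.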
void bc_finalizer::finalize_cf(cf_node* c) {

    unsigned flags = c->bc.op_ptr->flags;

    c->bc.end_of_program = 0;
    last_cf = c;

    if (flags & CF_EXP) {
        c->bc.set_op(CF_OP_EXPORT);
        last_export[c->bc.type] = c;

        int reg = -1;

        for (unsigned chan = 0; chan < 4; ++chan) {

            unsigned sel = c->bc.sel[chan];

            if (sel > SEL_W)
                continue;

            value *v = c->src[chan];

            if (v->is_undef()) {
                sel = SEL_MASK;
            } else if (v->is_const()) {
                literal l = v->literal_value;
                if (l == literal(0))
                    sel = SEL_0;
                else if (l == literal(1.0f))
                    sel = SEL_1;
                else {
                    sblog << "invalid export constant operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }

            } else if (v->is_any_gpr()) {
                unsigned vreg = v->gpr.sel();
                unsigned vchan = v->gpr.chan();

                if (reg == -1)
                    reg = vreg;
                else if ((unsigned)reg != vreg) {
                    sblog << "invalid export source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }

                sel = vchan;

            } else {
                sblog << "invalid export source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }

            c->bc.sel[chan] = sel;
        }

        if (reg >= 0)
            update_ngpr(reg);

        c->bc.rw_gpr = reg >= 0 ? reg : 0;

    } else if (flags & CF_MEM) {

        int reg = -1;
        unsigned mask = 0;

        for (unsigned chan = 0; chan < 4; ++chan) {
            value *v = c->src[chan];
            if (!v || v->is_undef())
                continue;

            if (!v->is_any_gpr() || v->gpr.chan() != chan) {
                sblog << "invalid source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }
            unsigned vreg = v->gpr.sel();
            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }

            mask |= (1 << chan);
        }

        assert(reg >= 0 && mask);

        if (reg >= 0)
            update_ngpr(reg);

        c->bc.rw_gpr = reg >= 0 ? reg : 0;
        c->bc.comp_mask = mask;

        if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

            reg = -1;

            for (unsigned chan = 0; chan < 4; ++chan) {
                value *v = c->src[4 + chan];
                if (!v || v->is_undef())
                    continue;

                if (!v->is_any_gpr() || v->gpr.chan() != chan) {
                    sblog << "invalid source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }
                unsigned vreg = v->gpr.sel();
                if (reg == -1)
                    reg = vreg;
                else if ((unsigned)reg != vreg) {
                    sblog << "invalid source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }
            }

            assert(reg >= 0);

            if (reg >= 0)
                update_ngpr(reg);

            c->bc.index_gpr = reg >= 0 ? reg : 0;
        }
    } else if (flags & CF_CALL) {
        update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
    }
}

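// Translate a kcache constant (bank and address encoded in the value's
// 'select') into its final sel/chan using the kcache line locks set up on the
// ALU clause; the four lockable sets map to constant addresses starting at
// 128, 160, 256 and 288 (kc_base).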
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
    unsigned sel = v->select.sel();
    unsigned bank = sel >> 12;
    unsigned chan = v->select.chan();
    static const unsigned kc_base[] = {128, 160, 256, 288};

    sel &= 4095;

    unsigned line = sel >> 4;

    for (unsigned k = 0; k < 4; ++k) {
        bc_kcache &kc = alu->bc.kc[k];

        if (kc.mode == KC_LOCK_NONE)
            break;

        if (kc.bank == bank && (kc.addr == line ||
                (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

            sel = kc_base[k] + (sel - (kc.addr << 4));

            return sel_chan(sel, chan);
        }
    }

    assert(!"kcache translation error");
    return 0;
}

void bc_finalizer::update_ngpr(unsigned gpr) {
    if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
        ngpr = gpr + 1;
}

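// Count the stack elements needed at node 'n': each enclosing loop costs
// ctx.stack_entry_size elements and each enclosing if costs one, plus the
// per-chip reservations below; the loop/if counts are also returned through
// the reference parameters.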
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                       unsigned &ifs, unsigned add) {
    unsigned stack_elements = add;
    bool has_non_wqm_push = (add != 0);
    region_node *r = n->is_region() ?
            static_cast<region_node*>(n) : n->get_parent_region();

    loops = 0;
    ifs = 0;

    while (r) {
        if (r->is_loop()) {
            ++loops;
        } else {
            ++ifs;
            has_non_wqm_push = true;
        }
        r = r->get_parent_region();
    }
    stack_elements += (loops * ctx.stack_entry_size) + ifs;

    // reserve additional elements in some cases
    switch (ctx.hw_class) {
    case HW_CLASS_R600:
    case HW_CLASS_R700:
        // If any non-WQM push is invoked, 2 elements should be reserved.
        if (has_non_wqm_push)
            stack_elements += 2;
        break;
    case HW_CLASS_CAYMAN:
        // If any stack operation is invoked, 2 elements should be reserved.
        if (stack_elements)
            stack_elements += 2;
        break;
    case HW_CLASS_EVERGREEN:
        // According to the docs we need to reserve 1 element for each of the
        // following cases:
        //   1) non-WQM push is used with WQM/LOOP frames on stack
        //   2) ALU_ELSE_AFTER is used at the point of max stack usage
        // NOTE:
        // It was found that the conditions above are not sufficient, there are
        // other cases where we also need to reserve stack space, that's why
        // we always reserve 1 stack element if we have non-WQM push on stack.
        // Condition 2 is ignored for now because we don't use this instruction.
        if (has_non_wqm_push)
            ++stack_elements;
        break;
    case HW_CLASS_UNKNOWN:
        assert(0);
    }
    return stack_elements;
}

void bc_finalizer::update_nstack(region_node* r, unsigned add) {
    unsigned loops = 0;
    unsigned ifs = 0;
    unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

    // XXX all chips expect this value to be computed using 4 as entry size,
    // not the real entry size
    unsigned stack_entries = (elems + 3) >> 2;

    if (nstack < stack_entries)
        nstack = stack_entries;
}

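// Final CF cleanups: split flagged ALU_PUSH_BEFORE clauses into an explicit
// PUSH + ALU pair (stack workaround), resolve jump_after targets, fold a POP
// that directly follows a plain ALU clause into ALU_POP_AFTER, and drop JUMPs
// that target the very next instruction.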
void bc_finalizer::cf_peephole() {
    if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
        for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
                I = N) {
            N = I; ++N;
            cf_node *c = static_cast<cf_node*>(*I);

            if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
                    (c->flags & NF_ALU_STACK_WORKAROUND)) {
                cf_node *push = sh.create_cf(CF_OP_PUSH);
                c->insert_before(push);
                push->jump(c);
                c->bc.set_op(CF_OP_ALU);
            }
        }
    }

    for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
            I = N) {
        N = I; ++N;

        cf_node *c = static_cast<cf_node*>(*I);

        if (c->jump_after_target) {
            c->jump_target = static_cast<cf_node*>(c->jump_target->next);
            c->jump_after_target = false;
        }

        if (c->is_cf_op(CF_OP_POP)) {
            node *p = c->prev;
            if (p->is_alu_clause()) {
                cf_node *a = static_cast<cf_node*>(p);

                if (a->bc.op == CF_OP_ALU) {
                    a->bc.set_op(CF_OP_ALU_POP_AFTER);
                    c->remove();
                }
            }
        } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
            // if JUMP is immediately followed by its jump target,
            // then JUMP is useless and we can eliminate it
            c->remove();
        }
    }
}

} // namespace r600_sb