r600g/sb: fix issues caused by GLSL switching to loops for switch
[mesa.git] / src / gallium / drivers / r600 / sb / sb_bc_finalize.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define FBC_DEBUG 0
28
29 #if FBC_DEBUG
30 #define FBC_DUMP(q) do { q } while (0)
31 #else
32 #define FBC_DUMP(q)
33 #endif
34
35 #include "sb_bc.h"
36 #include "sb_shader.h"
37 #include "sb_pass.h"
38
39 namespace r600_sb {
40
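// Pass entry point: finalize all instructions under the root, lower each
// region to if or loop control flow, run the CF peephole, apply the
// chip-specific end-of-program rules and mark the last export of each type
// as EXPORT_DONE.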
41 int bc_finalizer::run() {
42
43 run_on(sh.root);
44
45 regions_vec &rv = sh.get_regions();
46 for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
47 ++I) {
48 region_node *r = *I;
49 bool is_if = false;
50 assert(r);
51
52 assert(r->first);
53 if (r->first->is_container()) {
54 container_node *repdep1 = static_cast<container_node*>(r->first);
55 assert(repdep1->is_depart() || repdep1->is_repeat());
56 if_node *n_if = static_cast<if_node*>(repdep1->first);
57 if (n_if && n_if->is_if())
58 is_if = true;
59 }
60
61 if (is_if)
62 finalize_if(r);
63 else
64 finalize_loop(r);
65
66 r->expand();
67 }
68
69 cf_peephole();
70
71 // workaround for some problems on r6xx/7xx:
72 // add an ALU NOP clause to the end of each vertex shader (VS/ES)
73 if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
74 cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
75
76 alu_group_node *g = sh.create_alu_group();
77
78 alu_node *a = sh.create_alu();
79 a->bc.set_op(ALU_OP0_NOP);
80 a->bc.last = 1;
81
82 g->push_back(a);
83 c->push_back(g);
84
85 sh.root->push_back(c);
86
87 c = sh.create_cf(CF_OP_NOP);
88 sh.root->push_back(c);
89
90 last_cf = c;
91 }
92
93 if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
94 last_cf = sh.create_cf(CF_OP_NOP);
95 sh.root->push_back(last_cf);
96 }
97
98 if (ctx.is_cayman()) {
99 if (!last_cf) {
100 cf_node *c = sh.create_cf(CF_OP_CF_END);
101 sh.root->push_back(c);
102 } else
103 last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
104 } else
105 last_cf->bc.end_of_program = 1;
106
107 for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
108 cf_node *le = last_export[t];
109 if (le)
110 le->bc.set_op(CF_OP_EXPORT_DONE);
111 }
112
113 sh.ngpr = ngpr;
114 sh.nstack = nstack;
115 return 0;
116 }
117
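// Lower a region to loop control flow: LOOP_START_DX10/LOOP_END around the
// body, a LOOP_BREAK for each depart and a LOOP_CONTINUE for each repeat.
// For a non-loop region whose departs are all empty, the LOOP_START/LOOP_END
// pair and the breaks are omitted.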
118 void bc_finalizer::finalize_loop(region_node* r) {
119
120 cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
121 cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
122 bool has_instr = false;
123
124 if (!r->is_loop()) {
125 for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
126 I != E; ++I) {
127 depart_node *dep = *I;
128 if (!dep->empty()) {
129 has_instr = true;
130 break;
131 }
132 }
133 } else
134 has_instr = true;
135
136 if (has_instr) {
137 loop_start->jump_after(loop_end);
138 loop_end->jump_after(loop_start);
139 }
140
141 for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
142 I != E; ++I) {
143 depart_node *dep = *I;
144 if (has_instr) {
145 cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
146 loop_break->jump(loop_end);
147 dep->push_back(loop_break);
148 }
149 dep->expand();
150 }
151
152 // FIXME produces unnecessary LOOP_CONTINUE
153 for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
154 I != E; ++I) {
155 repeat_node *rep = *I;
156 if (!(rep->parent == r && rep->prev == NULL)) {
157 cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
158 loop_cont->jump(loop_end);
159 rep->push_back(loop_cont);
160 }
161 rep->expand();
162 }
163
164 if (has_instr) {
165 r->push_front(loop_start);
166 r->push_back(loop_end);
167 }
168 }
169
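// Lower an if/else region to JUMP / ELSE / POP control flow and expand the
// nested depart containers (the expected node structure is described below).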
170 void bc_finalizer::finalize_if(region_node* r) {
171
172 update_nstack(r);
173
174 // expecting the following control flow structure here:
175 // - region
176 // {
177 // - depart/repeat 1 (it may be depart/repeat for some outer region)
178 // {
179 // - if
180 // {
181 // - depart/repeat 2 (possibly for outer region)
182 // {
183 // - some optional code
184 // }
185 // }
186 // - optional <else> code ...
187 // }
188 // }
189
190 container_node *repdep1 = static_cast<container_node*>(r->first);
191 assert(repdep1->is_depart() || repdep1->is_repeat());
192
193 if_node *n_if = static_cast<if_node*>(repdep1->first);
194
195 if (n_if) {
196
197
198 assert(n_if->is_if());
199
200 container_node *repdep2 = static_cast<container_node*>(n_if->first);
201 assert(repdep2->is_depart() || repdep2->is_repeat());
202
203 cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
204 cf_node *if_pop = sh.create_cf(CF_OP_POP);
205
206 if_pop->bc.pop_count = 1;
207 if_pop->jump_after(if_pop);
208
209 r->push_front(if_jump);
210 r->push_back(if_pop);
211
212 bool has_else = n_if->next;
213
214 if (has_else) {
215 cf_node *nelse = sh.create_cf(CF_OP_ELSE);
216 n_if->insert_after(nelse);
217 if_jump->jump(nelse);
218 nelse->jump_after(if_pop);
219 nelse->bc.pop_count = 1;
220
221 } else {
222 if_jump->jump_after(if_pop);
223 if_jump->bc.pop_count = 1;
224 }
225
226 n_if->expand();
227 }
228
229 for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
230 I != E; ++I) {
231 (*I)->expand();
232 }
233 r->departs.clear();
234 assert(r->repeats.empty());
235 }
236
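// Recursively finalize every node: ALU groups, fetch and CF instructions.
// ALU_PUSH_BEFORE clauses get the NF_ALU_STACK_WORKAROUND flag here when the
// 8xx/9xx stack workarounds apply; cf_peephole() later splits them into
// PUSH + ALU.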
237 void bc_finalizer::run_on(container_node* c) {
238
239 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
240 node *n = *I;
241
242 if (n->is_alu_group()) {
243 finalize_alu_group(static_cast<alu_group_node*>(n));
244 } else {
245 if (n->is_alu_clause()) {
246 cf_node *c = static_cast<cf_node*>(n);
247
248 if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
249 if (ctx.stack_workaround_8xx) {
250 region_node *r = c->get_parent_region();
251 if (r) {
252 unsigned ifs, loops;
253 unsigned elems = get_stack_depth(r, loops, ifs);
254 unsigned dmod1 = elems % ctx.stack_entry_size;
255 unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;
256
257 if (elems && (!dmod1 || !dmod2))
258 c->flags |= NF_ALU_STACK_WORKAROUND;
259 }
260 } else if (ctx.stack_workaround_9xx) {
261 region_node *r = c->get_parent_region();
262 if (r) {
263 unsigned ifs, loops;
264 get_stack_depth(r, loops, ifs);
265 if (loops >= 2)
266 c->flags |= NF_ALU_STACK_WORKAROUND;
267 }
268 }
269 }
270 } else if (n->is_fetch_inst()) {
271 finalize_fetch(static_cast<fetch_node*>(n));
272 } else if (n->is_cf_inst()) {
273 finalize_cf(static_cast<cf_node*>(n));
274 }
275 if (n->is_container())
276 run_on(static_cast<container_node*>(n));
277 }
278 }
279 }
280
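// Assign the final destination GPR/channel, write mask, relative-addressing
// and predicate bits for each instruction in the group, finalize its sources
// and mark the last instruction of the group.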
281 void bc_finalizer::finalize_alu_group(alu_group_node* g) {
282
283 alu_node *last = NULL;
284
285 for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
286 alu_node *n = static_cast<alu_node*>(*I);
287 unsigned slot = n->bc.slot;
288
289 value *d = n->dst.empty() ? NULL : n->dst[0];
290
291 if (d && d->is_special_reg()) {
292 assert(n->bc.op_ptr->flags & AF_MOVA);
293 d = NULL;
294 }
295
296 sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);
297
298 if (d) {
299 assert(fdst.chan() == slot || slot == SLOT_TRANS);
300 }
301
302 n->bc.dst_gpr = fdst.sel();
303 n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;
304
305
306 if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
307 n->bc.dst_rel = 1;
308 update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
309 } else {
310 n->bc.dst_rel = 0;
311 }
312
313 n->bc.write_mask = d != NULL;
314 n->bc.last = 0;
315
316 if (n->bc.op_ptr->flags & AF_PRED) {
317 n->bc.update_pred = (n->dst[1] != NULL);
318 n->bc.update_exec_mask = (n->dst[2] != NULL);
319 }
320
321 // FIXME handle predication here
322 n->bc.pred_sel = PRED_SEL_OFF;
323
324 update_ngpr(n->bc.dst_gpr);
325
326 finalize_alu_src(g, n);
327
328 last = n;
329 }
330
331 last->bc.last = 1;
332 }
333
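// Encode the source operands of an ALU instruction: GPRs and relative GPRs
// get their final sel/chan, constants are folded to inline values where
// possible or placed in literal slots, and kcache values are translated to
// the locked bank addresses.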
334 void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
335 vvec &sv = a->src;
336
337 FBC_DUMP(
338 sblog << "finalize_alu_src: ";
339 dump::dump_op(a);
340 sblog << "\n";
341 );
342
343 unsigned si = 0;
344
345 for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
346 value *v = *I;
347 assert(v);
348
349 bc_alu_src &src = a->bc.src[si];
350 sel_chan sc;
351 src.rel = 0;
352
353 sel_chan gpr;
354
355 switch (v->kind) {
356 case VLK_REL_REG:
357 sc = v->get_final_gpr();
358 src.sel = sc.sel();
359 src.chan = sc.chan();
360 if (!v->rel->is_const()) {
361 src.rel = 1;
362 update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
363 } else
364 src.rel = 0;
365
366 break;
367 case VLK_REG:
368 gpr = v->get_final_gpr();
369 src.sel = gpr.sel();
370 src.chan = gpr.chan();
371 update_ngpr(src.sel);
372 break;
373 case VLK_TEMP:
374 src.sel = v->gpr.sel();
375 src.chan = v->gpr.chan();
376 update_ngpr(src.sel);
377 break;
378 case VLK_UNDEF:
379 case VLK_CONST: {
380 literal lv = v->literal_value;
381 src.chan = 0;
382
383 if (lv == literal(0))
384 src.sel = ALU_SRC_0;
385 else if (lv == literal(0.5f))
386 src.sel = ALU_SRC_0_5;
387 else if (lv == literal(1.0f))
388 src.sel = ALU_SRC_1;
389 else if (lv == literal(1))
390 src.sel = ALU_SRC_1_INT;
391 else if (lv == literal(-1))
392 src.sel = ALU_SRC_M_1_INT;
393 else {
394 src.sel = ALU_SRC_LITERAL;
395 src.chan = g->literal_chan(lv);
396 src.value = lv;
397 }
398 break;
399 }
400 case VLK_KCACHE: {
401 cf_node *clause = static_cast<cf_node*>(g->parent);
402 assert(clause->is_alu_clause());
403 sel_chan k = translate_kcache(clause, v);
404
405 assert(k && "kcache translation failed");
406
407 src.sel = k.sel();
408 src.chan = k.chan();
409 break;
410 }
411 case VLK_PARAM:
412 case VLK_SPECIAL_CONST:
413 src.sel = v->select.sel();
414 src.chan = v->select.chan();
415 break;
416 default:
417 assert(!"unknown value kind");
418 break;
419 }
420 }
421
422 while (si < 3) {
423 a->bc.src[si++].sel = 0;
424 }
425 }
426
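// Copy four consecutive source operands of 'src' (starting at arg_start)
// into the source selects of 'dst'; all GPR operands must live in the same
// register, which becomes dst's src_gpr.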
427 void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
428 {
429 int reg = -1;
430
431 for (unsigned chan = 0; chan < 4; ++chan) {
432
433 dst.bc.dst_sel[chan] = SEL_MASK;
434
435 unsigned sel = SEL_MASK;
436
437 value *v = src.src[arg_start + chan];
438
439 if (!v || v->is_undef()) {
440 sel = SEL_MASK;
441 } else if (v->is_const()) {
442 literal l = v->literal_value;
443 if (l == literal(0))
444 sel = SEL_0;
445 else if (l == literal(1.0f))
446 sel = SEL_1;
447 else {
448 sblog << "invalid fetch constant operand " << chan << " ";
449 dump::dump_op(&src);
450 sblog << "\n";
451 abort();
452 }
453
454 } else if (v->is_any_gpr()) {
455 unsigned vreg = v->gpr.sel();
456 unsigned vchan = v->gpr.chan();
457
458 if (reg == -1)
459 reg = vreg;
460 else if ((unsigned)reg != vreg) {
461 sblog << "invalid fetch source operand " << chan << " ";
462 dump::dump_op(&src);
463 sblog << "\n";
464 abort();
465 }
466
467 sel = vchan;
468
469 } else {
470 sblog << "invalid fetch source operand " << chan << " ";
471 dump::dump_op(&src);
472 sblog << "\n";
473 abort();
474 }
475
476 dst.bc.src_sel[chan] = sel;
477 }
478
479 if (reg >= 0)
480 update_ngpr(reg);
481
482 dst.bc.src_gpr = reg >= 0 ? reg : 0;
483 }
484
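// For gradient texture fetches: emit SET_GRADIENTS_V and SET_GRADIENTS_H
// before the fetch, taking their operands from source slots 4..7 and 8..11
// of the fetch node.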
485 void bc_finalizer::emit_set_grad(fetch_node* f) {
486
487 assert(f->src.size() == 12);
488 unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
489
490 unsigned arg_start = 0;
491
492 for (unsigned op = 0; op < 2; ++op) {
493 fetch_node *n = sh.create_fetch();
494 n->bc.set_op(ops[op]);
495
496 arg_start += 4;
497
498 copy_fetch_src(*n, *f, arg_start);
499
500 f->insert_before(n);
501 }
502
503 }
504
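// Emit a SET_TEXTURE_OFFSETS instruction before the fetch, taking its
// operands from source slots 4..7 of the fetch node.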
505 void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
506 assert(f.src.size() == 8);
507
508 fetch_node *n = sh.create_fetch();
509
510 n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);
511
512 copy_fetch_src(*n, f, 4);
513
514 f.insert_before(n);
515 }
516
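// Finalize a fetch instruction: emit the helper instructions for gradients
// or texture offsets if needed, then encode the source and destination GPRs
// and swizzles (all sources must come from a single GPR, likewise all
// destinations).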
517 void bc_finalizer::finalize_fetch(fetch_node* f) {
518
519 int reg = -1;
520
521 // src
522
523 unsigned src_count = 4;
524
525 unsigned flags = f->bc.op_ptr->flags;
526
527 if (flags & FF_VTX) {
528 src_count = 1;
529 } else if (flags & FF_USEGRAD) {
530 emit_set_grad(f);
531 } else if (flags & FF_USE_TEXTURE_OFFSETS) {
532 emit_set_texture_offsets(*f);
533 }
534
535 for (unsigned chan = 0; chan < src_count; ++chan) {
536
537 unsigned sel = f->bc.src_sel[chan];
538
539 if (sel > SEL_W)
540 continue;
541
542 value *v = f->src[chan];
543
544 if (v->is_undef()) {
545 sel = SEL_MASK;
546 } else if (v->is_const()) {
547 literal l = v->literal_value;
548 if (l == literal(0))
549 sel = SEL_0;
550 else if (l == literal(1.0f))
551 sel = SEL_1;
552 else {
553 sblog << "invalid fetch constant operand " << chan << " ";
554 dump::dump_op(f);
555 sblog << "\n";
556 abort();
557 }
558
559 } else if (v->is_any_gpr()) {
560 unsigned vreg = v->gpr.sel();
561 unsigned vchan = v->gpr.chan();
562
563 if (reg == -1)
564 reg = vreg;
565 else if ((unsigned)reg != vreg) {
566 sblog << "invalid fetch source operand " << chan << " ";
567 dump::dump_op(f);
568 sblog << "\n";
569 abort();
570 }
571
572 sel = vchan;
573
574 } else {
575 sblog << "invalid fetch source operand " << chan << " ";
576 dump::dump_op(f);
577 sblog << "\n";
578 abort();
579 }
580
581 f->bc.src_sel[chan] = sel;
582 }
583
584 if (reg >= 0)
585 update_ngpr(reg);
586
587 f->bc.src_gpr = reg >= 0 ? reg : 0;
588
589 // dst
590
591 reg = -1;
592
593 unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};
594
595 for (unsigned chan = 0; chan < 4; ++chan) {
596
597 unsigned sel = f->bc.dst_sel[chan];
598
599 if (sel == SEL_MASK)
600 continue;
601
602 value *v = f->dst[chan];
603 if (!v)
604 continue;
605
606 if (v->is_any_gpr()) {
607 unsigned vreg = v->gpr.sel();
608 unsigned vchan = v->gpr.chan();
609
610 if (reg == -1)
611 reg = vreg;
612 else if ((unsigned)reg != vreg) {
613 sblog << "invalid fetch dst operand " << chan << " ";
614 dump::dump_op(f);
615 sblog << "\n";
616 abort();
617 }
618
619 dst_swz[vchan] = sel;
620
621 } else {
622 sblog << "invalid fetch dst operand " << chan << " ";
623 dump::dump_op(f);
624 sblog << "\n";
625 abort();
626 }
627
628 }
629
630 for (unsigned i = 0; i < 4; ++i)
631 f->bc.dst_sel[i] = dst_swz[i];
632
633 assert(reg >= 0);
634
635 if (reg >= 0)
636 update_ngpr(reg);
637
638 f->bc.dst_gpr = reg >= 0 ? reg : 0;
639 }
640
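// Finalize a CF instruction: exports get their final source swizzle and
// rw_gpr (the last export of each type is later changed to EXPORT_DONE),
// memory exports also get comp_mask and, where used, index_gpr, and CF_CALL
// updates the required stack depth.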
641 void bc_finalizer::finalize_cf(cf_node* c) {
642
643 unsigned flags = c->bc.op_ptr->flags;
644
645 c->bc.end_of_program = 0;
646 last_cf = c;
647
648 if (flags & CF_EXP) {
649 c->bc.set_op(CF_OP_EXPORT);
650 last_export[c->bc.type] = c;
651
652 int reg = -1;
653
654 for (unsigned chan = 0; chan < 4; ++chan) {
655
656 unsigned sel = c->bc.sel[chan];
657
658 if (sel > SEL_W)
659 continue;
660
661 value *v = c->src[chan];
662
663 if (v->is_undef()) {
664 sel = SEL_MASK;
665 } else if (v->is_const()) {
666 literal l = v->literal_value;
667 if (l == literal(0))
668 sel = SEL_0;
669 else if (l == literal(1.0f))
670 sel = SEL_1;
671 else {
672 sblog << "invalid export constant operand " << chan << " ";
673 dump::dump_op(c);
674 sblog << "\n";
675 abort();
676 }
677
678 } else if (v->is_any_gpr()) {
679 unsigned vreg = v->gpr.sel();
680 unsigned vchan = v->gpr.chan();
681
682 if (reg == -1)
683 reg = vreg;
684 else if ((unsigned)reg != vreg) {
685 sblog << "invalid export source operand " << chan << " ";
686 dump::dump_op(c);
687 sblog << "\n";
688 abort();
689 }
690
691 sel = vchan;
692
693 } else {
694 sblog << "invalid export source operand " << chan << " ";
695 dump::dump_op(c);
696 sblog << "\n";
697 abort();
698 }
699
700 c->bc.sel[chan] = sel;
701 }
702
703 if (reg >= 0)
704 update_ngpr(reg);
705
706 c->bc.rw_gpr = reg >= 0 ? reg : 0;
707
708 } else if (flags & CF_MEM) {
709
710 int reg = -1;
711 unsigned mask = 0;
712
713 for (unsigned chan = 0; chan < 4; ++chan) {
714 value *v = c->src[chan];
715 if (!v || v->is_undef())
716 continue;
717
718 if (!v->is_any_gpr() || v->gpr.chan() != chan) {
719 sblog << "invalid source operand " << chan << " ";
720 dump::dump_op(c);
721 sblog << "\n";
722 abort();
723 }
724 unsigned vreg = v->gpr.sel();
725 if (reg == -1)
726 reg = vreg;
727 else if ((unsigned)reg != vreg) {
728 sblog << "invalid source operand " << chan << " ";
729 dump::dump_op(c);
730 sblog << "\n";
731 abort();
732 }
733
734 mask |= (1 << chan);
735 }
736
737 assert(reg >= 0 && mask);
738
739 if (reg >= 0)
740 update_ngpr(reg);
741
742 c->bc.rw_gpr = reg >= 0 ? reg : 0;
743 c->bc.comp_mask = mask;
744
745 if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {
746
747 reg = -1;
748
749 for (unsigned chan = 0; chan < 4; ++chan) {
750 value *v = c->src[4 + chan];
751 if (!v || v->is_undef())
752 continue;
753
754 if (!v->is_any_gpr() || v->gpr.chan() != chan) {
755 sblog << "invalid source operand " << chan << " ";
756 dump::dump_op(c);
757 sblog << "\n";
758 abort();
759 }
760 unsigned vreg = v->gpr.sel();
761 if (reg == -1)
762 reg = vreg;
763 else if ((unsigned)reg != vreg) {
764 sblog << "invalid source operand " << chan << " ";
765 dump::dump_op(c);
766 sblog << "\n";
767 abort();
768 }
769 }
770
771 assert(reg >= 0);
772
773 if (reg >= 0)
774 update_ngpr(reg);
775
776 c->bc.index_gpr = reg >= 0 ? reg : 0;
777 }
778 } else if (flags & CF_CALL) {
779 update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
780 }
781 }
782
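// Translate a kcache constant (bank and line encoded in the value's select)
// into an ALU source sel/chan using the kcache banks locked for this clause;
// asserts if the line is not covered by any locked bank.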
783 sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
784 unsigned sel = v->select.sel();
785 unsigned bank = sel >> 12;
786 unsigned chan = v->select.chan();
787 static const unsigned kc_base[] = {128, 160, 256, 288};
788
789 sel &= 4095;
790
791 unsigned line = sel >> 4;
792
793 for (unsigned k = 0; k < 4; ++k) {
794 bc_kcache &kc = alu->bc.kc[k];
795
796 if (kc.mode == KC_LOCK_NONE)
797 break;
798
799 if (kc.bank == bank && (kc.addr == line ||
800 (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {
801
802 sel = kc_base[k] + (sel - (kc.addr << 4));
803
804 return sel_chan(sel, chan);
805 }
806 }
807
808 assert(!"kcache translation error");
809 return 0;
810 }
811
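// Grow the shader's GPR count to cover 'gpr', ignoring the GPRs reserved for
// ALU temporaries at the top of the register file.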
812 void bc_finalizer::update_ngpr(unsigned gpr) {
813 if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
814 ngpr = gpr + 1;
815 }
816
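// Count the stack elements needed at node 'n': 'add' extra elements, plus
// stack_entry_size elements per enclosing loop and one per enclosing if,
// plus the chip-specific reserved entries handled below.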
817 unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
818 unsigned &ifs, unsigned add) {
819 unsigned stack_elements = add;
820 bool has_non_wqm_push = (add != 0);
821 region_node *r = n->is_region() ?
822 static_cast<region_node*>(n) : n->get_parent_region();
823
824 loops = 0;
825 ifs = 0;
826
827 while (r) {
828 if (r->is_loop()) {
829 ++loops;
830 } else {
831 ++ifs;
832 has_non_wqm_push = true;
833 }
834 r = r->get_parent_region();
835 }
836 stack_elements += (loops * ctx.stack_entry_size) + ifs;
837
838 // reserve additional elements in some cases
839 switch (ctx.hw_class) {
840 case HW_CLASS_R600:
841 case HW_CLASS_R700:
842 // If any non-WQM push is invoked, 2 elements should be reserved.
843 if (has_non_wqm_push)
844 stack_elements += 2;
845 break;
846 case HW_CLASS_CAYMAN:
847 // If any stack operation is invoked, 2 elements should be reserved
848 if (stack_elements)
849 stack_elements += 2;
850 break;
851 case HW_CLASS_EVERGREEN:
852 // According to the docs we need to reserve 1 element for each of the
853 // following cases:
854 // 1) non-WQM push is used with WQM/LOOP frames on stack
855 // 2) ALU_ELSE_AFTER is used at the point of max stack usage
856 // NOTE:
857 // It was found that the conditions above are not sufficient; there are
858 // other cases where we also need to reserve stack space, which is why
859 // we always reserve 1 stack element whenever there is a non-WQM push on the stack.
860 // Condition 2 is ignored for now because we don't use this instruction.
861 if (has_non_wqm_push)
862 ++stack_elements;
863 break;
864 case HW_CLASS_UNKNOWN:
865 assert(0);
866 }
867 return stack_elements;
868 }
869
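// Grow the shader's stack size (counted in 4-element entries) to cover the
// depth needed at region 'r' plus 'add' extra elements.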
870 void bc_finalizer::update_nstack(region_node* r, unsigned add) {
871 unsigned loops = 0;
872 unsigned ifs = 0;
873 unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;
874
875 // XXX all chips expect this value to be computed using 4 as entry size,
876 // not the real entry size
877 unsigned stack_entries = (elems + 3) >> 2;
878
879 if (nstack < stack_entries)
880 nstack = stack_entries;
881 }
882
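// Final CF cleanup: split flagged ALU_PUSH_BEFORE clauses into PUSH + ALU
// (stack workaround), resolve jump_after targets, fold a POP into a
// preceding ALU clause as ALU_POP_AFTER, and drop JUMPs whose target is the
// next instruction.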
883 void bc_finalizer::cf_peephole() {
884 if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
885 for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
886 I = N) {
887 N = I; ++N;
888 cf_node *c = static_cast<cf_node*>(*I);
889
890 if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
891 (c->flags & NF_ALU_STACK_WORKAROUND)) {
892 cf_node *push = sh.create_cf(CF_OP_PUSH);
893 c->insert_before(push);
894 push->jump(c);
895 c->bc.set_op(CF_OP_ALU);
896 }
897 }
898 }
899
900 for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
901 I = N) {
902 N = I; ++N;
903
904 cf_node *c = static_cast<cf_node*>(*I);
905
906 if (c->jump_after_target) {
907 c->jump_target = static_cast<cf_node*>(c->jump_target->next);
908 c->jump_after_target = false;
909 }
910
911 if (c->is_cf_op(CF_OP_POP)) {
912 node *p = c->prev;
913 if (p->is_alu_clause()) {
914 cf_node *a = static_cast<cf_node*>(p);
915
916 if (a->bc.op == CF_OP_ALU) {
917 a->bc.set_op(CF_OP_ALU_POP_AFTER);
918 c->remove();
919 }
920 }
921 } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
922 // if JUMP is immediately followed by its jump target,
923 // then JUMP is useless and we can eliminate it
924 c->remove();
925 }
926 }
927 }
928
929 } // namespace r600_sb