Revert "r600g/sb: fix issues cause by GLSL switching to loops for switch"
[mesa.git] / src / gallium / drivers / r600 / sb / sb_bc_finalize.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

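// Top-level pass: finalize all regions (loops and if/else constructs), run
// the CF peephole, apply chip-specific workarounds and program termination,
// and record the final GPR count and stack size in the shader.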
int bc_finalizer::run() {

    run_on(sh.root);

    regions_vec &rv = sh.get_regions();
    for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
            ++I) {
        region_node *r = *I;

        assert(r);

        bool loop = r->is_loop();

        if (loop)
            finalize_loop(r);
        else
            finalize_if(r);

        r->expand();
    }

    cf_peephole();

    // workaround for some problems on r6xx/7xx
    // add ALU NOP to each vertex shader
    if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
        cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

        alu_group_node *g = sh.create_alu_group();

        alu_node *a = sh.create_alu();
        a->bc.set_op(ALU_OP0_NOP);
        a->bc.last = 1;

        g->push_back(a);
        c->push_back(g);

        sh.root->push_back(c);

        c = sh.create_cf(CF_OP_NOP);
        sh.root->push_back(c);

        last_cf = c;
    }

    if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
        last_cf = sh.create_cf(CF_OP_NOP);
        sh.root->push_back(last_cf);
    }

    if (ctx.is_cayman()) {
        if (!last_cf) {
            cf_node *c = sh.create_cf(CF_OP_CF_END);
            sh.root->push_back(c);
        } else
            last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
    } else
        last_cf->bc.end_of_program = 1;

    for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
        cf_node *le = last_export[t];
        if (le)
            le->bc.set_op(CF_OP_EXPORT_DONE);
    }

    sh.ngpr = ngpr;
    sh.nstack = nstack;
    return 0;
}

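// Wrap a loop region in LOOP_START_DX10/LOOP_END; departs get a LOOP_BREAK
// and repeats (where needed) a LOOP_CONTINUE, both jumping to the loop end,
// then the nested nodes are expanded back into the parent.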
void bc_finalizer::finalize_loop(region_node* r) {

    cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
    cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

    loop_start->jump_after(loop_end);
    loop_end->jump_after(loop_start);

    for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
            I != E; ++I) {
        depart_node *dep = *I;
        cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
        loop_break->jump(loop_end);
        dep->push_back(loop_break);
        dep->expand();
    }

    // FIXME produces unnecessary LOOP_CONTINUE
    for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
            I != E; ++I) {
        repeat_node *rep = *I;
        if (!(rep->parent == r && rep->prev == NULL)) {
            cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
            loop_cont->jump(loop_end);
            rep->push_back(loop_cont);
        }
        rep->expand();
    }

    r->push_front(loop_start);
    r->push_back(loop_end);
}

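// Lower an if/else region to CF instructions: a JUMP at the region start, an
// optional ELSE after the 'if' branch, and a final POP (see the expected
// control flow structure below).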
void bc_finalizer::finalize_if(region_node* r) {

    update_nstack(r);

    // expecting the following control flow structure here:
    //   - region
    //     {
    //       - depart/repeat 1 (it may be depart/repeat for some outer region)
    //         {
    //           - if
    //             {
    //               - depart/repeat 2 (possibly for outer region)
    //                 {
    //                   - some optional code
    //                 }
    //             }
    //           - optional <else> code ...
    //         }
    //     }

    container_node *repdep1 = static_cast<container_node*>(r->first);
    assert(repdep1->is_depart() || repdep1->is_repeat());

    if_node *n_if = static_cast<if_node*>(repdep1->first);

    if (n_if) {

        assert(n_if->is_if());

        container_node *repdep2 = static_cast<container_node*>(n_if->first);
        assert(repdep2->is_depart() || repdep2->is_repeat());

        cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
        cf_node *if_pop = sh.create_cf(CF_OP_POP);

        if_pop->bc.pop_count = 1;
        if_pop->jump_after(if_pop);

        r->push_front(if_jump);
        r->push_back(if_pop);

        bool has_else = n_if->next;

        if (has_else) {
            cf_node *nelse = sh.create_cf(CF_OP_ELSE);
            n_if->insert_after(nelse);
            if_jump->jump(nelse);
            nelse->jump_after(if_pop);
            nelse->bc.pop_count = 1;

        } else {
            if_jump->jump_after(if_pop);
            if_jump->bc.pop_count = 1;
        }

        n_if->expand();
    }

    for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
            I != E; ++I) {
        (*I)->expand();
    }
    r->departs.clear();
    assert(r->repeats.empty());
}

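// Recursively walk the CF tree: finalize ALU groups, fetch and CF
// instructions, and flag ALU_PUSH_BEFORE clauses that need the
// evergreen/cayman stack workaround.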
void bc_finalizer::run_on(container_node* c) {

    for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
        node *n = *I;

        if (n->is_alu_group()) {
            finalize_alu_group(static_cast<alu_group_node*>(n));
        } else {
            if (n->is_alu_clause()) {
                cf_node *c = static_cast<cf_node*>(n);

                if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
                    if (ctx.stack_workaround_8xx) {
                        region_node *r = c->get_parent_region();
                        if (r) {
                            unsigned ifs, loops;
                            unsigned elems = get_stack_depth(r, loops, ifs);
                            unsigned dmod1 = elems % ctx.stack_entry_size;
                            unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

                            if (elems && (!dmod1 || !dmod2))
                                c->flags |= NF_ALU_STACK_WORKAROUND;
                        }
                    } else if (ctx.stack_workaround_9xx) {
                        region_node *r = c->get_parent_region();
                        if (r) {
                            unsigned ifs, loops;
                            get_stack_depth(r, loops, ifs);
                            if (loops >= 2)
                                c->flags |= NF_ALU_STACK_WORKAROUND;
                        }
                    }
                }
            } else if (n->is_fetch_inst()) {
                finalize_fetch(static_cast<fetch_node*>(n));
            } else if (n->is_cf_inst()) {
                finalize_cf(static_cast<cf_node*>(n));
            }
            if (n->is_container())
                run_on(static_cast<container_node*>(n));
        }
    }
}

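// Translate the destination operands of each ALU instruction in a group to
// their final GPR/channel encoding, set write mask, relative addressing and
// predicate fields, and mark the last instruction of the group.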
void bc_finalizer::finalize_alu_group(alu_group_node* g) {

    alu_node *last = NULL;

    for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
        alu_node *n = static_cast<alu_node*>(*I);
        unsigned slot = n->bc.slot;

        value *d = n->dst.empty() ? NULL : n->dst[0];

        if (d && d->is_special_reg()) {
            assert(n->bc.op_ptr->flags & AF_MOVA);
            d = NULL;
        }

        sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

        if (d) {
            assert(fdst.chan() == slot || slot == SLOT_TRANS);
        }

        n->bc.dst_gpr = fdst.sel();
        n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

        if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
            n->bc.dst_rel = 1;
            update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
        } else {
            n->bc.dst_rel = 0;
        }

        n->bc.write_mask = d != NULL;
        n->bc.last = 0;

        if (n->bc.op_ptr->flags & AF_PRED) {
            n->bc.update_pred = (n->dst[1] != NULL);
            n->bc.update_exec_mask = (n->dst[2] != NULL);
        }

        // FIXME handle predication here
        n->bc.pred_sel = PRED_SEL_OFF;

        update_ngpr(n->bc.dst_gpr);

        finalize_alu_src(g, n);

        last = n;
    }

    last->bc.last = 1;
}

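// Translate the source operands of an ALU instruction: GPRs, relative
// (indexed) registers, inline constants, literals, kcache and special
// constants.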
void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
    vvec &sv = a->src;

    FBC_DUMP(
        sblog << "finalize_alu_src: ";
        dump::dump_op(a);
        sblog << "\n";
    );

    unsigned si = 0;

    for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
        value *v = *I;
        assert(v);

        bc_alu_src &src = a->bc.src[si];
        sel_chan sc;
        src.rel = 0;

        sel_chan gpr;

        switch (v->kind) {
        case VLK_REL_REG:
            sc = v->get_final_gpr();
            src.sel = sc.sel();
            src.chan = sc.chan();
            if (!v->rel->is_const()) {
                src.rel = 1;
                update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
            } else
                src.rel = 0;

            break;
        case VLK_REG:
            gpr = v->get_final_gpr();
            src.sel = gpr.sel();
            src.chan = gpr.chan();
            update_ngpr(src.sel);
            break;
        case VLK_TEMP:
            src.sel = v->gpr.sel();
            src.chan = v->gpr.chan();
            update_ngpr(src.sel);
            break;
        case VLK_UNDEF:
        case VLK_CONST: {
            literal lv = v->literal_value;
            src.chan = 0;

            if (lv == literal(0))
                src.sel = ALU_SRC_0;
            else if (lv == literal(0.5f))
                src.sel = ALU_SRC_0_5;
            else if (lv == literal(1.0f))
                src.sel = ALU_SRC_1;
            else if (lv == literal(1))
                src.sel = ALU_SRC_1_INT;
            else if (lv == literal(-1))
                src.sel = ALU_SRC_M_1_INT;
            else {
                src.sel = ALU_SRC_LITERAL;
                src.chan = g->literal_chan(lv);
                src.value = lv;
            }
            break;
        }
        case VLK_KCACHE: {
            cf_node *clause = static_cast<cf_node*>(g->parent);
            assert(clause->is_alu_clause());
            sel_chan k = translate_kcache(clause, v);

            assert(k && "kcache translation failed");

            src.sel = k.sel();
            src.chan = k.chan();
            break;
        }
        case VLK_PARAM:
        case VLK_SPECIAL_CONST:
            src.sel = v->select.sel();
            src.chan = v->select.chan();
            break;
        default:
            assert(!"unknown value kind");
            break;
        }
    }

    while (si < 3) {
        a->bc.src[si++].sel = 0;
    }
}

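// Copy a 4-channel source operand group from 'src' (starting at 'arg_start')
// into the source fields of the helper fetch instruction 'dst', checking
// that all GPR operands come from the same register.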
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
    int reg = -1;

    for (unsigned chan = 0; chan < 4; ++chan) {

        dst.bc.dst_sel[chan] = SEL_MASK;

        unsigned sel = SEL_MASK;

        value *v = src.src[arg_start + chan];

        if (!v || v->is_undef()) {
            sel = SEL_MASK;
        } else if (v->is_const()) {
            literal l = v->literal_value;
            if (l == literal(0))
                sel = SEL_0;
            else if (l == literal(1.0f))
                sel = SEL_1;
            else {
                sblog << "invalid fetch constant operand " << chan << " ";
                dump::dump_op(&src);
                sblog << "\n";
                abort();
            }

        } else if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch source operand " << chan << " ";
                dump::dump_op(&src);
                sblog << "\n";
                abort();
            }

            sel = vchan;

        } else {
            sblog << "invalid fetch source operand " << chan << " ";
            dump::dump_op(&src);
            sblog << "\n";
            abort();
        }

        dst.bc.src_sel[chan] = sel;
    }

    if (reg >= 0)
        update_ngpr(reg);

    dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

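// Emit SET_GRADIENTS_V/H helper instructions before a fetch that uses
// explicit gradients (FF_USEGRAD); the gradient values come from the extra
// source slots of the fetch instruction.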
void bc_finalizer::emit_set_grad(fetch_node* f) {

    assert(f->src.size() == 12);
    unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

    unsigned arg_start = 0;

    for (unsigned op = 0; op < 2; ++op) {
        fetch_node *n = sh.create_fetch();
        n->bc.set_op(ops[op]);

        arg_start += 4;

        copy_fetch_src(*n, *f, arg_start);

        f->insert_before(n);
    }

}

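// Emit a SET_TEXTURE_OFFSETS helper instruction before a fetch that uses
// texture offsets (taken from source slots 4-7).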
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
    assert(f.src.size() == 8);

    fetch_node *n = sh.create_fetch();

    n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

    copy_fetch_src(*n, f, 4);

    f.insert_before(n);
}

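// Translate the source and destination operands of a fetch instruction to
// the final GPR/swizzle encoding, emitting gradient or texture-offset
// helper instructions first when required.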
void bc_finalizer::finalize_fetch(fetch_node* f) {

    int reg = -1;

    // src

    unsigned src_count = 4;

    unsigned flags = f->bc.op_ptr->flags;

    if (flags & FF_VTX) {
        src_count = 1;
    } else if (flags & FF_USEGRAD) {
        emit_set_grad(f);
    } else if (flags & FF_USE_TEXTURE_OFFSETS) {
        emit_set_texture_offsets(*f);
    }

    for (unsigned chan = 0; chan < src_count; ++chan) {

        unsigned sel = f->bc.src_sel[chan];

        if (sel > SEL_W)
            continue;

        value *v = f->src[chan];

        if (v->is_undef()) {
            sel = SEL_MASK;
        } else if (v->is_const()) {
            literal l = v->literal_value;
            if (l == literal(0))
                sel = SEL_0;
            else if (l == literal(1.0f))
                sel = SEL_1;
            else {
                sblog << "invalid fetch constant operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

        } else if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch source operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

            sel = vchan;

        } else {
            sblog << "invalid fetch source operand " << chan << " ";
            dump::dump_op(f);
            sblog << "\n";
            abort();
        }

        f->bc.src_sel[chan] = sel;
    }

    if (reg >= 0)
        update_ngpr(reg);

    f->bc.src_gpr = reg >= 0 ? reg : 0;

    // dst

    reg = -1;

    unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

    for (unsigned chan = 0; chan < 4; ++chan) {

        unsigned sel = f->bc.dst_sel[chan];

        if (sel == SEL_MASK)
            continue;

        value *v = f->dst[chan];
        if (!v)
            continue;

        if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch dst operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

            dst_swz[vchan] = sel;

        } else {
            sblog << "invalid fetch dst operand " << chan << " ";
            dump::dump_op(f);
            sblog << "\n";
            abort();
        }

    }

    for (unsigned i = 0; i < 4; ++i)
        f->bc.dst_sel[i] = dst_swz[i];

    assert(reg >= 0);

    if (reg >= 0)
        update_ngpr(reg);

    f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

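// Finalize a CF instruction: translate export and memory operands to the
// final GPR encoding, remember the last export of each type, and account
// for the extra stack space needed by CALL.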
void bc_finalizer::finalize_cf(cf_node* c) {

    unsigned flags = c->bc.op_ptr->flags;

    c->bc.end_of_program = 0;
    last_cf = c;

    if (flags & CF_EXP) {
        c->bc.set_op(CF_OP_EXPORT);
        last_export[c->bc.type] = c;

        int reg = -1;

        for (unsigned chan = 0; chan < 4; ++chan) {

            unsigned sel = c->bc.sel[chan];

            if (sel > SEL_W)
                continue;

            value *v = c->src[chan];

            if (v->is_undef()) {
                sel = SEL_MASK;
            } else if (v->is_const()) {
                literal l = v->literal_value;
                if (l == literal(0))
                    sel = SEL_0;
                else if (l == literal(1.0f))
                    sel = SEL_1;
                else {
                    sblog << "invalid export constant operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }

            } else if (v->is_any_gpr()) {
                unsigned vreg = v->gpr.sel();
                unsigned vchan = v->gpr.chan();

                if (reg == -1)
                    reg = vreg;
                else if ((unsigned)reg != vreg) {
                    sblog << "invalid export source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }

                sel = vchan;

            } else {
                sblog << "invalid export source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }

            c->bc.sel[chan] = sel;
        }

        if (reg >= 0)
            update_ngpr(reg);

        c->bc.rw_gpr = reg >= 0 ? reg : 0;

    } else if (flags & CF_MEM) {

        int reg = -1;
        unsigned mask = 0;

        for (unsigned chan = 0; chan < 4; ++chan) {
            value *v = c->src[chan];
            if (!v || v->is_undef())
                continue;

            if (!v->is_any_gpr() || v->gpr.chan() != chan) {
                sblog << "invalid source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }
            unsigned vreg = v->gpr.sel();
            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }

            mask |= (1 << chan);
        }

        assert(reg >= 0 && mask);

        if (reg >= 0)
            update_ngpr(reg);

        c->bc.rw_gpr = reg >= 0 ? reg : 0;
        c->bc.comp_mask = mask;

        if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

            reg = -1;

            for (unsigned chan = 0; chan < 4; ++chan) {
                value *v = c->src[4 + chan];
                if (!v || v->is_undef())
                    continue;

                if (!v->is_any_gpr() || v->gpr.chan() != chan) {
                    sblog << "invalid source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }
                unsigned vreg = v->gpr.sel();
                if (reg == -1)
                    reg = vreg;
                else if ((unsigned)reg != vreg) {
                    sblog << "invalid source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }
            }

            assert(reg >= 0);

            if (reg >= 0)
                update_ngpr(reg);

            c->bc.index_gpr = reg >= 0 ? reg : 0;
        }
    } else if (flags & CF_CALL) {
        update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
    }
}

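// Map a kcache constant (bank/line derived from the value's select) to the
// final constant-file address, using the kcache sets locked on the ALU
// clause.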
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
    unsigned sel = v->select.sel();
    unsigned bank = sel >> 12;
    unsigned chan = v->select.chan();
    static const unsigned kc_base[] = {128, 160, 256, 288};

    sel &= 4095;

    unsigned line = sel >> 4;

    for (unsigned k = 0; k < 4; ++k) {
        bc_kcache &kc = alu->bc.kc[k];

        if (kc.mode == KC_LOCK_NONE)
            break;

        if (kc.bank == bank && (kc.addr == line ||
                (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

            sel = kc_base[k] + (sel - (kc.addr << 4));

            return sel_chan(sel, chan);
        }
    }

    assert(!"kcache translation error");
    return 0;
}

void bc_finalizer::update_ngpr(unsigned gpr) {
    if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
        ngpr = gpr + 1;
}

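// Compute the number of stack elements needed at node 'n', counting the
// enclosing loop and if frames and adding the chip-specific reserved
// entries described below.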
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                       unsigned &ifs, unsigned add) {
    unsigned stack_elements = add;
    bool has_non_wqm_push = (add != 0);
    region_node *r = n->is_region() ?
            static_cast<region_node*>(n) : n->get_parent_region();

    loops = 0;
    ifs = 0;

    while (r) {
        if (r->is_loop()) {
            ++loops;
        } else {
            ++ifs;
            has_non_wqm_push = true;
        }
        r = r->get_parent_region();
    }
    stack_elements += (loops * ctx.stack_entry_size) + ifs;

    // reserve additional elements in some cases
    switch (ctx.hw_class) {
    case HW_CLASS_R600:
    case HW_CLASS_R700:
        // If any non-WQM push is invoked, 2 elements should be reserved.
        if (has_non_wqm_push)
            stack_elements += 2;
        break;
    case HW_CLASS_CAYMAN:
        // If any stack operation is invoked, 2 elements should be reserved
        if (stack_elements)
            stack_elements += 2;
        break;
    case HW_CLASS_EVERGREEN:
        // According to the docs we need to reserve 1 element for each of the
        // following cases:
        //   1) non-WQM push is used with WQM/LOOP frames on stack
        //   2) ALU_ELSE_AFTER is used at the point of max stack usage
        // NOTE:
        // It was found that the conditions above are not sufficient, there are
        // other cases where we also need to reserve stack space, that's why
        // we always reserve 1 stack element if we have non-WQM push on stack.
        // Condition 2 is ignored for now because we don't use this instruction.
        if (has_non_wqm_push)
            ++stack_elements;
        break;
    case HW_CLASS_UNKNOWN:
        assert(0);
    }
    return stack_elements;
}

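// Update the maximum stack size (in 4-element entries) required for region
// 'r' plus 'add' extra stack elements.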
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
    unsigned loops = 0;
    unsigned ifs = 0;
    unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

    // XXX all chips expect this value to be computed using 4 as entry size,
    // not the real entry size
    unsigned stack_entries = (elems + 3) >> 2;

    if (nstack < stack_entries)
        nstack = stack_entries;
}

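// CF-level peephole: emit a separate PUSH for flagged ALU_PUSH_BEFORE
// clauses (stack workaround), resolve jump_after targets, merge ALU + POP
// into ALU_POP_AFTER, and drop JUMPs that target the next instruction.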
void bc_finalizer::cf_peephole() {
    if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
        for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
                I = N) {
            N = I; ++N;
            cf_node *c = static_cast<cf_node*>(*I);

            if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
                    (c->flags & NF_ALU_STACK_WORKAROUND)) {
                cf_node *push = sh.create_cf(CF_OP_PUSH);
                c->insert_before(push);
                push->jump(c);
                c->bc.set_op(CF_OP_ALU);
            }
        }
    }

    for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
            I = N) {
        N = I; ++N;

        cf_node *c = static_cast<cf_node*>(*I);

        if (c->jump_after_target) {
            c->jump_target = static_cast<cf_node*>(c->jump_target->next);
            c->jump_after_target = false;
        }

        if (c->is_cf_op(CF_OP_POP)) {
            node *p = c->prev;
            if (p->is_alu_clause()) {
                cf_node *a = static_cast<cf_node*>(p);

                if (a->bc.op == CF_OP_ALU) {
                    a->bc.set_op(CF_OP_ALU_POP_AFTER);
                    c->remove();
                }
            }
        } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
            // if JUMP is immediately followed by its jump target,
            // then JUMP is useless and we can eliminate it
            c->remove();
        }
    }
}

} // namespace r600_sb