r600g/sb: fix issues with loops created for switch
[mesa.git] src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

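// Debug tracing for this pass: FBC_DUMP(q) executes the statements in
// q only when FBC_DEBUG is nonzero, dumping to sblog.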
#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

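// Top-level finalizer pass: lowers regions to raw control flow, runs
// the CF peephole pass, then applies end-of-program fixups (trailing
// NOPs on r6xx/7xx VS/ES, CF_END on cayman, end_of_program otherwise)
// and retags the last export of each type as EXPORT_DONE.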
int bc_finalizer::run() {

	run_on(sh.root);

	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	if (!ctx.is_cayman() && (last_cf->bc.op_ptr->flags & CF_ALU)) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}

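// Lowers a loop region: wraps it in LOOP_START_DX10/LOOP_END, turns
// each depart into LOOP_BREAK and each repeat into LOOP_CONTINUE, both
// jumping to LOOP_END. A repeat that is the first child of the region
// itself is skipped, since control falls through to LOOP_END anyway.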
void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

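// Lowers an if region to JUMP/ELSE/POP CF instructions; the expected
// layout of the region is described in the comment below.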
void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		bool has_else = n_if->next;

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

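// Recursively finalizes all nodes in the container: ALU groups, fetch
// and CF instructions. For ALU_PUSH_BEFORE clauses on eg/cm it also
// flags clauses needing the stack workaround (NF_ALU_STACK_WORKAROUND),
// which cf_peephole later splits into a separate PUSH + ALU pair.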
void bc_finalizer::run_on(container_node* c) {

	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n));
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
	}
}

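// Encodes destination operands (dst_gpr/dst_chan/write_mask, relative
// addressing) for every instruction in the group, finalizes their
// sources, and sets the 'last' bit on the group's final instruction.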
void bc_finalizer::finalize_alu_group(alu_group_node* g) {

	alu_node *last = NULL;

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;

		value *d = n->dst.empty() ? NULL : n->dst[0];

		if (d && d->is_special_reg()) {
			assert(n->bc.op_ptr->flags & AF_MOVA);
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		finalize_alu_src(g, n);

		last = n;
	}

	last->bc.last = 1;
}

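// Encodes source operands of one ALU instruction: relatively addressed
// and plain GPRs, inline constants (0, 0.5, 1.0, 1, -1), literals
// (allocating a literal slot in the group via literal_chan), kcache
// constants and special params. Unused source slots get sel = 0.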
void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
	vvec &sv = a->src;

	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
	}

	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
}

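// Copies four operands of 'src' starting at arg_start into the source
// fields of the auxiliary fetch instruction 'dst'. All GPR operands
// must come from the same register; the only constants allowed are 0
// and 1.0f (SEL_0/SEL_1), anything else aborts with a dump.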
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
	int reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {

		dst.bc.dst_sel[chan] = SEL_MASK;

		unsigned sel = SEL_MASK;

		value *v = src.src[arg_start + chan];

		if (!v || v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(&src);
			sblog << "\n";
			abort();
		}

		dst.bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

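// For gradient-using texture fetches: emits SET_GRADIENTS_V and
// SET_GRADIENTS_H before the fetch, consuming its source slots
// [4..7] and [8..11] respectively.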
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		arg_start += 4;

		copy_fetch_src(*n, *f, arg_start);

		f->insert_before(n);
	}
}

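// For fetches with texture offsets: emits SET_TEXTURE_OFFSETS before
// the fetch, consuming its source slots [4..7].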
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
	assert(f.src.size() == 8);

	fetch_node *n = sh.create_fetch();

	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

	copy_fetch_src(*n, f, 4);

	f.insert_before(n);
}

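// Finalizes a fetch instruction: emits the auxiliary gradient/offset
// instructions if required, encodes the source GPR and swizzle (one
// source slot only for VTX fetches), then builds the destination
// swizzle; all destination operands must live in a single GPR.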
void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}
	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

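// Finalizes a CF instruction. Exports are initially set to CF_OP_EXPORT
// and remembered in last_export so run() can retag the final one as
// EXPORT_DONE; memory ops get rw_gpr/comp_mask (and index_gpr for
// indexed writes); calls reserve extra stack space via update_nstack.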
void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		assert(reg >= 0 && mask);

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}

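// Maps a kcache constant (bank/line packed into v->select) to its
// final ALU source sel using the clause's kcache lock set; locked
// lines appear at the fixed bases {128, 160, 256, 288} per kc slot.
// Worked example with hypothetical values: select.sel() = 0x1014 is
// bank 1, word 0x14, line 1; if kc[0] locks bank 1 at addr 1, the
// result is 128 + (0x14 - (1 << 4)) = 132.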
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.sel();
	unsigned bank = sel >> 12;
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

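// Raises the tracked GPR count to cover 'gpr', excluding the ALU temp
// GPRs reserved at the top of the register file.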
void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}

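// Returns the number of stack elements needed at node n: each
// enclosing loop costs ctx.stack_entry_size elements and each if
// costs one, plus the per-chip reservations handled in the switch
// below. 'add' seeds the count (and marks a non-WQM push).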
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
		unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved.
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) a non-WQM push is used with WQM/LOOP frames on the stack
		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
		// NOTE: the conditions above were found to be insufficient; there
		// are other cases where we also need to reserve stack space, so we
		// always reserve 1 stack element if we have a non-WQM push on the
		// stack. Condition 2 is ignored for now because we don't use this
		// instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}

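// Converts the element count for region r (plus 'add' extra elements)
// into 4-element stack entries and raises the shader's nstack if
// needed; e.g. 5 elements -> (5 + 3) >> 2 = 2 entries.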
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as entry size,
	// not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;

	if (nstack < stack_entries)
		nstack = stack_entries;
}

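// CF-level peephole pass: splits flagged ALU_PUSH_BEFORE clauses into
// PUSH + ALU (stack workaround), resolves deferred jump_after targets,
// folds ALU followed by POP into ALU_POP_AFTER, and drops jumps whose
// target is the immediately following instruction.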
void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb