r600g: initial support for geometry shaders on evergreen (v2)
[mesa.git] / src / gallium / drivers / r600 / sb / sb_bc_finalize.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

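// Entry point of the pass: finalizes all instructions (run_on), expands
// regions into hardware control flow, runs the CF peephole pass, applies
// the r6xx/r7xx ALU NOP workaround, appends the end-of-program marker,
// promotes the last export of each type to EXPORT_DONE, and stores the
// computed GPR and stack usage in the shader.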
int bc_finalizer::run() {

	run_on(sh.root);

	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	if (last_cf->bc.op_ptr->flags & CF_ALU) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	if (ctx.is_cayman())
		last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	else
		last_cf->bc.end_of_program = 1;

	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}

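// Wraps a loop region with LOOP_START_DX10/LOOP_END and lowers the region's
// departs and repeats to LOOP_BREAK/LOOP_CONTINUE instructions jumping to
// the loop end.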
void bc_finalizer::finalize_loop(region_node* r) {

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

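// Lowers an if region to JUMP/ELSE/POP control flow and updates the
// required stack depth for the region.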
void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		bool has_else = n_if->next;

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

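// Recursively finalizes all nodes: ALU groups, fetch and CF instructions.
// Also flags ALU_PUSH_BEFORE clauses that need the evergreen/cayman stack
// workarounds; the flagged clauses are split in cf_peephole.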
void bc_finalizer::run_on(container_node* c) {

	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n));
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
	}
}

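// Translates the destination operands of each ALU instruction in the group
// to hardware encodings (dst gpr/chan, relative addressing, write mask),
// finalizes the sources, and sets the 'last' bit on the group's final
// instruction.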
void bc_finalizer::finalize_alu_group(alu_group_node* g) {

	alu_node *last = NULL;

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;

		value *d = n->dst.empty() ? NULL : n->dst[0];

		if (d && d->is_special_reg()) {
			assert(n->bc.op_ptr->flags & AF_MOVA);
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		finalize_alu_src(g, n);

		last = n;
	}

	last->bc.last = 1;
}

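// Translates the source operands of an ALU instruction: GPRs, relatively
// addressed registers, kcache and inline constants, and literals (literals
// are allocated into the group's shared literal slots).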
void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
	vvec &sv = a->src;

	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
	}

	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
}

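// Emits FETCH_OP_SET_GRADIENTS_V/H before a gradient-using fetch; here
// f->src[4..7] are taken as the vertical gradient operands and f->src[8..11]
// as the horizontal ones, and each emitted instruction requires all of its
// source channels to live in a single GPR.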
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		// FIXME extract this loop into a separate method and reuse it

		int reg = -1;

		arg_start += 4;

		for (unsigned chan = 0; chan < 4; ++chan) {

			n->bc.dst_sel[chan] = SEL_MASK;

			unsigned sel = SEL_MASK;

			value *v = f->src[arg_start + chan];

			if (!v || v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid fetch constant operand " << chan << " ";
					dump::dump_op(f);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid fetch source operand " << chan << " ";
					dump::dump_op(f);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			n->bc.src_sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		n->bc.src_gpr = reg >= 0 ? reg : 0;

		f->insert_before(n);
	}
}

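// Translates fetch source and destination operands to the src_gpr/dst_gpr
// and swizzle encodings; all sources must come from a single GPR, and all
// destinations must be written to a single GPR.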
void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}
	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

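// Finalizes a CF instruction: exports and memory writes get their source
// operands translated to the rw_gpr and swizzle/comp_mask encodings (plus
// index_gpr for the RAT variants that use one), and calls reserve extra
// stack space.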
void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		assert(reg >= 0 && mask);

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		if ((flags & CF_RAT) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}

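// Translates a kcache constant (bank/line encoded in the value's select) to
// the final ALU source sel/chan using the kcache locks of the containing
// ALU clause; the four lock slots map to sel ranges based at 128, 160, 256
// and 288.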
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.sel();
	unsigned bank = sel >> 12;
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

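// Grows the shader's GPR count to cover 'gpr', excluding the temp GPRs
// reserved at the top of the register file.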
void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}

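// Returns the number of stack elements needed at node 'n': loop and if
// nesting plus 'add' extra elements and the per-chip reserved elements
// described below; also returns the loop and if nesting counts.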
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
		unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved.
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) a non-WQM push is used with WQM/LOOP frames on the stack
		//   2) ALU_ELSE_AFTER is used at the point of maximum stack usage
		// NOTE:
		// The conditions above were found to be insufficient; there are other
		// cases where we also need to reserve stack space, so we always
		// reserve 1 stack element if there is a non-WQM push on the stack.
		// Condition 2 is ignored for now because we don't use that
		// instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	}
	return stack_elements;
}

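// Updates the shader's stack size (in 4-element entries, see the note
// below) to cover the stack depth required at region 'r' plus 'add' extra
// elements.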
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as the entry
	// size, not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;

	if (nstack < stack_entries)
		nstack = stack_entries;
}

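// CF-level peephole: splits flagged ALU_PUSH_BEFORE clauses into PUSH + ALU
// for the stack workarounds, resolves jump_after targets, merges a POP into
// a preceding CF_OP_ALU clause (ALU_POP_AFTER), and removes jumps to the
// immediately following instruction.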
void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb