r600g: Fix missing SET_TEXTURE_OFFSETS
mesa.git: src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

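// Entry point of the finalizer pass: lowers the structured regions built
// by earlier passes to raw CF instructions, runs the CF peephole, applies
// the r6xx/7xx ALU NOP workaround, marks the end of the program and the
// final exports, and records the shader's GPR and stack usage.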
int bc_finalizer::run() {

    run_on(sh.root);

    regions_vec &rv = sh.get_regions();
    for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
            ++I) {
        region_node *r = *I;

        assert(r);

        bool loop = r->is_loop();

        if (loop)
            finalize_loop(r);
        else
            finalize_if(r);

        r->expand();
    }

    cf_peephole();

    // workaround for some problems on r6xx/7xx
    // add ALU NOP to each vertex shader
    if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
        cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

        alu_group_node *g = sh.create_alu_group();

        alu_node *a = sh.create_alu();
        a->bc.set_op(ALU_OP0_NOP);
        a->bc.last = 1;

        g->push_back(a);
        c->push_back(g);

        sh.root->push_back(c);

        c = sh.create_cf(CF_OP_NOP);
        sh.root->push_back(c);

        last_cf = c;
    }

    if (last_cf->bc.op_ptr->flags & CF_ALU) {
        last_cf = sh.create_cf(CF_OP_NOP);
        sh.root->push_back(last_cf);
    }

    if (ctx.is_cayman())
        last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
    else
        last_cf->bc.end_of_program = 1;

    for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
        cf_node *le = last_export[t];
        if (le)
            le->bc.set_op(CF_OP_EXPORT_DONE);
    }

    sh.ngpr = ngpr;
    sh.nstack = nstack;
    return 0;
}

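// Wrap a loop region in LOOP_START_DX10/LOOP_END and lower its departs
// and repeats to LOOP_BREAK/LOOP_CONTINUE jumps to the loop end.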
void bc_finalizer::finalize_loop(region_node* r) {

    cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
    cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

    loop_start->jump_after(loop_end);
    loop_end->jump_after(loop_start);

    for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
            I != E; ++I) {
        depart_node *dep = *I;
        cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
        loop_break->jump(loop_end);
        dep->push_back(loop_break);
        dep->expand();
    }

    // FIXME produces unnecessary LOOP_CONTINUE
    for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
            I != E; ++I) {
        repeat_node *rep = *I;
        if (!(rep->parent == r && rep->prev == NULL)) {
            cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
            loop_cont->jump(loop_end);
            rep->push_back(loop_cont);
        }
        rep->expand();
    }

    r->push_front(loop_start);
    r->push_back(loop_end);
}

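// Lower an if-region to JUMP/ELSE/POP control flow; the expected node
// structure is documented below.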
void bc_finalizer::finalize_if(region_node* r) {

    update_nstack(r);

    // expecting the following control flow structure here:
    //   - region
    //     {
    //       - depart/repeat 1 (it may be depart/repeat for some outer region)
    //         {
    //           - if
    //             {
    //               - depart/repeat 2 (possibly for outer region)
    //                 {
    //                   - some optional code
    //                 }
    //             }
    //           - optional <else> code ...
    //         }
    //     }

    container_node *repdep1 = static_cast<container_node*>(r->first);
    assert(repdep1->is_depart() || repdep1->is_repeat());

    if_node *n_if = static_cast<if_node*>(repdep1->first);

    if (n_if) {

        assert(n_if->is_if());

        container_node *repdep2 = static_cast<container_node*>(n_if->first);
        assert(repdep2->is_depart() || repdep2->is_repeat());

        cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
        cf_node *if_pop = sh.create_cf(CF_OP_POP);

        if_pop->bc.pop_count = 1;
        if_pop->jump_after(if_pop);

        r->push_front(if_jump);
        r->push_back(if_pop);

        bool has_else = n_if->next;

        if (has_else) {
            cf_node *nelse = sh.create_cf(CF_OP_ELSE);
            n_if->insert_after(nelse);
            if_jump->jump(nelse);
            nelse->jump_after(if_pop);
            nelse->bc.pop_count = 1;

        } else {
            if_jump->jump_after(if_pop);
            if_jump->bc.pop_count = 1;
        }

        n_if->expand();
    }

    for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
            I != E; ++I) {
        (*I)->expand();
    }
    r->departs.clear();
    assert(r->repeats.empty());
}

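// Recursively finalize all ALU groups, fetch and CF instructions in the
// container, flagging ALU_PUSH_BEFORE clauses that need the evergreen or
// cayman stack workarounds along the way.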
void bc_finalizer::run_on(container_node* c) {

    for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
        node *n = *I;

        if (n->is_alu_group()) {
            finalize_alu_group(static_cast<alu_group_node*>(n));
        } else {
            if (n->is_alu_clause()) {
                cf_node *c = static_cast<cf_node*>(n);

                if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
                    if (ctx.stack_workaround_8xx) {
                        region_node *r = c->get_parent_region();
                        if (r) {
                            unsigned ifs, loops;
                            unsigned elems = get_stack_depth(r, loops, ifs);
                            unsigned dmod1 = elems % ctx.stack_entry_size;
                            unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

                            if (elems && (!dmod1 || !dmod2))
                                c->flags |= NF_ALU_STACK_WORKAROUND;
                        }
                    } else if (ctx.stack_workaround_9xx) {
                        region_node *r = c->get_parent_region();
                        if (r) {
                            unsigned ifs, loops;
                            get_stack_depth(r, loops, ifs);
                            if (loops >= 2)
                                c->flags |= NF_ALU_STACK_WORKAROUND;
                        }
                    }
                }
            } else if (n->is_fetch_inst()) {
                finalize_fetch(static_cast<fetch_node*>(n));
            } else if (n->is_cf_inst()) {
                finalize_cf(static_cast<cf_node*>(n));
            }
            if (n->is_container())
                run_on(static_cast<container_node*>(n));
        }
    }
}

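// Translate the destinations of each ALU instruction in the group to
// hardware GPRs, set write_mask/update_pred/update_exec_mask, finalize
// the sources, and set the 'last' bit on the final instruction.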
void bc_finalizer::finalize_alu_group(alu_group_node* g) {

    alu_node *last = NULL;

    for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
        alu_node *n = static_cast<alu_node*>(*I);
        unsigned slot = n->bc.slot;

        value *d = n->dst.empty() ? NULL : n->dst[0];

        if (d && d->is_special_reg()) {
            assert(n->bc.op_ptr->flags & AF_MOVA);
            d = NULL;
        }

        sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

        if (d) {
            assert(fdst.chan() == slot || slot == SLOT_TRANS);
        }

        n->bc.dst_gpr = fdst.sel();
        n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

        if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
            n->bc.dst_rel = 1;
            update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
        } else {
            n->bc.dst_rel = 0;
        }

        n->bc.write_mask = d != NULL;
        n->bc.last = 0;

        if (n->bc.op_ptr->flags & AF_PRED) {
            n->bc.update_pred = (n->dst[1] != NULL);
            n->bc.update_exec_mask = (n->dst[2] != NULL);
        }

        // FIXME handle predication here
        n->bc.pred_sel = PRED_SEL_OFF;

        update_ngpr(n->bc.dst_gpr);

        finalize_alu_src(g, n);

        last = n;
    }

    last->bc.last = 1;
}

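// Translate the sources of an ALU instruction to hardware source selects:
// GPRs (possibly relative), kcache lines, inline constants or literals.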
void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
    vvec &sv = a->src;

    FBC_DUMP(
        sblog << "finalize_alu_src: ";
        dump::dump_op(a);
        sblog << "\n";
    );

    unsigned si = 0;

    for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
        value *v = *I;
        assert(v);

        bc_alu_src &src = a->bc.src[si];
        sel_chan sc;
        src.rel = 0;

        sel_chan gpr;

        switch (v->kind) {
        case VLK_REL_REG:
            sc = v->get_final_gpr();
            src.sel = sc.sel();
            src.chan = sc.chan();
            if (!v->rel->is_const()) {
                src.rel = 1;
                update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
            } else
                src.rel = 0;

            break;
        case VLK_REG:
            gpr = v->get_final_gpr();
            src.sel = gpr.sel();
            src.chan = gpr.chan();
            update_ngpr(src.sel);
            break;
        case VLK_TEMP:
            src.sel = v->gpr.sel();
            src.chan = v->gpr.chan();
            update_ngpr(src.sel);
            break;
        case VLK_UNDEF:
        case VLK_CONST: {
            literal lv = v->literal_value;
            src.chan = 0;

            if (lv == literal(0))
                src.sel = ALU_SRC_0;
            else if (lv == literal(0.5f))
                src.sel = ALU_SRC_0_5;
            else if (lv == literal(1.0f))
                src.sel = ALU_SRC_1;
            else if (lv == literal(1))
                src.sel = ALU_SRC_1_INT;
            else if (lv == literal(-1))
                src.sel = ALU_SRC_M_1_INT;
            else {
                src.sel = ALU_SRC_LITERAL;
                src.chan = g->literal_chan(lv);
                src.value = lv;
            }
            break;
        }
        case VLK_KCACHE: {
            cf_node *clause = static_cast<cf_node*>(g->parent);
            assert(clause->is_alu_clause());
            sel_chan k = translate_kcache(clause, v);

            assert(k && "kcache translation failed");

            src.sel = k.sel();
            src.chan = k.chan();
            break;
        }
        case VLK_PARAM:
        case VLK_SPECIAL_CONST:
            src.sel = v->select.sel();
            src.chan = v->select.chan();
            break;
        default:
            assert(!"unknown value kind");
            break;
        }
    }

    while (si < 3) {
        a->bc.src[si++].sel = 0;
    }
}

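// Copy four source operands of 'src', starting at arg_start, into the
// source slots of the helper fetch instruction 'dst'; all GPR operands
// must come from the same register.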
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
    int reg = -1;

    for (unsigned chan = 0; chan < 4; ++chan) {

        dst.bc.dst_sel[chan] = SEL_MASK;

        unsigned sel = SEL_MASK;

        value *v = src.src[arg_start + chan];

        if (!v || v->is_undef()) {
            sel = SEL_MASK;
        } else if (v->is_const()) {
            literal l = v->literal_value;
            if (l == literal(0))
                sel = SEL_0;
            else if (l == literal(1.0f))
                sel = SEL_1;
            else {
                sblog << "invalid fetch constant operand " << chan << " ";
                dump::dump_op(&src);
                sblog << "\n";
                abort();
            }

        } else if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch source operand " << chan << " ";
                dump::dump_op(&src);
                sblog << "\n";
                abort();
            }

            sel = vchan;

        } else {
            sblog << "invalid fetch source operand " << chan << " ";
            dump::dump_op(&src);
            sblog << "\n";
            abort();
        }

        dst.bc.src_sel[chan] = sel;
    }

    if (reg >= 0)
        update_ngpr(reg);

    dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

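// For instructions that take explicit gradients (FF_USEGRAD), emit
// SET_GRADIENTS_V/H fetches carrying source operands 4..7 and 8..11
// before the instruction.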
void bc_finalizer::emit_set_grad(fetch_node* f) {

    assert(f->src.size() == 12);
    unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

    unsigned arg_start = 0;

    for (unsigned op = 0; op < 2; ++op) {
        fetch_node *n = sh.create_fetch();
        n->bc.set_op(ops[op]);

        arg_start += 4;

        copy_fetch_src(*n, *f, arg_start);

        f->insert_before(n);
    }
}

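// For instructions with literal texture offsets, emit a
// SET_TEXTURE_OFFSETS fetch carrying source operands 4..7 before the
// instruction.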
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
    assert(f.src.size() == 8);

    fetch_node *n = sh.create_fetch();

    n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

    copy_fetch_src(*n, f, 4);

    f.insert_before(n);
}

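// Translate the source and destination operands of a fetch instruction
// to hardware GPRs and selects, emitting the helper instructions above
// where the opcode requires them.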
void bc_finalizer::finalize_fetch(fetch_node* f) {

    int reg = -1;

    // src

    unsigned src_count = 4;

    unsigned flags = f->bc.op_ptr->flags;

    if (flags & FF_VTX) {
        src_count = 1;
    } else if (flags & FF_USEGRAD) {
        emit_set_grad(f);
    } else if (flags & FF_USE_TEXTURE_OFFSETS) {
        emit_set_texture_offsets(*f);
    }

    for (unsigned chan = 0; chan < src_count; ++chan) {

        unsigned sel = f->bc.src_sel[chan];

        if (sel > SEL_W)
            continue;

        value *v = f->src[chan];

        if (v->is_undef()) {
            sel = SEL_MASK;
        } else if (v->is_const()) {
            literal l = v->literal_value;
            if (l == literal(0))
                sel = SEL_0;
            else if (l == literal(1.0f))
                sel = SEL_1;
            else {
                sblog << "invalid fetch constant operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

        } else if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch source operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

            sel = vchan;

        } else {
            sblog << "invalid fetch source operand " << chan << " ";
            dump::dump_op(f);
            sblog << "\n";
            abort();
        }

        f->bc.src_sel[chan] = sel;
    }

    if (reg >= 0)
        update_ngpr(reg);

    f->bc.src_gpr = reg >= 0 ? reg : 0;

    // dst

    reg = -1;

    unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

    for (unsigned chan = 0; chan < 4; ++chan) {

        unsigned sel = f->bc.dst_sel[chan];

        if (sel == SEL_MASK)
            continue;

        value *v = f->dst[chan];
        if (!v)
            continue;

        if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch dst operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

            dst_swz[vchan] = sel;

        } else {
            sblog << "invalid fetch dst operand " << chan << " ";
            dump::dump_op(f);
            sblog << "\n";
            abort();
        }
    }

    for (unsigned i = 0; i < 4; ++i)
        f->bc.dst_sel[i] = dst_swz[i];

    assert(reg >= 0);

    if (reg >= 0)
        update_ngpr(reg);

    f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

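// Finalize a CF instruction: translate export and memory operands to
// hardware GPRs/selects, and track the last CF node and the last export
// of each type so run() can turn it into EXPORT_DONE.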
void bc_finalizer::finalize_cf(cf_node* c) {

    unsigned flags = c->bc.op_ptr->flags;

    c->bc.end_of_program = 0;
    last_cf = c;

    if (flags & CF_EXP) {
        c->bc.set_op(CF_OP_EXPORT);
        last_export[c->bc.type] = c;

        int reg = -1;

        for (unsigned chan = 0; chan < 4; ++chan) {

            unsigned sel = c->bc.sel[chan];

            if (sel > SEL_W)
                continue;

            value *v = c->src[chan];

            if (v->is_undef()) {
                sel = SEL_MASK;
            } else if (v->is_const()) {
                literal l = v->literal_value;
                if (l == literal(0))
                    sel = SEL_0;
                else if (l == literal(1.0f))
                    sel = SEL_1;
                else {
                    sblog << "invalid export constant operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }

            } else if (v->is_any_gpr()) {
                unsigned vreg = v->gpr.sel();
                unsigned vchan = v->gpr.chan();

                if (reg == -1)
                    reg = vreg;
                else if ((unsigned)reg != vreg) {
                    sblog << "invalid export source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }

                sel = vchan;

            } else {
                sblog << "invalid export source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }

            c->bc.sel[chan] = sel;
        }

        if (reg >= 0)
            update_ngpr(reg);

        c->bc.rw_gpr = reg >= 0 ? reg : 0;

    } else if (flags & CF_MEM) {

        int reg = -1;
        unsigned mask = 0;

        for (unsigned chan = 0; chan < 4; ++chan) {
            value *v = c->src[chan];
            if (!v || v->is_undef())
                continue;

            if (!v->is_any_gpr() || v->gpr.chan() != chan) {
                sblog << "invalid source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }
            unsigned vreg = v->gpr.sel();
            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }

            mask |= (1 << chan);
        }

        assert(reg >= 0 && mask);

        if (reg >= 0)
            update_ngpr(reg);

        c->bc.rw_gpr = reg >= 0 ? reg : 0;
        c->bc.comp_mask = mask;

        if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

            reg = -1;

            for (unsigned chan = 0; chan < 4; ++chan) {
                value *v = c->src[4 + chan];
                if (!v || v->is_undef())
                    continue;

                if (!v->is_any_gpr() || v->gpr.chan() != chan) {
                    sblog << "invalid source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }
                unsigned vreg = v->gpr.sel();
                if (reg == -1)
                    reg = vreg;
                else if ((unsigned)reg != vreg) {
                    sblog << "invalid source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }
            }

            assert(reg >= 0);

            if (reg >= 0)
                update_ngpr(reg);

            c->bc.index_gpr = reg >= 0 ? reg : 0;
        }
    } else if (flags & CF_CALL) {
        update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
    }
}

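// Map a kcache constant to its hardware select: find the constant's
// bank/line in the clause's kcache lock set and rebase the select onto
// the corresponding kcache window (kc_base).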
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
    unsigned sel = v->select.sel();
    unsigned bank = sel >> 12;
    unsigned chan = v->select.chan();
    static const unsigned kc_base[] = {128, 160, 256, 288};

    sel &= 4095;

    unsigned line = sel >> 4;

    for (unsigned k = 0; k < 4; ++k) {
        bc_kcache &kc = alu->bc.kc[k];

        if (kc.mode == KC_LOCK_NONE)
            break;

        if (kc.bank == bank && (kc.addr == line ||
                (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

            sel = kc_base[k] + (sel - (kc.addr << 4));

            return sel_chan(sel, chan);
        }
    }

    assert(!"kcache translation error");
    return 0;
}

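// Grow the shader's GPR count to cover 'gpr', excluding the GPRs
// reserved for ALU temporaries.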
void bc_finalizer::update_ngpr(unsigned gpr) {
    if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
        ngpr = gpr + 1;
}

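// Compute the number of stack elements required at node 'n' from the
// enclosing loop and if nesting, plus the chip-specific reservations
// described below.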
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
        unsigned &ifs, unsigned add) {
    unsigned stack_elements = add;
    bool has_non_wqm_push = (add != 0);
    region_node *r = n->is_region() ?
            static_cast<region_node*>(n) : n->get_parent_region();

    loops = 0;
    ifs = 0;

    while (r) {
        if (r->is_loop()) {
            ++loops;
        } else {
            ++ifs;
            has_non_wqm_push = true;
        }
        r = r->get_parent_region();
    }
    stack_elements += (loops * ctx.stack_entry_size) + ifs;

    // reserve additional elements in some cases
    switch (ctx.hw_class) {
    case HW_CLASS_R600:
    case HW_CLASS_R700:
        // If any non-WQM push is invoked, 2 elements should be reserved.
        if (has_non_wqm_push)
            stack_elements += 2;
        break;
    case HW_CLASS_CAYMAN:
        // If any stack operation is invoked, 2 elements should be reserved.
        if (stack_elements)
            stack_elements += 2;
        break;
    case HW_CLASS_EVERGREEN:
        // According to the docs we need to reserve 1 element for each of the
        // following cases:
        //   1) non-WQM push is used with WQM/LOOP frames on stack
        //   2) ALU_ELSE_AFTER is used at the point of max stack usage
        // NOTE:
        // It was found that the conditions above are not sufficient, there are
        // other cases where we also need to reserve stack space, that's why
        // we always reserve 1 stack element if we have non-WQM push on stack.
        // Condition 2 is ignored for now because we don't use this instruction.
        if (has_non_wqm_push)
            ++stack_elements;
        break;
    case HW_CLASS_UNKNOWN:
        assert(0);
    }
    return stack_elements;
}

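// Update the shader's stack size with the depth required for region 'r'
// plus 'add' extra elements.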
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
    unsigned loops = 0;
    unsigned ifs = 0;
    unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

    // XXX all chips expect this value to be computed using 4 as entry size,
    // not the real entry size
    unsigned stack_entries = (elems + 3) >> 2;

    if (nstack < stack_entries)
        nstack = stack_entries;
}

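// CF-level peephole: split flagged ALU_PUSH_BEFORE clauses into
// PUSH + ALU (stack workaround), resolve jump_after targets, merge
// ALU followed by POP into ALU_POP_AFTER, and drop JUMPs that target
// the immediately following instruction.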
void bc_finalizer::cf_peephole() {
    if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
        for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
                I = N) {
            N = I; ++N;
            cf_node *c = static_cast<cf_node*>(*I);

            if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
                    (c->flags & NF_ALU_STACK_WORKAROUND)) {
                cf_node *push = sh.create_cf(CF_OP_PUSH);
                c->insert_before(push);
                push->jump(c);
                c->bc.set_op(CF_OP_ALU);
            }
        }
    }

    for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
            I = N) {
        N = I; ++N;

        cf_node *c = static_cast<cf_node*>(*I);

        if (c->jump_after_target) {
            c->jump_target = static_cast<cf_node*>(c->jump_target->next);
            c->jump_after_target = false;
        }

        if (c->is_cf_op(CF_OP_POP)) {
            node *p = c->prev;
            if (p->is_alu_clause()) {
                cf_node *a = static_cast<cf_node*>(p);

                if (a->bc.op == CF_OP_ALU) {
                    a->bc.set_op(CF_OP_ALU_POP_AFTER);
                    c->remove();
                }
            }
        } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
            // if JUMP is immediately followed by its jump target,
            // then JUMP is useless and we can eliminate it
            c->remove();
        }
    }
}

} // namespace r600_sb