r600/sb: update last_cf if alu is the last clause
[mesa.git] src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

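// Insert an ALU group containing a single NOP instruction before group 'b4'.
// This is the rv6xx workaround applied when the previous group wrote a GPR
// that the current group addresses relatively (see finalize_alu_src).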
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

	alu_group_node *g = sh.create_alu_group();
	alu_node *a = sh.create_alu();

	a->bc.set_op(ALU_OP0_NOP);
	a->bc.last = 1;

	g->push_back(a);
	b4->insert_before(g);
}

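// Top-level entry point of the finalizer pass: finalizes all regions
// (innermost first), runs the CF peephole, emits the trailing ALU NOP / CF NOP
// workaround for pre-evergreen vertex shaders, marks the end of the program,
// and converts the last export of each type to EXPORT_DONE.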
int bc_finalizer::run() {

	run_on(sh.root);

	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}

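// Lower a loop region to CF instructions: wrap it with LOOP_START_DX10 /
// LOOP_END, convert departs into LOOP_BREAK and repeats into LOOP_CONTINUE,
// and update last_cf and the required stack depth.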
void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	// Update last_cf, but don't overwrite it if it's outside the current loop nest since
	// it may point to a cf that is later in program order.
	// The single parent level check is sufficient since finalize_loop() is processed in
	// reverse order from innermost to outermost loop nest level.
	if (!last_cf || last_cf->get_parent_region() == r) {
		last_cf = loop_end;
	}

	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

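// Lower an if-region to CF instructions: a JUMP at the start, an optional
// ELSE, and a final POP, wiring the jump targets and pop counts and updating
// last_cf and the required stack depth.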
void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		if (!last_cf || last_cf->get_parent_region() == r) {
			last_cf = if_pop;
		}
		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		bool has_else = n_if->next;

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

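// Recursively walk the IR: finalize ALU groups, fetch and CF instructions,
// flag ALU_PUSH_BEFORE clauses that need the evergreen/cayman stack
// workarounds, and track the last CF node seen.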
void bc_finalizer::run_on(container_node* c) {
	node *prev_node = NULL;
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
				last_cf = c;
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
		prev_node = n;
	}
}

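// Finalize one ALU group: assign the final destination GPR/channel, write
// mask and relative-addressing bits for each instruction, clear predication,
// finalize the sources, set the 'last' bit on the final slot, and insert the
// rv6xx NOP-group workaround if a source requires it.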
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

	alu_node *last = NULL;
	alu_group_node *prev_g = NULL;
	bool add_nop = false;
	if (prev_node && prev_node->is_alu_group()) {
		prev_g = static_cast<alu_group_node*>(prev_node);
	}

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;
		value *d = n->dst.empty() ? NULL : n->dst[0];

		if (d && d->is_special_reg()) {
			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
			n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		add_nop |= finalize_alu_src(g, n, prev_g);

		last = n;
	}

	if (add_nop) {
		if (sh.get_ctx().r6xx_gpr_index_workaround) {
			insert_rv6xx_load_ar_workaround(g);
		}
	}
	last->bc.last = 1;
}

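// Translate the source values of an ALU instruction into hardware operand
// encodings (GPR, relative GPR, inline constant, literal, kcache, param).
// Returns true when the previous group's destination conflicts with a
// relatively-addressed operand here, i.e. a NOP group must be inserted for
// the rv6xx GPR index workaround.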
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
	vvec &sv = a->src;
	bool add_nop = false;
	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
				if (prev && !add_nop) {
					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
						alu_node *pn = static_cast<alu_node*>(*pI);
						if (pn->bc.dst_gpr == src.sel) {
							add_nop = true;
							break;
						}
					}
				}
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
		if (prev && !add_nop) {
			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
				alu_node *pn = static_cast<alu_node*>(*pI);
				if (pn->bc.dst_rel) {
					if (pn->bc.dst_gpr == src.sel) {
						add_nop = true;
						break;
					}
				}
			}
		}
	}

	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
	return add_nop;
}

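// Copy four source operands of 'src' (starting at 'arg_start') into the
// source swizzle of 'dst', verifying that all GPR operands come from the
// same register. Used by the SET_GRADIENTS / SET_TEXTURE_OFFSETS helpers.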
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
	int reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {

		dst.bc.dst_sel[chan] = SEL_MASK;

		unsigned sel = SEL_MASK;

		value *v = src.src[arg_start + chan];

		if (!v || v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(&src);
			sblog << "\n";
			abort();
		}

		dst.bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

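// Emit FETCH_OP_SET_GRADIENTS_V/H instructions before a texture fetch that
// uses explicit gradients, taking the gradient values from the fetch's extra
// source operands.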
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12 || f->src.size() == 13);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		arg_start += 4;

		copy_fetch_src(*n, *f, arg_start);

		f->insert_before(n);
	}
}

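// Emit a FETCH_OP_SET_TEXTURE_OFFSETS instruction before a fetch that uses
// texture offsets, taking the offsets from source operands 4..7.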
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
	assert(f.src.size() == 8);

	fetch_node *n = sh.create_fetch();

	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

	copy_fetch_src(*n, f, 4);

	f.insert_before(n);
}

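// Finalize a fetch instruction: emit helper instructions for gradients or
// texture offsets, map the source operands to a single GPR with the proper
// source swizzle, and compute the destination GPR and destination swizzle.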
void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_GDS) {
		src_count = 2;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	if ((flags & FF_GDS) && reg == -1) {
		f->bc.dst_sel[0] = SEL_MASK;
		f->bc.dst_gpr = 0;
		return;
	}
	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

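// Finalize a CF instruction: for exports pick the final op and record the
// last export of each type, for memory exports compute the rw/index GPRs and
// component mask, and for calls reserve additional stack elements.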
void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}

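// Map a kcache constant reference to its final sel/chan encoding using the
// kcache banks locked for the ALU clause; asserts if the constant is not
// covered by any locked bank.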
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.kcache_sel();
	unsigned bank = v->select.kcache_bank();
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}

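// Compute the number of stack elements needed at node 'n' from the number of
// enclosing loop and if regions plus hardware-specific reserved elements;
// the loop/if counts are returned through the reference parameters.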
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
		unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) non-WQM push is used with WQM/LOOP frames on stack
		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
		// NOTE:
		// It was found that the conditions above are not sufficient, there are
		// other cases where we also need to reserve stack space, that's why
		// we always reserve 1 stack element if we have non-WQM push on stack.
		// Condition 2 is ignored for now because we don't use this instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}

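// Update the shader's stack size (in 4-element entries) to cover the stack
// depth required at region 'r' plus 'add' extra elements.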
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as entry size,
	// not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;

	if (nstack < stack_entries)
		nstack = stack_entries;
}

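// CF-level peephole pass: split ALU_PUSH_BEFORE into PUSH + ALU where the
// stack workaround is flagged, resolve jump_after targets, merge a POP that
// follows an ALU clause into ALU_POP_AFTER, and drop JUMPs that target the
// immediately following instruction.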
void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			if (c->jump_target->next == NULL) {
				c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
				if (last_cf == c->jump_target)
					last_cf = static_cast<cf_node*>(c->jump_target->next);
			}
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb