r600g/sb: Support gs5 sampler indexing (v2)
src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define FBC_DEBUG 0
28
29 #if FBC_DEBUG
30 #define FBC_DUMP(q) do { q } while (0)
31 #else
32 #define FBC_DUMP(q)
33 #endif
34
35 #include "sb_bc.h"
36 #include "sb_shader.h"
37 #include "sb_pass.h"
38
39 namespace r600_sb {
40
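// Inserts an ALU group containing a single NOP before the given group.
// Per its callers below (finalize_alu_group / finalize_alu_src), this is done
// when r6xx_gpr_index_workaround is set and relative (AR-indexed) GPR
// addressing in the current group may conflict with a write in the previous
// group, presumably to separate the two on those chips.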
41 void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
42
43 alu_group_node *g = sh.create_alu_group();
44 alu_node *a = sh.create_alu();
45
46 a->bc.set_op(ALU_OP0_NOP);
47 a->bc.last = 1;
48
49 g->push_back(a);
50 b4->insert_before(g);
51 }
52
53 int bc_finalizer::run() {
54
55 run_on(sh.root);
56
57 regions_vec &rv = sh.get_regions();
58 for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
59 ++I) {
60 region_node *r = *I;
61
62 assert(r);
63
64 bool loop = r->is_loop();
65
66 if (loop)
67 finalize_loop(r);
68 else
69 finalize_if(r);
70
71 r->expand();
72 }
73
74 cf_peephole();
75
76     // Workaround for some problems on r6xx/7xx:
77     // add an ALU NOP clause to each vertex shader (VS/ES).
78 if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
79 cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
80
81 alu_group_node *g = sh.create_alu_group();
82
83 alu_node *a = sh.create_alu();
84 a->bc.set_op(ALU_OP0_NOP);
85 a->bc.last = 1;
86
87 g->push_back(a);
88 c->push_back(g);
89
90 sh.root->push_back(c);
91
92 c = sh.create_cf(CF_OP_NOP);
93 sh.root->push_back(c);
94
95 last_cf = c;
96 }
97
98 if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
99 last_cf = sh.create_cf(CF_OP_NOP);
100 sh.root->push_back(last_cf);
101 }
102
103 if (ctx.is_cayman()) {
104 if (!last_cf) {
105 cf_node *c = sh.create_cf(CF_OP_CF_END);
106 sh.root->push_back(c);
107 } else
108 last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
109 } else
110 last_cf->bc.end_of_program = 1;
111
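        // finalize_cf() records the last CF_EXP node of each type in
        // last_export[]; here those exports are converted to EXPORT_DONE.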
112 for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
113 cf_node *le = last_export[t];
114 if (le)
115 le->bc.set_op(CF_OP_EXPORT_DONE);
116 }
117
118 sh.ngpr = ngpr;
119 sh.nstack = nstack;
120 return 0;
121 }
122
123 void bc_finalizer::finalize_loop(region_node* r) {
124
125 update_nstack(r);
126
127 cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
128 cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
129
130     // Update last_cf, but don't overwrite it if it points outside the current loop
131     // nest, since it may refer to a cf that comes later in program order.
132     // Checking a single parent level is sufficient because finalize_loop() is called
133     // on regions in reverse order, from innermost to outermost loop nest level.
134 if (!last_cf || last_cf->get_parent_region() == r) {
135 last_cf = loop_end;
136 }
137
138 loop_start->jump_after(loop_end);
139 loop_end->jump_after(loop_start);
140
141 for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
142 I != E; ++I) {
143 depart_node *dep = *I;
144 cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
145 loop_break->jump(loop_end);
146 dep->push_back(loop_break);
147 dep->expand();
148 }
149
150 // FIXME produces unnecessary LOOP_CONTINUE
151 for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
152 I != E; ++I) {
153 repeat_node *rep = *I;
154 if (!(rep->parent == r && rep->prev == NULL)) {
155 cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
156 loop_cont->jump(loop_end);
157 rep->push_back(loop_cont);
158 }
159 rep->expand();
160 }
161
162 r->push_front(loop_start);
163 r->push_back(loop_end);
164 }
165
166 void bc_finalizer::finalize_if(region_node* r) {
167
168 update_nstack(r);
169
170 // expecting the following control flow structure here:
171 // - region
172 // {
173 // - depart/repeat 1 (it may be depart/repeat for some outer region)
174 // {
175 // - if
176 // {
177 // - depart/repeat 2 (possibly for outer region)
178 // {
179 // - some optional code
180 // }
181 // }
182     //       - optional <else> code ...
183 // }
184 // }
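        // Rough sketch of the CF sequence emitted below for this structure
        // (illustrative only, not literal bytecode):
        //   JUMP  -> ELSE, or past the POP (pop_count = 1) when there is no else
        //     ... if body ...
        //   ELSE  -> after the POP, pop_count = 1   (only when an else exists)
        //     ... else body ...
        //   POP   pop_count = 1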
185
186 container_node *repdep1 = static_cast<container_node*>(r->first);
187 assert(repdep1->is_depart() || repdep1->is_repeat());
188
189 if_node *n_if = static_cast<if_node*>(repdep1->first);
190
191 if (n_if) {
192
193
194 assert(n_if->is_if());
195
196 container_node *repdep2 = static_cast<container_node*>(n_if->first);
197 assert(repdep2->is_depart() || repdep2->is_repeat());
198
199 cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
200 cf_node *if_pop = sh.create_cf(CF_OP_POP);
201
202 if (!last_cf || last_cf->get_parent_region() == r) {
203 last_cf = if_pop;
204 }
205 if_pop->bc.pop_count = 1;
206 if_pop->jump_after(if_pop);
207
208 r->push_front(if_jump);
209 r->push_back(if_pop);
210
211 bool has_else = n_if->next;
212
213 if (has_else) {
214 cf_node *nelse = sh.create_cf(CF_OP_ELSE);
215 n_if->insert_after(nelse);
216 if_jump->jump(nelse);
217 nelse->jump_after(if_pop);
218 nelse->bc.pop_count = 1;
219
220 } else {
221 if_jump->jump_after(if_pop);
222 if_jump->bc.pop_count = 1;
223 }
224
225 n_if->expand();
226 }
227
228 for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
229 I != E; ++I) {
230 (*I)->expand();
231 }
232 r->departs.clear();
233 assert(r->repeats.empty());
234 }
235
236 void bc_finalizer::run_on(container_node* c) {
237 node *prev_node = NULL;
238 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
239 node *n = *I;
240
241 if (n->is_alu_group()) {
242 finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
243 } else {
244 if (n->is_alu_clause()) {
245 cf_node *c = static_cast<cf_node*>(n);
246
247 if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
248 if (ctx.stack_workaround_8xx) {
249 region_node *r = c->get_parent_region();
250 if (r) {
251 unsigned ifs, loops;
252 unsigned elems = get_stack_depth(r, loops, ifs);
253 unsigned dmod1 = elems % ctx.stack_entry_size;
254 unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;
255
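                                             // Illustration (hypothetical stack_entry_size of 4,
                                             // purely for the example): elems == 4 gives dmod1 == 0
                                             // and elems == 3 gives dmod2 == 0, so both are flagged;
                                             // elems == 2 is not.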
256 if (elems && (!dmod1 || !dmod2))
257 c->flags |= NF_ALU_STACK_WORKAROUND;
258 }
259 } else if (ctx.stack_workaround_9xx) {
260 region_node *r = c->get_parent_region();
261 if (r) {
262 unsigned ifs, loops;
263 get_stack_depth(r, loops, ifs);
264 if (loops >= 2)
265 c->flags |= NF_ALU_STACK_WORKAROUND;
266 }
267 }
268 }
269 } else if (n->is_fetch_inst()) {
270 finalize_fetch(static_cast<fetch_node*>(n));
271 } else if (n->is_cf_inst()) {
272 finalize_cf(static_cast<cf_node*>(n));
273 }
274 if (n->is_container())
275 run_on(static_cast<container_node*>(n));
276 }
277 prev_node = n;
278 }
279 }
280
281 void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
282
283 alu_node *last = NULL;
284 alu_group_node *prev_g = NULL;
285 bool add_nop = false;
286 if (prev_node && prev_node->is_alu_group()) {
287 prev_g = static_cast<alu_group_node*>(prev_node);
288 }
289
290 for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
291 alu_node *n = static_cast<alu_node*>(*I);
292 unsigned slot = n->bc.slot;
293 value *d = n->dst.empty() ? NULL : n->dst[0];
294
295 if (d && d->is_special_reg()) {
296 assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
297 d = NULL;
298 }
299
300 sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);
301
302 if (d) {
303 assert(fdst.chan() == slot || slot == SLOT_TRANS);
304 }
305
306 if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
307 n->bc.dst_gpr = fdst.sel();
308 n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;
309
310
311 if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
312 n->bc.dst_rel = 1;
313 update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
314 } else {
315 n->bc.dst_rel = 0;
316 }
317
318 n->bc.write_mask = d != NULL;
319 n->bc.last = 0;
320
321 if (n->bc.op_ptr->flags & AF_PRED) {
322 n->bc.update_pred = (n->dst[1] != NULL);
323 n->bc.update_exec_mask = (n->dst[2] != NULL);
324 }
325
326 // FIXME handle predication here
327 n->bc.pred_sel = PRED_SEL_OFF;
328
329 update_ngpr(n->bc.dst_gpr);
330
331 add_nop |= finalize_alu_src(g, n, prev_g);
332
333 last = n;
334 }
335
336 if (add_nop) {
337 if (sh.get_ctx().r6xx_gpr_index_workaround) {
338 insert_rv6xx_load_ar_workaround(g);
339 }
340 }
341 last->bc.last = 1;
342 }
343
344 bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
345 vvec &sv = a->src;
346 bool add_nop = false;
347 FBC_DUMP(
348 sblog << "finalize_alu_src: ";
349 dump::dump_op(a);
350 sblog << "\n";
351 );
352
353 unsigned si = 0;
354
355 for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
356 value *v = *I;
357 assert(v);
358
359 bc_alu_src &src = a->bc.src[si];
360 sel_chan sc;
361 src.rel = 0;
362
363 sel_chan gpr;
364
365 switch (v->kind) {
366 case VLK_REL_REG:
367 sc = v->get_final_gpr();
368 src.sel = sc.sel();
369 src.chan = sc.chan();
370 if (!v->rel->is_const()) {
371 src.rel = 1;
372 update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
373 if (prev && !add_nop) {
374 for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
375 alu_node *pn = static_cast<alu_node*>(*pI);
376 if (pn->bc.dst_gpr == src.sel) {
377 add_nop = true;
378 break;
379 }
380 }
381 }
382 } else
383 src.rel = 0;
384
385 break;
386 case VLK_REG:
387 gpr = v->get_final_gpr();
388 src.sel = gpr.sel();
389 src.chan = gpr.chan();
390 update_ngpr(src.sel);
391 break;
392 case VLK_TEMP:
393 src.sel = v->gpr.sel();
394 src.chan = v->gpr.chan();
395 update_ngpr(src.sel);
396 break;
397 case VLK_UNDEF:
398 case VLK_CONST: {
399 literal lv = v->literal_value;
400 src.chan = 0;
401
402 if (lv == literal(0))
403 src.sel = ALU_SRC_0;
404 else if (lv == literal(0.5f))
405 src.sel = ALU_SRC_0_5;
406 else if (lv == literal(1.0f))
407 src.sel = ALU_SRC_1;
408 else if (lv == literal(1))
409 src.sel = ALU_SRC_1_INT;
410 else if (lv == literal(-1))
411 src.sel = ALU_SRC_M_1_INT;
412 else {
413 src.sel = ALU_SRC_LITERAL;
414 src.chan = g->literal_chan(lv);
415 src.value = lv;
416 }
417 break;
418 }
419 case VLK_KCACHE: {
420 cf_node *clause = static_cast<cf_node*>(g->parent);
421 assert(clause->is_alu_clause());
422 sel_chan k = translate_kcache(clause, v);
423
424 assert(k && "kcache translation failed");
425
426 src.sel = k.sel();
427 src.chan = k.chan();
428 break;
429 }
430 case VLK_PARAM:
431 case VLK_SPECIAL_CONST:
432 src.sel = v->select.sel();
433 src.chan = v->select.chan();
434 break;
435 default:
436 assert(!"unknown value kind");
437 break;
438 }
439 if (prev && !add_nop) {
440 for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
441 alu_node *pn = static_cast<alu_node*>(*pI);
442 if (pn->bc.dst_rel) {
443 if (pn->bc.dst_gpr == src.sel) {
444 add_nop = true;
445 break;
446 }
447 }
448 }
449 }
450 }
451
452 while (si < 3) {
453 a->bc.src[si++].sel = 0;
454 }
455 return add_nop;
456 }
457
458 void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
459 {
460 int reg = -1;
461
462 for (unsigned chan = 0; chan < 4; ++chan) {
463
464 dst.bc.dst_sel[chan] = SEL_MASK;
465
466 unsigned sel = SEL_MASK;
467
468 value *v = src.src[arg_start + chan];
469
470 if (!v || v->is_undef()) {
471 sel = SEL_MASK;
472 } else if (v->is_const()) {
473 literal l = v->literal_value;
474 if (l == literal(0))
475 sel = SEL_0;
476 else if (l == literal(1.0f))
477 sel = SEL_1;
478 else {
479 sblog << "invalid fetch constant operand " << chan << " ";
480 dump::dump_op(&src);
481 sblog << "\n";
482 abort();
483 }
484
485 } else if (v->is_any_gpr()) {
486 unsigned vreg = v->gpr.sel();
487 unsigned vchan = v->gpr.chan();
488
489 if (reg == -1)
490 reg = vreg;
491 else if ((unsigned)reg != vreg) {
492 sblog << "invalid fetch source operand " << chan << " ";
493 dump::dump_op(&src);
494 sblog << "\n";
495 abort();
496 }
497
498 sel = vchan;
499
500 } else {
501 sblog << "invalid fetch source operand " << chan << " ";
502 dump::dump_op(&src);
503 sblog << "\n";
504 abort();
505 }
506
507 dst.bc.src_sel[chan] = sel;
508 }
509
510 if (reg >= 0)
511 update_ngpr(reg);
512
513 dst.bc.src_gpr = reg >= 0 ? reg : 0;
514 }
515
516 void bc_finalizer::emit_set_grad(fetch_node* f) {
517
518 assert(f->src.size() == 12);
519 unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
520
521 unsigned arg_start = 0;
522
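        // Since arg_start is bumped before each copy, the loop below emits
        // SET_GRADIENTS_V from f->src[4..7] and SET_GRADIENTS_H from
        // f->src[8..11]; src[0..3] remain the regular fetch source operands
        // handled by the caller, finalize_fetch().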
523 for (unsigned op = 0; op < 2; ++op) {
524 fetch_node *n = sh.create_fetch();
525 n->bc.set_op(ops[op]);
526
527 arg_start += 4;
528
529 copy_fetch_src(*n, *f, arg_start);
530
531 f->insert_before(n);
532 }
533
534 }
535
536 void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
537 assert(f.src.size() == 8);
538
539 fetch_node *n = sh.create_fetch();
540
541 n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);
542
543 copy_fetch_src(*n, f, 4);
544
545 f.insert_before(n);
546 }
547
548 void bc_finalizer::finalize_fetch(fetch_node* f) {
549
550 int reg = -1;
551
552 // src
553
554 unsigned src_count = 4;
555
556 unsigned flags = f->bc.op_ptr->flags;
557
558 if (flags & FF_VTX) {
559 src_count = 1;
560 } else if (flags & FF_USEGRAD) {
561 emit_set_grad(f);
562 } else if (flags & FF_USE_TEXTURE_OFFSETS) {
563 emit_set_texture_offsets(*f);
564 }
565
566 for (unsigned chan = 0; chan < src_count; ++chan) {
567
568 unsigned sel = f->bc.src_sel[chan];
569
570 if (sel > SEL_W)
571 continue;
572
573 value *v = f->src[chan];
574
575 if (v->is_undef()) {
576 sel = SEL_MASK;
577 } else if (v->is_const()) {
578 literal l = v->literal_value;
579 if (l == literal(0))
580 sel = SEL_0;
581 else if (l == literal(1.0f))
582 sel = SEL_1;
583 else {
584 sblog << "invalid fetch constant operand " << chan << " ";
585 dump::dump_op(f);
586 sblog << "\n";
587 abort();
588 }
589
590 } else if (v->is_any_gpr()) {
591 unsigned vreg = v->gpr.sel();
592 unsigned vchan = v->gpr.chan();
593
594 if (reg == -1)
595 reg = vreg;
596 else if ((unsigned)reg != vreg) {
597 sblog << "invalid fetch source operand " << chan << " ";
598 dump::dump_op(f);
599 sblog << "\n";
600 abort();
601 }
602
603 sel = vchan;
604
605 } else {
606 sblog << "invalid fetch source operand " << chan << " ";
607 dump::dump_op(f);
608 sblog << "\n";
609 abort();
610 }
611
612 f->bc.src_sel[chan] = sel;
613 }
614
615 if (reg >= 0)
616 update_ngpr(reg);
617
618 f->bc.src_gpr = reg >= 0 ? reg : 0;
619
620 // dst
621
622 reg = -1;
623
624 unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};
625
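        // The loop below re-indexes the destination selectors by the GPR channel
        // each dst value was actually allocated to: e.g. if f->dst[0] (with
        // selector dst_sel[0]) landed in channel 2 of the destination GPR, the
        // final dst_sel[2] becomes that selector; channels with no value stay
        // SEL_MASK.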
626 for (unsigned chan = 0; chan < 4; ++chan) {
627
628 unsigned sel = f->bc.dst_sel[chan];
629
630 if (sel == SEL_MASK)
631 continue;
632
633 value *v = f->dst[chan];
634 if (!v)
635 continue;
636
637 if (v->is_any_gpr()) {
638 unsigned vreg = v->gpr.sel();
639 unsigned vchan = v->gpr.chan();
640
641 if (reg == -1)
642 reg = vreg;
643 else if ((unsigned)reg != vreg) {
644 sblog << "invalid fetch dst operand " << chan << " ";
645 dump::dump_op(f);
646 sblog << "\n";
647 abort();
648 }
649
650 dst_swz[vchan] = sel;
651
652 } else {
653 sblog << "invalid fetch dst operand " << chan << " ";
654 dump::dump_op(f);
655 sblog << "\n";
656 abort();
657 }
658
659 }
660
661 for (unsigned i = 0; i < 4; ++i)
662 f->bc.dst_sel[i] = dst_swz[i];
663
664 assert(reg >= 0);
665
666 if (reg >= 0)
667 update_ngpr(reg);
668
669 f->bc.dst_gpr = reg >= 0 ? reg : 0;
670 }
671
672 void bc_finalizer::finalize_cf(cf_node* c) {
673
674 unsigned flags = c->bc.op_ptr->flags;
675
676 c->bc.end_of_program = 0;
677 last_cf = c;
678
679 if (flags & CF_EXP) {
680 c->bc.set_op(CF_OP_EXPORT);
681 last_export[c->bc.type] = c;
682
683 int reg = -1;
684
685 for (unsigned chan = 0; chan < 4; ++chan) {
686
687 unsigned sel = c->bc.sel[chan];
688
689 if (sel > SEL_W)
690 continue;
691
692 value *v = c->src[chan];
693
694 if (v->is_undef()) {
695 sel = SEL_MASK;
696 } else if (v->is_const()) {
697 literal l = v->literal_value;
698 if (l == literal(0))
699 sel = SEL_0;
700 else if (l == literal(1.0f))
701 sel = SEL_1;
702 else {
703 sblog << "invalid export constant operand " << chan << " ";
704 dump::dump_op(c);
705 sblog << "\n";
706 abort();
707 }
708
709 } else if (v->is_any_gpr()) {
710 unsigned vreg = v->gpr.sel();
711 unsigned vchan = v->gpr.chan();
712
713 if (reg == -1)
714 reg = vreg;
715 else if ((unsigned)reg != vreg) {
716 sblog << "invalid export source operand " << chan << " ";
717 dump::dump_op(c);
718 sblog << "\n";
719 abort();
720 }
721
722 sel = vchan;
723
724 } else {
725 sblog << "invalid export source operand " << chan << " ";
726 dump::dump_op(c);
727 sblog << "\n";
728 abort();
729 }
730
731 c->bc.sel[chan] = sel;
732 }
733
734 if (reg >= 0)
735 update_ngpr(reg);
736
737 c->bc.rw_gpr = reg >= 0 ? reg : 0;
738
739 } else if (flags & CF_MEM) {
740
741 int reg = -1;
742 unsigned mask = 0;
743
744 for (unsigned chan = 0; chan < 4; ++chan) {
745 value *v = c->src[chan];
746 if (!v || v->is_undef())
747 continue;
748
749 if (!v->is_any_gpr() || v->gpr.chan() != chan) {
750 sblog << "invalid source operand " << chan << " ";
751 dump::dump_op(c);
752 sblog << "\n";
753 abort();
754 }
755 unsigned vreg = v->gpr.sel();
756 if (reg == -1)
757 reg = vreg;
758 else if ((unsigned)reg != vreg) {
759 sblog << "invalid source operand " << chan << " ";
760 dump::dump_op(c);
761 sblog << "\n";
762 abort();
763 }
764
765 mask |= (1 << chan);
766 }
767
768 if (reg >= 0)
769 update_ngpr(reg);
770
771 c->bc.rw_gpr = reg >= 0 ? reg : 0;
772 c->bc.comp_mask = mask;
773
774 if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {
775
776 reg = -1;
777
778 for (unsigned chan = 0; chan < 4; ++chan) {
779 value *v = c->src[4 + chan];
780 if (!v || v->is_undef())
781 continue;
782
783 if (!v->is_any_gpr() || v->gpr.chan() != chan) {
784 sblog << "invalid source operand " << chan << " ";
785 dump::dump_op(c);
786 sblog << "\n";
787 abort();
788 }
789 unsigned vreg = v->gpr.sel();
790 if (reg == -1)
791 reg = vreg;
792 else if ((unsigned)reg != vreg) {
793 sblog << "invalid source operand " << chan << " ";
794 dump::dump_op(c);
795 sblog << "\n";
796 abort();
797 }
798 }
799
800 assert(reg >= 0);
801
802 if (reg >= 0)
803 update_ngpr(reg);
804
805 c->bc.index_gpr = reg >= 0 ? reg : 0;
806 }
807 } else if (flags & CF_CALL) {
808 update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
809 }
810 }
811
812 sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
813 unsigned sel = v->select.sel();
814 unsigned bank = sel >> 12;
815 unsigned chan = v->select.chan();
816 static const unsigned kc_base[] = {128, 160, 256, 288};
817
818 sel &= 4095;
819
820 unsigned line = sel >> 4;
821
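        // Worked example (hypothetical numbers): a v->select encoding bank 1,
        // index 35 gives sel = 35 and line = 2; if kc[1] is locked on bank 1 at
        // addr 2, the constant maps to kc_base[1] + (35 - (2 << 4)) = 160 + 3 = 163.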
822 for (unsigned k = 0; k < 4; ++k) {
823 bc_kcache &kc = alu->bc.kc[k];
824
825 if (kc.mode == KC_LOCK_NONE)
826 break;
827
828 if (kc.bank == bank && (kc.addr == line ||
829 (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {
830
831 sel = kc_base[k] + (sel - (kc.addr << 4));
832
833 return sel_chan(sel, chan);
834 }
835 }
836
837 assert(!"kcache translation error");
838 return 0;
839 }
840
841 void bc_finalizer::update_ngpr(unsigned gpr) {
842 if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
843 ngpr = gpr + 1;
844 }
845
846 unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
847 unsigned &ifs, unsigned add) {
848 unsigned stack_elements = add;
849 bool has_non_wqm_push = (add != 0);
850 region_node *r = n->is_region() ?
851 static_cast<region_node*>(n) : n->get_parent_region();
852
853 loops = 0;
854 ifs = 0;
855
856 while (r) {
857 if (r->is_loop()) {
858 ++loops;
859 } else {
860 ++ifs;
861 has_non_wqm_push = true;
862 }
863 r = r->get_parent_region();
864 }
865 stack_elements += (loops * ctx.stack_entry_size) + ifs;
866
867 // reserve additional elements in some cases
868 switch (ctx.hw_class) {
869 case HW_CLASS_R600:
870 case HW_CLASS_R700:
871 // If any non-WQM push is invoked, 2 elements should be reserved.
872 if (has_non_wqm_push)
873 stack_elements += 2;
874 break;
875 case HW_CLASS_CAYMAN:
876 // If any stack operation is invoked, 2 elements should be reserved
877 if (stack_elements)
878 stack_elements += 2;
879 break;
880 case HW_CLASS_EVERGREEN:
881 // According to the docs we need to reserve 1 element for each of the
882 // following cases:
883 // 1) non-WQM push is used with WQM/LOOP frames on stack
884 // 2) ALU_ELSE_AFTER is used at the point of max stack usage
885 // NOTE:
886     // It was found that the conditions above are not sufficient: there are
887     // other cases where we also need to reserve stack space, which is why
888     // we always reserve 1 stack element whenever there is a non-WQM push on the stack.
889 // Condition 2 is ignored for now because we don't use this instruction.
890 if (has_non_wqm_push)
891 ++stack_elements;
892 break;
893 case HW_CLASS_UNKNOWN:
894 assert(0);
895 }
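        // Illustration (hypothetical stack_entry_size of 4, for the arithmetic
        // only): a node nested in 2 loops and 1 non-loop region gives
        // 2 * 4 + 1 = 9 elements, plus 1 reserved on Evergreen (non-WQM push)
        // or 2 on R600/R700/Cayman, before conversion to entries in
        // update_nstack().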
896 return stack_elements;
897 }
898
899 void bc_finalizer::update_nstack(region_node* r, unsigned add) {
900 unsigned loops = 0;
901 unsigned ifs = 0;
902 unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;
903
904 // XXX all chips expect this value to be computed using 4 as entry size,
905 // not the real entry size
906 unsigned stack_entries = (elems + 3) >> 2;
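        // The shift rounds elems up to 4-element entries, e.g. elems == 9 gives
        // (9 + 3) >> 2 == 3 stack entries.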
907
908 if (nstack < stack_entries)
909 nstack = stack_entries;
910 }
911
912 void bc_finalizer::cf_peephole() {
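        // First pass: on chips needing the stack workaround, each ALU_PUSH_BEFORE
        // clause flagged in run_on() is split into an explicit PUSH (with the
        // clause as its jump target) followed by the clause itself, demoted to a
        // plain CF_OP_ALU.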
913 if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
914 for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
915 I = N) {
916 N = I; ++N;
917 cf_node *c = static_cast<cf_node*>(*I);
918
919 if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
920 (c->flags & NF_ALU_STACK_WORKAROUND)) {
921 cf_node *push = sh.create_cf(CF_OP_PUSH);
922 c->insert_before(push);
923 push->jump(c);
924 c->bc.set_op(CF_OP_ALU);
925 }
926 }
927 }
928
929 for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
930 I = N) {
931 N = I; ++N;
932
933 cf_node *c = static_cast<cf_node*>(*I);
934
935 if (c->jump_after_target) {
936 c->jump_target = static_cast<cf_node*>(c->jump_target->next);
937 c->jump_after_target = false;
938 }
939
940 if (c->is_cf_op(CF_OP_POP)) {
941 node *p = c->prev;
942 if (p->is_alu_clause()) {
943 cf_node *a = static_cast<cf_node*>(p);
944
945 if (a->bc.op == CF_OP_ALU) {
946 a->bc.set_op(CF_OP_ALU_POP_AFTER);
947 c->remove();
948 }
949 }
950 } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
951 // if JUMP is immediately followed by its jump target,
952 // then JUMP is useless and we can eliminate it
953 c->remove();
954 }
955 }
956 }
957
958 } // namespace r600_sb