r600g/sb: get rid of standard c++ streams
[mesa.git] / src / gallium / drivers / r600 / sb / sb_bc_finalize.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define FBC_DEBUG 0
28
29 #if FBC_DEBUG
30 #define FBC_DUMP(q) do { q } while (0)
31 #else
32 #define FBC_DUMP(q)
33 #endif
34
35 #include "sb_bc.h"
36 #include "sb_shader.h"
37 #include "sb_pass.h"
38
39 namespace r600_sb {
40
int bc_finalizer::run() {

	regions_vec &rv = sh.get_regions();

	// Finalize innermost regions first (the vector is ordered outermost
	// first, so walk it in reverse), then dissolve each region node so
	// that only plain CF-level nodes remain.
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	// encode operands / track last_cf and last_export for every node
	run_on(sh.root);

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && sh.target == TARGET_VS) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	// the program terminator is attached to the last CF instruction below;
	// if that instruction is an ALU clause, append a NOP to carry it instead
	if (last_cf->bc.op_ptr->flags & CF_ALU) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	// cayman uses an explicit CF_END instruction; other chips set the
	// end_of_program bit on the last CF instruction
	if (ctx.is_cayman())
		last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	else
		last_cf->bc.end_of_program = 1;

	// mark the final export of each export type as EXPORT_DONE
	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	// publish the GPR / stack usage collected during finalization
	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}
107
108 void bc_finalizer::finalize_loop(region_node* r) {
109
110 cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
111 cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
112
113 loop_start->jump_after(loop_end);
114 loop_end->jump_after(loop_start);
115
116 for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
117 I != E; ++I) {
118 depart_node *dep = *I;
119 cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
120 loop_break->jump(loop_end);
121 dep->push_back(loop_break);
122 dep->expand();
123 }
124
125 // FIXME produces unnecessary LOOP_CONTINUE
126 for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
127 I != E; ++I) {
128 repeat_node *rep = *I;
129 if (!(rep->parent == r && rep->prev == NULL)) {
130 cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
131 loop_cont->jump(loop_end);
132 rep->push_back(loop_cont);
133 }
134 rep->expand();
135 }
136
137 r->push_front(loop_start);
138 r->push_back(loop_end);
139 }
140
// Translate an if region into JUMP/ELSE/POP CF instructions.
void bc_finalizer::finalize_if(region_node* r) {

	// account for the stack level this conditional consumes
	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code> ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {


		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		// the POP restores one stack level; its jump_after target (the
		// node following itself) is resolved later in cf_peephole()
		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		bool has_else = n_if->next;

		if (has_else) {
			// JUMP targets the ELSE; ELSE jumps past the POP and pops
			// one level itself for the taken branch
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			// no else branch: JUMP skips the whole body and pops one level
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	// dissolve the departs attached to this region; repeats are not
	// expected for an if region
	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}
207
208 void bc_finalizer::run_on(container_node* c) {
209
210 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
211 node *n = *I;
212
213 if (n->is_alu_group()) {
214 finalize_alu_group(static_cast<alu_group_node*>(n));
215 } else {
216 if (n->is_fetch_inst()) {
217 finalize_fetch(static_cast<fetch_node*>(n));
218 } else if (n->is_cf_inst()) {
219 finalize_cf(static_cast<cf_node*>(n));
220 } else if (n->is_alu_clause()) {
221
222 } else if (n->is_fetch_clause()) {
223
224 } else {
225 assert(!"unexpected node");
226 }
227
228 if (n->is_container())
229 run_on(static_cast<container_node*>(n));
230 }
231 }
232 }
233
// Finalize one ALU instruction group: encode each instruction's dst
// gpr/chan, write mask, relative addressing and predicate fields, then set
// the 'last' bit on the group's final instruction.
void bc_finalizer::finalize_alu_group(alu_group_node* g) {

	alu_node *last = NULL;

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;

		value *d = n->dst.empty() ? NULL : n->dst[0];

		// a special-reg dst (only expected for AF_MOVA ops) is not encoded
		// as a gpr write -- treat the instruction as having no dst
		if (d && d->is_special_reg()) {
			assert(n->bc.op_ptr->flags & AF_MOVA);
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		// vector slots (x..w) must write their own channel; the trans
		// slot may write any channel
		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;


		// runtime-indexed (relative) dst: ngpr must cover the whole array
		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		// predicate-setting ops: dst[1] holds the predicate result,
		// dst[2] the exec mask result
		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		finalize_alu_src(g, n);

		last = n;
	}

	// mark the end of the instruction group
	last->bc.last = 1;
}
286
// Encode the source operand fields (sel / chan / rel) of a single ALU
// instruction from its IR source values.
void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
	vvec &sv = a->src;

	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			// indirectly addressed gpr; a non-const rel means runtime
			// indexing, so ngpr must cover the whole array
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			// temps already carry their final gpr assignment
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			// prefer the hw inline-constant selects; otherwise allocate
			// a literal slot within the group
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			// map the kcache constant to its final sel via the clause's
			// kcache bank locks
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			// select value is already the final encoding
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
	}

	// zero the remaining (unused) source slots
	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
}
379
// Emit SET_GRADIENTS_V / SET_GRADIENTS_H fetch instructions before a
// gradient-using texture fetch. Gradient values are taken from
// f->src[4..7] (V) and f->src[8..11] (H); src[0..3] are the coordinates.
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		// FIXME extract this loop into a separate method and reuse it

		int reg = -1;

		// advance before use: iteration 0 reads src[4..7], iteration 1
		// reads src[8..11]
		arg_start += 4;

		for (unsigned chan = 0; chan < 4; ++chan) {

			// gradient setup instructions write nothing
			n->bc.dst_sel[chan] = SEL_MASK;

			unsigned sel = SEL_MASK;

			value *v = f->src[arg_start + chan];

			if (!v || v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				// only 0 and 1.0 are representable as fetch src selects
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid fetch constant operand " << chan << " ";
					dump::dump_op(f);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				// all gpr operands must live in one register; the channel
				// becomes the src swizzle
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid fetch source operand " << chan << " ";
					dump::dump_op(f);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			n->bc.src_sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		n->bc.src_gpr = reg >= 0 ? reg : 0;

		f->insert_before(n);
	}

}
454
// Encode the src/dst gpr and swizzles of a fetch instruction, emitting
// gradient setup instructions first when the op requires them.
void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		// vertex fetch uses a single source component
		src_count = 1;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		// skip channels already carrying a special select
		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			// only 0 and 1.0 are representable as constant selects
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			// all gpr source operands must come from the same register
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	// rebuild the dst swizzle; unwritten channels stay masked
	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			// all dst operands must go to the same register as well;
			// the swizzle entry is indexed by the value's gpr channel
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	// a fetch is expected to write at least one gpr channel
	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}
576
// Finalize a CF instruction: track last_cf / last_export and stack usage,
// and encode the operands of export and memory instructions.
void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	// calls need additional stack entries
	if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2);
	}

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		// all exports start as plain EXPORT; run() later converts the
		// last export of each type to EXPORT_DONE
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			// skip channels already carrying a special select
			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				// only 0 and 1.0 are representable as constant selects
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				// all gpr operands must come from the same register
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		// memory ops take unswizzled sources: channel i must come from
		// gpr channel i of a single register; build the component mask
		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		assert(reg >= 0 && mask);

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		// RAT ops with (type & 1) set additionally encode an index
		// register taken from src[4..7], with the same constraints
		if ((flags & CF_RAT) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}



	} else {

#if 0
		if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) {
			c->bc.valid_pixel_mode = 1;
		}
#endif

	}
}
731
732 sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
733 unsigned sel = v->select.sel();
734 unsigned bank = sel >> 12;
735 unsigned chan = v->select.chan();
736 static const unsigned kc_base[] = {128, 160, 256, 288};
737
738 sel &= 4095;
739
740 unsigned line = sel >> 4;
741
742 for (unsigned k = 0; k < 4; ++k) {
743 bc_kcache &kc = alu->bc.kc[k];
744
745 if (kc.mode == KC_LOCK_NONE)
746 break;
747
748 if (kc.bank == bank && (kc.addr == line ||
749 (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {
750
751 sel = kc_base[k] + (sel - (kc.addr << 4));
752
753 return sel_chan(sel, chan);
754 }
755 }
756
757 assert(!"kcache translation error");
758 return 0;
759 }
760
761 void bc_finalizer::update_ngpr(unsigned gpr) {
762 if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
763 ngpr = gpr + 1;
764 }
765
766 void bc_finalizer::update_nstack(region_node* r, unsigned add) {
767 unsigned loops = 0;
768 unsigned ifs = 0;
769
770 while (r) {
771 if (r->is_loop())
772 ++loops;
773 else
774 ++ifs;
775
776 r = r->get_parent_region();
777 }
778
779 unsigned stack_elements = (loops * ctx.stack_entry_size) + ifs + add;
780
781 // FIXME calculate more precisely
782 if (ctx.is_evergreen()) {
783 ++stack_elements;
784 } else {
785 stack_elements += 2;
786 if (ctx.is_cayman())
787 ++stack_elements;
788 }
789
790 unsigned stack_entries = (stack_elements + 3) >> 2;
791
792 if (nstack < stack_entries)
793 nstack = stack_entries;
794 }
795
// Final cleanup over the top-level CF list: resolve deferred jump
// targets, merge POPs into ALU clauses, and drop useless JUMPs.
void bc_finalizer::cf_peephole() {

	// keep a saved successor (N) because the current node may be removed
	// from the list inside the loop body
	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		// resolve "jump after target" fixups now that the final CF order
		// is known: retarget to the node following the original target
		if (c->jump_after_target) {
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			// fold a POP into an immediately preceding ALU clause by
			// switching the clause to ALU_POP_AFTER
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}
826
827 } // namespace r600_sb