2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
30 #define FBC_DUMP(q) do { q } while (0)
36 #include "sb_shader.h"
// Top-level driver of the bytecode finalizer.
// Steps visible in this listing:
//  - walk the shader's regions from last to first (reverse iterators over
//    sh.get_regions()) and query whether each region is a loop;
//  - when ctx.is_egcm() is false and the target is a vertex shader, append an
//    ALU clause (with an ALU group and a NOP instruction created for it) and a
//    CF NOP to sh.root;
//  - if the op of last_cf carries the CF_ALU flag, append a CF NOP to sh.root
//    and make it the new last_cf;
//  - terminate the program (CF_END inserted after last_cf / end_of_program set);
//  - switch the last recorded export of every type in
//    [EXP_PIXEL, EXP_TYPE_COUNT) to CF_OP_EXPORT_DONE.
// NOTE(review): the conditions choosing between the CF_END insertion and the
// end_of_program flag, and the returned value, are not visible here — confirm
// against the full file.
41 int bc_finalizer::run() {
43 regions_vec
&rv
= sh
.get_regions();
// Regions are visited in reverse order of the vector.
45 for (regions_vec::reverse_iterator I
= rv
.rbegin(), E
= rv
.rend(); I
!= E
;
51 bool loop
= r
->is_loop();
65 // workaround for some problems on r6xx/7xx
66 // add ALU NOP to each vertex shader
67 if (!ctx
.is_egcm() && sh
.target
== TARGET_VS
) {
68 cf_node
*c
= sh
.create_clause(NST_ALU_CLAUSE
);
70 alu_group_node
*g
= sh
.create_alu_group();
72 alu_node
*a
= sh
.create_alu();
73 a
->bc
.set_op(ALU_OP0_NOP
);
79 sh
.root
->push_back(c
);
// 'c' is reused for a CF NOP that is appended right after the ALU clause.
81 c
= sh
.create_cf(CF_OP_NOP
);
82 sh
.root
->push_back(c
);
// An ALU clause at the end of the program gets a trailing CF NOP, which then
// becomes last_cf.
87 if (last_cf
->bc
.op_ptr
->flags
& CF_ALU
) {
88 last_cf
= sh
.create_cf(CF_OP_NOP
);
89 sh
.root
->push_back(last_cf
);
93 last_cf
->insert_after(sh
.create_cf(CF_OP_CF_END
));
95 last_cf
->bc
.end_of_program
= 1;
// Mark the final export of each export type as EXPORT_DONE.
97 for (unsigned t
= EXP_PIXEL
; t
< EXP_TYPE_COUNT
; ++t
) {
98 cf_node
*le
= last_export
[t
];
100 le
->bc
.set_op(CF_OP_EXPORT_DONE
);
// Lowers loop region 'r' to control-flow instructions.
// A LOOP_START_DX10 is pushed to the front of the region and a LOOP_END to its
// back; the two reference each other through jump_after(). Every depart node
// of the region receives a LOOP_BREAK and every repeat node (except the one
// described below) receives a LOOP_CONTINUE, both jumping to the LOOP_END.
108 void bc_finalizer::finalize_loop(region_node
* r
) {
110 cf_node
*loop_start
= sh
.create_cf(CF_OP_LOOP_START_DX10
);
111 cf_node
*loop_end
= sh
.create_cf(CF_OP_LOOP_END
);
// Start and end are linked to each other with jump_after().
113 loop_start
->jump_after(loop_end
);
114 loop_end
->jump_after(loop_start
);
// One LOOP_BREAK is appended to each depart node of this region.
116 for (depart_vec::iterator I
= r
->departs
.begin(), E
= r
->departs
.end();
118 depart_node
*dep
= *I
;
119 cf_node
*loop_break
= sh
.create_cf(CF_OP_LOOP_BREAK
);
120 loop_break
->jump(loop_end
);
121 dep
->push_back(loop_break
);
125 // FIXME produces unnecessary LOOP_CONTINUE
126 for (repeat_vec::iterator I
= r
->repeats
.begin(), E
= r
->repeats
.end();
128 repeat_node
*rep
= *I
;
// No LOOP_CONTINUE is emitted for a repeat node that is a direct child of 'r'
// with no previous sibling.
129 if (!(rep
->parent
== r
&& rep
->prev
== NULL
)) {
130 cf_node
*loop_cont
= sh
.create_cf(CF_OP_LOOP_CONTINUE
);
131 loop_cont
->jump(loop_end
);
132 rep
->push_back(loop_cont
);
// Finally bracket the region contents with the start/end instructions.
137 r
->push_front(loop_start
);
138 r
->push_back(loop_end
);
// Lowers if-region 'r' to JUMP / ELSE / POP control-flow instructions.
// The region is expected to contain a depart/repeat node whose first child is
// the if node, whose first child is again a depart/repeat node (checked with
// asserts). A JUMP is pushed to the front of the region and a POP (pop_count 1)
// to its back. 'has_else' is true when the if node has a following sibling; an
// ELSE instruction is created and inserted after the if node for that case.
// NOTE(review): the statement that branches on has_else is not visible in this
// listing — presumably the ELSE setup runs only when has_else is true and the
// "jump_after(if_pop) / pop_count = 1" setup of if_jump runs otherwise; confirm.
141 void bc_finalizer::finalize_if(region_node
* r
) {
145 // expecting the following control flow structure here:
148 // - depart/repeat 1 (it may be depart/repeat for some outer region)
152 // - depart/repeat 2 (possibly for outer region)
154 // - some optional code
157 // - optional <else> code ...
161 container_node
*repdep1
= static_cast<container_node
*>(r
->first
);
162 assert(repdep1
->is_depart() || repdep1
->is_repeat());
164 if_node
*n_if
= static_cast<if_node
*>(repdep1
->first
);
169 assert(n_if
->is_if());
171 container_node
*repdep2
= static_cast<container_node
*>(n_if
->first
);
172 assert(repdep2
->is_depart() || repdep2
->is_repeat());
174 cf_node
*if_jump
= sh
.create_cf(CF_OP_JUMP
);
175 cf_node
*if_pop
= sh
.create_cf(CF_OP_POP
);
// The POP pops one entry and its jump_after() target is the POP itself.
177 if_pop
->bc
.pop_count
= 1;
178 if_pop
->jump_after(if_pop
);
180 r
->push_front(if_jump
);
181 r
->push_back(if_pop
);
// A node following the if node means there is else-code.
183 bool has_else
= n_if
->next
;
// ELSE is placed right after the if node; the JUMP targets it, and the ELSE
// itself continues after the POP with pop_count 1.
186 cf_node
*nelse
= sh
.create_cf(CF_OP_ELSE
);
187 n_if
->insert_after(nelse
);
188 if_jump
->jump(nelse
);
189 nelse
->jump_after(if_pop
);
190 nelse
->bc
.pop_count
= 1;
// Here the JUMP goes straight past the POP and pops one entry itself.
193 if_jump
->jump_after(if_pop
);
194 if_jump
->bc
.pop_count
= 1;
// NOTE(review): the body of this loop over the region's departs is not
// visible in this listing.
200 for (depart_vec::iterator I
= r
->departs
.begin(), E
= r
->departs
.end();
// An if-region is not expected to have repeat nodes.
205 assert(r
->repeats
.empty());
// Walks the children of container 'c' and dispatches each node to the
// matching finalize routine by node kind: ALU groups go to
// finalize_alu_group(), fetch instructions to finalize_fetch(), and CF
// instructions to finalize_cf(). ALU-clause and fetch-clause nodes have their
// own branches (bodies not visible in this listing); any other kind hits the
// "unexpected node" assert. Container nodes are then processed recursively.
208 void bc_finalizer::run_on(container_node
* c
) {
210 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
213 if (n
->is_alu_group()) {
214 finalize_alu_group(static_cast<alu_group_node
*>(n
));
216 if (n
->is_fetch_inst()) {
217 finalize_fetch(static_cast<fetch_node
*>(n
));
218 } else if (n
->is_cf_inst()) {
219 finalize_cf(static_cast<cf_node
*>(n
));
220 } else if (n
->is_alu_clause()) {
222 } else if (n
->is_fetch_clause()) {
225 assert(!"unexpected node");
// Recurse into nested containers.
228 if (n
->is_container())
229 run_on(static_cast<container_node
*>(n
));
// Fills in the destination-related bytecode fields of every ALU instruction
// in group 'g' and then finalizes its sources via finalize_alu_src().
// For each instruction: dst_gpr / dst_chan are taken from the final GPR of
// the first destination value (or from the slot when there is none),
// write_mask reflects whether a destination exists, predicate-setting ops get
// update_pred / update_exec_mask from dst[1] / dst[2], pred_sel is forced to
// PRED_SEL_OFF, and the used-GPR count is updated through update_ngpr().
// NOTE(review): the use of the local 'last' is not visible in this listing.
234 void bc_finalizer::finalize_alu_group(alu_group_node
* g
) {
236 alu_node
*last
= NULL
;
238 for (node_iterator I
= g
->begin(), E
= g
->end(); I
!= E
; ++I
) {
239 alu_node
*n
= static_cast<alu_node
*>(*I
);
240 unsigned slot
= n
->bc
.slot
;
// 'd' is the first destination value, or NULL when the instruction has none.
242 value
*d
= n
->dst
.empty() ? NULL
: n
->dst
[0];
// A special-register destination is only expected on ops flagged AF_MOVA.
244 if (d
&& d
->is_special_reg()) {
245 assert(n
->bc
.op_ptr
->flags
& AF_MOVA
);
// Without a destination the final register defaults to sel_chan(0, 0).
249 sel_chan fdst
= d
? d
->get_final_gpr() : sel_chan(0, 0);
// The destination channel must match the slot unless this is the trans slot.
252 assert(fdst
.chan() == slot
|| slot
== SLOT_TRANS
);
255 n
->bc
.dst_gpr
= fdst
.sel();
// No destination: use the slot number as channel for slots below
// SLOT_TRANS, otherwise channel 0.
256 n
->bc
.dst_chan
= d
? fdst
.chan() : slot
< SLOT_TRANS
? slot
: 0;
// Relative destination with a non-constant index: account for the last
// register of the addressed array (first register + array_size - 1).
259 if (d
&& d
->is_rel() && d
->rel
&& !d
->rel
->is_const()) {
261 update_ngpr(d
->array
->gpr
.sel() + d
->array
->array_size
-1);
266 n
->bc
.write_mask
= d
!= NULL
;
// Ops flagged AF_PRED: dst[1] / dst[2] decide predicate and exec-mask updates.
269 if (n
->bc
.op_ptr
->flags
& AF_PRED
) {
270 n
->bc
.update_pred
= (n
->dst
[1] != NULL
);
271 n
->bc
.update_exec_mask
= (n
->dst
[2] != NULL
);
274 // FIXME handle predication here
275 n
->bc
.pred_sel
= PRED_SEL_OFF
;
277 update_ngpr(n
->bc
.dst_gpr
);
279 finalize_alu_src(g
, n
);
// Translates the source values of ALU instruction 'a' (belonging to group
// 'g') into the bytecode source operands a->bc.src[si].
// Handling visible in this listing, per kind of source value:
//  - register values: sel/chan come from get_final_gpr() or from v->gpr, and
//    update_ngpr() is called for the registers involved (for a relative access
//    with a non-constant index, for the last register of the array);
//  - literal constants: certain values map to dedicated inline selectors
//    (e.g. 0.5f -> ALU_SRC_0_5, 1 -> ALU_SRC_1_INT, -1 -> ALU_SRC_M_1_INT);
//    anything else uses ALU_SRC_LITERAL with the channel obtained from
//    g->literal_chan();
//  - kcache constants: translated through translate_kcache() using the parent
//    ALU clause of the group;
//  - VLK_SPECIAL_CONST: sel/chan copied from v->select;
//  - any other kind triggers the "unknown value kind" assert.
// NOTE(review): the declarations of 'sv', 'si', 'v', 'sc' and 'gpr' and the
// switch/case structure are not visible in this listing.
287 void bc_finalizer::finalize_alu_src(alu_group_node
* g
, alu_node
* a
) {
291 sblog
<< "finalize_alu_src: ";
// 'si' advances together with the iterator and indexes a->bc.src.
298 for (vvec::iterator I
= sv
.begin(), E
= sv
.end(); I
!= E
; ++I
, ++si
) {
302 bc_alu_src
&src
= a
->bc
.src
[si
];
310 sc
= v
->get_final_gpr();
312 src
.chan
= sc
.chan();
// Non-constant relative index: the whole array may be touched, so account
// for its last register.
313 if (!v
->rel
->is_const()) {
315 update_ngpr(v
->array
->gpr
.sel() + v
->array
->array_size
-1);
321 gpr
= v
->get_final_gpr();
323 src
.chan
= gpr
.chan();
324 update_ngpr(src
.sel
);
327 src
.sel
= v
->gpr
.sel();
328 src
.chan
= v
->gpr
.chan();
329 update_ngpr(src
.sel
);
// Literal constant: try the dedicated inline-constant selectors first.
333 literal lv
= v
->literal_value
;
336 if (lv
== literal(0))
338 else if (lv
== literal(0.5f
))
339 src
.sel
= ALU_SRC_0_5
;
340 else if (lv
== literal(1.0f
))
342 else if (lv
== literal(1))
343 src
.sel
= ALU_SRC_1_INT
;
344 else if (lv
== literal(-1))
345 src
.sel
= ALU_SRC_M_1_INT
;
// Fallback: a real literal operand; its channel is provided by the group.
347 src
.sel
= ALU_SRC_LITERAL
;
348 src
.chan
= g
->literal_chan(lv
);
// kcache constant: the group's parent must be an ALU clause.
354 cf_node
*clause
= static_cast<cf_node
*>(g
->parent
);
355 assert(clause
->is_alu_clause());
356 sel_chan k
= translate_kcache(clause
, v
);
358 assert(k
&& "kcache translation failed");
365 case VLK_SPECIAL_CONST
:
366 src
.sel
= v
->select
.sel();
367 src
.chan
= v
->select
.chan();
370 assert(!"unknown value kind");
// NOTE(review): presumably zeroes the selector of remaining, unused source
// slots — the enclosing loop is not visible here; confirm.
376 a
->bc
.src
[si
++].sel
= 0;
// Creates the two gradient-setup fetch instructions for fetch node 'f':
// one FETCH_OP_SET_GRADIENTS_V and one FETCH_OP_SET_GRADIENTS_H.
// 'f' must have exactly 12 source values (asserted). For each of the two new
// instructions all four dst_sel entries are set to SEL_MASK, and the four
// source selectors plus src_gpr are derived from f->src[arg_start + chan]:
// undefined values, constants and GPR values are handled separately, and
// unsupported operands are reported through sblog.
// NOTE(review): how 'arg_start' advances, where 'reg' is declared, and where
// the new instructions are inserted are not visible in this listing.
380 void bc_finalizer::emit_set_grad(fetch_node
* f
) {
382 assert(f
->src
.size() == 12);
383 unsigned ops
[2] = { FETCH_OP_SET_GRADIENTS_V
, FETCH_OP_SET_GRADIENTS_H
};
385 unsigned arg_start
= 0;
387 for (unsigned op
= 0; op
< 2; ++op
) {
388 fetch_node
*n
= sh
.create_fetch();
389 n
->bc
.set_op(ops
[op
]);
391 // FIXME extract this loop into a separate method and reuse it
397 for (unsigned chan
= 0; chan
< 4; ++chan
) {
// The gradient instructions write no destination channels.
399 n
->bc
.dst_sel
[chan
] = SEL_MASK
;
401 unsigned sel
= SEL_MASK
;
403 value
*v
= f
->src
[arg_start
+ chan
];
405 if (!v
|| v
->is_undef()) {
407 } else if (v
->is_const()) {
408 literal l
= v
->literal_value
;
411 else if (l
== literal(1.0f
))
414 sblog
<< "invalid fetch constant operand " << chan
<< " ";
420 } else if (v
->is_any_gpr()) {
421 unsigned vreg
= v
->gpr
.sel();
422 unsigned vchan
= v
->gpr
.chan();
// A GPR source whose register differs from 'reg' is reported as invalid.
426 else if ((unsigned)reg
!= vreg
) {
427 sblog
<< "invalid fetch source operand " << chan
<< " ";
436 sblog
<< "invalid fetch source operand " << chan
<< " ";
442 n
->bc
.src_sel
[chan
] = sel
;
// A negative 'reg' falls back to GPR 0.
448 n
->bc
.src_gpr
= reg
>= 0 ? reg
: 0;
// Finalizes the operand encoding of fetch instruction 'f'.
// Sources: for each of 'src_count' channels the selector f->bc.src_sel[chan]
// is recomputed from f->src[chan] (constant or GPR value) and src_gpr is set
// from the source register. Destinations: the dst_sel swizzle is rebuilt in a
// temporary array indexed by the destination value's GPR channel (entries not
// written stay SEL_MASK), copied back into f->bc.dst_sel, and dst_gpr is set
// from the destination register. Unsupported operands are reported via sblog.
// NOTE(review): src_count starts at 4; the FF_VTX / FF_USEGRAD branches
// presumably adjust it, but their bodies and the declaration of 'reg' are not
// visible in this listing.
455 void bc_finalizer::finalize_fetch(fetch_node
* f
) {
461 unsigned src_count
= 4;
463 unsigned flags
= f
->bc
.op_ptr
->flags
;
465 if (flags
& FF_VTX
) {
467 } else if (flags
& FF_USEGRAD
) {
// --- source operands ---
471 for (unsigned chan
= 0; chan
< src_count
; ++chan
) {
473 unsigned sel
= f
->bc
.src_sel
[chan
];
478 value
*v
= f
->src
[chan
];
482 } else if (v
->is_const()) {
483 literal l
= v
->literal_value
;
486 else if (l
== literal(1.0f
))
489 sblog
<< "invalid fetch constant operand " << chan
<< " ";
495 } else if (v
->is_any_gpr()) {
496 unsigned vreg
= v
->gpr
.sel();
497 unsigned vchan
= v
->gpr
.chan();
// A GPR source whose register differs from 'reg' is reported as invalid.
501 else if ((unsigned)reg
!= vreg
) {
502 sblog
<< "invalid fetch source operand " << chan
<< " ";
511 sblog
<< "invalid fetch source operand " << chan
<< " ";
517 f
->bc
.src_sel
[chan
] = sel
;
// A negative 'reg' falls back to GPR 0.
523 f
->bc
.src_gpr
= reg
>= 0 ? reg
: 0;
// --- destination operands ---
// Start with every destination channel masked.
529 unsigned dst_swz
[4] = {SEL_MASK
, SEL_MASK
, SEL_MASK
, SEL_MASK
};
531 for (unsigned chan
= 0; chan
< 4; ++chan
) {
533 unsigned sel
= f
->bc
.dst_sel
[chan
];
538 value
*v
= f
->dst
[chan
];
542 if (v
->is_any_gpr()) {
543 unsigned vreg
= v
->gpr
.sel();
544 unsigned vchan
= v
->gpr
.chan();
548 else if ((unsigned)reg
!= vreg
) {
549 sblog
<< "invalid fetch dst operand " << chan
<< " ";
// The selector is stored at the channel of the allocated destination GPR.
555 dst_swz
[vchan
] = sel
;
558 sblog
<< "invalid fetch dst operand " << chan
<< " ";
// Copy the rebuilt swizzle back into the instruction.
566 for (unsigned i
= 0; i
< 4; ++i
)
567 f
->bc
.dst_sel
[i
] = dst_swz
[i
];
574 f
->bc
.dst_gpr
= reg
>= 0 ? reg
: 0;
// Finalizes control-flow instruction 'c' according to the flags of its op.
//  - CF_CALL: reserves stack space through update_nstack() (1 extra element
//    when ctx.is_cayman(), otherwise 2).
//  - end_of_program is cleared.
//  - CF_EXP: the op is set to CF_OP_EXPORT, the node is remembered in
//    last_export[type], the per-channel selectors c->bc.sel[] are computed
//    from c->src[0..3] and rw_gpr is set from the source register.
//  - CF_MEM: every defined source must be a GPR value whose channel equals
//    its index; rw_gpr and comp_mask are set. For CF_RAT ops with an odd
//    bc.type, sources 4..7 are checked the same way and provide index_gpr.
//  - CF_BRANCH / CF_LOOP ops get valid_pixel_mode = 1 when the shader does
//    not use gradients.
// Invalid operands are reported through sblog.
// NOTE(review): declarations of 'reg' and 'mask' and several branch bodies
// are not visible in this listing.
577 void bc_finalizer::finalize_cf(cf_node
* c
) {
579 unsigned flags
= c
->bc
.op_ptr
->flags
;
581 if (flags
& CF_CALL
) {
582 update_nstack(c
->get_parent_region(), ctx
.is_cayman() ? 1 : 2);
585 c
->bc
.end_of_program
= 0;
// --- export instructions ---
588 if (flags
& CF_EXP
) {
589 c
->bc
.set_op(CF_OP_EXPORT
);
// Remember the most recent export of this type.
590 last_export
[c
->bc
.type
] = c
;
594 for (unsigned chan
= 0; chan
< 4; ++chan
) {
596 unsigned sel
= c
->bc
.sel
[chan
];
601 value
*v
= c
->src
[chan
];
605 } else if (v
->is_const()) {
606 literal l
= v
->literal_value
;
609 else if (l
== literal(1.0f
))
612 sblog
<< "invalid export constant operand " << chan
<< " ";
618 } else if (v
->is_any_gpr()) {
619 unsigned vreg
= v
->gpr
.sel();
620 unsigned vchan
= v
->gpr
.chan();
// A GPR source whose register differs from 'reg' is reported as invalid.
624 else if ((unsigned)reg
!= vreg
) {
625 sblog
<< "invalid export source operand " << chan
<< " ";
634 sblog
<< "invalid export source operand " << chan
<< " ";
640 c
->bc
.sel
[chan
] = sel
;
// A negative 'reg' falls back to GPR 0.
646 c
->bc
.rw_gpr
= reg
>= 0 ? reg
: 0;
// --- memory instructions ---
648 } else if (flags
& CF_MEM
) {
653 for (unsigned chan
= 0; chan
< 4; ++chan
) {
654 value
*v
= c
->src
[chan
];
655 if (!v
|| v
->is_undef())
// Each source must be a GPR value whose channel equals its index.
658 if (!v
->is_any_gpr() || v
->gpr
.chan() != chan
) {
659 sblog
<< "invalid source operand " << chan
<< " ";
664 unsigned vreg
= v
->gpr
.sel();
667 else if ((unsigned)reg
!= vreg
) {
668 sblog
<< "invalid source operand " << chan
<< " ";
// A source register and a non-zero mask are required at this point.
677 assert(reg
>= 0 && mask
);
682 c
->bc
.rw_gpr
= reg
>= 0 ? reg
: 0;
683 c
->bc
.comp_mask
= mask
;
// RAT ops with the low bit of bc.type set also take sources 4..7, which
// determine index_gpr.
685 if ((flags
& CF_RAT
) && (c
->bc
.type
& 1)) {
689 for (unsigned chan
= 0; chan
< 4; ++chan
) {
690 value
*v
= c
->src
[4 + chan
];
691 if (!v
|| v
->is_undef())
694 if (!v
->is_any_gpr() || v
->gpr
.chan() != chan
) {
695 sblog
<< "invalid source operand " << chan
<< " ";
700 unsigned vreg
= v
->gpr
.sel();
703 else if ((unsigned)reg
!= vreg
) {
704 sblog
<< "invalid source operand " << chan
<< " ";
716 c
->bc
.index_gpr
= reg
>= 0 ? reg
: 0;
// Branch and loop instructions use valid-pixel mode unless the shader uses
// gradients.
724 if ((flags
& (CF_BRANCH
| CF_LOOP
)) && !sh
.uses_gradients
) {
725 c
->bc
.valid_pixel_mode
= 1;
// Translates the constant selected by value 'v' into an ALU source
// selector/channel, using the kcache sets of ALU clause 'alu'.
// The bank is taken from bits 12 and up of the selector and the line is the
// selector shifted right by 4. The four kcache sets alu->bc.kc[0..3] are
// searched for one with the same bank whose addr equals the line, or whose
// addr + 1 equals the line when its mode is KC_LOCK_2. On a match the result
// is sel_chan(kc_base[k] + (sel - (addr << 4)), chan), where kc_base holds
// the base selectors {128, 160, 256, 288} of the four sets.
// If no set matches, the "kcache translation error" assert fires.
// NOTE(review): any masking of 'sel' between the bank and line computations,
// the KC_LOCK_NONE branch body and the final return value are not visible in
// this listing.
732 sel_chan
bc_finalizer::translate_kcache(cf_node
* alu
, value
* v
) {
733 unsigned sel
= v
->select
.sel();
734 unsigned bank
= sel
>> 12;
735 unsigned chan
= v
->select
.chan();
736 static const unsigned kc_base
[] = {128, 160, 256, 288};
740 unsigned line
= sel
>> 4;
742 for (unsigned k
= 0; k
< 4; ++k
) {
743 bc_kcache
&kc
= alu
->bc
.kc
[k
];
745 if (kc
.mode
== KC_LOCK_NONE
)
// Match on bank and on the first line, or on the second line when the set is
// in KC_LOCK_2 mode.
748 if (kc
.bank
== bank
&& (kc
.addr
== line
||
749 (kc
.mode
== KC_LOCK_2
&& kc
.addr
+ 1 == line
))) {
// Rebase the selector onto the base selector of kcache set k.
751 sel
= kc_base
[k
] + (sel
- (kc
.addr
<< 4));
753 return sel_chan(sel
, chan
);
757 assert(!"kcache translation error");
// Accounts for the use of register 'gpr' in the shader's GPR count.
// The update applies only when 'gpr' is below MAX_GPR - ctx.alu_temp_gprs
// and is not already covered by the current count (gpr >= ngpr).
// NOTE(review): the guarded statement is not visible in this listing;
// presumably it raises ngpr to cover 'gpr' — confirm against the full file.
761 void bc_finalizer::update_ngpr(unsigned gpr
) {
762 if (gpr
< MAX_GPR
- ctx
.alu_temp_gprs
&& gpr
>= ngpr
)
// Raises 'nstack' if the control-flow nesting around region 'r' needs more
// stack entries than currently recorded.
// The requirement in stack elements is
//     loops * ctx.stack_entry_size + ifs + add
// and is converted to entries by rounding up to a multiple of four elements
// ((elements + 3) >> 2). 'add' is an extra number of elements requested by
// the caller.
// NOTE(review): 'loops' and 'ifs' are presumably counted while walking up
// through the parent regions, and the Evergreen branch presumably adjusts the
// element count; neither is visible in this listing — confirm.
766 void bc_finalizer::update_nstack(region_node
* r
, unsigned add
) {
// Step to the enclosing region.
776 r
= r
->get_parent_region();
779 unsigned stack_elements
= (loops
* ctx
.stack_entry_size
) + ifs
+ add
;
781 // FIXME calculate more precisely
782 if (ctx
.is_evergreen()) {
// Four elements per entry, rounded up.
790 unsigned stack_entries
= (stack_elements
+ 3) >> 2;
// nstack only ever grows.
792 if (nstack
< stack_entries
)
793 nstack
= stack_entries
;
// Peephole pass over the CF instructions in sh.root.
// Visible transformations:
//  - a node with jump_after_target set has its jump_target replaced by the
//    node following that target, and the flag is cleared;
//  - for a POP, when node 'p' is an ALU clause whose op is CF_OP_ALU, that
//    clause is changed to CF_OP_ALU_POP_AFTER;
//  - a JUMP whose target is the very next node is useless and can be removed.
// NOTE(review): the definition of 'p' (presumably the node preceding the POP),
// the iterator advance via 'N', and the actual removal statements are not
// visible in this listing — confirm against the full file.
796 void bc_finalizer::cf_peephole() {
798 for (node_iterator N
, I
= sh
.root
->begin(), E
= sh
.root
->end(); I
!= E
;
802 cf_node
*c
= static_cast<cf_node
*>(*I
);
// Resolve "jump after X" into a direct jump to the node following X.
804 if (c
->jump_after_target
) {
805 c
->jump_target
= static_cast<cf_node
*>(c
->jump_target
->next
);
806 c
->jump_after_target
= false;
809 if (c
->is_cf_op(CF_OP_POP
)) {
811 if (p
->is_alu_clause()) {
812 cf_node
*a
= static_cast<cf_node
*>(p
);
// Only a plain CF_OP_ALU clause is converted to the pop-after form.
814 if (a
->bc
.op
== CF_OP_ALU
) {
815 a
->bc
.set_op(CF_OP_ALU_POP_AFTER
);
819 } else if (c
->is_cf_op(CF_OP_JUMP
) && c
->jump_target
== c
->next
) {
820 // if JUMP is immediately followed by its jump target,
821 // then JUMP is useless and we can eliminate it
827 } // namespace r600_sb