/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
28 #include "sb_shader.h"
33 shader::shader(sb_context
&sctx
, shader_target t
, unsigned id
)
34 : ctx(sctx
), next_temp_value_index(temp_regid_offset
),
35 prep_regs_count(), pred_sels(),
36 regions(), inputs(), undef(), val_pool(sizeof(value
)),
37 pool(), all_nodes(), src_stats(), opt_stats(), errors(),
40 target(t
), vt(ex
), ex(*this), root(),
41 compute_interferences(),
42 has_alu_predication(), uses_gradients(), safe_math(), ngpr(), nstack() {}
44 bool shader::assign_slot(alu_node
* n
, alu_node
*slots
[5]) {
46 unsigned slot_flags
= ctx
.alu_slots(n
->bc
.op
);
47 unsigned slot
= n
->bc
.dst_chan
;
49 if (!ctx
.is_cayman() && (!(slot_flags
& AF_V
) || slots
[slot
]) &&
61 void shader::add_pinned_gpr_values(vvec
& vec
, unsigned gpr
, unsigned comp_mask
,
66 value
*v
= get_gpr_value(src
, gpr
, chan
, false);
67 v
->flags
|= (VLF_PIN_REG
| VLF_PIN_CHAN
);
69 v
->gpr
= v
->pin_gpr
= v
->select
;
72 if (v
->array
&& !v
->array
->gpr
) {
73 // if pinned value can be accessed with indirect addressing
74 // pin the entire array to its original location
75 v
->array
->gpr
= v
->array
->base_gpr
;
84 cf_node
* shader::create_clause(node_subtype nst
) {
85 cf_node
*n
= create_cf();
90 case NST_ALU_CLAUSE
: n
->bc
.set_op(CF_OP_ALU
); break;
91 case NST_TEX_CLAUSE
: n
->bc
.set_op(CF_OP_TEX
); break;
92 case NST_VTX_CLAUSE
: n
->bc
.set_op(CF_OP_VTX
); break;
93 default: assert(!"invalid clause type"); break;
100 void shader::create_bbs() {
101 create_bbs(root
, bbs
);
104 void shader::expand_bbs() {
108 alu_node
* shader::create_mov(value
* dst
, value
* src
) {
109 alu_node
*n
= create_alu();
110 n
->bc
.set_op(ALU_OP1_MOV
);
111 n
->dst
.push_back(dst
);
112 n
->src
.push_back(src
);
118 alu_node
* shader::create_copy_mov(value
* dst
, value
* src
, unsigned affcost
) {
119 alu_node
*n
= create_mov(dst
, src
);
121 dst
->assign_source(src
);
122 n
->flags
|= NF_COPY_MOV
| NF_DONT_HOIST
;
124 if (affcost
&& dst
->is_sgpr() && src
->is_sgpr())
125 coal
.add_edge(src
, dst
, affcost
);
130 value
* shader::get_value(value_kind kind
, sel_chan id
,
132 if (version
== 0 && kind
== VLK_REG
&& id
.sel() < prep_regs_count
)
133 return val_pool
[id
- 1];
136 unsigned key
= (kind
<< 28) | (version
<< 16) | id
;
137 value_map::iterator i
= reg_values
.find(key
);
138 if (i
!= reg_values
.end()) {
141 value
*v
= create_value(kind
, id
, version
);
142 reg_values
.insert(std::make_pair(key
, v
));
146 value
* shader::get_special_value(unsigned sv_id
, unsigned version
) {
147 sel_chan
id(sv_id
, 0);
148 return get_value(VLK_SPECIAL_REG
, id
, version
);
151 void shader::fill_array_values(gpr_array
*a
, vvec
&vv
) {
152 unsigned sz
= a
->array_size
;
154 for (unsigned i
= 0; i
< a
->array_size
; ++i
) {
155 vv
[i
] = get_gpr_value(true, a
->base_gpr
.sel() + i
, a
->base_gpr
.chan(),
160 value
* shader::get_gpr_value(bool src
, unsigned reg
, unsigned chan
, bool rel
,
162 sel_chan
id(reg
, chan
);
164 gpr_array
*a
= get_gpr_array(reg
, chan
);
167 v
= create_value(VLK_REL_REG
, id
, 0);
168 v
->rel
= get_special_value(SV_AR_INDEX
);
169 fill_array_values(a
, v
->muse
);
171 fill_array_values(a
, v
->mdef
);
173 if (version
== 0 && reg
< prep_regs_count
)
174 return (val_pool
[id
- 1]);
176 v
= get_value(VLK_REG
, id
, version
);
180 v
->pin_gpr
= v
->select
;
185 value
* shader::create_temp_value() {
186 sel_chan
id(++next_temp_value_index
, 0);
187 return get_value(VLK_TEMP
, id
, 0);
190 value
* shader::get_kcache_value(unsigned bank
, unsigned index
, unsigned chan
) {
191 return get_ro_value(kcache_values
, VLK_KCACHE
,
192 sel_chan((bank
<< 12) | index
, chan
));
195 void shader::add_input(unsigned gpr
, bool preloaded
, unsigned comp_mask
) {
196 if (inputs
.size() <= gpr
)
197 inputs
.resize(gpr
+1);
199 shader_input
&i
= inputs
[gpr
];
200 i
.preloaded
= preloaded
;
201 i
.comp_mask
= comp_mask
;
204 add_pinned_gpr_values(root
->dst
, gpr
, comp_mask
, true);
209 void shader::init() {
211 root
= create_container();
214 void shader::init_call_fs(cf_node
* cf
) {
217 assert(target
== TARGET_VS
);
219 for(inputs_vec::const_iterator I
= inputs
.begin(),
220 E
= inputs
.end(); I
!= E
; ++I
, ++gpr
) {
222 add_pinned_gpr_values(cf
->dst
, gpr
, I
->comp_mask
, false);
224 add_pinned_gpr_values(cf
->src
, gpr
, I
->comp_mask
, true);
228 void shader::set_undef(val_set
& s
) {
229 value
*undefined
= get_undef_value();
230 if (!undefined
->gvn_source
)
231 vt
.add_value(undefined
);
235 for (val_set::iterator I
= vs
.begin(*this), E
= vs
.end(*this); I
!= E
; ++I
) {
238 assert(!v
->is_readonly() && !v
->is_rel());
240 v
->gvn_source
= undefined
->gvn_source
;
244 value
* shader::create_value(value_kind k
, sel_chan regid
, unsigned ver
) {
245 value
*v
= val_pool
.create(k
, regid
, ver
);
249 value
* shader::get_undef_value() {
251 undef
= create_value(VLK_UNDEF
, 0, 0);
255 node
* shader::create_node(node_type nt
, node_subtype nst
, node_flags flags
) {
256 node
*n
= new (pool
.allocate(sizeof(node
))) node(nt
, nst
, flags
);
257 all_nodes
.push_back(n
);
261 alu_node
* shader::create_alu() {
262 alu_node
* n
= new (pool
.allocate(sizeof(alu_node
))) alu_node();
263 all_nodes
.push_back(n
);
267 alu_group_node
* shader::create_alu_group() {
269 new (pool
.allocate(sizeof(alu_group_node
))) alu_group_node();
270 all_nodes
.push_back(n
);
274 alu_packed_node
* shader::create_alu_packed() {
276 new (pool
.allocate(sizeof(alu_packed_node
))) alu_packed_node();
277 all_nodes
.push_back(n
);
281 cf_node
* shader::create_cf() {
282 cf_node
* n
= new (pool
.allocate(sizeof(cf_node
))) cf_node();
284 all_nodes
.push_back(n
);
288 fetch_node
* shader::create_fetch() {
289 fetch_node
* n
= new (pool
.allocate(sizeof(fetch_node
))) fetch_node();
290 all_nodes
.push_back(n
);
294 region_node
* shader::create_region() {
295 region_node
*n
= new (pool
.allocate(sizeof(region_node
)))
296 region_node(regions
.size());
297 regions
.push_back(n
);
298 all_nodes
.push_back(n
);
302 depart_node
* shader::create_depart(region_node
* target
) {
303 depart_node
* n
= new (pool
.allocate(sizeof(depart_node
)))
304 depart_node(target
, target
->departs
.size());
305 target
->departs
.push_back(n
);
306 all_nodes
.push_back(n
);
310 repeat_node
* shader::create_repeat(region_node
* target
) {
311 repeat_node
* n
= new (pool
.allocate(sizeof(repeat_node
)))
312 repeat_node(target
, target
->repeats
.size() + 1);
313 target
->repeats
.push_back(n
);
314 all_nodes
.push_back(n
);
318 container_node
* shader::create_container(node_type nt
, node_subtype nst
,
320 container_node
*n
= new (pool
.allocate(sizeof(container_node
)))
321 container_node(nt
, nst
, flags
);
322 all_nodes
.push_back(n
);
326 if_node
* shader::create_if() {
327 if_node
* n
= new (pool
.allocate(sizeof(if_node
))) if_node();
328 all_nodes
.push_back(n
);
332 bb_node
* shader::create_bb(unsigned id
, unsigned loop_level
) {
333 bb_node
* n
= new (pool
.allocate(sizeof(bb_node
))) bb_node(id
, loop_level
);
334 all_nodes
.push_back(n
);
338 value
* shader::get_special_ro_value(unsigned sel
) {
339 return get_ro_value(special_ro_values
, VLK_PARAM
, sel
);
342 value
* shader::get_const_value(const literal
&v
) {
343 value
*val
= get_ro_value(const_values
, VLK_CONST
, v
);
344 val
->literal_value
= v
;
349 for (node_vec::iterator I
= all_nodes
.begin(), E
= all_nodes
.end();
353 for (gpr_array_vec::iterator I
= gpr_arrays
.begin(), E
= gpr_arrays
.end();
359 void shader::dump_ir() {
364 value
* shader::get_value_version(value
* v
, unsigned ver
) {
365 assert(!v
->is_readonly() && !v
->is_rel());
366 value
*vv
= get_value(v
->kind
, v
->select
, ver
);
370 vv
->array
= v
->array
;
376 gpr_array
* shader::get_gpr_array(unsigned reg
, unsigned chan
) {
378 for (regarray_vec::iterator I
= gpr_arrays
.begin(),
379 E
= gpr_arrays
.end(); I
!= E
; ++I
) {
381 unsigned achan
= a
->base_gpr
.chan();
382 unsigned areg
= a
->base_gpr
.sel();
383 if (achan
== chan
&& (reg
>= areg
&& reg
< areg
+a
->array_size
))
389 void shader::add_gpr_array(unsigned gpr_start
, unsigned gpr_count
,
390 unsigned comp_mask
) {
394 gpr_array
*a
= new gpr_array(
395 sel_chan(gpr_start
, chan
), gpr_count
);
397 SB_DUMP_PASS( sblog
<< "add_gpr_array: @" << a
->base_gpr
398 << " [" << a
->array_size
<< "]\n";
401 gpr_arrays
.push_back(a
);
408 value
* shader::get_pred_sel(int sel
) {
409 assert(sel
== 0 || sel
== 1);
411 pred_sels
[sel
] = get_const_value(sel
);
413 return pred_sels
[sel
];
416 cf_node
* shader::create_cf(unsigned op
) {
417 cf_node
*c
= create_cf();
423 std::string
shader::get_full_target_name() {
424 std::string s
= get_shader_target_name();
426 s
+= ctx
.get_hw_chip_name();
428 s
+= ctx
.get_hw_class_name();
432 const char* shader::get_shader_target_name() {
434 case TARGET_VS
: return "VS";
435 case TARGET_PS
: return "PS";
436 case TARGET_GS
: return "GS";
437 case TARGET_COMPUTE
: return "COMPUTE";
438 case TARGET_FETCH
: return "FETCH";
440 return "INVALID_TARGET";
444 void shader::simplify_dep_rep(node
* dr
) {
445 container_node
*p
= dr
->parent
;
446 if (p
->is_repeat()) {
447 repeat_node
*r
= static_cast<repeat_node
*>(p
);
448 r
->target
->expand_repeat(r
);
449 } else if (p
->is_depart()) {
450 depart_node
*d
= static_cast<depart_node
*>(p
);
451 d
->target
->expand_depart(d
);
454 dr
->parent
->cut(dr
->next
, NULL
);
458 // FIXME this is used in some places as the max non-temp gpr,
459 // (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
460 unsigned shader::first_temp_gpr() {
461 return MAX_GPR
- ctx
.alu_temp_gprs
;
464 unsigned shader::num_nontemp_gpr() {
465 return MAX_GPR
- 2 * ctx
.alu_temp_gprs
;
468 void shader::set_uses_kill() {
469 if (root
->src
.empty())
473 root
->src
[0] = get_special_value(SV_VALID_MASK
);
476 alu_node
* shader::clone(alu_node
* n
) {
477 alu_node
*c
= create_alu();
479 // FIXME: this may be wrong with indirect operands
489 void shader::collect_stats(bool opt
) {
490 if (!sb_context::dump_stat
)
493 shader_stats
&s
= opt
? opt_stats
: src_stats
;
501 ctx
.opt_stats
.accumulate(s
);
503 ctx
.src_stats
.accumulate(s
);
506 value
* shader::get_ro_value(value_map
& vm
, value_kind vk
, unsigned key
) {
507 value_map::iterator I
= vm
.find(key
);
510 value
*v
= create_value(vk
, key
, 0);
511 v
->flags
= VLF_READONLY
;
512 vm
.insert(std::make_pair(key
, v
));
516 void shader::create_bbs(container_node
* n
, bbs_vec
&bbs
, int loop_level
) {
518 bool inside_bb
= false;
519 bool last_inside_bb
= true;
520 node_iterator
bb_start(n
->begin()), I(bb_start
), E(n
->end());
522 for (; I
!= E
; ++I
) {
524 inside_bb
= k
->type
== NT_OP
;
526 if (inside_bb
&& !last_inside_bb
)
528 else if (!inside_bb
) {
530 && I
->type
!= NT_REPEAT
531 && I
->type
!= NT_DEPART
532 && I
->type
!= NT_IF
) {
533 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
535 n
->insert_node_before(*bb_start
, bb
);
537 bb
->move(bb_start
, I
);
540 if (k
->is_container()) {
543 if (k
->type
== NT_REGION
) {
544 loop
= static_cast<region_node
*>(k
)->is_loop();
547 create_bbs(static_cast<container_node
*>(k
), bbs
,
552 if (k
->type
== NT_DEPART
)
555 last_inside_bb
= inside_bb
;
558 if (last_inside_bb
) {
559 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
564 n
->insert_node_before(*bb_start
, bb
);
565 if (bb_start
!= n
->end())
566 bb
->move(bb_start
, n
->end());
569 if (n
->last
&& n
->last
->type
== NT_IF
) {
570 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
577 void shader::expand_bbs(bbs_vec
&bbs
) {
579 for (bbs_vec::iterator I
= bbs
.begin(), E
= bbs
.end(); I
!= E
; ++I
) {
585 sched_queue_id
shader::get_queue_id(node
* n
) {
586 switch (n
->subtype
) {
588 case NST_ALU_PACKED_INST
:
592 case NST_FETCH_INST
: {
593 fetch_node
*f
= static_cast<fetch_node
*>(n
);
594 if (ctx
.is_r600() && (f
->bc
.op_ptr
->flags
& FF_VTX
))
606 void shader_stats::collect(node
*n
) {
607 if (n
->is_alu_inst())
609 else if (n
->is_fetch_inst())
611 else if (n
->is_container()) {
612 container_node
*c
= static_cast<container_node
*>(n
);
614 if (n
->is_alu_group())
616 else if (n
->is_alu_clause())
618 else if (n
->is_fetch_clause())
620 else if (n
->is_cf_inst())
624 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
631 void shader_stats::accumulate(shader_stats
& s
) {
638 alu_groups
+= s
.alu_groups
;
639 alu_clauses
+= s
.alu_clauses
;
641 fetch_clauses
+= s
.fetch_clauses
;
645 void shader_stats::dump() {
646 sblog
<< "dw:" << ndw
<< ", gpr:" << ngpr
<< ", stk:" << nstack
647 << ", alu groups:" << alu_groups
<< ", alu clauses: " << alu_clauses
648 << ", alu:" << alu
<< ", fetch:" << fetch
649 << ", fetch clauses:" << fetch_clauses
653 sblog
<< ", shaders:" << shaders
;
658 static void print_diff(unsigned d1
, unsigned d2
) {
660 sblog
<< ((int)d2
- (int)d1
) * 100 / (int)d1
<< "%";
667 void shader_stats::dump_diff(shader_stats
& s
) {
668 sblog
<< "dw:"; print_diff(ndw
, s
.ndw
);
669 sblog
<< ", gpr:" ; print_diff(ngpr
, s
.ngpr
);
670 sblog
<< ", stk:" ; print_diff(nstack
, s
.nstack
);
671 sblog
<< ", alu groups:" ; print_diff(alu_groups
, s
.alu_groups
);
672 sblog
<< ", alu clauses: " ; print_diff(alu_clauses
, s
.alu_clauses
);
673 sblog
<< ", alu:" ; print_diff(alu
, s
.alu
);
674 sblog
<< ", fetch:" ; print_diff(fetch
, s
.fetch
);
675 sblog
<< ", fetch clauses:" ; print_diff(fetch_clauses
, s
.fetch_clauses
);
676 sblog
<< ", cf:" ; print_diff(cf
, s
.cf
);
680 } // namespace r600_sb