2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "sb_shader.h"
33 shader::shader(sb_context
&sctx
, shader_target t
, unsigned id
)
34 : ctx(sctx
), next_temp_value_index(temp_regid_offset
),
35 prep_regs_count(), pred_sels(),
36 regions(), inputs(), undef(), val_pool(sizeof(value
)),
37 pool(), all_nodes(), src_stats(), opt_stats(), errors(),
40 target(t
), vt(ex
), ex(*this), root(),
41 compute_interferences(),
42 has_alu_predication(),
43 uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {}
45 bool shader::assign_slot(alu_node
* n
, alu_node
*slots
[5]) {
47 unsigned slot_flags
= ctx
.alu_slots(n
->bc
.op
);
48 unsigned slot
= n
->bc
.dst_chan
;
50 if (!ctx
.is_cayman() && (!(slot_flags
& AF_V
) || slots
[slot
]) &&
62 void shader::add_pinned_gpr_values(vvec
& vec
, unsigned gpr
, unsigned comp_mask
,
67 value
*v
= get_gpr_value(src
, gpr
, chan
, false);
68 v
->flags
|= (VLF_PIN_REG
| VLF_PIN_CHAN
);
70 v
->gpr
= v
->pin_gpr
= v
->select
;
73 if (v
->array
&& !v
->array
->gpr
) {
74 // if pinned value can be accessed with indirect addressing
75 // pin the entire array to its original location
76 v
->array
->gpr
= v
->array
->base_gpr
;
85 cf_node
* shader::create_clause(node_subtype nst
) {
86 cf_node
*n
= create_cf();
91 case NST_ALU_CLAUSE
: n
->bc
.set_op(CF_OP_ALU
); break;
92 case NST_TEX_CLAUSE
: n
->bc
.set_op(CF_OP_TEX
); break;
93 case NST_VTX_CLAUSE
: n
->bc
.set_op(CF_OP_VTX
); break;
94 case NST_GDS_CLAUSE
: n
->bc
.set_op(CF_OP_GDS
); break;
95 default: assert(!"invalid clause type"); break;
102 void shader::create_bbs() {
103 create_bbs(root
, bbs
);
106 void shader::expand_bbs() {
110 alu_node
* shader::create_mov(value
* dst
, value
* src
) {
111 alu_node
*n
= create_alu();
112 n
->bc
.set_op(ALU_OP1_MOV
);
113 n
->dst
.push_back(dst
);
114 n
->src
.push_back(src
);
120 alu_node
* shader::create_copy_mov(value
* dst
, value
* src
, unsigned affcost
) {
121 alu_node
*n
= create_mov(dst
, src
);
123 dst
->assign_source(src
);
124 n
->flags
|= NF_COPY_MOV
| NF_DONT_HOIST
;
126 if (affcost
&& dst
->is_sgpr() && src
->is_sgpr())
127 coal
.add_edge(src
, dst
, affcost
);
132 value
* shader::get_value(value_kind kind
, sel_chan id
,
134 if (version
== 0 && kind
== VLK_REG
&& id
.sel() < prep_regs_count
)
135 return val_pool
[id
- 1];
138 unsigned key
= (kind
<< 28) | (version
<< 16) | id
;
139 value_map::iterator i
= reg_values
.find(key
);
140 if (i
!= reg_values
.end()) {
143 value
*v
= create_value(kind
, id
, version
);
144 reg_values
.insert(std::make_pair(key
, v
));
148 value
* shader::get_special_value(unsigned sv_id
, unsigned version
) {
149 sel_chan
id(sv_id
, 0);
150 return get_value(VLK_SPECIAL_REG
, id
, version
);
153 void shader::fill_array_values(gpr_array
*a
, vvec
&vv
) {
154 unsigned sz
= a
->array_size
;
156 for (unsigned i
= 0; i
< a
->array_size
; ++i
) {
157 vv
[i
] = get_gpr_value(true, a
->base_gpr
.sel() + i
, a
->base_gpr
.chan(),
162 value
* shader::get_gpr_value(bool src
, unsigned reg
, unsigned chan
, bool rel
,
164 sel_chan
id(reg
, chan
);
166 gpr_array
*a
= get_gpr_array(reg
, chan
);
169 v
= create_value(VLK_REL_REG
, id
, 0);
170 v
->rel
= get_special_value(SV_AR_INDEX
);
171 fill_array_values(a
, v
->muse
);
173 fill_array_values(a
, v
->mdef
);
175 if (version
== 0 && reg
< prep_regs_count
)
176 return (val_pool
[id
- 1]);
178 v
= get_value(VLK_REG
, id
, version
);
182 v
->pin_gpr
= v
->select
;
187 value
* shader::create_temp_value() {
188 sel_chan
id(++next_temp_value_index
, 0);
189 return get_value(VLK_TEMP
, id
, 0);
192 value
* shader::get_kcache_value(unsigned bank
, unsigned index
, unsigned chan
, alu_kcache_index_mode index_mode
) {
193 return get_ro_value(kcache_values
, VLK_KCACHE
,
194 sel_chan(bank
, index
, chan
, index_mode
));
197 void shader::add_input(unsigned gpr
, bool preloaded
, unsigned comp_mask
) {
198 if (inputs
.size() <= gpr
)
199 inputs
.resize(gpr
+1);
201 shader_input
&i
= inputs
[gpr
];
202 i
.preloaded
= preloaded
;
203 i
.comp_mask
= comp_mask
;
206 add_pinned_gpr_values(root
->dst
, gpr
, comp_mask
, true);
211 void shader::init() {
213 root
= create_container();
216 void shader::init_call_fs(cf_node
* cf
) {
219 assert(target
== TARGET_LS
|| target
== TARGET_VS
|| target
== TARGET_ES
);
221 for(inputs_vec::const_iterator I
= inputs
.begin(),
222 E
= inputs
.end(); I
!= E
; ++I
, ++gpr
) {
224 add_pinned_gpr_values(cf
->dst
, gpr
, I
->comp_mask
, false);
226 add_pinned_gpr_values(cf
->src
, gpr
, I
->comp_mask
, true);
230 void shader::set_undef(val_set
& s
) {
231 value
*undefined
= get_undef_value();
232 if (!undefined
->gvn_source
)
233 vt
.add_value(undefined
);
237 for (val_set::iterator I
= vs
.begin(*this), E
= vs
.end(*this); I
!= E
; ++I
) {
240 assert(!v
->is_readonly() && !v
->is_rel());
242 v
->gvn_source
= undefined
->gvn_source
;
246 value
* shader::create_value(value_kind k
, sel_chan regid
, unsigned ver
) {
247 value
*v
= val_pool
.create(k
, regid
, ver
);
251 value
* shader::get_undef_value() {
253 undef
= create_value(VLK_UNDEF
, 0, 0);
257 node
* shader::create_node(node_type nt
, node_subtype nst
, node_flags flags
) {
258 node
*n
= new (pool
.allocate(sizeof(node
))) node(nt
, nst
, flags
);
259 all_nodes
.push_back(n
);
263 alu_node
* shader::create_alu() {
264 alu_node
* n
= new (pool
.allocate(sizeof(alu_node
))) alu_node();
265 all_nodes
.push_back(n
);
269 alu_group_node
* shader::create_alu_group() {
271 new (pool
.allocate(sizeof(alu_group_node
))) alu_group_node();
272 all_nodes
.push_back(n
);
276 alu_packed_node
* shader::create_alu_packed() {
278 new (pool
.allocate(sizeof(alu_packed_node
))) alu_packed_node();
279 all_nodes
.push_back(n
);
283 cf_node
* shader::create_cf() {
284 cf_node
* n
= new (pool
.allocate(sizeof(cf_node
))) cf_node();
286 all_nodes
.push_back(n
);
290 fetch_node
* shader::create_fetch() {
291 fetch_node
* n
= new (pool
.allocate(sizeof(fetch_node
))) fetch_node();
292 all_nodes
.push_back(n
);
296 region_node
* shader::create_region() {
297 region_node
*n
= new (pool
.allocate(sizeof(region_node
)))
298 region_node(regions
.size());
299 regions
.push_back(n
);
300 all_nodes
.push_back(n
);
304 depart_node
* shader::create_depart(region_node
* target
) {
305 depart_node
* n
= new (pool
.allocate(sizeof(depart_node
)))
306 depart_node(target
, target
->departs
.size());
307 target
->departs
.push_back(n
);
308 all_nodes
.push_back(n
);
312 repeat_node
* shader::create_repeat(region_node
* target
) {
313 repeat_node
* n
= new (pool
.allocate(sizeof(repeat_node
)))
314 repeat_node(target
, target
->repeats
.size() + 1);
315 target
->repeats
.push_back(n
);
316 all_nodes
.push_back(n
);
320 container_node
* shader::create_container(node_type nt
, node_subtype nst
,
322 container_node
*n
= new (pool
.allocate(sizeof(container_node
)))
323 container_node(nt
, nst
, flags
);
324 all_nodes
.push_back(n
);
328 if_node
* shader::create_if() {
329 if_node
* n
= new (pool
.allocate(sizeof(if_node
))) if_node();
330 all_nodes
.push_back(n
);
334 bb_node
* shader::create_bb(unsigned id
, unsigned loop_level
) {
335 bb_node
* n
= new (pool
.allocate(sizeof(bb_node
))) bb_node(id
, loop_level
);
336 all_nodes
.push_back(n
);
340 value
* shader::get_special_ro_value(unsigned sel
) {
341 return get_ro_value(special_ro_values
, VLK_PARAM
, sel
);
344 value
* shader::get_const_value(const literal
&v
) {
345 value
*val
= get_ro_value(const_values
, VLK_CONST
, v
);
346 val
->literal_value
= v
;
// NOTE(review): extraction artifact — the two iterator-loop headers below
// appear to belong to shader::~shader() (they walk all_nodes and
// gpr_arrays); the destructor's signature, loop bodies (presumably explicit
// node destruction and delete of the arrays) and closing braces were lost.
// TODO: restore this definition from the upstream r600_sb sources.
351 for (node_vec::iterator I
= all_nodes
.begin(), E
= all_nodes
.end();
355 for (gpr_array_vec::iterator I
= gpr_arrays
.begin(), E
= gpr_arrays
.end();
// NOTE(review): only the opening line of shader::dump_ir() survived
// extraction; the body (presumably invoking the IR dump pass) and closing
// brace were lost. TODO: restore from the upstream r600_sb sources.
361 void shader::dump_ir() {
366 value
* shader::get_value_version(value
* v
, unsigned ver
) {
367 assert(!v
->is_readonly() && !v
->is_rel());
368 value
*vv
= get_value(v
->kind
, v
->select
, ver
);
372 vv
->array
= v
->array
;
378 gpr_array
* shader::get_gpr_array(unsigned reg
, unsigned chan
) {
380 for (regarray_vec::iterator I
= gpr_arrays
.begin(),
381 E
= gpr_arrays
.end(); I
!= E
; ++I
) {
383 unsigned achan
= a
->base_gpr
.chan();
384 unsigned areg
= a
->base_gpr
.sel();
385 if (achan
== chan
&& (reg
>= areg
&& reg
< areg
+a
->array_size
))
391 void shader::add_gpr_array(unsigned gpr_start
, unsigned gpr_count
,
392 unsigned comp_mask
) {
396 gpr_array
*a
= new gpr_array(
397 sel_chan(gpr_start
, chan
), gpr_count
);
399 SB_DUMP_PASS( sblog
<< "add_gpr_array: @" << a
->base_gpr
400 << " [" << a
->array_size
<< "]\n";
403 gpr_arrays
.push_back(a
);
410 value
* shader::get_pred_sel(int sel
) {
411 assert(sel
== 0 || sel
== 1);
413 pred_sels
[sel
] = get_const_value(sel
);
415 return pred_sels
[sel
];
418 cf_node
* shader::create_cf(unsigned op
) {
419 cf_node
*c
= create_cf();
425 std::string
shader::get_full_target_name() {
426 std::string s
= get_shader_target_name();
428 s
+= ctx
.get_hw_chip_name();
430 s
+= ctx
.get_hw_class_name();
434 const char* shader::get_shader_target_name() {
436 case TARGET_VS
: return "VS";
437 case TARGET_ES
: return "ES";
438 case TARGET_PS
: return "PS";
439 case TARGET_GS
: return "GS";
440 case TARGET_HS
: return "HS";
441 case TARGET_LS
: return "LS";
442 case TARGET_COMPUTE
: return "COMPUTE";
443 case TARGET_FETCH
: return "FETCH";
445 return "INVALID_TARGET";
449 void shader::simplify_dep_rep(node
* dr
) {
450 container_node
*p
= dr
->parent
;
451 if (p
->is_repeat()) {
452 repeat_node
*r
= static_cast<repeat_node
*>(p
);
453 r
->target
->expand_repeat(r
);
454 } else if (p
->is_depart()) {
455 depart_node
*d
= static_cast<depart_node
*>(p
);
456 d
->target
->expand_depart(d
);
459 dr
->parent
->cut(dr
->next
, NULL
);
463 // FIXME this is used in some places as the max non-temp gpr,
464 // (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
465 unsigned shader::first_temp_gpr() {
466 return MAX_GPR
- ctx
.alu_temp_gprs
;
469 unsigned shader::num_nontemp_gpr() {
470 return MAX_GPR
- 2 * ctx
.alu_temp_gprs
;
473 void shader::set_uses_kill() {
474 if (root
->src
.empty())
478 root
->src
[0] = get_special_value(SV_VALID_MASK
);
481 alu_node
* shader::clone(alu_node
* n
) {
482 alu_node
*c
= create_alu();
484 // FIXME: this may be wrong with indirect operands
494 void shader::collect_stats(bool opt
) {
495 if (!sb_context::dump_stat
)
498 shader_stats
&s
= opt
? opt_stats
: src_stats
;
506 ctx
.opt_stats
.accumulate(s
);
508 ctx
.src_stats
.accumulate(s
);
511 value
* shader::get_ro_value(value_map
& vm
, value_kind vk
, unsigned key
) {
512 value_map::iterator I
= vm
.find(key
);
515 value
*v
= create_value(vk
, key
, 0);
516 v
->flags
= VLF_READONLY
;
517 vm
.insert(std::make_pair(key
, v
));
521 void shader::create_bbs(container_node
* n
, bbs_vec
&bbs
, int loop_level
) {
523 bool inside_bb
= false;
524 bool last_inside_bb
= true;
525 node_iterator
bb_start(n
->begin()), I(bb_start
), E(n
->end());
527 for (; I
!= E
; ++I
) {
529 inside_bb
= k
->type
== NT_OP
;
531 if (inside_bb
&& !last_inside_bb
)
533 else if (!inside_bb
) {
535 && I
->type
!= NT_REPEAT
536 && I
->type
!= NT_DEPART
537 && I
->type
!= NT_IF
) {
538 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
540 n
->insert_node_before(*bb_start
, bb
);
542 bb
->move(bb_start
, I
);
545 if (k
->is_container()) {
548 if (k
->type
== NT_REGION
) {
549 loop
= static_cast<region_node
*>(k
)->is_loop();
552 create_bbs(static_cast<container_node
*>(k
), bbs
,
557 if (k
->type
== NT_DEPART
)
560 last_inside_bb
= inside_bb
;
563 if (last_inside_bb
) {
564 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
569 n
->insert_node_before(*bb_start
, bb
);
570 if (bb_start
!= n
->end())
571 bb
->move(bb_start
, n
->end());
574 if (n
->last
&& n
->last
->type
== NT_IF
) {
575 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
582 void shader::expand_bbs(bbs_vec
&bbs
) {
584 for (bbs_vec::iterator I
= bbs
.begin(), E
= bbs
.end(); I
!= E
; ++I
) {
590 sched_queue_id
shader::get_queue_id(node
* n
) {
591 switch (n
->subtype
) {
593 case NST_ALU_PACKED_INST
:
597 case NST_FETCH_INST
: {
598 fetch_node
*f
= static_cast<fetch_node
*>(n
);
599 if (ctx
.is_r600() && (f
->bc
.op_ptr
->flags
& FF_VTX
))
601 if (f
->bc
.op_ptr
->flags
& FF_GDS
)
613 void shader_stats::collect(node
*n
) {
614 if (n
->is_alu_inst())
616 else if (n
->is_fetch_inst())
618 else if (n
->is_container()) {
619 container_node
*c
= static_cast<container_node
*>(n
);
621 if (n
->is_alu_group())
623 else if (n
->is_alu_clause())
625 else if (n
->is_fetch_clause())
627 else if (n
->is_cf_inst())
631 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
638 void shader_stats::accumulate(shader_stats
& s
) {
645 alu_groups
+= s
.alu_groups
;
646 alu_clauses
+= s
.alu_clauses
;
648 fetch_clauses
+= s
.fetch_clauses
;
652 void shader_stats::dump() {
653 sblog
<< "dw:" << ndw
<< ", gpr:" << ngpr
<< ", stk:" << nstack
654 << ", alu groups:" << alu_groups
<< ", alu clauses: " << alu_clauses
655 << ", alu:" << alu
<< ", fetch:" << fetch
656 << ", fetch clauses:" << fetch_clauses
660 sblog
<< ", shaders:" << shaders
;
665 static void print_diff(unsigned d1
, unsigned d2
) {
667 sblog
<< ((int)d2
- (int)d1
) * 100 / (int)d1
<< "%";
674 void shader_stats::dump_diff(shader_stats
& s
) {
675 sblog
<< "dw:"; print_diff(ndw
, s
.ndw
);
676 sblog
<< ", gpr:" ; print_diff(ngpr
, s
.ngpr
);
677 sblog
<< ", stk:" ; print_diff(nstack
, s
.nstack
);
678 sblog
<< ", alu groups:" ; print_diff(alu_groups
, s
.alu_groups
);
679 sblog
<< ", alu clauses: " ; print_diff(alu_clauses
, s
.alu_clauses
);
680 sblog
<< ", alu:" ; print_diff(alu
, s
.alu
);
681 sblog
<< ", fetch:" ; print_diff(fetch
, s
.fetch
);
682 sblog
<< ", fetch clauses:" ; print_diff(fetch_clauses
, s
.fetch_clauses
);
683 sblog
<< ", cf:" ; print_diff(cf
, s
.cf
);
687 } // namespace r600_sb