/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
28 #include "sb_shader.h"
33 shader::shader(sb_context
&sctx
, shader_target t
, unsigned id
)
34 : ctx(sctx
), next_temp_value_index(temp_regid_offset
),
35 prep_regs_count(), pred_sels(),
36 regions(), inputs(), undef(), val_pool(sizeof(value
)),
37 pool(), all_nodes(), src_stats(), opt_stats(), errors(),
40 target(t
), vt(ex
), ex(*this), root(),
41 compute_interferences(),
42 has_alu_predication(),
43 uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {}
45 bool shader::assign_slot(alu_node
* n
, alu_node
*slots
[5]) {
47 unsigned slot_flags
= ctx
.alu_slots(n
->bc
.op
);
48 unsigned slot
= n
->bc
.dst_chan
;
50 if (!ctx
.is_cayman() && (!(slot_flags
& AF_V
) || slots
[slot
]) &&
62 void shader::add_pinned_gpr_values(vvec
& vec
, unsigned gpr
, unsigned comp_mask
,
67 value
*v
= get_gpr_value(src
, gpr
, chan
, false);
68 v
->flags
|= (VLF_PIN_REG
| VLF_PIN_CHAN
);
70 v
->gpr
= v
->pin_gpr
= v
->select
;
73 if (v
->array
&& !v
->array
->gpr
) {
74 // if pinned value can be accessed with indirect addressing
75 // pin the entire array to its original location
76 v
->array
->gpr
= v
->array
->base_gpr
;
85 cf_node
* shader::create_clause(node_subtype nst
) {
86 cf_node
*n
= create_cf();
91 case NST_ALU_CLAUSE
: n
->bc
.set_op(CF_OP_ALU
); break;
92 case NST_TEX_CLAUSE
: n
->bc
.set_op(CF_OP_TEX
); break;
93 case NST_VTX_CLAUSE
: n
->bc
.set_op(CF_OP_VTX
); break;
94 default: assert(!"invalid clause type"); break;
// Public entry point for basic-block construction: delegates to the
// recursive overload, starting from the shader's root container and
// collecting the resulting blocks into the member vector 'bbs'.
void shader::create_bbs() {
	create_bbs(root, bbs);
}
105 void shader::expand_bbs() {
109 alu_node
* shader::create_mov(value
* dst
, value
* src
) {
110 alu_node
*n
= create_alu();
111 n
->bc
.set_op(ALU_OP1_MOV
);
112 n
->dst
.push_back(dst
);
113 n
->src
.push_back(src
);
119 alu_node
* shader::create_copy_mov(value
* dst
, value
* src
, unsigned affcost
) {
120 alu_node
*n
= create_mov(dst
, src
);
122 dst
->assign_source(src
);
123 n
->flags
|= NF_COPY_MOV
| NF_DONT_HOIST
;
125 if (affcost
&& dst
->is_sgpr() && src
->is_sgpr())
126 coal
.add_edge(src
, dst
, affcost
);
131 value
* shader::get_value(value_kind kind
, sel_chan id
,
133 if (version
== 0 && kind
== VLK_REG
&& id
.sel() < prep_regs_count
)
134 return val_pool
[id
- 1];
137 unsigned key
= (kind
<< 28) | (version
<< 16) | id
;
138 value_map::iterator i
= reg_values
.find(key
);
139 if (i
!= reg_values
.end()) {
142 value
*v
= create_value(kind
, id
, version
);
143 reg_values
.insert(std::make_pair(key
, v
));
147 value
* shader::get_special_value(unsigned sv_id
, unsigned version
) {
148 sel_chan
id(sv_id
, 0);
149 return get_value(VLK_SPECIAL_REG
, id
, version
);
152 void shader::fill_array_values(gpr_array
*a
, vvec
&vv
) {
153 unsigned sz
= a
->array_size
;
155 for (unsigned i
= 0; i
< a
->array_size
; ++i
) {
156 vv
[i
] = get_gpr_value(true, a
->base_gpr
.sel() + i
, a
->base_gpr
.chan(),
161 value
* shader::get_gpr_value(bool src
, unsigned reg
, unsigned chan
, bool rel
,
163 sel_chan
id(reg
, chan
);
165 gpr_array
*a
= get_gpr_array(reg
, chan
);
168 v
= create_value(VLK_REL_REG
, id
, 0);
169 v
->rel
= get_special_value(SV_AR_INDEX
);
170 fill_array_values(a
, v
->muse
);
172 fill_array_values(a
, v
->mdef
);
174 if (version
== 0 && reg
< prep_regs_count
)
175 return (val_pool
[id
- 1]);
177 v
= get_value(VLK_REG
, id
, version
);
181 v
->pin_gpr
= v
->select
;
186 value
* shader::create_temp_value() {
187 sel_chan
id(++next_temp_value_index
, 0);
188 return get_value(VLK_TEMP
, id
, 0);
191 value
* shader::get_kcache_value(unsigned bank
, unsigned index
, unsigned chan
) {
192 return get_ro_value(kcache_values
, VLK_KCACHE
,
193 sel_chan((bank
<< 12) | index
, chan
));
196 void shader::add_input(unsigned gpr
, bool preloaded
, unsigned comp_mask
) {
197 if (inputs
.size() <= gpr
)
198 inputs
.resize(gpr
+1);
200 shader_input
&i
= inputs
[gpr
];
201 i
.preloaded
= preloaded
;
202 i
.comp_mask
= comp_mask
;
205 add_pinned_gpr_values(root
->dst
, gpr
, comp_mask
, true);
210 void shader::init() {
212 root
= create_container();
215 void shader::init_call_fs(cf_node
* cf
) {
218 assert(target
== TARGET_VS
|| target
== TARGET_ES
);
220 for(inputs_vec::const_iterator I
= inputs
.begin(),
221 E
= inputs
.end(); I
!= E
; ++I
, ++gpr
) {
223 add_pinned_gpr_values(cf
->dst
, gpr
, I
->comp_mask
, false);
225 add_pinned_gpr_values(cf
->src
, gpr
, I
->comp_mask
, true);
229 void shader::set_undef(val_set
& s
) {
230 value
*undefined
= get_undef_value();
231 if (!undefined
->gvn_source
)
232 vt
.add_value(undefined
);
236 for (val_set::iterator I
= vs
.begin(*this), E
= vs
.end(*this); I
!= E
; ++I
) {
239 assert(!v
->is_readonly() && !v
->is_rel());
241 v
->gvn_source
= undefined
->gvn_source
;
245 value
* shader::create_value(value_kind k
, sel_chan regid
, unsigned ver
) {
246 value
*v
= val_pool
.create(k
, regid
, ver
);
250 value
* shader::get_undef_value() {
252 undef
= create_value(VLK_UNDEF
, 0, 0);
256 node
* shader::create_node(node_type nt
, node_subtype nst
, node_flags flags
) {
257 node
*n
= new (pool
.allocate(sizeof(node
))) node(nt
, nst
, flags
);
258 all_nodes
.push_back(n
);
262 alu_node
* shader::create_alu() {
263 alu_node
* n
= new (pool
.allocate(sizeof(alu_node
))) alu_node();
264 all_nodes
.push_back(n
);
268 alu_group_node
* shader::create_alu_group() {
270 new (pool
.allocate(sizeof(alu_group_node
))) alu_group_node();
271 all_nodes
.push_back(n
);
275 alu_packed_node
* shader::create_alu_packed() {
277 new (pool
.allocate(sizeof(alu_packed_node
))) alu_packed_node();
278 all_nodes
.push_back(n
);
282 cf_node
* shader::create_cf() {
283 cf_node
* n
= new (pool
.allocate(sizeof(cf_node
))) cf_node();
285 all_nodes
.push_back(n
);
289 fetch_node
* shader::create_fetch() {
290 fetch_node
* n
= new (pool
.allocate(sizeof(fetch_node
))) fetch_node();
291 all_nodes
.push_back(n
);
295 region_node
* shader::create_region() {
296 region_node
*n
= new (pool
.allocate(sizeof(region_node
)))
297 region_node(regions
.size());
298 regions
.push_back(n
);
299 all_nodes
.push_back(n
);
303 depart_node
* shader::create_depart(region_node
* target
) {
304 depart_node
* n
= new (pool
.allocate(sizeof(depart_node
)))
305 depart_node(target
, target
->departs
.size());
306 target
->departs
.push_back(n
);
307 all_nodes
.push_back(n
);
311 repeat_node
* shader::create_repeat(region_node
* target
) {
312 repeat_node
* n
= new (pool
.allocate(sizeof(repeat_node
)))
313 repeat_node(target
, target
->repeats
.size() + 1);
314 target
->repeats
.push_back(n
);
315 all_nodes
.push_back(n
);
319 container_node
* shader::create_container(node_type nt
, node_subtype nst
,
321 container_node
*n
= new (pool
.allocate(sizeof(container_node
)))
322 container_node(nt
, nst
, flags
);
323 all_nodes
.push_back(n
);
327 if_node
* shader::create_if() {
328 if_node
* n
= new (pool
.allocate(sizeof(if_node
))) if_node();
329 all_nodes
.push_back(n
);
333 bb_node
* shader::create_bb(unsigned id
, unsigned loop_level
) {
334 bb_node
* n
= new (pool
.allocate(sizeof(bb_node
))) bb_node(id
, loop_level
);
335 all_nodes
.push_back(n
);
// Return the read-only value for special selector 'sel', going through the
// get_ro_value cache (special_ro_values map) with kind VLK_PARAM.
value* shader::get_special_ro_value(unsigned sel) {
	return get_ro_value(special_ro_values, VLK_PARAM, sel);
}
343 value
* shader::get_const_value(const literal
&v
) {
344 value
*val
= get_ro_value(const_values
, VLK_CONST
, v
);
345 val
->literal_value
= v
;
350 for (node_vec::iterator I
= all_nodes
.begin(), E
= all_nodes
.end();
354 for (gpr_array_vec::iterator I
= gpr_arrays
.begin(), E
= gpr_arrays
.end();
360 void shader::dump_ir() {
365 value
* shader::get_value_version(value
* v
, unsigned ver
) {
366 assert(!v
->is_readonly() && !v
->is_rel());
367 value
*vv
= get_value(v
->kind
, v
->select
, ver
);
371 vv
->array
= v
->array
;
377 gpr_array
* shader::get_gpr_array(unsigned reg
, unsigned chan
) {
379 for (regarray_vec::iterator I
= gpr_arrays
.begin(),
380 E
= gpr_arrays
.end(); I
!= E
; ++I
) {
382 unsigned achan
= a
->base_gpr
.chan();
383 unsigned areg
= a
->base_gpr
.sel();
384 if (achan
== chan
&& (reg
>= areg
&& reg
< areg
+a
->array_size
))
390 void shader::add_gpr_array(unsigned gpr_start
, unsigned gpr_count
,
391 unsigned comp_mask
) {
395 gpr_array
*a
= new gpr_array(
396 sel_chan(gpr_start
, chan
), gpr_count
);
398 SB_DUMP_PASS( sblog
<< "add_gpr_array: @" << a
->base_gpr
399 << " [" << a
->array_size
<< "]\n";
402 gpr_arrays
.push_back(a
);
409 value
* shader::get_pred_sel(int sel
) {
410 assert(sel
== 0 || sel
== 1);
412 pred_sels
[sel
] = get_const_value(sel
);
414 return pred_sels
[sel
];
417 cf_node
* shader::create_cf(unsigned op
) {
418 cf_node
*c
= create_cf();
424 std::string
shader::get_full_target_name() {
425 std::string s
= get_shader_target_name();
427 s
+= ctx
.get_hw_chip_name();
429 s
+= ctx
.get_hw_class_name();
433 const char* shader::get_shader_target_name() {
435 case TARGET_VS
: return "VS";
436 case TARGET_ES
: return "ES";
437 case TARGET_PS
: return "PS";
438 case TARGET_GS
: return "GS";
439 case TARGET_COMPUTE
: return "COMPUTE";
440 case TARGET_FETCH
: return "FETCH";
442 return "INVALID_TARGET";
446 void shader::simplify_dep_rep(node
* dr
) {
447 container_node
*p
= dr
->parent
;
448 if (p
->is_repeat()) {
449 repeat_node
*r
= static_cast<repeat_node
*>(p
);
450 r
->target
->expand_repeat(r
);
451 } else if (p
->is_depart()) {
452 depart_node
*d
= static_cast<depart_node
*>(p
);
453 d
->target
->expand_depart(d
);
456 dr
->parent
->cut(dr
->next
, NULL
);
// Index of the first GPR reserved for ALU temporaries.
// FIXME this is used in some places as the max non-temp gpr,
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
unsigned shader::first_temp_gpr() {
	return MAX_GPR - ctx.alu_temp_gprs;
}
// Number of GPRs usable for regular allocation: 2 * ctx.alu_temp_gprs
// registers at the top of the file are reserved for ALU temporaries.
unsigned shader::num_nontemp_gpr() {
	return MAX_GPR - 2 * ctx.alu_temp_gprs;
}
470 void shader::set_uses_kill() {
471 if (root
->src
.empty())
475 root
->src
[0] = get_special_value(SV_VALID_MASK
);
478 alu_node
* shader::clone(alu_node
* n
) {
479 alu_node
*c
= create_alu();
481 // FIXME: this may be wrong with indirect operands
491 void shader::collect_stats(bool opt
) {
492 if (!sb_context::dump_stat
)
495 shader_stats
&s
= opt
? opt_stats
: src_stats
;
503 ctx
.opt_stats
.accumulate(s
);
505 ctx
.src_stats
.accumulate(s
);
508 value
* shader::get_ro_value(value_map
& vm
, value_kind vk
, unsigned key
) {
509 value_map::iterator I
= vm
.find(key
);
512 value
*v
= create_value(vk
, key
, 0);
513 v
->flags
= VLF_READONLY
;
514 vm
.insert(std::make_pair(key
, v
));
518 void shader::create_bbs(container_node
* n
, bbs_vec
&bbs
, int loop_level
) {
520 bool inside_bb
= false;
521 bool last_inside_bb
= true;
522 node_iterator
bb_start(n
->begin()), I(bb_start
), E(n
->end());
524 for (; I
!= E
; ++I
) {
526 inside_bb
= k
->type
== NT_OP
;
528 if (inside_bb
&& !last_inside_bb
)
530 else if (!inside_bb
) {
532 && I
->type
!= NT_REPEAT
533 && I
->type
!= NT_DEPART
534 && I
->type
!= NT_IF
) {
535 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
537 n
->insert_node_before(*bb_start
, bb
);
539 bb
->move(bb_start
, I
);
542 if (k
->is_container()) {
545 if (k
->type
== NT_REGION
) {
546 loop
= static_cast<region_node
*>(k
)->is_loop();
549 create_bbs(static_cast<container_node
*>(k
), bbs
,
554 if (k
->type
== NT_DEPART
)
557 last_inside_bb
= inside_bb
;
560 if (last_inside_bb
) {
561 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
566 n
->insert_node_before(*bb_start
, bb
);
567 if (bb_start
!= n
->end())
568 bb
->move(bb_start
, n
->end());
571 if (n
->last
&& n
->last
->type
== NT_IF
) {
572 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
579 void shader::expand_bbs(bbs_vec
&bbs
) {
581 for (bbs_vec::iterator I
= bbs
.begin(), E
= bbs
.end(); I
!= E
; ++I
) {
587 sched_queue_id
shader::get_queue_id(node
* n
) {
588 switch (n
->subtype
) {
590 case NST_ALU_PACKED_INST
:
594 case NST_FETCH_INST
: {
595 fetch_node
*f
= static_cast<fetch_node
*>(n
);
596 if (ctx
.is_r600() && (f
->bc
.op_ptr
->flags
& FF_VTX
))
608 void shader_stats::collect(node
*n
) {
609 if (n
->is_alu_inst())
611 else if (n
->is_fetch_inst())
613 else if (n
->is_container()) {
614 container_node
*c
= static_cast<container_node
*>(n
);
616 if (n
->is_alu_group())
618 else if (n
->is_alu_clause())
620 else if (n
->is_fetch_clause())
622 else if (n
->is_cf_inst())
626 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
633 void shader_stats::accumulate(shader_stats
& s
) {
640 alu_groups
+= s
.alu_groups
;
641 alu_clauses
+= s
.alu_clauses
;
643 fetch_clauses
+= s
.fetch_clauses
;
647 void shader_stats::dump() {
648 sblog
<< "dw:" << ndw
<< ", gpr:" << ngpr
<< ", stk:" << nstack
649 << ", alu groups:" << alu_groups
<< ", alu clauses: " << alu_clauses
650 << ", alu:" << alu
<< ", fetch:" << fetch
651 << ", fetch clauses:" << fetch_clauses
655 sblog
<< ", shaders:" << shaders
;
660 static void print_diff(unsigned d1
, unsigned d2
) {
662 sblog
<< ((int)d2
- (int)d1
) * 100 / (int)d1
<< "%";
669 void shader_stats::dump_diff(shader_stats
& s
) {
670 sblog
<< "dw:"; print_diff(ndw
, s
.ndw
);
671 sblog
<< ", gpr:" ; print_diff(ngpr
, s
.ngpr
);
672 sblog
<< ", stk:" ; print_diff(nstack
, s
.nstack
);
673 sblog
<< ", alu groups:" ; print_diff(alu_groups
, s
.alu_groups
);
674 sblog
<< ", alu clauses: " ; print_diff(alu_clauses
, s
.alu_clauses
);
675 sblog
<< ", alu:" ; print_diff(alu
, s
.alu
);
676 sblog
<< ", fetch:" ; print_diff(fetch
, s
.fetch
);
677 sblog
<< ", fetch clauses:" ; print_diff(fetch_clauses
, s
.fetch_clauses
);
678 sblog
<< ", cf:" ; print_diff(cf
, s
.cf
);
682 } // namespace r600_sb