2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "sb_shader.h"
33 shader::shader(sb_context
&sctx
, shader_target t
, unsigned id
)
34 : ctx(sctx
), next_temp_value_index(temp_regid_offset
),
35 prep_regs_count(), pred_sels(),
36 regions(), inputs(), undef(), val_pool(sizeof(value
)),
37 pool(), all_nodes(), src_stats(), opt_stats(), errors(),
40 target(t
), vt(ex
), ex(*this), root(),
41 compute_interferences(),
42 has_alu_predication(), uses_gradients(), safe_math(), ngpr(), nstack() {}
44 bool shader::assign_slot(alu_node
* n
, alu_node
*slots
[5]) {
46 unsigned slot_flags
= ctx
.alu_slots(n
->bc
.op
);
47 unsigned slot
= n
->bc
.dst_chan
;
49 if (!ctx
.is_cayman() && (!(slot_flags
& AF_V
) || slots
[slot
]) &&
61 void shader::add_pinned_gpr_values(vvec
& vec
, unsigned gpr
, unsigned comp_mask
,
66 value
*v
= get_gpr_value(src
, gpr
, chan
, false);
67 v
->flags
|= (VLF_PIN_REG
| VLF_PIN_CHAN
);
69 v
->gpr
= v
->pin_gpr
= v
->select
;
72 if (v
->array
&& !v
->array
->gpr
) {
73 // if pinned value can be accessed with indirect addressing
74 // pin the entire array to its original location
75 v
->array
->gpr
= v
->array
->base_gpr
;
84 cf_node
* shader::create_clause(node_subtype nst
) {
85 cf_node
*n
= create_cf();
90 case NST_ALU_CLAUSE
: n
->bc
.set_op(CF_OP_ALU
); break;
91 case NST_TEX_CLAUSE
: n
->bc
.set_op(CF_OP_TEX
); break;
92 case NST_VTX_CLAUSE
: n
->bc
.set_op(CF_OP_VTX
); break;
93 default: assert(!"invalid clause type"); break;
100 void shader::create_bbs() {
101 create_bbs(root
, bbs
);
104 void shader::expand_bbs() {
108 alu_node
* shader::create_mov(value
* dst
, value
* src
) {
109 alu_node
*n
= create_alu();
110 n
->bc
.set_op(ALU_OP1_MOV
);
111 n
->dst
.push_back(dst
);
112 n
->src
.push_back(src
);
118 alu_node
* shader::create_copy_mov(value
* dst
, value
* src
, unsigned affcost
) {
119 alu_node
*n
= create_mov(dst
, src
);
121 dst
->assign_source(src
);
122 n
->flags
|= NF_COPY_MOV
| NF_DONT_HOIST
;
124 if (affcost
&& dst
->is_sgpr() && src
->is_sgpr())
125 coal
.add_edge(src
, dst
, affcost
);
130 value
* shader::get_value(value_kind kind
, sel_chan id
,
132 if (version
== 0 && kind
== VLK_REG
&& id
.sel() < prep_regs_count
)
133 return val_pool
[id
- 1];
136 unsigned key
= (kind
<< 28) | (version
<< 16) | id
;
137 value_map::iterator i
= reg_values
.find(key
);
138 if (i
!= reg_values
.end()) {
141 value
*v
= create_value(kind
, id
, version
);
142 reg_values
.insert(std::make_pair(key
, v
));
146 value
* shader::get_special_value(unsigned sv_id
, unsigned version
) {
147 sel_chan
id(sv_id
, 0);
148 return get_value(VLK_SPECIAL_REG
, id
, version
);
151 void shader::fill_array_values(gpr_array
*a
, vvec
&vv
) {
152 unsigned sz
= a
->array_size
;
154 for (unsigned i
= 0; i
< a
->array_size
; ++i
) {
155 vv
[i
] = get_gpr_value(true, a
->base_gpr
.sel() + i
, a
->base_gpr
.chan(),
160 value
* shader::get_gpr_value(bool src
, unsigned reg
, unsigned chan
, bool rel
,
162 sel_chan
id(reg
, chan
);
164 gpr_array
*a
= get_gpr_array(reg
, chan
);
167 v
= create_value(VLK_REL_REG
, id
, 0);
168 v
->rel
= get_special_value(SV_AR_INDEX
);
169 fill_array_values(a
, v
->muse
);
171 fill_array_values(a
, v
->mdef
);
173 if (version
== 0 && reg
< prep_regs_count
)
174 return (val_pool
[id
- 1]);
176 v
= get_value(VLK_REG
, id
, version
);
180 v
->pin_gpr
= v
->select
;
185 value
* shader::create_temp_value() {
186 sel_chan
id(++next_temp_value_index
, 0);
187 return get_value(VLK_TEMP
, id
, 0);
190 value
* shader::get_kcache_value(unsigned bank
, unsigned index
, unsigned chan
) {
191 return get_ro_value(kcache_values
, VLK_KCACHE
,
192 sel_chan((bank
<< 12) | index
, chan
));
195 void shader::add_input(unsigned gpr
, bool preloaded
, unsigned comp_mask
) {
196 if (inputs
.size() <= gpr
)
197 inputs
.resize(gpr
+1);
199 shader_input
&i
= inputs
[gpr
];
200 i
.preloaded
= preloaded
;
201 i
.comp_mask
= comp_mask
;
204 add_pinned_gpr_values(root
->dst
, gpr
, comp_mask
, true);
209 void shader::init() {
211 root
= create_container();
214 void shader::init_call_fs(cf_node
* cf
) {
217 assert(target
== TARGET_VS
);
219 for(inputs_vec::const_iterator I
= inputs
.begin(),
220 E
= inputs
.end(); I
!= E
; ++I
, ++gpr
) {
222 add_pinned_gpr_values(cf
->dst
, gpr
, I
->comp_mask
, false);
224 add_pinned_gpr_values(cf
->src
, gpr
, I
->comp_mask
, true);
228 void shader::set_undef(val_set
& s
) {
229 value
*undefined
= get_undef_value();
230 if (!undefined
->gvn_source
)
231 vt
.add_value(undefined
);
235 for (val_set::iterator I
= vs
.begin(*this), E
= vs
.end(*this); I
!= E
; ++I
) {
238 assert(!v
->is_readonly() && !v
->is_rel());
240 v
->gvn_source
= undefined
->gvn_source
;
244 value
* shader::create_value(value_kind k
, sel_chan regid
, unsigned ver
) {
245 value
*v
= val_pool
.create(k
, regid
, ver
);
249 value
* shader::get_undef_value() {
251 undef
= create_value(VLK_UNDEF
, 0, 0);
255 node
* shader::create_node(node_type nt
, node_subtype nst
, node_flags flags
) {
256 node
*n
= new (pool
.allocate(sizeof(node
))) node(nt
, nst
, flags
);
257 all_nodes
.push_back(n
);
261 alu_node
* shader::create_alu() {
262 alu_node
* n
= new (pool
.allocate(sizeof(alu_node
))) alu_node();
263 memset(&n
->bc
, 0, sizeof(bc_alu
));
264 all_nodes
.push_back(n
);
268 alu_group_node
* shader::create_alu_group() {
270 new (pool
.allocate(sizeof(alu_group_node
))) alu_group_node();
271 all_nodes
.push_back(n
);
275 alu_packed_node
* shader::create_alu_packed() {
277 new (pool
.allocate(sizeof(alu_packed_node
))) alu_packed_node();
278 all_nodes
.push_back(n
);
282 cf_node
* shader::create_cf() {
283 cf_node
* n
= new (pool
.allocate(sizeof(cf_node
))) cf_node();
284 memset(&n
->bc
, 0, sizeof(bc_cf
));
286 all_nodes
.push_back(n
);
290 fetch_node
* shader::create_fetch() {
291 fetch_node
* n
= new (pool
.allocate(sizeof(fetch_node
))) fetch_node();
292 memset(&n
->bc
, 0, sizeof(bc_fetch
));
293 all_nodes
.push_back(n
);
297 region_node
* shader::create_region() {
298 region_node
*n
= new (pool
.allocate(sizeof(region_node
)))
299 region_node(regions
.size());
300 regions
.push_back(n
);
301 all_nodes
.push_back(n
);
305 depart_node
* shader::create_depart(region_node
* target
) {
306 depart_node
* n
= new (pool
.allocate(sizeof(depart_node
)))
307 depart_node(target
, target
->departs
.size());
308 target
->departs
.push_back(n
);
309 all_nodes
.push_back(n
);
313 repeat_node
* shader::create_repeat(region_node
* target
) {
314 repeat_node
* n
= new (pool
.allocate(sizeof(repeat_node
)))
315 repeat_node(target
, target
->repeats
.size() + 1);
316 target
->repeats
.push_back(n
);
317 all_nodes
.push_back(n
);
321 container_node
* shader::create_container(node_type nt
, node_subtype nst
,
323 container_node
*n
= new (pool
.allocate(sizeof(container_node
)))
324 container_node(nt
, nst
, flags
);
325 all_nodes
.push_back(n
);
329 if_node
* shader::create_if() {
330 if_node
* n
= new (pool
.allocate(sizeof(if_node
))) if_node();
331 all_nodes
.push_back(n
);
335 bb_node
* shader::create_bb(unsigned id
, unsigned loop_level
) {
336 bb_node
* n
= new (pool
.allocate(sizeof(bb_node
))) bb_node(id
, loop_level
);
337 all_nodes
.push_back(n
);
341 value
* shader::get_special_ro_value(unsigned sel
) {
342 return get_ro_value(special_ro_values
, VLK_PARAM
, sel
);
345 value
* shader::get_const_value(const literal
&v
) {
346 value
*val
= get_ro_value(const_values
, VLK_CONST
, v
);
347 val
->literal_value
= v
;
352 for (node_vec::iterator I
= all_nodes
.begin(), E
= all_nodes
.end();
356 for (gpr_array_vec::iterator I
= gpr_arrays
.begin(), E
= gpr_arrays
.end();
362 void shader::dump_ir() {
367 value
* shader::get_value_version(value
* v
, unsigned ver
) {
368 assert(!v
->is_readonly() && !v
->is_rel());
369 value
*vv
= get_value(v
->kind
, v
->select
, ver
);
373 vv
->array
= v
->array
;
379 gpr_array
* shader::get_gpr_array(unsigned reg
, unsigned chan
) {
381 for (regarray_vec::iterator I
= gpr_arrays
.begin(),
382 E
= gpr_arrays
.end(); I
!= E
; ++I
) {
384 unsigned achan
= a
->base_gpr
.chan();
385 unsigned areg
= a
->base_gpr
.sel();
386 if (achan
== chan
&& (reg
>= areg
&& reg
< areg
+a
->array_size
))
392 void shader::add_gpr_array(unsigned gpr_start
, unsigned gpr_count
,
393 unsigned comp_mask
) {
397 gpr_array
*a
= new gpr_array(
398 sel_chan(gpr_start
, chan
), gpr_count
);
400 SB_DUMP_PASS( sblog
<< "add_gpr_array: @" << a
->base_gpr
401 << " [" << a
->array_size
<< "]\n";
404 gpr_arrays
.push_back(a
);
411 value
* shader::get_pred_sel(int sel
) {
412 assert(sel
== 0 || sel
== 1);
414 pred_sels
[sel
] = get_const_value(sel
);
416 return pred_sels
[sel
];
419 cf_node
* shader::create_cf(unsigned op
) {
420 cf_node
*c
= create_cf();
426 std::string
shader::get_full_target_name() {
427 std::string s
= get_shader_target_name();
429 s
+= ctx
.get_hw_chip_name();
431 s
+= ctx
.get_hw_class_name();
435 const char* shader::get_shader_target_name() {
437 case TARGET_VS
: return "VS";
438 case TARGET_PS
: return "PS";
439 case TARGET_GS
: return "GS";
440 case TARGET_COMPUTE
: return "COMPUTE";
441 case TARGET_FETCH
: return "FETCH";
443 return "INVALID_TARGET";
447 void shader::simplify_dep_rep(node
* dr
) {
448 container_node
*p
= dr
->parent
;
449 if (p
->is_repeat()) {
450 repeat_node
*r
= static_cast<repeat_node
*>(p
);
451 r
->target
->expand_repeat(r
);
452 } else if (p
->is_depart()) {
453 depart_node
*d
= static_cast<depart_node
*>(p
);
454 d
->target
->expand_depart(d
);
457 dr
->parent
->cut(dr
->next
, NULL
);
461 // FIXME this is used in some places as the max non-temp gpr,
462 // (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
463 unsigned shader::first_temp_gpr() {
464 return MAX_GPR
- ctx
.alu_temp_gprs
;
467 unsigned shader::num_nontemp_gpr() {
468 return MAX_GPR
- 2 * ctx
.alu_temp_gprs
;
471 void shader::set_uses_kill() {
472 if (root
->src
.empty())
476 root
->src
[0] = get_special_value(SV_VALID_MASK
);
479 alu_node
* shader::clone(alu_node
* n
) {
480 alu_node
*c
= create_alu();
482 // FIXME: this may be wrong with indirect operands
492 void shader::collect_stats(bool opt
) {
493 if (!sb_context::dump_stat
)
496 shader_stats
&s
= opt
? opt_stats
: src_stats
;
504 ctx
.opt_stats
.accumulate(s
);
506 ctx
.src_stats
.accumulate(s
);
509 value
* shader::get_ro_value(value_map
& vm
, value_kind vk
, unsigned key
) {
510 value_map::iterator I
= vm
.find(key
);
513 value
*v
= create_value(vk
, key
, 0);
514 v
->flags
= VLF_READONLY
;
515 vm
.insert(std::make_pair(key
, v
));
519 void shader::create_bbs(container_node
* n
, bbs_vec
&bbs
, int loop_level
) {
521 bool inside_bb
= false;
522 bool last_inside_bb
= true;
523 node_iterator
bb_start(n
->begin()), I(bb_start
), E(n
->end());
525 for (; I
!= E
; ++I
) {
527 inside_bb
= k
->type
== NT_OP
;
529 if (inside_bb
&& !last_inside_bb
)
531 else if (!inside_bb
) {
533 && I
->type
!= NT_REPEAT
534 && I
->type
!= NT_DEPART
535 && I
->type
!= NT_IF
) {
536 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
538 n
->insert_node_before(*bb_start
, bb
);
540 bb
->move(bb_start
, I
);
543 if (k
->is_container()) {
546 if (k
->type
== NT_REGION
) {
547 loop
= static_cast<region_node
*>(k
)->is_loop();
550 create_bbs(static_cast<container_node
*>(k
), bbs
,
555 if (k
->type
== NT_DEPART
)
558 last_inside_bb
= inside_bb
;
561 if (last_inside_bb
) {
562 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
567 n
->insert_node_before(*bb_start
, bb
);
568 if (bb_start
!= n
->end())
569 bb
->move(bb_start
, n
->end());
572 if (n
->last
&& n
->last
->type
== NT_IF
) {
573 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
580 void shader::expand_bbs(bbs_vec
&bbs
) {
582 for (bbs_vec::iterator I
= bbs
.begin(), E
= bbs
.end(); I
!= E
; ++I
) {
588 sched_queue_id
shader::get_queue_id(node
* n
) {
589 switch (n
->subtype
) {
591 case NST_ALU_PACKED_INST
:
595 case NST_FETCH_INST
: {
596 fetch_node
*f
= static_cast<fetch_node
*>(n
);
597 if (ctx
.is_r600() && (f
->bc
.op_ptr
->flags
& FF_VTX
))
609 void shader_stats::collect(node
*n
) {
610 if (n
->is_alu_inst())
612 else if (n
->is_fetch_inst())
614 else if (n
->is_container()) {
615 container_node
*c
= static_cast<container_node
*>(n
);
617 if (n
->is_alu_group())
619 else if (n
->is_alu_clause())
621 else if (n
->is_fetch_clause())
623 else if (n
->is_cf_inst())
627 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
634 void shader_stats::accumulate(shader_stats
& s
) {
641 alu_groups
+= s
.alu_groups
;
642 alu_clauses
+= s
.alu_clauses
;
644 fetch_clauses
+= s
.fetch_clauses
;
648 void shader_stats::dump() {
649 sblog
<< "dw:" << ndw
<< ", gpr:" << ngpr
<< ", stk:" << nstack
650 << ", alu groups:" << alu_groups
<< ", alu clauses: " << alu_clauses
651 << ", alu:" << alu
<< ", fetch:" << fetch
652 << ", fetch clauses:" << fetch_clauses
656 sblog
<< ", shaders:" << shaders
;
661 static void print_diff(unsigned d1
, unsigned d2
) {
663 sblog
<< ((int)d2
- (int)d1
) * 100 / (int)d1
<< "%";
670 void shader_stats::dump_diff(shader_stats
& s
) {
671 sblog
<< "dw:"; print_diff(ndw
, s
.ndw
);
672 sblog
<< ", gpr:" ; print_diff(ngpr
, s
.ngpr
);
673 sblog
<< ", stk:" ; print_diff(nstack
, s
.nstack
);
674 sblog
<< ", alu groups:" ; print_diff(alu_groups
, s
.alu_groups
);
675 sblog
<< ", alu clauses: " ; print_diff(alu_clauses
, s
.alu_clauses
);
676 sblog
<< ", alu:" ; print_diff(alu
, s
.alu
);
677 sblog
<< ", fetch:" ; print_diff(fetch
, s
.fetch
);
678 sblog
<< ", fetch clauses:" ; print_diff(fetch_clauses
, s
.fetch_clauses
);
679 sblog
<< ", cf:" ; print_diff(cf
, s
.cf
);
683 } // namespace r600_sb