/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
28 #include "sb_shader.h"
36 shader::shader(sb_context
&sctx
, shader_target t
, unsigned id
, bool dump
)
37 : ctx(sctx
), next_temp_value_index(temp_regid_offset
),
38 prep_regs_count(), pred_sels(),
39 regions(), inputs(), undef(), val_pool(sizeof(value
)),
40 pool(), all_nodes(), src_stats(), opt_stats(), errors(), enable_dump(dump
),
43 target(t
), vt(ex
), ex(*this), root(),
44 compute_interferences(),
45 has_alu_predication(), uses_gradients(), ngpr(), nstack() {}
47 bool shader::assign_slot(alu_node
* n
, alu_node
*slots
[5]) {
49 unsigned slot_flags
= ctx
.alu_slots(n
->bc
.op
);
50 unsigned slot
= n
->bc
.dst_chan
;
52 if (!ctx
.is_cayman() && (!(slot_flags
& AF_V
) || slots
[slot
]) &&
64 void shader::add_pinned_gpr_values(vvec
& vec
, unsigned gpr
, unsigned comp_mask
,
69 value
*v
= get_gpr_value(src
, gpr
, chan
, false);
70 v
->flags
|= (VLF_PIN_REG
| VLF_PIN_CHAN
);
72 v
->gpr
= v
->pin_gpr
= v
->select
;
75 if (v
->array
&& !v
->array
->gpr
) {
76 // if pinned value can be accessed with indirect addressing
77 // pin the entire array to its original location
78 v
->array
->gpr
= v
->array
->base_gpr
;
87 cf_node
* shader::create_clause(node_subtype nst
) {
88 cf_node
*n
= create_cf();
93 case NST_ALU_CLAUSE
: n
->bc
.set_op(CF_OP_ALU
); break;
94 case NST_TEX_CLAUSE
: n
->bc
.set_op(CF_OP_TEX
); break;
95 case NST_VTX_CLAUSE
: n
->bc
.set_op(CF_OP_VTX
); break;
96 default: assert(!"invalid clause type"); break;
103 void shader::create_bbs() {
104 create_bbs(root
, bbs
);
107 void shader::expand_bbs() {
111 alu_node
* shader::create_mov(value
* dst
, value
* src
) {
112 alu_node
*n
= create_alu();
113 n
->bc
.set_op(ALU_OP1_MOV
);
114 n
->dst
.push_back(dst
);
115 n
->src
.push_back(src
);
121 alu_node
* shader::create_copy_mov(value
* dst
, value
* src
, unsigned affcost
) {
122 alu_node
*n
= create_mov(dst
, src
);
124 dst
->assign_source(src
);
125 n
->flags
|= NF_COPY_MOV
| NF_DONT_HOIST
;
127 if (affcost
&& dst
->is_sgpr() && src
->is_sgpr())
128 coal
.add_edge(src
, dst
, affcost
);
133 value
* shader::get_value(value_kind kind
, sel_chan id
,
135 if (version
== 0 && kind
== VLK_REG
&& id
.sel() < prep_regs_count
)
136 return val_pool
[id
- 1];
139 unsigned key
= (kind
<< 28) | (version
<< 16) | id
;
140 value_map::iterator i
= reg_values
.find(key
);
141 if (i
!= reg_values
.end()) {
144 value
*v
= create_value(kind
, id
, version
);
145 reg_values
.insert(std::make_pair(key
, v
));
149 value
* shader::get_special_value(unsigned sv_id
, unsigned version
) {
150 sel_chan
id(sv_id
, 0);
151 return get_value(VLK_SPECIAL_REG
, id
, version
);
154 void shader::fill_array_values(gpr_array
*a
, vvec
&vv
) {
155 unsigned sz
= a
->array_size
;
157 for (unsigned i
= 0; i
< a
->array_size
; ++i
) {
158 vv
[i
] = get_gpr_value(true, a
->base_gpr
.sel() + i
, a
->base_gpr
.chan(),
163 value
* shader::get_gpr_value(bool src
, unsigned reg
, unsigned chan
, bool rel
,
165 sel_chan
id(reg
, chan
);
167 gpr_array
*a
= get_gpr_array(reg
, chan
);
170 v
= create_value(VLK_REL_REG
, id
, 0);
171 v
->rel
= get_special_value(SV_AR_INDEX
);
172 fill_array_values(a
, v
->muse
);
174 fill_array_values(a
, v
->mdef
);
176 if (version
== 0 && reg
< prep_regs_count
)
177 return (val_pool
[id
- 1]);
179 v
= get_value(VLK_REG
, id
, version
);
183 v
->pin_gpr
= v
->select
;
188 value
* shader::create_temp_value() {
189 sel_chan
id(++next_temp_value_index
, 0);
190 return get_value(VLK_TEMP
, id
, 0);
193 value
* shader::get_kcache_value(unsigned bank
, unsigned index
, unsigned chan
) {
194 return get_ro_value(kcache_values
, VLK_KCACHE
,
195 sel_chan((bank
<< 12) | index
, chan
));
198 void shader::add_input(unsigned gpr
, bool preloaded
, unsigned comp_mask
) {
199 if (inputs
.size() <= gpr
)
200 inputs
.resize(gpr
+1);
202 shader_input
&i
= inputs
[gpr
];
203 i
.preloaded
= preloaded
;
204 i
.comp_mask
= comp_mask
;
207 add_pinned_gpr_values(root
->dst
, gpr
, comp_mask
, true);
212 void shader::init() {
214 root
= create_container();
217 void shader::init_call_fs(cf_node
* cf
) {
220 assert(target
== TARGET_VS
);
222 for(inputs_vec::const_iterator I
= inputs
.begin(),
223 E
= inputs
.end(); I
!= E
; ++I
, ++gpr
) {
225 add_pinned_gpr_values(cf
->dst
, gpr
, I
->comp_mask
, false);
227 add_pinned_gpr_values(cf
->src
, gpr
, I
->comp_mask
, true);
231 void shader::set_undef(val_set
& s
) {
232 value
*undefined
= get_undef_value();
233 if (!undefined
->gvn_source
)
234 vt
.add_value(undefined
);
238 for (val_set::iterator I
= vs
.begin(*this), E
= vs
.end(*this); I
!= E
; ++I
) {
241 assert(!v
->is_readonly() && !v
->is_rel());
243 v
->gvn_source
= undefined
->gvn_source
;
247 value
* shader::create_value(value_kind k
, sel_chan regid
, unsigned ver
) {
248 value
*v
= val_pool
.create(k
, regid
, ver
);
252 value
* shader::get_undef_value() {
254 undef
= create_value(VLK_UNDEF
, 0, 0);
258 node
* shader::create_node(node_type nt
, node_subtype nst
, node_flags flags
) {
259 node
*n
= new (pool
.allocate(sizeof(node
))) node(nt
, nst
, flags
);
260 all_nodes
.push_back(n
);
264 alu_node
* shader::create_alu() {
265 alu_node
* n
= new (pool
.allocate(sizeof(alu_node
))) alu_node();
266 memset(&n
->bc
, 0, sizeof(bc_alu
));
267 all_nodes
.push_back(n
);
271 alu_group_node
* shader::create_alu_group() {
273 new (pool
.allocate(sizeof(alu_group_node
))) alu_group_node();
274 all_nodes
.push_back(n
);
278 alu_packed_node
* shader::create_alu_packed() {
280 new (pool
.allocate(sizeof(alu_packed_node
))) alu_packed_node();
281 all_nodes
.push_back(n
);
285 cf_node
* shader::create_cf() {
286 cf_node
* n
= new (pool
.allocate(sizeof(cf_node
))) cf_node();
287 memset(&n
->bc
, 0, sizeof(bc_cf
));
289 all_nodes
.push_back(n
);
293 fetch_node
* shader::create_fetch() {
294 fetch_node
* n
= new (pool
.allocate(sizeof(fetch_node
))) fetch_node();
295 memset(&n
->bc
, 0, sizeof(bc_fetch
));
296 all_nodes
.push_back(n
);
300 region_node
* shader::create_region() {
301 region_node
*n
= new (pool
.allocate(sizeof(region_node
)))
302 region_node(regions
.size());
303 regions
.push_back(n
);
304 all_nodes
.push_back(n
);
308 depart_node
* shader::create_depart(region_node
* target
) {
309 depart_node
* n
= new (pool
.allocate(sizeof(depart_node
)))
310 depart_node(target
, target
->departs
.size());
311 target
->departs
.push_back(n
);
312 all_nodes
.push_back(n
);
316 repeat_node
* shader::create_repeat(region_node
* target
) {
317 repeat_node
* n
= new (pool
.allocate(sizeof(repeat_node
)))
318 repeat_node(target
, target
->repeats
.size() + 1);
319 target
->repeats
.push_back(n
);
320 all_nodes
.push_back(n
);
324 container_node
* shader::create_container(node_type nt
, node_subtype nst
,
326 container_node
*n
= new (pool
.allocate(sizeof(container_node
)))
327 container_node(nt
, nst
, flags
);
328 all_nodes
.push_back(n
);
332 if_node
* shader::create_if() {
333 if_node
* n
= new (pool
.allocate(sizeof(if_node
))) if_node();
334 all_nodes
.push_back(n
);
338 bb_node
* shader::create_bb(unsigned id
, unsigned loop_level
) {
339 bb_node
* n
= new (pool
.allocate(sizeof(bb_node
))) bb_node(id
, loop_level
);
340 all_nodes
.push_back(n
);
344 value
* shader::get_special_ro_value(unsigned sel
) {
345 return get_ro_value(special_ro_values
, VLK_PARAM
, sel
);
348 value
* shader::get_const_value(const literal
&v
) {
349 value
*val
= get_ro_value(const_values
, VLK_CONST
, v
);
350 val
->literal_value
= v
;
355 for (node_vec::iterator I
= all_nodes
.begin(), E
= all_nodes
.end();
359 for (gpr_array_vec::iterator I
= gpr_arrays
.begin(), E
= gpr_arrays
.end();
365 void shader::dump_ir() {
370 value
* shader::get_value_version(value
* v
, unsigned ver
) {
371 assert(!v
->is_readonly() && !v
->is_rel());
372 value
*vv
= get_value(v
->kind
, v
->select
, ver
);
376 vv
->array
= v
->array
;
382 gpr_array
* shader::get_gpr_array(unsigned reg
, unsigned chan
) {
384 for (regarray_vec::iterator I
= gpr_arrays
.begin(),
385 E
= gpr_arrays
.end(); I
!= E
; ++I
) {
387 unsigned achan
= a
->base_gpr
.chan();
388 unsigned areg
= a
->base_gpr
.sel();
389 if (achan
== chan
&& (reg
>= areg
&& reg
< areg
+a
->array_size
))
395 void shader::add_gpr_array(unsigned gpr_start
, unsigned gpr_count
,
396 unsigned comp_mask
) {
400 gpr_array
*a
= new gpr_array(
401 sel_chan(gpr_start
, chan
), gpr_count
);
403 SB_DUMP_PASS( cerr
<< "add_gpr_array: @" << a
->base_gpr
404 << " [" << a
->array_size
<< "]\n";
407 gpr_arrays
.push_back(a
);
414 value
* shader::get_pred_sel(int sel
) {
415 assert(sel
== 0 || sel
== 1);
417 pred_sels
[sel
] = get_const_value(sel
);
419 return pred_sels
[sel
];
422 cf_node
* shader::create_cf(unsigned op
) {
423 cf_node
*c
= create_cf();
429 std::string
shader::get_full_target_name() {
430 std::string s
= get_shader_target_name();
432 s
+= get_hw_chip_name();
434 s
+= get_hw_class_name();
438 const char* shader::get_hw_class_name() {
439 switch (ctx
.hw_class
) {
440 #define TRANSLATE_HW_CLASS(c) case HW_CLASS_##c: return #c
441 TRANSLATE_HW_CLASS(R600
);
442 TRANSLATE_HW_CLASS(R700
);
443 TRANSLATE_HW_CLASS(EVERGREEN
);
444 TRANSLATE_HW_CLASS(CAYMAN
);
445 #undef TRANSLATE_HW_CLASS
447 return "INVALID_CHIP_CLASS";
451 const char* shader::get_hw_chip_name() {
452 switch (ctx
.hw_chip
) {
453 #define TRANSLATE_CHIP(c) case HW_CHIP_##c: return #c
454 TRANSLATE_CHIP(R600
);
455 TRANSLATE_CHIP(RV610
);
456 TRANSLATE_CHIP(RV630
);
457 TRANSLATE_CHIP(RV670
);
458 TRANSLATE_CHIP(RV620
);
459 TRANSLATE_CHIP(RV635
);
460 TRANSLATE_CHIP(RS780
);
461 TRANSLATE_CHIP(RS880
);
462 TRANSLATE_CHIP(RV770
);
463 TRANSLATE_CHIP(RV730
);
464 TRANSLATE_CHIP(RV710
);
465 TRANSLATE_CHIP(RV740
);
466 TRANSLATE_CHIP(CEDAR
);
467 TRANSLATE_CHIP(REDWOOD
);
468 TRANSLATE_CHIP(JUNIPER
);
469 TRANSLATE_CHIP(CYPRESS
);
470 TRANSLATE_CHIP(HEMLOCK
);
471 TRANSLATE_CHIP(PALM
);
472 TRANSLATE_CHIP(SUMO
);
473 TRANSLATE_CHIP(SUMO2
);
474 TRANSLATE_CHIP(BARTS
);
475 TRANSLATE_CHIP(TURKS
);
476 TRANSLATE_CHIP(CAICOS
);
477 TRANSLATE_CHIP(CAYMAN
);
478 #undef TRANSLATE_CHIP
481 assert(!"unknown chip");
482 return "INVALID_CHIP";
486 const char* shader::get_shader_target_name() {
488 case TARGET_VS
: return "VS";
489 case TARGET_PS
: return "PS";
490 case TARGET_GS
: return "GS";
491 case TARGET_COMPUTE
: return "COMPUTE";
492 case TARGET_FETCH
: return "FETCH";
494 return "INVALID_TARGET";
498 void shader::simplify_dep_rep(node
* dr
) {
499 container_node
*p
= dr
->parent
;
500 if (p
->is_repeat()) {
501 repeat_node
*r
= static_cast<repeat_node
*>(p
);
502 r
->target
->expand_repeat(r
);
503 } else if (p
->is_depart()) {
504 depart_node
*d
= static_cast<depart_node
*>(p
);
505 d
->target
->expand_depart(d
);
508 dr
->parent
->cut(dr
->next
, NULL
);
512 // FIXME this is used in some places as the max non-temp gpr,
513 // (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
514 unsigned shader::first_temp_gpr() {
515 return MAX_GPR
- ctx
.alu_temp_gprs
;
518 unsigned shader::num_nontemp_gpr() {
519 return MAX_GPR
- 2 * ctx
.alu_temp_gprs
;
522 void shader::set_uses_kill() {
523 if (root
->src
.empty())
527 root
->src
[0] = get_special_value(SV_VALID_MASK
);
530 alu_node
* shader::clone(alu_node
* n
) {
531 alu_node
*c
= create_alu();
533 // FIXME: this may be wrong with indirect operands
543 void shader::collect_stats(bool opt
) {
544 if (!sb_context::dump_stat
)
547 shader_stats
&s
= opt
? opt_stats
: src_stats
;
555 ctx
.opt_stats
.accumulate(s
);
557 ctx
.src_stats
.accumulate(s
);
560 value
* shader::get_ro_value(value_map
& vm
, value_kind vk
, unsigned key
) {
561 value_map::iterator I
= vm
.find(key
);
564 value
*v
= create_value(vk
, key
, 0);
565 v
->flags
= VLF_READONLY
;
566 vm
.insert(std::make_pair(key
, v
));
570 void shader::create_bbs(container_node
* n
, bbs_vec
&bbs
, int loop_level
) {
572 bool inside_bb
= false;
573 bool last_inside_bb
= true;
574 node_iterator
bb_start(n
->begin()), I(bb_start
), E(n
->end());
576 for (; I
!= E
; ++I
) {
578 inside_bb
= k
->type
== NT_OP
;
580 if (inside_bb
&& !last_inside_bb
)
582 else if (!inside_bb
) {
584 && I
->type
!= NT_REPEAT
585 && I
->type
!= NT_DEPART
586 && I
->type
!= NT_IF
) {
587 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
589 n
->insert_node_before(*bb_start
, bb
);
591 bb
->move(bb_start
, I
);
594 if (k
->is_container()) {
597 if (k
->type
== NT_REGION
) {
598 loop
= static_cast<region_node
*>(k
)->is_loop();
601 create_bbs(static_cast<container_node
*>(k
), bbs
,
606 if (k
->type
== NT_DEPART
)
609 last_inside_bb
= inside_bb
;
612 if (last_inside_bb
) {
613 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
618 n
->insert_node_before(*bb_start
, bb
);
619 if (bb_start
!= n
->end())
620 bb
->move(bb_start
, n
->end());
623 if (n
->last
&& n
->last
->type
== NT_IF
) {
624 bb_node
*bb
= create_bb(bbs
.size(), loop_level
);
631 void shader::expand_bbs(bbs_vec
&bbs
) {
633 for (bbs_vec::iterator I
= bbs
.begin(), E
= bbs
.end(); I
!= E
; ++I
) {
639 sched_queue_id
shader::get_queue_id(node
* n
) {
640 switch (n
->subtype
) {
642 case NST_ALU_PACKED_INST
:
646 case NST_FETCH_INST
: {
647 fetch_node
*f
= static_cast<fetch_node
*>(n
);
648 if (ctx
.is_r600() && (f
->bc
.op_ptr
->flags
& FF_VTX
))
660 void shader_stats::collect(node
*n
) {
661 if (n
->is_alu_inst())
663 else if (n
->is_fetch_inst())
665 else if (n
->is_container()) {
666 container_node
*c
= static_cast<container_node
*>(n
);
668 if (n
->is_alu_group())
670 else if (n
->is_alu_clause())
672 else if (n
->is_fetch_clause())
674 else if (n
->is_cf_inst())
678 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
685 void shader_stats::accumulate(shader_stats
& s
) {
692 alu_groups
+= s
.alu_groups
;
693 alu_clauses
+= s
.alu_clauses
;
695 fetch_clauses
+= s
.fetch_clauses
;
699 void shader_stats::dump(std::ostream
& o
) {
700 o
<< "dw:" << ndw
<< ", gpr:" << ngpr
<< ", stk:" << nstack
701 << ", alu groups:" << alu_groups
<< ", alu clauses: " << alu_clauses
702 << ", alu:" << alu
<< ", fetch:" << fetch
703 << ", fetch clauses:" << fetch_clauses
707 o
<< ", shaders:" << shaders
;
// Prints to `o` the relative change from baseline `d1` to `d2` as a signed
// percentage, e.g. (100, 150) -> "50%", (200, 100) -> "-50%".
// Guard: a zero baseline would divide by zero (several shader_stats counters
// — e.g. fetch_clauses — can legitimately be 0), so print "N/A" instead.
static void print_diff(std::ostream &o, unsigned d1, unsigned d2) {
	if (d1)
		o << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
	else
		o << "N/A";
}
// Prints to `o` the relative change of each statistic from this (baseline)
// stats object to `s`, as a comma-separated "label: diff" list; each diff is
// rendered by print_diff() as a percentage.
// NOTE(review): this copy of the source is garbled — original line numbers
// are fused into the text and statements are split across lines; the tail of
// the function (after the "cf" entry) is missing from this view. Code left
// byte-identical; only comment lines added.
721 void shader_stats::dump_diff(std::ostream
& o
, shader_stats
& s
) {
// Size / resource usage counters.
722 o
<< "dw:"; print_diff(o
, ndw
, s
.ndw
);
723 o
<< ", gpr:" ; print_diff(o
, ngpr
, s
.ngpr
);
724 o
<< ", stk:" ; print_diff(o
, nstack
, s
.nstack
);
// ALU instruction counters.
725 o
<< ", alu groups:" ; print_diff(o
, alu_groups
, s
.alu_groups
);
726 o
<< ", alu clauses: " ; print_diff(o
, alu_clauses
, s
.alu_clauses
);
727 o
<< ", alu:" ; print_diff(o
, alu
, s
.alu
);
// Fetch and control-flow counters.
728 o
<< ", fetch:" ; print_diff(o
, fetch
, s
.fetch
);
729 o
<< ", fetch clauses:" ; print_diff(o
, fetch_clauses
, s
.fetch_clauses
);
730 o
<< ", cf:" ; print_diff(o
, cf
, s
.cf
);
734 } // namespace r600_sb