2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// Pass base class (derives from pass) that declares run_on() for a
// container_node and one virtual visit() overload per node type.
// The constructor only forwards the shader reference to pass.
// NOTE(review): `enter` presumably tells whether the node is being entered
// or left - confirm against the implementation.
47 class vpass
: public pass
{
51 vpass(shader
&s
) : pass(s
) {}
57 virtual void run_on(container_node
&n
);
// visit() overloads, one per node type
59 virtual bool visit(node
&n
, bool enter
);
60 virtual bool visit(container_node
&n
, bool enter
);
61 virtual bool visit(alu_group_node
&n
, bool enter
);
62 virtual bool visit(cf_node
&n
, bool enter
);
63 virtual bool visit(alu_node
&n
, bool enter
);
64 virtual bool visit(alu_packed_node
&n
, bool enter
);
65 virtual bool visit(fetch_node
&n
, bool enter
);
66 virtual bool visit(region_node
&n
, bool enter
);
67 virtual bool visit(repeat_node
&n
, bool enter
);
68 virtual bool visit(depart_node
&n
, bool enter
);
69 virtual bool visit(if_node
&n
, bool enter
);
70 virtual bool visit(bb_node
&n
, bool enter
);
// vpass variant that redeclares run_on() for a container_node; the
// constructor forwards the shader reference to vpass.
// NOTE(review): the name suggests a reverse-order traversal - confirm
// against the run_on() implementation.
74 class rev_vpass
: public vpass
{
77 rev_vpass(shader
&s
) : vpass(s
) {}
79 virtual void run_on(container_node
&n
);
83 // =================== PASSES
// vpass that declares visit() for cf, alu and fetch nodes together with
// matching dump() helpers and dump_dw().
// It can be constructed either from a shader plus an optional bytecode
// pointer, or from a shader plus a raw dword pointer and dword count.
87 class bc_dump
: public vpass
{
95 unsigned new_group
, group_index
;
// bc defaults to NULL when omitted
99 bc_dump(shader
&s
, bytecode
*bc
= NULL
);
// Stores bc_ptr/ndw in bc_data/ndw and value-initializes id, new_group
// and group_index.
101 bc_dump(shader
&s
, uint32_t *bc_ptr
, unsigned ndw
) :
102 vpass(s
), bc_data(bc_ptr
), ndw(ndw
), id(), new_group(), group_index() {}
107 virtual bool visit(cf_node
&n
, bool enter
);
108 virtual bool visit(alu_node
&n
, bool enter
);
109 virtual bool visit(fetch_node
&n
, bool enter
);
// count defaults to 2 when omitted
111 void dump_dw(unsigned dw_id
, unsigned count
= 2);
113 void dump(cf_node
& n
);
114 void dump(alu_node
& n
);
115 void dump(fetch_node
& n
);
// vpass with visit() overloads for several node types and two
// destination-cleanup helpers (cleanup_dst, cleanup_dst_vec).
// NOTE(review): "dce" presumably stands for dead code elimination - confirm.
119 class dce_cleanup
: public vpass
{
// remove_unused is taken from the DF_REMOVE_UNUSED bit of the shader's
// dce_flags; nodes_changed starts out false.
126 dce_cleanup(shader
&s
) : vpass(s
),
127 remove_unused(s
.dce_flags
& DF_REMOVE_UNUSED
), nodes_changed(false) {}
131 virtual bool visit(node
&n
, bool enter
);
132 virtual bool visit(alu_group_node
&n
, bool enter
);
133 virtual bool visit(cf_node
&n
, bool enter
);
134 virtual bool visit(alu_node
&n
, bool enter
);
135 virtual bool visit(alu_packed_node
&n
, bool enter
);
136 virtual bool visit(fetch_node
&n
, bool enter
);
137 virtual bool visit(region_node
&n
, bool enter
);
138 virtual bool visit(container_node
&n
, bool enter
);
142 void cleanup_dst(node
&n
);
143 bool cleanup_dst_vec(vvec
&vv
);
145 // Did we alter/remove nodes during a single pass?
// Pass (derives from pass) declaring run_on() plus helpers that process
// uses, defs and phi containers. The constructor only forwards the shader
// reference to pass.
// NOTE(review): the bool parameters (defs, uses, arr_def) presumably select
// which kind of information is processed - confirm in the implementation.
150 class def_use
: public pass
{
154 def_use(shader
&sh
) : pass(sh
) {}
157 void run_on(node
*n
, bool defs
);
161 void process_uses(node
*n
);
162 void process_defs(node
*n
, vvec
&vv
, bool arr_def
);
163 void process_phi(container_node
*c
, bool defs
, bool uses
);
// vpass declaring a visit() overload for every node type, a set of static
// dump_* helpers, and non-static helpers dump_common(), dump_flags() and
// dump_live_values(). The constructor sets level to 0.
// NOTE(review): level is presumably the current nesting/indent depth -
// confirm against the implementation.
168 class dump
: public vpass
{
175 dump(shader
&s
) : vpass(s
), level(0) {}
177 virtual bool visit(node
&n
, bool enter
);
178 virtual bool visit(container_node
&n
, bool enter
);
179 virtual bool visit(alu_group_node
&n
, bool enter
);
180 virtual bool visit(cf_node
&n
, bool enter
);
181 virtual bool visit(alu_node
&n
, bool enter
);
182 virtual bool visit(alu_packed_node
&n
, bool enter
);
183 virtual bool visit(fetch_node
&n
, bool enter
);
184 virtual bool visit(region_node
&n
, bool enter
);
185 virtual bool visit(repeat_node
&n
, bool enter
);
186 virtual bool visit(depart_node
&n
, bool enter
);
187 virtual bool visit(if_node
&n
, bool enter
);
188 virtual bool visit(bb_node
&n
, bool enter
);
// Static helpers: callable without a dump instance
191 static void dump_op(node
&n
, const char *name
);
192 static void dump_vec(const vvec
& vv
);
193 static void dump_set(shader
&sh
, val_set
& v
);
195 static void dump_rels(vvec
& vv
);
197 static void dump_val(value
*v
);
198 static void dump_op(node
*n
);
200 static void dump_op_list(container_node
*c
);
201 static void dump_queue(sched_queue
&q
);
203 static void dump_alu(alu_node
*n
);
// Non-static helpers
209 void dump_common(node
&n
);
210 void dump_flags(node
&n
);
212 void dump_live_values(container_node
&n
, bool before
);
216 // Global Code Motion
// Pass (derives from pass) holding per-queue scheduling state (arrays of
// sched_queue sized SQ_NUM), typedefs for per-node maps, and the scheduling
// helpers declared below.
// NOTE(review): the td_* / bu_* prefixes presumably denote the top-down and
// bottom-up scheduling phases - confirm against the implementation.
218 class gcm
: public pass
{
220 sched_queue bu_ready
[SQ_NUM
];
221 sched_queue bu_ready_next
[SQ_NUM
];
222 sched_queue bu_ready_early
[SQ_NUM
];
224 sched_queue ready_above
;
226 container_node pending
;
231 op_info() : top_bb(), bottom_bb() {}
// node -> op_info
234 typedef std::map
<node
*, op_info
> op_info_map
;
// node -> unsigned counter
236 typedef std::map
<node
*, unsigned> nuc_map
;
241 typedef std::vector
<nuc_map
> nuc_stack
;
250 node_list pending_nodes
;
254 // for register pressure tracking in bottom-up pass
258 static const int rp_threshold
= 100;
260 bool pending_exec_mask_update
;
// Value-initializes the members listed below; nuc_stk is constructed with
// the argument 1.
264 gcm(shader
&sh
) : pass(sh
),
265 bu_ready(), bu_ready_next(), bu_ready_early(),
266 ready(), op_map(), uses(), nuc_stk(1), ucs_level(),
267 bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
268 live(), live_count(), pending_exec_mask_update() {}
274 void collect_instructions(container_node
*c
, bool early_pass
);
276 void sched_early(container_node
*n
);
277 void td_sched_bb(bb_node
*bb
);
278 bool td_is_ready(node
*n
);
279 void td_release_uses(vvec
&v
);
280 void td_release_val(value
*v
);
281 void td_schedule(bb_node
*bb
, node
*n
);
283 void sched_late(container_node
*n
);
284 void bu_sched_bb(bb_node
*bb
);
285 void bu_release_defs(vvec
&v
, bool src
);
286 void bu_release_phi_defs(container_node
*p
, unsigned op
);
287 bool bu_is_ready(node
*n
);
288 void bu_release_val(value
*v
);
289 void bu_release_op(node
* n
);
290 void bu_find_best_bb(node
*n
, op_info
&oi
);
291 void bu_schedule(container_node
*bb
, node
*n
);
293 void push_uc_stack();
296 void init_def_count(nuc_map
&m
, container_node
&s
);
297 void init_use_count(nuc_map
&m
, container_node
&s
);
298 unsigned get_uc_vec(vvec
&vv
);
299 unsigned get_dc_vec(vvec
&vv
, bool src
);
301 void add_ready(node
*n
);
303 void dump_uc_stack();
305 unsigned real_alu_count(sched_queue
&q
, unsigned max
);
307 // check whether at least `threshold` alu instructions are ready
308 bool check_alu_ready_count(unsigned threshold
);
// vpass declaring visit() for node, cf, alu, alu_packed, fetch and region
// nodes, plus helpers that process an op and its source values.
// NOTE(review): "gvn" presumably stands for global value numbering - confirm.
312 class gvn
: public vpass
{
317 gvn(shader
&sh
) : vpass(sh
) {}
319 virtual bool visit(node
&n
, bool enter
);
320 virtual bool visit(cf_node
&n
, bool enter
);
321 virtual bool visit(alu_node
&n
, bool enter
);
322 virtual bool visit(alu_packed_node
&n
, bool enter
);
323 virtual bool visit(fetch_node
&n
, bool enter
);
324 virtual bool visit(region_node
&n
, bool enter
);
// rewrite defaults to true when omitted
328 void process_op(node
&n
, bool rewrite
= true);
330 // returns true if the value was rewritten
// v is a reference to a pointer, so the callee can replace the pointer itself
331 bool process_src(value
* &v
, bool rewrite
);
334 void process_alu_src_constants(node
&n
, value
* &v
);
// Pass (derives from pass) operating on region_node pointers: run_on() and
// check_and_convert() both return bool, and convert_phi() returns an
// alu_node pointer built from a select value and a phi node.
// NOTE(review): the meaning of the bool results is not visible here -
// confirm against the implementation.
338 class if_conversion
: public pass
{
342 if_conversion(shader
&sh
) : pass(sh
) {}
346 bool run_on(region_node
*r
);
// NOTE(review): the remaining parameters of this declaration are not
// visible in this part of the file.
348 void convert_kill_instructions(region_node
*r
, value
*em
, bool branch
,
351 bool check_and_convert(region_node
*r
);
353 alu_node
* convert_phi(value
*select
, node
*phi
);
// rev_vpass declaring visit() for most node types plus helpers that add,
// remove and process values and value vectors. The constructor sets
// live_changed to false.
// NOTE(review): the bool-returning helpers presumably report whether the
// live set changed - confirm against the implementation.
358 class liveness
: public rev_vpass
{
366 liveness(shader
&s
) : rev_vpass(s
), live_changed(false) {}
370 virtual bool visit(node
&n
, bool enter
);
371 virtual bool visit(bb_node
&n
, bool enter
);
372 virtual bool visit(container_node
&n
, bool enter
);
373 virtual bool visit(alu_group_node
&n
, bool enter
);
374 virtual bool visit(cf_node
&n
, bool enter
);
375 virtual bool visit(alu_node
&n
, bool enter
);
376 virtual bool visit(alu_packed_node
&n
, bool enter
);
377 virtual bool visit(fetch_node
&n
, bool enter
);
378 virtual bool visit(region_node
&n
, bool enter
);
379 virtual bool visit(repeat_node
&n
, bool enter
);
380 virtual bool visit(depart_node
&n
, bool enter
);
381 virtual bool visit(if_node
&n
, bool enter
);
385 void update_interferences();
386 void process_op(node
&n
);
388 bool remove_val(value
*v
);
389 bool remove_vec(vvec
&v
);
390 bool process_outs(node
& n
);
391 void process_ins(node
& n
);
393 void process_phi_outs(container_node
*phi
);
394 void process_phi_branch(container_node
*phi
, unsigned id
);
396 bool process_maydef(value
*v
);
398 bool add_vec(vvec
&vv
, bool src
);
400 void update_src_vec(vvec
&vv
, bool src
);
// Struct taken by non-const reference in peephole::get_bool_op_info().
// NOTE(review): its members are not visible in this part of the file.
404 struct bool_op_info
{
// Pass (derives from pass) declaring run_on() for a container_node and a
// set of helpers that take alu_node pointers (optimize_* and
// convert_float_setcc) or look up information about a boolean value.
// NOTE(review): the bool results presumably signal whether the lookup
// succeeded - confirm against the implementation.
411 class peephole
: public pass
{
415 peephole(shader
&sh
) : pass(sh
) {}
419 void run_on(container_node
*c
);
421 void optimize_cc_op(alu_node
*a
);
423 void optimize_cc_op2(alu_node
*a
);
424 void optimize_CNDcc_op(alu_node
*a
);
// bop is passed by non-const reference
426 bool get_bool_op_info(value
*b
, bool_op_info
& bop
);
// a is a reference to a pointer, so the callee can replace the pointer
427 bool get_bool_flt_to_int_source(alu_node
* &a
);
428 void convert_float_setcc(alu_node
*f2i
, alu_node
*s
);
// rev_vpass that declares visit() only for node and alu_node, plus helpers
// try_inline(), try_reduce(), eliminate() and unpredicate().
432 class psi_ops
: public rev_vpass
{
// Keep the base-class visit() overloads visible; declaring visit() here
// would otherwise hide them.
433 using rev_vpass::visit
;
437 psi_ops(shader
&s
) : rev_vpass(s
) {}
439 virtual bool visit(node
&n
, bool enter
);
440 virtual bool visit(alu_node
&n
, bool enter
);
442 bool try_inline(node
&n
);
443 bool try_reduce(node
&n
);
444 bool eliminate(node
&n
);
446 void unpredicate(node
*n
);
450 // check correctness of the generated code, e.g.:
451 // - expected source operand value is the last value written to its gpr,
452 // - all arguments of phi node should be allocated to the same gpr,
// Pass (derives from pass) that keeps a stack of register-to-value maps
// (rm_stack, indexed by rm_stk_level) and declares check_* / process_*
// helpers together with error reporting functions.
454 class ra_checker
: public pass
{
// sel_chan -> value pointer
456 typedef std::map
<sel_chan
, value
*> reg_value_map
;
458 typedef std::vector
<reg_value_map
> regmap_stack
;
460 regmap_stack rm_stack
;
461 unsigned rm_stk_level
;
// Starts at stack level 0 and value-initializes prev_dst.
467 ra_checker(shader
&sh
) : pass(sh
), rm_stk_level(0), prev_dst() {}
471 void run_on(container_node
*c
);
473 void dump_error(const error_info
&e
);
474 void dump_all_errors();
// Returns the map at the current stack level.
478 reg_value_map
& rmap() { return rm_stack
[rm_stk_level
]; }
483 // when going out of the alu clause, values in the clause temporary gprs,
484 // AR, predicate values, PS/PV are destroyed
485 void kill_alu_only_regs();
486 void error(node
*n
, unsigned id
, std::string msg
);
488 void check_phi_src(container_node
*p
, unsigned id
);
489 void process_phi_dst(container_node
*p
);
490 void check_alu_group(alu_group_node
*g
);
491 void process_op_dst(node
*n
);
492 void check_op_src(node
*n
);
493 void check_src_vec(node
*n
, unsigned id
, vvec
&vv
, bool src
);
494 void check_value_gpr(node
*n
, unsigned id
, value
*v
);
497 // =======================================
// Pass (derives from pass); the constructor only forwards the shader
// reference to pass.
// NOTE(review): no other members are visible in this part of the file.
500 class ra_coalesce
: public pass
{
504 ra_coalesce(shader
&sh
) : pass(sh
) {}
511 // =======================================
// Pass (derives from pass) declaring channel bookkeeping helpers
// (add_prev_chan, get_preferable_chan_mask) and coloring helpers (color,
// color_bs_constraint, assign_color). The constructor value-initializes
// prev_chans and picks ra_tune based on whether the context is cayman.
513 class ra_init
: public pass
{
517 ra_init(shader
&sh
) : pass(sh
), prev_chans() {
519 // The parameter below affects register channels distribution.
520 // For cayman (VLIW-4) we're trying to distribute the channels
521 // uniformly, this means significantly better alu slots utilization
522 // at the expense of higher gpr usage. Hopefully this will improve
523 // performance, though it has to be proven with real benchmarks yet.
524 // For VLIW-5 this method could also slightly improve slots
525 // utilization, but increased register pressure seems more significant
526 // and overall performance effect is negative according to some
527 // benchmarks, so it's not used currently. Basically, VLIW-5 doesn't
528 // really need it because trans slot (unrestricted by register write
529 // channel) allows to consume most deviations from uniform channel
// distribution.
531 // Value 3 means that for new allocation we'll use channel that differs
532 // from 3 last used channels. 0 for VLIW-5 effectively turns this off.
// 3 on cayman, 0 otherwise
534 ra_tune
= sh
.get_ctx().is_cayman() ? 3 : 0;
544 void add_prev_chan(unsigned chan
);
545 unsigned get_preferable_chan_mask();
547 void ra_node(container_node
*c
);
548 void process_op(node
*n
);
550 void color(value
*v
);
552 void color_bs_constraint(ra_constraint
*c
);
// c is passed by value
554 void assign_color(value
*v
, sel_chan c
);
558 // =======================================
// Pass (derives from pass) declaring a family of split_* helpers for
// containers, single ops, packed alu nodes, value vectors and phi
// sources/destinations, plus init_phi_constraints().
560 class ra_split
: public pass
{
564 ra_split(shader
&sh
) : pass(sh
) {}
568 void split(container_node
*n
);
569 void split_op(node
*n
);
570 void split_alu_packed(alu_packed_node
*n
);
571 void split_vector_inst(node
*n
);
573 void split_packed_ins(alu_packed_node
*n
);
576 void split_pinned_outs(node
*n
);
// vv, v1 and v2 are all passed by non-const reference
579 void split_vec(vvec
&vv
, vvec
&v1
, vvec
&v2
, bool allow_swz
);
// NOTE(review): the remaining parameters of this declaration are not
// visible in this part of the file.
581 void split_phi_src(container_node
*loc
, container_node
*c
, unsigned id
,
583 void split_phi_dst(node
*loc
, container_node
*c
, bool loop
);
584 void init_phi_constraints(container_node
*c
);
// vpass declaring visit() for cf, alu, fetch, region, repeat and depart
// nodes. It works on a stack of val_set objects (stk) indexed by level;
// the constructor sets level to 0.
589 class ssa_prepare
: public vpass
{
// stack of value sets
592 typedef std::vector
<val_set
> vd_stk
;
598 ssa_prepare(shader
&s
) : vpass(s
), level(0) {}
600 virtual bool visit(cf_node
&n
, bool enter
);
601 virtual bool visit(alu_node
&n
, bool enter
);
602 virtual bool visit(fetch_node
&n
, bool enter
);
603 virtual bool visit(region_node
&n
, bool enter
);
604 virtual bool visit(repeat_node
&n
, bool enter
);
605 virtual bool visit(depart_node
&n
, bool enter
);
// NOTE(review): the two statements below belong to inline helper bodies
// whose surrounding lines are not visible here. The first tests whether
// level + 1 exceeds the stack size; the second adds the set at level + 1
// into the set at level.
611 if (level
+ 1 > stk
.size())
619 stk
[level
].add_set(stk
[level
+ 1]);
622 void add_defs(node
&n
);
// Returns the value set at the current level.
624 val_set
& cur_set() { return stk
[level
]; }
626 container_node
* create_phi_nodes(int count
);
// vpass declaring visit() for most node types, index helpers operating on
// a def_map (get_index, set_index, new_index) and rename_* helpers for
// uses, defs, source/destination vectors, phi arguments and "virt" values.
629 class ssa_rename
: public vpass
{
// value pointer -> unsigned index
632 typedef sb_map
<value
*, unsigned> def_map
;
// stack of def_map objects
635 std::stack
<def_map
> rename_stack
;
// uint32_t key -> value pointer
637 typedef std::map
<uint32_t, value
*> val_map
;
642 ssa_rename(shader
&s
) : vpass(s
) {}
646 virtual bool visit(container_node
&n
, bool enter
);
647 virtual bool visit(node
&n
, bool enter
);
648 virtual bool visit(alu_group_node
&n
, bool enter
);
649 virtual bool visit(cf_node
&n
, bool enter
);
650 virtual bool visit(alu_node
&n
, bool enter
);
651 virtual bool visit(alu_packed_node
&n
, bool enter
);
652 virtual bool visit(fetch_node
&n
, bool enter
);
653 virtual bool visit(region_node
&n
, bool enter
);
654 virtual bool visit(repeat_node
&n
, bool enter
);
655 virtual bool visit(depart_node
&n
, bool enter
);
656 virtual bool visit(if_node
&n
, bool enter
);
660 void push(node
*phi
);
// Index helpers; each takes the def_map by non-const reference
663 unsigned get_index(def_map
& m
, value
* v
);
664 void set_index(def_map
& m
, value
* v
, unsigned index
);
665 unsigned new_index(def_map
& m
, value
* v
);
// Both return a value pointer
667 value
* rename_use(node
*n
, value
* v
);
668 value
* rename_def(node
*def
, value
* v
);
670 void rename_src_vec(node
*n
, vvec
&vv
, bool src
);
671 void rename_dst_vec(node
*def
, vvec
&vv
, bool set_def
);
673 void rename_src(node
*n
);
674 void rename_dst(node
*n
);
676 void rename_phi_args(container_node
*phi
, unsigned op
, bool def
);
678 void rename_virt(node
*n
);
679 void rename_virt_val(node
*n
, value
*v
);
// Pass (derives from pass) declaring finalize_* helpers for loops, ifs,
// alu groups, fetches and cf nodes, plus helpers that update ngpr/nstack
// and emit extra fetch-related instructions.
682 class bc_finalizer
: public pass
{
// one cf_node pointer per export type
684 cf_node
*last_export
[EXP_TYPE_COUNT
];
// Value-initializes last_export, last_cf and ngpr.
// NOTE(review): the rest of the initializer list is not visible in this
// part of the file.
692 bc_finalizer(shader
&sh
) : pass(sh
), last_export(), last_cf(), ngpr(),
697 void finalize_loop(region_node
*r
);
698 void finalize_if(region_node
*r
);
700 void run_on(container_node
*c
);
702 void insert_rv6xx_load_ar_workaround(alu_group_node
*b4
);
703 void finalize_alu_group(alu_group_node
*g
, node
*prev_node
);
704 bool finalize_alu_src(alu_group_node
*g
, alu_node
*a
, alu_group_node
*prev_node
);
706 void emit_set_grad(fetch_node
* f
);
707 void finalize_fetch(fetch_node
*f
);
709 void finalize_cf(cf_node
*c
);
// Returns a sel_chan by value
711 sel_chan
translate_kcache(cf_node
*alu
, value
*v
);
713 void update_ngpr(unsigned gpr
);
// add defaults to 0 when omitted
714 void update_nstack(region_node
*r
, unsigned add
= 0);
// loops and ifs are passed by non-const reference.
// NOTE(review): the remaining parameters of this declaration are not
// visible in this part of the file.
716 unsigned get_stack_depth(node
*n
, unsigned &loops
, unsigned &ifs
,
722 void copy_fetch_src(fetch_node
&dst
, fetch_node
&src
, unsigned arg_start
);
723 void emit_set_texture_offsets(fetch_node
&f
);
727 } // namespace r600_sb
729 #endif /* SB_PASS_H_ */