/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
// vpass: a pass (derived from `pass`) that declares run_on(container_node&)
// and one virtual visit(<node type>&, bool enter) overload per node type.
// NOTE(review): access specifiers, other members and the closing brace are not
// visible in this view; presumably this is the visitor-style base that the
// other passes in this header derive from - confirm against the full header.
47 class vpass
: public pass
{
// Forwards the shader reference to the `pass` base constructor.
51 vpass(shader
&s
) : pass(s
) {}
// Entry point taking the container to process; declared only, no body here.
57 virtual void run_on(container_node
&n
);
// One visit() overload per node type. Each takes the node and a bool `enter`
// and returns bool.
// NOTE(review): presumably `enter` distinguishes entering vs. leaving the node
// and the return value controls descent into children - confirm in the
// implementation file.
59 virtual bool visit(node
&n
, bool enter
);
60 virtual bool visit(container_node
&n
, bool enter
);
61 virtual bool visit(alu_group_node
&n
, bool enter
);
62 virtual bool visit(cf_node
&n
, bool enter
);
63 virtual bool visit(alu_node
&n
, bool enter
);
64 virtual bool visit(alu_packed_node
&n
, bool enter
);
65 virtual bool visit(fetch_node
&n
, bool enter
);
66 virtual bool visit(region_node
&n
, bool enter
);
67 virtual bool visit(repeat_node
&n
, bool enter
);
68 virtual bool visit(depart_node
&n
, bool enter
);
69 virtual bool visit(if_node
&n
, bool enter
);
70 virtual bool visit(bb_node
&n
, bool enter
);
// rev_vpass: derives from vpass and redeclares run_on(container_node&).
// NOTE(review): judging by the name this presumably walks children in reverse
// order - confirm in the implementation; the closing brace is not visible here.
74 class rev_vpass
: public vpass
{
// Forwards the shader reference to the vpass constructor.
77 rev_vpass(shader
&s
) : vpass(s
) {}
// Redeclares vpass::run_on for this class.
79 virtual void run_on(container_node
&n
);
83 // =================== PASSES
// bc_dump: vpass subclass declaring visit() overloads for cf, alu and fetch
// nodes together with dump()/dump_dw() helpers.
// NOTE(review): the declarations of bc_data, ndw and id (used in the second
// constructor's initializer list) are not visible in this view.
87 class bc_dump
: public vpass
{
95 unsigned new_group
, group_index
;
// Constructor taking the shader and an optional bytecode pointer (defaults to
// NULL); defined elsewhere.
99 bc_dump(shader
&s
, bytecode
*bc
= NULL
);
// Constructor taking a raw uint32_t pointer and a count `ndw`; stores both in
// bc_data / ndw and value-initializes id, new_group and group_index.
101 bc_dump(shader
&s
, uint32_t *bc_ptr
, unsigned ndw
) :
102 vpass(s
), bc_data(bc_ptr
), ndw(ndw
), id(), new_group(), group_index() {}
// visit() overloads declared by this class.
107 virtual bool visit(cf_node
&n
, bool enter
);
108 virtual bool visit(alu_node
&n
, bool enter
);
109 virtual bool visit(fetch_node
&n
, bool enter
);
// Takes a dword id and a count that defaults to 2.
// NOTE(review): presumably prints `count` dwords starting at `dw_id` - confirm.
111 void dump_dw(unsigned dw_id
, unsigned count
= 2);
// Per-node-type dump helpers.
113 void dump(cf_node
& n
);
114 void dump(alu_node
& n
);
115 void dump(fetch_node
& n
);
// dce_cleanup: vpass subclass declaring visit() overloads plus the
// cleanup_dst() / cleanup_dst_vec() helpers.
// NOTE(review): the declaration of remove_unused and the closing brace are not
// visible in this view.
119 class dce_cleanup
: public vpass
{
// Initializes remove_unused from the DF_REMOVE_UNUSED bit of s.dce_flags.
126 dce_cleanup(shader
&s
) : vpass(s
),
127 remove_unused(s
.dce_flags
& DF_REMOVE_UNUSED
) {}
// visit() overloads declared by this class.
129 virtual bool visit(node
&n
, bool enter
);
130 virtual bool visit(alu_group_node
&n
, bool enter
);
131 virtual bool visit(cf_node
&n
, bool enter
);
132 virtual bool visit(alu_node
&n
, bool enter
);
133 virtual bool visit(alu_packed_node
&n
, bool enter
);
134 virtual bool visit(fetch_node
&n
, bool enter
);
135 virtual bool visit(region_node
&n
, bool enter
);
136 virtual bool visit(container_node
&n
, bool enter
);
// Helpers operating on a node and on a value vector respectively.
// NOTE(review): the meaning of cleanup_dst_vec()'s bool result is not visible
// here - confirm in the implementation.
140 void cleanup_dst(node
&n
);
141 bool cleanup_dst_vec(vvec
&vv
);
// def_use: pass subclass declaring run_on(node*, bool defs) and the
// process_uses / process_defs / process_phi helpers.
// NOTE(review): presumably builds def/use information for values - confirm in
// the implementation; the closing brace is not visible in this view.
146 class def_use
: public pass
{
// Forwards the shader reference to the `pass` base constructor.
150 def_use(shader
&sh
) : pass(sh
) {}
// Takes a node and a bool `defs`.
// NOTE(review): presumably `defs` selects def processing vs. use processing.
153 void run_on(node
*n
, bool defs
);
157 void process_uses(node
*n
);
158 void process_defs(node
*n
, vvec
&vv
, bool arr_def
);
159 void process_phi(container_node
*c
, bool defs
, bool uses
);
// dump: vpass subclass declaring a visit() overload for every node type, a
// set of static dump_* helpers, and a few non-static helpers.
// NOTE(review): the declaration of `level` and the closing brace are not
// visible in this view.
164 class dump
: public vpass
{
// Initializes `level` to 0.
171 dump(shader
&s
) : vpass(s
), level(0) {}
// visit() overloads declared by this class.
173 virtual bool visit(node
&n
, bool enter
);
174 virtual bool visit(container_node
&n
, bool enter
);
175 virtual bool visit(alu_group_node
&n
, bool enter
);
176 virtual bool visit(cf_node
&n
, bool enter
);
177 virtual bool visit(alu_node
&n
, bool enter
);
178 virtual bool visit(alu_packed_node
&n
, bool enter
);
179 virtual bool visit(fetch_node
&n
, bool enter
);
180 virtual bool visit(region_node
&n
, bool enter
);
181 virtual bool visit(repeat_node
&n
, bool enter
);
182 virtual bool visit(depart_node
&n
, bool enter
);
183 virtual bool visit(if_node
&n
, bool enter
);
184 virtual bool visit(bb_node
&n
, bool enter
);
// Static helpers: callable without a dump instance.
187 static void dump_op(node
&n
, const char *name
);
188 static void dump_vec(const vvec
& vv
);
189 static void dump_set(shader
&sh
, val_set
& v
);
191 static void dump_rels(vvec
& vv
);
193 static void dump_val(value
*v
);
194 static void dump_op(node
*n
);
196 static void dump_op_list(container_node
*c
);
197 static void dump_queue(sched_queue
&q
);
199 static void dump_alu(alu_node
*n
);
// Non-static helpers.
205 void dump_common(node
&n
);
206 void dump_flags(node
&n
);
208 void dump_live_values(container_node
&n
, bool before
);
212 // Global Code Motion
// gcm: pass subclass holding scheduling queues, typedefs for per-node maps,
// and td_* / bu_* scheduling helpers.
// NOTE(review): the td_/bu_ prefixes presumably mean top-down and bottom-up
// (the comment further down mentions a "bottom-up pass") - confirm.
// NOTE(review): several members named in the constructor's initializer list
// (ready, op_map, uses, nuc_stk, ucs_level, bu_bb, pending_defs, cur_sq, live,
// live_count) and the closing brace are not visible in this view.
214 class gcm
: public pass
{
// Three arrays of sched_queue, each with SQ_NUM elements.
216 sched_queue bu_ready
[SQ_NUM
];
217 sched_queue bu_ready_next
[SQ_NUM
];
218 sched_queue bu_ready_early
[SQ_NUM
];
220 sched_queue ready_above
;
222 container_node pending
;
// Default constructor of op_info: value-initializes top_bb and bottom_bb.
// NOTE(review): the enclosing op_info declaration is not visible in this view.
227 op_info() : top_bb(), bottom_bb() {}
// Map from node pointer to op_info.
230 typedef std::map
<node
*, op_info
> op_info_map
;
// Map from node pointer to an unsigned count.
232 typedef std::map
<node
*, unsigned> nuc_map
;
// Vector of nuc_map, used as a stack type by name.
237 typedef std::vector
<nuc_map
> nuc_stack
;
246 node_list pending_nodes
;
250 // for register pressure tracking in bottom-up pass
254 static const int rp_threshold
= 100;
256 bool pending_exec_mask_update
;
// Value-initializes all listed members; nuc_stk is constructed with argument 1.
260 gcm(shader
&sh
) : pass(sh
),
261 bu_ready(), bu_ready_next(), bu_ready_early(),
262 ready(), op_map(), uses(), nuc_stk(1), ucs_level(),
263 bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
264 live(), live_count(), pending_exec_mask_update() {}
270 void collect_instructions(container_node
*c
, bool early_pass
);
// sched_early and the td_* helpers.
272 void sched_early(container_node
*n
);
273 void td_sched_bb(bb_node
*bb
);
274 bool td_is_ready(node
*n
);
275 void td_release_uses(vvec
&v
);
276 void td_release_val(value
*v
);
277 void td_schedule(bb_node
*bb
, node
*n
);
// sched_late and the bu_* helpers.
279 void sched_late(container_node
*n
);
280 void bu_sched_bb(bb_node
*bb
);
281 void bu_release_defs(vvec
&v
, bool src
);
282 void bu_release_phi_defs(container_node
*p
, unsigned op
);
283 bool bu_is_ready(node
*n
);
284 void bu_release_val(value
*v
);
285 void bu_release_op(node
* n
);
286 void bu_find_best_bb(node
*n
, op_info
&oi
);
287 void bu_schedule(container_node
*bb
, node
*n
);
// Helpers working with nuc_map and value vectors.
289 void push_uc_stack();
292 void init_def_count(nuc_map
&m
, container_node
&s
);
293 void init_use_count(nuc_map
&m
, container_node
&s
);
294 unsigned get_uc_vec(vvec
&vv
);
295 unsigned get_dc_vec(vvec
&vv
, bool src
);
297 void add_ready(node
*n
);
299 void dump_uc_stack();
301 unsigned real_alu_count(sched_queue
&q
, unsigned max
);
303 // check if we have not less than threshold ready alu instructions
304 bool check_alu_ready_count(unsigned threshold
);
// gvn: vpass subclass declaring visit() overloads and the process_op /
// process_src / process_alu_src_constants helpers.
// NOTE(review): the name presumably stands for global value numbering -
// confirm; the closing brace is not visible in this view.
308 class gvn
: public vpass
{
// Forwards the shader reference to the vpass constructor.
313 gvn(shader
&sh
) : vpass(sh
) {}
// visit() overloads declared by this class.
315 virtual bool visit(node
&n
, bool enter
);
316 virtual bool visit(cf_node
&n
, bool enter
);
317 virtual bool visit(alu_node
&n
, bool enter
);
318 virtual bool visit(alu_packed_node
&n
, bool enter
);
319 virtual bool visit(fetch_node
&n
, bool enter
);
320 virtual bool visit(region_node
&n
, bool enter
);
// `rewrite` defaults to true.
324 void process_op(node
&n
, bool rewrite
= true);
326 // returns true if the value was rewritten
// `v` is a reference to a value pointer, so the callee can replace the pointer.
327 bool process_src(value
* &v
, bool rewrite
);
// `v` is likewise passed as a reference to a value pointer.
330 void process_alu_src_constants(node
&n
, value
* &v
);
// if_conversion: pass subclass declaring run_on(region_node*) and helpers
// that work on regions, kill instructions and phi nodes.
// NOTE(review): the closing brace is not visible in this view.
334 class if_conversion
: public pass
{
// Forwards the shader reference to the `pass` base constructor.
338 if_conversion(shader
&sh
) : pass(sh
) {}
// NOTE(review): the meaning of the bool result is not visible here - confirm.
342 bool run_on(region_node
*r
);
// NOTE(review): this signature is truncated in this view; the parameters
// after `branch` and the closing parenthesis are missing.
344 void convert_kill_instructions(region_node
*r
, value
*em
, bool branch
,
347 bool check_and_convert(region_node
*r
);
// Takes a select value and a phi node and returns an alu_node pointer.
349 alu_node
* convert_phi(value
*select
, node
*phi
);
// liveness: rev_vpass subclass declaring visit() overloads and helpers that
// operate on values, value vectors and phi containers.
// NOTE(review): the declaration of live_changed and the closing brace are not
// visible in this view.
354 class liveness
: public rev_vpass
{
// Initializes live_changed to false.
362 liveness(shader
&s
) : rev_vpass(s
), live_changed(false) {}
// visit() overloads declared by this class.
366 virtual bool visit(node
&n
, bool enter
);
367 virtual bool visit(bb_node
&n
, bool enter
);
368 virtual bool visit(container_node
&n
, bool enter
);
369 virtual bool visit(alu_group_node
&n
, bool enter
);
370 virtual bool visit(cf_node
&n
, bool enter
);
371 virtual bool visit(alu_node
&n
, bool enter
);
372 virtual bool visit(alu_packed_node
&n
, bool enter
);
373 virtual bool visit(fetch_node
&n
, bool enter
);
374 virtual bool visit(region_node
&n
, bool enter
);
375 virtual bool visit(repeat_node
&n
, bool enter
);
376 virtual bool visit(depart_node
&n
, bool enter
);
377 virtual bool visit(if_node
&n
, bool enter
);
// Helper declarations.
// NOTE(review): the meaning of the bool results of remove_val, remove_vec,
// process_outs, process_maydef and add_vec is not visible here - confirm.
381 void update_interferences();
382 void process_op(node
&n
);
384 bool remove_val(value
*v
);
385 bool remove_vec(vvec
&v
);
386 bool process_outs(node
& n
);
387 void process_ins(node
& n
);
389 void process_phi_outs(container_node
*phi
);
390 void process_phi_branch(container_node
*phi
, unsigned id
);
392 bool process_maydef(value
*v
);
394 bool add_vec(vvec
&vv
, bool src
);
396 void update_src_vec(vvec
&vv
, bool src
);
// bool_op_info: struct passed by reference to peephole::get_bool_op_info().
// NOTE(review): its members and closing brace are not visible in this view.
400 struct bool_op_info
{
// peephole: pass subclass declaring run_on(container_node*) and helpers that
// operate on individual alu_node instructions.
// NOTE(review): the closing brace is not visible in this view.
407 class peephole
: public pass
{
// Forwards the shader reference to the `pass` base constructor.
411 peephole(shader
&sh
) : pass(sh
) {}
415 void run_on(container_node
*c
);
// Helpers that each take a single alu_node pointer.
417 void optimize_cc_op(alu_node
*a
);
419 void optimize_cc_op2(alu_node
*a
);
420 void optimize_CNDcc_op(alu_node
*a
);
// Takes a value and a bool_op_info by non-const reference.
// NOTE(review): presumably fills `bop` and returns whether it succeeded.
422 bool get_bool_op_info(value
*b
, bool_op_info
& bop
);
// `a` is a reference to an alu_node pointer, so the callee can replace it.
423 bool get_bool_flt_to_int_source(alu_node
* &a
);
424 void convert_float_setcc(alu_node
*f2i
, alu_node
*s
);
// psi_ops: rev_vpass subclass that declares visit() for node and alu_node
// only, plus the try_inline / try_reduce / eliminate / unpredicate helpers.
// NOTE(review): the closing brace is not visible in this view.
428 class psi_ops
: public rev_vpass
{
// Keeps the base-class visit() overloads visible; the two overloads declared
// below would otherwise hide them.
429 using rev_vpass::visit
;
// Forwards the shader reference to the rev_vpass constructor.
433 psi_ops(shader
&s
) : rev_vpass(s
) {}
435 virtual bool visit(node
&n
, bool enter
);
436 virtual bool visit(alu_node
&n
, bool enter
);
// NOTE(review): the meaning of the bool results is not visible here - confirm.
438 bool try_inline(node
&n
);
439 bool try_reduce(node
&n
);
440 bool eliminate(node
&n
);
442 void unpredicate(node
*n
);
446 // check correctness of the generated code, e.g.:
447 // - expected source operand value is the last value written to its gpr,
448 // - all arguments of phi node should be allocated to the same gpr,
// ra_checker: pass subclass keeping a stack of register-to-value maps
// (rm_stack, indexed by rm_stk_level) and declaring check_* / process_*
// helpers plus error reporting.
// NOTE(review): the declaration of prev_dst and the closing brace are not
// visible in this view.
450 class ra_checker
: public pass
{
// Map from sel_chan to value pointer.
452 typedef std::map
<sel_chan
, value
*> reg_value_map
;
// Vector of reg_value_map.
454 typedef std::vector
<reg_value_map
> regmap_stack
;
456 regmap_stack rm_stack
;
457 unsigned rm_stk_level
;
// Initializes rm_stk_level to 0 and value-initializes prev_dst.
463 ra_checker(shader
&sh
) : pass(sh
), rm_stk_level(0), prev_dst() {}
467 void run_on(container_node
*c
);
469 void dump_error(const error_info
&e
);
470 void dump_all_errors();
// Returns a reference to the map at index rm_stk_level of rm_stack.
474 reg_value_map
& rmap() { return rm_stack
[rm_stk_level
]; }
479 // when going out of the alu clause, values in the clause temporary gprs,
480 // AR, predicate values, PS/PV are destroyed
481 void kill_alu_only_regs();
// Takes the node, an id and a message string passed by value.
482 void error(node
*n
, unsigned id
, std::string msg
);
// Checking / processing helpers.
484 void check_phi_src(container_node
*p
, unsigned id
);
485 void process_phi_dst(container_node
*p
);
486 void check_alu_group(alu_group_node
*g
);
487 void process_op_dst(node
*n
);
488 void check_op_src(node
*n
);
489 void check_src_vec(node
*n
, unsigned id
, vvec
&vv
, bool src
);
490 void check_value_gpr(node
*n
, unsigned id
, value
*v
);
493 // =======================================
// ra_coalesce: pass subclass; only its constructor is visible in this view.
// NOTE(review): other members and the closing brace are not visible here.
496 class ra_coalesce
: public pass
{
// Forwards the shader reference to the `pass` base constructor.
500 ra_coalesce(shader
&sh
) : pass(sh
) {}
507 // =======================================
// ra_init: pass subclass declaring channel-preference helpers
// (add_prev_chan, get_preferable_chan_mask) and coloring helpers
// (color, color_bs_constraint, assign_color).
// NOTE(review): the declarations of prev_chans and ra_tune, the closing brace
// of the constructor body and the closing brace of the class are not visible
// in this view.
509 class ra_init
: public pass
{
// Value-initializes prev_chans; the body sets ra_tune (see below).
513 ra_init(shader
&sh
) : pass(sh
), prev_chans() {
515 // The parameter below affects register channels distribution.
516 // For cayman (VLIW-4) we're trying to distribute the channels
517 // uniformly, this means significantly better alu slots utilization
518 // at the expense of higher gpr usage. Hopefully this will improve
519 // performance, though it has to be proven with real benchmarks yet.
520 // For VLIW-5 this method could also slightly improve slots
521 // utilization, but increased register pressure seems more significant
522 // and overall performance effect is negative according to some
523 // benchmarks, so it's not used currently. Basically, VLIW-5 doesn't
524 // really need it because trans slot (unrestricted by register write
525 // channel) allows to consume most deviations from uniform channel
527 // Value 3 means that for new allocation we'll use channel that differs
528 // from 3 last used channels. 0 for VLIW-5 effectively turns this off.
// ra_tune is 3 when the shader's context reports is_cayman(), otherwise 0.
530 ra_tune
= sh
.get_ctx().is_cayman() ? 3 : 0;
540 void add_prev_chan(unsigned chan
);
541 unsigned get_preferable_chan_mask();
543 void ra_node(container_node
*c
);
544 void process_op(node
*n
);
546 void color(value
*v
);
548 void color_bs_constraint(ra_constraint
*c
);
550 void assign_color(value
*v
, sel_chan c
);
554 // =======================================
// ra_split: pass subclass declaring a family of split_* helpers and
// init_phi_constraints().
// NOTE(review): the closing brace is not visible in this view.
556 class ra_split
: public pass
{
// Forwards the shader reference to the `pass` base constructor.
560 ra_split(shader
&sh
) : pass(sh
) {}
564 void split(container_node
*n
);
565 void split_op(node
*n
);
566 void split_alu_packed(alu_packed_node
*n
);
567 void split_vector_inst(node
*n
);
569 void split_packed_ins(alu_packed_node
*n
);
572 void split_pinned_outs(node
*n
);
// Takes three value vectors by non-const reference and an allow_swz flag.
// NOTE(review): which vectors are inputs and which are outputs is not visible
// here - confirm in the implementation.
575 void split_vec(vvec
&vv
, vvec
&v1
, vvec
&v2
, bool allow_swz
);
// NOTE(review): this signature is truncated in this view; the parameters
// after `id` and the closing parenthesis are missing.
577 void split_phi_src(container_node
*loc
, container_node
*c
, unsigned id
,
579 void split_phi_dst(node
*loc
, container_node
*c
, bool loop
);
580 void init_phi_constraints(container_node
*c
);
// ssa_prepare: vpass subclass keeping a stack of val_set (type vd_stk)
// indexed by `level`, with visit() overloads for cf, alu, fetch, region,
// repeat and depart nodes.
// NOTE(review): the declarations of stk and level, the signatures of the
// inline methods whose fragments appear below, and the closing brace are not
// visible in this view.
585 class ssa_prepare
: public vpass
{
// Vector of val_set.
588 typedef std::vector
<val_set
> vd_stk
;
// Initializes `level` to 0.
594 ssa_prepare(shader
&s
) : vpass(s
), level(0) {}
// visit() overloads declared by this class.
596 virtual bool visit(cf_node
&n
, bool enter
);
597 virtual bool visit(alu_node
&n
, bool enter
);
598 virtual bool visit(fetch_node
&n
, bool enter
);
599 virtual bool visit(region_node
&n
, bool enter
);
600 virtual bool visit(repeat_node
&n
, bool enter
);
601 virtual bool visit(depart_node
&n
, bool enter
);
// Fragment of an inline method body: tests whether level + 1 exceeds
// stk.size(). The statement it guards is not visible in this view.
607 if (level
+ 1 > stk
.size())
// Fragment of an inline method body: calls add_set on stk[level] with
// stk[level + 1] as the argument.
615 stk
[level
].add_set(stk
[level
+ 1]);
618 void add_defs(node
&n
);
// Returns a reference to the val_set at index `level` of stk.
620 val_set
& cur_set() { return stk
[level
]; }
622 container_node
* create_phi_nodes(int count
);
// ssa_rename: vpass subclass holding a std::stack of def_map (rename_stack)
// and declaring visit() overloads plus index and rename helpers.
// NOTE(review): the closing brace and any further members are not visible in
// this view.
625 class ssa_rename
: public vpass
{
// sb_map from value pointer to an unsigned index.
628 typedef sb_map
<value
*, unsigned> def_map
;
631 std::stack
<def_map
> rename_stack
;
// Map from a uint32_t key to a value pointer.
633 typedef std::map
<uint32_t, value
*> val_map
;
// Forwards the shader reference to the vpass constructor.
638 ssa_rename(shader
&s
) : vpass(s
) {}
// visit() overloads declared by this class.
642 virtual bool visit(container_node
&n
, bool enter
);
643 virtual bool visit(node
&n
, bool enter
);
644 virtual bool visit(alu_group_node
&n
, bool enter
);
645 virtual bool visit(cf_node
&n
, bool enter
);
646 virtual bool visit(alu_node
&n
, bool enter
);
647 virtual bool visit(alu_packed_node
&n
, bool enter
);
648 virtual bool visit(fetch_node
&n
, bool enter
);
649 virtual bool visit(region_node
&n
, bool enter
);
650 virtual bool visit(repeat_node
&n
, bool enter
);
651 virtual bool visit(depart_node
&n
, bool enter
);
652 virtual bool visit(if_node
&n
, bool enter
);
656 void push(node
*phi
);
// Index helpers operating on a def_map and a value.
659 unsigned get_index(def_map
& m
, value
* v
);
660 void set_index(def_map
& m
, value
* v
, unsigned index
);
661 unsigned new_index(def_map
& m
, value
* v
);
// Each takes a node and a value and returns a value pointer.
// NOTE(review): presumably the returned pointer is the renamed value - confirm.
663 value
* rename_use(node
*n
, value
* v
);
664 value
* rename_def(node
*def
, value
* v
);
// Vector and whole-node variants of the rename helpers.
666 void rename_src_vec(node
*n
, vvec
&vv
, bool src
);
667 void rename_dst_vec(node
*def
, vvec
&vv
, bool set_def
);
669 void rename_src(node
*n
);
670 void rename_dst(node
*n
);
672 void rename_phi_args(container_node
*phi
, unsigned op
, bool def
);
674 void rename_virt(node
*n
);
675 void rename_virt_val(node
*n
, value
*v
);
// bc_finalizer: pass subclass holding an array of cf_node pointers sized
// EXP_TYPE_COUNT (last_export) and declaring finalize_* / emit_* / update_*
// helpers.
// NOTE(review): the declarations of last_cf and ngpr, the rest of the
// constructor, and the closing brace are not visible in this view.
678 class bc_finalizer
: public pass
{
680 cf_node
*last_export
[EXP_TYPE_COUNT
];
// NOTE(review): the initializer list is truncated in this view; only
// last_export, last_cf and ngpr (all value-initialized) are visible.
688 bc_finalizer(shader
&sh
) : pass(sh
), last_export(), last_cf(), ngpr(),
693 void finalize_loop(region_node
*r
);
694 void finalize_if(region_node
*r
);
696 void run_on(container_node
*c
);
698 void insert_rv6xx_load_ar_workaround(alu_group_node
*b4
);
699 void finalize_alu_group(alu_group_node
*g
, node
*prev_node
);
// NOTE(review): the meaning of the bool result is not visible here - confirm.
700 bool finalize_alu_src(alu_group_node
*g
, alu_node
*a
, alu_group_node
*prev_node
);
702 void emit_set_grad(fetch_node
* f
);
703 void finalize_fetch(fetch_node
*f
);
705 void finalize_cf(cf_node
*c
);
// Takes a cf_node and a value and returns a sel_chan.
707 sel_chan
translate_kcache(cf_node
*alu
, value
*v
);
709 void update_ngpr(unsigned gpr
);
// `add` defaults to 0.
710 void update_nstack(region_node
*r
, unsigned add
= 0);
// `loops` and `ifs` are passed by non-const reference.
// NOTE(review): this signature is truncated in this view; the parameters
// after `ifs` and the closing parenthesis are missing.
712 unsigned get_stack_depth(node
*n
, unsigned &loops
, unsigned &ifs
,
718 void copy_fetch_src(fetch_node
&dst
, fetch_node
&src
, unsigned arg_start
);
719 void emit_set_texture_offsets(fetch_node
&f
);
723 } // namespace r600_sb
725 #endif /* SB_PASS_H_ */