2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// vpass: pass subclass that declares run_on(container_node&) plus one virtual
// visit(T&, bool enter) overload for each node type listed below.
// NOTE(review): this listing has gaps (access specifiers, closing braces and
// possibly other members are not visible) — confirm against the full header.
47 class vpass
: public pass
{
// Constructor: forwards the shader reference to the pass base class.
51 vpass(shader
&s
) : pass(s
) {}
// Entry point taking a container node; presumably drives the visit() calls
// for that container's contents — TODO confirm in the implementation.
57 virtual void run_on(container_node
&n
);
// Per-node-type hooks, all with the same (node-type&, bool enter) -> bool
// shape. The meaning of `enter` and of the returned bool is not visible
// here — presumably enter/leave notification and "descend" — confirm.
59 virtual bool visit(node
&n
, bool enter
);
60 virtual bool visit(container_node
&n
, bool enter
);
61 virtual bool visit(alu_group_node
&n
, bool enter
);
62 virtual bool visit(cf_node
&n
, bool enter
);
63 virtual bool visit(alu_node
&n
, bool enter
);
64 virtual bool visit(alu_packed_node
&n
, bool enter
);
65 virtual bool visit(fetch_node
&n
, bool enter
);
66 virtual bool visit(region_node
&n
, bool enter
);
67 virtual bool visit(repeat_node
&n
, bool enter
);
68 virtual bool visit(depart_node
&n
, bool enter
);
69 virtual bool visit(if_node
&n
, bool enter
);
70 virtual bool visit(bb_node
&n
, bool enter
);
// rev_vpass: vpass subclass that re-declares run_on(container_node&).
// NOTE(review): the name suggests a reverse-order traversal, but the
// traversal order is not visible in this header — confirm.
74 class rev_vpass
: public vpass
{
// Constructor: forwards the shader reference to vpass.
77 rev_vpass(shader
&s
) : vpass(s
) {}
// Re-declaration of the vpass::run_on entry point for this class.
79 virtual void run_on(container_node
&n
);
83 // =================== PASSES
// bc_dump: vpass subclass with visit() hooks for cf, alu and fetch nodes and
// dump helpers for the same three node types; writes to a std::ostream passed
// at construction.
// NOTE(review): several data members (o, bc_data, ndw, id) are referenced by
// the inline constructor below but their declarations are not visible here.
87 class bc_dump
: public vpass
{
// Counters; both are value-initialized by the inline constructor below.
// Their exact role is not visible in this header.
97 unsigned new_group
, group_index
;
// Constructor taking an optional bytecode object (defaults to NULL);
// defined out of line.
101 bc_dump(shader
&s
, std::ostream
&o
, bytecode
*bc
= NULL
);
// Constructor taking a raw dword pointer and a count `ndw`; stores both and
// value-initializes id, new_group and group_index.
103 bc_dump(shader
&s
, std::ostream
&o
, uint32_t *bc_ptr
, unsigned ndw
) :
104 vpass(s
), o(o
), bc_data(bc_ptr
), ndw(ndw
), id(), new_group(), group_index() {}
// visit() hooks declared for the node types this pass handles.
109 virtual bool visit(cf_node
&n
, bool enter
);
110 virtual bool visit(alu_node
&n
, bool enter
);
111 virtual bool visit(fetch_node
&n
, bool enter
);
// Dumps `count` dwords (default 2) starting at index dw_id — presumably
// indexing into bc_data; confirm in the implementation.
113 void dump_dw(unsigned dw_id
, unsigned count
= 2);
// Per-node-type dump helpers.
115 void dump(cf_node
& n
);
116 void dump(alu_node
& n
);
117 void dump(fetch_node
& n
);
// dce_cleanup: vpass subclass declaring visit() hooks for eight node types and
// two destination cleanup helpers.
// NOTE(review): presumably the cleanup step of dead code elimination (from
// the name) — confirm in the implementation.
121 class dce_cleanup
: public vpass
{
// Constructor: forwards the shader reference to vpass.
126 dce_cleanup(shader
&s
) : vpass(s
) {}
// visit() hooks declared by this pass.
128 virtual bool visit(node
&n
, bool enter
);
129 virtual bool visit(alu_group_node
&n
, bool enter
);
130 virtual bool visit(cf_node
&n
, bool enter
);
131 virtual bool visit(alu_node
&n
, bool enter
);
132 virtual bool visit(alu_packed_node
&n
, bool enter
);
133 virtual bool visit(fetch_node
&n
, bool enter
);
134 virtual bool visit(region_node
&n
, bool enter
);
135 virtual bool visit(container_node
&n
, bool enter
);
// Helpers operating on a node and on a value vector respectively; what is
// "cleaned" is not visible in this header.
139 void cleanup_dst(node
&n
);
140 void cleanup_dst_vec(vvec
&vv
);
// def_use: pass subclass (not a vpass) declaring helpers that process defs,
// uses and phi containers.
// NOTE(review): presumably builds def/use information for values — confirm.
145 class def_use
: public pass
{
// Constructor: forwards the shader reference to pass.
149 def_use(shader
&sh
) : pass(sh
) {}
// Processes node `n`; the `defs` flag presumably selects def vs. use
// processing — TODO confirm.
152 void run_on(node
*n
, bool defs
);
// Helpers for uses, defs (on a value vector, with an array-def flag) and
// phi containers (with separate defs/uses flags).
156 void process_uses(node
*n
);
157 void process_defs(node
*n
, vvec
&vv
, bool arr_def
);
158 void process_phi(container_node
*c
, bool defs
, bool uses
);
// dump: vpass subclass declaring a visit() hook for every node type, a set of
// static dump_* helpers, and a few non-static dump helpers.
// NOTE(review): the output destination is not visible in this header.
163 class dump
: public vpass
{
// Constructor: forwards the shader to vpass and initializes `level` to 0
// (the declaration of `level` is not visible here; presumably an
// indentation/nesting depth — confirm).
170 dump(shader
&s
) : vpass(s
), level(0) {}
// visit() hooks, one per node type.
172 virtual bool visit(node
&n
, bool enter
);
173 virtual bool visit(container_node
&n
, bool enter
);
174 virtual bool visit(alu_group_node
&n
, bool enter
);
175 virtual bool visit(cf_node
&n
, bool enter
);
176 virtual bool visit(alu_node
&n
, bool enter
);
177 virtual bool visit(alu_packed_node
&n
, bool enter
);
178 virtual bool visit(fetch_node
&n
, bool enter
);
179 virtual bool visit(region_node
&n
, bool enter
);
180 virtual bool visit(repeat_node
&n
, bool enter
);
181 virtual bool visit(depart_node
&n
, bool enter
);
182 virtual bool visit(if_node
&n
, bool enter
);
183 virtual bool visit(bb_node
&n
, bool enter
);
// Static helpers: callable without a dump instance.
186 static void dump_op(node
&n
, const char *name
);
187 static void dump_vec(const vvec
& vv
);
188 static void dump_set(shader
&sh
, val_set
& v
);
190 static void dump_rels(vvec
& vv
);
192 static void dump_val(value
*v
);
193 static void dump_op(node
*n
);
195 static void dump_op_list(container_node
*c
);
196 static void dump_queue(sched_queue
&q
);
198 static void dump_alu(alu_node
*n
);
// Non-static helpers (need a dump instance).
204 void dump_common(node
&n
);
205 void dump_flags(node
&n
);
// `before` presumably selects the live set before vs. after the node —
// TODO confirm.
207 void dump_live_values(container_node
&n
, bool before
);
211 // Global Code Motion
// gcm: pass subclass holding scheduling queues and declaring the sched_early /
// sched_late entry points with their td_* and bu_* helpers.
// NOTE(review): many members named in the constructor's initializer list
// (ready, op_map, uses, nuc_stk, ucs_level, bu_bb, pending_defs, cur_sq, live,
// live_count) are not declared in the visible lines.
213 class gcm
: public pass
{
// Three arrays of scheduling queues, each with SQ_NUM entries.
215 sched_queue bu_ready
[SQ_NUM
];
216 sched_queue bu_ready_next
[SQ_NUM
];
217 sched_queue bu_ready_early
[SQ_NUM
];
219 sched_queue ready_above
;
221 container_node pending
;
// Constructor of a nested op_info type (its struct header is not visible):
// value-initializes top_bb and bottom_bb.
226 op_info() : top_bb(), bottom_bb() {}
// Per-node op_info lookup.
229 typedef std::map
<node
*, op_info
> op_info_map
;
// Per-node unsigned counter map, and a stack (vector) of such maps.
231 typedef std::map
<node
*, unsigned> nuc_map
;
236 typedef std::vector
<nuc_map
> nuc_stack
;
245 node_list pending_nodes
;
249 // for register pressure tracking in bottom-up pass
// Threshold constant; how it is compared is not visible in this header.
253 static const int rp_threshold
= 100;
// Constructor: value-initializes all members listed; nuc_stk starts with
// one element.
257 gcm(shader
&sh
) : pass(sh
),
258 bu_ready(), bu_ready_next(), bu_ready_early(),
259 ready(), op_map(), uses(), nuc_stk(1), ucs_level(),
260 bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
261 live(), live_count() {}
267 void collect_instructions(container_node
*c
, bool early_pass
);
// sched_early and its td_* helpers (td_ presumably "top-down" — confirm).
269 void sched_early(container_node
*n
);
270 void td_sched_bb(bb_node
*bb
);
271 bool td_is_ready(node
*n
);
272 void td_release_uses(vvec
&v
);
273 void td_release_val(value
*v
);
274 void td_schedule(bb_node
*bb
, node
*n
);
// sched_late and its bu_* helpers (bu_ presumably "bottom-up", matching the
// register pressure comment above — confirm).
276 void sched_late(container_node
*n
);
277 void bu_sched_bb(bb_node
*bb
);
278 void bu_release_defs(vvec
&v
, bool src
);
279 void bu_release_phi_defs(container_node
*p
, unsigned op
);
280 bool bu_is_ready(node
*n
);
281 void bu_release_val(value
*v
);
282 void bu_release_op(node
* n
);
283 void bu_find_best_bb(node
*n
, op_info
&oi
);
284 void bu_schedule(container_node
*bb
, node
*n
);
// Helpers around the nuc_map counters.
286 void push_uc_stack();
289 void init_def_count(nuc_map
&m
, container_node
&s
);
290 void init_use_count(nuc_map
&m
, container_node
&s
);
291 unsigned get_uc_vec(vvec
&vv
);
292 unsigned get_dc_vec(vvec
&vv
, bool src
);
294 void add_ready(node
*n
);
296 void dump_uc_stack();
// Counts ALU entries in queue `q`; `max` is presumably an upper bound on
// the count — TODO confirm.
298 unsigned real_alu_count(sched_queue
&q
, unsigned max
);
300 // check if we have not less than threshold ready alu instructions
301 bool check_alu_ready_count(unsigned threshold
);
// gvn: vpass subclass declaring visit() hooks for six node types and helpers
// that process an op and its source values.
// NOTE(review): presumably global value numbering (from the name) — confirm.
305 class gvn
: public vpass
{
// Constructor: forwards the shader reference to vpass.
310 gvn(shader
&sh
) : vpass(sh
) {}
// visit() hooks declared by this pass.
312 virtual bool visit(node
&n
, bool enter
);
313 virtual bool visit(cf_node
&n
, bool enter
);
314 virtual bool visit(alu_node
&n
, bool enter
);
315 virtual bool visit(alu_packed_node
&n
, bool enter
);
316 virtual bool visit(fetch_node
&n
, bool enter
);
317 virtual bool visit(region_node
&n
, bool enter
);
// Processes node `n`; `rewrite` defaults to true.
321 void process_op(node
&n
, bool rewrite
= true);
323 // returns true if the value was rewritten
// `v` is a reference to a pointer, so the callee can replace the pointer.
324 bool process_src(value
* &v
, bool rewrite
);
// Same pointer-by-reference convention for `v`.
327 void process_alu_src_constants(node
&n
, value
* &v
);
// if_conversion: pass subclass declaring a per-region entry point plus helpers
// for converting phi nodes and kill operations.
// NOTE(review): exact transformation is not visible in this header.
331 class if_conversion
: public pass
{
// Constructor: forwards the shader reference to pass.
335 if_conversion(shader
&sh
) : pass(sh
) {}
// Returns bool for region `r`; presumably "true if converted" — confirm.
339 bool run_on(region_node
*r
);
// Returns an alu_node pointer built from a select value and a phi node;
// ownership of the result is not visible here.
341 alu_node
* convert_phi(value
*select
, node
*phi
);
// Returns an unsigned result for region `r`; its meaning (count vs. status)
// is not visible here — TODO confirm.
343 unsigned try_convert_kills(region_node
* r
);
// liveness: rev_vpass subclass declaring a visit() hook for every node type
// and helpers that add/remove values and process inputs, outputs and phis.
// NOTE(review): data members (e.g. live_changed, initialized below) are not
// declared in the visible lines.
348 class liveness
: public rev_vpass
{
// Constructor: forwards the shader to rev_vpass and starts with
// live_changed == false.
356 liveness(shader
&s
) : rev_vpass(s
), live_changed(false) {}
// visit() hooks, one per node type.
360 virtual bool visit(node
&n
, bool enter
);
361 virtual bool visit(bb_node
&n
, bool enter
);
362 virtual bool visit(container_node
&n
, bool enter
);
363 virtual bool visit(alu_group_node
&n
, bool enter
);
364 virtual bool visit(cf_node
&n
, bool enter
);
365 virtual bool visit(alu_node
&n
, bool enter
);
366 virtual bool visit(alu_packed_node
&n
, bool enter
);
367 virtual bool visit(fetch_node
&n
, bool enter
);
368 virtual bool visit(region_node
&n
, bool enter
);
369 virtual bool visit(repeat_node
&n
, bool enter
);
370 virtual bool visit(depart_node
&n
, bool enter
);
371 virtual bool visit(if_node
&n
, bool enter
);
375 void update_interferences();
376 void process_op(node
&n
);
// The bool results of the helpers below are not documented in this header;
// presumably "something changed / was live" — TODO confirm.
378 bool remove_val(value
*v
);
379 bool remove_vec(vvec
&v
);
380 bool process_outs(node
& n
);
381 void process_ins(node
& n
);
// Phi handling: outputs of a phi container, and one branch selected by id.
383 void process_phi_outs(container_node
*phi
);
384 void process_phi_branch(container_node
*phi
, unsigned id
);
386 bool process_maydef(value
*v
);
388 bool add_vec(vvec
&vv
, bool src
);
390 void update_src_vec(vvec
&vv
, bool src
);
// bool_op_info: struct whose fields are not visible in this listing; it is
// passed by reference to peephole::get_bool_op_info() further down, so it is
// presumably filled in by that call — confirm against the full header.
394 struct bool_op_info
{
// peephole: pass subclass declaring a per-container entry point and helpers
// that optimize individual ALU nodes (cc, SETcc and CNDcc variants).
401 class peephole
: public pass
{
// Constructor: forwards the shader reference to pass.
405 peephole(shader
&sh
) : pass(sh
) {}
409 void run_on(container_node
*c
);
// Per-instruction optimizations; the specific rewrites are not visible in
// this header.
411 void optimize_cc_op(alu_node
*a
);
413 void optimize_SETcc_op(alu_node
*a
);
414 void optimize_CNDcc_op(alu_node
*a
);
// Takes `bop` by non-const reference (out-parameter style); the bool result
// presumably signals success — TODO confirm.
416 bool get_bool_op_info(value
*b
, bool_op_info
& bop
);
// `a` is a reference to a pointer, so the callee can replace the pointer.
417 bool get_bool_flt_to_int_source(alu_node
* &a
);
418 void convert_float_setcc(alu_node
*f2i
, alu_node
*s
);
// psi_ops: rev_vpass subclass that declares visit() for node and alu_node only
// and adds helpers that try to inline, reduce or eliminate a node.
422 class psi_ops
: public rev_vpass
{
// Brings the base-class visit() overloads into scope so the two overloads
// declared below do not hide the remaining ones.
423 using rev_vpass::visit
;
// Constructor: forwards the shader reference to rev_vpass.
427 psi_ops(shader
&s
) : rev_vpass(s
) {}
429 virtual bool visit(node
&n
, bool enter
);
430 virtual bool visit(alu_node
&n
, bool enter
);
// The bool results presumably report whether the transformation was
// applied — TODO confirm.
432 bool try_inline(node
&n
);
433 bool try_reduce(node
&n
);
434 bool eliminate(node
&n
);
436 void unpredicate(node
*n
);
440 // check correctness of the generated code, e.g.:
441 // - expected source operand value is the last value written to its gpr,
442 // - all arguments of phi node should be allocated to the same gpr,
// ra_checker: pass subclass that keeps a stack of register-to-value maps and
// declares checking helpers for phi sources, ALU groups, op sources and
// value GPRs, plus error reporting helpers.
444 class ra_checker
: public pass
{
// Map from a register/channel key to the value pointer associated with it.
446 typedef std::map
<sel_chan
, value
*> reg_value_map
;
// Stack (vector) of such maps.
448 typedef std::vector
<reg_value_map
> regmap_stack
;
450 regmap_stack rm_stack
;
// Index of the current map in rm_stack (see rmap()).
451 unsigned rm_stk_level
;
// NOTE(review): this visible constructor does not initialize rm_stk_level;
// confirm it is assigned before rmap() is first used.
457 ra_checker(shader
&sh
) : pass(sh
) {}
461 void run_on(container_node
*c
);
463 void dump_error(const error_info
&e
);
464 void dump_all_errors();
// Returns the map at the current stack level; no bounds check is done here.
468 reg_value_map
& rmap() { return rm_stack
[rm_stk_level
]; }
473 // when going out of the alu clause, values in the clause temporary gprs,
474 // AR, predicate values, PS/PV are destroyed
475 void kill_alu_only_regs();
// Records an error for node `n`; `msg` is taken by value.
476 void error(node
*n
, unsigned id
, std::string msg
);
// Checking / bookkeeping helpers.
478 void check_phi_src(container_node
*p
, unsigned id
);
479 void process_phi_dst(container_node
*p
);
480 void check_alu_group(alu_group_node
*g
);
481 void process_op_dst(node
*n
);
482 void check_op_src(node
*n
);
483 void check_src_vec(node
*n
, unsigned id
, vvec
&vv
, bool src
);
484 void check_value_gpr(node
*n
, unsigned id
, value
*v
);
487 // =======================================
// ra_coalesce: pass subclass; only its constructor is visible in this listing.
// NOTE(review): presumably the coalescing step of register allocation (from
// the name) — confirm against the full header.
490 class ra_coalesce
: public pass
{
// Constructor: forwards the shader reference to pass.
494 ra_coalesce(shader
&sh
) : pass(sh
) {}
501 // =======================================
// ra_init: pass subclass declaring helpers that walk containers/ops and
// assign a "color" to values.
// NOTE(review): "color" presumably means a register assignment (sel_chan is
// what assign_color takes) — confirm.
503 class ra_init
: public pass
{
// Constructor: forwards the shader reference to pass.
507 ra_init(shader
&sh
) : pass(sh
) {}
513 void ra_node(container_node
*c
);
514 void process_op(node
*n
);
516 void color(value
*v
);
518 void color_bs_constraint(ra_constraint
*c
);
// Assigns sel_chan `c` to value `v`.
520 void assign_color(value
*v
, sel_chan c
);
524 // =======================================
// ra_split: pass subclass declaring split helpers for containers, ops, packed
// ALU nodes, vector instructions, value vectors and phi sources/destinations.
526 class ra_split
: public pass
{
// Constructor: forwards the shader reference to pass.
530 ra_split(shader
&sh
) : pass(sh
) {}
534 void split(container_node
*n
);
535 void split_op(node
*n
);
536 void split_alu_packed(alu_packed_node
*n
);
537 void split_vector_inst(node
*n
);
539 void split_packed_ins(alu_packed_node
*n
);
542 void split_pinned_outs(node
*n
);
// Takes three value vectors by non-const reference; presumably vv is the
// input and v1/v2 are outputs — TODO confirm.
545 void split_vec(vvec
&vv
, vvec
&v1
, vvec
&v2
, bool allow_swz
);
// NOTE(review): the declaration below is truncated in this listing (it ends
// with a trailing comma; the remaining parameters are not visible).
547 void split_phi_src(container_node
*loc
, container_node
*c
, unsigned id
,
549 void split_phi_dst(node
*loc
, container_node
*c
, bool loop
);
550 void init_phi_constraints(container_node
*c
);
// ssa_prepare: vpass subclass that keeps a stack of value sets indexed by
// `level` and declares visit() hooks for six node types.
// NOTE(review): the members `stk` and `level` are used below but their
// declarations are not visible in this listing.
555 class ssa_prepare
: public vpass
{
// Stack (vector) of value sets.
558 typedef std::vector
<val_set
> vd_stk
;
// Constructor: forwards the shader to vpass and starts at level 0.
564 ssa_prepare(shader
&s
) : vpass(s
), level(0) {}
566 virtual bool visit(cf_node
&n
, bool enter
);
567 virtual bool visit(alu_node
&n
, bool enter
);
568 virtual bool visit(fetch_node
&n
, bool enter
);
569 virtual bool visit(region_node
&n
, bool enter
);
570 virtual bool visit(repeat_node
&n
, bool enter
);
571 virtual bool visit(depart_node
&n
, bool enter
);
// Fragment of an inline body whose enclosing function is not visible:
// checks whether level + 1 exceeds the current stack size.
577 if (level
+ 1 > stk
.size())
// Fragment of another inline body: merges the set at level + 1 into the
// set at the current level.
585 stk
[level
].add_set(stk
[level
+ 1]);
588 void add_defs(node
&n
);
// Returns the value set at the current level; no bounds check is done here.
590 val_set
& cur_set() { return stk
[level
]; }
// Returns a container_node pointer; ownership of the result and the meaning
// of `count` are not visible here — TODO confirm.
592 container_node
* create_phi_nodes(int count
);
// ssa_rename: vpass subclass that keeps a stack of value-to-index maps and
// declares visit() hooks for eleven node types plus rename helpers for
// sources, destinations, phi arguments and "virt" values.
595 class ssa_rename
: public vpass
{
// Map from a value pointer to an unsigned index.
598 typedef sb_map
<value
*, unsigned> def_map
;
// Stack of such maps.
601 std::stack
<def_map
> rename_stack
;
// Map from a 32-bit key to a value pointer.
603 typedef std::map
<uint32_t, value
*> val_map
;
// Constructor: forwards the shader reference to vpass.
608 ssa_rename(shader
&s
) : vpass(s
) {}
// visit() hooks declared by this pass.
612 virtual bool visit(container_node
&n
, bool enter
);
613 virtual bool visit(node
&n
, bool enter
);
614 virtual bool visit(alu_group_node
&n
, bool enter
);
615 virtual bool visit(cf_node
&n
, bool enter
);
616 virtual bool visit(alu_node
&n
, bool enter
);
617 virtual bool visit(alu_packed_node
&n
, bool enter
);
618 virtual bool visit(fetch_node
&n
, bool enter
);
619 virtual bool visit(region_node
&n
, bool enter
);
620 virtual bool visit(repeat_node
&n
, bool enter
);
621 virtual bool visit(depart_node
&n
, bool enter
);
622 virtual bool visit(if_node
&n
, bool enter
);
// Takes a phi node; presumably pushes a new def_map onto rename_stack —
// TODO confirm.
626 void push(node
*phi
);
// Index accessors on a def_map for a given value.
629 unsigned get_index(def_map
& m
, value
* v
);
630 void set_index(def_map
& m
, value
* v
, unsigned index
);
631 unsigned new_index(def_map
& m
, value
* v
);
// Return the value pointer to use in place of `v` for a use / a def.
633 value
* rename_use(node
*n
, value
* v
);
634 value
* rename_def(node
*def
, value
* v
);
// Vector and whole-node variants.
636 void rename_src_vec(node
*n
, vvec
&vv
, bool src
);
637 void rename_dst_vec(node
*def
, vvec
&vv
, bool set_def
);
639 void rename_src(node
*n
);
640 void rename_dst(node
*n
);
642 void rename_phi_args(container_node
*phi
, unsigned op
, bool def
);
644 void rename_virt(node
*n
);
645 void rename_virt_val(node
*n
, value
*v
);
// bc_finalizer: pass subclass declaring finalize_* helpers for loops, ifs, ALU
// groups, ALU sources, fetches and cf nodes, plus GPR/stack bookkeeping.
// NOTE(review): members last_cf and ngpr are initialized in the constructor
// but not declared in the visible lines.
648 class bc_finalizer
: public pass
{
// One cf_node pointer per export type.
650 cf_node
*last_export
[EXP_TYPE_COUNT
];
// NOTE(review): the constructor's initializer list is truncated in this
// listing (it ends with a trailing comma).
658 bc_finalizer(shader
&sh
) : pass(sh
), last_export(), last_cf(), ngpr(),
663 void finalize_loop(region_node
*r
);
664 void finalize_if(region_node
*r
);
666 void run_on(container_node
*c
);
668 void finalize_alu_group(alu_group_node
*g
);
669 void finalize_alu_src(alu_group_node
*g
, alu_node
*a
);
671 void emit_set_grad(fetch_node
* f
);
672 void finalize_fetch(fetch_node
*f
);
674 void finalize_cf(cf_node
*c
);
// Returns a sel_chan for value `v` in the context of cf node `alu`.
676 sel_chan
translate_kcache(cf_node
*alu
, value
*v
);
// update_ngpr takes a GPR index; update_nstack takes a region and an
// optional `add` (default 0). Presumably both track maxima — TODO confirm.
678 void update_ngpr(unsigned gpr
);
679 void update_nstack(region_node
*r
, unsigned add
= 0);
686 } // namespace r600_sb
688 #endif /* SB_PASS_H_ */