r600g/sb: fix issues caused by GLSL switching to loops for switch
[mesa.git] / src / gallium / drivers / r600 / sb / sb_pass.h
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #ifndef SB_PASS_H_
28 #define SB_PASS_H_
29
30 #include <stack>
31
32 namespace r600_sb {
33
// Common base class of the passes declared in this header.
// Holds a reference to an sb_context (ctx) and to the shader (sh) that are
// visible to derived classes. The constructor and run() are only declared
// here; their definitions are outside this header.
34 class pass {
35 protected:
36 sb_context &ctx;
37 shader &sh;
38
39 public:
40 pass(shader &s);
41
// Entry point of the pass. Returns an int - presumably a status code;
// TODO confirm the convention against the definition.
42 virtual int run();
43
// Virtual so that derived passes can be destroyed through a pass pointer.
44 virtual ~pass() {}
45 };
46
// Pass that declares one virtual visit() overload per node type, each taking
// the node and an 'enter' flag, together with init()/done()/run()/run_on()
// hooks. All of them are defined outside this header.
// NOTE(review): 'enter' presumably tells whether the node is being entered
// or left during a traversal, and the bool result presumably controls
// whether children are processed - confirm against the vpass implementation.
47 class vpass : public pass {
48
49 public:
50
51 vpass(shader &s) : pass(s) {}
52
53 virtual int init();
54 virtual int done();
55
56 virtual int run();
57 virtual void run_on(container_node &n);
58
59 virtual bool visit(node &n, bool enter);
60 virtual bool visit(container_node &n, bool enter);
61 virtual bool visit(alu_group_node &n, bool enter);
62 virtual bool visit(cf_node &n, bool enter);
63 virtual bool visit(alu_node &n, bool enter);
64 virtual bool visit(alu_packed_node &n, bool enter);
65 virtual bool visit(fetch_node &n, bool enter);
66 virtual bool visit(region_node &n, bool enter);
67 virtual bool visit(repeat_node &n, bool enter);
68 virtual bool visit(depart_node &n, bool enter);
69 virtual bool visit(if_node &n, bool enter);
70 virtual bool visit(bb_node &n, bool enter);
71
72 };
73
// vpass variant that overrides only run_on(); everything else is inherited.
// NOTE(review): judging by the name it presumably walks a container's
// children in reverse order - confirm against the definition of run_on().
74 class rev_vpass : public vpass {
75
76 public:
77 rev_vpass(shader &s) : vpass(s) {}
78
79 virtual void run_on(container_node &n);
80 };
81
82
83 // =================== PASSES
84
// Forward declaration; bc_dump only needs a pointer to bytecode here.
85 class bytecode;
86
// vpass that overrides visit() for cf, alu and fetch nodes and provides
// dump() helpers for the same three node types. It keeps a pointer to an
// array of 32-bit words (bc_data) together with a word count (ndw).
// NOTE(review): presumably prints the encoded bytecode next to each
// instruction - confirm against the implementation.
87 class bc_dump : public vpass {
// Re-expose the vpass::visit overloads that the visit() declarations in this
// class would otherwise hide.
88 using vpass::visit;
89
90 uint32_t *bc_data;
91 unsigned ndw;
92
93 unsigned id;
94
95 unsigned new_group, group_index;
96
97 public:
98
// Defined outside this header; the bytecode pointer is optional (NULL by
// default).
99 bc_dump(shader &s, bytecode *bc = NULL);
100
// Takes the word buffer and its length directly; id, new_group and
// group_index are value-initialized, i.e. start at zero.
101 bc_dump(shader &s, uint32_t *bc_ptr, unsigned ndw) :
102 vpass(s), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {}
103
104 virtual int init();
105 virtual int done();
106
107 virtual bool visit(cf_node &n, bool enter);
108 virtual bool visit(alu_node &n, bool enter);
109 virtual bool visit(fetch_node &n, bool enter);
110
// 'count' defaults to 2 when omitted.
111 void dump_dw(unsigned dw_id, unsigned count = 2);
112
113 void dump(cf_node& n);
114 void dump(alu_node& n);
115 void dump(fetch_node& n);
116 };
117
118
// vpass that overrides visit() for most node types and has two private
// helpers operating on destination values (cleanup_dst, cleanup_dst_vec).
// NOTE(review): the name suggests it removes code found dead by a dead-code
// elimination step - confirm against the implementation.
119 class dce_cleanup : public vpass {
// Re-expose the vpass::visit overloads that the visit() declarations in this
// class would otherwise hide.
120 using vpass::visit;
121
122 bool remove_unused;
123
124 public:
125
// remove_unused is true when the DF_REMOVE_UNUSED bit is set in the shader's
// dce_flags at construction time.
126 dce_cleanup(shader &s) : vpass(s),
127 remove_unused(s.dce_flags & DF_REMOVE_UNUSED) {}
128
129 virtual bool visit(node &n, bool enter);
130 virtual bool visit(alu_group_node &n, bool enter);
131 virtual bool visit(cf_node &n, bool enter);
132 virtual bool visit(alu_node &n, bool enter);
133 virtual bool visit(alu_packed_node &n, bool enter);
134 virtual bool visit(fetch_node &n, bool enter);
135 virtual bool visit(region_node &n, bool enter);
136 virtual bool visit(container_node &n, bool enter);
137
138 private:
139
140 void cleanup_dst(node &n);
141 bool cleanup_dst_vec(vvec &vv);
142
143 };
144
145
// Pass derived directly from pass (it declares no visit() overloads).
// Public interface: run() and run_on(node, defs); the private helpers
// handle uses, defs and phi containers separately.
// NOTE(review): presumably builds def/use information for values - confirm
// against the implementation.
146 class def_use : public pass {
147
148 public:
149
150 def_use(shader &sh) : pass(sh) {}
151
152 virtual int run();
// 'defs' presumably selects whether definitions or uses are processed for
// the node - TODO confirm.
153 void run_on(node *n, bool defs);
154
155 private:
156
157 void process_uses(node *n);
158 void process_defs(node *n, vvec &vv, bool arr_def);
159 void process_phi(container_node *c, bool defs, bool uses);
160 };
161
162
163
// vpass that overrides visit() for every node type and additionally offers a
// set of static dump_* helpers that can be called without a dump instance.
// NOTE(review): presumably prints a textual representation of the shader
// IR, with 'level' tracking the nesting depth - confirm.
164 class dump : public vpass {
// Re-expose the vpass::visit overloads (kept for consistency with the other
// passes; every overload is redeclared below).
165 using vpass::visit;
166
167 int level;
168
169 public:
170
// level starts at 0.
171 dump(shader &s) : vpass(s), level(0) {}
172
173 virtual bool visit(node &n, bool enter);
174 virtual bool visit(container_node &n, bool enter);
175 virtual bool visit(alu_group_node &n, bool enter);
176 virtual bool visit(cf_node &n, bool enter);
177 virtual bool visit(alu_node &n, bool enter);
178 virtual bool visit(alu_packed_node &n, bool enter);
179 virtual bool visit(fetch_node &n, bool enter);
180 virtual bool visit(region_node &n, bool enter);
181 virtual bool visit(repeat_node &n, bool enter);
182 virtual bool visit(depart_node &n, bool enter);
183 virtual bool visit(if_node &n, bool enter);
184 virtual bool visit(bb_node &n, bool enter);
185
186
// Static helpers: usable without constructing a dump object.
187 static void dump_op(node &n, const char *name);
188 static void dump_vec(const vvec & vv);
189 static void dump_set(shader &sh, val_set & v);
190
191 static void dump_rels(vvec & vv);
192
193 static void dump_val(value *v);
194 static void dump_op(node *n);
195
196 static void dump_op_list(container_node *c);
197 static void dump_queue(sched_queue &q);
198
199 static void dump_alu(alu_node *n);
200
201 private:
202
203 void indent();
204
205 void dump_common(node &n);
206 void dump_flags(node &n);
207
208 void dump_live_values(container_node &n, bool before);
209 };
210
211
212 // Global Code Motion
213
// Pass derived directly from pass. State kept between steps: several
// sched_queue instances (three arrays of SQ_NUM queues plus 'ready' and
// 'ready_above'), a 'pending' container, per-node maps (op_map, uses), a
// stack of use-count maps (nuc_stk) and a live value set with a counter.
// NOTE(review): the td_* / bu_* method prefixes presumably stand for the
// top-down (sched_early) and bottom-up (sched_late) phases - confirm.
214 class gcm : public pass {
215
216 sched_queue bu_ready[SQ_NUM];
217 sched_queue bu_ready_next[SQ_NUM];
218 sched_queue bu_ready_early[SQ_NUM];
219 sched_queue ready;
220 sched_queue ready_above;
221
222 container_node pending;
223
// Per-node pair of basic-block pointers; both start as null.
224 struct op_info {
225 bb_node* top_bb;
226 bb_node* bottom_bb;
227 op_info() : top_bb(), bottom_bb() {}
228 };
229
230 typedef std::map<node*, op_info> op_info_map;
231
232 typedef std::map<node*, unsigned> nuc_map;
233
234 op_info_map op_map;
235 nuc_map uses;
236
237 typedef std::vector<nuc_map> nuc_stack;
238
239 nuc_stack nuc_stk;
240 unsigned ucs_level;
241
242 bb_node * bu_bb;
243
244 vvec pending_defs;
245
246 node_list pending_nodes;
247
248 unsigned cur_sq;
249
250 // for register pressure tracking in bottom-up pass
251 val_set live;
252 int live_count;
253
// NOTE(review): presumably the limit compared against live_count - confirm.
254 static const int rp_threshold = 100;
255
256 bool pending_exec_mask_update;
257
258 public:
259
// Scalars and pointers are value-initialized (zero / null / false) and
// nuc_stk starts with one element. ready_above and pending are not listed
// and are therefore default-constructed.
260 gcm(shader &sh) : pass(sh),
261 bu_ready(), bu_ready_next(), bu_ready_early(),
262 ready(), op_map(), uses(), nuc_stk(1), ucs_level(),
263 bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
264 live(), live_count(), pending_exec_mask_update() {}
265
266 virtual int run();
267
268 private:
269
270 void collect_instructions(container_node *c, bool early_pass);
271
272 void sched_early(container_node *n);
273 void td_sched_bb(bb_node *bb);
274 bool td_is_ready(node *n);
275 void td_release_uses(vvec &v);
276 void td_release_val(value *v);
277 void td_schedule(bb_node *bb, node *n);
278
279 void sched_late(container_node *n);
280 void bu_sched_bb(bb_node *bb);
281 void bu_release_defs(vvec &v, bool src);
282 void bu_release_phi_defs(container_node *p, unsigned op);
283 bool bu_is_ready(node *n);
284 void bu_release_val(value *v);
285 void bu_release_op(node * n);
286 void bu_find_best_bb(node *n, op_info &oi);
287 void bu_schedule(container_node *bb, node *n);
288
289 void push_uc_stack();
290 void pop_uc_stack();
291
292 void init_def_count(nuc_map &m, container_node &s);
293 void init_use_count(nuc_map &m, container_node &s);
294 unsigned get_uc_vec(vvec &vv);
295 unsigned get_dc_vec(vvec &vv, bool src);
296
297 void add_ready(node *n);
298
299 void dump_uc_stack();
300
301 unsigned real_alu_count(sched_queue &q, unsigned max);
302
303 // check whether at least 'threshold' alu instructions are ready
304 bool check_alu_ready_count(unsigned threshold);
305 };
306
307
// vpass that overrides visit() for node, cf, alu, alu_packed, fetch and
// region nodes; the remaining overloads come from vpass.
// NOTE(review): the name presumably stands for global value numbering -
// confirm against the implementation.
308 class gvn : public vpass {
// Re-expose the vpass::visit overloads that the visit() declarations in this
// class would otherwise hide.
309 using vpass::visit;
310
311 public:
312
313 gvn(shader &sh) : vpass(sh) {}
314
315 virtual bool visit(node &n, bool enter);
316 virtual bool visit(cf_node &n, bool enter);
317 virtual bool visit(alu_node &n, bool enter);
318 virtual bool visit(alu_packed_node &n, bool enter);
319 virtual bool visit(fetch_node &n, bool enter);
320 virtual bool visit(region_node &n, bool enter);
321
322 private:
323
// 'rewrite' defaults to true when omitted.
324 void process_op(node &n, bool rewrite = true);
325
326 // returns true if the value was rewritten
// The value pointer is taken by reference, so the callee can replace it.
327 bool process_src(value* &v, bool rewrite);
328
329
330 void process_alu_src_constants(node &n, value* &v);
331 };
332
333
// Pass derived directly from pass; all members are public and operate on
// region nodes.
// NOTE(review): presumably replaces suitable if-regions with conditional
// (select) instructions - confirm against the implementation.
334 class if_conversion : public pass {
335
336 public:
337
338 if_conversion(shader &sh) : pass(sh) {}
339
340 virtual int run();
341
342 bool run_on(region_node *r);
343
344 void convert_kill_instructions(region_node *r, value *em, bool branch,
345 container_node *c);
346
347 bool check_and_convert(region_node *r);
348
// Returns an alu_node built from a select value and a phi node; the exact
// instruction produced is defined outside this header.
349 alu_node* convert_phi(value *select, node *phi);
350
351 };
352
353
// rev_vpass that overrides init() and visit() for every node type and keeps
// a value set (live) plus a live_changed flag.
// NOTE(review): presumably computes which values are live at each point and
// updates interference information - confirm against the implementation.
354 class liveness : public rev_vpass {
// Re-expose the vpass::visit overloads (kept for consistency with the other
// passes).
355 using vpass::visit;
356
357 val_set live;
358 bool live_changed;
359
360 public:
361
// live_changed starts as false; live is default-constructed.
362 liveness(shader &s) : rev_vpass(s), live_changed(false) {}
363
364 virtual int init();
365
366 virtual bool visit(node &n, bool enter);
367 virtual bool visit(bb_node &n, bool enter);
368 virtual bool visit(container_node &n, bool enter);
369 virtual bool visit(alu_group_node &n, bool enter);
370 virtual bool visit(cf_node &n, bool enter);
371 virtual bool visit(alu_node &n, bool enter);
372 virtual bool visit(alu_packed_node &n, bool enter);
373 virtual bool visit(fetch_node &n, bool enter);
374 virtual bool visit(region_node &n, bool enter);
375 virtual bool visit(repeat_node &n, bool enter);
376 virtual bool visit(depart_node &n, bool enter);
377 virtual bool visit(if_node &n, bool enter);
378
379 private:
380
381 void update_interferences();
382 void process_op(node &n);
383
384 bool remove_val(value *v);
385 bool remove_vec(vvec &v);
386 bool process_outs(node& n);
387 void process_ins(node& n);
388
389 void process_phi_outs(container_node *phi);
390 void process_phi_branch(container_node *phi, unsigned id);
391
392 bool process_maydef(value *v);
393
394 bool add_vec(vvec &vv, bool src);
395
396 void update_src_vec(vvec &vv, bool src);
397 };
398
399
// Plain aggregate used as the output parameter of
// peephole::get_bool_op_info(). It has no constructor, so its members are
// uninitialized unless the creator initializes them.
// NOTE(review): field meanings (invert flag, int conversion kind, defining
// alu node) are inferred from the names - confirm against peephole.
400 struct bool_op_info {
401 bool invert;
402 unsigned int_cvt;
403
404 alu_node *n;
405 };
406
// Pass derived directly from pass; all members are public. run_on() takes a
// container and the optimize_* / convert_* members take individual alu
// nodes.
// NOTE(review): presumably performs local instruction rewrites on
// comparison/conditional alu ops - confirm against the implementation.
407 class peephole : public pass {
408
409 public:
410
411 peephole(shader &sh) : pass(sh) {}
412
413 virtual int run();
414
415 void run_on(container_node *c);
416
417 void optimize_cc_op(alu_node *a);
418
419 void optimize_cc_op2(alu_node *a);
420 void optimize_CNDcc_op(alu_node *a);
421
// Fills 'bop' for value 'b'; the bool result presumably reports whether the
// information could be determined - TODO confirm.
422 bool get_bool_op_info(value *b, bool_op_info& bop);
// The node pointer is taken by reference, so the callee can replace it.
423 bool get_bool_flt_to_int_source(alu_node* &a);
424 void convert_float_setcc(alu_node *f2i, alu_node *s);
425 };
426
427
// rev_vpass that overrides visit() for plain nodes and alu nodes and offers
// try_inline / try_reduce / eliminate / unpredicate operations on a node.
// NOTE(review): presumably simplifies or removes psi operations - confirm
// against the implementation.
428 class psi_ops : public rev_vpass {
// Re-expose the inherited visit overloads that the two visit() declarations
// in this class would otherwise hide.
429 using rev_vpass::visit;
430
431 public:
432
433 psi_ops(shader &s) : rev_vpass(s) {}
434
435 virtual bool visit(node &n, bool enter);
436 virtual bool visit(alu_node &n, bool enter);
437
438 bool try_inline(node &n);
439 bool try_reduce(node &n);
440 bool eliminate(node &n);
441
442 void unpredicate(node *n);
443 };
444
445
446 // check correctness of the generated code, e.g.:
447 // - expected source operand value is the last value written to its gpr,
448 // - all arguments of phi node should be allocated to the same gpr,
449 // TODO other tests
// State: a stack of register-to-value maps (rm_stack) indexed by
// rm_stk_level, plus an array of five value pointers (prev_dst).
450 class ra_checker : public pass {
451
452 typedef std::map<sel_chan, value *> reg_value_map;
453
454 typedef std::vector<reg_value_map> regmap_stack;
455
456 regmap_stack rm_stack;
457 unsigned rm_stk_level;
458
459 value* prev_dst[5];
460
461 public:
462
// rm_stk_level starts at 0 and all prev_dst entries start as null. rm_stack
// is default-constructed, i.e. empty.
463 ra_checker(shader &sh) : pass(sh), rm_stk_level(0), prev_dst() {}
464
465 virtual int run();
466
467 void run_on(container_node *c);
468
469 void dump_error(const error_info &e);
470 void dump_all_errors();
471
472 private:
473
// Map at the current stack level. No bounds check is done, and rm_stack is
// empty after construction, so it has to be sized before the first call
// (presumably in run() or push_stack() - confirm).
474 reg_value_map& rmap() { return rm_stack[rm_stk_level]; }
475
476 void push_stack();
477 void pop_stack();
478
479 // when going out of the alu clause, values in the clause temporary gprs,
480 // AR, predicate values, PS/PV are destroyed
481 void kill_alu_only_regs();
482 void error(node *n, unsigned id, std::string msg);
483
484 void check_phi_src(container_node *p, unsigned id);
485 void process_phi_dst(container_node *p);
486 void check_alu_group(alu_group_node *g);
487 void process_op_dst(node *n);
488 void check_op_src(node *n);
489 void check_src_vec(node *n, unsigned id, vvec &vv, bool src);
490 void check_value_gpr(node *n, unsigned id, value *v);
491 };
492
493 // =======================================
494
495
// Pass derived directly from pass that only overrides run(); it holds no
// state of its own.
// NOTE(review): presumably the coalescing step of register allocation -
// confirm against the definition of run().
496 class ra_coalesce : public pass {
497
498 public:
499
500 ra_coalesce(shader &sh) : pass(sh) {}
501
502 virtual int run();
503 };
504
505
506
507 // =======================================
508
// Pass derived directly from pass. Keeps two unsigned values: prev_chans
// (zero after construction) and ra_tune, the tuning parameter described in
// the constructor comment.
// NOTE(review): presumably the initial register allocation (coloring) step -
// confirm against the implementation.
509 class ra_init : public pass {
510
511 public:
512
// prev_chans is value-initialized to 0; ra_tune is assigned in the body
// because it depends on the chip type reported by the shader's context.
513 ra_init(shader &sh) : pass(sh), prev_chans() {
514
515 // The parameter below affects register channels distribution.
516 // For cayman (VLIW-4) we're trying to distribute the channels
517 // uniformly, this means significantly better alu slots utilization
518 // at the expense of higher gpr usage. Hopefully this will improve
519 // performance, though it has to be proven with real benchmarks yet.
520 // For VLIW-5 this method could also slightly improve slots
521 // utilization, but increased register pressure seems more significant
522 // and overall performance effect is negative according to some
523 // benchmarks, so it's not used currently. Basically, VLIW-5 doesn't
524 // really need it because trans slot (unrestricted by register write
525 // channel) allows to consume most deviations from uniform channel
526 // distribution.
527 // Value 3 means that for new allocation we'll use channel that differs
528 // from 3 last used channels. 0 for VLIW-5 effectively turns this off.
529
530 ra_tune = sh.get_ctx().is_cayman() ? 3 : 0;
531 }
532
533 virtual int run();
534
535 private:
536
537 unsigned prev_chans;
538 unsigned ra_tune;
539
540 void add_prev_chan(unsigned chan);
541 unsigned get_preferable_chan_mask();
542
543 void ra_node(container_node *c);
544 void process_op(node *n);
545
546 void color(value *v);
547
548 void color_bs_constraint(ra_constraint *c);
549
550 void assign_color(value *v, sel_chan c);
551 void alloc_arrays();
552 };
553
554 // =======================================
555
// Pass derived directly from pass; all members are public and it holds no
// state of its own. The split_* members take containers, nodes, packed alu
// nodes, value vectors or phi containers.
// NOTE(review): presumably splits values/live ranges ahead of register
// allocation - confirm against the implementation.
556 class ra_split : public pass {
557
558 public:
559
560 ra_split(shader &sh) : pass(sh) {}
561
562 virtual int run();
563
564 void split(container_node *n);
565 void split_op(node *n);
566 void split_alu_packed(alu_packed_node *n);
567 void split_vector_inst(node *n);
568
569 void split_packed_ins(alu_packed_node *n);
570
// Compiled out: split_pinned_outs is not part of the class.
571 #if 0
572 void split_pinned_outs(node *n);
573 #endif
574
575 void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz);
576
577 void split_phi_src(container_node *loc, container_node *c, unsigned id,
578 bool loop);
579 void split_phi_dst(node *loc, container_node *c, bool loop);
580 void init_phi_constraints(container_node *c);
581 };
582
583
584
// vpass that overrides visit() for cf, alu, fetch, region, repeat and depart
// nodes and maintains a stack of value sets (stk) indexed by 'level'.
// NOTE(review): presumably collects the values defined in each nesting level
// and creates phi nodes for them (see create_phi_nodes) - confirm against
// the implementation.
585 class ssa_prepare : public vpass {
// Re-expose the vpass::visit overloads that the visit() declarations in this
// class would otherwise hide.
586 using vpass::visit;
587
588 typedef std::vector<val_set> vd_stk;
589 vd_stk stk;
590
591 unsigned level;
592
593 public:
// level starts at 0; stk is default-constructed, i.e. empty.
594 ssa_prepare(shader &s) : vpass(s), level(0) {}
595
596 virtual bool visit(cf_node &n, bool enter);
597 virtual bool visit(alu_node &n, bool enter);
598 virtual bool visit(fetch_node &n, bool enter);
599 virtual bool visit(region_node &n, bool enter);
600 virtual bool visit(repeat_node &n, bool enter);
601 virtual bool visit(depart_node &n, bool enter);
602
603 private:
604
// Enter a new level: grow stk so that index 'level' exists, or, when the
// slot already exists, empty it before it is reused.
605 void push_stk() {
606 ++level;
607 if (level + 1 > stk.size())
608 stk.resize(level+1);
609 else
610 stk[level].clear();
611 }
// Leave the current level: the set of the level being left is merged into
// the set of the level below it. Popping at level 0 is only caught by the
// assert.
612 void pop_stk() {
613 assert(level);
614 --level;
615 stk[level].add_set(stk[level + 1]);
616 }
617
618 void add_defs(node &n);
619
// Set of the current level. Unchecked access: stk is empty until push_stk()
// has been called at least once.
620 val_set & cur_set() { return stk[level]; }
621
622 container_node* create_phi_nodes(int count);
623 };
624
// vpass that overrides init() and visit() for most node types. State: a map
// from value to an unsigned index (def_count), a stack of such maps
// (rename_stack) and a map from a 32-bit key to value (values).
// NOTE(review): presumably the renaming step of SSA construction, giving
// each definition of a value a new index - confirm against the
// implementation.
625 class ssa_rename : public vpass {
// Re-expose the vpass::visit overloads that the visit() declarations in this
// class would otherwise hide.
626 using vpass::visit;
627
628 typedef sb_map<value*, unsigned> def_map;
629
630 def_map def_count;
631 std::stack<def_map> rename_stack;
632
633 typedef std::map<uint32_t, value*> val_map;
634 val_map values;
635
636 public:
637
638 ssa_rename(shader &s) : vpass(s) {}
639
640 virtual int init();
641
642 virtual bool visit(container_node &n, bool enter);
643 virtual bool visit(node &n, bool enter);
644 virtual bool visit(alu_group_node &n, bool enter);
645 virtual bool visit(cf_node &n, bool enter);
646 virtual bool visit(alu_node &n, bool enter);
647 virtual bool visit(alu_packed_node &n, bool enter);
648 virtual bool visit(fetch_node &n, bool enter);
649 virtual bool visit(region_node &n, bool enter);
650 virtual bool visit(repeat_node &n, bool enter);
651 virtual bool visit(depart_node &n, bool enter);
652 virtual bool visit(if_node &n, bool enter);
653
654 private:
655
656 void push(node *phi);
657 void pop();
658
// Accessors for the per-value index stored in a def_map.
659 unsigned get_index(def_map& m, value* v);
660 void set_index(def_map& m, value* v, unsigned index);
661 unsigned new_index(def_map& m, value* v);
662
// Both return a value pointer - presumably the renamed version of 'v';
// TODO confirm.
663 value* rename_use(node *n, value* v);
664 value* rename_def(node *def, value* v);
665
666 void rename_src_vec(node *n, vvec &vv, bool src);
667 void rename_dst_vec(node *def, vvec &vv, bool set_def);
668
669 void rename_src(node *n);
670 void rename_dst(node *n);
671
672 void rename_phi_args(container_node *phi, unsigned op, bool def);
673
674 void rename_virt(node *n);
675 void rename_virt_val(node *n, value *v);
676 };
677
// Pass derived directly from pass. State: one cf_node pointer per export
// type (last_export), a last_cf pointer and two counters (ngpr, nstack).
// NOTE(review): presumably the final step that prepares the IR for bytecode
// emission and records gpr/stack usage - confirm against the
// implementation.
678 class bc_finalizer : public pass {
679
680 cf_node *last_export[EXP_TYPE_COUNT];
681 cf_node *last_cf;
682
683 unsigned ngpr;
684 unsigned nstack;
685
686 public:
687
// All pointers start as null and both counters start at zero
// (value-initialization).
688 bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(),
689 nstack() {}
690
691 virtual int run();
692
693 void finalize_loop(region_node *r);
694 void finalize_if(region_node *r);
695
696 void run_on(container_node *c);
697
698 void finalize_alu_group(alu_group_node *g);
699 void finalize_alu_src(alu_group_node *g, alu_node *a);
700
701 void emit_set_grad(fetch_node* f);
702 void finalize_fetch(fetch_node *f);
703
704 void finalize_cf(cf_node *c);
705
706 sel_chan translate_kcache(cf_node *alu, value *v);
707
708 void update_ngpr(unsigned gpr);
// 'add' defaults to 0 when omitted.
709 void update_nstack(region_node *r, unsigned add = 0);
710
// 'loops' and 'ifs' are taken by reference, so they can be written by the
// callee; 'add' defaults to 0 when omitted.
711 unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs,
712 unsigned add = 0);
713
714 void cf_peephole();
715
716 private:
717 void copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start);
718 void emit_set_texture_offsets(fetch_node &f);
719 };
720
721
722 } // namespace r600_sb
723
724 #endif /* SB_PASS_H_ */