r600g/sb: use simple heuristic to limit register pressure
[mesa.git] / src / gallium / drivers / r600 / sb / sb_pass.h
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #ifndef SB_PASS_H_
28 #define SB_PASS_H_
29
30 #include <stack>
31
32 namespace r600_sb {
33
34 class pass {
35 protected:
36 sb_context &ctx;
37 shader &sh;
38
39 public:
40 pass(shader &s);
41
42 virtual int run();
43
44 virtual ~pass() {}
45 };
46
47 class vpass : public pass {
48
49 public:
50
51 vpass(shader &s) : pass(s) {}
52
53 virtual int init();
54 virtual int done();
55
56 virtual int run();
57 virtual void run_on(container_node &n);
58
59 virtual bool visit(node &n, bool enter);
60 virtual bool visit(container_node &n, bool enter);
61 virtual bool visit(alu_group_node &n, bool enter);
62 virtual bool visit(cf_node &n, bool enter);
63 virtual bool visit(alu_node &n, bool enter);
64 virtual bool visit(alu_packed_node &n, bool enter);
65 virtual bool visit(fetch_node &n, bool enter);
66 virtual bool visit(region_node &n, bool enter);
67 virtual bool visit(repeat_node &n, bool enter);
68 virtual bool visit(depart_node &n, bool enter);
69 virtual bool visit(if_node &n, bool enter);
70 virtual bool visit(bb_node &n, bool enter);
71
72 };
73
74 class rev_vpass : public vpass {
75
76 public:
77 rev_vpass(shader &s) : vpass(s) {}
78
79 virtual void run_on(container_node &n);
80 };
81
82
83 // =================== PASSES
84
85 class bytecode;
86
87 class bc_dump : public vpass {
88 using vpass::visit;
89
90 std::ostream &o;
91
92 uint32_t *bc_data;
93 unsigned ndw;
94
95 unsigned id;
96
97 unsigned new_group, group_index;
98
99 public:
100
101 bc_dump(shader &s, std::ostream &o, bytecode *bc = NULL);
102
103 bc_dump(shader &s, std::ostream &o, uint32_t *bc_ptr, unsigned ndw) :
104 vpass(s), o(o), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {}
105
106 virtual int init();
107 virtual int done();
108
109 virtual bool visit(cf_node &n, bool enter);
110 virtual bool visit(alu_node &n, bool enter);
111 virtual bool visit(fetch_node &n, bool enter);
112
113 void dump_dw(unsigned dw_id, unsigned count = 2);
114
115 void dump(cf_node& n);
116 void dump(alu_node& n);
117 void dump(fetch_node& n);
118 };
119
120
121 class dce_cleanup : public vpass {
122 using vpass::visit;
123
124 public:
125
126 dce_cleanup(shader &s) : vpass(s) {}
127
128 virtual bool visit(node &n, bool enter);
129 virtual bool visit(alu_group_node &n, bool enter);
130 virtual bool visit(cf_node &n, bool enter);
131 virtual bool visit(alu_node &n, bool enter);
132 virtual bool visit(alu_packed_node &n, bool enter);
133 virtual bool visit(fetch_node &n, bool enter);
134 virtual bool visit(region_node &n, bool enter);
135 virtual bool visit(container_node &n, bool enter);
136
137 private:
138
139 void cleanup_dst(node &n);
140 void cleanup_dst_vec(vvec &vv);
141
142 };
143
144
145 class def_use : public pass {
146
147 public:
148
149 def_use(shader &sh) : pass(sh) {}
150
151 virtual int run();
152 void run_on(node *n, bool defs);
153
154 private:
155
156 void process_uses(node *n);
157 void process_defs(node *n, vvec &vv, bool arr_def);
158 void process_phi(container_node *c, bool defs, bool uses);
159 };
160
161
162
163 class dump : public vpass {
164 using vpass::visit;
165
166 int level;
167
168 public:
169
170 dump(shader &s) : vpass(s), level(0) {}
171
172 virtual bool visit(node &n, bool enter);
173 virtual bool visit(container_node &n, bool enter);
174 virtual bool visit(alu_group_node &n, bool enter);
175 virtual bool visit(cf_node &n, bool enter);
176 virtual bool visit(alu_node &n, bool enter);
177 virtual bool visit(alu_packed_node &n, bool enter);
178 virtual bool visit(fetch_node &n, bool enter);
179 virtual bool visit(region_node &n, bool enter);
180 virtual bool visit(repeat_node &n, bool enter);
181 virtual bool visit(depart_node &n, bool enter);
182 virtual bool visit(if_node &n, bool enter);
183 virtual bool visit(bb_node &n, bool enter);
184
185
186 static void dump_op(node &n, const char *name);
187 static void dump_vec(const vvec & vv);
188 static void dump_set(shader &sh, val_set & v);
189
190 static void dump_rels(vvec & vv);
191
192 static void dump_val(value *v);
193 static void dump_op(node *n);
194
195 static void dump_op_list(container_node *c);
196 static void dump_queue(sched_queue &q);
197
198 static void dump_alu(alu_node *n);
199
200 private:
201
202 void indent();
203
204 void dump_common(node &n);
205 void dump_flags(node &n);
206
207 void dump_live_values(container_node &n, bool before);
208 };
209
210
211 // Global Code Motion
212
213 class gcm : public pass {
214
215 sched_queue bu_ready[SQ_NUM];
216 sched_queue bu_ready_next[SQ_NUM];
217 sched_queue bu_ready_early[SQ_NUM];
218 sched_queue ready;
219 sched_queue ready_above;
220
221 container_node pending;
222
223 struct op_info {
224 bb_node* top_bb;
225 bb_node* bottom_bb;
226 op_info() : top_bb(), bottom_bb() {}
227 };
228
229 typedef std::map<node*, op_info> op_info_map;
230
231 typedef std::map<node*, unsigned> nuc_map;
232
233 op_info_map op_map;
234 nuc_map uses;
235
236 typedef std::vector<nuc_map> nuc_stack;
237
238 nuc_stack nuc_stk;
239 unsigned ucs_level;
240
241 bb_node * bu_bb;
242
243 vvec pending_defs;
244
245 node_list pending_nodes;
246
247 unsigned cur_sq;
248
249 // for register pressure tracking in bottom-up pass
250 val_set live;
251 int live_count;
252
253 static const int rp_threshold = 100;
254
255 public:
256
257 gcm(shader &sh) : pass(sh),
258 bu_ready(), bu_ready_next(), bu_ready_early(),
259 ready(), op_map(), uses(), nuc_stk(1), ucs_level(),
260 bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
261 live(), live_count() {}
262
263 virtual int run();
264
265 private:
266
267 void collect_instructions(container_node *c, bool early_pass);
268
269 void sched_early(container_node *n);
270 void td_sched_bb(bb_node *bb);
271 bool td_is_ready(node *n);
272 void td_release_uses(vvec &v);
273 void td_release_val(value *v);
274 void td_schedule(bb_node *bb, node *n);
275
276 void sched_late(container_node *n);
277 void bu_sched_bb(bb_node *bb);
278 void bu_release_defs(vvec &v, bool src);
279 void bu_release_phi_defs(container_node *p, unsigned op);
280 bool bu_is_ready(node *n);
281 void bu_release_val(value *v);
282 void bu_release_op(node * n);
283 void bu_find_best_bb(node *n, op_info &oi);
284 void bu_schedule(container_node *bb, node *n);
285
286 void push_uc_stack();
287 void pop_uc_stack();
288
289 void init_def_count(nuc_map &m, container_node &s);
290 void init_use_count(nuc_map &m, container_node &s);
291 unsigned get_uc_vec(vvec &vv);
292 unsigned get_dc_vec(vvec &vv, bool src);
293
294 void add_ready(node *n);
295
296 void dump_uc_stack();
297
298 unsigned real_alu_count(sched_queue &q, unsigned max);
299
300 // check if we have not less than threshold ready alu instructions
301 bool check_alu_ready_count(unsigned threshold);
302 };
303
304
305 class gvn : public vpass {
306 using vpass::visit;
307
308 public:
309
310 gvn(shader &sh) : vpass(sh) {}
311
312 virtual bool visit(node &n, bool enter);
313 virtual bool visit(cf_node &n, bool enter);
314 virtual bool visit(alu_node &n, bool enter);
315 virtual bool visit(alu_packed_node &n, bool enter);
316 virtual bool visit(fetch_node &n, bool enter);
317 virtual bool visit(region_node &n, bool enter);
318
319 private:
320
321 void process_op(node &n, bool rewrite = true);
322
323 // returns true if the value was rewritten
324 bool process_src(value* &v, bool rewrite);
325
326
327 void process_alu_src_constants(node &n, value* &v);
328 };
329
330
331 class if_conversion : public pass {
332
333 public:
334
335 if_conversion(shader &sh) : pass(sh) {}
336
337 virtual int run();
338
339 bool run_on(region_node *r);
340
341 alu_node* convert_phi(value *select, node *phi);
342
343 unsigned try_convert_kills(region_node* r);
344
345 };
346
347
348 class liveness : public rev_vpass {
349 using vpass::visit;
350
351 val_set live;
352 bool live_changed;
353
354 public:
355
356 liveness(shader &s) : rev_vpass(s), live_changed(false) {}
357
358 virtual int init();
359
360 virtual bool visit(node &n, bool enter);
361 virtual bool visit(bb_node &n, bool enter);
362 virtual bool visit(container_node &n, bool enter);
363 virtual bool visit(alu_group_node &n, bool enter);
364 virtual bool visit(cf_node &n, bool enter);
365 virtual bool visit(alu_node &n, bool enter);
366 virtual bool visit(alu_packed_node &n, bool enter);
367 virtual bool visit(fetch_node &n, bool enter);
368 virtual bool visit(region_node &n, bool enter);
369 virtual bool visit(repeat_node &n, bool enter);
370 virtual bool visit(depart_node &n, bool enter);
371 virtual bool visit(if_node &n, bool enter);
372
373 private:
374
375 void update_interferences();
376 void process_op(node &n);
377
378 bool remove_val(value *v);
379 bool remove_vec(vvec &v);
380 bool process_outs(node& n);
381 void process_ins(node& n);
382
383 void process_phi_outs(container_node *phi);
384 void process_phi_branch(container_node *phi, unsigned id);
385
386 bool process_maydef(value *v);
387
388 bool add_vec(vvec &vv, bool src);
389
390 void update_src_vec(vvec &vv, bool src);
391 };
392
393
394 struct bool_op_info {
395 bool invert;
396 unsigned int_cvt;
397
398 alu_node *n;
399 };
400
401 class peephole : public pass {
402
403 public:
404
405 peephole(shader &sh) : pass(sh) {}
406
407 virtual int run();
408
409 void run_on(container_node *c);
410
411 void optimize_cc_op(alu_node *a);
412
413 void optimize_SETcc_op(alu_node *a);
414 void optimize_CNDcc_op(alu_node *a);
415
416 bool get_bool_op_info(value *b, bool_op_info& bop);
417 bool get_bool_flt_to_int_source(alu_node* &a);
418 void convert_float_setcc(alu_node *f2i, alu_node *s);
419 };
420
421
422 class psi_ops : public rev_vpass {
423 using rev_vpass::visit;
424
425 public:
426
427 psi_ops(shader &s) : rev_vpass(s) {}
428
429 virtual bool visit(node &n, bool enter);
430 virtual bool visit(alu_node &n, bool enter);
431
432 bool try_inline(node &n);
433 bool try_reduce(node &n);
434 bool eliminate(node &n);
435
436 void unpredicate(node *n);
437 };
438
439
440 // check correctness of the generated code, e.g.:
441 // - expected source operand value is the last value written to its gpr,
442 // - all arguments of phi node should be allocated to the same gpr,
443 // TODO other tests
444 class ra_checker : public pass {
445
446 typedef std::map<sel_chan, value *> reg_value_map;
447
448 typedef std::vector<reg_value_map> regmap_stack;
449
450 regmap_stack rm_stack;
451 unsigned rm_stk_level;
452
453 value* prev_dst[5];
454
455 public:
456
457 ra_checker(shader &sh) : pass(sh) {}
458
459 virtual int run();
460
461 void run_on(container_node *c);
462
463 void dump_error(const error_info &e);
464 void dump_all_errors();
465
466 private:
467
468 reg_value_map& rmap() { return rm_stack[rm_stk_level]; }
469
470 void push_stack();
471 void pop_stack();
472
473 // when going out of the alu clause, values in the clause temporary gprs,
474 // AR, predicate values, PS/PV are destroyed
475 void kill_alu_only_regs();
476 void error(node *n, unsigned id, std::string msg);
477
478 void check_phi_src(container_node *p, unsigned id);
479 void process_phi_dst(container_node *p);
480 void check_alu_group(alu_group_node *g);
481 void process_op_dst(node *n);
482 void check_op_src(node *n);
483 void check_src_vec(node *n, unsigned id, vvec &vv, bool src);
484 void check_value_gpr(node *n, unsigned id, value *v);
485 };
486
487 // =======================================
488
489
490 class ra_coalesce : public pass {
491
492 public:
493
494 ra_coalesce(shader &sh) : pass(sh) {}
495
496 virtual int run();
497 };
498
499
500
501 // =======================================
502
503 class ra_init : public pass {
504
505 public:
506
507 ra_init(shader &sh) : pass(sh) {}
508
509 virtual int run();
510
511 private:
512
513 void ra_node(container_node *c);
514 void process_op(node *n);
515
516 void color(value *v);
517
518 void color_bs_constraint(ra_constraint *c);
519
520 void assign_color(value *v, sel_chan c);
521 void alloc_arrays();
522 };
523
524 // =======================================
525
526 class ra_split : public pass {
527
528 public:
529
530 ra_split(shader &sh) : pass(sh) {}
531
532 virtual int run();
533
534 void split(container_node *n);
535 void split_op(node *n);
536 void split_alu_packed(alu_packed_node *n);
537 void split_vector_inst(node *n);
538
539 void split_packed_ins(alu_packed_node *n);
540
541 #if 0
542 void split_pinned_outs(node *n);
543 #endif
544
545 void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz);
546
547 void split_phi_src(container_node *loc, container_node *c, unsigned id,
548 bool loop);
549 void split_phi_dst(node *loc, container_node *c, bool loop);
550 void init_phi_constraints(container_node *c);
551 };
552
553
554
555 class ssa_prepare : public vpass {
556 using vpass::visit;
557
558 typedef std::vector<val_set> vd_stk;
559 vd_stk stk;
560
561 unsigned level;
562
563 public:
564 ssa_prepare(shader &s) : vpass(s), level(0) {}
565
566 virtual bool visit(cf_node &n, bool enter);
567 virtual bool visit(alu_node &n, bool enter);
568 virtual bool visit(fetch_node &n, bool enter);
569 virtual bool visit(region_node &n, bool enter);
570 virtual bool visit(repeat_node &n, bool enter);
571 virtual bool visit(depart_node &n, bool enter);
572
573 private:
574
575 void push_stk() {
576 ++level;
577 if (level + 1 > stk.size())
578 stk.resize(level+1);
579 else
580 stk[level].clear();
581 }
582 void pop_stk() {
583 assert(level);
584 --level;
585 stk[level].add_set(stk[level + 1]);
586 }
587
588 void add_defs(node &n);
589
590 val_set & cur_set() { return stk[level]; }
591
592 container_node* create_phi_nodes(int count);
593 };
594
595 class ssa_rename : public vpass {
596 using vpass::visit;
597
598 typedef sb_map<value*, unsigned> def_map;
599
600 def_map def_count;
601 std::stack<def_map> rename_stack;
602
603 typedef std::map<uint32_t, value*> val_map;
604 val_map values;
605
606 public:
607
608 ssa_rename(shader &s) : vpass(s) {}
609
610 virtual int init();
611
612 virtual bool visit(container_node &n, bool enter);
613 virtual bool visit(node &n, bool enter);
614 virtual bool visit(alu_group_node &n, bool enter);
615 virtual bool visit(cf_node &n, bool enter);
616 virtual bool visit(alu_node &n, bool enter);
617 virtual bool visit(alu_packed_node &n, bool enter);
618 virtual bool visit(fetch_node &n, bool enter);
619 virtual bool visit(region_node &n, bool enter);
620 virtual bool visit(repeat_node &n, bool enter);
621 virtual bool visit(depart_node &n, bool enter);
622 virtual bool visit(if_node &n, bool enter);
623
624 private:
625
626 void push(node *phi);
627 void pop();
628
629 unsigned get_index(def_map& m, value* v);
630 void set_index(def_map& m, value* v, unsigned index);
631 unsigned new_index(def_map& m, value* v);
632
633 value* rename_use(node *n, value* v);
634 value* rename_def(node *def, value* v);
635
636 void rename_src_vec(node *n, vvec &vv, bool src);
637 void rename_dst_vec(node *def, vvec &vv, bool set_def);
638
639 void rename_src(node *n);
640 void rename_dst(node *n);
641
642 void rename_phi_args(container_node *phi, unsigned op, bool def);
643
644 void rename_virt(node *n);
645 void rename_virt_val(node *n, value *v);
646 };
647
648 class bc_finalizer : public pass {
649
650 cf_node *last_export[EXP_TYPE_COUNT];
651 cf_node *last_cf;
652
653 unsigned ngpr;
654 unsigned nstack;
655
656 public:
657
658 bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(),
659 nstack() {}
660
661 virtual int run();
662
663 void finalize_loop(region_node *r);
664 void finalize_if(region_node *r);
665
666 void run_on(container_node *c);
667
668 void finalize_alu_group(alu_group_node *g);
669 void finalize_alu_src(alu_group_node *g, alu_node *a);
670
671 void emit_set_grad(fetch_node* f);
672 void finalize_fetch(fetch_node *f);
673
674 void finalize_cf(cf_node *c);
675
676 sel_chan translate_kcache(cf_node *alu, value *v);
677
678 void update_ngpr(unsigned gpr);
679 void update_nstack(region_node *r, unsigned add = 0);
680
681 void cf_peephole();
682
683 };
684
685
686 } // namespace r600_sb
687
688 #endif /* SB_PASS_H_ */