r600g/sb: use simple heuristic to limit register pressure
author Vadim Girlin <vadimgirlin@gmail.com>
Sat, 27 Apr 2013 08:03:39 +0000 (12:03 +0400)
committer Vadim Girlin <vadimgirlin@gmail.com>
Tue, 30 Apr 2013 17:50:48 +0000 (21:50 +0400)
This is not complete register pressure tracking, but it helps to prevent
register allocation problems in the cases where they were observed.

The problems are exposed by false dependencies between fetch instructions,
introduced by some recent changes in TGSI and/or the default backend.
Sometimes we have code like this:

...
SAMPLE R5.xyzw, R5.xyzw
... store R5.xyzw somewhere
MOV R5.x, <next x coord>
MOV R5.y, <next y coord>
SAMPLE R5.xyzw, R5.xyzw
... <may be repeated a lot of times>

With 2D resources, the z and w components of the SAMPLE source register aren't
used and could simply be masked, but the shader backend doesn't have this
information, so the optimization algorithms treat them as a data dependency.

src/gallium/drivers/r600/sb/sb_gcm.cpp
src/gallium/drivers/r600/sb/sb_pass.h

index b6d20430750d6e1b0b45bbb226aa5ce8f88831a7..2bae0a357260f0ae4d5862249c43a60e7665cd9e 100644 (file)
@@ -368,7 +368,7 @@ void gcm::bu_sched_bb(bb_node* bb) {
 
                        cnt_ready[sq] = bu_ready[sq].size();
 
-                       if ((sq == SQ_TEX || sq == SQ_VTX) &&
+                       if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold &&
                                        cnt_ready[sq] < ctx.max_fetch/2 &&
                                        !bu_ready_next[SQ_ALU].empty()) {
                                sq = SQ_ALU;
@@ -384,6 +384,16 @@ void gcm::bu_sched_bb(bb_node* bb) {
                                        last_inst_type = sq;
                                }
 
+                               // simple heuristic to limit register pressure,
+                               if (sq == SQ_ALU && live_count > rp_threshold &&
+                                               (!bu_ready[SQ_TEX].empty() ||
+                                                !bu_ready[SQ_VTX].empty() ||
+                                                !bu_ready_next[SQ_TEX].empty() ||
+                                                !bu_ready_next[SQ_VTX].empty())) {
+                                       GCM_DUMP( cerr << "switching to fetch (regpressure)\n"; );
+                                       break;
+                               }
+
                                n = bu_ready[sq].front();
 
                                // real count (e.g. SAMPLE_G will be expanded to 3 instructions,
@@ -442,6 +452,11 @@ void gcm::bu_release_defs(vvec& v, bool src) {
                        bu_release_defs(v->muse, true);
                } else if (src)
                        bu_release_val(v);
+               else {
+                       if (live.remove_val(v)) {
+                               --live_count;
+                       }
+               }
        }
 }
 
@@ -586,6 +601,8 @@ void gcm::add_ready(node *n) {
        sched_queue_id sq = sh.get_queue_id(n);
        if (n->flags & NF_SCHEDULE_EARLY)
                bu_ready_early[sq].push_back(n);
+       else if (sq == SQ_ALU && n->is_copy_mov())
+               bu_ready[sq].push_front(n);
        else
                bu_ready_next[sq].push_back(n);
 }
@@ -665,9 +682,15 @@ void gcm::bu_release_val(value* v) {
        node *n = v->any_def();
 
        if (n && n->parent == &pending) {
-               unsigned uc = ++nuc_stk[ucs_level][n];
+               nuc_map &m = nuc_stk[ucs_level];
+               unsigned uc = ++m[n];
                unsigned uc2 = uses[n];
 
+               if (live.add_val(v)) {
+                       ++live_count;
+                       GCM_DUMP ( cerr << "live_count: " << live_count << "\n"; );
+               }
+
                GCM_DUMP(
                        cerr << "release val ";
                        dump::dump_val(v);
index ac0a51777e92f8d1a1611336f0a6b60ef23d9d3f..a8a657fbb6fb511ea58e2010ec0baf2aabc48193 100644 (file)
@@ -246,12 +246,19 @@ class gcm : public pass {
 
        unsigned cur_sq;
 
+       // for register pressure tracking in bottom-up pass
+       val_set live;
+       int live_count;
+
+       static const int rp_threshold = 100;
+
 public:
 
        gcm(shader &sh) : pass(sh),
                bu_ready(), bu_ready_next(), bu_ready_early(),
                ready(), op_map(), uses(), nuc_stk(1), ucs_level(),
-               bu_bb(), pending_defs(), pending_nodes() {}
+               bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
+               live(), live_count() {}
 
        virtual int run();