i965/vec4: Improve CSE performance by expiring some available expressions.
author Matt Turner <mattst88@gmail.com>
Wed, 11 Jun 2014 20:43:15 +0000 (13:43 -0700)
committer Matt Turner <mattst88@gmail.com>
Mon, 7 Jul 2014 01:18:52 +0000 (18:18 -0700)
Port of commit 5daf867f to the vec4 code.

src/mesa/drivers/dri/i965/brw_vec4_cse.cpp

index 11e911f2b64cbea9b6afefc80bb3e7f3a4e9999a..296142b740f8ad38b8ac0d87dc1ba030271717d5 100644 (file)
@@ -128,6 +128,7 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 
    void *cse_ctx = ralloc_context(NULL);
 
+   int ip = block->start_ip;
    for (vec4_instruction *inst = (vec4_instruction *)block->start;
         inst != block->end->next;
         inst = (vec4_instruction *) inst->next) {
@@ -193,6 +194,8 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 
       foreach_in_list_safe(aeb_entry, entry, aeb) {
          for (int i = 0; i < 3; i++) {
+            src_reg *src = &entry->generator->src[i];
+
             /* Kill all AEB entries that use the destination we just
              * overwrote.
              */
@@ -202,8 +205,23 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
                ralloc_free(entry);
                break;
             }
+
+            /* Kill any AEB entries using registers that don't get reused any
+             * more -- a sure sign they'll fail operands_match().
+             */
+            int last_reg_use = MAX2(MAX2(virtual_grf_end[src->reg * 4 + 0],
+                                         virtual_grf_end[src->reg * 4 + 1]),
+                                    MAX2(virtual_grf_end[src->reg * 4 + 2],
+                                         virtual_grf_end[src->reg * 4 + 3]));
+            if (src->file == GRF && last_reg_use < ip) {
+               entry->remove();
+               ralloc_free(entry);
+               break;
+            }
          }
       }
+
+      ip++;
    }
 
    ralloc_free(cse_ctx);
@@ -219,6 +237,8 @@ vec4_visitor::opt_cse()
 {
    bool progress = false;
 
+   calculate_live_intervals();
+
    cfg_t cfg(&instructions);
 
    for (int b = 0; b < cfg.num_blocks; b++) {