From: Eric Anholt <eric@anholt.net>
Date: Tue, 19 Nov 2013 21:07:12 +0000 (-0800)
Subject: i965/fs: Make the first pre-allocation heuristic be the post heuristic.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=46cf80fb366cb14827724a7fea004e81400cc602;p=mesa.git

i965/fs: Make the first pre-allocation heuristic be the post heuristic.

I recently made us try two different things that tried to reduce register
pressure so that we would be more likely to allocate successfully.  But
now that we have the logic for trying two, we can make the first thing we
try be the normal, not-prioritizing-register-pressure heuristic.

This means one less scheduling pass in the common case of that heuristic
not producing spills, plus the best schedule we know how to produce, if
that one happens to succeed.  This is important, because our register
allocation produces a lot of possibly avoidable dependencies for the
post-register-allocation schedule, despite ra_set_allocate_round_robin().

GLB2.7: 1.04127% +/- 0.732461% fps improvement (n=31)
nexuiz: No difference (n=5)
lightsmark: 0.838512% +/- 0.300147% fps improvement (n=86)
minecraft apitrace: No difference (n=15)

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d004521facd..261f9061540 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3192,6 +3192,7 @@ fs_visitor::run()
 {
    sanity_param_count = fp->Base.Parameters->NumParameters;
    uint32_t orig_nr_params = c->prog_data.nr_params;
+   bool allocated_without_spills;
 
    assign_binding_table_offsets();
 
@@ -3276,27 +3277,45 @@ fs_visitor::run()
       assign_curb_setup();
       assign_urb_setup();
 
-      schedule_instructions(SCHEDULE_PRE_NON_LIFO);
+      static enum instruction_scheduler_mode pre_modes[] = {
+         SCHEDULE_PRE,
+         SCHEDULE_PRE_NON_LIFO,
+         SCHEDULE_PRE_LIFO,
+      };
 
-      if (0)
-	 assign_regs_trivial();
-      else {
-         if (!assign_regs(false)) {
-            /* Try a non-spilling register allocation again with a different
-             * scheduling heuristic.
-             */
-            schedule_instructions(SCHEDULE_PRE_LIFO);
-            if (!assign_regs(false)) {
-               if (dispatch_width == 16) {
-                  fail("Failure to register allocate.  Reduce number of "
-                       "live scalar values to avoid this.");
-               } else {
-                  while (!assign_regs(true)) {
-                     if (failed)
-                        break;
-                  }
-               }
-            }
+      /* Try each scheduling heuristic to see if it can successfully register
+       * allocate without spilling.  They should be ordered by decreasing
+       * performance but increasing likelihood of allocating.
+       */
+      for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
+         schedule_instructions(pre_modes[i]);
+
+         if (0) {
+            assign_regs_trivial();
+            allocated_without_spills = true;
+         } else {
+            allocated_without_spills = assign_regs(false);
+         }
+         if (allocated_without_spills)
+            break;
+      }
+
+      if (!allocated_without_spills) {
+         /* We assume that any spilling is worse than just dropping back to
+          * SIMD8.  There's probably actually some intermediate point where
+          * SIMD16 with a couple of spills is still better.
+          */
+         if (dispatch_width == 16) {
+            fail("Failure to register allocate.  Reduce number of "
+                 "live scalar values to avoid this.");
+         }
+
+         /* Since we're out of heuristics, just go spill registers until we
+          * get an allocation.
+          */
+         while (!assign_regs(true)) {
+            if (failed)
+               break;
          }
       }
    }
@@ -3311,7 +3330,8 @@ fs_visitor::run()
    if (failed)
       return false;
 
-   schedule_instructions(SCHEDULE_POST);
+   if (!allocated_without_spills)
+      schedule_instructions(SCHEDULE_POST);
 
    if (dispatch_width == 8) {
       c->prog_data.reg_blocks = brw_register_blocks(grf_used);
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 1050d674d8c..baf67fb1ea2 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1140,10 +1140,10 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
 {
    schedule_node *chosen = NULL;
 
-   if (post_reg_alloc) {
+   if (mode == SCHEDULE_PRE || mode == SCHEDULE_POST) {
       int chosen_time = 0;
 
-      /* Of the instructions closest ready to execute or the closest to
+      /* Of the instructions ready to execute or the closest to
        * being ready, choose the oldest one.
        */
       foreach_list(node, &instructions) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index ae7823e4064..ff5af932e08 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -60,6 +60,7 @@ public:
 };
 
 enum instruction_scheduler_mode {
+   SCHEDULE_PRE,
    SCHEDULE_PRE_NON_LIFO,
    SCHEDULE_PRE_LIFO,
    SCHEDULE_POST,