i965/fs: Make the first pre-allocation heuristic be the post heuristic.
authorEric Anholt <eric@anholt.net>
Tue, 19 Nov 2013 21:07:12 +0000 (13:07 -0800)
committerEric Anholt <eric@anholt.net>
Sat, 23 Nov 2013 00:36:27 +0000 (16:36 -0800)
I recently made us try two different things that tried to reduce register
pressure so that we would be more likely to allocate successfully.  But
now that we have the logic for trying two, we can make the first thing we
try be the normal, not-prioritizing-register-pressure heuristic.

This means one less scheduling pass in the common case of that heuristic
not producing spills, plus the best schedule we know how to produce, if
that one happens to succeed.  This is important, because our register
allocation produces a lot of possibly avoidable dependencies for the
post-register-allocation schedule, despite ra_set_allocate_round_robin().

GLB2.7: 1.04127% +/- 0.732461% fps improvement (n=31)
nexuiz: No difference (n=5)
lightsmark: 0.838512% +/- 0.300147% fps improvement (n=86)
minecraft apitrace: No difference (n=15)

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
src/mesa/drivers/dri/i965/brw_shader.h

index d004521facd915c9a7c87554f37c3f919a2d5e1e..261f9061540d3478561c8f2562ace2f0b68568be 100644 (file)
@@ -3192,6 +3192,7 @@ fs_visitor::run()
 {
    sanity_param_count = fp->Base.Parameters->NumParameters;
    uint32_t orig_nr_params = c->prog_data.nr_params;
+   bool allocated_without_spills;
 
    assign_binding_table_offsets();
 
@@ -3276,27 +3277,45 @@ fs_visitor::run()
       assign_curb_setup();
       assign_urb_setup();
 
-      schedule_instructions(SCHEDULE_PRE_NON_LIFO);
+      static enum instruction_scheduler_mode pre_modes[] = {
+         SCHEDULE_PRE,
+         SCHEDULE_PRE_NON_LIFO,
+         SCHEDULE_PRE_LIFO,
+      };
 
-      if (0)
-        assign_regs_trivial();
-      else {
-         if (!assign_regs(false)) {
-            /* Try a non-spilling register allocation again with a different
-             * scheduling heuristic.
-             */
-            schedule_instructions(SCHEDULE_PRE_LIFO);
-            if (!assign_regs(false)) {
-               if (dispatch_width == 16) {
-                  fail("Failure to register allocate.  Reduce number of "
-                       "live scalar values to avoid this.");
-               } else {
-                  while (!assign_regs(true)) {
-                     if (failed)
-                        break;
-                  }
-               }
-            }
+      /* Try each scheduling heuristic to see if it can successfully register
+       * allocate without spilling.  They should be ordered by decreasing
+       * performance but increasing likelihood of allocating.
+       */
+      for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
+         schedule_instructions(pre_modes[i]);
+
+         if (0) {
+            assign_regs_trivial();
+            allocated_without_spills = true;
+         } else {
+            allocated_without_spills = assign_regs(false);
+         }
+         if (allocated_without_spills)
+            break;
+      }
+
+      if (!allocated_without_spills) {
+         /* We assume that any spilling is worse than just dropping back to
+          * SIMD8.  There's probably actually some intermediate point where
+          * SIMD16 with a couple of spills is still better.
+          */
+         if (dispatch_width == 16) {
+            fail("Failure to register allocate.  Reduce number of "
+                 "live scalar values to avoid this.");
+         }
+
+         /* Since we're out of heuristics, just go spill registers until we
+          * get an allocation.
+          */
+         while (!assign_regs(true)) {
+            if (failed)
+               break;
          }
       }
    }
@@ -3311,7 +3330,8 @@ fs_visitor::run()
    if (failed)
       return false;
 
-   schedule_instructions(SCHEDULE_POST);
+   if (!allocated_without_spills)
+      schedule_instructions(SCHEDULE_POST);
 
    if (dispatch_width == 8) {
       c->prog_data.reg_blocks = brw_register_blocks(grf_used);
index 1050d674d8c99b03ffabe6f253e780a7d08b2249..baf67fb1ea26252f110148cd52a31c5853a35a43 100644 (file)
@@ -1140,10 +1140,10 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
 {
    schedule_node *chosen = NULL;
 
-   if (post_reg_alloc) {
+   if (mode == SCHEDULE_PRE || mode == SCHEDULE_POST) {
       int chosen_time = 0;
 
-      /* Of the instructions closest ready to execute or the closest to
+      /* Of the instructions ready to execute or the closest to
        * being ready, choose the oldest one.
        */
       foreach_list(node, &instructions) {
index ae7823e4064598086e4e105d33a8fd83b84dc43c..ff5af932e087fdb3a520a1710ac102dfc8d44f41 100644 (file)
@@ -60,6 +60,7 @@ public:
 };
 
 enum instruction_scheduler_mode {
+   SCHEDULE_PRE,
    SCHEDULE_PRE_NON_LIFO,
    SCHEDULE_PRE_LIFO,
    SCHEDULE_POST,