i965/fs: Try a different pre-scheduling heuristic if the first spills.

author Eric Anholt <eric@anholt.net>

Thu, 7 Nov 2013 01:38:23 +0000 (17:38 -0800)

committer Eric Anholt <eric@anholt.net>

Tue, 12 Nov 2013 23:06:28 +0000 (15:06 -0800)
author Eric Anholt <eric@anholt.net>
Thu, 7 Nov 2013 01:38:23 +0000 (17:38 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 12 Nov 2013 23:06:28 +0000 (15:06 -0800)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index afa82c9abbf23ba01c3e301d88e7d44842e05ae8..f89390c346c26d0f52ac227b955b8da4dcd89055 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3286,15 +3286,28 @@ fs_visitor::run()
        assign_curb_setup();
        assign_urb_setup();
  
-      schedule_instructions(false);
+      schedule_instructions(SCHEDULE_PRE_NON_LIFO);
  
        if (0)
          assign_regs_trivial();
        else {
-        while (!assign_regs()) {
-           if (failed)
-              break;
-        }
+         if (!assign_regs(false)) {
+            /* Try a non-spilling register allocation again with a different
+             * scheduling heuristic.
+             */
+            schedule_instructions(SCHEDULE_PRE_LIFO);
+            if (!assign_regs(false)) {
+               if (dispatch_width == 16) {
+                  fail("Failure to register allocate.  Reduce number of "
+                       "live scalar values to avoid this.");
+               } else {
+                  while (!assign_regs(true)) {
+                     if (failed)
+                        break;
+                  }
+               }
+            }
+         }
        }
     }
     assert(force_uncompressed_stack == 0);
@@ -3309,7 +3322,7 @@ fs_visitor::run()
     if (failed)
        return false;
  
-   schedule_instructions(true);
+   schedule_instructions(SCHEDULE_POST);
  
     if (dispatch_width == 8) {
        c->prog_data.reg_blocks = brw_register_blocks(grf_used);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index dcd5b19e4d7e9f093f9022887135486a564b1f75..529bd3a558acbab86f3ada4f372ece8a3d5b41b1 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -291,7 +291,7 @@ public:
     void assign_curb_setup();
     void calculate_urb_setup();
     void assign_urb_setup();
-   bool assign_regs();
+   bool assign_regs(bool allow_spilling);
     void assign_regs_trivial();
     void get_used_mrfs(bool *mrf_used);
     void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
@@ -322,7 +322,7 @@ public:
     bool remove_dead_constants();
     bool remove_duplicate_mrf_writes();
     bool virtual_grf_interferes(int a, int b);
-   void schedule_instructions(bool post_reg_alloc);
+   void schedule_instructions(instruction_scheduler_mode mode);
     void insert_gen4_send_dependency_workarounds();
     void insert_gen4_pre_send_dependency_workarounds(fs_inst *inst);
     void insert_gen4_post_send_dependency_workarounds(fs_inst *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp

index d9e80d07f4855aa10c1f5677d06c2cd422a414e7..8567afd3c1640faedcdd7045967b40a5df08d9d6 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -417,7 +417,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
  }
  
  bool
-fs_visitor::assign_regs()
+fs_visitor::assign_regs(bool allow_spilling)
  {
     /* Most of this allocation was written for a reg_width of 1
      * (dispatch_width == 8).  In extending to 16-wide, the code was
@@ -496,14 +496,10 @@ fs_visitor::assign_regs()
        if (reg == -1) {
           fail("no register to spill:\n");
           dump_instructions();
-      } else if (dispatch_width == 16) {
-        fail("Failure to register allocate.  Reduce number of live scalar "
-              "values to avoid this.");
-      } else {
-        spill_reg(reg);
+      } else if (allow_spilling) {
+         spill_reg(reg);
        }
  
-
        ralloc_free(g);
  
        return false;
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp

index 5710380f12eaff843a903801d0ae12588f7cc03e..befea0a787d4cc083b078566387ddd98c906f030 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -391,14 +391,16 @@ schedule_node::set_latency_gen7(bool is_haswell)
  
  class instruction_scheduler {
  public:
-   instruction_scheduler(backend_visitor *v, int grf_count, bool post_reg_alloc)
+   instruction_scheduler(backend_visitor *v, int grf_count,
+                         instruction_scheduler_mode mode)
     {
        this->bv = v;
        this->mem_ctx = ralloc_context(NULL);
        this->grf_count = grf_count;
        this->instructions.make_empty();
        this->instructions_to_schedule = 0;
-      this->post_reg_alloc = post_reg_alloc;
+      this->post_reg_alloc = (mode == SCHEDULE_POST);
+      this->mode = mode;
        this->time = 0;
        if (!post_reg_alloc) {
           this->remaining_grf_uses = rzalloc_array(mem_ctx, int, grf_count);
@@ -447,6 +449,8 @@ public:
     exec_list instructions;
     backend_visitor *bv;
  
+   instruction_scheduler_mode mode;
+
     /**
      * Number of instructions left to schedule that reference each vgrf.
      *
@@ -467,7 +471,8 @@ public:
  class fs_instruction_scheduler : public instruction_scheduler
  {
  public:
-   fs_instruction_scheduler(fs_visitor *v, int grf_count, bool post_reg_alloc);
+   fs_instruction_scheduler(fs_visitor *v, int grf_count,
+                            instruction_scheduler_mode mode);
     void calculate_deps();
     bool is_compressed(fs_inst *inst);
     schedule_node *choose_instruction_to_schedule();
@@ -481,8 +486,8 @@ public:
  
  fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
                                                     int grf_count,
-                                                   bool post_reg_alloc)
-   : instruction_scheduler(v, grf_count, post_reg_alloc),
+                                                   instruction_scheduler_mode mode)
+   : instruction_scheduler(v, grf_count, mode),
       v(v)
  {
  }
@@ -569,7 +574,7 @@ public:
  
  vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
                                                         int grf_count)
-   : instruction_scheduler(v, grf_count, true),
+   : instruction_scheduler(v, grf_count, SCHEDULE_POST),
       v(v)
  {
  }
@@ -1179,40 +1184,42 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
              continue;
           }
  
-         /* Prefer instructions that recently became available for scheduling.
-          * These are the things that are most likely to (eventually) make a
-          * variable dead and reduce register pressure.  Typical register
-          * pressure estimates don't work for us because most of our pressure
-          * comes from texturing, where no single instruction to schedule will
-          * make a vec4 value dead.
-          */
-         if (n->cand_generation > chosen->cand_generation) {
-            chosen = n;
-            continue;
-         } else if (n->cand_generation < chosen->cand_generation) {
-            continue;
-         }
-
-         /* On MRF-using chips, prefer non-SEND instructions.  If we don't do
-          * this, then because we prefer instructions that just became
-          * candidates, we'll end up in a pattern of scheduling a SEND, then
-          * the MRFs for the next SEND, then the next SEND, then the MRFs,
-          * etc., without ever consuming the results of a send.
-          */
-         if (v->brw->gen < 7) {
-            fs_inst *chosen_inst = (fs_inst *)chosen->inst;
-
-            /* We use regs_written > 1 as our test for the kind of send
-             * instruction to avoid -- only sends generate many regs, and a
-             * single-result send is probably actually reducing register
-             * pressure.
+         if (mode == SCHEDULE_PRE_LIFO) {
+            /* Prefer instructions that recently became available for
+             * scheduling.  These are the things that are most likely to
+             * (eventually) make a variable dead and reduce register pressure.
+             * Typical register pressure estimates don't work for us because
+             * most of our pressure comes from texturing, where no single
+             * instruction to schedule will make a vec4 value dead.
               */
-            if (inst->regs_written <= 1 && chosen_inst->regs_written > 1) {
+            if (n->cand_generation > chosen->cand_generation) {
                 chosen = n;
                 continue;
-            } else if (inst->regs_written > chosen_inst->regs_written) {
+            } else if (n->cand_generation < chosen->cand_generation) {
                 continue;
              }
+
+            /* On MRF-using chips, prefer non-SEND instructions.  If we don't
+             * do this, then because we prefer instructions that just became
+             * candidates, we'll end up in a pattern of scheduling a SEND,
+             * then the MRFs for the next SEND, then the next SEND, then the
+             * MRFs, etc., without ever consuming the results of a send.
+             */
+            if (v->brw->gen < 7) {
+               fs_inst *chosen_inst = (fs_inst *)chosen->inst;
+
+               /* We use regs_written > 1 as our test for the kind of send
+                * instruction to avoid -- only sends generate many regs, and a
+                * single-result send is probably actually reducing register
+                * pressure.
+                */
+               if (inst->regs_written <= 1 && chosen_inst->regs_written > 1) {
+                  chosen = n;
+                  continue;
+               } else if (inst->regs_written > chosen_inst->regs_written) {
+                  continue;
+               }
+            }
           }
  
           /* For instructions pushed on the cands list at the same time, prefer
@@ -1407,18 +1414,18 @@ instruction_scheduler::run(exec_list *all_instructions)
  }
  
  void
-fs_visitor::schedule_instructions(bool post_reg_alloc)
+fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
  {
     int grf_count;
-   if (post_reg_alloc)
+   if (mode == SCHEDULE_POST)
        grf_count = grf_used;
     else
        grf_count = virtual_grf_count;
  
-   fs_instruction_scheduler sched(this, grf_count, post_reg_alloc);
+   fs_instruction_scheduler sched(this, grf_count, mode);
     sched.run(&instructions);
  
-   if (unlikely(INTEL_DEBUG & DEBUG_WM) && post_reg_alloc) {
+   if (unlikely(INTEL_DEBUG & DEBUG_WM) && mode == SCHEDULE_POST) {
        printf("fs%d estimated execution time: %d cycles\n",
               dispatch_width, sched.time);
     }
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h

index 88c23115e08c4a5b3b51a011e78a78a4f62a6362..aba24c58b627786692823cd5a967e4092a6ee6b1 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -59,6 +59,12 @@ public:
     bool predicate_inverse;
  };
  
+enum instruction_scheduler_mode {
+   SCHEDULE_PRE_NON_LIFO,
+   SCHEDULE_PRE_LIFO,
+   SCHEDULE_POST,
+};
+
  class backend_visitor : public ir_visitor {
  public:
author	Eric Anholt <eric@anholt.net>
	Thu, 7 Nov 2013 01:38:23 +0000 (17:38 -0800)
committer	Eric Anholt <eric@anholt.net>
	Tue, 12 Nov 2013 23:06:28 +0000 (15:06 -0800)
src/mesa/drivers/dri/i965/brw_fs.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_shader.h		patch \| blob \| history