i965/fs: Schedule instructions both before and after register allocation.

author Eric Anholt <eric@anholt.net>

Tue, 4 Dec 2012 01:58:03 +0000 (17:58 -0800)

committer Eric Anholt <eric@anholt.net>

Fri, 14 Dec 2012 23:17:41 +0000 (15:17 -0800)
author Eric Anholt <eric@anholt.net>
Tue, 4 Dec 2012 01:58:03 +0000 (17:58 -0800)
committer Eric Anholt <eric@anholt.net>
Fri, 14 Dec 2012 23:17:41 +0000 (15:17 -0800)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index f428a83d212872076dbeefd7eaeddc932e896a77..9a18410ac5f97c9de62af0abd3dd1bfd95638698 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2498,7 +2498,7 @@ fs_visitor::run()
  
        remove_dead_constants();
  
-      schedule_instructions();
+      schedule_instructions(false);
  
        assign_curb_setup();
        assign_urb_setup();
@@ -2525,6 +2525,8 @@ fs_visitor::run()
     if (failed)
        return false;
  
+   schedule_instructions(true);
+
     if (dispatch_width == 8) {
        c->prog_data.reg_blocks = brw_register_blocks(grf_used);
     } else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index 6caf7c337ea19ea820029150dac5587c76fc6507..b75314cd665121ec245179500c9f7514f8a42005 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -328,7 +328,7 @@ public:
     bool remove_dead_constants();
     bool remove_duplicate_mrf_writes();
     bool virtual_grf_interferes(int a, int b);
-   void schedule_instructions();
+   void schedule_instructions(bool post_reg_alloc);
     void fail(const char *msg, ...);
  
     void push_force_uncompressed();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp

index e9c25b0216a5a42fa5bfa15041b7e70759467591..24612625e308cc7fbedfd4b1352b1b3b49528adb 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -112,13 +112,15 @@ public:
  
  class instruction_scheduler {
  public:
-   instruction_scheduler(fs_visitor *v, void *mem_ctx, int virtual_grf_count)
+   instruction_scheduler(fs_visitor *v, void *mem_ctx, int grf_count,
+                         bool post_reg_alloc)
     {
        this->v = v;
        this->mem_ctx = ralloc_context(mem_ctx);
-      this->virtual_grf_count = virtual_grf_count;
+      this->grf_count = grf_count;
        this->instructions.make_empty();
        this->instructions_to_schedule = 0;
+      this->post_reg_alloc = post_reg_alloc;
     }
  
     ~instruction_scheduler()
@@ -137,8 +139,9 @@ public:
  
     void *mem_ctx;
  
+   bool post_reg_alloc;
     int instructions_to_schedule;
-   int virtual_grf_count;
+   int grf_count;
     exec_list instructions;
     fs_visitor *v;
  };
@@ -247,7 +250,12 @@ instruction_scheduler::is_compressed(fs_inst *inst)
  void
  instruction_scheduler::calculate_deps()
  {
-   schedule_node *last_grf_write[virtual_grf_count];
+   /* Pre-register-allocation, this tracks the last write per VGRF (so
+    * different reg_offsets within it can interfere when they shouldn't).
+    * After register allocation, reg_offsets are gone and we track individual
+    * GRF registers.
+    */
+   schedule_node *last_grf_write[grf_count];
     schedule_node *last_mrf_write[BRW_MAX_MRF];
     schedule_node *last_conditional_mod[2] = { NULL, NULL };
     /* Fixed HW registers are assumed to be separate from the virtual
@@ -256,6 +264,7 @@ instruction_scheduler::calculate_deps()
      * granular level.
      */
     schedule_node *last_fixed_grf_write = NULL;
+   int reg_width = v->dispatch_width / 8;
  
     /* The last instruction always needs to still be the last
      * instruction.  Either it's flow control (IF, ELSE, ENDIF, DO,
@@ -277,11 +286,21 @@ instruction_scheduler::calculate_deps()
        /* read-after-write deps. */
        for (int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
-           add_dep(last_grf_write[inst->src[i].reg], n);
+            if (post_reg_alloc) {
+               for (int r = 0; r < reg_width; r++)
+                  add_dep(last_grf_write[inst->src[i].reg + r], n);
+            } else {
+               add_dep(last_grf_write[inst->src[i].reg], n);
+            }
          } else if (inst->src[i].file == FIXED_HW_REG &&
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
-           add_dep(last_fixed_grf_write, n);
+           if (post_reg_alloc) {
+               for (int r = 0; r < reg_width; r++)
+                  add_dep(last_grf_write[inst->src[i].fixed_hw_reg.nr + r], n);
+            } else {
+               add_dep(last_fixed_grf_write, n);
+            }
          } else if (inst->src[i].file != BAD_FILE &&
                     inst->src[i].file != IMM &&
                     inst->src[i].file != UNIFORM) {
@@ -304,8 +323,15 @@ instruction_scheduler::calculate_deps()
  
        /* write-after-write deps. */
        if (inst->dst.file == GRF) {
-        add_dep(last_grf_write[inst->dst.reg], n);
-        last_grf_write[inst->dst.reg] = n;
+         if (post_reg_alloc) {
+            for (int r = 0; r < inst->regs_written() * reg_width; r++) {
+               add_dep(last_grf_write[inst->dst.reg + r], n);
+               last_grf_write[inst->dst.reg + r] = n;
+            }
+         } else {
+            add_dep(last_grf_write[inst->dst.reg], n);
+            last_grf_write[inst->dst.reg] = n;
+         }
        } else if (inst->dst.file == MRF) {
          int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
  
@@ -321,7 +347,12 @@ instruction_scheduler::calculate_deps()
          }
        } else if (inst->dst.file == FIXED_HW_REG &&
                  inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
-        last_fixed_grf_write = n;
+         if (post_reg_alloc) {
+            for (int r = 0; r < reg_width; r++)
+               last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
+         } else {
+            last_fixed_grf_write = n;
+         }
        } else if (inst->dst.file != BAD_FILE) {
          add_barrier_deps(n);
        }
@@ -360,12 +391,22 @@ instruction_scheduler::calculate_deps()
        /* write-after-read deps. */
        for (int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
-           add_dep(n, last_grf_write[inst->src[i].reg]);
+            if (post_reg_alloc) {
+               for (int r = 0; r < reg_width; r++)
+                  add_dep(n, last_grf_write[inst->src[i].reg + r]);
+            } else {
+               add_dep(n, last_grf_write[inst->src[i].reg]);
+            }
          } else if (inst->src[i].file == FIXED_HW_REG &&
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
-           add_dep(n, last_fixed_grf_write);
-        } else if (inst->src[i].file != BAD_FILE &&
+           if (post_reg_alloc) {
+               for (int r = 0; r < reg_width; r++)
+                  add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]);
+            } else {
+               add_dep(n, last_fixed_grf_write);
+            }
+         } else if (inst->src[i].file != BAD_FILE &&
                     inst->src[i].file != IMM &&
                     inst->src[i].file != UNIFORM) {
             assert(inst->src[i].file != MRF);
@@ -389,7 +430,12 @@ instruction_scheduler::calculate_deps()
         * can mark this as WAR dependency.
         */
        if (inst->dst.file == GRF) {
-        last_grf_write[inst->dst.reg] = n;
+         if (post_reg_alloc) {
+            for (int r = 0; r < inst->regs_written() * reg_width; r++)
+               last_grf_write[inst->dst.reg + r] = n;
+         } else {
+            last_grf_write[inst->dst.reg] = n;
+         }
        } else if (inst->dst.file == MRF) {
          int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
  
@@ -405,7 +451,12 @@ instruction_scheduler::calculate_deps()
          }
        } else if (inst->dst.file == FIXED_HW_REG &&
                  inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
-        last_fixed_grf_write = n;
+         if (post_reg_alloc) {
+            for (int r = 0; r < reg_width; r++)
+               last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
+         } else {
+            last_fixed_grf_write = n;
+         }
        } else if (inst->dst.file != BAD_FILE) {
          add_barrier_deps(n);
        }
@@ -499,10 +550,17 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
  }
  
  void
-fs_visitor::schedule_instructions()
+fs_visitor::schedule_instructions(bool post_reg_alloc)
  {
     fs_inst *next_block_header = (fs_inst *)instructions.head;
-   instruction_scheduler sched(this, mem_ctx, this->virtual_grf_count);
+
+   int grf_count;
+   if (post_reg_alloc)
+      grf_count = grf_used;
+   else
+      grf_count = virtual_grf_count;
+
+   instruction_scheduler sched(this, mem_ctx, grf_count, post_reg_alloc);
  
     while (!next_block_header->is_tail_sentinel()) {
        /* Add things to be scheduled until we get to a new BB. */
author	Eric Anholt <eric@anholt.net>
	Tue, 4 Dec 2012 01:58:03 +0000 (17:58 -0800)
committer	Eric Anholt <eric@anholt.net>
	Fri, 14 Dec 2012 23:17:41 +0000 (15:17 -0800)
src/mesa/drivers/dri/i965/brw_fs.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp		patch \| blob \| history