i965/vec4: Fix the scheduler to take into account reads and writes of multiple registers.
author Francisco Jerez <currojerez@riseup.net>
Thu, 5 Feb 2015 20:39:33 +0000 (22:39 +0200)
committer Francisco Jerez <currojerez@riseup.net>
Tue, 10 Feb 2015 17:09:24 +0000 (19:09 +0200)
v2: Avoid nested ternary operators in vec4_instruction::regs_read(). (Matt)

Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_ir_vec4.h
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp

index f11a2d21d1e5cad9e3fdb06ccac20fba27592971..941086ffd28ee9a6db7d8f493036affa3bb9c353 100644 (file)
@@ -171,6 +171,7 @@ public:
    unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
 
    bool is_send_from_grf();
+   unsigned regs_read(unsigned arg) const;
    bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
    void reswizzle(int dst_writemask, int swizzle);
    bool can_do_source_mods(struct brw_context *brw);
index 88feeca8f8375cf05bcc589703de57097f898bbe..2b22b2c45fcc279a12b341b5488f8fa65bfe888e 100644 (file)
@@ -1063,7 +1063,8 @@ vec4_instruction_scheduler::calculate_deps()
       /* read-after-write deps. */
       for (int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
-            add_dep(last_grf_write[inst->src[i].reg], n);
+            for (unsigned j = 0; j < inst->regs_read(i); ++j)
+               add_dep(last_grf_write[inst->src[i].reg + j], n);
          } else if (inst->src[i].file == HW_REG &&
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
@@ -1103,8 +1104,10 @@ vec4_instruction_scheduler::calculate_deps()
 
       /* write-after-write deps. */
       if (inst->dst.file == GRF) {
-         add_dep(last_grf_write[inst->dst.reg], n);
-         last_grf_write[inst->dst.reg] = n;
+         for (unsigned j = 0; j < inst->regs_written; ++j) {
+            add_dep(last_grf_write[inst->dst.reg + j], n);
+            last_grf_write[inst->dst.reg + j] = n;
+         }
       } else if (inst->dst.file == MRF) {
          add_dep(last_mrf_write[inst->dst.reg], n);
          last_mrf_write[inst->dst.reg] = n;
@@ -1156,7 +1159,8 @@ vec4_instruction_scheduler::calculate_deps()
       /* write-after-read deps. */
       for (int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
-            add_dep(n, last_grf_write[inst->src[i].reg]);
+            for (unsigned j = 0; j < inst->regs_read(i); ++j)
+               add_dep(n, last_grf_write[inst->src[i].reg + j]);
          } else if (inst->src[i].file == HW_REG &&
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
@@ -1194,7 +1198,8 @@ vec4_instruction_scheduler::calculate_deps()
        * can mark this as WAR dependency.
        */
       if (inst->dst.file == GRF) {
-         last_grf_write[inst->dst.reg] = n;
+         for (unsigned j = 0; j < inst->regs_written; ++j)
+            last_grf_write[inst->dst.reg + j] = n;
       } else if (inst->dst.file == MRF) {
          last_mrf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == HW_REG &&
index 48b2a6902f68fa54f5a2b02436819d546e190317..cdd8a446286ab6cdfd4ee8caf8160a220b9ec544 100644 (file)
@@ -262,6 +262,24 @@ vec4_instruction::is_send_from_grf()
    }
 }
 
+unsigned /* Returns how many registers source operand 'arg' of this instruction reads. */
+vec4_instruction::regs_read(unsigned arg) const
+{ /* NOTE(review): 'arg' indexes src[] unchecked; the scheduler callers in this patch pass 0..2. */
+   if (src[arg].file == BAD_FILE) /* A BAD_FILE source is reported as reading nothing. */
+      return 0;
+
+   switch (opcode) {
+   case SHADER_OPCODE_SHADER_TIME_ADD:
+      return arg == 0 ? mlen : 1; /* Source 0 reads mlen registers; other sources read one. */
+
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      return arg == 1 ? mlen : 1; /* Source 1 reads mlen registers; other sources read one. */
+
+   default:
+      return 1; /* All remaining opcodes read a single register per source. */
+   }
+}
+
 bool
 vec4_instruction::can_do_source_mods(struct brw_context *brw)
 {