i965/fs: Use regs_read/written for post-RA scheduling in calculate_deps
author: Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 6 Nov 2015 00:37:47 +0000 (16:37 -0800)
committer: Jason Ekstrand <jason.ekstrand@intel.com>
Sat, 7 Nov 2015 16:41:48 +0000 (08:41 -0800)
Previously, we were assuming that everything read/wrote exactly 1 logical
GRF (1 in SIMD8 and 2 in SIMD16).  This isn't actually true.  In
particular, the PLN instruction reads 2 logical registers in one of the
components.  This commit changes post-RA scheduling to use regs_read and
regs_written instead so that we add enough dependencies.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92770
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp

index 88c45f743338d2104d7c0048b5793f5af44015bd..d21bc677c8284b2f0d8c05e49946e054a555de8a 100644 (file)
@@ -927,7 +927,6 @@ fs_instruction_scheduler::calculate_deps()
     * granular level.
     */
    schedule_node *last_fixed_grf_write = NULL;
-   int reg_width = v->dispatch_width / 8;
 
    /* The last instruction always needs to still be the last
     * instruction.  Either it's flow control (IF, ELSE, ENDIF, DO,
@@ -964,10 +963,7 @@ fs_instruction_scheduler::calculate_deps()
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             if (post_reg_alloc) {
-               int size = reg_width;
-               if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
-                  size = 1;
-               for (int r = 0; r < size; r++)
+               for (int r = 0; r < inst->regs_read(i); r++)
                   add_dep(last_grf_write[inst->src[i].fixed_hw_reg.nr + r], n);
             } else {
                add_dep(last_fixed_grf_write, n);
@@ -1031,7 +1027,7 @@ fs_instruction_scheduler::calculate_deps()
       } else if (inst->dst.file == HW_REG &&
                  inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
          if (post_reg_alloc) {
-            for (int r = 0; r < reg_width; r++)
+            for (int r = 0; r < inst->regs_written; r++)
                last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
          } else {
             last_fixed_grf_write = n;
@@ -1093,10 +1089,7 @@ fs_instruction_scheduler::calculate_deps()
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             if (post_reg_alloc) {
-               int size = reg_width;
-               if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
-                  size = 1;
-               for (int r = 0; r < size; r++)
+               for (int r = 0; r < inst->regs_read(i); r++)
                   add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r], 0);
             } else {
                add_dep(n, last_fixed_grf_write, 0);
@@ -1159,7 +1152,7 @@ fs_instruction_scheduler::calculate_deps()
       } else if (inst->dst.file == HW_REG &&
                  inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
          if (post_reg_alloc) {
-            for (int r = 0; r < reg_width; r++)
+            for (int r = 0; r < inst->regs_written; r++)
                last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
          } else {
             last_fixed_grf_write = n;