i965/compaction: Add support for G45.

author Matt Turner <mattst88@gmail.com>

Wed, 20 Aug 2014 18:43:29 +0000 (11:43 -0700)

committer Matt Turner <mattst88@gmail.com>

Thu, 25 Sep 2014 18:02:36 +0000 (11:02 -0700)
author Matt Turner <mattst88@gmail.com>
Wed, 20 Aug 2014 18:43:29 +0000 (11:43 -0700)
committer Matt Turner <mattst88@gmail.com>
Thu, 25 Sep 2014 18:02:36 +0000 (11:02 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c

index a5b2d6cbca71843d5fc65997c751f2c14995d0d6..f1534d996eaef40d0f930db4f1282ce32c1d5cf0 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -23,12 +23,12 @@
  
  /** @file brw_eu_compact.c
   *
- * Instruction compaction is a feature of gm45 and newer hardware that allows
+ * Instruction compaction is a feature of G45 and newer hardware that allows
   * for a smaller instruction encoding.
   *
   * The instruction cache is on the order of 32KB, and many programs generate
   * far more instructions than that.  The instruction cache is built to barely
- * keep up with instruction dispatch abaility in cache hit cases -- L1
+ * keep up with instruction dispatch ability in cache hit cases -- L1
   * instruction cache misses that still hit in the next level could limit
   * throughput by around 50%.
   *
@@ -1207,14 +1207,15 @@ static void
  update_gen4_jump_count(struct brw_context *brw, brw_inst *insn,
                         int this_old_ip, int *compacted_counts)
  {
-   assert(brw->gen == 5);
+   assert(brw->gen == 5 || brw->is_g4x);
  
     /* Jump Count is in units of:
+    *    - uncompacted instructions on G45; and
      *    - compacted instructions on Gen5.
      */
     int jump_count = brw_inst_gen4_jump_count(brw, insn);
-   int jump_count_compacted = jump_count;
-   int jump_count_uncompacted = jump_count / 2;
+   int jump_count_compacted = jump_count * (brw->is_g4x ? 2 : 1);
+   int jump_count_uncompacted = jump_count / (brw->is_g4x ? 1 : 2);
  
     int target_old_ip = this_old_ip + jump_count_uncompacted;
  
@@ -1222,7 +1223,8 @@ update_gen4_jump_count(struct brw_context *brw, brw_inst *insn,
     int target_compacted_count = compacted_counts[target_old_ip];
  
     jump_count_compacted -= (target_compacted_count - this_compacted_count);
-   brw_inst_set_gen4_jump_count(brw, insn, jump_count_compacted);
+   brw_inst_set_gen4_jump_count(brw, insn, jump_count_compacted /
+                                           (brw->is_g4x ? 2 : 1));
  }
  
  void
@@ -1265,13 +1267,14 @@ brw_init_compaction_tables(struct brw_context *brw)
        src_index_table = gen6_src_index_table;
        break;
     case 5:
+   case 4:
        control_index_table = g45_control_index_table;
        datatype_table = g45_datatype_table;
        subreg_table = g45_subreg_table;
        src_index_table = g45_src_index_table;
        break;
     default:
-      return;
+      unreachable("unknown generation");
     }
  }
  
@@ -1282,7 +1285,8 @@ brw_compact_instructions(struct brw_compile *p, int start_offset,
     struct brw_context *brw = p->brw;
     void *store = p->store + start_offset / 16;
     /* For an instruction at byte offset 16*i before compaction, this is the
-    * number of compacted instructions that preceded it.
+    * number of compacted instructions minus the number of padding NOP/NENOPs
+    * that preceded it.
      */
     int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
     /* For an instruction at byte offset 8*i after compaction, this was its IP
@@ -1290,7 +1294,7 @@ brw_compact_instructions(struct brw_compile *p, int start_offset,
      */
     int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst)];
  
-   if (brw->gen == 4)
+   if (brw->gen == 4 && !brw->is_g4x)
        return;
  
     int offset = 0;
@@ -1319,17 +1323,22 @@ brw_compact_instructions(struct brw_compile *p, int start_offset,
           offset += sizeof(brw_compact_inst);
        } else {
           /* It appears that the end of thread SEND instruction needs to be
-          * aligned, or the GPU hangs.
+          * aligned, or the GPU hangs. All uncompacted instructions need to be
+          * aligned on G45.
            */
-         if ((brw_inst_opcode(brw, src) == BRW_OPCODE_SEND ||
-              brw_inst_opcode(brw, src) == BRW_OPCODE_SENDC) &&
-             brw_inst_eot(brw, src) &&
-             (offset & sizeof(brw_compact_inst)) != 0) {
+         if ((offset & sizeof(brw_compact_inst)) != 0 &&
+             (((brw_inst_opcode(brw, src) == BRW_OPCODE_SEND ||
+                brw_inst_opcode(brw, src) == BRW_OPCODE_SENDC) &&
+               brw_inst_eot(brw, src)) ||
+              brw->is_g4x)) {
              brw_compact_inst *align = store + offset;
              memset(align, 0, sizeof(*align));
-            brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
+            brw_compact_inst_set_opcode(align, brw->is_g4x ? BRW_OPCODE_NENOP :
+                                                             BRW_OPCODE_NOP);
              brw_compact_inst_set_cmpt_control(align, true);
              offset += sizeof(brw_compact_inst);
+            compacted_count--;
+            compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
              old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
  
              dst = store + offset;
author	Matt Turner <mattst88@gmail.com>
	Wed, 20 Aug 2014 18:43:29 +0000 (11:43 -0700)
committer	Matt Turner <mattst88@gmail.com>
	Thu, 25 Sep 2014 18:02:36 +0000 (11:02 -0700)