i965/gen8: add debug code to show FS disasm with jump locations
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index 66831171b4e7b77c79d511bfa3aeae37918c7f14..2653e9cbeef4f8d9f2b96e89872da7e400447feb 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -61,7 +61,8 @@ static void
 gen7_allocate_push_constants(struct brw_context *brw)
 {
    unsigned avail_size = 16;
-   unsigned multiplier = (brw->is_haswell && brw->gt == 3) ? 2 : 1;
+   unsigned multiplier =
+      (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 2 : 1;
 
    /* BRW_NEW_GEOMETRY_PROGRAM */
    bool gs_present = brw->geometry_program;
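Note: avail_size is in kB here, so the widened condition is what doubles the
push constant pool on the larger configurations:

    16 kB * 1 = 16 kB   (Ivy Bridge, Haswell GT1/GT2)
    16 kB * 2 = 32 kB   (Haswell GT3, Gen8)

These totals line up with the push_size_kB values chosen in gen7_upload_urb()
further down.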
@@ -112,7 +113,7 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
    offset += gs_size;
 
    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
-   OUT_BATCH(offset | fs_size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+   OUT_BATCH(fs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    ADVANCE_BATCH();
 
    /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
@@ -122,28 +123,8 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
     *
     * No such restriction exists for Haswell.
     */
-   if (!brw->is_haswell) {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
-      /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
-       * CS Stall):
-       *
-       *     One of the following must also be set:
-       *     - Render Target Cache Flush Enable ([12] of DW1)
-       *     - Depth Cache Flush Enable ([0] of DW1)
-       *     - Stall at Pixel Scoreboard ([1] of DW1)
-       *     - Depth Stall ([13] of DW1)
-       *     - Post-Sync Operation ([13] of DW1)
-       *
-       * We choose to do a Post-Sync Operation (Write Immediate Data), since
-       * it seems like it will incur the least additional performance penalty.
-       */
-      OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
-      OUT_RELOC(brw->batch.workaround_bo,
-                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
+   if (brw->gen < 8 && !brw->is_haswell)
+      gen7_emit_cs_stall_flush(brw);
 }
 
 const struct brw_tracked_state gen7_push_constant_space = {
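The PIPE_CONTROL sequence deleted above is not lost: it moves into a shared
gen7_emit_cs_stall_flush() helper so other atoms can emit the same workaround.
A minimal sketch of that helper, assuming a verbatim hoist of the removed code
(where it actually lands in the tree is not shown in this diff):

void
gen7_emit_cs_stall_flush(struct brw_context *brw)
{
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   /* A CS Stall must be paired with a render target cache flush, depth
    * cache flush, pixel scoreboard stall, depth stall, or post-sync
    * operation; a post-sync write of immediate data is the cheapest.
    */
   OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(brw->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* immediate data; the value written is irrelevant */
   ADVANCE_BATCH();
}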
@@ -158,7 +139,8 @@ const struct brw_tracked_state gen7_push_constant_space = {
 static void
 gen7_upload_urb(struct brw_context *brw)
 {
-   const int push_size_kB = brw->is_haswell && brw->gt == 3 ? 32 : 16;
+   const int push_size_kB =
+      (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16;
 
    /* CACHE_NEW_VS_PROG */
    unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
@@ -195,9 +177,10 @@ gen7_upload_urb(struct brw_context *brw)
     * additional space it could actually make use of).
     */
 
-   /* VS always requires at least 32 URB entries */
+   /* VS has a lower limit on the number of URB entries */
    unsigned vs_chunks =
-      ALIGN(32 * vs_entry_size_bytes, chunk_size_bytes) / chunk_size_bytes;
+      ALIGN(brw->urb.min_vs_entries * vs_entry_size_bytes, chunk_size_bytes) /
+      chunk_size_bytes;
    unsigned vs_wants =
       ALIGN(brw->urb.max_vs_entries * vs_entry_size_bytes,
             chunk_size_bytes) / chunk_size_bytes - vs_chunks;
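To make the ALIGN() arithmetic concrete, assume the Gen7 values used elsewhere
in this file (assumptions, not part of this hunk): 8192-byte URB chunks and a
64-byte entry granularity, i.e. vs_entry_size_bytes = 64 * vs_size. Then with
illustrative values min_vs_entries = 32, max_vs_entries = 512, and vs_size = 2:

    vs_entry_size_bytes = 2 * 64                    = 128 bytes
    vs_chunks = ALIGN(32 * 128, 8192) / 8192        = 1 chunk   (hard floor)
    vs_wants  = ALIGN(512 * 128, 8192) / 8192 - 1   = 7 chunks  (nice to have)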
@@ -261,7 +244,7 @@ gen7_upload_urb(struct brw_context *brw)
    /* Finally, sanity check to make sure we have at least the minimum number
     * of entries needed for each stage.
     */
-   assert(nr_vs_entries >= 32);
+   assert(nr_vs_entries >= brw->urb.min_vs_entries);
    if (gs_present)
       assert(nr_gs_entries >= 2);
 
@@ -280,7 +263,8 @@ gen7_upload_urb(struct brw_context *brw)
    brw->urb.vs_start = push_constant_chunks;
    brw->urb.gs_start = push_constant_chunks + vs_chunks;
 
-   gen7_emit_vs_workaround_flush(brw);
+   if (brw->gen == 7 && !brw->is_haswell)
+      gen7_emit_vs_workaround_flush(brw);
    gen7_emit_urb_state(brw,
                        brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start,
                        brw->urb.nr_gs_entries, gs_size, brw->urb.gs_start);
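Taken together, the hunks leave the generation-specific behavior as follows
(push constant sizes from the multiplier/push_size_kB conditions above; the
workaround columns from the two new guards):

                      push constants   CS-stall flush w/a   VS URB w/a flush
    Gen7 (non-HSW)    16 kB            emitted              emitted
    HSW GT1/GT2       16 kB            skipped              skipped
    HSW GT3           32 kB            skipped              skipped
    Gen8+             32 kB            skipped              skipped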