i965/gen8: add debug code to show FS disasm with jump locations
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index 66831171b4e7b77c79d511bfa3aeae37918c7f14..2653e9cbeef4f8d9f2b96e89872da7e400447feb 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -61,7 +61,8 @@ static void
 gen7_allocate_push_constants(struct brw_context *brw)
 {
    unsigned avail_size = 16;
-   unsigned multiplier = (brw->is_haswell && brw->gt == 3) ? 2 : 1;
+   unsigned multiplier =
+      (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 2 : 1;
 
    /* BRW_NEW_GEOMETRY_PROGRAM */
    bool gs_present = brw->geometry_program;
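Note: avail_size is in kB here, so the widened condition is what doubles the
push constant pool on the larger configurations:

    16 kB * 1 = 16 kB   (Ivy Bridge, Haswell GT1/GT2)
    16 kB * 2 = 32 kB   (Haswell GT3, Gen8)

These totals line up with the push_size_kB values chosen in gen7_upload_urb()
further down.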
@@ -112,7 +113,7 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
    offset += gs_size;
 
    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
-   OUT_BATCH(offset | fs_size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+   OUT_BATCH(fs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    ADVANCE_BATCH();
 
    /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
@@ -122,28 +123,8 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
     *
     * No such restriction exists for Haswell.
     */
-   if (!brw->is_haswell) {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
-      /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
-       * CS Stall):
-       *
-       *     One of the following must also be set:
-       *     - Render Target Cache Flush Enable ([12] of DW1)
-       *     - Depth Cache Flush Enable ([0] of DW1)
-       *     - Stall at Pixel Scoreboard ([1] of DW1)
-       *     - Depth Stall ([13] of DW1)
-       *     - Post-Sync Operation ([13] of DW1)
-       *
-       * We choose to do a Post-Sync Operation (Write Immediate Data), since
-       * it seems like it will incur the least additional performance penalty.
-       */
-      OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
-      OUT_RELOC(brw->batch.workaround_bo,
-                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
+   if (brw->gen < 8 && !brw->is_haswell)
+      gen7_emit_cs_stall_flush(brw);
 }
 
 const struct brw_tracked_state gen7_push_constant_space = {
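The PIPE_CONTROL sequence deleted above is not lost: it moves into a shared
gen7_emit_cs_stall_flush() helper so other atoms can emit the same workaround.
A minimal sketch of that helper, assuming a verbatim hoist of the removed code
(where it actually lands in the tree is not shown in this diff):

void
gen7_emit_cs_stall_flush(struct brw_context *brw)
{
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   /* A CS Stall must be paired with a render target cache flush, depth
    * cache flush, pixel scoreboard stall, depth stall, or post-sync
    * operation; a post-sync write of immediate data is the cheapest.
    */
   OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(brw->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* immediate data; the value written is irrelevant */
   ADVANCE_BATCH();
}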
@@ -158,7 +139,8 @@ const struct brw_tracked_state gen7_push_constant_space = {
 static void
 gen7_upload_urb(struct brw_context *brw)
 {
-   const int push_size_kB = brw->is_haswell && brw->gt == 3 ? 32 : 16;
+   const int push_size_kB =
+      (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16;
 
    /* CACHE_NEW_VS_PROG */
    unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
@@ -195,9 +177,10 @@ gen7_upload_urb(struct brw_context *brw)
     * additional space it could actually make use of).
     */
 
-   /* VS always requires at least 32 URB entries */
+   /* VS has a lower limit on the number of URB entries */
    unsigned vs_chunks =
-      ALIGN(32 * vs_entry_size_bytes, chunk_size_bytes) / chunk_size_bytes;
+      ALIGN(brw->urb.min_vs_entries * vs_entry_size_bytes, chunk_size_bytes) /
+      chunk_size_bytes;
    unsigned vs_wants =
       ALIGN(brw->urb.max_vs_entries * vs_entry_size_bytes,
             chunk_size_bytes) / chunk_size_bytes - vs_chunks;
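To make the ALIGN() arithmetic concrete, assume the Gen7 values used elsewhere
in this file (assumptions, not part of this hunk): 8192-byte URB chunks and a
64-byte entry granularity, i.e. vs_entry_size_bytes = 64 * vs_size. Then with
illustrative values min_vs_entries = 32, max_vs_entries = 512, and vs_size = 2:

    vs_entry_size_bytes = 2 * 64                    = 128 bytes
    vs_chunks = ALIGN(32 * 128, 8192) / 8192        = 1 chunk   (hard floor)
    vs_wants  = ALIGN(512 * 128, 8192) / 8192 - 1   = 7 chunks  (nice to have)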
@@ -261,7 +244,7 @@ gen7_upload_urb(struct brw_context *brw)
    /* Finally, sanity check to make sure we have at least the minimum number
     * of entries needed for each stage.
     */
-   assert(nr_vs_entries >= 32);
+   assert(nr_vs_entries >= brw->urb.min_vs_entries);
    if (gs_present)
       assert(nr_gs_entries >= 2);
 
@@ -280,7 +263,8 @@ gen7_upload_urb(struct brw_context *brw)
    brw->urb.vs_start = push_constant_chunks;
    brw->urb.gs_start = push_constant_chunks + vs_chunks;
 
-   gen7_emit_vs_workaround_flush(brw);
+   if (brw->gen == 7 && !brw->is_haswell)
+      gen7_emit_vs_workaround_flush(brw);
    gen7_emit_urb_state(brw,
                        brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start,
                        brw->urb.nr_gs_entries, gs_size, brw->urb.gs_start);
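Taken together, the hunks leave the generation-specific behavior as follows
(push constant sizes from the multiplier/push_size_kB conditions above; the
workaround columns from the two new guards):

                      push constants   CS-stall flush w/a   VS URB w/a flush
    Gen7 (non-HSW)    16 kB            emitted              emitted
    HSW GT1/GT2       16 kB            skipped              skipped
    HSW GT3           32 kB            skipped              skipped
    Gen8+             32 kB            skipped              skipped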