i965: Emit SNB write cache flush W/A from brw_emit_pipe_control_flush.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
index 3686cdf8ff4b0c12caa8a21b964b62d0f0f1a7f9..c3d341fd1116dbd04267757715b0d70365b02618 100644 (file)
@@ -63,7 +63,8 @@ upload_drawing_rect(struct brw_context *brw)
 const struct brw_tracked_state brw_drawing_rect = {
    .dirty = {
       .mesa = _NEW_BUFFERS,
-      .brw = BRW_NEW_CONTEXT,
+      .brw = BRW_NEW_BLORP |
+             BRW_NEW_CONTEXT,
    },
    .emit = upload_drawing_rect
 };
@@ -118,6 +119,7 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
    .dirty = {
       .mesa = 0,
       .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP |
              BRW_NEW_FF_GS_PROG_DATA |
              BRW_NEW_GEN4_UNIT_STATE |
              BRW_NEW_STATE_BASE_ADDRESS |
@@ -644,6 +646,7 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
 
       /* Emit hiz buffer. */
       if (hiz) {
+         assert(depth_mt);
          struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt;
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
@@ -704,7 +707,8 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
 const struct brw_tracked_state brw_depthbuffer = {
    .dirty = {
       .mesa = _NEW_BUFFERS,
-      .brw = BRW_NEW_BATCH,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP,
    },
    .emit = brw_emit_depthbuffer,
 };
@@ -868,6 +872,26 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
    const uint32_t _3DSTATE_PIPELINE_SELECT =
       is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
 
+   if (brw->use_resource_streamer && pipeline != BRW_RENDER_PIPELINE) {
+      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+       * PIPELINE_SELECT [DevBWR+]":
+       *
+       *   Project: HSW, BDW, CHV, SKL, BXT
+       *
+       *   Hardware Binding Tables are only supported for 3D
+       *   workloads. Resource streamer must be enabled only for 3D
+       *   workloads. Resource streamer must be disabled for Media and GPGPU
+       *   workloads.
+       */
+      BEGIN_BATCH(1);
+      OUT_BATCH(MI_RS_CONTROL | 0);
+      ADVANCE_BATCH();
+
+      gen7_disable_hw_binding_tables(brw);
+
+      /* XXX - Disable gather constant pool too when we start using it. */
+   }
+
    if (brw->gen >= 8 && brw->gen < 10) {
       /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
        *
@@ -899,16 +923,7 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
        *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
        */
       const unsigned dc_flush =
-         brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_INVALIDATE : 0;
-
-      if (brw->gen == 6) {
-         /* Hardware workaround: SNB B-Spec says:
-          *
-          *   Before a PIPE_CONTROL with Write Cache Flush Enable = 1, a
-          *   PIPE_CONTROL with any non-zero post-sync-op is required.
-          */
-         brw_emit_post_sync_nonzero_flush(brw);
-      }
+         brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
 
       brw_emit_pipe_control_flush(brw,
                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
@@ -968,6 +983,26 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
       OUT_BATCH(0);
       ADVANCE_BATCH();
    }
+
+   if (brw->use_resource_streamer && pipeline == BRW_RENDER_PIPELINE) {
+      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+       * PIPELINE_SELECT [DevBWR+]":
+       *
+       *   Project: HSW, BDW, CHV, SKL, BXT
+       *
+       *   Hardware Binding Tables are only supported for 3D
+       *   workloads. Resource streamer must be enabled only for 3D
+       *   workloads. Resource streamer must be disabled for Media and GPGPU
+       *   workloads.
+       */
+      BEGIN_BATCH(1);
+      OUT_BATCH(MI_RS_CONTROL | 1);
+      ADVANCE_BATCH();
+
+      gen7_enable_hw_binding_tables(brw);
+
+      /* XXX - Re-enable gather constant pool here. */
+   }
 }
 
 /**
@@ -1004,7 +1039,8 @@ brw_upload_invariant_state(struct brw_context *brw)
 const struct brw_tracked_state brw_invariant_state = {
    .dirty = {
       .mesa = 0,
-      .brw = BRW_NEW_CONTEXT,
+      .brw = BRW_NEW_BLORP |
+             BRW_NEW_CONTEXT,
    },
    .emit = brw_upload_invariant_state
 };
@@ -1019,9 +1055,12 @@ const struct brw_tracked_state brw_invariant_state = {
  * surface state objects, but not the surfaces that the surface state
  * objects point to.
  */
-static void
-upload_state_base_address(struct brw_context *brw)
+void
+brw_upload_state_base_address(struct brw_context *brw)
 {
+   if (brw->batch.state_base_address_emitted)
+      return;
+
    /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
     * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
     * programmed prior to STATE_BASE_ADDRESS.
@@ -1031,7 +1070,45 @@ upload_state_base_address(struct brw_context *brw)
     * maybe this isn't required for us in particular.
     */
 
-   if (brw->gen >= 6) {
+   if (brw->gen >= 8) {
+      uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+      int pkt_len = brw->gen >= 9 ? 19 : 16;
+
+      BEGIN_BATCH(pkt_len);
+      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (pkt_len - 2));
+      /* General state base address: stateless DP read/write requests */
+      OUT_BATCH(mocs_wb << 4 | 1);
+      OUT_BATCH(0);
+      OUT_BATCH(mocs_wb << 16);
+      /* Surface state base address: */
+      OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                  mocs_wb << 4 | 1);
+      /* Dynamic state base address: */
+      OUT_RELOC64(brw->batch.bo,
+                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  mocs_wb << 4 | 1);
+      /* Indirect object base address: MEDIA_OBJECT data */
+      OUT_BATCH(mocs_wb << 4 | 1);
+      OUT_BATCH(0);
+      /* Instruction base address: shader kernels (incl. SIP) */
+      OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  mocs_wb << 4 | 1);
+
+      /* General state buffer size */
+      OUT_BATCH(0xfffff001);
+      /* Dynamic state buffer size */
+      OUT_BATCH(ALIGN(brw->batch.bo->size, 4096) | 1);
+      /* Indirect object upper bound */
+      OUT_BATCH(0xfffff001);
+      /* Instruction access upper bound */
+      OUT_BATCH(ALIGN(brw->cache.bo->size, 4096) | 1);
+      if (brw->gen >= 9) {
+         OUT_BATCH(1);
+         OUT_BATCH(0);
+         OUT_BATCH(0);
+      }
+      ADVANCE_BATCH();
+   } else if (brw->gen >= 6) {
       uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
 
        BEGIN_BATCH(10);
@@ -1119,13 +1196,5 @@ upload_state_base_address(struct brw_context *brw)
     */
 
    brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
+   brw->batch.state_base_address_emitted = true;
 }
-
-const struct brw_tracked_state brw_state_base_address = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_PROGRAM_CACHE,
-   },
-   .emit = upload_state_base_address
-};