i965: Emit SNB write cache flush W/A from brw_emit_pipe_control_flush.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c

index e3980fc3408319e2cb710f986e67b74b35aa44e1..c3d341fd1116dbd04267757715b0d70365b02618 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -39,23 +39,23 @@
  #include "brw_state.h"
  #include "brw_defines.h"
  
+#include "main/framebuffer.h"
  #include "main/fbobject.h"
  #include "main/glformats.h"
  
  /* Constant single cliprect for framebuffer object or DRI2 drawing */
-static void upload_drawing_rect(struct brw_context *brw)
+static void
+upload_drawing_rect(struct brw_context *brw)
  {
     struct gl_context *ctx = &brw->ctx;
-
-   /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined. */
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
+   const struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const unsigned int fb_width = _mesa_geometric_width(fb); /* NOTE(review): presumably differs from fb->Width only for attachment-less framebuffers - confirm in main/framebuffer.h */
+   const unsigned int fb_height = _mesa_geometric_height(fb); /* NOTE(review): unsigned, so a 0 size makes "size - 1" below wrap - confirm callers never see 0 */
  
     BEGIN_BATCH(4);
     OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
     OUT_BATCH(0); /* xmin, ymin */
-   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
-           ((ctx->DrawBuffer->Height - 1) << 16));
+   OUT_BATCH(((fb_width - 1) & 0xffff) | ((fb_height - 1) << 16)); /* xmax, ymax (inclusive): width - 1 in the low 16 bits, height - 1 in the high 16 bits */
     OUT_BATCH(0);
     ADVANCE_BATCH();
  }
@@ -63,8 +63,8 @@ static void upload_drawing_rect(struct brw_context *brw)
  const struct brw_tracked_state brw_drawing_rect = {
     .dirty = {
        .mesa = _NEW_BUFFERS,
-      .brw = BRW_NEW_CONTEXT,
-      .cache = 0
+      .brw = BRW_NEW_BLORP | /* NOTE(review): presumably re-emits this packet after a BLORP operation - confirm where the flag is raised */
+             BRW_NEW_CONTEXT, /* _NEW_BUFFERS above matches the ctx->DrawBuffer size read in upload_drawing_rect() */
     },
     .emit = upload_drawing_rect
  };
@@ -75,7 +75,8 @@ const struct brw_tracked_state brw_drawing_rect = {
   * The state pointers in this packet are all relative to the general state
   * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
   */
-static void upload_pipelined_state_pointers(struct brw_context *brw )
+static void
+upload_pipelined_state_pointers(struct brw_context *brw)
  {
     if (brw->gen == 5) {
        /* Need to flush before changing clip max threads for errata. */
@@ -103,10 +104,11 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
              brw->cc.state_offset);
     ADVANCE_BATCH();
  
-   brw->state.dirty.brw |= BRW_NEW_PSP;
+   brw->ctx.NewDriverState |= BRW_NEW_PSP;
  }
  
-static void upload_psp_urb_cbs(struct brw_context *brw )
+static void
+upload_psp_urb_cbs(struct brw_context *brw)
  {
     upload_pipelined_state_pointers(brw);
     brw_upload_urb_fence(brw);
@@ -116,16 +118,12 @@ static void upload_psp_urb_cbs(struct brw_context *brw )
  const struct brw_tracked_state brw_psp_urb_cbs = {
     .dirty = {
        .mesa = 0,
-      .brw = (BRW_NEW_URB_FENCE |
-             BRW_NEW_BATCH |
-             BRW_NEW_STATE_BASE_ADDRESS),
-      .cache = (CACHE_NEW_VS_UNIT |
-               CACHE_NEW_FF_GS_UNIT |
-               CACHE_NEW_FF_GS_PROG |
-               CACHE_NEW_CLIP_UNIT |
-               CACHE_NEW_SF_UNIT |
-               CACHE_NEW_WM_UNIT |
-               CACHE_NEW_CC_UNIT)
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP | /* NOTE(review): presumably re-emits after a BLORP operation - confirm where the flag is raised */
+             BRW_NEW_FF_GS_PROG_DATA |
+             BRW_NEW_GEN4_UNIT_STATE | /* NOTE(review): presumably one flag covering all gen4 fixed-function unit states (VS/GS/CLIP/SF/WM/CC) - confirm */
+             BRW_NEW_STATE_BASE_ADDRESS | /* raised via brw->ctx.NewDriverState by brw_upload_state_base_address() in this file */
+             BRW_NEW_URB_FENCE,
     },
     .emit = upload_psp_urb_cbs,
  };
@@ -178,13 +176,17 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
     uint32_t tile_mask_x = 0, tile_mask_y = 0;
  
     if (depth_mt) {
-      intel_miptree_get_tile_masks(depth_mt, &tile_mask_x, &tile_mask_y, false);
+      intel_get_tile_masks(depth_mt->tiling, depth_mt->tr_mode,
+                           depth_mt->cpp, false,
+                           &tile_mask_x, &tile_mask_y);
  
        if (intel_miptree_level_has_hiz(depth_mt, depth_level)) {
           uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
-         intel_miptree_get_tile_masks(depth_mt->hiz_mt,
-                                      &hiz_tile_mask_x, &hiz_tile_mask_y,
-                                      false);
+         intel_get_tile_masks(depth_mt->hiz_buf->mt->tiling,
+                              depth_mt->hiz_buf->mt->tr_mode,
+                              depth_mt->hiz_buf->mt->cpp,
+                              false, &hiz_tile_mask_x,
+                              &hiz_tile_mask_y);
  
           /* Each HiZ row represents 2 rows of pixels */
           hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
@@ -204,9 +206,11 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
           tile_mask_y |= 63;
        } else {
           uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
-         intel_miptree_get_tile_masks(stencil_mt,
-                                      &stencil_tile_mask_x,
-                                      &stencil_tile_mask_y, false);
+         intel_get_tile_masks(stencil_mt->tiling,
+                              stencil_mt->tr_mode,
+                              stencil_mt->cpp,
+                              false, &stencil_tile_mask_x,
+                              &stencil_tile_mask_y);
  
           tile_mask_x |= stencil_tile_mask_x;
           tile_mask_y |= stencil_tile_mask_y;
@@ -587,8 +591,7 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
      * non-pipelined state that will need the PIPE_CONTROL workaround.
      */
     if (brw->gen == 6) {
-      intel_emit_post_sync_nonzero_flush(brw);
-      intel_emit_depth_stall_flushes(brw);
+      brw_emit_depth_stall_flushes(brw);
     }
  
     unsigned int len;
@@ -643,7 +646,8 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
  
        /* Emit hiz buffer. */
        if (hiz) {
-         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
+         assert(depth_mt);
+         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt;
          BEGIN_BATCH(3);
          OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
          OUT_BATCH(hiz_mt->pitch - 1);
@@ -691,9 +695,6 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
      *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
      */
     if (brw->gen >= 6 || hiz) {
-      if (brw->gen == 6)
-        intel_emit_post_sync_nonzero_flush(brw);
-
        BEGIN_BATCH(2);
        OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
                 GEN5_DEPTH_CLEAR_VALID |
@@ -706,19 +707,17 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
  const struct brw_tracked_state brw_depthbuffer = {
     .dirty = {
        .mesa = _NEW_BUFFERS,
-      .brw = BRW_NEW_BATCH,
-      .cache = 0,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP, /* NOTE(review): presumably re-emits depth/stencil/HiZ packets after a BLORP operation - confirm where the flag is raised */
     },
     .emit = brw_emit_depthbuffer,
  };
  
-
-
-/***********************************************************************
+/**
   * Polygon stipple packet
   */
-
-static void upload_polygon_stipple(struct brw_context *brw)
+static void
+upload_polygon_stipple(struct brw_context *brw)
  {
     struct gl_context *ctx = &brw->ctx;
     GLuint i;
@@ -727,9 +726,6 @@ static void upload_polygon_stipple(struct brw_context *brw)
     if (!ctx->Polygon.StippleFlag)
        return;
  
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
     BEGIN_BATCH(33);
     OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
  
@@ -743,8 +739,7 @@ static void upload_polygon_stipple(struct brw_context *brw)
     if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
        for (i = 0; i < 32; i++)
           OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
-   }
-   else {
+   } else {
        for (i = 0; i < 32; i++)
          OUT_BATCH(ctx->PolygonStipple[i]);
     }
@@ -753,20 +748,18 @@ static void upload_polygon_stipple(struct brw_context *brw)
  
  const struct brw_tracked_state brw_polygon_stipple = {
     .dirty = {
-      .mesa = (_NEW_POLYGONSTIPPLE |
-              _NEW_POLYGON),
+      .mesa = _NEW_POLYGON | /* presumably tracks ctx->Polygon.StippleFlag, which upload_polygon_stipple() tests first */
+              _NEW_POLYGONSTIPPLE, /* presumably tracks ctx->PolygonStipple[]; NOTE(review): the emit function also checks _mesa_is_winsys_fbo(ctx->DrawBuffer) but _NEW_BUFFERS is not listed - confirm intended */
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = upload_polygon_stipple
  };
  
-
-/***********************************************************************
+/**
   * Polygon stipple offset packet
   */
-
-static void upload_polygon_stipple_offset(struct brw_context *brw)
+static void
+upload_polygon_stipple_offset(struct brw_context *brw)
  {
     struct gl_context *ctx = &brw->ctx;
  
@@ -774,9 +767,6 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
     if (!ctx->Polygon.StippleFlag)
        return;
  
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
     BEGIN_BATCH(2);
     OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
  
@@ -789,7 +779,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
      * works just fine, and there's no window system to worry about.
      */
     if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
-      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
+      OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31);
     else
        OUT_BATCH(0);
     ADVANCE_BATCH();
@@ -797,18 +787,18 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
  
  const struct brw_tracked_state brw_polygon_stipple_offset = {
     .dirty = {
-      .mesa = (_NEW_BUFFERS |
-              _NEW_POLYGON),
+      .mesa = _NEW_BUFFERS | /* the emit function reads ctx->DrawBuffer (winsys check and geometric height) */
+              _NEW_POLYGON, /* presumably tracks ctx->Polygon.StippleFlag, which the emit function tests first */
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = upload_polygon_stipple_offset
  };
  
-/**********************************************************************
+/**
   * AA Line parameters
   */
-static void upload_aa_line_parameters(struct brw_context *brw)
+static void
+upload_aa_line_parameters(struct brw_context *brw)
  {
     struct gl_context *ctx = &brw->ctx;
  
@@ -819,9 +809,6 @@ static void upload_aa_line_parameters(struct brw_context *brw)
     if (brw->gen == 4 && !brw->is_g4x)
        return;
  
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
     BEGIN_BATCH(3);
     OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
     /* use legacy aa line coverage computation */
@@ -834,16 +821,15 @@ const struct brw_tracked_state brw_aa_line_parameters = {
     .dirty = {
        .mesa = _NEW_LINE,
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = upload_aa_line_parameters
  };
  
-/***********************************************************************
+/**
   * Line stipple packet
   */
-
-static void upload_line_stipple(struct brw_context *brw)
+static void
+upload_line_stipple(struct brw_context *brw)
  {
     struct gl_context *ctx = &brw->ctx;
     GLfloat tmp;
@@ -852,22 +838,18 @@ static void upload_line_stipple(struct brw_context *brw)
     if (!ctx->Line.StippleFlag)
        return;
  
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
     BEGIN_BATCH(3);
     OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
     OUT_BATCH(ctx->Line.StipplePattern);
  
     if (brw->gen >= 7) {
        /* in U1.16 */
-      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+      tmp = 1.0f / ctx->Line.StippleFactor;
        tmpi = tmp * (1<<16);
        OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
-   }
-   else {
+   } else {
        /* in U1.13 */
-      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+      tmp = 1.0f / ctx->Line.StippleFactor;
        tmpi = tmp * (1<<13);
        OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
     }
@@ -879,40 +861,161 @@ const struct brw_tracked_state brw_line_stipple = {
     .dirty = {
        .mesa = _NEW_LINE,
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = upload_line_stipple
  };
  
-
-/***********************************************************************
- * Misc invariant state packets
- */
-
  void
-brw_upload_invariant_state(struct brw_context *brw)
+brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
  {
     const bool is_965 = brw->gen == 4 && !brw->is_g4x;
-
-   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
-   /* Select the 3D pipeline (as opposed to media) */
     const uint32_t _3DSTATE_PIPELINE_SELECT =
        is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
+   /* Order below: pre-select workarounds, the PIPELINE_SELECT dword itself, then post-select fix-ups. */
+   if (brw->use_resource_streamer && pipeline != BRW_RENDER_PIPELINE) {
+      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+       * PIPELINE_SELECT [DevBWR+]":
+       *
+       *   Project: HSW, BDW, CHV, SKL, BXT
+       *
+       *   Hardware Binding Tables are only supported for 3D
+       *   workloads. Resource streamer must be enabled only for 3D
+       *   workloads. Resource streamer must be disabled for Media and GPGPU
+       *   workloads.
+       */
+      BEGIN_BATCH(1);
+      OUT_BATCH(MI_RS_CONTROL | 0); /* low bit 0 = off; the matching "| 1" is emitted at the end of this function when selecting render */
+      ADVANCE_BATCH();
+
+      gen7_disable_hw_binding_tables(brw);
+
+      /* XXX - Disable gather constant pool too when we start using it. */
+   }
+
+   if (brw->gen >= 8 && brw->gen < 10) {
+      /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
+       *
+       *   Software must clear the COLOR_CALC_STATE Valid field in
+       *   3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
+       *   with Pipeline Select set to GPGPU.
+       *
+       * The internal hardware docs recommend the same workaround for Gen9
+       * hardware too.
+       */
+      if (pipeline == BRW_COMPUTE_PIPELINE) {
+         BEGIN_BATCH(2);
+         OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
+         OUT_BATCH(0);
+         ADVANCE_BATCH();
+
+         brw->ctx.NewDriverState |= BRW_NEW_CC_STATE; /* the pointer dword was just written as 0, so flag CC state dirty - presumably so it is re-uploaded later; confirm the consumer */
+      }
+
+   } else if (brw->gen >= 6) { /* reached for gen6-7 and gen10+ only; NOTE(review): gen8-9 take the branch above and never emit this flush pair - confirm intended */
+      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+       * PIPELINE_SELECT [DevBWR+]":
+       *
+       *   Project: DEVSNB+
+       *
+       *   Software must ensure all the write caches are flushed through a
+       *   stalling PIPE_CONTROL command followed by another PIPE_CONTROL
+       *   command to invalidate read only caches prior to programming
+       *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
+       */
+      const unsigned dc_flush =
+         brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
+
+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                  dc_flush |
+                                  PIPE_CONTROL_NO_WRITE |
+                                  PIPE_CONTROL_CS_STALL);
+
+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+                                  PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+                                  PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+                                  PIPE_CONTROL_NO_WRITE);
+
+   } else {
+      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+       * PIPELINE_SELECT [DevBWR+]":
+       *
+       *   Project: PRE-DEVSNB
+       *
+       *   Software must ensure the current pipeline is flushed via an
+       *   MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
+       */
+      BEGIN_BATCH(1);
+      OUT_BATCH(MI_FLUSH);
+      ADVANCE_BATCH();
+   }
+
+   /* Select the pipeline */
     BEGIN_BATCH(1);
-   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 | 0);
+   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
+             (brw->gen >= 9 ? (3 << 8) : 0) | /* NOTE(review): bits 9:8 set on gen9+ only - presumably a write-enable mask for the select field; confirm in the PRM */
+             (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0)); /* 2 for compute, 0 for every other pipeline value (render included) */
     ADVANCE_BATCH();
  
-   if (brw->gen < 6) {
-      /* Disable depth offset clamping. */
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
-      OUT_BATCH_F(0.0);
+   if (brw->gen == 7 && !brw->is_haswell &&
+       pipeline == BRW_RENDER_PIPELINE) {
+      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+       * PIPELINE_SELECT [DevBWR+]":
+       *
+       *   Project: DEVIVB, DEVHSW:GT3:A0
+       *
+       *   Software must send a pipe_control with a CS stall and a post sync
+       *   operation and then a dummy DRAW after every MI_SET_CONTEXT and
+       *   after any PIPELINE_SELECT that is enabling 3D mode.
+       */
+      gen7_emit_cs_stall_flush(brw);
+
+      BEGIN_BATCH(7);
+      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); /* the "dummy DRAW" from the quote above: a point-list primitive whose remaining five dwords are all zero */
+      OUT_BATCH(_3DPRIM_POINTLIST);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
        ADVANCE_BATCH();
     }
  
+   if (brw->use_resource_streamer && pipeline == BRW_RENDER_PIPELINE) {
+      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+       * PIPELINE_SELECT [DevBWR+]":
+       *
+       *   Project: HSW, BDW, CHV, SKL, BXT
+       *
+       *   Hardware Binding Tables are only supported for 3D
+       *   workloads. Resource streamer must be enabled only for 3D
+       *   workloads. Resource streamer must be disabled for Media and GPGPU
+       *   workloads.
+       */
+      BEGIN_BATCH(1);
+      OUT_BATCH(MI_RS_CONTROL | 1); /* low bit 1 = on; mirrors the "| 0" emitted at the top when leaving render */
+      ADVANCE_BATCH();
+
+      gen7_enable_hw_binding_tables(brw);
+
+      /* XXX - Re-enable gather constant pool here. */
+   }
+}
+
+/**
+ * Misc invariant state packets
+ */
+void
+brw_upload_invariant_state(struct brw_context *brw)
+{
+   const bool is_965 = brw->gen == 4 && !brw->is_g4x;
+
+   brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw->last_pipeline = BRW_RENDER_PIPELINE;
+
     if (brw->gen >= 8) {
        BEGIN_BATCH(3);
        OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
@@ -929,16 +1032,15 @@ brw_upload_invariant_state(struct brw_context *brw)
     const uint32_t _3DSTATE_VF_STATISTICS =
        is_965 ? GEN4_3DSTATE_VF_STATISTICS : GM45_3DSTATE_VF_STATISTICS;
     BEGIN_BATCH(1);
-   OUT_BATCH(_3DSTATE_VF_STATISTICS << 16 |
-            (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
+   OUT_BATCH(_3DSTATE_VF_STATISTICS << 16 | 1);
     ADVANCE_BATCH();
  }
  
  const struct brw_tracked_state brw_invariant_state = {
     .dirty = {
        .mesa = 0,
-      .brw = BRW_NEW_CONTEXT,
-      .cache = 0
+      .brw = BRW_NEW_BLORP | /* NOTE(review): presumably re-emits the invariant packets (incl. the pipeline select) after a BLORP operation - confirm where the flag is raised */
+             BRW_NEW_CONTEXT,
     },
     .emit = brw_upload_invariant_state
  };
@@ -953,8 +1055,12 @@ const struct brw_tracked_state brw_invariant_state = {
   * surface state objects, but not the surfaces that the surface state
   * objects point to.
   */
-static void upload_state_base_address( struct brw_context *brw )
+void
+brw_upload_state_base_address(struct brw_context *brw)
  {
+   if (brw->batch.state_base_address_emitted)
+      return;
+
     /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
      * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
      * programmed prior to STATE_BASE_ADDRESS.
@@ -964,11 +1070,46 @@ static void upload_state_base_address( struct brw_context *brw )
      * maybe this isn't required for us in particular.
      */
  
-   if (brw->gen >= 6) {
-      uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
+   if (brw->gen >= 8) {
+      uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+      int pkt_len = brw->gen >= 9 ? 19 : 16;
  
-      if (brw->gen == 6)
-        intel_emit_post_sync_nonzero_flush(brw);
+      BEGIN_BATCH(pkt_len);
+      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (pkt_len - 2));
+      /* General state base address: stateless DP read/write requests */
+      OUT_BATCH(mocs_wb << 4 | 1);
+      OUT_BATCH(0);
+      OUT_BATCH(mocs_wb << 16);
+      /* Surface state base address: */
+      OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                  mocs_wb << 4 | 1);
+      /* Dynamic state base address: */
+      OUT_RELOC64(brw->batch.bo,
+                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  mocs_wb << 4 | 1);
+      /* Indirect object base address: MEDIA_OBJECT data */
+      OUT_BATCH(mocs_wb << 4 | 1);
+      OUT_BATCH(0);
+      /* Instruction base address: shader kernels (incl. SIP) */
+      OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  mocs_wb << 4 | 1);
+
+      /* General state buffer size */
+      OUT_BATCH(0xfffff001);
+      /* Dynamic state buffer size */
+      OUT_BATCH(ALIGN(brw->batch.bo->size, 4096) | 1);
+      /* Indirect object upper bound */
+      OUT_BATCH(0xfffff001);
+      /* Instruction access upper bound */
+      OUT_BATCH(ALIGN(brw->cache.bo->size, 4096) | 1);
+      if (brw->gen >= 9) {
+         OUT_BATCH(1);
+         OUT_BATCH(0);
+         OUT_BATCH(0);
+      }
+      ADVANCE_BATCH();
+   } else if (brw->gen >= 6) {
+      uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
  
         BEGIN_BATCH(10);
         OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
@@ -1054,15 +1195,6 @@ static void upload_state_base_address( struct brw_context *brw )
      * obvious.
      */
  
-   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
+   brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
+   brw->batch.state_base_address_emitted = true;
  }
-
-const struct brw_tracked_state brw_state_base_address = {
-   .dirty = {
-      .mesa = 0,
-      .brw = (BRW_NEW_BATCH |
-             BRW_NEW_PROGRAM_CACHE),
-      .cache = 0,
-   },
-   .emit = upload_state_base_address
-};