i965/blorp: Make post draw flush more explicit
[mesa.git] / src / mesa / drivers / dri / i965 / genX_blorp_exec.c
index 87318272cfa63e3fb59596faa5484e43d1cc46e0..647a362afe04325863d99066d60282f07b8ae95d 100644 (file)
 
 #include "intel_batchbuffer.h"
 #include "intel_mipmap_tree.h"
+#include "intel_fbo.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
 
-#include "blorp_priv.h"
+#include "blorp/blorp_genX_exec.h"
 
-#include "genxml/gen_macros.h"
+#include "brw_blorp.h"
 
 static void *
-blorp_emit_dwords(struct brw_context *brw, unsigned n)
+blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
 {
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
    intel_batchbuffer_begin(brw, n, RENDER_RING);
    uint32_t *map = brw->batch.map_next;
    brw->batch.map_next += n;
@@ -43,422 +47,218 @@ blorp_emit_dwords(struct brw_context *brw, unsigned n)
    return map;
 }
 
-struct blorp_address {
-   drm_intel_bo *buffer;
-   uint32_t read_domains;
-   uint32_t write_domain;
-   uint32_t offset;
-};
-
 static uint64_t
-blorp_emit_reloc(struct brw_context *brw, void *location,
-                 struct blorp_address address, uint32_t delta)
+blorp_emit_reloc(struct blorp_batch *batch,
+                 void *location, struct blorp_address address, uint32_t delta)
 {
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
    uint32_t offset = (char *)location - (char *)brw->batch.map;
    if (brw->gen >= 8) {
-      return intel_batchbuffer_reloc64(brw, address.buffer, offset,
+      return intel_batchbuffer_reloc64(&brw->batch, address.buffer, offset,
                                        address.read_domains,
                                        address.write_domain,
                                        address.offset + delta);
    } else {
-      return intel_batchbuffer_reloc(brw, address.buffer, offset,
+      return intel_batchbuffer_reloc(&brw->batch, address.buffer, offset,
                                      address.read_domains,
                                      address.write_domain,
                                      address.offset + delta);
    }
 }
 
-#define __gen_address_type struct blorp_address
-#define __gen_user_data struct brw_context
-
-static uint64_t
-__gen_combine_address(struct brw_context *brw, void *location,
-                      struct blorp_address address, uint32_t delta)
+static void
+blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
+                    struct blorp_address address, uint32_t delta)
 {
-   if (address.buffer == NULL) {
-      return address.offset + delta;
-   } else {
-      return blorp_emit_reloc(brw, location, address, delta);
-   }
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+   drm_intel_bo *bo = address.buffer;
+
+   drm_intel_bo_emit_reloc(brw->batch.bo, ss_offset,
+                           bo, address.offset + delta,
+                           address.read_domains, address.write_domain);
+
+   uint64_t reloc_val = bo->offset64 + address.offset + delta;
+   void *reloc_ptr = (void *)brw->batch.map + ss_offset;
+#if GEN_GEN >= 8
+   *(uint64_t *)reloc_ptr = reloc_val;
+#else
+   *(uint32_t *)reloc_ptr = reloc_val;
+#endif
 }
 
-#include "genxml/genX_pack.h"
-
-#define _blorp_cmd_length(cmd) cmd ## _length
-#define _blorp_cmd_header(cmd) cmd ## _header
-#define _blorp_cmd_pack(cmd) cmd ## _pack
-
-#define blorp_emit(brw, cmd, name)                                \
-   for (struct cmd name = { _blorp_cmd_header(cmd) },             \
-        *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd));   \
-        __builtin_expect(_dst != NULL, 1);                        \
-        _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name),           \
-        _dst = NULL)
-
-static void
-blorp_emit_sf_config(struct brw_context *brw,
-                     const struct brw_blorp_params *params)
+static void *
+blorp_alloc_dynamic_state(struct blorp_batch *batch,
+                          enum aub_state_struct_type type,
+                          uint32_t size,
+                          uint32_t alignment,
+                          uint32_t *offset)
 {
-   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
-
-   /* 3DSTATE_SF
-    *
-    * Disable ViewportTransformEnable (dw2.1)
-    *
-    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
-    * Primitives Overview":
-    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
-    *     use of screen- space coordinates).
-    *
-    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
-    * and BackFaceFillMode (dw2.5:6) to SOLID(0).
-    *
-    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
-    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
-    *     SOLID: Any triangle or rectangle object found to be front-facing
-    *     is rendered as a solid object. This setting is required when
-    *     (rendering rectangle (RECTLIST) objects.
-    */
-   blorp_emit(brw, GENX(3DSTATE_SF), sf) {
-      sf.FrontFaceFillMode = FILL_MODE_SOLID;
-      sf.BackFaceFillMode = FILL_MODE_SOLID;
-
-      sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
-         MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
-
-      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
-      if (prog_data) {
-         sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
-         sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
-         sf.ConstantInterpolationEnable = prog_data->flat_inputs;
-      } else {
-         sf.NumberofSFOutputAttributes = 0;
-         sf.VertexURBEntryReadLength = 1;
-      }
-   }
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
+   return brw_state_batch(brw, type, size, alignment, offset);
 }
 
 static void
-blorp_emit_wm_config(struct brw_context *brw,
-                     const struct brw_blorp_params *params)
+blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
+                          unsigned state_size, unsigned state_alignment,
+                          uint32_t *bt_offset, uint32_t *surface_offsets,
+                          void **surface_maps)
 {
-   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
-
-   /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
-    * nonzero to prevent the GPU from hanging.  While the documentation doesn't
-    * mention this explicitly, it notes that the valid range for the field is
-    * [1,39] = [2,40] threads, which excludes zero.
-    *
-    * To be safe (and to minimize extraneous code) we go ahead and fully
-    * configure the WM state whether or not there is a WM program.
-    */
-   blorp_emit(brw, GENX(3DSTATE_WM), wm) {
-      wm.MaximumNumberofThreads = brw->max_wm_threads - 1;
-
-      switch (params->hiz_op) {
-      case GEN6_HIZ_OP_DEPTH_CLEAR:
-         wm.DepthBufferClear = true;
-         break;
-      case GEN6_HIZ_OP_DEPTH_RESOLVE:
-         wm.DepthBufferResolveEnable = true;
-         break;
-      case GEN6_HIZ_OP_HIZ_RESOLVE:
-         wm.HierarchicalDepthBufferResolveEnable = true;
-         break;
-      case GEN6_HIZ_OP_NONE:
-         break;
-      default:
-         unreachable("not reached");
-      }
-
-      if (prog_data) {
-         wm.ThreadDispatchEnable = true;
-
-         wm.DispatchGRFStartRegisterforConstantSetupData0 =
-            prog_data->first_curbe_grf_0;
-         wm.DispatchGRFStartRegisterforConstantSetupData2 =
-            prog_data->first_curbe_grf_2;
-
-         wm.KernelStartPointer0 = params->wm_prog_kernel;
-         wm.KernelStartPointer2 =
-            params->wm_prog_kernel + prog_data->ksp_offset_2;
-
-         wm._8PixelDispatchEnable = prog_data->dispatch_8;
-         wm._16PixelDispatchEnable = prog_data->dispatch_16;
-
-         wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
-      }
-
-      if (params->src.bo) {
-         wm.SamplerCount = 1; /* Up to 4 samplers */
-         wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
-      }
-
-      if (params->dst.surf.samples > 1) {
-         wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
-         wm.MultisampleDispatchMode =
-            (prog_data && prog_data->persample_msaa_dispatch) ?
-            MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
-      } else {
-         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
-         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
-      }
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
+   uint32_t *bt_map = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+                                      num_entries * sizeof(uint32_t), 32,
+                                      bt_offset);
+
+   for (unsigned i = 0; i < num_entries; i++) {
+      surface_maps[i] = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+                                        state_size, state_alignment,
+                                        &(surface_offsets)[i]);
+      bt_map[i] = surface_offsets[i];
    }
 }
 
-
-static void
-blorp_emit_depth_stencil_config(struct brw_context *brw,
-                                const struct brw_blorp_params *params)
+static void *
+blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
+                          struct blorp_address *addr)
 {
-   brw_emit_depth_stall_flushes(brw);
-
-   blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
-      switch (params->depth.surf.dim) {
-      case ISL_SURF_DIM_1D:
-         db.SurfaceType = SURFTYPE_1D;
-         break;
-      case ISL_SURF_DIM_2D:
-         db.SurfaceType = SURFTYPE_2D;
-         break;
-      case ISL_SURF_DIM_3D:
-         db.SurfaceType = SURFTYPE_3D;
-         break;
-      }
-
-      db.SurfaceFormat = params->depth_format;
-
-      db.TiledSurface = true;
-      db.TileWalk = TILEWALK_YMAJOR;
-      db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
-
-      db.HierarchicalDepthBufferEnable = true;
-      db.SeparateStencilBufferEnable = true;
-
-      db.Width = params->depth.surf.logical_level0_px.width - 1;
-      db.Height = params->depth.surf.logical_level0_px.height - 1;
-      db.RenderTargetViewExtent = db.Depth =
-         MAX2(params->depth.surf.logical_level0_px.depth,
-              params->depth.surf.logical_level0_px.array_len) - 1;
-
-      db.LOD = params->depth.view.base_level;
-      db.MinimumArrayElement = params->depth.view.base_array_layer;
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
 
-      db.SurfacePitch = params->depth.surf.row_pitch - 1;
-      db.SurfaceBaseAddress = (struct blorp_address) {
-         .buffer = params->depth.bo,
-         .read_domains = I915_GEM_DOMAIN_RENDER,
-         .write_domain = I915_GEM_DOMAIN_RENDER,
-         .offset = params->depth.offset,
-      };
-   }
+   uint32_t offset;
+   void *data = brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
+                                size, 32, &offset);
 
-   blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
-      hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
-      hiz.SurfaceBaseAddress = (struct blorp_address) {
-         .buffer = params->depth.aux_bo,
-         .read_domains = I915_GEM_DOMAIN_RENDER,
-         .write_domain = I915_GEM_DOMAIN_RENDER,
-         .offset = params->depth.aux_offset,
-      };
-   }
+   *addr = (struct blorp_address) {
+      .buffer = brw->batch.bo,
+      .read_domains = I915_GEM_DOMAIN_VERTEX,
+      .write_domain = 0,
+      .offset = offset,
+   };
 
-   blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
+   return data;
 }
 
-
-/* 3DSTATE_VIEWPORT_STATE_POINTERS */
 static void
-blorp_emit_viewport_state(struct brw_context *brw,
-                          const struct brw_blorp_params *params)
+blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size)
 {
-   uint32_t cc_vp_offset;
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
 
-   void *state = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
-                                 GENX(CC_VIEWPORT_length) * 4, 32,
-                                 &cc_vp_offset);
+#if GEN_GEN >= 7
+   if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) &&
+       brw->urb.vsize >= vs_entry_size)
+      return;
 
-   GENX(CC_VIEWPORT_pack)(brw, state,
-      &(struct GENX(CC_VIEWPORT)) {
-         .MinimumDepth = 0.0,
-         .MaximumDepth = 1.0,
-      });
+   brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
 
-   blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
-      vsp.CCViewportStateChange = true;
-      vsp.PointertoCC_VIEWPORT = cc_vp_offset;
-   }
+   gen7_upload_urb(brw, vs_entry_size, false, false);
+#else
+   gen6_upload_urb(brw, vs_entry_size, false, 0);
+#endif
 }
 
-
-/**
- * \brief Execute a blit or render pass operation.
- *
- * To execute the operation, this function manually constructs and emits a
- * batch to draw a rectangle primitive. The batchbuffer is flushed before
- * constructing and after emitting the batch.
- *
- * This function alters no GL state.
- */
 void
-genX(blorp_exec)(struct brw_context *brw,
-                 const struct brw_blorp_params *params)
+genX(blorp_exec)(struct blorp_batch *batch,
+                 const struct blorp_params *params)
 {
-   uint32_t cc_blend_state_offset = 0;
-   uint32_t cc_state_offset = 0;
-   uint32_t depthstencil_offset;
-   uint32_t wm_bind_bo_offset = 0;
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+   struct gl_context *ctx = &brw->ctx;
+   const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1800 : 1500;
+   bool check_aperture_failed_once = false;
+
+   /* Flush the sampler and render caches.  We definitely need to flush the
+    * sampler cache so that we get updated contents from the render cache for
+    * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
+    * docs to flush the cache between reinterpretations of the same surface
+    * data with different formats, which blorp does for stencil and depth
+    * data.
+    */
+   if (params->src.enabled)
+      brw_render_cache_set_check_flush(brw, params->src.addr.buffer);
+   brw_render_cache_set_check_flush(brw, params->dst.addr.buffer);
+
+   brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
 
+retry:
+   intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
+   intel_batchbuffer_save_state(brw);
+   drm_intel_bo *saved_bo = brw->batch.bo;
+   uint32_t saved_used = USED_BATCH(brw->batch);
+   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
+
+#if GEN_GEN == 6
    /* Emit workaround flushes when we switch from drawing to blorping. */
    brw_emit_post_sync_nonzero_flush(brw);
+#endif
 
    brw_upload_state_base_address(brw);
 
-   gen6_blorp_emit_vertices(brw, params);
-
-   /* 3DSTATE_URB
-    *
-    * Assign the entire URB to the VS. Even though the VS disabled, URB space
-    * is still needed because the clipper loads the VUE's from the URB. From
-    * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
-    * Dword 1.15:0 "VS Number of URB Entries":
-    *     This field is always used (even if VS Function Enable is DISABLED).
-    *
-    * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
-    * safely ignore it because this batch contains only one draw call.
-    *     Because of URB corruption caused by allocating a previous GS unit
-    *     URB entry to the VS unit, software is required to send a “GS NULL
-    *     Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
-    *     plus a dummy DRAW call before any case where VS will be taking over
-    *     GS URB space.
-    */
-   blorp_emit(brw, GENX(3DSTATE_URB), urb) {
-      urb.VSNumberofURBEntries = brw->urb.max_vs_entries;
-   }
+#if GEN_GEN >= 8
+   gen7_l3_state.emit(brw);
+#endif
 
-   if (params->wm_prog_data) {
-      cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params);
-      cc_state_offset = gen6_blorp_emit_cc_state(brw);
-   }
-   depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
-
-   /* 3DSTATE_CC_STATE_POINTERS
-    *
-    * The pointer offsets are relative to
-    * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
-    *
-    * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
-    */
-   blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
-      cc.BLEND_STATEChange = true;
-      cc.COLOR_CALC_STATEChange = true;
-      cc.DEPTH_STENCIL_STATEChange = true;
-      cc.PointertoBLEND_STATE = cc_blend_state_offset;
-      cc.PointertoCOLOR_CALC_STATE = cc_state_offset;
-      cc.PointertoDEPTH_STENCIL_STATE = depthstencil_offset;
-   }
+   if (brw->use_resource_streamer)
+      gen7_disable_hw_binding_tables(brw);
 
-   blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs);
-   blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs);
-   blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps);
-
-   if (params->wm_prog_data) {
-      uint32_t wm_surf_offset_renderbuffer;
-      uint32_t wm_surf_offset_texture = 0;
-
-      wm_surf_offset_renderbuffer =
-         brw_blorp_emit_surface_state(brw, &params->dst,
-                                      I915_GEM_DOMAIN_RENDER,
-                                      I915_GEM_DOMAIN_RENDER, true);
-      if (params->src.bo) {
-         wm_surf_offset_texture =
-            brw_blorp_emit_surface_state(brw, &params->src,
-                                         I915_GEM_DOMAIN_SAMPLER, 0, false);
-      }
-      wm_bind_bo_offset =
-         gen6_blorp_emit_binding_table(brw,
-                                       wm_surf_offset_renderbuffer,
-                                       wm_surf_offset_texture);
-
-      blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
-         bt.PSBindingTableChange = true;
-         bt.PointertoPSBindingTable = wm_bind_bo_offset;
-      }
-   }
+   brw_emit_depth_stall_flushes(brw);
 
-   if (params->src.bo) {
-      const uint32_t sampler_offset =
-         gen6_blorp_emit_sampler_state(brw, MAPFILTER_LINEAR, 0, true);
+#if GEN_GEN == 8
+   gen8_write_pma_stall_bits(brw, 0);
+#endif
 
-      blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
-         ssp.VSSamplerStateChange = true;
-         ssp.GSSamplerStateChange = true;
-         ssp.PSSamplerStateChange = true;
-         ssp.PointertoPSSamplerState = sampler_offset;
-      }
+   blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
+      rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
+      rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
    }
 
-   gen6_emit_3dstate_multisample(brw, params->dst.surf.samples);
+   blorp_exec(batch, params);
 
-   blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) {
-      mask.SampleMask = (1 << params->dst.surf.samples) - 1;
-   }
-
-   /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
-    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
-    *
-    *   [DevSNB] A pipeline flush must be programmed prior to a
-    *   3DSTATE_VS command that causes the VS Function Enable to
-    *   toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
-    *   command with CS stall bit set and a post sync operation.
-    *
-    * We've already done one at the start of the BLORP operation.
+   /* Make sure we didn't wrap the batch unintentionally, and make sure we
+    * reserved enough space that a wrap will never happen.
     */
-   blorp_emit(brw, GENX(3DSTATE_VS), vs);
-   blorp_emit(brw, GENX(3DSTATE_GS), gs);
-
-   blorp_emit(brw, GENX(3DSTATE_CLIP), clip) {
-      clip.PerspectiveDivideDisable = true;
-   }
-
-   blorp_emit_sf_config(brw, params);
-   blorp_emit_wm_config(brw, params);
-
-   blorp_emit_viewport_state(brw, params);
-
-   if (params->depth.bo) {
-      blorp_emit_depth_stencil_config(brw, params);
-   } else {
-      brw_emit_depth_stall_flushes(brw);
-
-      blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
-         db.SurfaceType = SURFTYPE_NULL;
-         db.SurfaceFormat = D32_FLOAT;
+   assert(brw->batch.bo == saved_bo);
+   assert((USED_BATCH(brw->batch) - saved_used) * 4 +
+          (saved_state_batch_offset - brw->batch.state_batch_offset) <
+          estimated_max_batch_usage);
+   /* Silence unused-variable warnings when the asserts are compiled out */
+   (void)saved_bo;
+   (void)saved_used;
+   (void)saved_state_batch_offset;
+
+   /* Check if the blorp op we just did would make our batch likely to fail to
+    * map all the BOs into the GPU at batch exec time later.  If so, flush the
+    * batch and try again with nothing else in the batch.
+    */
+   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
+      if (!check_aperture_failed_once) {
+         check_aperture_failed_once = true;
+         intel_batchbuffer_reset_to_saved(brw);
+         intel_batchbuffer_flush(brw);
+         goto retry;
+      } else {
+         int ret = intel_batchbuffer_flush(brw);
+         WARN_ONCE(ret == -ENOSPC,
+                   "i965: blorp emit exceeded available aperture space\n");
       }
-      blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
-      blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
    }
 
-   /* 3DSTATE_CLEAR_PARAMS
-    *
-    * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
-    *   [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
-    *   packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
-    */
-   blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) {
-      clear.DepthClearValueValid = true;
-      clear.DepthClearValue = params->depth.clear_color.u32[0];
-   }
+   if (unlikely(brw->always_flush_batch))
+      intel_batchbuffer_flush(brw);
 
-   blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
-      rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
-      rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
-   }
+   /* We've smashed all state compared to what the normal 3D pipeline
+    * rendering tracks for GL.
+    */
+   brw->ctx.NewDriverState |= BRW_NEW_BLORP;
+   brw->no_depth_or_stencil = false;
+   brw->ib.type = -1;
 
-   blorp_emit(brw, GENX(3DPRIMITIVE), prim) {
-      prim.VertexAccessType = SEQUENTIAL;
-      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
-      prim.VertexCountPerInstance = 3;
-      prim.InstanceCount = params->num_layers;
-   }
+   if (params->dst.enabled)
+      brw_render_cache_set_add_bo(brw, params->dst.addr.buffer);
 }