#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_fbo.h"

#include "brw_context.h"
#include "brw_state.h"

#include "blorp/blorp_genX_exec.h"

#if GEN_GEN <= 5
#include "gen4_blorp_exec.h"
#endif

#include "brw_blorp.h"
static void *
-blorp_emit_dwords(struct brw_context *brw, unsigned n)
+blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
{
- intel_batchbuffer_begin(brw, n, RENDER_RING);
+ assert(batch->blorp->driver_ctx == batch->driver_batch);
+ struct brw_context *brw = batch->driver_batch;
+
+ intel_batchbuffer_begin(brw, n);
uint32_t *map = brw->batch.map_next;
brw->batch.map_next += n;
intel_batchbuffer_advance(brw);
return map;
}
-struct blorp_address {
- drm_intel_bo *buffer;
- uint32_t read_domains;
- uint32_t write_domain;
- uint32_t offset;
-};
-
static uint64_t
-blorp_emit_reloc(struct brw_context *brw, void *location,
- struct blorp_address address, uint32_t delta)
+blorp_emit_reloc(struct blorp_batch *batch,
+ void *location, struct blorp_address address, uint32_t delta)
{
- uint32_t offset = (char *)location - (char *)brw->batch.map;
- if (brw->gen >= 8) {
- return intel_batchbuffer_reloc64(brw, address.buffer, offset,
- address.read_domains,
- address.write_domain,
- address.offset + delta);
- } else {
- return intel_batchbuffer_reloc(brw, address.buffer, offset,
- address.read_domains,
- address.write_domain,
- address.offset + delta);
+ assert(batch->blorp->driver_ctx == batch->driver_batch);
+ struct brw_context *brw = batch->driver_batch;
+ uint32_t offset;
+
+ if (GEN_GEN < 6 && brw_ptr_in_state_buffer(&brw->batch, location)) {
+ offset = (char *)location - (char *)brw->batch.state.map;
+ return brw_state_reloc(&brw->batch, offset,
+ address.buffer, address.offset + delta,
+ address.reloc_flags);
}
+
+ assert(!brw_ptr_in_state_buffer(&brw->batch, location));
+
+ offset = (char *)location - (char *)brw->batch.batch.map;
+ return brw_batch_reloc(&brw->batch, offset,
+ address.buffer, address.offset + delta,
+ address.reloc_flags);
}
-#define __gen_address_type struct blorp_address
-#define __gen_user_data struct brw_context
+static void
+blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
+ struct blorp_address address, uint32_t delta)
+{
+ assert(batch->blorp->driver_ctx == batch->driver_batch);
+ struct brw_context *brw = batch->driver_batch;
+ struct brw_bo *bo = address.buffer;
+
+ uint64_t reloc_val =
+ brw_state_reloc(&brw->batch, ss_offset, bo, address.offset + delta,
+ address.reloc_flags);
+
+ void *reloc_ptr = (void *)brw->batch.state.map + ss_offset;
+#if GEN_GEN >= 8
+ *(uint64_t *)reloc_ptr = reloc_val;
+#else
+ *(uint32_t *)reloc_ptr = reloc_val;
+#endif
+}
static uint64_t
-__gen_combine_address(struct brw_context *brw, void *location,
- struct blorp_address address, uint32_t delta)
+blorp_get_surface_address(struct blorp_batch *blorp_batch,
+ struct blorp_address address)
{
- if (address.buffer == NULL) {
- return address.offset + delta;
- } else {
- return blorp_emit_reloc(brw, location, address, delta);
- }
+ /* We'll let blorp_surface_reloc write the address. */
+ return 0ull;
}
#if GEN_GEN >= 7 && GEN_GEN < 10
/* blorp hook: the base address that surface-state offsets are relative to.
 * Surface states live in the batch's state buffer, so that BO at offset 0.
 */
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;
   return (struct blorp_address) {
      .buffer = brw->batch.state.bo,
      .offset = 0,
   };
}
#endif
-#define _blorp_cmd_length(cmd) cmd ## _length
-#define _blorp_cmd_header(cmd) cmd ## _header
-#define _blorp_cmd_pack(cmd) cmd ## _pack
+static void *
+blorp_alloc_dynamic_state(struct blorp_batch *batch,
+ uint32_t size,
+ uint32_t alignment,
+ uint32_t *offset)
+{
+ assert(batch->blorp->driver_ctx == batch->driver_batch);
+ struct brw_context *brw = batch->driver_batch;
-#define blorp_emit(brw, cmd, name) \
- for (struct cmd name = { _blorp_cmd_header(cmd) }, \
- *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd)); \
- __builtin_expect(_dst != NULL, 1); \
- _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name), \
- _dst = NULL)
+ return brw_state_batch(brw, size, alignment, offset);
+}
static void
-blorp_emit_sf_config(struct brw_context *brw,
- const struct brw_blorp_params *params)
+blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
+ unsigned state_size, unsigned state_alignment,
+ uint32_t *bt_offset, uint32_t *surface_offsets,
+ void **surface_maps)
{
- const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
-
- /* 3DSTATE_SF
- *
- * Disable ViewportTransformEnable (dw2.1)
- *
- * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
- * Primitives Overview":
- * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
- * use of screen- space coordinates).
- *
- * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
- * and BackFaceFillMode (dw2.5:6) to SOLID(0).
- *
- * From the Sandy Bridge PRM, Volume 2, Part 1, Section
- * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
- * SOLID: Any triangle or rectangle object found to be front-facing
- * is rendered as a solid object. This setting is required when
- * (rendering rectangle (RECTLIST) objects.
- */
- blorp_emit(brw, GENX(3DSTATE_SF), sf) {
- sf.FrontFaceFillMode = FILL_MODE_SOLID;
- sf.BackFaceFillMode = FILL_MODE_SOLID;
-
- sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
- MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
-
- sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
- if (prog_data) {
- sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
- sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
- sf.ConstantInterpolationEnable = prog_data->flat_inputs;
- } else {
- sf.NumberofSFOutputAttributes = 0;
- sf.VertexURBEntryReadLength = 1;
- }
+ assert(batch->blorp->driver_ctx == batch->driver_batch);
+ struct brw_context *brw = batch->driver_batch;
+
+ uint32_t *bt_map = brw_state_batch(brw,
+ num_entries * sizeof(uint32_t), 32,
+ bt_offset);
+
+ for (unsigned i = 0; i < num_entries; i++) {
+ surface_maps[i] = brw_state_batch(brw,
+ state_size, state_alignment,
+ &(surface_offsets)[i]);
+ bt_map[i] = surface_offsets[i];
}
}
-static void
-blorp_emit_wm_config(struct brw_context *brw,
- const struct brw_blorp_params *params)
+static void *
+blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
+ struct blorp_address *addr)
{
- const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
+ assert(batch->blorp->driver_ctx == batch->driver_batch);
+ struct brw_context *brw = batch->driver_batch;
- /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
- * nonzero to prevent the GPU from hanging. While the documentation doesn't
- * mention this explicitly, it notes that the valid range for the field is
- * [1,39] = [2,40] threads, which excludes zero.
+ /* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS:
+ *
+ * "The VF cache needs to be invalidated before binding and then using
+ * Vertex Buffers that overlap with any previously bound Vertex Buffer
+ * (at a 64B granularity) since the last invalidation. A VF cache
+ * invalidate is performed by setting the "VF Cache Invalidation Enable"
+ * bit in PIPE_CONTROL."
*
- * To be safe (and to minimize extraneous code) we go ahead and fully
- * configure the WM state whether or not there is a WM program.
+ * This restriction first appears in the Skylake PRM but the internal docs
+ * also list it as being an issue on Broadwell. In order to avoid this
+ * problem, we align all vertex buffer allocations to 64 bytes.
*/
- blorp_emit(brw, GENX(3DSTATE_WM), wm) {
- wm.MaximumNumberofThreads = brw->max_wm_threads - 1;
-
- switch (params->hiz_op) {
- case GEN6_HIZ_OP_DEPTH_CLEAR:
- wm.DepthBufferClear = true;
- break;
- case GEN6_HIZ_OP_DEPTH_RESOLVE:
- wm.DepthBufferResolveEnable = true;
- break;
- case GEN6_HIZ_OP_HIZ_RESOLVE:
- wm.HierarchicalDepthBufferResolveEnable = true;
- break;
- case GEN6_HIZ_OP_NONE:
- break;
- default:
- unreachable("not reached");
- }
-
- if (prog_data) {
- wm.ThreadDispatchEnable = true;
-
- wm.DispatchGRFStartRegisterforConstantSetupData0 =
- prog_data->first_curbe_grf_0;
- wm.DispatchGRFStartRegisterforConstantSetupData2 =
- prog_data->first_curbe_grf_2;
-
- wm.KernelStartPointer0 = params->wm_prog_kernel;
- wm.KernelStartPointer2 =
- params->wm_prog_kernel + prog_data->ksp_offset_2;
-
- wm._8PixelDispatchEnable = prog_data->dispatch_8;
- wm._16PixelDispatchEnable = prog_data->dispatch_16;
-
- wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
- }
-
- if (params->src.bo) {
- wm.SamplerCount = 1; /* Up to 4 samplers */
- wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
- }
+ uint32_t offset;
+ void *data = brw_state_batch(brw, size, 64, &offset);
+
+ *addr = (struct blorp_address) {
+ .buffer = brw->batch.state.bo,
+ .offset = offset,
+
+ /* The VF cache designers apparently cut corners, and made the cache
+ * only consider the bottom 32 bits of memory addresses. If you happen
+ * to have two vertex buffers which get placed exactly 4 GiB apart and
+ * use them in back-to-back draw calls, you can get collisions. To work
+ * around this problem, we restrict vertex buffers to the low 32 bits of
+ * the address space.
+ */
+ .reloc_flags = RELOC_32BIT,
+
+#if GEN_GEN == 11
+ .mocs = ICL_MOCS_WB,
+#elif GEN_GEN == 10
+ .mocs = CNL_MOCS_WB,
+#elif GEN_GEN == 9
+ .mocs = SKL_MOCS_WB,
+#elif GEN_GEN == 8
+ .mocs = BDW_MOCS_WB,
+#elif GEN_GEN == 7
+ .mocs = GEN7_MOCS_L3,
+#elif GEN_GEN > 6
+#error "Missing MOCS setting!"
+#endif
+ };
- if (params->dst.surf.samples > 1) {
- wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
- wm.MultisampleDispatchMode =
- (prog_data && prog_data->persample_msaa_dispatch) ?
- MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
- } else {
- wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
- wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
- }
- }
+ return data;
}
/**
 * See vf_invalidate_for_vb_48b_transitions in genX_state_upload.c.
 *
 * On Gen8-10, emit a VF-cache invalidate when any vertex buffer's upper
 * 32 address bits differ from the last buffer bound in that slot (the VF
 * cache only keys on the low 32 bits).  No-op on other generations.
 */
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                           const struct blorp_address *addrs,
                                           unsigned num_vbs)
{
#if GEN_GEN >= 8 && GEN_GEN < 11
   struct brw_context *brw = batch->driver_batch;
   bool need_invalidate = false;

   for (unsigned i = 0; i < num_vbs; i++) {
      struct brw_bo *bo = addrs[i].buffer;
      /* Softpinned BOs have a stable GTT offset; others are treated as 0. */
      uint16_t high_bits =
         bo && (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32u : 0;

      if (high_bits != brw->vb.last_bo_high_bits[i]) {
         need_invalidate = true;
         brw->vb.last_bo_high_bits[i] = high_bits;
      }
   }

   if (need_invalidate) {
      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE |
                                       PIPE_CONTROL_CS_STALL);
   }
#endif
}
#if GEN_GEN >= 8
/* blorp hook: scratch BO used as the target of workaround writes
 * (e.g. post-sync operations that need somewhere harmless to write).
 */
static struct blorp_address
blorp_get_workaround_page(struct blorp_batch *batch)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;

   return (struct blorp_address) {
      .buffer = brw->workaround_bo,
   };
}
#endif
-static uint32_t
-blorp_emit_depth_stencil_state(struct brw_context *brw,
- const struct brw_blorp_params *params)
+static void
+blorp_flush_range(UNUSED struct blorp_batch *batch, UNUSED void *start,
+ UNUSED size_t size)
{
- /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
- * - 7.5.3.1 Depth Buffer Clear
- * - 7.5.3.2 Depth Buffer Resolve
- * - 7.5.3.3 Hierarchical Depth Buffer Resolve
+ /* All allocated states come from the batch which we will flush before we
+ * submit it. There's nothing for us to do here.
*/
- struct GENX(DEPTH_STENCIL_STATE) ds = {
- .DepthBufferWriteEnable = true,
- };
-
- if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
- ds.DepthTestEnable = true;
- ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
- }
-
- uint32_t offset;
- void *state = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
- GENX(DEPTH_STENCIL_STATE_length) * 4, 64,
- &offset);
- GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
-
- return offset;
}
static void
-blorp_emit_sampler_state(struct brw_context *brw,
- const struct brw_blorp_params *params)
+blorp_emit_urb_config(struct blorp_batch *batch,
+ unsigned vs_entry_size,
+ UNUSED unsigned sf_entry_size)
{
- struct GENX(SAMPLER_STATE) sampler = {
- .MipModeFilter = MIPFILTER_NONE,
- .MagModeFilter = MAPFILTER_LINEAR,
- .MinModeFilter = MAPFILTER_LINEAR,
- .MinLOD = 0,
- .MaxLOD = 0,
- .TCXAddressControlMode = TCM_CLAMP,
- .TCYAddressControlMode = TCM_CLAMP,
- .TCZAddressControlMode = TCM_CLAMP,
- .MaximumAnisotropy = RATIO21,
- .RAddressMinFilterRoundingEnable = true,
- .RAddressMagFilterRoundingEnable = true,
- .VAddressMinFilterRoundingEnable = true,
- .VAddressMagFilterRoundingEnable = true,
- .UAddressMinFilterRoundingEnable = true,
- .UAddressMagFilterRoundingEnable = true,
- .NonnormalizedCoordinateEnable = true,
- };
-
- uint32_t offset;
- void *state = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
- GENX(SAMPLER_STATE_length) * 4, 32, &offset);
- GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
-
- blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
- ssp.VSSamplerStateChange = true;
- ssp.GSSamplerStateChange = true;
- ssp.PSSamplerStateChange = true;
- ssp.PointertoPSSamplerState = offset;
- }
+ assert(batch->blorp->driver_ctx == batch->driver_batch);
+ struct brw_context *brw = batch->driver_batch;
+
+#if GEN_GEN >= 7
+ if (brw->urb.vsize >= vs_entry_size)
+ return;
+
+ gen7_upload_urb(brw, vs_entry_size, false, false);
+#elif GEN_GEN == 6
+ gen6_upload_urb(brw, vs_entry_size, false, 0);
+#else
+ /* We calculate it now and emit later. */
+ brw_calculate_urb_fence(brw, 0, vs_entry_size, sf_entry_size);
+#endif
}
-/* 3DSTATE_VIEWPORT_STATE_POINTERS */
-static void
-blorp_emit_viewport_state(struct brw_context *brw,
- const struct brw_blorp_params *params)
+void
+genX(blorp_exec)(struct blorp_batch *batch,
+ const struct blorp_params *params)
{
- uint32_t cc_vp_offset;
-
- void *state = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
- GENX(CC_VIEWPORT_length) * 4, 32,
- &cc_vp_offset);
-
- GENX(CC_VIEWPORT_pack)(brw, state,
- &(struct GENX(CC_VIEWPORT)) {
- .MinimumDepth = 0.0,
- .MaximumDepth = 1.0,
- });
+ assert(batch->blorp->driver_ctx == batch->driver_batch);
+ struct brw_context *brw = batch->driver_batch;
+ struct gl_context *ctx = &brw->ctx;
+ bool check_aperture_failed_once = false;
- blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
- vsp.CCViewportStateChange = true;
- vsp.PointertoCC_VIEWPORT = cc_vp_offset;
+#if GEN_GEN >= 11
+ /* The PIPE_CONTROL command description says:
+ *
+ * "Whenever a Binding Table Index (BTI) used by a Render Taget Message
+ * points to a different RENDER_SURFACE_STATE, SW must issue a Render
+ * Target Cache Flush by enabling this bit. When render target flush
+ * is set due to new association of BTI, PS Scoreboard Stall bit must
+ * be set in this packet."
+ */
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+#endif
+
+ /* Flush the sampler and render caches. We definitely need to flush the
+ * sampler cache so that we get updated contents from the render cache for
+ * the glBlitFramebuffer() source. Also, we are sometimes warned in the
+ * docs to flush the cache between reinterpretations of the same surface
+ * data with different formats, which blorp does for stencil and depth
+ * data.
+ */
+ if (params->src.enabled)
+ brw_cache_flush_for_read(brw, params->src.addr.buffer);
+ if (params->dst.enabled) {
+ brw_cache_flush_for_render(brw, params->dst.addr.buffer,
+ params->dst.view.format,
+ params->dst.aux_usage);
}
-}
+ if (params->depth.enabled)
+ brw_cache_flush_for_depth(brw, params->depth.addr.buffer);
+ if (params->stencil.enabled)
+ brw_cache_flush_for_depth(brw, params->stencil.addr.buffer);
+ brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
-/**
- * \brief Execute a blit or render pass operation.
- *
- * To execute the operation, this function manually constructs and emits a
- * batch to draw a rectangle primitive. The batchbuffer is flushed before
- * constructing and after emitting the batch.
- *
- * This function alters no GL state.
- */
-void
-genX(blorp_exec)(struct brw_context *brw,
- const struct brw_blorp_params *params)
-{
- uint32_t blend_state_offset = 0;
- uint32_t color_calc_state_offset = 0;
- uint32_t depth_stencil_state_offset;
- uint32_t wm_bind_bo_offset = 0;
+retry:
+ intel_batchbuffer_require_space(brw, 1400);
+ brw_require_statebuffer_space(brw, 600);
+ intel_batchbuffer_save_state(brw);
+ check_aperture_failed_once |= intel_batchbuffer_saved_state_is_empty(brw);
+ brw->batch.no_wrap = true;
+#if GEN_GEN == 6
/* Emit workaround flushes when we switch from drawing to blorping. */
brw_emit_post_sync_nonzero_flush(brw);
+#endif
brw_upload_state_base_address(brw);
- gen6_blorp_emit_vertices(brw, params);
+#if GEN_GEN >= 8
+ gen7_l3_state.emit(brw);
+#endif
- /* 3DSTATE_URB
- *
- * Assign the entire URB to the VS. Even though the VS disabled, URB space
- * is still needed because the clipper loads the VUE's from the URB. From
- * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
- * Dword 1.15:0 "VS Number of URB Entries":
- * This field is always used (even if VS Function Enable is DISABLED).
- *
- * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
- * safely ignore it because this batch contains only one draw call.
- * Because of URB corruption caused by allocating a previous GS unit
- * URB entry to the VS unit, software is required to send a “GS NULL
- * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
- * plus a dummy DRAW call before any case where VS will be taking over
- * GS URB space.
- */
- blorp_emit(brw, GENX(3DSTATE_URB), urb) {
- urb.VSNumberofURBEntries = brw->urb.max_vs_entries;
- }
+#if GEN_GEN >= 6
+ brw_emit_depth_stall_flushes(brw);
+#endif
- if (params->wm_prog_data) {
- blend_state_offset = blorp_emit_blend_state(brw, params);
- color_calc_state_offset = blorp_emit_color_calc_state(brw, params);
- }
- depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params);
+#if GEN_GEN == 8
+ gen8_write_pma_stall_bits(brw, 0);
+#endif
- /* 3DSTATE_CC_STATE_POINTERS
- *
- * The pointer offsets are relative to
- * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
- *
- * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
- */
- blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
- cc.BLEND_STATEChange = true;
- cc.COLOR_CALC_STATEChange = true;
- cc.DEPTH_STENCIL_STATEChange = true;
- cc.PointertoBLEND_STATE = blend_state_offset;
- cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
- cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
+ const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
+ if (brw->current_hash_scale != scale) {
+ brw_emit_hashing_mode(brw, params->x1 - params->x0,
+ params->y1 - params->y0, scale);
}
- blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs);
- blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs);
- blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps);
-
- if (params->wm_prog_data) {
- uint32_t wm_surf_offset_renderbuffer;
- uint32_t wm_surf_offset_texture = 0;
-
- wm_surf_offset_renderbuffer =
- brw_blorp_emit_surface_state(brw, ¶ms->dst,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER, true);
- if (params->src.bo) {
- wm_surf_offset_texture =
- brw_blorp_emit_surface_state(brw, ¶ms->src,
- I915_GEM_DOMAIN_SAMPLER, 0, false);
- }
- wm_bind_bo_offset =
- gen6_blorp_emit_binding_table(brw,
- wm_surf_offset_renderbuffer,
- wm_surf_offset_texture);
-
- blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
- bt.PSBindingTableChange = true;
- bt.PointertoPSBindingTable = wm_bind_bo_offset;
- }
+ blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
+ rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
+ rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
}
- if (params->src.bo)
- blorp_emit_sampler_state(brw, params);
+ blorp_exec(batch, params);
- gen6_emit_3dstate_multisample(brw, params->dst.surf.samples);
+ brw->batch.no_wrap = false;
- blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) {
- mask.SampleMask = (1 << params->dst.surf.samples) - 1;
- }
-
- /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
- * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
- *
- * [DevSNB] A pipeline flush must be programmed prior to a
- * 3DSTATE_VS command that causes the VS Function Enable to
- * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
- * command with CS stall bit set and a post sync operation.
- *
- * We've already done one at the start of the BLORP operation.
+ /* Check if the blorp op we just did would make our batch likely to fail to
+ * map all the BOs into the GPU at batch exec time later. If so, flush the
+ * batch and try again with nothing else in the batch.
*/
- blorp_emit(brw, GENX(3DSTATE_VS), vs);
- blorp_emit(brw, GENX(3DSTATE_GS), gs);
-
- blorp_emit(brw, GENX(3DSTATE_CLIP), clip) {
- clip.PerspectiveDivideDisable = true;
- }
-
- blorp_emit_sf_config(brw, params);
- blorp_emit_wm_config(brw, params);
-
- blorp_emit_viewport_state(brw, params);
-
- if (params->depth.bo) {
- blorp_emit_depth_stencil_config(brw, params);
- } else {
- brw_emit_depth_stall_flushes(brw);
-
- blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
- db.SurfaceType = SURFTYPE_NULL;
- db.SurfaceFormat = D32_FLOAT;
+ if (!brw_batch_has_aperture_space(brw, 0)) {
+ if (!check_aperture_failed_once) {
+ check_aperture_failed_once = true;
+ intel_batchbuffer_reset_to_saved(brw);
+ intel_batchbuffer_flush(brw);
+ goto retry;
+ } else {
+ int ret = intel_batchbuffer_flush(brw);
+ WARN_ONCE(ret == -ENOSPC,
+ "i965: blorp emit exceeded available aperture space\n");
}
- blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
- blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
}
- /* 3DSTATE_CLEAR_PARAMS
- *
- * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
- * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
- * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
- */
- blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) {
- clear.DepthClearValueValid = true;
- clear.DepthClearValue = params->depth.clear_color.u32[0];
- }
+ if (unlikely(brw->always_flush_batch))
+ intel_batchbuffer_flush(brw);
- blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
- rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
- rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
- }
-
- blorp_emit(brw, GENX(3DPRIMITIVE), prim) {
- prim.VertexAccessType = SEQUENTIAL;
- prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
- prim.VertexCountPerInstance = 3;
- prim.InstanceCount = params->num_layers;
+ /* We've smashed all state compared to what the normal 3D pipeline
+ * rendering tracks for GL.
+ */
+ brw->ctx.NewDriverState |= BRW_NEW_BLORP;
+ brw->no_depth_or_stencil = !params->depth.enabled &&
+ !params->stencil.enabled;
+ brw->ib.index_size = -1;
+
+ if (params->dst.enabled) {
+ brw_render_cache_add_bo(brw, params->dst.addr.buffer,
+ params->dst.view.format,
+ params->dst.aux_usage);
}
+ if (params->depth.enabled)
+ brw_depth_cache_add_bo(brw, params->depth.addr.buffer);
+ if (params->stencil.enabled)
+ brw_depth_cache_add_bo(brw, params->stencil.addr.buffer);
}