X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2FgenX_blorp_exec.c;h=3bf6fd6156740201c6163fce8bd5db2618f89d36;hb=bc933d0e8462871e19328f66182c35543e334013;hp=283b1525067b0c49293bccd8d360603b4819da81;hpb=7b035fd0c97939a65825f6e1b467b0d741382bc5;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index 283b1525067..3bf6fd61567 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -25,17 +25,25 @@ #include "intel_batchbuffer.h" #include "intel_mipmap_tree.h" +#include "intel_fbo.h" #include "brw_context.h" #include "brw_state.h" -#include "blorp_priv.h" +#include "blorp/blorp_genX_exec.h" -#include "genxml/gen_macros.h" +#if GEN_GEN <= 5 +#include "gen4_blorp_exec.h" +#endif + +#include "brw_blorp.h" static void * -blorp_emit_dwords(struct brw_context *brw, unsigned n) +blorp_emit_dwords(struct blorp_batch *batch, unsigned n) { + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + intel_batchbuffer_begin(brw, n, RENDER_RING); uint32_t *map = brw->batch.map_next; brw->batch.map_next += n; @@ -43,517 +51,251 @@ blorp_emit_dwords(struct brw_context *brw, unsigned n) return map; } -struct blorp_address { - drm_intel_bo *buffer; - uint32_t read_domains; - uint32_t write_domain; - uint32_t offset; -}; - static uint64_t -blorp_emit_reloc(struct brw_context *brw, void *location, - struct blorp_address address, uint32_t delta) +blorp_emit_reloc(struct blorp_batch *batch, + void *location, struct blorp_address address, uint32_t delta) { - uint32_t offset = (char *)location - (char *)brw->batch.map; - if (brw->gen >= 8) { - return intel_batchbuffer_reloc64(brw, address.buffer, offset, - address.read_domains, - address.write_domain, - address.offset + delta); - } else { - return intel_batchbuffer_reloc(brw, address.buffer, offset, - address.read_domains, - address.write_domain, - address.offset + delta); - } -} - -#define __gen_address_type struct blorp_address -#define __gen_user_data struct brw_context + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + uint32_t offset; -static uint64_t -__gen_combine_address(struct brw_context *brw, void *location, - struct blorp_address address, uint32_t delta) -{ - if (address.buffer == NULL) { - return address.offset + delta; - } else { - return blorp_emit_reloc(brw, location, address, delta); + if (GEN_GEN < 6 && brw_ptr_in_state_buffer(&brw->batch, location)) { + offset = (char *)location - (char *)brw->batch.state_map; + return brw_state_reloc(&brw->batch, offset, + address.buffer, address.offset + delta, + address.reloc_flags); } -} - -#include "genxml/genX_pack.h" -#define _blorp_cmd_length(cmd) cmd ## _length -#define _blorp_cmd_header(cmd) cmd ## _header -#define _blorp_cmd_pack(cmd) cmd ## _pack + assert(!brw_ptr_in_state_buffer(&brw->batch, location)); -#define blorp_emit(brw, cmd, name) \ - for (struct cmd name = { _blorp_cmd_header(cmd) }, \ - *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd)); \ - __builtin_expect(_dst != NULL, 1); \ - _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name), \ - _dst = NULL) + offset = (char *)location - (char *)brw->batch.map; + return brw_batch_reloc(&brw->batch, offset, + address.buffer, address.offset + delta, + address.reloc_flags); +} static void -blorp_emit_sf_config(struct brw_context *brw, - const struct brw_blorp_params *params) +blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, + struct blorp_address address, uint32_t delta) { - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - - /* 3DSTATE_SF - * - * Disable ViewportTransformEnable (dw2.1) - * - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - * - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) - * and BackFaceFillMode (dw2.5:6) to SOLID(0). - * - * From the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. - */ - blorp_emit(brw, GENX(3DSTATE_SF), sf) { - sf.FrontFaceFillMode = FILL_MODE_SOLID; - sf.BackFaceFillMode = FILL_MODE_SOLID; - - sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? - MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; - - sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; - if (prog_data) { - sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs; - sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); - sf.ConstantInterpolationEnable = prog_data->flat_inputs; - } else { - sf.NumberofSFOutputAttributes = 0; - sf.VertexURBEntryReadLength = 1; - } - } + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + struct brw_bo *bo = address.buffer; + + uint64_t reloc_val = + brw_state_reloc(&brw->batch, ss_offset, bo, address.offset + delta, + address.reloc_flags); + + void *reloc_ptr = (void *)brw->batch.state_map + ss_offset; +#if GEN_GEN >= 8 + *(uint64_t *)reloc_ptr = reloc_val; +#else + *(uint32_t *)reloc_ptr = reloc_val; +#endif } -static void -blorp_emit_wm_config(struct brw_context *brw, - const struct brw_blorp_params *params) +static void * +blorp_alloc_dynamic_state(struct blorp_batch *batch, + uint32_t size, + uint32_t alignment, + uint32_t *offset) { - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - - /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be - * nonzero to prevent the GPU from hanging. While the documentation doesn't - * mention this explicitly, it notes that the valid range for the field is - * [1,39] = [2,40] threads, which excludes zero. - * - * To be safe (and to minimize extraneous code) we go ahead and fully - * configure the WM state whether or not there is a WM program. - */ - blorp_emit(brw, GENX(3DSTATE_WM), wm) { - wm.MaximumNumberofThreads = brw->max_wm_threads - 1; - - switch (params->hiz_op) { - case GEN6_HIZ_OP_DEPTH_CLEAR: - wm.DepthBufferClear = true; - break; - case GEN6_HIZ_OP_DEPTH_RESOLVE: - wm.DepthBufferResolveEnable = true; - break; - case GEN6_HIZ_OP_HIZ_RESOLVE: - wm.HierarchicalDepthBufferResolveEnable = true; - break; - case GEN6_HIZ_OP_NONE: - break; - default: - unreachable("not reached"); - } - - if (prog_data) { - wm.ThreadDispatchEnable = true; - - wm.DispatchGRFStartRegisterforConstantSetupData0 = - prog_data->first_curbe_grf_0; - wm.DispatchGRFStartRegisterforConstantSetupData2 = - prog_data->first_curbe_grf_2; - - wm.KernelStartPointer0 = params->wm_prog_kernel; - wm.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->ksp_offset_2; + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; - wm._8PixelDispatchEnable = prog_data->dispatch_8; - wm._16PixelDispatchEnable = prog_data->dispatch_16; - - wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs; - } - - if (params->src.bo) { - wm.SamplerCount = 1; /* Up to 4 samplers */ - wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */ - } - - if (params->dst.surf.samples > 1) { - wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; - wm.MultisampleDispatchMode = - (prog_data && prog_data->persample_msaa_dispatch) ? - MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; - } else { - wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; - wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; - } - } + return brw_state_batch(brw, size, alignment, offset); } - static void -blorp_emit_depth_stencil_config(struct brw_context *brw, - const struct brw_blorp_params *params) +blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries, + unsigned state_size, unsigned state_alignment, + uint32_t *bt_offset, uint32_t *surface_offsets, + void **surface_maps) { - brw_emit_depth_stall_flushes(brw); - - blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) { - switch (params->depth.surf.dim) { - case ISL_SURF_DIM_1D: - db.SurfaceType = SURFTYPE_1D; - break; - case ISL_SURF_DIM_2D: - db.SurfaceType = SURFTYPE_2D; - break; - case ISL_SURF_DIM_3D: - db.SurfaceType = SURFTYPE_3D; - break; - } - - db.SurfaceFormat = params->depth_format; - - db.TiledSurface = true; - db.TileWalk = TILEWALK_YMAJOR; - db.MIPMapLayoutMode = MIPLAYOUT_BELOW; - - db.HierarchicalDepthBufferEnable = true; - db.SeparateStencilBufferEnable = true; - - db.Width = params->depth.surf.logical_level0_px.width - 1; - db.Height = params->depth.surf.logical_level0_px.height - 1; - db.RenderTargetViewExtent = db.Depth = - MAX2(params->depth.surf.logical_level0_px.depth, - params->depth.surf.logical_level0_px.array_len) - 1; - - db.LOD = params->depth.view.base_level; - db.MinimumArrayElement = params->depth.view.base_array_layer; - - db.SurfacePitch = params->depth.surf.row_pitch - 1; - db.SurfaceBaseAddress = (struct blorp_address) { - .buffer = params->depth.bo, - .read_domains = I915_GEM_DOMAIN_RENDER, - .write_domain = I915_GEM_DOMAIN_RENDER, - .offset = params->depth.offset, - }; - } - - blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) { - hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1; - hiz.SurfaceBaseAddress = (struct blorp_address) { - .buffer = params->depth.aux_bo, - .read_domains = I915_GEM_DOMAIN_RENDER, - .write_domain = I915_GEM_DOMAIN_RENDER, - .offset = params->depth.aux_offset, - }; + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + + uint32_t *bt_map = brw_state_batch(brw, + num_entries * sizeof(uint32_t), 32, + bt_offset); + + for (unsigned i = 0; i < num_entries; i++) { + surface_maps[i] = brw_state_batch(brw, + state_size, state_alignment, + &(surface_offsets)[i]); + bt_map[i] = surface_offsets[i]; } - - blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb); } -static uint32_t -blorp_emit_blend_state(struct brw_context *brw, - const struct brw_blorp_params *params) +static void * +blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, + struct blorp_address *addr) { - struct GENX(BLEND_STATE) blend; - memset(&blend, 0, sizeof(blend)); - - for (unsigned i = 0; i < params->num_draw_buffers; ++i) { - blend.Entry[i].PreBlendColorClampEnable = true; - blend.Entry[i].PostBlendColorClampEnable = true; - blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT; - - blend.Entry[i].WriteDisableRed = params->color_write_disable[0]; - blend.Entry[i].WriteDisableGreen = params->color_write_disable[1]; - blend.Entry[i].WriteDisableBlue = params->color_write_disable[2]; - blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3]; - } + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + /* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS: + * + * "The VF cache needs to be invalidated before binding and then using + * Vertex Buffers that overlap with any previously bound Vertex Buffer + * (at a 64B granularity) since the last invalidation. A VF cache + * invalidate is performed by setting the "VF Cache Invalidation Enable" + * bit in PIPE_CONTROL." + * + * This restriction first appears in the Skylake PRM but the internal docs + * also list it as being an issue on Broadwell. In order to avoid this + * problem, we align all vertex buffer allocations to 64 bytes. + */ uint32_t offset; - void *state = brw_state_batch(brw, AUB_TRACE_BLEND_STATE, - GENX(BLEND_STATE_length) * 4, 64, &offset); - GENX(BLEND_STATE_pack)(NULL, state, &blend); + void *data = brw_state_batch(brw, size, 64, &offset); + + *addr = (struct blorp_address) { + .buffer = brw->batch.state_bo, + .offset = offset, + +#if GEN_GEN == 10 + .mocs = CNL_MOCS_WB, +#elif GEN_GEN == 9 + .mocs = SKL_MOCS_WB, +#elif GEN_GEN == 8 + .mocs = BDW_MOCS_WB, +#elif GEN_GEN == 7 + .mocs = GEN7_MOCS_L3, +#endif + }; - return offset; + return data; } -static uint32_t -blorp_emit_color_calc_state(struct brw_context *brw, - const struct brw_blorp_params *params) +#if GEN_GEN >= 8 +static struct blorp_address +blorp_get_workaround_page(struct blorp_batch *batch) { - uint32_t offset; - void *state = brw_state_batch(brw, AUB_TRACE_CC_STATE, - GENX(COLOR_CALC_STATE_length) * 4, 64, &offset); - memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4); - - return offset; -} + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; -static uint32_t -blorp_emit_depth_stencil_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - */ - struct GENX(DEPTH_STENCIL_STATE) ds = { - .DepthBufferWriteEnable = true, + return (struct blorp_address) { + .buffer = brw->workaround_bo, }; - - if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { - ds.DepthTestEnable = true; - ds.DepthTestFunction = COMPAREFUNCTION_NEVER; - } - - uint32_t offset; - void *state = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, - GENX(DEPTH_STENCIL_STATE_length) * 4, 64, - &offset); - GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds); - - return offset; } +#endif static void -blorp_emit_surface_states(struct brw_context *brw, - const struct brw_blorp_params *params) +blorp_flush_range(struct blorp_batch *batch, void *start, size_t size) { - uint32_t bind_offset; - uint32_t *bind = - brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - sizeof(uint32_t) * BRW_BLORP_NUM_BINDING_TABLE_ENTRIES, - 32, /* alignment */ &bind_offset); - - bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] = - brw_blorp_emit_surface_state(brw, ¶ms->dst, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, true); - if (params->src.bo) { - bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = - brw_blorp_emit_surface_state(brw, ¶ms->src, - I915_GEM_DOMAIN_SAMPLER, 0, false); - } - - blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) { - bt.PSBindingTableChange = true; - bt.PointertoPSBindingTable = bind_offset; - } + /* All allocated states come from the batch which we will flush before we + * submit it. There's nothing for us to do here. + */ } static void -blorp_emit_sampler_state(struct brw_context *brw, - const struct brw_blorp_params *params) +blorp_emit_urb_config(struct blorp_batch *batch, + unsigned vs_entry_size, unsigned sf_entry_size) { - struct GENX(SAMPLER_STATE) sampler = { - .MipModeFilter = MIPFILTER_NONE, - .MagModeFilter = MAPFILTER_LINEAR, - .MinModeFilter = MAPFILTER_LINEAR, - .MinLOD = 0, - .MaxLOD = 0, - .TCXAddressControlMode = TCM_CLAMP, - .TCYAddressControlMode = TCM_CLAMP, - .TCZAddressControlMode = TCM_CLAMP, - .MaximumAnisotropy = RATIO21, - .RAddressMinFilterRoundingEnable = true, - .RAddressMagFilterRoundingEnable = true, - .VAddressMinFilterRoundingEnable = true, - .VAddressMagFilterRoundingEnable = true, - .UAddressMinFilterRoundingEnable = true, - .UAddressMagFilterRoundingEnable = true, - .NonnormalizedCoordinateEnable = true, - }; - - uint32_t offset; - void *state = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, - GENX(SAMPLER_STATE_length) * 4, 32, &offset); - GENX(SAMPLER_STATE_pack)(NULL, state, &sampler); - - blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) { - ssp.VSSamplerStateChange = true; - ssp.GSSamplerStateChange = true; - ssp.PSSamplerStateChange = true; - ssp.PointertoPSSamplerState = offset; - } + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + +#if GEN_GEN >= 7 + if (brw->urb.vsize >= vs_entry_size) + return; + + gen7_upload_urb(brw, vs_entry_size, false, false); +#elif GEN_GEN == 6 + gen6_upload_urb(brw, vs_entry_size, false, 0); +#else + /* We calculate it now and emit later. */ + brw_calculate_urb_fence(brw, 0, vs_entry_size, sf_entry_size); +#endif } -/* 3DSTATE_VIEWPORT_STATE_POINTERS */ -static void -blorp_emit_viewport_state(struct brw_context *brw, - const struct brw_blorp_params *params) +void +genX(blorp_exec)(struct blorp_batch *batch, + const struct blorp_params *params) { - uint32_t cc_vp_offset; - - void *state = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, - GENX(CC_VIEWPORT_length) * 4, 32, - &cc_vp_offset); - - GENX(CC_VIEWPORT_pack)(brw, state, - &(struct GENX(CC_VIEWPORT)) { - .MinimumDepth = 0.0, - .MaximumDepth = 1.0, - }); + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + struct gl_context *ctx = &brw->ctx; + bool check_aperture_failed_once = false; + + /* Flush the sampler and render caches. We definitely need to flush the + * sampler cache so that we get updated contents from the render cache for + * the glBlitFramebuffer() source. Also, we are sometimes warned in the + * docs to flush the cache between reinterpretations of the same surface + * data with different formats, which blorp does for stencil and depth + * data. + */ + if (params->src.enabled) + brw_render_cache_set_check_flush(brw, params->src.addr.buffer); + brw_render_cache_set_check_flush(brw, params->dst.addr.buffer); - blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) { - vsp.CCViewportStateChange = true; - vsp.PointertoCC_VIEWPORT = cc_vp_offset; - } -} + brw_select_pipeline(brw, BRW_RENDER_PIPELINE); +retry: + intel_batchbuffer_require_space(brw, 1400, RENDER_RING); + brw_require_statebuffer_space(brw, 600); + intel_batchbuffer_save_state(brw); + brw->batch.no_wrap = true; -/** - * \brief Execute a blit or render pass operation. - * - * To execute the operation, this function manually constructs and emits a - * batch to draw a rectangle primitive. The batchbuffer is flushed before - * constructing and after emitting the batch. - * - * This function alters no GL state. - */ -void -genX(blorp_exec)(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t blend_state_offset = 0; - uint32_t color_calc_state_offset = 0; - uint32_t depth_stencil_state_offset; - +#if GEN_GEN == 6 /* Emit workaround flushes when we switch from drawing to blorping. */ brw_emit_post_sync_nonzero_flush(brw); +#endif brw_upload_state_base_address(brw); - gen6_blorp_emit_vertices(brw, params); +#if GEN_GEN >= 8 + gen7_l3_state.emit(brw); +#endif - /* 3DSTATE_URB - * - * Assign the entire URB to the VS. Even though the VS disabled, URB space - * is still needed because the clipper loads the VUE's from the URB. From - * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, - * Dword 1.15:0 "VS Number of URB Entries": - * This field is always used (even if VS Function Enable is DISABLED). - * - * The warning below appears in the PRM (Section 3DSTATE_URB), but we can - * safely ignore it because this batch contains only one draw call. - * Because of URB corruption caused by allocating a previous GS unit - * URB entry to the VS unit, software is required to send a “GS NULL - * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) - * plus a dummy DRAW call before any case where VS will be taking over - * GS URB space. - */ - blorp_emit(brw, GENX(3DSTATE_URB), urb) { - urb.VSNumberofURBEntries = brw->urb.max_vs_entries; - } +#if GEN_GEN >= 6 + brw_emit_depth_stall_flushes(brw); +#endif - if (params->wm_prog_data) { - blend_state_offset = blorp_emit_blend_state(brw, params); - color_calc_state_offset = blorp_emit_color_calc_state(brw, params); - } - depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params); +#if GEN_GEN == 8 + gen8_write_pma_stall_bits(brw, 0); +#endif - /* 3DSTATE_CC_STATE_POINTERS - * - * The pointer offsets are relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - * - * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. - */ - blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) { - cc.BLEND_STATEChange = true; - cc.COLOR_CALC_STATEChange = true; - cc.DEPTH_STENCIL_STATEChange = true; - cc.PointertoBLEND_STATE = blend_state_offset; - cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset; - cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset; + blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { + rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; + rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; } - blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs); - blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs); - blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps); + blorp_exec(batch, params); - if (params->wm_prog_data) - blorp_emit_surface_states(brw, params); + brw->batch.no_wrap = false; - if (params->src.bo) - blorp_emit_sampler_state(brw, params); - - gen6_emit_3dstate_multisample(brw, params->dst.surf.samples); - - blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) { - mask.SampleMask = (1 << params->dst.surf.samples) - 1; - } - - /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, - * 3DSTATE_VS, Dword 5.0 "VS Function Enable": - * - * [DevSNB] A pipeline flush must be programmed prior to a - * 3DSTATE_VS command that causes the VS Function Enable to - * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL - * command with CS stall bit set and a post sync operation. - * - * We've already done one at the start of the BLORP operation. + /* Check if the blorp op we just did would make our batch likely to fail to + * map all the BOs into the GPU at batch exec time later. If so, flush the + * batch and try again with nothing else in the batch. */ - blorp_emit(brw, GENX(3DSTATE_VS), vs); - blorp_emit(brw, GENX(3DSTATE_GS), gs); - - blorp_emit(brw, GENX(3DSTATE_CLIP), clip) { - clip.PerspectiveDivideDisable = true; - } - - blorp_emit_sf_config(brw, params); - blorp_emit_wm_config(brw, params); - - blorp_emit_viewport_state(brw, params); - - if (params->depth.bo) { - blorp_emit_depth_stencil_config(brw, params); - } else { - brw_emit_depth_stall_flushes(brw); - - blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) { - db.SurfaceType = SURFTYPE_NULL; - db.SurfaceFormat = D32_FLOAT; + if (!brw_batch_has_aperture_space(brw, 0)) { + if (!check_aperture_failed_once) { + check_aperture_failed_once = true; + intel_batchbuffer_reset_to_saved(brw); + intel_batchbuffer_flush(brw); + goto retry; + } else { + int ret = intel_batchbuffer_flush(brw); + WARN_ONCE(ret == -ENOSPC, + "i965: blorp emit exceeded available aperture space\n"); } - blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz); - blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb); } - /* 3DSTATE_CLEAR_PARAMS - * - * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: - * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE - * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. - */ - blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) { - clear.DepthClearValueValid = true; - clear.DepthClearValue = params->depth.clear_color.u32[0]; - } + if (unlikely(brw->always_flush_batch)) + intel_batchbuffer_flush(brw); - blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { - rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; - rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; - } - - blorp_emit(brw, GENX(3DPRIMITIVE), prim) { - prim.VertexAccessType = SEQUENTIAL; - prim.PrimitiveTopologyType = _3DPRIM_RECTLIST; - prim.VertexCountPerInstance = 3; - prim.InstanceCount = params->num_layers; - } + /* We've smashed all state compared to what the normal 3D pipeline + * rendering tracks for GL. + */ + brw->ctx.NewDriverState |= BRW_NEW_BLORP; + brw->no_depth_or_stencil = !params->depth.enabled && + !params->stencil.enabled; + brw->ib.index_size = -1; + + if (params->dst.enabled) + brw_render_cache_set_add_bo(brw, params->dst.addr.buffer); + if (params->depth.enabled) + brw_render_cache_set_add_bo(brw, params->depth.addr.buffer); + if (params->stencil.enabled) + brw_render_cache_set_add_bo(brw, params->stencil.addr.buffer); }