X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fblorp%2Fblorp_genX_exec.h;h=152f40d9338b76ac092be4275ff8c188bfca1b1c;hb=4bbc9c493f4b923516d9ef40b41a0dd7648fdb1e;hp=cea514e0cc59b5307ebd4e2a9af897581f0636c4;hpb=ca7ab1a6a5041783edfcc4c181e7341d75dc98f7;p=mesa.git diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index cea514e0cc5..152f40d9338 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -25,7 +25,7 @@ #define BLORP_GENX_EXEC_H #include "blorp_priv.h" -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "common/gen_sample_positions.h" #include "genxml/gen_macros.h" @@ -59,6 +59,10 @@ blorp_alloc_dynamic_state(struct blorp_batch *batch, static void * blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, struct blorp_address *addr); +static void +blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch, + const struct blorp_address *addrs, + unsigned num_vbs); #if GEN_GEN >= 8 static struct blorp_address @@ -78,7 +82,11 @@ static void blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, struct blorp_address address, uint32_t delta); -#if GEN_GEN >= 7 +static uint64_t +blorp_get_surface_address(struct blorp_batch *batch, + struct blorp_address address); + +#if GEN_GEN >= 7 && GEN_GEN < 10 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch); #endif @@ -200,6 +208,14 @@ emit_urb_config(struct blorp_batch *batch, blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size); } +#if GEN_GEN >= 7 +static void +blorp_emit_memcpy(struct blorp_batch *batch, + struct blorp_address dst, + struct blorp_address src, + uint32_t size); +#endif + static void blorp_emit_vertex_data(struct blorp_batch *batch, const struct blorp_params *params, @@ -260,6 +276,31 @@ blorp_emit_input_varying_data(struct blorp_batch *batch, } blorp_flush_range(batch, data, *size); + + if (params->dst_clear_color_as_input) { +#if GEN_GEN >= 7 + /* In this case, the clear color isn't known statically and instead + * comes in through an indirect which we have to copy into the vertex + * buffer before we execute the 3DPRIMITIVE. We already copied the + * value of params->wm_inputs.clear_color into the vertex buffer in the + * loop above. Now we emit code to stomp it from the GPU with the + * actual clear color value. + */ + assert(num_varyings == 1); + + /* The clear color is the first thing after the header */ + struct blorp_address clear_color_input_addr = *addr; + clear_color_input_addr.offset += 16; + + const unsigned clear_color_size = + GEN_GEN < 10 ? batch->blorp->isl_dev->ss.clear_value_size : 4 * 4; + blorp_emit_memcpy(batch, clear_color_input_addr, + params->dst.clear_color_addr, + clear_color_size); +#else + unreachable("MCS partial resolve is not a thing on SNB and earlier"); +#endif + } } static void @@ -274,7 +315,7 @@ blorp_fill_vertex_buffer_state(struct blorp_batch *batch, vb[idx].BufferPitch = stride; #if GEN_GEN >= 6 - vb[idx].VertexBufferMOCS = addr.mocs; + vb[idx].MOCS = addr.mocs; #endif #if GEN_GEN >= 7 @@ -297,23 +338,27 @@ static void blorp_emit_vertex_buffers(struct blorp_batch *batch, const struct blorp_params *params) { - struct GENX(VERTEX_BUFFER_STATE) vb[2]; + struct GENX(VERTEX_BUFFER_STATE) vb[3]; + uint32_t num_vbs = 2; memset(vb, 0, sizeof(vb)); - struct blorp_address addr; + struct blorp_address addrs[2] = {}; uint32_t size; - blorp_emit_vertex_data(batch, params, &addr, &size); - blorp_fill_vertex_buffer_state(batch, vb, 0, addr, size, 3 * sizeof(float)); + blorp_emit_vertex_data(batch, params, &addrs[0], &size); + blorp_fill_vertex_buffer_state(batch, vb, 0, addrs[0], size, + 3 * sizeof(float)); + + blorp_emit_input_varying_data(batch, params, &addrs[1], &size); + blorp_fill_vertex_buffer_state(batch, vb, 1, addrs[1], size, 0); - blorp_emit_input_varying_data(batch, params, &addr, &size); - blorp_fill_vertex_buffer_state(batch, vb, 1, addr, size, 0); + blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, num_vbs); - const unsigned num_dwords = 1 + GENX(VERTEX_BUFFER_STATE_length) * 2; + const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length); uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords); if (!dw) return; - for (unsigned i = 0; i < 2; i++) { + for (unsigned i = 0; i < num_vbs; i++) { GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]); dw += GENX(VERTEX_BUFFER_STATE_length); } @@ -382,7 +427,7 @@ blorp_emit_vertex_elements(struct blorp_batch *batch, ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) { .VertexBufferIndex = 1, .Valid = true, - .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32B32A32_FLOAT, + .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT, .SourceElementOffset = 0, .Component0Control = VFCOMP_STORE_SRC, @@ -414,7 +459,7 @@ blorp_emit_vertex_elements(struct blorp_batch *batch, ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) { .VertexBufferIndex = 0, .Valid = true, - .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32B32_FLOAT, + .SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT, .SourceElementOffset = 0, .Component0Control = VFCOMP_STORE_SRC, .Component1Control = VFCOMP_STORE_SRC, @@ -428,7 +473,7 @@ blorp_emit_vertex_elements(struct blorp_batch *batch, ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) { .VertexBufferIndex = 0, .Valid = true, - .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32B32_FLOAT, + .SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT, .SourceElementOffset = 0, .Component0Control = VFCOMP_STORE_SRC, .Component1Control = VFCOMP_STORE_SRC, @@ -444,7 +489,7 @@ blorp_emit_vertex_elements(struct blorp_batch *batch, ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) { .VertexBufferIndex = 1, .Valid = true, - .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32B32A32_FLOAT, + .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT, .SourceElementOffset = 16 + i * 4 * sizeof(float), .Component0Control = VFCOMP_STORE_SRC, .Component1Control = VFCOMP_STORE_SRC, @@ -493,8 +538,7 @@ blorp_emit_vertex_elements(struct blorp_batch *batch, /* 3DSTATE_VIEWPORT_STATE_POINTERS */ static uint32_t -blorp_emit_cc_viewport(struct blorp_batch *batch, - const struct blorp_params *params) +blorp_emit_cc_viewport(struct blorp_batch *batch) { uint32_t cc_vp_offset; blorp_emit_dynamic(batch, GENX(CC_VIEWPORT), vp, 32, &cc_vp_offset) { @@ -517,8 +561,7 @@ blorp_emit_cc_viewport(struct blorp_batch *batch, } static uint32_t -blorp_emit_sampler_state(struct blorp_batch *batch, - const struct blorp_params *params) +blorp_emit_sampler_state(struct blorp_batch *batch) { uint32_t offset; blorp_emit_dynamic(batch, GENX(SAMPLER_STATE), sampler, 32, &offset) { @@ -723,18 +766,45 @@ blorp_emit_ps_config(struct blorp_batch *batch, ps.BindingTableEntryCount = 1; } - if (prog_data) { - ps.DispatchGRFStartRegisterForConstantSetupData0 = - prog_data->base.dispatch_grf_start_reg; - ps.DispatchGRFStartRegisterForConstantSetupData2 = - prog_data->dispatch_grf_start_reg_2; + /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to + * disable prefetching of binding tables on A0 and B0 steppings. + * TODO: Revisit this WA on C0 stepping. + */ + if (GEN_GEN == 11) + ps.BindingTableEntryCount = 0; + if (prog_data) { ps._8PixelDispatchEnable = prog_data->dispatch_8; ps._16PixelDispatchEnable = prog_data->dispatch_16; + ps._32PixelDispatchEnable = prog_data->dispatch_32; + + /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: + * + * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 + * Dispatch must not be enabled for PER_PIXEL dispatch mode." + * + * Since 16x MSAA is first introduced on SKL, we don't need to apply + * the workaround on any older hardware. + */ + if (GEN_GEN >= 9 && !prog_data->persample_dispatch && + params->num_samples == 16) { + assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); + ps._32PixelDispatchEnable = false; + } - ps.KernelStartPointer0 = params->wm_prog_kernel; - ps.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->prog_offset_2; + ps.DispatchGRFStartRegisterForConstantSetupData0 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); + ps.DispatchGRFStartRegisterForConstantSetupData1 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1); + ps.DispatchGRFStartRegisterForConstantSetupData2 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2); + + ps.KernelStartPointer0 = params->wm_prog_kernel + + brw_wm_prog_data_prog_offset(prog_data, ps, 0); + ps.KernelStartPointer1 = params->wm_prog_kernel + + brw_wm_prog_data_prog_offset(prog_data, ps, 1); + ps.KernelStartPointer2 = params->wm_prog_kernel + + brw_wm_prog_data_prog_offset(prog_data, ps, 2); } /* 3DSTATE_PS expects the number of threads per PSD, which is always 64 @@ -828,17 +898,23 @@ blorp_emit_ps_config(struct blorp_batch *batch, #endif if (prog_data) { + ps._8PixelDispatchEnable = prog_data->dispatch_8; + ps._16PixelDispatchEnable = prog_data->dispatch_16; + ps._32PixelDispatchEnable = prog_data->dispatch_32; + ps.DispatchGRFStartRegisterForConstantSetupData0 = - prog_data->base.dispatch_grf_start_reg; + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); + ps.DispatchGRFStartRegisterForConstantSetupData1 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1); ps.DispatchGRFStartRegisterForConstantSetupData2 = - prog_data->dispatch_grf_start_reg_2; + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2); - ps.KernelStartPointer0 = params->wm_prog_kernel; - ps.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->prog_offset_2; - - ps._8PixelDispatchEnable = prog_data->dispatch_8; - ps._16PixelDispatchEnable = prog_data->dispatch_16; + ps.KernelStartPointer0 = params->wm_prog_kernel + + brw_wm_prog_data_prog_offset(prog_data, ps, 0); + ps.KernelStartPointer1 = params->wm_prog_kernel + + brw_wm_prog_data_prog_offset(prog_data, ps, 1); + ps.KernelStartPointer2 = params->wm_prog_kernel + + brw_wm_prog_data_prog_offset(prog_data, ps, 2); ps.AttributeEnable = prog_data->num_varying_inputs > 0; } else { @@ -890,17 +966,23 @@ blorp_emit_ps_config(struct blorp_batch *batch, if (prog_data) { wm.ThreadDispatchEnable = true; + wm._8PixelDispatchEnable = prog_data->dispatch_8; + wm._16PixelDispatchEnable = prog_data->dispatch_16; + wm._32PixelDispatchEnable = prog_data->dispatch_32; + wm.DispatchGRFStartRegisterForConstantSetupData0 = - prog_data->base.dispatch_grf_start_reg; + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 0); + wm.DispatchGRFStartRegisterForConstantSetupData1 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 1); wm.DispatchGRFStartRegisterForConstantSetupData2 = - prog_data->dispatch_grf_start_reg_2; - - wm.KernelStartPointer0 = params->wm_prog_kernel; - wm.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->prog_offset_2; + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 2); - wm._8PixelDispatchEnable = prog_data->dispatch_8; - wm._16PixelDispatchEnable = prog_data->dispatch_16; + wm.KernelStartPointer0 = params->wm_prog_kernel + + brw_wm_prog_data_prog_offset(prog_data, wm, 0); + wm.KernelStartPointer1 = params->wm_prog_kernel + + brw_wm_prog_data_prog_offset(prog_data, wm, 1); + wm.KernelStartPointer2 = params->wm_prog_kernel + + brw_wm_prog_data_prog_offset(prog_data, wm, 2); wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs; } @@ -977,7 +1059,7 @@ blorp_emit_blend_state(struct blorp_batch *batch, static uint32_t blorp_emit_color_calc_state(struct blorp_batch *batch, - const struct blorp_params *params) + MAYBE_UNUSED const struct blorp_params *params) { uint32_t offset; blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) { @@ -1168,7 +1250,7 @@ blorp_emit_pipeline(struct blorp_batch *batch, blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps); if (params->src.enabled) - blorp_emit_sampler_state(batch, params); + blorp_emit_sampler_state(batch); blorp_emit_3dstate_multisample(batch, params); @@ -1202,14 +1284,14 @@ blorp_emit_pipeline(struct blorp_batch *batch, blorp_emit_sf_config(batch, params); blorp_emit_ps_config(batch, params); - blorp_emit_cc_viewport(batch, params); + blorp_emit_cc_viewport(batch); } /******** This is the end of the pipeline setup code ********/ #endif /* GEN_GEN >= 6 */ -#if GEN_GEN >= 7 && GEN_GEN <= 10 +#if GEN_GEN >= 7 static void blorp_emit_memcpy(struct blorp_batch *batch, struct blorp_address dst, @@ -1248,6 +1330,7 @@ blorp_emit_memcpy(struct blorp_batch *batch, static void blorp_emit_surface_state(struct blorp_batch *batch, const struct brw_blorp_surface_info *surface, + enum isl_aux_op aux_op, void *state, uint32_t state_offset, const bool color_write_disables[4], bool is_render_target) @@ -1278,11 +1361,22 @@ blorp_emit_surface_state(struct blorp_batch *batch, write_disable_mask |= ISL_CHANNEL_ALPHA_BIT; } + const bool use_clear_address = + GEN_GEN >= 10 && (surface->clear_color_addr.buffer != NULL); + isl_surf_fill_state(batch->blorp->isl_dev, state, .surf = &surf, .view = &surface->view, .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, + .address = + blorp_get_surface_address(batch, surface->addr), + .aux_address = aux_usage == ISL_AUX_USAGE_NONE ? 0 : + blorp_get_surface_address(batch, surface->aux_addr), + .clear_address = !use_clear_address ? 0 : + blorp_get_surface_address(batch, + surface->clear_color_addr), .mocs = surface->addr.mocs, .clear_color = surface->clear_color, + .use_clear_address = use_clear_address, .write_disables = write_disable_mask); blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, @@ -1299,20 +1393,29 @@ blorp_emit_surface_state(struct blorp_batch *batch, surface->aux_addr, *aux_addr); } - blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4); - - if (surface->clear_color_addr.buffer) { -#if GEN_GEN > 10 - unreachable("Implement indirect clear support on gen11+"); -#elif GEN_GEN >= 7 && GEN_GEN <= 10 - struct blorp_address dst_addr = blorp_get_surface_base_address(batch); - dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset; - blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr, - isl_dev->ss.clear_value_size); + if (aux_usage != ISL_AUX_USAGE_NONE && surface->clear_color_addr.buffer) { +#if GEN_GEN >= 10 + assert((surface->clear_color_addr.offset & 0x3f) == 0); + uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset; + blorp_surface_reloc(batch, state_offset + + isl_dev->ss.clear_color_state_offset, + surface->clear_color_addr, *clear_addr); +#elif GEN_GEN >= 7 + /* Fast clears just whack the AUX surface and don't actually use the + * clear color for anything. We can avoid the MI memcpy on that case. + */ + if (aux_op != ISL_AUX_OP_FAST_CLEAR) { + struct blorp_address dst_addr = blorp_get_surface_base_address(batch); + dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset; + blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr, + isl_dev->ss.clear_value_size); + } #else unreachable("Fast clears are only supported on gen7+"); #endif } + + blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4); } static void @@ -1322,7 +1425,7 @@ blorp_emit_null_surface_state(struct blorp_batch *batch, { struct GENX(RENDER_SURFACE_STATE) ss = { .SurfaceType = SURFTYPE_NULL, - .SurfaceFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R8G8B8A8_UNORM, + .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM, .Width = surface->surf.logical_level0_px.width - 1, .Height = surface->surf.logical_level0_px.height - 1, .MIPCountLOD = surface->view.base_level, @@ -1354,7 +1457,7 @@ blorp_emit_surface_states(struct blorp_batch *batch, const struct blorp_params *params) { const struct isl_device *isl_dev = batch->blorp->isl_dev; - uint32_t bind_offset, surface_offsets[2]; + uint32_t bind_offset = 0, surface_offsets[2]; void *surface_maps[2]; MAYBE_UNUSED bool has_indirect_clear_color = false; @@ -1368,6 +1471,7 @@ blorp_emit_surface_states(struct blorp_batch *batch, if (params->dst.enabled) { blorp_emit_surface_state(batch, ¶ms->dst, + params->fast_clear_op, surface_maps[BLORP_RENDERBUFFER_BT_INDEX], surface_offsets[BLORP_RENDERBUFFER_BT_INDEX], params->color_write_disable, true); @@ -1383,6 +1487,7 @@ blorp_emit_surface_states(struct blorp_batch *batch, if (params->src.enabled) { blorp_emit_surface_state(batch, ¶ms->src, + params->fast_clear_op, surface_maps[BLORP_TEXTURE_BT_INDEX], surface_offsets[BLORP_TEXTURE_BT_INDEX], NULL, false); @@ -1537,6 +1642,29 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, */ blorp_emit_3dstate_multisample(batch, params); + /* From the BDW PRM Volume 7, Depth Buffer Clear: + * + * The clear value must be between the min and max depth values + * (inclusive) defined in the CC_VIEWPORT. If the depth buffer format is + * D32_FLOAT, then +/-DENORM values are also allowed. + * + * Set the bounds to match our hardware limits, [0.0, 1.0]. + */ + if (params->depth.enabled && params->hiz_op == ISL_AUX_OP_FAST_CLEAR) { + assert(params->depth.clear_color.f32[0] >= 0.0f); + assert(params->depth.clear_color.f32[0] <= 1.0f); + blorp_emit_cc_viewport(batch); + } + + /* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the + * 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch + * even when WM_HZ_OP is active. However, WM thread dispatch is normally + * disabled for HiZ ops and it appears that force-enabling it can lead to + * GPU hangs on at least Skylake. Since we don't know the current state of + * the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP. + */ + blorp_emit(batch, GENX(3DSTATE_WM), wm); + /* If we can't alter the depth stencil config and multiple layers are * involved, the HiZ op will fail. This is because the op requires that a * new config is emitted for each additional layer. @@ -1595,6 +1723,51 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, } #endif +static void +blorp_update_clear_color(struct blorp_batch *batch, + const struct brw_blorp_surface_info *info, + enum isl_aux_op op) +{ + if (info->clear_color_addr.buffer && op == ISL_AUX_OP_FAST_CLEAR) { +#if GEN_GEN >= 9 + for (int i = 0; i < 4; i++) { + blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.Address = info->clear_color_addr; + sdi.Address.offset += i * 4; + sdi.ImmediateData = info->clear_color.u32[i]; + } + } +#elif GEN_GEN >= 7 + blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.Address = info->clear_color_addr; + sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 | + ISL_CHANNEL_SELECT_GREEN << 22 | + ISL_CHANNEL_SELECT_BLUE << 19 | + ISL_CHANNEL_SELECT_ALPHA << 16; + if (isl_format_has_int_channel(info->view.format)) { + for (unsigned i = 0; i < 4; i++) { + assert(info->clear_color.u32[i] == 0 || + info->clear_color.u32[i] == 1); + } + sdi.ImmediateData |= (info->clear_color.u32[0] != 0) << 31; + sdi.ImmediateData |= (info->clear_color.u32[1] != 0) << 30; + sdi.ImmediateData |= (info->clear_color.u32[2] != 0) << 29; + sdi.ImmediateData |= (info->clear_color.u32[3] != 0) << 28; + } else { + for (unsigned i = 0; i < 4; i++) { + assert(info->clear_color.f32[i] == 0.0f || + info->clear_color.f32[i] == 1.0f); + } + sdi.ImmediateData |= (info->clear_color.f32[0] != 0.0f) << 31; + sdi.ImmediateData |= (info->clear_color.f32[1] != 0.0f) << 30; + sdi.ImmediateData |= (info->clear_color.f32[2] != 0.0f) << 29; + sdi.ImmediateData |= (info->clear_color.f32[3] != 0.0f) << 28; + } + } +#endif + } +} + /** * \brief Execute a blit or render pass operation. * @@ -1607,6 +1780,11 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, static void blorp_exec(struct blorp_batch *batch, const struct blorp_params *params) { + if (!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR)) { + blorp_update_clear_color(batch, ¶ms->dst, params->fast_clear_op); + blorp_update_clear_color(batch, ¶ms->depth, params->hiz_op); + } + #if GEN_GEN >= 8 if (params->hiz_op != ISL_AUX_OP_NONE) { blorp_emit_gen8_hiz_op(batch, params);