From: Francisco Jerez Date: Wed, 6 May 2020 22:40:30 +0000 (-0700) Subject: OPTIONAL: iris: Perform BLORP buffer barriers outside of iris_blorp_exec() hook. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8252bb0ec6d429b09d944826d1ddbead69387f0f;p=mesa.git OPTIONAL: iris: Perform BLORP buffer barriers outside of iris_blorp_exec() hook. The iris_blorp_exec() hook needs to be executed under a single indivisible sync region, which means that in cases where we need to emit a PIPE_CONTROL for a buffer barrier we won't be able to track the subsequent commands separately from the previous commands, which will prevent us from optimizing out subsequent PIPE_CONTROLs if we encounter the same buffers again. In particular I've encountered this situation in some SynMark test-cases which perform lots of BLORP operations with the same buffer bound as both source and destination (in order to generate mipmaps): In such a scenario if the source requires flushing we'd also end up flushing for the destination redundantly, even though a single PIPE_CONTROL would have been sufficient. This avoids a 4.5% FPS regression in SynMark OglHdrBloom and a 3.5% FPS regression in SynMark OglMultithread. 
Reviewed-by: Kenneth Graunke Part-of: --- diff --git a/src/gallium/drivers/iris/iris_blit.c b/src/gallium/drivers/iris/iris_blit.c index 7ba297e2f89..40e1dcac9e7 100644 --- a/src/gallium/drivers/iris/iris_blit.c +++ b/src/gallium/drivers/iris/iris_blit.c @@ -382,6 +382,7 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) iris_resource_prepare_access(ice, src_res, info->src.level, 1, info->src.box.z, info->src.box.depth, src_aux_usage, src_clear_supported); + iris_emit_buffer_barrier_for(batch, src_res->bo, IRIS_DOMAIN_OTHER_READ); struct iris_format_info dst_fmt = iris_format_for_usage(devinfo, info->dst.format, @@ -401,6 +402,7 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) iris_resource_prepare_access(ice, dst_res, info->dst.level, 1, info->dst.box.z, info->dst.box.depth, dst_aux_usage, dst_clear_supported); + iris_emit_buffer_barrier_for(batch, dst_res->bo, IRIS_DOMAIN_RENDER_WRITE); float src_x0 = info->src.box.x; float src_x1 = info->src.box.x + info->src.box.width; @@ -527,9 +529,11 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) iris_resource_prepare_access(ice, src_res, info->src.level, 1, info->src.box.z, info->src.box.depth, stc_src_aux_usage, false); + iris_emit_buffer_barrier_for(batch, src_res->bo, IRIS_DOMAIN_OTHER_READ); iris_resource_prepare_access(ice, stc_dst, info->dst.level, 1, info->dst.box.z, info->dst.box.depth, stc_dst_aux_usage, false); + iris_emit_buffer_barrier_for(batch, stc_dst->bo, IRIS_DOMAIN_RENDER_WRITE); iris_blorp_surf_for_resource(&screen->isl_dev, &src_surf, &src_res->base, stc_src_aux_usage, info->src.level, false); @@ -664,6 +668,11 @@ iris_copy_region(struct blorp_context *blorp, .reloc_flags = EXEC_OBJECT_WRITE, }; + iris_emit_buffer_barrier_for(batch, iris_resource_bo(src), + IRIS_DOMAIN_OTHER_READ); + iris_emit_buffer_barrier_for(batch, iris_resource_bo(dst), + IRIS_DOMAIN_RENDER_WRITE); + iris_batch_maybe_flush(batch, 1500); 
iris_batch_sync_region_start(batch); @@ -687,6 +696,11 @@ iris_copy_region(struct blorp_context *blorp, dstz, src_box->depth, dst_aux_usage, dst_clear_supported); + iris_emit_buffer_barrier_for(batch, iris_resource_bo(src), + IRIS_DOMAIN_OTHER_READ); + iris_emit_buffer_barrier_for(batch, iris_resource_bo(dst), + IRIS_DOMAIN_RENDER_WRITE); + blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); for (int slice = 0; slice < src_box->depth; slice++) { diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index 4c3f98eb6b2..34f40f7fa61 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -274,27 +274,17 @@ iris_blorp_exec(struct blorp_batch *blorp_batch, PIPE_CONTROL_STALL_AT_SCOREBOARD); #endif - /* Flush the sampler and render caches. We definitely need to flush the - * sampler cache so that we get updated contents from the render cache for - * the glBlitFramebuffer() source. Also, we are sometimes warned in the - * docs to flush the cache between reinterpretations of the same surface - * data with different formats, which blorp does for stencil and depth - * data. + /* Flush the render cache in cases where the same surface is reinterpreted + * with a different format, which blorp does for stencil and depth data + * among other things. Invalidation of sampler caches and flushing of any + * caches which had previously written the source surfaces should already + * have been handled by the caller. 
*/ - if (params->src.enabled) - iris_emit_buffer_barrier_for(batch, params->src.addr.buffer, - IRIS_DOMAIN_OTHER_READ); if (params->dst.enabled) { iris_cache_flush_for_render(batch, params->dst.addr.buffer, params->dst.view.format, params->dst.aux_usage); } - if (params->depth.enabled) - iris_emit_buffer_barrier_for(batch, params->depth.addr.buffer, - IRIS_DOMAIN_DEPTH_WRITE); - if (params->stencil.enabled) - iris_emit_buffer_barrier_for(batch, params->stencil.addr.buffer, - IRIS_DOMAIN_DEPTH_WRITE); iris_require_command_space(batch, 1400); diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c index 6d4f579ebf0..8fb95c8f790 100644 --- a/src/gallium/drivers/iris/iris_clear.c +++ b/src/gallium/drivers/iris/iris_clear.c @@ -378,6 +378,7 @@ clear_color(struct iris_context *ice, iris_resource_prepare_render(ice, batch, res, level, box->z, box->depth, aux_usage); + iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_RENDER_WRITE); struct blorp_surf surf; iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf, @@ -596,6 +597,7 @@ clear_depth_stencil(struct iris_context *ice, if (clear_depth && z_res) { iris_resource_prepare_depth(ice, batch, z_res, level, box->z, box->depth); + iris_emit_buffer_barrier_for(batch, z_res->bo, IRIS_DOMAIN_DEPTH_WRITE); iris_blorp_surf_for_resource(&batch->screen->isl_dev, &z_surf, &z_res->base, z_res->aux.usage, level, true); @@ -605,6 +607,8 @@ clear_depth_stencil(struct iris_context *ice, if (stencil_mask) { iris_resource_prepare_access(ice, stencil_res, level, 1, box->z, box->depth, stencil_res->aux.usage, false); + iris_emit_buffer_barrier_for(batch, stencil_res->bo, + IRIS_DOMAIN_DEPTH_WRITE); iris_blorp_surf_for_resource(&batch->screen->isl_dev, &stencil_surf, &stencil_res->base, stencil_res->aux.usage, level, true); diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c index 71530f4b311..0350e58a225 100644 --- 
a/src/gallium/drivers/iris/iris_resolve.c +++ b/src/gallium/drivers/iris/iris_resolve.c @@ -450,6 +450,7 @@ iris_mcs_partial_resolve(struct iris_context *ice, struct blorp_surf surf; iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf, &res->base, res->aux.usage, 0, true); + iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_RENDER_WRITE); struct blorp_batch blorp_batch; iris_batch_sync_region_start(batch);