From c10896b593720457e99a731e9493ce8d6c497fab Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 4 Mar 2014 15:52:05 -0800 Subject: [PATCH] i965: Fix render-to-texture in non-FinishRenderTexture cases. We've had several problems now with FinishRenderTexture not getting called enough, and we're ready to just give up on it ever doing what we need. In particular, an upcoming Steam title had rendering bugs that could be fixed by always_flush_cache=true. Instead of hoping Mesa core can figure out when we need to flush our caches, just track what BOs we've rendered to in a set, and when we render from a BO in that set, emit a flush and clear the set. There's some overhead to keeping this set, but most of that is just hashing the pointer -- it turns out our set never even gets very large, because cache flushes are so common (even on cairo-gl). No statistically significant performance difference in cairo-gl (n=100), despite spending ~.5% CPU in these set operations. v1: (Original patch by Eric Anholt.) v2: (Changes by Ken Graunke.) - Rebase forward from May 7th 2013 -> March 4th 2014. - Drop the FinishRenderTexture hook entirely; after rebasing the patch, the hook was just an empty function. - Move the brw_render_cache_set_clear() call from intel_batchbuffer_emit_flush() to brw_emit_pipe_control_flush(). In theory, this could catch more cases where we've flushed. - Consider stencil as a possible texturing source. v3: (changes by anholt): - Move set_clear() back to emit_mi_flush() -- it means we can drop more forced flushes from the code. In the previous location, it wouldn't have been called when we wanted pre-gen6. - Move the set clear from batch init to reset -- it should be empty at the start of every batch, since the kernel handled any inter-batch flush for us. v4: Drop the debug code in set.c that I accidentally committed. Signed-off-by: Eric Anholt Signed-off-by: Kenneth Graunke Reviewed-by: Kenneth Graunke Tested-by: Dylan Baker [v2] --- src/mesa/drivers/dri/i965/brw_context.c | 4 +- src/mesa/drivers/dri/i965/brw_context.h | 7 +++ src/mesa/drivers/dri/i965/brw_draw.c | 30 ++++++++-- src/mesa/drivers/dri/i965/brw_misc_state.c | 5 ++ src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++ src/mesa/drivers/dri/i965/intel_fbo.c | 60 +++++++++++++------ src/mesa/drivers/dri/i965/intel_fbo.h | 4 ++ 7 files changed, 87 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 1441b464091..026b3aa7551 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -698,6 +698,8 @@ brwCreateContext(gl_api api, /* Reinitialize the context point state. It depends on ctx->Const values. */ _mesa_init_point(ctx); + intel_fbo_init(brw); + intel_batchbuffer_init(brw); if (brw->gen >= 6) { @@ -721,8 +723,6 @@ brwCreateContext(gl_api api, intelInitExtensions(ctx); - intel_fbo_init(brw); - brw_init_surface_formats(brw); if (brw->is_g4x || brw->gen >= 5) { diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index dbb30f221aa..734d273a96c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1029,6 +1029,13 @@ struct brw_context drm_intel_context *hw_ctx; + /** + * Set of drm_intel_bo * that have been rendered to within this batchbuffer + * and would need flushing before being used from another cache domain that + * isn't coherent with it (i.e. the sampler). + */ + struct set *render_cache; + /** * Number of resets observed in the system at context creation. * diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index bc887fed030..11a03616121 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -297,8 +297,8 @@ static void brw_merge_inputs( struct brw_context *brw, /* * \brief Resolve buffers before drawing. * - * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each - * enabled depth texture. + * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each + * enabled depth texture, and flush the render cache for any dirty textures. * * (In the future, this will also perform MSAA resolves). */ @@ -314,9 +314,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw) if (depth_irb) intel_renderbuffer_resolve_hiz(brw, depth_irb); - /* Resolve depth buffer of each enabled depth texture, and color buffer of - * each fast-clear-enabled color texture. - */ + /* Resolve depth buffer and render cache of each enabled texture. */ for (int i = 0; i < ctx->Const.MaxCombinedTextureImageUnits; i++) { if (!ctx->Texture.Unit[i]._ReallyEnabled) continue; @@ -325,6 +323,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw) continue; intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); intel_miptree_resolve_color(brw, tex_obj->mt); + brw_render_cache_set_check_flush(brw, tex_obj->mt->region->bo); } } @@ -336,6 +335,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw) * * If the color buffer is a multisample window system buffer, then * mark that it needs a downsample. + * + * Also mark any render targets which will be textured as needing a render + * cache flush. */ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) { @@ -345,6 +347,7 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) struct intel_renderbuffer *front_irb = NULL; struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); + struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL); struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH]; if (brw->is_front_buffer_rendering) @@ -354,8 +357,23 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) front_irb->need_downsample = true; if (back_irb) back_irb->need_downsample = true; - if (depth_irb && ctx->Depth.Mask) + if (depth_irb && ctx->Depth.Mask) { intel_renderbuffer_att_set_needs_depth_resolve(depth_att); + brw_render_cache_set_add_bo(brw, depth_irb->mt->region->bo); + } + + if (ctx->Extensions.ARB_stencil_texturing && + stencil_irb && ctx->Stencil._WriteEnabled) { + brw_render_cache_set_add_bo(brw, stencil_irb->mt->region->bo); + } + + for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct intel_renderbuffer *irb = + intel_renderbuffer(fb->_ColorDrawBuffers[i]); + + if (irb) + brw_render_cache_set_add_bo(brw, irb->mt->region->bo); + } } /* May fail if out of video memory for texture or vbo upload, or on diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index b984d47c4f5..c8fb6f312d1 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -552,6 +552,11 @@ brw_emit_depthbuffer(struct brw_context *brw) height = stencil_irb->Base.Base.Height; } + if (depth_mt) + brw_render_cache_set_check_flush(brw, depth_mt->region->bo); + if (stencil_mt) + brw_render_cache_set_check_flush(brw, stencil_mt->region->bo); + brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset, depthbuffer_format, depth_surface_type, stencil_mt, hiz, separate_stencil, diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 98759e27e2d..fe953422751 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -30,6 +30,7 @@ #include "intel_reg.h" #include "intel_bufmgr.h" #include "intel_buffers.h" +#include "intel_fbo.h" #include "brw_context.h" static void @@ -88,6 +89,7 @@ intel_batchbuffer_reset(struct brw_context *brw) brw->batch.last_bo = brw->batch.bo; intel_batchbuffer_clear_cache(brw); + brw_render_cache_set_clear(brw); brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer", BATCH_SZ, 4096); @@ -696,6 +698,8 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw) } brw_emit_pipe_control_flush(brw, flags); } + + brw_render_cache_set_clear(brw); } void diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index d11cdb6605c..d0e13491cb3 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -36,6 +36,8 @@ #include "main/context.h" #include "main/teximage.h" #include "main/image.h" +#include "main/hash_table.h" +#include "main/set.h" #include "swrast/swrast.h" #include "drivers/common/meta.h" @@ -600,24 +602,6 @@ intel_render_texture(struct gl_context * ctx, } -/** - * Called by Mesa when rendering to a texture is done. - */ -static void -intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb) -{ - struct brw_context *brw = brw_context(ctx); - - DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format)); - - /* Since we've (probably) rendered to the texture and will (likely) use - * it in the texture domain later on in this batchbuffer, flush the - * batch. Once again, we wish for a domain tracker in libdrm to cover - * usage inside of a batchbuffer like GEM does in the kernel. - */ - intel_batchbuffer_emit_mi_flush(brw); -} - #define fbo_incomplete(fb, ...) do { \ static GLuint msg_id = 0; \ if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \ @@ -953,6 +937,43 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_miptree_release(&new_mt); } +void +brw_render_cache_set_clear(struct brw_context *brw) +{ + struct set_entry *entry; + + set_foreach(brw->render_cache, entry) { + _mesa_set_remove(brw->render_cache, entry); + } +} + +void +brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo) +{ + _mesa_set_add(brw->render_cache, _mesa_hash_pointer(bo), bo); +} + +/** + * Emits an appropriate flush for a BO if it has been rendered to within the + * same batchbuffer as a read that's about to be emitted. + * + * The GPU has separate, incoherent caches for the render cache and the + * sampler cache, along with other caches. Usually data in the different + * caches don't interact (e.g. we don't render to our driver-generated + * immediate constant data), but for render-to-texture in FBOs we definitely + * do. When a batchbuffer is flushed, the kernel will ensure that everything + * necessary is flushed before another use of that BO, but for reuse from + * different caches within a batchbuffer, it's all our responsibility. + */ +void +brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo) +{ + if (!_mesa_set_search(brw->render_cache, _mesa_hash_pointer(bo), bo)) + return; + + intel_batchbuffer_emit_mi_flush(brw); +} + /** * Do one-time context initializations related to GL_EXT_framebuffer_object. * Hook in device driver functions. @@ -966,9 +987,10 @@ intel_fbo_init(struct brw_context *brw) dd->MapRenderbuffer = intel_map_renderbuffer; dd->UnmapRenderbuffer = intel_unmap_renderbuffer; dd->RenderTexture = intel_render_texture; - dd->FinishRenderTexture = intel_finish_render_texture; dd->ValidateFramebuffer = intel_validate_framebuffer; dd->BlitFramebuffer = intel_blit_framebuffer; dd->EGLImageTargetRenderbufferStorage = intel_image_target_renderbuffer_storage; + + brw->render_cache = _mesa_set_create(brw, _mesa_key_pointer_equal); } diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h index b8db7e2d73f..fa457e29d09 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.h +++ b/src/mesa/drivers/dri/i965/intel_fbo.h @@ -237,6 +237,10 @@ void intel_renderbuffer_upsample(struct brw_context *brw, struct intel_renderbuffer *irb); +void brw_render_cache_set_clear(struct brw_context *brw); +void brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo); +void brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo); + unsigned intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples); -- 2.30.2