From f12188ff5264b29f6270c40c7592543aca6c2f4a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 24 Apr 2020 14:45:04 -0700 Subject: [PATCH] freedreno: optimize rebind_resource() Track how resources are used, ie. which state they may potentially dirty if the backing bo is changed/reallocated, to optimize rebind_resource(). This will be more important in a later patch when we hook up eviction of entries in a6xx tex state cache. Signed-off-by: Rob Clark Part-of: --- .../drivers/freedreno/freedreno_resource.c | 104 ++++++++++++------ .../drivers/freedreno/freedreno_resource.h | 31 ++++++ .../drivers/freedreno/freedreno_state.c | 20 +++- .../drivers/freedreno/freedreno_texture.c | 6 +- 4 files changed, 123 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 46deed39b8c..0f846c7b255 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -65,63 +65,94 @@ * emitted so the GPU looks at the new backing bo. 
*/ static void -rebind_resource(struct fd_context *ctx, struct pipe_resource *prsc) +rebind_resource_in_ctx(struct fd_context *ctx, struct fd_resource *rsc) { + struct pipe_resource *prsc = &rsc->base; + /* VBOs */ - for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) { - if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc) - ctx->dirty |= FD_DIRTY_VTXBUF; + if (rsc->dirty & FD_DIRTY_VTXBUF) { + struct fd_vertexbuf_stateobj *vb = &ctx->vtx.vertexbuf; + for (unsigned i = 0; i < vb->count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) { + if (vb->vb[i].buffer.resource == prsc) + ctx->dirty |= FD_DIRTY_VTXBUF; + } } + const enum fd_dirty_3d_state per_stage_dirty = + FD_DIRTY_CONST | FD_DIRTY_TEX | FD_DIRTY_IMAGE | FD_DIRTY_SSBO; + + if (!(rsc->dirty & per_stage_dirty)) + return; + /* per-shader-stage resources: */ for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) { /* Constbufs.. note that constbuf[0] is normal uniforms emitted in * cmdstream rather than by pointer.. 
*/ - const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask); - for (unsigned i = 1; i < num_ubos; i++) { - if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST) - break; - if (ctx->constbuf[stage].cb[i].buffer == prsc) { - ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST; - ctx->dirty |= FD_DIRTY_CONST; + if ((rsc->dirty & FD_DIRTY_CONST) && + !(ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)) { /* dirty_shader[] is tracked in FD_DIRTY_SHADER_* bits */ + struct fd_constbuf_stateobj *cb = &ctx->constbuf[stage]; + const unsigned num_ubos = util_last_bit(cb->enabled_mask); + for (unsigned i = 1; i < num_ubos; i++) { + if (cb->cb[i].buffer == prsc) { + ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST; + ctx->dirty |= FD_DIRTY_CONST; + break; + } } } /* Textures */ - for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) { - if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX) - break; - if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc)) { - ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX; - ctx->dirty |= FD_DIRTY_TEX; + if ((rsc->dirty & FD_DIRTY_TEX) && + !(ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)) { + struct fd_texture_stateobj *tex = &ctx->tex[stage]; + for (unsigned i = 0; i < tex->num_textures; i++) { + if (tex->textures[i] && (tex->textures[i]->texture == prsc)) { + ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX; + ctx->dirty |= FD_DIRTY_TEX; + break; + } } } /* Images */ - const unsigned num_images = util_last_bit(ctx->shaderimg[stage].enabled_mask); - for (unsigned i = 0; i < num_images; i++) { - if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_IMAGE) - break; - if (ctx->shaderimg[stage].si[i].resource == prsc) { - ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_IMAGE; - ctx->dirty |= FD_DIRTY_IMAGE; + if ((rsc->dirty & FD_DIRTY_IMAGE) && + !(ctx->dirty_shader[stage] & FD_DIRTY_SHADER_IMAGE)) { + struct fd_shaderimg_stateobj *si = &ctx->shaderimg[stage]; + const unsigned num_images = util_last_bit(si->enabled_mask); + for (unsigned i = 0; i < num_images; i++) { + if 
(si->si[i].resource == prsc) { + ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_IMAGE; + ctx->dirty |= FD_DIRTY_IMAGE; + break; + } } } /* SSBOs */ - const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask); - for (unsigned i = 0; i < num_ssbos; i++) { - if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO) - break; - if (ctx->shaderbuf[stage].sb[i].buffer == prsc) { - ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO; - ctx->dirty |= FD_DIRTY_SSBO; + if ((rsc->dirty & FD_DIRTY_SSBO) && + !(ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)) { + struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[stage]; + const unsigned num_ssbos = util_last_bit(sb->enabled_mask); + for (unsigned i = 0; i < num_ssbos; i++) { + if (sb->sb[i].buffer == prsc) { + ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO; + ctx->dirty |= FD_DIRTY_SSBO; + break; + } } } } } +static void +rebind_resource(struct fd_context *ctx, struct fd_resource *rsc) +{ + fd_resource_lock(rsc); + rebind_resource_in_ctx(ctx, rsc); + fd_resource_unlock(rsc); +} + static void realloc_bo(struct fd_resource *rsc, uint32_t size) { @@ -358,7 +389,7 @@ fd_resource_uncompress(struct fd_context *ctx, struct fd_resource *rsc) * enough that they mostly only show up in deqp. 
*/ - rebind_resource(ctx, &rsc->base); + rebind_resource(ctx, rsc); } static struct fd_resource * @@ -607,7 +638,7 @@ fd_resource_transfer_map(struct pipe_context *pctx, if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { if (needs_flush || fd_resource_busy(rsc, op)) { realloc_bo(rsc, fd_bo_size(rsc->bo)); - rebind_resource(ctx, prsc); + rebind_resource(ctx, rsc); } } else if ((usage & PIPE_TRANSFER_WRITE) && prsc->target == PIPE_BUFFER && @@ -650,7 +681,7 @@ fd_resource_transfer_map(struct pipe_context *pctx, if (needs_flush && fd_try_shadow_resource(ctx, rsc, level, box, DRM_FORMAT_MOD_LINEAR)) { needs_flush = busy = false; - rebind_resource(ctx, prsc); + rebind_resource(ctx, rsc); ctx->stats.shadow_uploads++; } else { struct fd_resource *staging_rsc; @@ -739,6 +770,7 @@ fd_resource_destroy(struct pipe_screen *pscreen, renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro); util_range_destroy(&rsc->valid_buffer_range); + simple_mtx_destroy(&rsc->lock); FREE(rsc); } @@ -997,6 +1029,8 @@ fd_resource_create_with_modifiers(struct pipe_screen *pscreen, util_range_init(&rsc->valid_buffer_range); + simple_mtx_init(&rsc->lock, mtx_plain); + rsc->internal_format = format; rsc->layout.ubwc = rsc->layout.tile_mode && is_a6xx(screen) && allow_ubwc; @@ -1080,6 +1114,8 @@ fd_resource_from_handle(struct pipe_screen *pscreen, util_range_init(&rsc->valid_buffer_range); + simple_mtx_init(&rsc->lock, mtx_plain); + rsc->bo = fd_screen_bo_from_handle(pscreen, handle); if (!rsc->bo) goto fail; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index 9cb1270c74a..d1c673996cc 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -30,6 +30,7 @@ #include "util/list.h" #include "util/u_range.h" #include "util/u_transfer_helper.h" +#include "util/simple_mtx.h" #include "freedreno_batch.h" #include "freedreno_util.h" @@ -50,6 +51,8 @@ struct 
fd_resource { /* TODO rename to secondary or auxiliary? */ struct fd_resource *stencil; + simple_mtx_t lock; + /* bitmask of in-flight batches which reference this resource. Note * that the batch doesn't hold reference to resources (but instead * the fd_ringbuffer holds refs to the underlying fd_bo), but in case @@ -71,6 +74,11 @@ struct fd_resource { /* Sequence # incremented each time bo changes: */ uint16_t seqno; + /* bitmask of state this resource could potentially dirty when rebound, + * see rebind_resource() + */ + enum fd_dirty_3d_state dirty; + /* * LRZ * @@ -119,6 +127,29 @@ fd_resource_busy(struct fd_resource *rsc, unsigned op) return fd_bo_cpu_prep(rsc->bo, NULL, op | DRM_FREEDRENO_PREP_NOSYNC) != 0; } +static inline void +fd_resource_lock(struct fd_resource *rsc) +{ + simple_mtx_lock(&rsc->lock); +} + +static inline void +fd_resource_unlock(struct fd_resource *rsc) +{ + simple_mtx_unlock(&rsc->lock); +} + +static inline void +fd_resource_set_usage(struct pipe_resource *prsc, enum fd_dirty_3d_state usage) +{ + if (!prsc) + return; + struct fd_resource *rsc = fd_resource(prsc); + fd_resource_lock(rsc); + rsc->dirty |= usage; + fd_resource_unlock(rsc); +} + static inline bool has_depth(enum pipe_format format) { diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index a748bfec82c..65b1e2b6609 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -115,6 +115,8 @@ fd_set_constant_buffer(struct pipe_context *pctx, so->enabled_mask |= 1 << index; ctx->dirty_shader[shader] |= FD_DIRTY_SHADER_CONST; ctx->dirty |= FD_DIRTY_CONST; + + fd_resource_set_usage(cb->buffer, FD_DIRTY_CONST); } static void @@ -146,6 +148,8 @@ fd_set_shader_buffers(struct pipe_context *pctx, buf->buffer_size = buffers[i].buffer_size; pipe_resource_reference(&buf->buffer, buffers[i].buffer); + fd_resource_set_usage(buffers[i].buffer, FD_DIRTY_SSBO); + so->enabled_mask 
|= BIT(n); } else { pipe_resource_reference(&buf->buffer, NULL); @@ -153,6 +157,7 @@ fd_set_shader_buffers(struct pipe_context *pctx, } ctx->dirty_shader[shader] |= FD_DIRTY_SHADER_SSBO; + ctx->dirty |= FD_DIRTY_SSBO; } void @@ -180,10 +185,12 @@ fd_set_shader_images(struct pipe_context *pctx, mask |= BIT(n); util_copy_image_view(buf, &images[i]); - if (buf->resource) + if (buf->resource) { + fd_resource_set_usage(buf->resource, FD_DIRTY_IMAGE); so->enabled_mask |= BIT(n); - else + } else { so->enabled_mask &= ~BIT(n); + } } } else { mask = (BIT(count) - 1) << start; @@ -199,6 +206,7 @@ fd_set_shader_images(struct pipe_context *pctx, } ctx->dirty_shader[shader] |= FD_DIRTY_SHADER_IMAGE; + ctx->dirty |= FD_DIRTY_IMAGE; } static void @@ -348,7 +356,15 @@ fd_set_vertex_buffers(struct pipe_context *pctx, util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, count); so->count = util_last_bit(so->enabled_mask); + if (!vb) + return; + ctx->dirty |= FD_DIRTY_VTXBUF; + + for (unsigned i = 0; i < count; i++) { + assert(!vb[i].is_user_buffer); + fd_resource_set_usage(vb[i].buffer.resource, FD_DIRTY_VTXBUF); + } } static void diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c index 84b4df6c1dc..5b0bec17f5d 100644 --- a/src/gallium/drivers/freedreno/freedreno_texture.c +++ b/src/gallium/drivers/freedreno/freedreno_texture.c @@ -75,10 +75,12 @@ static void set_sampler_views(struct fd_texture_stateobj *tex, struct pipe_sampler_view *view = views ? views[i] : NULL; unsigned p = i + start; pipe_sampler_view_reference(&tex->textures[p], view); - if (tex->textures[p]) + if (tex->textures[p]) { + fd_resource_set_usage(tex->textures[p]->texture, FD_DIRTY_TEX); tex->valid_textures |= (1 << p); - else + } else { tex->valid_textures &= ~(1 << p); + } } tex->num_textures = util_last_bit(tex->valid_textures); -- 2.30.2