From 5d554987c2b856fe463afab3bd9103c2d1e41b97 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 16 Apr 2020 15:25:27 -0700 Subject: [PATCH] freedreno/a6xx: combine sample mask into blend state This gets rid of one lone register we used to emit directly in IB2 whenever blend state changes, at the expense of needing blend state variants when sample-mask changes. I think typically sample-mask should not change frequently, so this seems like a fair trade-off. To further limit the # of variants, we ignore sample-mask bits that are not relevant for the current # of samples. Signed-off-by: Rob Clark Part-of: --- .../drivers/freedreno/a6xx/fd6_blend.c | 83 ++++++++++++++----- .../drivers/freedreno/a6xx/fd6_blend.h | 39 ++++++++- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 13 +-- .../drivers/freedreno/freedreno_util.h | 2 +- 4 files changed, 102 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.c b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c index 40a99822297..279f9ea5512 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c @@ -56,33 +56,29 @@ blend_func(unsigned func) } } -void * -fd6_blend_state_create(struct pipe_context *pctx, - const struct pipe_blend_state *cso) +struct fd6_blend_variant * +__fd6_setup_blend_variant(struct fd6_blend_stateobj *blend, unsigned sample_mask) { - struct fd_context *ctx = fd_context(pctx); - struct fd6_blend_stateobj *so; + const struct pipe_blend_state *cso = &blend->base; + struct fd6_blend_variant *so; enum a3xx_rop_code rop = ROP_COPY; bool reads_dest = false; - unsigned i, mrt_blend = 0; + unsigned mrt_blend = 0; if (cso->logicop_enable) { rop = cso->logicop_func; /* maps 1:1 */ reads_dest = util_logicop_reads_dest(cso->logicop_func); } - so = CALLOC_STRUCT(fd6_blend_stateobj); + so = rzalloc_size(blend, sizeof(*so)); if (!so) return NULL; - so->base = *cso; - struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, - 
((A6XX_MAX_RENDER_TARGETS * 4) + 4) * 4); + struct fd_ringbuffer *ring = fd_ringbuffer_new_object(blend->ctx->pipe, + ((A6XX_MAX_RENDER_TARGETS * 4) + 6) * 4); so->stateobj = ring; - so->lrz_write = true; /* unless blend enabled for any MRT */ - - for (i = 0; i < A6XX_MAX_RENDER_TARGETS; i++) { + for (unsigned i = 0; i < A6XX_MAX_RENDER_TARGETS; i++) { const struct pipe_rt_blend_state *rt; if (cso->independent_blend_enable) @@ -109,12 +105,10 @@ fd6_blend_state_create(struct pipe_context *pctx, if (rt->blend_enable) { mrt_blend |= (1 << i); - so->lrz_write = false; } if (reads_dest) { mrt_blend |= (1 << i); - so->lrz_write = false; } } @@ -129,16 +123,60 @@ fd6_blend_state_create(struct pipe_context *pctx, .dither_mode_mrt7 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE, )); - so->rb_blend_cntl = A6XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) | - COND(cso->alpha_to_coverage, A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) | - COND(cso->independent_blend_enable, A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND); - OUT_REG(ring, A6XX_SP_BLEND_CNTL( .unk8 = true, .alpha_to_coverage = cso->alpha_to_coverage, .enabled = !!mrt_blend, )); + OUT_REG(ring, A6XX_RB_BLEND_CNTL( + .enable_blend = mrt_blend, + .alpha_to_coverage = cso->alpha_to_coverage, + .independent_blend = cso->independent_blend_enable, + .sample_mask = sample_mask + )); + + so->sample_mask = sample_mask; + + util_dynarray_append(&blend->variants, struct fd6_blend_variant *, so); + + return so; +} + +void * +fd6_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + struct fd6_blend_stateobj *so; + bool reads_dest = false; + + if (cso->logicop_enable) { + reads_dest = util_logicop_reads_dest(cso->logicop_func); + } + + so = rzalloc_size(NULL, sizeof(*so)); + if (!so) + return NULL; + + so->base = *cso; + so->ctx = fd_context(pctx); + so->lrz_write = true; /* unless blend enabled for any MRT */ + + unsigned nr = cso->independent_blend_enable ? 
A6XX_MAX_RENDER_TARGETS : 1; + for (unsigned i = 0; i < nr; i++) { + const struct pipe_rt_blend_state *rt = &cso->rt[i]; + + if (rt->blend_enable) { + so->lrz_write = false; + } + } + + if (reads_dest) { + so->lrz_write = false; + } + + util_dynarray_init(&so->variants, so); + return so; } @@ -147,7 +185,10 @@ fd6_blend_state_delete(struct pipe_context *pctx, void *hwcso) { struct fd6_blend_stateobj *so = hwcso; - fd_ringbuffer_del(so->stateobj); + util_dynarray_foreach(&so->variants, struct fd6_blend_variant *, vp) { + struct fd6_blend_variant *v = *vp; + fd_ringbuffer_del(v->stateobj); + } - FREE(hwcso); + ralloc_free(so); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.h b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h index 75e905f8e55..09c4609f35a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h @@ -34,13 +34,22 @@ #include "freedreno_context.h" #include "freedreno_util.h" +/** + * Since the sample-mask is part of the hw blend state, we need to have state + * variants per sample-mask value. But we don't expect the sample-mask state + * to change frequently. 
+ */ +struct fd6_blend_variant { + unsigned sample_mask; + struct fd_ringbuffer *stateobj; +}; + struct fd6_blend_stateobj { struct pipe_blend_state base; - uint32_t rb_blend_cntl; - + struct fd_context *ctx; bool lrz_write; - struct fd_ringbuffer *stateobj; + struct util_dynarray variants; }; static inline struct fd6_blend_stateobj * @@ -49,6 +58,30 @@ fd6_blend_stateobj(struct pipe_blend_state *blend) return (struct fd6_blend_stateobj *)blend; } +struct fd6_blend_variant * __fd6_setup_blend_variant( + struct fd6_blend_stateobj *blend, unsigned sample_mask); + +static inline struct fd6_blend_variant * +fd6_blend_variant(struct pipe_blend_state *cso, + unsigned nr_samples, unsigned sample_mask) +{ + struct fd6_blend_stateobj *blend = fd6_blend_stateobj(cso); + unsigned mask = BITFIELD_MASK(nr_samples); + + util_dynarray_foreach(&blend->variants, struct fd6_blend_variant *, vp) { + struct fd6_blend_variant *v = *vp; + + /* mask out sample-mask bits that we don't care about to avoid + * creating unnecessary variants + */ + if ((mask & v->sample_mask) == (mask & sample_mask)) { + return v; + } + } + + return __fd6_setup_blend_variant(blend, sample_mask); +} + void * fd6_blend_state_create(struct pipe_context *pctx, const struct pipe_blend_state *cso); void fd6_blend_state_delete(struct pipe_context *, void *hwcso); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index ccd59f62876..425efbbbe40 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -1111,17 +1111,10 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) if (info->num_outputs) fd6_emit_streamout(ring, emit, info); - if (dirty & FD_DIRTY_BLEND) { - struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend); - fd6_emit_add_group(emit, blend->stateobj, FD6_GROUP_BLEND, ENABLE_DRAW); - } - if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK)) { - struct fd6_blend_stateobj 
*blend = fd6_blend_stateobj(ctx->blend); - - OUT_PKT4(ring, REG_A6XX_RB_BLEND_CNTL, 1); - OUT_RING(ring, blend->rb_blend_cntl | - A6XX_RB_BLEND_CNTL_SAMPLE_MASK(ctx->sample_mask)); + struct fd6_blend_variant *blend = fd6_blend_variant(ctx->blend, + pfb->samples, ctx->sample_mask); + fd6_emit_add_group(emit, blend->stateobj, FD6_GROUP_BLEND, ENABLE_DRAW); } if (dirty & FD_DIRTY_BLEND_COLOR) { diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 6bd900e93ad..6b1024b8d3a 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -297,7 +297,7 @@ OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) { - if (ring->cur + ndwords > ring->end) + if (unlikely(ring->cur + ndwords > ring->end)) fd_ringbuffer_grow(ring, ndwords); } -- 2.30.2