freedreno/a6xx: combine sample mask into blend state
author: Rob Clark <robdclark@chromium.org>
Thu, 16 Apr 2020 22:25:27 +0000 (15:25 -0700)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 30 Apr 2020 20:03:17 +0000 (20:03 +0000)
This gets rid of one lone register we used to emit directly in IB2
whenever blend state changes, at the expense of needing blend state
variants when sample-mask changes.  I think typically sample-mask
should not change frequently, so this seems like a fair trade-off.

To further limit the # of variants, we ignore sample-mask bits that
are not relevant for the current # of samples.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4813>

src/gallium/drivers/freedreno/a6xx/fd6_blend.c
src/gallium/drivers/freedreno/a6xx/fd6_blend.h
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/freedreno_util.h

index 40a99822297651cc2993c3e88361bf541147e129..279f9ea551252f94c5fa7e616618d68263d68722 100644 (file)
@@ -56,33 +56,29 @@ blend_func(unsigned func)
        }
 }
 
-void *
-fd6_blend_state_create(struct pipe_context *pctx,
-               const struct pipe_blend_state *cso)
+struct fd6_blend_variant *
+__fd6_setup_blend_variant(struct fd6_blend_stateobj *blend, unsigned sample_mask)
 {
-       struct fd_context *ctx = fd_context(pctx);
-       struct fd6_blend_stateobj *so;
+       const struct pipe_blend_state *cso = &blend->base;
+       struct fd6_blend_variant *so;
        enum a3xx_rop_code rop = ROP_COPY;
        bool reads_dest = false;
-       unsigned i, mrt_blend = 0;
+       unsigned mrt_blend = 0;
 
        if (cso->logicop_enable) {
                rop = cso->logicop_func;  /* maps 1:1 */
                reads_dest = util_logicop_reads_dest(cso->logicop_func);
        }
 
-       so = CALLOC_STRUCT(fd6_blend_stateobj);
+       so = rzalloc_size(blend, sizeof(*so));
        if (!so)
                return NULL;
 
-       so->base = *cso;
-       struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe,
-                       ((A6XX_MAX_RENDER_TARGETS * 4) + 4) * 4);
+       struct fd_ringbuffer *ring = fd_ringbuffer_new_object(blend->ctx->pipe,
+                       ((A6XX_MAX_RENDER_TARGETS * 4) + 6) * 4);
        so->stateobj = ring;
 
-       so->lrz_write = true;  /* unless blend enabled for any MRT */
-
-       for (i = 0; i < A6XX_MAX_RENDER_TARGETS; i++) {
+       for (unsigned i = 0; i < A6XX_MAX_RENDER_TARGETS; i++) {
                const struct pipe_rt_blend_state *rt;
 
                if (cso->independent_blend_enable)
@@ -109,12 +105,10 @@ fd6_blend_state_create(struct pipe_context *pctx,
 
                if (rt->blend_enable) {
                        mrt_blend |= (1 << i);
-                       so->lrz_write = false;
                }
 
                if (reads_dest) {
                        mrt_blend |= (1 << i);
-                       so->lrz_write = false;
                }
        }
 
@@ -129,16 +123,60 @@ fd6_blend_state_create(struct pipe_context *pctx,
                        .dither_mode_mrt7 = cso->dither ? DITHER_ALWAYS : DITHER_DISABLE,
                ));
 
-       so->rb_blend_cntl = A6XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) |
-               COND(cso->alpha_to_coverage, A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) |
-               COND(cso->independent_blend_enable, A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND);
-
        OUT_REG(ring, A6XX_SP_BLEND_CNTL(
                        .unk8              = true,
                        .alpha_to_coverage = cso->alpha_to_coverage,
                        .enabled           = !!mrt_blend,
                ));
 
+       OUT_REG(ring, A6XX_RB_BLEND_CNTL(
+                       .enable_blend      = mrt_blend,
+                       .alpha_to_coverage = cso->alpha_to_coverage,
+                       .independent_blend = cso->independent_blend_enable,
+                       .sample_mask       = sample_mask
+               ));
+
+       so->sample_mask = sample_mask;
+
+       util_dynarray_append(&blend->variants, struct fd6_blend_variant *, so);
+
+       return so;
+}
+
+void *
+fd6_blend_state_create(struct pipe_context *pctx,
+               const struct pipe_blend_state *cso)
+{
+       struct fd6_blend_stateobj *so;
+       bool reads_dest = false;
+
+       if (cso->logicop_enable) {
+               reads_dest = util_logicop_reads_dest(cso->logicop_func);
+       }
+
+       so = rzalloc_size(NULL, sizeof(*so));
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+       so->ctx = fd_context(pctx);
+       so->lrz_write = true;  /* unless blend enabled for any MRT */
+
+       unsigned nr = cso->independent_blend_enable ? A6XX_MAX_RENDER_TARGETS : 1;
+       for (unsigned i = 0; i < nr; i++) {
+               const struct pipe_rt_blend_state *rt = &cso->rt[i];
+
+               if (rt->blend_enable) {
+                       so->lrz_write = false;
+               }
+       }
+
+       if (reads_dest) {
+               so->lrz_write = false;
+       }
+
+       util_dynarray_init(&so->variants, so);
+
        return so;
 }
 
@@ -147,7 +185,10 @@ fd6_blend_state_delete(struct pipe_context *pctx, void *hwcso)
 {
        struct fd6_blend_stateobj *so = hwcso;
 
-       fd_ringbuffer_del(so->stateobj);
+       util_dynarray_foreach(&so->variants, struct fd6_blend_variant *, vp) {
+               struct fd6_blend_variant *v = *vp;
+               fd_ringbuffer_del(v->stateobj);
+       }
 
-       FREE(hwcso);
+       ralloc_free(so);
 }
index 75e905f8e55b545a3a4828019f66ce974724e66f..09c4609f35aff541cbee076b8c3b41c7241fcddd 100644 (file)
 #include "freedreno_context.h"
 #include "freedreno_util.h"
 
+/**
+ * Since the sample-mask is part of the hw blend state, we need to have state
+ * variants per sample-mask value.  But we don't expect the sample-mask state
+ * to change frequently.
+ */
+struct fd6_blend_variant {
+       unsigned sample_mask;
+       struct fd_ringbuffer *stateobj;
+};
+
 struct fd6_blend_stateobj {
        struct pipe_blend_state base;
 
-       uint32_t rb_blend_cntl;
-
+       struct fd_context *ctx;
        bool lrz_write;
-       struct fd_ringbuffer *stateobj;
+       struct util_dynarray variants;
 };
 
 static inline struct fd6_blend_stateobj *
@@ -49,6 +58,30 @@ fd6_blend_stateobj(struct pipe_blend_state *blend)
        return (struct fd6_blend_stateobj *)blend;
 }
 
+struct fd6_blend_variant * __fd6_setup_blend_variant(
+               struct fd6_blend_stateobj *blend, unsigned sample_mask);
+
+static inline struct fd6_blend_variant *
+fd6_blend_variant(struct pipe_blend_state *cso,
+               unsigned nr_samples, unsigned sample_mask)
+{
+       struct fd6_blend_stateobj *blend = fd6_blend_stateobj(cso);
+       unsigned mask = BITFIELD_MASK(nr_samples);
+
+       util_dynarray_foreach(&blend->variants, struct fd6_blend_variant *, vp) {
+               struct fd6_blend_variant *v = *vp;
+
+               /* mask out sample-mask bits that we don't care about to avoid
+                * creating unnecessary variants
+                */
+               if ((mask & v->sample_mask) == (mask & sample_mask)) {
+                       return v;
+               }
+       }
+
+       return __fd6_setup_blend_variant(blend, sample_mask);
+}
+
 void * fd6_blend_state_create(struct pipe_context *pctx,
                const struct pipe_blend_state *cso);
 void fd6_blend_state_delete(struct pipe_context *, void *hwcso);
index ccd59f62876e764fd777291f8427cfdd577c82e1..425efbbbe40326389246cf869ce43cc941503a62 100644 (file)
@@ -1111,17 +1111,10 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
        if (info->num_outputs)
                fd6_emit_streamout(ring, emit, info);
 
-       if (dirty & FD_DIRTY_BLEND) {
-               struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
-               fd6_emit_add_group(emit, blend->stateobj, FD6_GROUP_BLEND, ENABLE_DRAW);
-       }
-
        if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK)) {
-               struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
-
-               OUT_PKT4(ring, REG_A6XX_RB_BLEND_CNTL, 1);
-               OUT_RING(ring, blend->rb_blend_cntl |
-                               A6XX_RB_BLEND_CNTL_SAMPLE_MASK(ctx->sample_mask));
+               struct fd6_blend_variant *blend = fd6_blend_variant(ctx->blend,
+                               pfb->samples, ctx->sample_mask);
+               fd6_emit_add_group(emit, blend->stateobj, FD6_GROUP_BLEND, ENABLE_DRAW);
        }
 
        if (dirty & FD_DIRTY_BLEND_COLOR) {
index 6bd900e93add7bb509067ef48696950509e98e1c..6b1024b8d3a2e53181dd28914efd86b2ed32d26b 100644 (file)
@@ -297,7 +297,7 @@ OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
 
 static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
 {
-       if (ring->cur + ndwords > ring->end)
+       if (unlikely(ring->cur + ndwords > ring->end))
                fd_ringbuffer_grow(ring, ndwords);
 }