freedreno/a6xx: re-work LRZ state tracking
authorRob Clark <robdclark@chromium.org>
Wed, 3 Jun 2020 17:06:58 +0000 (10:06 -0700)
committerMarge Bot <eric+marge@anholt.net>
Thu, 4 Jun 2020 02:34:54 +0000 (02:34 +0000)
In particular, properly detect reversal of depth-test direction.
With that we can remove a lot of cases where we were unnecessarily
invalidating LRZ, which was simply papering over the direction-
reversal issue in deqp.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5298>

src/gallium/drivers/freedreno/a6xx/fd6_blend.c
src/gallium/drivers/freedreno/a6xx/fd6_blend.h
src/gallium/drivers/freedreno/a6xx/fd6_context.h
src/gallium/drivers/freedreno/a6xx/fd6_draw.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
src/gallium/drivers/freedreno/a6xx/fd6_zsa.c
src/gallium/drivers/freedreno/a6xx/fd6_zsa.h
src/gallium/drivers/freedreno/freedreno_resource.h

index 16e2bfaf1b815cb7f2a42c769abef3bd603bd289..3d4b100552471e0675746812d72457ff0d6b09d3 100644 (file)
@@ -148,11 +148,6 @@ fd6_blend_state_create(struct pipe_context *pctx,
                const struct pipe_blend_state *cso)
 {
        struct fd6_blend_stateobj *so;
-       bool reads_dest = false;
-
-       if (cso->logicop_enable) {
-               reads_dest = util_logicop_reads_dest(cso->logicop_func);
-       }
 
        so = rzalloc_size(NULL, sizeof(*so));
        if (!so)
@@ -160,21 +155,21 @@ fd6_blend_state_create(struct pipe_context *pctx,
 
        so->base = *cso;
        so->ctx = fd_context(pctx);
-       so->lrz_write = true;  /* unless blend enabled for any MRT */
+
+       if (cso->logicop_enable) {
+               so->reads_dest |= util_logicop_reads_dest(cso->logicop_func);
+       }
 
        unsigned nr = cso->independent_blend_enable ? cso->max_rt : 0;
        for (unsigned i = 0; i <= nr; i++) {
                const struct pipe_rt_blend_state *rt = &cso->rt[i];
 
+               so->reads_dest |= rt->blend_enable;
                if (rt->blend_enable) {
-                       so->lrz_write = false;
+                       so->reads_dest = true;
                }
        }
 
-       if (reads_dest) {
-               so->lrz_write = false;
-       }
-
        util_dynarray_init(&so->variants, so);
 
        return so;
index 09c4609f35aff541cbee076b8c3b41c7241fcddd..ef7dde8cf652d48ba0968fd83273dab37b19a8ba 100644 (file)
@@ -48,7 +48,7 @@ struct fd6_blend_stateobj {
        struct pipe_blend_state base;
 
        struct fd_context *ctx;
-       bool lrz_write;
+       bool reads_dest;
        struct util_dynarray variants;
 };
 
index c890cdd84231fea0f62dc022f1514e8c6b58093d..bb48ecea784a712747873c027f9904923d274958 100644 (file)
 #include "util/u_upload_mgr.h"
 
 #include "freedreno_context.h"
+#include "freedreno_resource.h"
 
 #include "ir3/ir3_shader.h"
 
 #include "a6xx.xml.h"
 
+struct fd6_lrz_state {
+       bool enable : 1;
+       bool write  : 1;
+       bool test   : 1;
+       enum fd_lrz_direction direction : 2;
+};
+
 struct fd6_context {
        struct fd_context base;
 
@@ -106,15 +114,11 @@ struct fd6_context {
                uint32_t SP_UNKNOWN_A0F8;
        } magic;
 
-
        struct {
                /* previous binning/draw lrz state, which is a function of multiple
                 * gallium stateobjs, but doesn't necessarily change as frequently:
                 */
-               struct {
-                       uint32_t gras_lrz_cntl;
-                       uint32_t rb_lrz_cntl;
-               } lrz[2];
+               struct fd6_lrz_state lrz[2];
        } last;
 };
 
index dab1cf32cd9e6e42156e9338cdb7951836245822..232262765a38f2fa30bdeb52935a19799c196f7a 100644 (file)
@@ -217,11 +217,6 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
        ctx->stats.gs_regs += COND(emit.gs, ir3_shader_halfregs(emit.gs));
        ctx->stats.fs_regs += ir3_shader_halfregs(emit.fs);
 
-       /* figure out whether we need to disable LRZ write for binning
-        * pass using draw pass's fs:
-        */
-       emit.no_lrz_write = emit.fs->writes_pos || emit.fs->no_earlyz || emit.fs->has_kill;
-
        struct fd_ringbuffer *ring = ctx->batch->draw;
 
        struct CP_DRAW_INDX_OFFSET_0 draw0 = {
@@ -493,6 +488,7 @@ fd6_clear(struct fd_context *ctx, unsigned buffers,
                struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
                if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) {
                        zsbuf->lrz_valid = true;
+                       zsbuf->lrz_direction = FD_LRZ_UNKNOWN;
                        fd6_clear_lrz(ctx->batch, zsbuf, depth);
                }
        }
index cdc8dfb85bd2797b7ec27924e9f8a769a83b24b6..bd30d69dd37def2dfe2d6e9b91cf3916252e83f2 100644 (file)
@@ -585,45 +585,95 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
        return ring;
 }
 
-static struct fd_ringbuffer *
-build_lrz(struct fd6_emit *emit, bool binning_pass)
+/**
+ * Calculate normalized LRZ state based on zsa/prog/blend state, updating
+ * the zsbuf's lrz state as necessary to detect the cases where we need
+ * to invalidate lrz.
+ */
+static struct fd6_lrz_state
+compute_lrz_state(struct fd6_emit *emit)
 {
        struct fd_context *ctx = emit->ctx;
+       struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+       const struct ir3_shader_variant *fs = emit->fs;
+       struct fd6_lrz_state lrz;
+
        struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
        struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
-       struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
        struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
-       uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
-       uint32_t rb_lrz_cntl = zsa->rb_lrz_cntl;
 
-       if (zsa->invalidate_lrz) {
+       lrz = zsa->lrz;
+
+       /* normalize lrz state: */
+       if (blend->reads_dest || fs->writes_pos || fs->no_earlyz || fs->has_kill) {
+               lrz.write = false;
+       }
+
+       /* if we change depthfunc direction, bail out on using LRZ.  The
+        * LRZ buffer encodes a min/max depth value per block, but if
+        * we switch from GT/GE <-> LT/LE, those values cannot be
+        * interpreted properly.
+        */
+       if (zsa->base.depth.enabled &&
+                       (rsc->lrz_direction != FD_LRZ_UNKNOWN) &&
+                       (rsc->lrz_direction != lrz.direction)) {
                rsc->lrz_valid = false;
-               gras_lrz_cntl = 0;
-               rb_lrz_cntl = 0;
-       } else if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) {
-               gras_lrz_cntl = 0;
-               rb_lrz_cntl = 0;
-       } else if (binning_pass && blend->lrz_write && zsa->lrz_write) {
-               gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_LRZ_WRITE;
        }
 
+       if (zsa->invalidate_lrz || !rsc->lrz_valid) {
+               rsc->lrz_valid = false;
+               memset(&lrz, 0, sizeof(lrz));
+       }
+
+       if (fs->no_earlyz || fs->writes_pos) {
+               lrz.enable = false;
+               lrz.write = false;
+               lrz.test = false;
+       }
+
+       /* Once we start writing to the real depth buffer, we lock in the
+        * direction for LRZ.. if we have to skip a LRZ write for any
+        * reason, it is still safe to have LRZ until there is a direction
+        * reversal.  Prior to the reversal, since we disabled LRZ writes
+        * in the "unsafe" cases, this just means that the LRZ test may
+        * not early-discard some things that end up not passing a later
+        * test (ie. be overly concervative).  But once you have a reversal
+        * of direction, it is possible to increase/decrease the z value
+        * to the point where the overly-conservative test is incorrect.
+        */
+       if (zsa->base.depth.writemask) {
+               rsc->lrz_direction = lrz.direction;
+       }
+
+       return lrz;
+}
+
+static struct fd_ringbuffer *
+build_lrz(struct fd6_emit *emit, bool binning_pass)
+{
+       struct fd_context *ctx = emit->ctx;
        struct fd6_context *fd6_ctx = fd6_context(ctx);
-       if ((fd6_ctx->last.lrz[binning_pass].gras_lrz_cntl == gras_lrz_cntl) &&
-                       (fd6_ctx->last.lrz[binning_pass].rb_lrz_cntl == rb_lrz_cntl) &&
-                       !ctx->last.dirty)
+       struct fd6_lrz_state lrz = compute_lrz_state(emit);
+
+       /* If the LRZ state has not changed, we can skip the emit: */
+       if (!ctx->last.dirty &&
+                       !memcmp(&fd6_ctx->last.lrz[binning_pass], &lrz, sizeof(lrz)))
                return NULL;
 
-       fd6_ctx->last.lrz[binning_pass].gras_lrz_cntl = gras_lrz_cntl;
-       fd6_ctx->last.lrz[binning_pass].rb_lrz_cntl = rb_lrz_cntl;
+       fd6_ctx->last.lrz[binning_pass] = lrz;
 
        struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(ctx->batch->submit,
-                       16, FD_RINGBUFFER_STREAMING);
-
-       OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
-       OUT_RING(ring, gras_lrz_cntl);
-
-       OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
-       OUT_RING(ring, rb_lrz_cntl);
+                       4*4, FD_RINGBUFFER_STREAMING);
+
+       OUT_REG(ring, A6XX_GRAS_LRZ_CNTL(
+                       .enable        = lrz.enable,
+                       .lrz_write     = lrz.write,
+                       .greater       = lrz.direction == FD_LRZ_GREATER,
+                       .z_test_enable = lrz.test,
+               ));
+       OUT_REG(ring, A6XX_RB_LRZ_CNTL(
+                       .enable = lrz.enable,
+               ));
 
        return ring;
 }
index 16e54047245e78016516f0e94aefebbd2493bd1f..c66418b1644640e2ba6e2bf7216d142be2240a9c 100644 (file)
@@ -95,12 +95,6 @@ struct fd6_emit {
        bool no_decode_srgb;
        bool primitive_restart;
 
-       /* in binning pass, we don't have real frag shader, so we
-        * don't know if real draw disqualifies lrz write.  So just
-        * figure that out up-front and stash it in the emit.
-        */
-       bool no_lrz_write;
-
        /* cached to avoid repeated lookups: */
        const struct fd6_program_state *prog;
 
index 790215dbc0bab0f7a7bb44cbdca53d96301ccb63..829a4d57c5d901c4145ce970885f4e6c461203d2 100644 (file)
@@ -1381,7 +1381,7 @@ fd6_emit_tile_fini(struct fd_batch *batch)
                fd6_emit_ib(batch->gmem, batch->epilogue);
 
        OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
-       OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_FC_ENABLE);
+       OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);
 
        fd6_emit_lrz_flush(ring);
 
index 6aa63b2e89f3468ae802d6ecc5191e849db9387a..d7c2000b4247757a64e3121acd92a7c390269bd8 100644 (file)
 #include "fd6_context.h"
 #include "fd6_format.h"
 
-/* update lza state based on stencil/alpha-test func: */
+/* update lza state based on stencil-test func:
+ *
+ * Conceptually the order of the pipeline is:
+ *
+ *
+ *   FS -> Alpha-Test  ->  Stencil-Test  ->  Depth-Test
+ *                              |                |
+ *                       if wrmask != 0     if wrmask != 0
+ *                              |                |
+ *                              v                v
+ *                        Stencil-Write      Depth-Write
+ *
+ * Because Stencil-Test can have side effects (Stencil-Write) prior
+ * to depth test, in this case we potentially need to disable early
+ * lrz-test.  See:
+ *
+ * https://www.khronos.org/opengl/wiki/Per-Sample_Processing
+ */
 static void
-update_lrz_sa(struct fd6_zsa_stateobj *so, enum pipe_compare_func func)
+update_lrz_stencil(struct fd6_zsa_stateobj *so, enum pipe_compare_func func,
+               bool stencil_write)
 {
        switch (func) {
        case PIPE_FUNC_ALWAYS:
-               /* nothing to do for LRZ: */
+               /* nothing to do for LRZ, but for stencil test when stencil-
+                * write is enabled, we need to disable lrz-test, since
+                * conceptually stencil test and write happens before depth-
+                * test:
+                */
+               if (stencil_write) {
+                       so->lrz.enable = false;
+                       so->lrz.test = false;
+               }
                break;
        case PIPE_FUNC_NEVER:
                /* fragment never passes, disable lrz_write for this draw: */
-               so->lrz_write = false;
+               so->lrz.write = false;
                break;
        default:
                /* whether the fragment passes or not depends on result
                 * of stencil test, which we cannot know when doing binning
                 * pass:
-                *
-                * TODO we maybe don't have to invalidate_lrz, depending on
-                * the depth/stencil func?  Ie. if there is an opaque surface
-                * behind what is currently being drawn, we could just disable
-                * lrz_write for a conservative but correct result?
                 */
-               so->invalidate_lrz = true;
-               so->lrz_write = false;
+               so->lrz.write = false;
+               /* similarly to the PIPE_FUNC_ALWAY case, if there are side-
+                * effects from stencil test we need to disable lrz-test.
+                */
+               if (stencil_write) {
+                       so->lrz.enable = false;
+                       so->lrz.test = false;
+               }
                break;
        }
 }
-
 void *
 fd6_zsa_state_create(struct pipe_context *pctx,
                const struct pipe_depth_stencil_alpha_state *cso)
@@ -83,35 +109,36 @@ fd6_zsa_state_create(struct pipe_context *pctx,
                        A6XX_RB_DEPTH_CNTL_Z_ENABLE |
                        A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
 
-               so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_Z_TEST_ENABLE;
+               so->lrz.test = true;
 
                if (cso->depth.writemask) {
-                       so->lrz_write = true;
+                       so->lrz.write = true;
                }
 
                switch (cso->depth.func) {
                case PIPE_FUNC_LESS:
                case PIPE_FUNC_LEQUAL:
-                       so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE;
-                       so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE;
+                       so->lrz.enable = true;
+                       so->lrz.direction = FD_LRZ_LESS;
                        break;
 
                case PIPE_FUNC_GREATER:
                case PIPE_FUNC_GEQUAL:
-                       so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_GREATER;
-                       so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE;
+                       so->lrz.enable = true;
+                       so->lrz.direction = FD_LRZ_GREATER;
                        break;
 
                case PIPE_FUNC_NEVER:
-                       so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE;
-                       so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE;
-                       so->lrz_write = false;
+                       so->lrz.enable = true;
+                       so->lrz.write = false;
+                       so->lrz.direction = FD_LRZ_LESS;
                        break;
 
+               /* TODO revisit these: */
                case PIPE_FUNC_EQUAL:
                case PIPE_FUNC_NOTEQUAL:
                case PIPE_FUNC_ALWAYS:
-                       so->lrz_write = false;
+                       so->lrz.write = false;
                        so->invalidate_lrz = true;
                        break;
                }
@@ -127,7 +154,7 @@ fd6_zsa_state_create(struct pipe_context *pctx,
                 * stencil test we don't really know what the updates to the
                 * depth buffer will be.
                 */
-               update_lrz_sa(so, s->func);
+               update_lrz_stencil(so, s->func, !!s->writemask);
 
                so->rb_stencil_control |=
                        A6XX_RB_STENCIL_CONTROL_STENCIL_READ |
@@ -143,7 +170,7 @@ fd6_zsa_state_create(struct pipe_context *pctx,
                if (cso->stencil[1].enabled) {
                        const struct pipe_stencil_state *bs = &cso->stencil[1];
 
-                       update_lrz_sa(so, bs->func);
+                       update_lrz_stencil(so, bs->func, !!bs->writemask);
 
                        so->rb_stencil_control |=
                                A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
@@ -158,19 +185,18 @@ fd6_zsa_state_create(struct pipe_context *pctx,
        }
 
        if (cso->alpha.enabled) {
-               /* stencil test happens before depth test, so without performing
-                * stencil test we don't really know what the updates to the
-                * depth buffer will be.
+               /* Alpha test is functionally a conditional discard, so we can't
+                * write LRZ before seeing if we end up discarding or not
                 */
-               update_lrz_sa(so, cso->alpha.func);
+               if (cso->alpha.func != PIPE_FUNC_ALWAYS) {
+                       so->lrz.write = false;
+               }
 
                uint32_t ref = cso->alpha.ref_value * 255.0;
                so->rb_alpha_control =
                        A6XX_RB_ALPHA_CONTROL_ALPHA_TEST |
                        A6XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
                        A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
-//             so->rb_depth_control |=
-//                     A6XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
        }
 
        so->stateobj = fd_ringbuffer_new_object(ctx->pipe, 9 * 4);
index f1c4f6f29b6543610f4d2fe6948562682d1f121b..73bfa4fba0653ba47b39cf6189d2c29d0ce63032 100644 (file)
@@ -34,6 +34,8 @@
 
 #include "freedreno_util.h"
 
+#include "fd6_context.h"
+
 struct fd6_zsa_stateobj {
        struct pipe_depth_stencil_alpha_state base;
 
@@ -42,9 +44,8 @@ struct fd6_zsa_stateobj {
        uint32_t rb_stencil_control;
        uint32_t rb_stencilmask;
        uint32_t rb_stencilwrmask;
-       uint32_t gras_lrz_cntl;
-       uint32_t rb_lrz_cntl;
-       bool lrz_write;
+
+       struct fd6_lrz_state lrz;
        bool invalidate_lrz;
 
        struct fd_ringbuffer *stateobj;
index f71cd6a86172a39c6220abbdaf7f4db40e7996ce..10b7f40f36f8c7df981f51ad252dd271ec128333 100644 (file)
 #include "freedreno_util.h"
 #include "freedreno/fdl/freedreno_layout.h"
 
+enum fd_lrz_direction {
+       FD_LRZ_UNKNOWN,
+       /* Depth func less/less-than: */
+       FD_LRZ_LESS,
+       /* Depth func greater/greater-than: */
+       FD_LRZ_GREATER,
+};
+
 struct fd_resource {
        struct pipe_resource base;
        struct fd_bo *bo;
@@ -86,6 +94,7 @@ struct fd_resource {
         * fdl_layout
         */
        bool lrz_valid : 1;
+       enum fd_lrz_direction lrz_direction : 2;
        uint16_t lrz_width;  // for lrz clear, does this differ from lrz_pitch?
        uint16_t lrz_height;
        uint16_t lrz_pitch;