From: Rob Clark Date: Wed, 3 Jun 2020 17:06:58 +0000 (-0700) Subject: freedreno/a6xx: re-work LRZ state tracking X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=07887c9f34c664c4e87008b9d9b76dc06a2d7c1b;p=mesa.git freedreno/a6xx: re-work LRZ state tracking In particular, properly detect reversal of depth-test direction. With that we can remove a lot of cases where we were unnecessarily invalidating LRZ, which was simply papering over the direction- reversal issue in deqp. Signed-off-by: Rob Clark Part-of: --- diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.c b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c index 16e2bfaf1b8..3d4b1005524 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c @@ -148,11 +148,6 @@ fd6_blend_state_create(struct pipe_context *pctx, const struct pipe_blend_state *cso) { struct fd6_blend_stateobj *so; - bool reads_dest = false; - - if (cso->logicop_enable) { - reads_dest = util_logicop_reads_dest(cso->logicop_func); - } so = rzalloc_size(NULL, sizeof(*so)); if (!so) @@ -160,21 +155,21 @@ fd6_blend_state_create(struct pipe_context *pctx, so->base = *cso; so->ctx = fd_context(pctx); - so->lrz_write = true; /* unless blend enabled for any MRT */ + + if (cso->logicop_enable) { + so->reads_dest |= util_logicop_reads_dest(cso->logicop_func); + } unsigned nr = cso->independent_blend_enable ? cso->max_rt : 0; for (unsigned i = 0; i <= nr; i++) { const struct pipe_rt_blend_state *rt = &cso->rt[i]; + so->reads_dest |= rt->blend_enable; if (rt->blend_enable) { - so->lrz_write = false; + so->reads_dest = true; } } - if (reads_dest) { - so->lrz_write = false; - } - util_dynarray_init(&so->variants, so); return so; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.h b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h index 09c4609f35a..ef7dde8cf65 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h @@ -48,7 +48,7 @@ struct fd6_blend_stateobj { struct pipe_blend_state base; struct fd_context *ctx; - bool lrz_write; + bool reads_dest; struct util_dynarray variants; }; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index c890cdd8423..bb48ecea784 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -31,11 +31,19 @@ #include "util/u_upload_mgr.h" #include "freedreno_context.h" +#include "freedreno_resource.h" #include "ir3/ir3_shader.h" #include "a6xx.xml.h" +struct fd6_lrz_state { + bool enable : 1; + bool write : 1; + bool test : 1; + enum fd_lrz_direction direction : 2; +}; + struct fd6_context { struct fd_context base; @@ -106,15 +114,11 @@ struct fd6_context { uint32_t SP_UNKNOWN_A0F8; } magic; - struct { /* previous binning/draw lrz state, which is a function of multiple * gallium stateobjs, but doesn't necessarily change as frequently: */ - struct { - uint32_t gras_lrz_cntl; - uint32_t rb_lrz_cntl; - } lrz[2]; + struct fd6_lrz_state lrz[2]; } last; }; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index dab1cf32cd9..232262765a3 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -217,11 +217,6 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, ctx->stats.gs_regs += COND(emit.gs, ir3_shader_halfregs(emit.gs)); ctx->stats.fs_regs += ir3_shader_halfregs(emit.fs); - /* figure out whether we need to disable LRZ write for binning - * pass using draw pass's fs: - */ - emit.no_lrz_write = emit.fs->writes_pos || emit.fs->no_earlyz || emit.fs->has_kill; - struct fd_ringbuffer *ring = ctx->batch->draw; struct CP_DRAW_INDX_OFFSET_0 draw0 = { @@ -493,6 +488,7 @@ fd6_clear(struct fd_context *ctx, unsigned buffers, struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) { zsbuf->lrz_valid = true; + zsbuf->lrz_direction = FD_LRZ_UNKNOWN; fd6_clear_lrz(ctx->batch, zsbuf, depth); } } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index cdc8dfb85bd..bd30d69dd37 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -585,45 +585,95 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp) return ring; } -static struct fd_ringbuffer * -build_lrz(struct fd6_emit *emit, bool binning_pass) +/** + * Calculate normalized LRZ state based on zsa/prog/blend state, updating + * the zsbuf's lrz state as necessary to detect the cases where we need + * to invalidate lrz. + */ +static struct fd6_lrz_state +compute_lrz_state(struct fd6_emit *emit) { struct fd_context *ctx = emit->ctx; + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; + const struct ir3_shader_variant *fs = emit->fs; + struct fd6_lrz_state lrz; + struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend); struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa); - struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); - uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl; - uint32_t rb_lrz_cntl = zsa->rb_lrz_cntl; - if (zsa->invalidate_lrz) { + lrz = zsa->lrz; + + /* normalize lrz state: */ + if (blend->reads_dest || fs->writes_pos || fs->no_earlyz || fs->has_kill) { + lrz.write = false; + } + + /* if we change depthfunc direction, bail out on using LRZ. The + * LRZ buffer encodes a min/max depth value per block, but if + * we switch from GT/GE <-> LT/LE, those values cannot be + * interpreted properly. + */ + if (zsa->base.depth.enabled && + (rsc->lrz_direction != FD_LRZ_UNKNOWN) && + (rsc->lrz_direction != lrz.direction)) { rsc->lrz_valid = false; - gras_lrz_cntl = 0; - rb_lrz_cntl = 0; - } else if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) { - gras_lrz_cntl = 0; - rb_lrz_cntl = 0; - } else if (binning_pass && blend->lrz_write && zsa->lrz_write) { - gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_LRZ_WRITE; } + if (zsa->invalidate_lrz || !rsc->lrz_valid) { + rsc->lrz_valid = false; + memset(&lrz, 0, sizeof(lrz)); + } + + if (fs->no_earlyz || fs->writes_pos) { + lrz.enable = false; + lrz.write = false; + lrz.test = false; + } + + /* Once we start writing to the real depth buffer, we lock in the + * direction for LRZ.. if we have to skip a LRZ write for any + * reason, it is still safe to have LRZ until there is a direction + * reversal. Prior to the reversal, since we disabled LRZ writes + * in the "unsafe" cases, this just means that the LRZ test may + * not early-discard some things that end up not passing a later + * test (ie. be overly concervative). But once you have a reversal + * of direction, it is possible to increase/decrease the z value + * to the point where the overly-conservative test is incorrect. + */ + if (zsa->base.depth.writemask) { + rsc->lrz_direction = lrz.direction; + } + + return lrz; +} + +static struct fd_ringbuffer * +build_lrz(struct fd6_emit *emit, bool binning_pass) +{ + struct fd_context *ctx = emit->ctx; struct fd6_context *fd6_ctx = fd6_context(ctx); - if ((fd6_ctx->last.lrz[binning_pass].gras_lrz_cntl == gras_lrz_cntl) && - (fd6_ctx->last.lrz[binning_pass].rb_lrz_cntl == rb_lrz_cntl) && - !ctx->last.dirty) + struct fd6_lrz_state lrz = compute_lrz_state(emit); + + /* If the LRZ state has not changed, we can skip the emit: */ + if (!ctx->last.dirty && + !memcmp(&fd6_ctx->last.lrz[binning_pass], &lrz, sizeof(lrz))) return NULL; - fd6_ctx->last.lrz[binning_pass].gras_lrz_cntl = gras_lrz_cntl; - fd6_ctx->last.lrz[binning_pass].rb_lrz_cntl = rb_lrz_cntl; + fd6_ctx->last.lrz[binning_pass] = lrz; struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(ctx->batch->submit, - 16, FD_RINGBUFFER_STREAMING); - - OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1); - OUT_RING(ring, gras_lrz_cntl); - - OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1); - OUT_RING(ring, rb_lrz_cntl); + 4*4, FD_RINGBUFFER_STREAMING); + + OUT_REG(ring, A6XX_GRAS_LRZ_CNTL( + .enable = lrz.enable, + .lrz_write = lrz.write, + .greater = lrz.direction == FD_LRZ_GREATER, + .z_test_enable = lrz.test, + )); + OUT_REG(ring, A6XX_RB_LRZ_CNTL( + .enable = lrz.enable, + )); return ring; } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 16e54047245..c66418b1644 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -95,12 +95,6 @@ struct fd6_emit { bool no_decode_srgb; bool primitive_restart; - /* in binning pass, we don't have real frag shader, so we - * don't know if real draw disqualifies lrz write. So just - * figure that out up-front and stash it in the emit. - */ - bool no_lrz_write; - /* cached to avoid repeated lookups: */ const struct fd6_program_state *prog; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index 790215dbc0b..829a4d57c5d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -1381,7 +1381,7 @@ fd6_emit_tile_fini(struct fd_batch *batch) fd6_emit_ib(batch->gmem, batch->epilogue); OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1); - OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_FC_ENABLE); + OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE); fd6_emit_lrz_flush(ring); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c index 6aa63b2e89f..d7c2000b424 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c @@ -34,34 +34,60 @@ #include "fd6_context.h" #include "fd6_format.h" -/* update lza state based on stencil/alpha-test func: */ +/* update lza state based on stencil-test func: + * + * Conceptually the order of the pipeline is: + * + * + * FS -> Alpha-Test -> Stencil-Test -> Depth-Test + * | | + * if wrmask != 0 if wrmask != 0 + * | | + * v v + * Stencil-Write Depth-Write + * + * Because Stencil-Test can have side effects (Stencil-Write) prior + * to depth test, in this case we potentially need to disable early + * lrz-test. See: + * + * https://www.khronos.org/opengl/wiki/Per-Sample_Processing + */ static void -update_lrz_sa(struct fd6_zsa_stateobj *so, enum pipe_compare_func func) +update_lrz_stencil(struct fd6_zsa_stateobj *so, enum pipe_compare_func func, + bool stencil_write) { switch (func) { case PIPE_FUNC_ALWAYS: - /* nothing to do for LRZ: */ + /* nothing to do for LRZ, but for stencil test when stencil- + * write is enabled, we need to disable lrz-test, since + * conceptually stencil test and write happens before depth- + * test: + */ + if (stencil_write) { + so->lrz.enable = false; + so->lrz.test = false; + } break; case PIPE_FUNC_NEVER: /* fragment never passes, disable lrz_write for this draw: */ - so->lrz_write = false; + so->lrz.write = false; break; default: /* whether the fragment passes or not depends on result * of stencil test, which we cannot know when doing binning * pass: - * - * TODO we maybe don't have to invalidate_lrz, depending on - * the depth/stencil func? Ie. if there is an opaque surface - * behind what is currently being drawn, we could just disable - * lrz_write for a conservative but correct result? */ - so->invalidate_lrz = true; - so->lrz_write = false; + so->lrz.write = false; + /* similarly to the PIPE_FUNC_ALWAY case, if there are side- + * effects from stencil test we need to disable lrz-test. + */ + if (stencil_write) { + so->lrz.enable = false; + so->lrz.test = false; + } break; } } - void * fd6_zsa_state_create(struct pipe_context *pctx, const struct pipe_depth_stencil_alpha_state *cso) @@ -83,35 +109,36 @@ fd6_zsa_state_create(struct pipe_context *pctx, A6XX_RB_DEPTH_CNTL_Z_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; - so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_Z_TEST_ENABLE; + so->lrz.test = true; if (cso->depth.writemask) { - so->lrz_write = true; + so->lrz.write = true; } switch (cso->depth.func) { case PIPE_FUNC_LESS: case PIPE_FUNC_LEQUAL: - so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE; - so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE; + so->lrz.enable = true; + so->lrz.direction = FD_LRZ_LESS; break; case PIPE_FUNC_GREATER: case PIPE_FUNC_GEQUAL: - so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_GREATER; - so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE; + so->lrz.enable = true; + so->lrz.direction = FD_LRZ_GREATER; break; case PIPE_FUNC_NEVER: - so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE; - so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE; - so->lrz_write = false; + so->lrz.enable = true; + so->lrz.write = false; + so->lrz.direction = FD_LRZ_LESS; break; + /* TODO revisit these: */ case PIPE_FUNC_EQUAL: case PIPE_FUNC_NOTEQUAL: case PIPE_FUNC_ALWAYS: - so->lrz_write = false; + so->lrz.write = false; so->invalidate_lrz = true; break; } @@ -127,7 +154,7 @@ fd6_zsa_state_create(struct pipe_context *pctx, * stencil test we don't really know what the updates to the * depth buffer will be. */ - update_lrz_sa(so, s->func); + update_lrz_stencil(so, s->func, !!s->writemask); so->rb_stencil_control |= A6XX_RB_STENCIL_CONTROL_STENCIL_READ | @@ -143,7 +170,7 @@ fd6_zsa_state_create(struct pipe_context *pctx, if (cso->stencil[1].enabled) { const struct pipe_stencil_state *bs = &cso->stencil[1]; - update_lrz_sa(so, bs->func); + update_lrz_stencil(so, bs->func, !!bs->writemask); so->rb_stencil_control |= A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | @@ -158,19 +185,18 @@ fd6_zsa_state_create(struct pipe_context *pctx, } if (cso->alpha.enabled) { - /* stencil test happens before depth test, so without performing - * stencil test we don't really know what the updates to the - * depth buffer will be. + /* Alpha test is functionally a conditional discard, so we can't + * write LRZ before seeing if we end up discarding or not */ - update_lrz_sa(so, cso->alpha.func); + if (cso->alpha.func != PIPE_FUNC_ALWAYS) { + so->lrz.write = false; + } uint32_t ref = cso->alpha.ref_value * 255.0; so->rb_alpha_control = A6XX_RB_ALPHA_CONTROL_ALPHA_TEST | A6XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) | A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func); -// so->rb_depth_control |= -// A6XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; } so->stateobj = fd_ringbuffer_new_object(ctx->pipe, 9 * 4); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h index f1c4f6f29b6..73bfa4fba06 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h @@ -34,6 +34,8 @@ #include "freedreno_util.h" +#include "fd6_context.h" + struct fd6_zsa_stateobj { struct pipe_depth_stencil_alpha_state base; @@ -42,9 +44,8 @@ struct fd6_zsa_stateobj { uint32_t rb_stencil_control; uint32_t rb_stencilmask; uint32_t rb_stencilwrmask; - uint32_t gras_lrz_cntl; - uint32_t rb_lrz_cntl; - bool lrz_write; + + struct fd6_lrz_state lrz; bool invalidate_lrz; struct fd_ringbuffer *stateobj; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index f71cd6a8617..10b7f40f36f 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -36,6 +36,14 @@ #include "freedreno_util.h" #include "freedreno/fdl/freedreno_layout.h" +enum fd_lrz_direction { + FD_LRZ_UNKNOWN, + /* Depth func less/less-than: */ + FD_LRZ_LESS, + /* Depth func greater/greater-than: */ + FD_LRZ_GREATER, +}; + struct fd_resource { struct pipe_resource base; struct fd_bo *bo; @@ -86,6 +94,7 @@ struct fd_resource { * fdl_layout */ bool lrz_valid : 1; + enum fd_lrz_direction lrz_direction : 2; uint16_t lrz_width; // for lrz clear, does this differ from lrz_pitch? uint16_t lrz_height; uint16_t lrz_pitch;