In particular, properly detect reversal of depth-test direction.
With that we can remove a lot of cases where we were unnecessarily
invalidating LRZ, which was simply papering over the direction-
reversal issue in deqp.
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5298>
const struct pipe_blend_state *cso)
{
struct fd6_blend_stateobj *so;
- bool reads_dest = false;
-
- if (cso->logicop_enable) {
- reads_dest = util_logicop_reads_dest(cso->logicop_func);
- }
so = rzalloc_size(NULL, sizeof(*so));
if (!so)
so->base = *cso;
so->ctx = fd_context(pctx);
- so->lrz_write = true; /* unless blend enabled for any MRT */
+
+ if (cso->logicop_enable) {
+ so->reads_dest |= util_logicop_reads_dest(cso->logicop_func);
+ }
unsigned nr = cso->independent_blend_enable ? cso->max_rt : 0;
for (unsigned i = 0; i <= nr; i++) {
const struct pipe_rt_blend_state *rt = &cso->rt[i];
+ so->reads_dest |= rt->blend_enable;
if (rt->blend_enable) {
- so->lrz_write = false;
+ so->reads_dest = true;
}
}
- if (reads_dest) {
- so->lrz_write = false;
- }
-
util_dynarray_init(&so->variants, so);
return so;
struct pipe_blend_state base;
struct fd_context *ctx;
- bool lrz_write;
+ bool reads_dest;
struct util_dynarray variants;
};
#include "util/u_upload_mgr.h"
#include "freedreno_context.h"
+#include "freedreno_resource.h"
#include "ir3/ir3_shader.h"
#include "a6xx.xml.h"
+struct fd6_lrz_state {
+ bool enable : 1;
+ bool write : 1;
+ bool test : 1;
+ enum fd_lrz_direction direction : 2;
+};
+
struct fd6_context {
struct fd_context base;
uint32_t SP_UNKNOWN_A0F8;
} magic;
-
struct {
/* previous binning/draw lrz state, which is a function of multiple
* gallium stateobjs, but doesn't necessarily change as frequently:
*/
- struct {
- uint32_t gras_lrz_cntl;
- uint32_t rb_lrz_cntl;
- } lrz[2];
+ struct fd6_lrz_state lrz[2];
} last;
};
ctx->stats.gs_regs += COND(emit.gs, ir3_shader_halfregs(emit.gs));
ctx->stats.fs_regs += ir3_shader_halfregs(emit.fs);
- /* figure out whether we need to disable LRZ write for binning
- * pass using draw pass's fs:
- */
- emit.no_lrz_write = emit.fs->writes_pos || emit.fs->no_earlyz || emit.fs->has_kill;
-
struct fd_ringbuffer *ring = ctx->batch->draw;
struct CP_DRAW_INDX_OFFSET_0 draw0 = {
struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) {
zsbuf->lrz_valid = true;
+ zsbuf->lrz_direction = FD_LRZ_UNKNOWN;
fd6_clear_lrz(ctx->batch, zsbuf, depth);
}
}
return ring;
}
-static struct fd_ringbuffer *
-build_lrz(struct fd6_emit *emit, bool binning_pass)
+/**
+ * Calculate normalized LRZ state based on zsa/prog/blend state, updating
+ * the zsbuf's lrz state as necessary to detect the cases where we need
+ * to invalidate lrz.
+ */
+static struct fd6_lrz_state
+compute_lrz_state(struct fd6_emit *emit)
{
struct fd_context *ctx = emit->ctx;
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+ const struct ir3_shader_variant *fs = emit->fs;
+ struct fd6_lrz_state lrz;
+
struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
- uint32_t rb_lrz_cntl = zsa->rb_lrz_cntl;
- if (zsa->invalidate_lrz) {
+ lrz = zsa->lrz;
+
+ /* normalize lrz state: */
+ if (blend->reads_dest || fs->writes_pos || fs->no_earlyz || fs->has_kill) {
+ lrz.write = false;
+ }
+
+ /* if we change depthfunc direction, bail out on using LRZ. The
+ * LRZ buffer encodes a min/max depth value per block, but if
+ * we switch from GT/GE <-> LT/LE, those values cannot be
+ * interpreted properly.
+ */
+ if (zsa->base.depth.enabled &&
+ (rsc->lrz_direction != FD_LRZ_UNKNOWN) &&
+ (rsc->lrz_direction != lrz.direction)) {
rsc->lrz_valid = false;
- gras_lrz_cntl = 0;
- rb_lrz_cntl = 0;
- } else if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) {
- gras_lrz_cntl = 0;
- rb_lrz_cntl = 0;
- } else if (binning_pass && blend->lrz_write && zsa->lrz_write) {
- gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_LRZ_WRITE;
}
+ if (zsa->invalidate_lrz || !rsc->lrz_valid) {
+ rsc->lrz_valid = false;
+ memset(&lrz, 0, sizeof(lrz));
+ }
+
+ if (fs->no_earlyz || fs->writes_pos) {
+ lrz.enable = false;
+ lrz.write = false;
+ lrz.test = false;
+ }
+
+ /* Once we start writing to the real depth buffer, we lock in the
+ * direction for LRZ.. if we have to skip a LRZ write for any
+ * reason, it is still safe to have LRZ until there is a direction
+ * reversal. Prior to the reversal, since we disabled LRZ writes
+ * in the "unsafe" cases, this just means that the LRZ test may
+ * not early-discard some things that end up not passing a later
+ * test (ie. be overly concervative). But once you have a reversal
+ * of direction, it is possible to increase/decrease the z value
+ * to the point where the overly-conservative test is incorrect.
+ */
+ if (zsa->base.depth.writemask) {
+ rsc->lrz_direction = lrz.direction;
+ }
+
+ return lrz;
+}
+
+static struct fd_ringbuffer *
+build_lrz(struct fd6_emit *emit, bool binning_pass)
+{
+ struct fd_context *ctx = emit->ctx;
struct fd6_context *fd6_ctx = fd6_context(ctx);
- if ((fd6_ctx->last.lrz[binning_pass].gras_lrz_cntl == gras_lrz_cntl) &&
- (fd6_ctx->last.lrz[binning_pass].rb_lrz_cntl == rb_lrz_cntl) &&
- !ctx->last.dirty)
+ struct fd6_lrz_state lrz = compute_lrz_state(emit);
+
+ /* If the LRZ state has not changed, we can skip the emit: */
+ if (!ctx->last.dirty &&
+ !memcmp(&fd6_ctx->last.lrz[binning_pass], &lrz, sizeof(lrz)))
return NULL;
- fd6_ctx->last.lrz[binning_pass].gras_lrz_cntl = gras_lrz_cntl;
- fd6_ctx->last.lrz[binning_pass].rb_lrz_cntl = rb_lrz_cntl;
+ fd6_ctx->last.lrz[binning_pass] = lrz;
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(ctx->batch->submit,
- 16, FD_RINGBUFFER_STREAMING);
-
- OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
- OUT_RING(ring, gras_lrz_cntl);
-
- OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
- OUT_RING(ring, rb_lrz_cntl);
+ 4*4, FD_RINGBUFFER_STREAMING);
+
+ OUT_REG(ring, A6XX_GRAS_LRZ_CNTL(
+ .enable = lrz.enable,
+ .lrz_write = lrz.write,
+ .greater = lrz.direction == FD_LRZ_GREATER,
+ .z_test_enable = lrz.test,
+ ));
+ OUT_REG(ring, A6XX_RB_LRZ_CNTL(
+ .enable = lrz.enable,
+ ));
return ring;
}
bool no_decode_srgb;
bool primitive_restart;
- /* in binning pass, we don't have real frag shader, so we
- * don't know if real draw disqualifies lrz write. So just
- * figure that out up-front and stash it in the emit.
- */
- bool no_lrz_write;
-
/* cached to avoid repeated lookups: */
const struct fd6_program_state *prog;
fd6_emit_ib(batch->gmem, batch->epilogue);
OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
- OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_FC_ENABLE);
+ OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);
fd6_emit_lrz_flush(ring);
#include "fd6_context.h"
#include "fd6_format.h"
-/* update lza state based on stencil/alpha-test func: */
+/* update lza state based on stencil-test func:
+ *
+ * Conceptually the order of the pipeline is:
+ *
+ *
+ * FS -> Alpha-Test -> Stencil-Test -> Depth-Test
+ * | |
+ * if wrmask != 0 if wrmask != 0
+ * | |
+ * v v
+ * Stencil-Write Depth-Write
+ *
+ * Because Stencil-Test can have side effects (Stencil-Write) prior
+ * to depth test, in this case we potentially need to disable early
+ * lrz-test. See:
+ *
+ * https://www.khronos.org/opengl/wiki/Per-Sample_Processing
+ */
static void
-update_lrz_sa(struct fd6_zsa_stateobj *so, enum pipe_compare_func func)
+update_lrz_stencil(struct fd6_zsa_stateobj *so, enum pipe_compare_func func,
+ bool stencil_write)
{
switch (func) {
case PIPE_FUNC_ALWAYS:
- /* nothing to do for LRZ: */
+ /* nothing to do for LRZ, but for stencil test when stencil-
+ * write is enabled, we need to disable lrz-test, since
+ * conceptually stencil test and write happens before depth-
+ * test:
+ */
+ if (stencil_write) {
+ so->lrz.enable = false;
+ so->lrz.test = false;
+ }
break;
case PIPE_FUNC_NEVER:
/* fragment never passes, disable lrz_write for this draw: */
- so->lrz_write = false;
+ so->lrz.write = false;
break;
default:
/* whether the fragment passes or not depends on result
* of stencil test, which we cannot know when doing binning
* pass:
- *
- * TODO we maybe don't have to invalidate_lrz, depending on
- * the depth/stencil func? Ie. if there is an opaque surface
- * behind what is currently being drawn, we could just disable
- * lrz_write for a conservative but correct result?
*/
- so->invalidate_lrz = true;
- so->lrz_write = false;
+ so->lrz.write = false;
+ /* similarly to the PIPE_FUNC_ALWAY case, if there are side-
+ * effects from stencil test we need to disable lrz-test.
+ */
+ if (stencil_write) {
+ so->lrz.enable = false;
+ so->lrz.test = false;
+ }
break;
}
}
-
void *
fd6_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso)
A6XX_RB_DEPTH_CNTL_Z_ENABLE |
A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
- so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_Z_TEST_ENABLE;
+ so->lrz.test = true;
if (cso->depth.writemask) {
- so->lrz_write = true;
+ so->lrz.write = true;
}
switch (cso->depth.func) {
case PIPE_FUNC_LESS:
case PIPE_FUNC_LEQUAL:
- so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE;
- so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE;
+ so->lrz.enable = true;
+ so->lrz.direction = FD_LRZ_LESS;
break;
case PIPE_FUNC_GREATER:
case PIPE_FUNC_GEQUAL:
- so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_GREATER;
- so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE;
+ so->lrz.enable = true;
+ so->lrz.direction = FD_LRZ_GREATER;
break;
case PIPE_FUNC_NEVER:
- so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE;
- so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE;
- so->lrz_write = false;
+ so->lrz.enable = true;
+ so->lrz.write = false;
+ so->lrz.direction = FD_LRZ_LESS;
break;
+ /* TODO revisit these: */
case PIPE_FUNC_EQUAL:
case PIPE_FUNC_NOTEQUAL:
case PIPE_FUNC_ALWAYS:
- so->lrz_write = false;
+ so->lrz.write = false;
so->invalidate_lrz = true;
break;
}
* stencil test we don't really know what the updates to the
* depth buffer will be.
*/
- update_lrz_sa(so, s->func);
+ update_lrz_stencil(so, s->func, !!s->writemask);
so->rb_stencil_control |=
A6XX_RB_STENCIL_CONTROL_STENCIL_READ |
if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1];
- update_lrz_sa(so, bs->func);
+ update_lrz_stencil(so, bs->func, !!bs->writemask);
so->rb_stencil_control |=
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
}
if (cso->alpha.enabled) {
- /* stencil test happens before depth test, so without performing
- * stencil test we don't really know what the updates to the
- * depth buffer will be.
+ /* Alpha test is functionally a conditional discard, so we can't
+ * write LRZ before seeing if we end up discarding or not
*/
- update_lrz_sa(so, cso->alpha.func);
+ if (cso->alpha.func != PIPE_FUNC_ALWAYS) {
+ so->lrz.write = false;
+ }
uint32_t ref = cso->alpha.ref_value * 255.0;
so->rb_alpha_control =
A6XX_RB_ALPHA_CONTROL_ALPHA_TEST |
A6XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
-// so->rb_depth_control |=
-// A6XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
so->stateobj = fd_ringbuffer_new_object(ctx->pipe, 9 * 4);
#include "freedreno_util.h"
+#include "fd6_context.h"
+
struct fd6_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base;
uint32_t rb_stencil_control;
uint32_t rb_stencilmask;
uint32_t rb_stencilwrmask;
- uint32_t gras_lrz_cntl;
- uint32_t rb_lrz_cntl;
- bool lrz_write;
+
+ struct fd6_lrz_state lrz;
bool invalidate_lrz;
struct fd_ringbuffer *stateobj;
#include "freedreno_util.h"
#include "freedreno/fdl/freedreno_layout.h"
+enum fd_lrz_direction {
+ FD_LRZ_UNKNOWN,
+ /* Depth func less/less-than: */
+ FD_LRZ_LESS,
+ /* Depth func greater/greater-than: */
+ FD_LRZ_GREATER,
+};
+
struct fd_resource {
struct pipe_resource base;
struct fd_bo *bo;
* fdl_layout
*/
bool lrz_valid : 1;
+ enum fd_lrz_direction lrz_direction : 2;
uint16_t lrz_width; // for lrz clear, does this differ from lrz_pitch?
uint16_t lrz_height;
uint16_t lrz_pitch;