From 4160ac5ee41630a5c9fc4e1f3520f0fabf42cb14 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 1 Aug 2014 13:32:49 -0700 Subject: [PATCH] vc4: Add support for depth clears and tests within a tile. This doesn't load/store the Z contents across submits yet. It also disables early Z, since it's going to require tracking of Z functions across multiple state updates to track the early Z direction and whether it can be used. v2: Move the key setup to before the search for the key. --- src/gallium/drivers/vc4/vc4_context.c | 6 +++--- src/gallium/drivers/vc4/vc4_context.h | 10 +++++++++- src/gallium/drivers/vc4/vc4_draw.c | 5 ++++- src/gallium/drivers/vc4/vc4_emit.c | 14 ++++++++++---- src/gallium/drivers/vc4/vc4_program.c | 8 ++++++++ src/gallium/drivers/vc4/vc4_qir.c | 1 + src/gallium/drivers/vc4/vc4_qir.h | 1 + src/gallium/drivers/vc4/vc4_qpu_defines.h | 1 + src/gallium/drivers/vc4/vc4_qpu_emit.c | 8 ++++++++ src/gallium/drivers/vc4/vc4_state.c | 23 ++++++++++++++++++++--- 10 files changed, 65 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 11e42a3ad91..6991e6a2422 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -113,9 +113,9 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_reloc(vc4, &vc4->rcl, ctex->bo, csurf->offset); cl_u16(&vc4->rcl, width); cl_u16(&vc4->rcl, height); - cl_u8(&vc4->rcl, (VC4_RENDER_CONFIG_MEMORY_FORMAT_LINEAR | - VC4_RENDER_CONFIG_FORMAT_RGBA8888)); - cl_u8(&vc4->rcl, 0); + cl_u16(&vc4->rcl, (VC4_RENDER_CONFIG_MEMORY_FORMAT_LINEAR | + VC4_RENDER_CONFIG_FORMAT_RGBA8888 | + VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE)); /* The tile buffer normally gets cleared when the previous tile is * stored. If the clear values changed between frames, then the tile diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index d6367871358..fe51072180b 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -143,6 +143,7 @@ struct vc4_context { */ uint32_t resolve; uint32_t clear_color[2]; + uint32_t clear_depth; /**< 24-bit unorm depth */ /** * Set if some drawing (triangles, blits, or just a glClear()) has @@ -165,7 +166,7 @@ struct vc4_context { struct pipe_scissor_state scissor; struct pipe_blend_state *blend; struct vc4_rasterizer_state *rasterizer; - struct pipe_depth_stencil_alpha_state *zsa; + struct vc4_depth_stencil_alpha_state *zsa; struct vc4_texture_stateobj verttex, fragtex; @@ -194,6 +195,13 @@ struct vc4_rasterizer_state { float point_size; }; +struct vc4_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state base; + + /* VC4_CONFIGURATION_BITS */ + uint8_t config_bits[3]; +}; + static inline struct vc4_context * vc4_context(struct pipe_context *pcontext) { diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 3b8a5d9b91e..77bbecf2a4e 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -181,7 +181,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */ } - if (vc4->zsa && vc4->zsa->depth.enabled) { + if (vc4->zsa && vc4->zsa->base.depth.enabled) { vc4->resolve |= PIPE_CLEAR_DEPTH; } vc4->resolve |= PIPE_CLEAR_COLOR0; @@ -215,6 +215,9 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers, color->f); } + if (buffers & PIPE_CLEAR_DEPTH) + vc4->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth); + vc4->cleared |= buffers; vc4->resolve |= buffers; diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c index 9c41505288c..476ea9c6fd4 100644 --- a/src/gallium/drivers/vc4/vc4_emit.c +++ b/src/gallium/drivers/vc4/vc4_emit.c @@ -36,11 +36,17 @@ vc4_emit_state(struct pipe_context *pctx) cl_u16(&vc4->bcl, vc4->scissor.maxy - vc4->scissor.miny); } - if (vc4->dirty & VC4_DIRTY_RASTERIZER) { + if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) { cl_u8(&vc4->bcl, VC4_PACKET_CONFIGURATION_BITS); - cl_u8(&vc4->bcl, vc4->rasterizer->config_bits[0]); - cl_u8(&vc4->bcl, vc4->rasterizer->config_bits[1]); - cl_u8(&vc4->bcl, vc4->rasterizer->config_bits[2]); + cl_u8(&vc4->bcl, + vc4->rasterizer->config_bits[0] | + vc4->zsa->config_bits[0]); + cl_u8(&vc4->bcl, + vc4->rasterizer->config_bits[1] | + vc4->zsa->config_bits[1]); + cl_u8(&vc4->bcl, + vc4->rasterizer->config_bits[2] | + vc4->zsa->config_bits[2]); } if (vc4->dirty & VC4_DIRTY_VIEWPORT) { diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index b7ed1bf60a0..963e6eb3b44 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -67,6 +67,7 @@ struct vc4_key { struct vc4_fs_key { struct vc4_key base; enum pipe_format color_format; + bool depth_enabled; }; struct vc4_vs_key { @@ -738,6 +739,11 @@ emit_frag_end(struct tgsi_to_qir *trans) trans->outputs[format_desc->swizzle[3]], }; + if (trans->fs_key->depth_enabled) { + qir_emit(c, qir_inst(QOP_TLB_PASSTHROUGH_Z_WRITE, c->undef, + c->undef, c->undef)); + } + qir_emit(c, qir_inst4(QOP_PACK_COLORS, t, swizzled_outputs[0], swizzled_outputs[1], @@ -1001,6 +1007,8 @@ vc4_update_compiled_fs(struct vc4_context *vc4) if (vc4->framebuffer.cbufs[0]) key->color_format = vc4->framebuffer.cbufs[0]->format; + key->depth_enabled = vc4->zsa->base.depth.enabled; + vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key); if (vc4->prog.fs) return; diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 0499eb9406f..9462da58660 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -61,6 +61,7 @@ static const struct qir_op_info qir_op_info[] = { [QOP_PACK_SCALED] = { "pack_scaled", 1, 2 }, [QOP_VPM_WRITE] = { "vpm_write", 0, 1, true }, [QOP_VPM_READ] = { "vpm_read", 0, 1, true }, + [QOP_TLB_PASSTHROUGH_Z_WRITE] = { "tlb_passthrough_z", 0, 0, true }, [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true }, [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 }, diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 1b450cac8c5..b578e7e0b9e 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -70,6 +70,7 @@ enum qop { QOP_PACK_COLORS, QOP_VPM_WRITE, QOP_VPM_READ, + QOP_TLB_PASSTHROUGH_Z_WRITE, QOP_TLB_COLOR_WRITE, QOP_VARY_ADD_C, diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h index 224d9aaa44d..f7ad01c6163 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_defines.h +++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h @@ -67,6 +67,7 @@ enum qpu_op_mul { }; enum qpu_raddr { + QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */ /* 0-31 are the plain regfile a or b fields */ QPU_R_UNIF = 32, QPU_R_VARY = 35, diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 9d55390c67f..33abf6dcde4 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -213,6 +213,8 @@ vc4_generate_code(struct qcompile *c) if (qinst->src[i].file == QFILE_TEMP) reg_uses_remaining[qinst->src[i].index]++; } + if (qinst->op == QOP_TLB_PASSTHROUGH_Z_WRITE) + reg_in_use[3 + 32 + QPU_R_FRAG_PAYLOAD_ZW] = true; } switch (c->stage) { @@ -458,6 +460,12 @@ vc4_generate_code(struct qcompile *c) break; + case QOP_TLB_PASSTHROUGH_Z_WRITE: + queue(c, qpu_inst(qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), + qpu_rb(QPU_R_FRAG_PAYLOAD_ZW)), + qpu_m_NOP())); + break; + case QOP_TLB_COLOR_WRITE: queue(c, qpu_inst(qpu_a_MOV(qpu_tlbc(), src[0]), diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index ab1e8be6ee3..69fd2187720 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -107,8 +107,6 @@ vc4_create_rasterizer_state(struct pipe_context *pctx, if (cso->offset_tri) so->config_bits[0] |= VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET; - so->config_bits[2] |= VC4_CONFIG_BITS_EARLY_Z_UPDATE; - return so; } @@ -124,7 +122,26 @@ static void * vc4_create_depth_stencil_alpha_state(struct pipe_context *pctx, const struct pipe_depth_stencil_alpha_state *cso) { - return vc4_generic_cso_state_create(cso, sizeof(*cso)); + struct vc4_depth_stencil_alpha_state *so; + + so = CALLOC_STRUCT(vc4_depth_stencil_alpha_state); + if (!so) + return NULL; + + so->base = *cso; + + if (cso->depth.enabled) { + if (cso->depth.writemask) { + so->config_bits[1] |= VC4_CONFIG_BITS_Z_UPDATE; + } + so->config_bits[1] |= (cso->depth.func << + VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT); + } else { + so->config_bits[1] |= (PIPE_FUNC_ALWAYS << + VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT); + } + + return so; } static void -- 2.30.2