From 1a51c4a87ea9202af90ccb28bd697f0df753f587 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 12 Oct 2018 15:10:46 -0400 Subject: [PATCH] freedreno/a6xx: a bit more state emit cleanup Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 2 +- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 44 +++++-------------- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 3 +- .../drivers/freedreno/a6xx/fd6_program.c | 15 ++++++- 4 files changed, 27 insertions(+), 37 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index 0061a6d094a..f758645548e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -133,7 +133,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct pipe_draw_info *info = emit->info; enum pc_di_primtype primtype = ctx->primtypes[info->mode]; - fd6_emit_state(ctx, ring, emit); + fd6_emit_state(ring, emit); if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) fd6_emit_vertex_bufs(ring, emit); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 1591c202580..e5c568fd6b5 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -584,9 +584,9 @@ fd6_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd6_emit *emit) } void -fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, - struct fd6_emit *emit) +fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) { + struct fd_context *ctx = emit->ctx; struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; const struct fd6_program_state *prog = fd6_emit_get_prog(emit); const struct ir3_shader_variant *vp = emit->vs; @@ -608,9 +608,12 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A6XX_RB_STENCIL_CONTROL, 1); OUT_RING(ring, zsa->rb_stencil_control); + + OUT_PKT4(ring, REG_A6XX_RB_DEPTH_CNTL, 1); + OUT_RING(ring, zsa->rb_depth_cntl); } - if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) { struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa); if (pfb->zsbuf) { @@ -644,20 +647,6 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, zsa->rb_stencilwrmask); } - if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { - struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa); - bool fragz = fp->has_kill | fp->writes_pos; - - OUT_PKT4(ring, REG_A6XX_RB_DEPTH_CNTL, 1); - OUT_RING(ring, zsa->rb_depth_cntl); - - OUT_PKT4(ring, REG_A6XX_RB_DEPTH_PLANE_CNTL, 1); - OUT_RING(ring, COND(fragz, A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z)); - - OUT_PKT4(ring, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); - OUT_RING(ring, COND(fragz, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z)); - } - /* NOTE: scissor enabled bit is part of rasterizer state: */ if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) { struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); @@ -737,12 +726,7 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, #endif } - /* note: must come after program emit.. because there is some overlap - * in registers, ex. PC_PRIMITIVE_CNTL and we rely on some cached - * values from fd6_program_emit() to avoid having to re-emit the prog - * every time rast state changes. - * - * Since the primitive restart state is not part of a tracked object, we + /* Since the primitive restart state is not part of a tracked object, we * re-emit this register every time. */ if (emit->info && ctx->rasterizer) { @@ -763,21 +747,16 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { - uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH); unsigned nr = pfb->nr_cbufs; - if (emit->binning_pass) - nr = 0; - else if (ctx->rasterizer->rasterizer_discard) + if (ctx->rasterizer->rasterizer_discard) nr = 0; OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2); OUT_RING(ring, COND(fp->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z)); OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr)); - OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); - OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) | - 0xfcfc0000); + OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1); OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr)); } @@ -794,14 +773,13 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd_ringbuffer_del(vsconstobj); } - if ((ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_CONST) && - !emit->binning_pass) { + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_CONST) { struct fd_ringbuffer *fsconstobj = fd_ringbuffer_new_flags(ctx->pipe, 0x1000, FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); ir3_emit_fs_consts(fp, fsconstobj, ctx); - fd6_emit_add_group(emit, fsconstobj, FD6_GROUP_FS_CONST, 0x7); + fd6_emit_add_group(emit, fsconstobj, FD6_GROUP_FS_CONST, 0x6); fd_ringbuffer_del(fsconstobj); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 02c41f03ce7..8130b86a0b8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -171,8 +171,7 @@ bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, void fd6_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd6_emit *emit); -void fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, - struct fd6_emit *emit); +void fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct ir3_shader_variant *cp); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 9fff9d9f7b0..bc79e5b0621 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -303,7 +303,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state, bool binning_pass) { struct stage s[MAX_STAGES]; - uint32_t pos_regid, psize_regid, color_regid[8]; + uint32_t pos_regid, psize_regid, color_regid[8], posz_regid; uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid, samp_mask_regid; uint32_t vcoord_regid, vertex_regid, instance_regid; enum a3xx_threadsize fssz; @@ -340,6 +340,7 @@ setup_stateobj(struct fd_ringbuffer *ring, coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD); + posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -384,6 +385,10 @@ setup_stateobj(struct fd_ringbuffer *ring, A6XX_SP_FS_CONFIG_NSAMP(s[FS].v->num_samp)); /* SP_FS_CONFIG */ OUT_RING(ring, s[FS].instrlen); /* SP_FS_INSTRLEN */ + OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 1); + OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) | + 0xfcfc0000); + OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4); OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(s[VS].constlen) | 0x100); /* HLSQ_VS_CONSTLEN */ OUT_RING(ring, A6XX_HLSQ_HS_CNTL_CONSTLEN(s[HS].constlen)); /* HLSQ_HS_CONSTLEN */ @@ -607,6 +612,14 @@ setup_stateobj(struct fd_ringbuffer *ring, OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */ OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_5 */ OUT_RING(ring, 0x00000000); /* VFD_CONTROL_6 */ + + bool fragz = s[FS].v->has_kill | s[FS].v->writes_pos; + + OUT_PKT4(ring, REG_A6XX_RB_DEPTH_PLANE_CNTL, 1); + OUT_RING(ring, COND(fragz, A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z)); + + OUT_PKT4(ring, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); + OUT_RING(ring, COND(fragz, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z)); } /* emits the program state which is not part of the stateobj because of -- 2.30.2