From 83d7230fd5ab69e7e111e3a02e604e65922fb171 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Sat, 20 Aug 2016 00:14:43 -0400
Subject: [PATCH] a3xx: make use of software clipping when hw can't handle it

The hw clipper only handles up to 6 UCPs. If there are more than 6 UCPs,
or a clip vertex, or clip distances are in use, then we must use the
fallback discard-based clipping from the frag shader.

Signed-off-by: Ilia Mirkin
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c    |  3 +++
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c    | 12 ++++++++----
 src/gallium/drivers/freedreno/a3xx/fd3_program.c | 15 +++++++++++++++
 src/gallium/drivers/freedreno/a3xx/fd3_program.h |  3 +++
 src/gallium/drivers/freedreno/ir3/ir3_shader.c   |  6 ++++++
 src/gallium/drivers/freedreno/ir3/ir3_shader.h   |  1 +
 6 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index a1594b641a4..d26786f6499 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -156,6 +156,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
 		.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
 	};
 
+	if (fd3_needs_manual_clipping(ctx->prog.vp, ctx->rasterizer))
+		emit.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
+
 	fixup_shader_state(ctx, &emit.key);
 
 	unsigned dirty = ctx->dirty;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index e66836b43c2..7945184d8f0 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -571,20 +571,24 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
 		uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
 				->gras_cl_clip_cntl;
+		uint8_t planes = ctx->rasterizer->clip_plane_enable;
 		val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
 		val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
 				A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
-		/* TODO only use if prog doesn't use clipvertex/clipdist */
-		val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
-				MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
+		if (!emit->key.ucp_enables)
+			val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+					MIN2(util_bitcount(planes), 6));
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 		OUT_RING(ring, val);
 	}
 
-	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) {
 		uint32_t planes = ctx->rasterizer->clip_plane_enable;
 		int count = 0;
 
+		if (emit->key.ucp_enables)
+			planes = 0;
+
 		while (planes && count < 6) {
 			int i = ffs(planes) - 1;
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 485a4da2c1a..3146dc5d062 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -28,6 +28,7 @@
 
 #include "pipe/p_state.h"
 #include "util/u_string.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
@@ -85,6 +86,20 @@ fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
 	delete_shader_stateobj(so);
 }
 
+bool
+fd3_needs_manual_clipping(const struct fd3_shader_stateobj *so,
+		const struct pipe_rasterizer_state *rast)
+{
+	uint64_t outputs = ir3_shader_outputs(so->shader);
+
+	return (!rast->depth_clip ||
+			util_bitcount(rast->clip_plane_enable) > 6 ||
+			outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
+					(1ULL << VARYING_SLOT_CLIP_DIST0) |
+					(1ULL << VARYING_SLOT_CLIP_DIST1)));
+}
+
+
 static void
 emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
 {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index b3fcc0c68c9..b95df4cc6b3 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -44,4 +44,7 @@ void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 
 void fd3_prog_init(struct pipe_context *pctx);
 
+bool fd3_needs_manual_clipping(const struct fd3_shader_stateobj *,
+		const struct pipe_rasterizer_state *);
+
 #endif /* FD3_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index ac48132026c..76460d9231a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -469,6 +469,12 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 	debug_printf("\n");
 }
 
+uint64_t
+ir3_shader_outputs(const struct ir3_shader *so)
+{
+	return so->nir->info.outputs_written;
+}
+
 /* This has to reach into the fd_context a bit more than the rest of
  * ir3, but it needs to be aligned with the compiler, so both agree
  * on which const regs hold what. And the logic is identical between
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index b773609249f..8c9483e1bc7 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -272,6 +272,7 @@ void ir3_shader_destroy(struct ir3_shader *shader);
 struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
 		struct ir3_shader_key key, struct pipe_debug_callback *debug);
 void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
+uint64_t ir3_shader_outputs(const struct ir3_shader *so);
 
 struct fd_ringbuffer;
 struct fd_context;
-- 
2.30.2