a3xx: make use of software clipping when hw can't handle it
author Ilia Mirkin <imirkin@alum.mit.edu>
Sat, 20 Aug 2016 04:14:43 +0000 (00:14 -0400)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Sat, 3 Sep 2016 23:58:42 +0000 (19:58 -0400)
The hw clipper only handles up to 6 UCPs. If there are more than 6 UCPs,
or a clip vertex, or clip distances are in use, then we must use the
fallback discard-based clipping from the frag shader.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_program.c
src/gallium/drivers/freedreno/a3xx/fd3_program.h
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index a1594b641a43f49417b382303960b0957c10f110..d26786f6499074ca5d24c47736fd9bd106c33a2f 100644 (file)
@@ -156,6 +156,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
                .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
        };
 
+       if (fd3_needs_manual_clipping(ctx->prog.vp, ctx->rasterizer))
+               emit.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
+
        fixup_shader_state(ctx, &emit.key);
 
        unsigned dirty = ctx->dirty;
index e66836b43c29a3dddf2ab758245fb31e2c8ca500..7945184d8f0f6cb293f12cb761c0ab7528716aab 100644 (file)
@@ -571,20 +571,24 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
        if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
                uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
                                ->gras_cl_clip_cntl;
+               uint8_t planes = ctx->rasterizer->clip_plane_enable;
                val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
                val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
                                A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
-               /* TODO only use if prog doesn't use clipvertex/clipdist */
-               val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
-                               MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
+               if (!emit->key.ucp_enables)
+                       val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+                                       MIN2(util_bitcount(planes), 6));
                OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
                OUT_RING(ring, val);
        }
 
-       if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+       if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) {
                uint32_t planes = ctx->rasterizer->clip_plane_enable;
                int count = 0;
 
+               if (emit->key.ucp_enables)
+                       planes = 0;
+
                while (planes && count < 6) {
                        int i = ffs(planes) - 1;
 
index 485a4da2c1a4e7a7adf85cdf015924941b611cd0..3146dc5d062bea1cb72b6fc3f835e023ce8f9951 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "pipe/p_state.h"
 #include "util/u_string.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
@@ -85,6 +86,20 @@ fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
        delete_shader_stateobj(so);
 }
 
+bool /* true when the hw clipper can't be used and the discard-based frag shader fallback is needed */
+fd3_needs_manual_clipping(const struct fd3_shader_stateobj *so,
+                                                 const struct pipe_rasterizer_state *rast)
+{
+       uint64_t outputs = ir3_shader_outputs(so->shader); /* outputs-written mask, tested below with one bit per VARYING_SLOT_* */
+
+       return (!rast->depth_clip || /* depth_clip off also selects the fallback (NOTE(review): rationale not stated here - confirm) */
+                       util_bitcount(rast->clip_plane_enable) > 6 || /* hw clipper only handles up to 6 UCPs */
+                       outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) | /* shader writes a clip vertex ... */
+                                          (1ULL << VARYING_SLOT_CLIP_DIST0) | /* ... or clip distances (either slot) */
+                                          (1ULL << VARYING_SLOT_CLIP_DIST1)));
+}
+
+
 static void
 emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
 {
index b3fcc0c68c9bd4620fb41822a5f8356d40b55031..b95df4cc6b37e05d8ad11b4f3aabc6b0398342af 100644 (file)
@@ -44,4 +44,7 @@ void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 
 void fd3_prog_init(struct pipe_context *pctx);
 
+bool fd3_needs_manual_clipping(const struct fd3_shader_stateobj *,
+                                                          const struct pipe_rasterizer_state *);
+
 #endif /* FD3_PROGRAM_H_ */
index ac48132026c43d044799de75ee3565b3fa031f25..76460d9231a4524c0f09c0dcdd0131f2c8cd5add 100644 (file)
@@ -469,6 +469,12 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
        debug_printf("\n");
 }
 
+uint64_t /* accessor: exposes the shader's NIR outputs_written mask to callers outside ir3 */
+ir3_shader_outputs(const struct ir3_shader *so)
+{
+       return so->nir->info.outputs_written; /* read straight from the shader's NIR info; no variant lookup involved */
+}
+
 /* This has to reach into the fd_context a bit more than the rest of
  * ir3, but it needs to be aligned with the compiler, so both agree
  * on which const regs hold what.  And the logic is identical between
index b773609249f9d9b87129c10ed39c6c91399cb37e..8c9483e1bc751dc67a094a93b2598627974f9fec 100644 (file)
@@ -272,6 +272,7 @@ void ir3_shader_destroy(struct ir3_shader *shader);
 struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
                struct ir3_shader_key key, struct pipe_debug_callback *debug);
 void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
+uint64_t ir3_shader_outputs(const struct ir3_shader *so);
 
 struct fd_ringbuffer;
 struct fd_context;