freedreno/ir3: Track whether shader needs derivatives
author Kristian H. Kristensen <hoegsberg@chromium.org>
Mon, 25 Mar 2019 21:12:41 +0000 (14:12 -0700)
committer Kristian H. Kristensen <hoegsberg@chromium.org>
Tue, 26 Mar 2019 01:36:48 +0000 (18:36 -0700)
In 1088b788 ("freedreno/ir3: find # of samplers from uniform vars") we
started counting the number of samplers based on the uniform vars instead
of the number of cat5 instructions.  We used the number of samplers to
determine whether to enable derivatives, but that now breaks when a shader
uses derivatives and no samplers.  Track whether we need derivatives
explicitly and use that to enable the state.

Fixes: 1088b788 ("freedreno/ir3: find # of samplers from uniform vars")
Signed-off-by: Kristian H. Kristensen <hoegsberg@chromium.org>
Reviewed-by: Rob Clark <robdclark@gmail.com>
src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_legalize.c
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/a6xx/fd6_compute.c
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index 245320fe2fdcdf52c24fd05e7a62dc656487c2d1..6e30f74d4ab6411b13b87810ffc1801ad79505e1 100644 (file)
@@ -1028,7 +1028,7 @@ int ir3_ra(struct ir3 *ir3, gl_shader_stage type,
                bool frag_coord, bool frag_face);
 
 /* legalize: */
-void ir3_legalize(struct ir3 *ir, bool *has_ssbo, int *max_bary);
+void ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary);
 
 /* ************************************************************************* */
 /* instruction helpers */
index 402da13792bd8951dac2a48896bc3853ac1a110d..57a8758140b2881fab38c3527013b286928be4ab 100644 (file)
@@ -2798,7 +2798,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        /* We need to do legalize after (for frag shader's) the "bary.f"
         * offsets (inloc) have been assigned.
         */
-       ir3_legalize(ir, &so->has_ssbo, &max_bary);
+       ir3_legalize(ir, &so->has_ssbo, &so->need_pixlod, &max_bary);
 
        if (ir3_shader_debug & IR3_DBG_OPTMSGS) {
                printf("AFTER LEGALIZE:\n");
index f015c6fede8813e1806cb13bddd8c9e7546a8d3c..cb9a3f97292357143906cd7c2681a58ac9397ff7 100644 (file)
@@ -42,6 +42,7 @@
 struct ir3_legalize_ctx {
        struct ir3_compiler *compiler;
        bool has_ssbo;
+       bool need_pixlod;
        int max_bary;
 };
 
@@ -218,6 +219,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
 
                if (is_tex(n)) {
                        regmask_set(&state->needs_sy, n->regs[0]);
+                       ctx->need_pixlod = true;
                } else if (n->opc == OPC_RESINFO) {
                        regmask_set(&state->needs_ss, n->regs[0]);
                        ir3_NOP(block)->flags |= IR3_INSTR_SS;
@@ -471,7 +473,7 @@ mark_convergence_points(struct ir3 *ir)
 }
 
 void
-ir3_legalize(struct ir3 *ir, bool *has_ssbo, int *max_bary)
+ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary)
 {
        struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
        bool progress;
@@ -493,6 +495,7 @@ ir3_legalize(struct ir3 *ir, bool *has_ssbo, int *max_bary)
        } while (progress);
 
        *has_ssbo = ctx->has_ssbo;
+       *need_pixlod = ctx->need_pixlod;
        *max_bary = ctx->max_bary;
 
        do {
index d598dd76eee8fc94c9d96be884368a7a291a2c2a..647651c03b0cc460df6f1fd516d1397086755147 100644 (file)
@@ -431,6 +431,9 @@ struct ir3_shader_variant {
        /* do we have one or more SSBO instructions: */
        bool has_ssbo;
 
+       /* do we need derivatives: */
+       bool need_pixlod;
+
        /* do we have kill, image write, etc (which prevents early-z): */
        bool no_earlyz;
 
index 1219d7ad9b365e3f0a935bc3585eaccb0a8aee57..f3bc0ed231d5836e0e6afa6f06ac82d088d45310 100644 (file)
@@ -96,7 +96,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
                        A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
                        A6XX_SP_CS_CTRL_REG0_MERGEDREGS |
                        A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) |
-                       COND(v->num_samp > 0, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
+                       COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
 
        OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
        OUT_RING(ring, 0x41);
index 668b10cccc9426c237854f3001a4d40d70351656..c20472f63c26377bee79f12b64316fcc2bdccd23 100644 (file)
@@ -396,7 +396,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
                        A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
                        A6XX_SP_VS_CTRL_REG0_MERGEDREGS |
                        A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
-                       COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+                       COND(s[VS].v->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
        struct ir3_shader_linkage l = {0};
        ir3_link_shaders(&l, s[VS].v, s[FS].v);
@@ -518,7 +518,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
                        A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
                        A6XX_SP_FS_CTRL_REG0_MERGEDREGS |
                        A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
-                       COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+                       COND(s[FS].v->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
        OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1);
        OUT_RING(ring, 0);        /* XXX */