In
1088b788 ("freedreno/ir3: find # of samplers from uniform vars") we
started counting number of samplers based on the uniform vars instead
of number of cat5 instructions. We used the number of samplers to
determine whether to enable derivatives, but when we only use
derivatives and no samplers, that now breaks. Track whether we need
derivatives explicitly and use that to enable the state.
Fixes: 1088b788 ("freedreno/ir3: find # of samplers from uniform vars")
Signed-off-by: Kristian H. Kristensen <hoegsberg@chromium.org>
Reviewed-by: Rob Clark <robdclark@gmail.com>
bool frag_coord, bool frag_face);
/* legalize: */
-void ir3_legalize(struct ir3 *ir, bool *has_ssbo, int *max_bary);
+void ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary);
/* ************************************************************************* */
/* instruction helpers */
/* We need to do legalize after (for frag shader's) the "bary.f"
* offsets (inloc) have been assigned.
*/
- ir3_legalize(ir, &so->has_ssbo, &max_bary);
+ ir3_legalize(ir, &so->has_ssbo, &so->need_pixlod, &max_bary);
if (ir3_shader_debug & IR3_DBG_OPTMSGS) {
printf("AFTER LEGALIZE:\n");
struct ir3_legalize_ctx {
struct ir3_compiler *compiler;
bool has_ssbo;
+ bool need_pixlod;
int max_bary;
};
if (is_tex(n)) {
regmask_set(&state->needs_sy, n->regs[0]);
+ ctx->need_pixlod = true;
} else if (n->opc == OPC_RESINFO) {
regmask_set(&state->needs_ss, n->regs[0]);
ir3_NOP(block)->flags |= IR3_INSTR_SS;
}
void
-ir3_legalize(struct ir3 *ir, bool *has_ssbo, int *max_bary)
+ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary)
{
struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
bool progress;
} while (progress);
*has_ssbo = ctx->has_ssbo;
+ *need_pixlod = ctx->need_pixlod;
*max_bary = ctx->max_bary;
do {
/* do we have one or more SSBO instructions: */
bool has_ssbo;
+ /* do we need derivatives: */
+ bool need_pixlod;
+
/* do we have kill, image write, etc (which prevents early-z): */
bool no_earlyz;
A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
A6XX_SP_CS_CTRL_REG0_MERGEDREGS |
A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) |
- COND(v->num_samp > 0, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
+ COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
OUT_RING(ring, 0x41);
A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
A6XX_SP_VS_CTRL_REG0_MERGEDREGS |
A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
- COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+ COND(s[VS].v->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
struct ir3_shader_linkage l = {0};
ir3_link_shaders(&l, s[VS].v, s[FS].v);
A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
A6XX_SP_FS_CTRL_REG0_MERGEDREGS |
A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
- COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+ COND(s[FS].v->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1);
OUT_RING(ring, 0); /* XXX */