From: Rob Clark Date: Fri, 3 Oct 2014 14:02:31 +0000 (-0400) Subject: freedreno/ir3: fix lockups with lame FRAG shaders X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=af4d08839581c2372f17f75f1ad0fd1284ea7d8b;p=mesa.git freedreno/ir3: fix lockups with lame FRAG shaders Shaders like: FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D 1: MOV OUT[0], IMM[0].xyxx 2: END cause unhappiness. They have an IN[], but once this is compiled the useless TEX instruction goes away, leaving a varying that is never fetched, which makes the hw unhappy. In the process fix a signed vs unsigned compare. If the vertex shader has max_reg=-1, MAX2() vs an unsigned would not give the desired result. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index c73e9cc8d7a..b92a57a43f8 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -413,7 +413,7 @@ int ir3_block_sched(struct ir3_block *block); /* register assignment: */ int ir3_block_ra(struct ir3_block *block, enum shader_t type, bool half_precision, bool frag_coord, bool frag_face, - bool *has_samp); + bool *has_samp, int *max_bary); #ifndef ARRAY_SIZE # define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 66f78a80f6d..80676830dd7 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -3068,7 +3068,7 @@ ir3_compile_shader(struct ir3_shader_variant *so, struct ir3_block *block; struct ir3_instruction **inputs; unsigned i, j, actual_in; - int ret = 0; + int ret = 0, max_bary; assert(!so->ir); @@ -3183,7 +3183,7 @@ ir3_compile_shader(struct ir3_shader_variant *so, } 
ret = ir3_block_ra(block, so->type, key.half_precision, - so->frag_coord, so->frag_face, &so->has_samp); + so->frag_coord, so->frag_face, &so->has_samp, &max_bary); if (ret) { DBG("RA failed!"); goto out; @@ -3230,6 +3230,8 @@ ir3_compile_shader(struct ir3_shader_variant *so, */ if (so->type == SHADER_VERTEX) so->total_in = actual_in; + else + so->total_in = align(max_bary + 1, 4); out: if (ret) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 3ac626ca3b6..aa8ad513e04 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -58,6 +58,7 @@ struct ir3_ra_ctx { bool frag_face; bool has_samp; int cnt; + int max_bary; bool error; }; @@ -614,6 +615,12 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) if (is_meta(n)) continue; + if (is_input(n)) { + struct ir3_register *inloc = n->regs[1]; + assert(inloc->flags & IR3_REG_IMMED); + ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val); + } + for (i = 1; i < n->regs_count; i++) { reg = n->regs[i]; @@ -775,7 +782,7 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) int ir3_block_ra(struct ir3_block *block, enum shader_t type, bool half_precision, bool frag_coord, bool frag_face, - bool *has_samp) + bool *has_samp, int *max_bary) { struct ir3_ra_ctx ctx = { .block = block, @@ -783,12 +790,14 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type, .half_precision = half_precision, .frag_coord = frag_coord, .frag_face = frag_face, + .max_bary = -1, }; int ret; ir3_clear_mark(block->shader); ret = block_ra(&ctx, block); *has_samp = ctx.has_samp; + *max_bary = ctx.max_bary; return ret; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index ed7c639c930..31e302b92c2 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -91,12 +91,12 @@ 
fixup_vp_regfootprint(struct ir3_shader_variant *v) unsigned i; for (i = 0; i < v->inputs_count; i++) { if (v->inputs[i].compmask) { - uint32_t regid = (v->inputs[i].regid + 3) >> 2; + int32_t regid = (v->inputs[i].regid + 3) >> 2; v->info.max_reg = MAX2(v->info.max_reg, regid); } } for (i = 0; i < v->outputs_count; i++) { - uint32_t regid = (v->outputs[i].regid + 3) >> 2; + int32_t regid = (v->outputs[i].regid + 3) >> 2; v->info.max_reg = MAX2(v->info.max_reg, regid); } }