From 175d1b437263e5eab0e47378fbeca3dd71002cd0 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 28 Feb 2018 17:33:29 -0500 Subject: [PATCH] freedreno/ir3: fix fixing-up register footprint It isn't just vertex shaders that need to fix up the reg footprint for inputs populated before the shader starts. This problem showed up with compute shaders. If you have (for example) a localregid sysval, but only the .x component is used, the hw still writes the .yz components, which could overflow into other threads causing corruption. Showed up in cl cts 'basic/test_basic intmath_int'. But in theory the same problem could crop up elsewhere. Signed-off-by: Rob Clark --- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 2 +- .../drivers/freedreno/ir3/ir3_shader.c | 43 +++++++++++-------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 1c8d836a87f..8644bc19218 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -3418,7 +3418,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, so->varying_in++; so->inputs[i].compmask = (1 << maxcomp) - 1; inloc += maxcomp; - } else { + } else if (!so->inputs[i].sysval){ so->inputs[i].compmask = compmask; } so->inputs[i].regid = regid; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 3d6cae9f80e..555c654374e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -70,26 +70,35 @@ delete_variant(struct ir3_shader_variant *v) static void fixup_regfootprint(struct ir3_shader_variant *v) { - if (v->type == SHADER_VERTEX) { - unsigned i; - for (i = 0; i < v->inputs_count; i++) { - /* skip frag inputs fetch via bary.f since their reg's are - * not written by gpu before shader starts (and in fact the - * regid's might not even be 
valid) - */ - if (v->inputs[i].bary) - continue; + unsigned i; - if (v->inputs[i].compmask) { - int32_t regid = (v->inputs[i].regid + 3) >> 2; - v->info.max_reg = MAX2(v->info.max_reg, regid); - } - } - for (i = 0; i < v->outputs_count; i++) { - int32_t regid = (v->outputs[i].regid + 3) >> 2; + for (i = 0; i < v->inputs_count; i++) { + /* skip frag inputs fetch via bary.f since their reg's are + * not written by gpu before shader starts (and in fact the + * regid's might not even be valid) + */ + if (v->inputs[i].bary) + continue; + + /* ignore high regs that are global to all threads in a warp + * (they exist by default) (a5xx+) + */ + if (v->inputs[i].regid >= regid(48,0)) + continue; + + if (v->inputs[i].compmask) { + unsigned n = util_last_bit(v->inputs[i].compmask) - 1; + int32_t regid = (v->inputs[i].regid + n) >> 2; v->info.max_reg = MAX2(v->info.max_reg, regid); } - } else if (v->type == SHADER_FRAGMENT) { + } + + for (i = 0; i < v->outputs_count; i++) { + int32_t regid = (v->outputs[i].regid + 3) >> 2; + v->info.max_reg = MAX2(v->info.max_reg, regid); + } + + if (v->type == SHADER_FRAGMENT) { /* NOTE: not sure how to turn pos_regid off.. but this could * be, for example, r1.x while max reg used by the shader is * r0.*, in which case we need to fixup the reg footprint: -- 2.30.2