From ec6c2297634eba77248a929048cf4201887a5f0a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 29 Apr 2019 13:12:31 -0700 Subject: [PATCH] freedreno/ir3: fixes for half reg in/out Half reg shader inputs/outputs need to update max_half_reg, or be remapped to a full reg and update max_reg accordingly, depending on the GPU generation. Signed-off-by: Rob Clark --- src/freedreno/ir3/ir3.c | 14 +++++++------- src/freedreno/ir3/ir3_compiler_nir.c | 8 ++++++++ src/freedreno/ir3/ir3_shader.c | 28 ++++++++++++++++++++++------ src/freedreno/ir3/ir3_shader.h | 2 ++ 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 55e03d86af1..97f4ae96cd9 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -104,28 +104,28 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, if (reg->flags & IR3_REG_RELATIV) { components = reg->size; val.idummy10 = reg->array.offset; - max = (reg->array.offset + repeat + components - 1) >> 2; + max = (reg->array.offset + repeat + components - 1); } else { components = util_last_bit(reg->wrmask); val.comp = reg->num & 0x3; val.num = reg->num >> 2; - max = (reg->num + repeat + components - 1) >> 2; + max = (reg->num + repeat + components - 1); } if (reg->flags & IR3_REG_CONST) { - info->max_const = MAX2(info->max_const, max); + info->max_const = MAX2(info->max_const, max >> 2); } else if (val.num == 63) { /* ignore writes to dummy register r63.x */ - } else if (max < 48) { + } else if (max < regid(48, 0)) { if (reg->flags & IR3_REG_HALF) { if (info->gpu_id >= 600) { /* starting w/ a6xx, half regs conflict with full regs: */ - info->max_reg = MAX2(info->max_reg, (max+1)/2); + info->max_reg = MAX2(info->max_reg, max >> 3); } else { - info->max_half_reg = MAX2(info->max_half_reg, max); + info->max_half_reg = MAX2(info->max_half_reg, max >> 2); } } else { - info->max_reg = MAX2(info->max_reg, max); + info->max_reg = MAX2(info->max_reg, max >> 2); } } } diff --git 
a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 4e139dc136e..3c813c73ae0 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2954,6 +2954,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_instruction *instr = ir->outputs[(i*4) + j]; if (instr) { so->outputs[i].regid = instr->regs[0]->num; + so->outputs[i].half = !!(instr->regs[0]->flags & IR3_REG_HALF); break; } } @@ -2962,14 +2963,21 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, /* Note that some or all channels of an input may be unused: */ for (i = 0; i < so->inputs_count; i++) { unsigned j, reg = regid(63,0); + bool half = false; for (j = 0; j < 4; j++) { struct ir3_instruction *in = inputs[(i*4) + j]; if (in && !(in->flags & IR3_INSTR_UNUSED)) { reg = in->regs[0]->num - j; + if (half) { + compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF); + } else { + half = !!(in->regs[0]->flags & IR3_REG_HALF); + } } } so->inputs[i].regid = reg; + so->inputs[i].half = half; } if (ctx->astc_srgb) diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 3f8e8abdc08..46eba2a0c5e 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -63,7 +63,7 @@ delete_variant(struct ir3_shader_variant *v) * the reg off. 
*/ static void -fixup_regfootprint(struct ir3_shader_variant *v) +fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id) { unsigned i; @@ -83,14 +83,30 @@ fixup_regfootprint(struct ir3_shader_variant *v) if (v->inputs[i].compmask) { unsigned n = util_last_bit(v->inputs[i].compmask) - 1; - int32_t regid = (v->inputs[i].regid + n) >> 2; - v->info.max_reg = MAX2(v->info.max_reg, regid); + int32_t regid = v->inputs[i].regid + n; + if (v->inputs[i].half) { + if (gpu_id < 500) { + v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2); + } else { + v->info.max_reg = MAX2(v->info.max_reg, regid >> 3); + } + } else { + v->info.max_reg = MAX2(v->info.max_reg, regid >> 2); + } } } for (i = 0; i < v->outputs_count; i++) { - int32_t regid = (v->outputs[i].regid + 3) >> 2; - v->info.max_reg = MAX2(v->info.max_reg, regid); + int32_t regid = v->outputs[i].regid + 3; + if (v->outputs[i].half) { + if (gpu_id < 500) { + v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2); + } else { + v->info.max_reg = MAX2(v->info.max_reg, regid >> 3); + } + } else { + v->info.max_reg = MAX2(v->info.max_reg, regid >> 2); + } } } @@ -117,7 +133,7 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id) */ v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1)); - fixup_regfootprint(v); + fixup_regfootprint(v, gpu_id); return bin; } diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 7f09ee5312f..4e8ab085d7e 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -390,6 +390,7 @@ struct ir3_shader_variant { struct { uint8_t slot; uint8_t regid; + bool half : 1; } outputs[16 + 2]; /* +POSITION +PSIZE */ bool writes_pos, writes_smask, writes_psize; @@ -413,6 +414,7 @@ struct ir3_shader_variant { /* fragment shader specific: */ bool bary : 1; /* fetched varying (vs one loaded into reg) */ bool rasterflat : 1; /* special handling for emit->rasterflat */ + bool half : 1; enum 
glsl_interp_mode interpolate; } inputs[16 + 2]; /* +POSITION +FACE */ -- 2.30.2