From: Rob Clark Date: Sat, 11 Aug 2018 14:30:38 +0000 (-0400) Subject: freedreno/ir3: add support for a6xx 'merged' register set X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=70bf639328fa00d7875e82b43fb011b0687559c0;p=mesa.git freedreno/ir3: add support for a6xx 'merged' register set Starting with a6xx, half and full precision registers conflict, which makes things a bit more efficient, i.e. if some parts of the shader are heavy on half-precision and others on full precision, you don't have to allocate the worst case for both. But it means we need to set up some additional conflicts. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index d1a73ddc727..3f1216bdaa7 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -115,7 +115,12 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, /* ignore writes to dummy register r63.x */ } else if (max < 48) { if (reg->flags & IR3_REG_HALF) { - info->max_half_reg = MAX2(info->max_half_reg, max); + if (info->gpu_id >= 600) { + /* starting w/ a6xx, half regs conflict with full regs: */ + info->max_reg = MAX2(info->max_reg, (max+1)/2); + } else { + info->max_half_reg = MAX2(info->max_half_reg, max); + } } else { info->max_reg = MAX2(info->max_reg, max); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index a1c048c4a15..f6605443e5b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -285,8 +285,25 @@ ir3_ra_alloc_reg_set(struct ir3_compiler *compiler) } } + /* starting a6xx, half precision regs conflict w/ full precision regs: */ + if (compiler->gpu_id >= 600) { + /* because of transitivity, we can get away with just setting up + * conflicts between the first class of full and half regs: + */ + for (unsigned j = 0; j < CLASS_REGS(0) / 2; j++) { + unsigned freg = 
set->gpr_to_ra_reg[0][j]; + unsigned hreg0 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 0]; + unsigned hreg1 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 1]; + + ra_add_transitive_reg_conflict(set->regs, freg, hreg0); + ra_add_transitive_reg_conflict(set->regs, freg, hreg1); + } - ra_set_finalize(set->regs, q_values); + // TODO also need to update q_values, but for now: + ra_set_finalize(set->regs, NULL); + } else { + ra_set_finalize(set->regs, q_values); + } ralloc_free(q_values);