From 9eed0c601137a6657d6c0de4c2362549456f9465 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 11 Mar 2020 15:06:51 -0700 Subject: [PATCH] freedreno/ir3/delay: calculate delay properly for (rptN)'d instructions When a sequence of same instruction is encoded with repeat flag, destination registers are written on successive cycles. Teach the delay calculation about this. Signed-off-by: Rob Clark Part-of: --- src/freedreno/ir3/ir3_delay.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index 2ea8ee5ed6e..247ff6ee250 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -197,13 +197,35 @@ delay_calc_srcn(struct ir3_block *block, unsigned delay = 0; if (is_meta(assigner)) { - foreach_src (src, assigner) { + foreach_src_n (src, n, assigner) { unsigned d; if (!src->instr) continue; d = delay_calc_srcn(block, src->instr, consumer, srcn, soft, pred); + + /* A (rptN) instruction executes in consecutive cycles so + * it's outputs are written in successive cycles. And + * likewise for it's (r)'d (incremented) inputs, they are + * read on successive cycles. + * + * So we need to adjust the delay for (rptN)'s assigners + * and consumers accordingly. + * + * Note that the dst of a (rptN) instruction is implicitly + * (r) (the assigner case), although that is not the case + * for src registers. There is exactly one case, bary.f, + * which has a vecN (collect) src that is not (r)'d. + */ + if ((assigner->opc == OPC_META_SPLIT) && src->instr->repeat) { + /* (rptN) assigner case: */ + d -= MIN2(d, src->instr->repeat - assigner->split.off); + } else if ((assigner->opc == OPC_META_COLLECT) && consumer->repeat && + (consumer->regs[srcn]->flags & IR3_REG_R)) { + d -= MIN2(d, n); + } + delay = MAX2(delay, d); } } else { -- 2.30.2