freedreno/ir3/delay: calculate delay properly for (rptN)'d instructions
authorRob Clark <robdclark@chromium.org>
Wed, 11 Mar 2020 22:06:51 +0000 (15:06 -0700)
committerMarge Bot <eric+marge@anholt.net>
Tue, 16 Jun 2020 20:56:15 +0000 (20:56 +0000)
When a sequence of the same instruction is encoded with the repeat flag,
the destination registers are written on successive cycles.  Teach the
delay calculation about this.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5280>

src/freedreno/ir3/ir3_delay.c

index 2ea8ee5ed6eb481b2dd2f7b4237a1b7bc7ed5489..247ff6ee25064a639cfb5847235686b5fda8e3b7 100644 (file)
@@ -197,13 +197,35 @@ delay_calc_srcn(struct ir3_block *block,
        unsigned delay = 0;
 
        if (is_meta(assigner)) {
-               foreach_src (src, assigner) {
+               foreach_src_n (src, n, assigner) {
                        unsigned d;
 
                        if (!src->instr)
                                continue;
 
                        d = delay_calc_srcn(block, src->instr, consumer, srcn, soft, pred);
+
+                       /* A (rptN) instruction executes in consecutive cycles so
+                        * its outputs are written in successive cycles.  And
+                        * likewise for its (r)'d (incremented) inputs, they are
+                        * read on successive cycles.
+                        *
+                        * So we need to adjust the delay for (rptN)'s assigners
+                        * and consumers accordingly.
+                        *
+                        * Note that the dst of a (rptN) instruction is implicitly
+                        * (r) (the assigner case), although that is not the case
+                        * for src registers.  There is exactly one case, bary.f,
+                        * which has a vecN (collect) src that is not (r)'d.
+                        */
+                       if ((assigner->opc == OPC_META_SPLIT) && src->instr->repeat) {
+                               /* (rptN) assigner case: */
+                               d -= MIN2(d, src->instr->repeat - assigner->split.off);
+                       } else if ((assigner->opc == OPC_META_COLLECT) && consumer->repeat &&
+                                       (consumer->regs[srcn]->flags & IR3_REG_R)) {
+                               d -= MIN2(d, n);
+                       }
+
                        delay = MAX2(delay, d);
                }
        } else {