freedreno/ir3: move block-scheduling into legalize
authorRob Clark <robdclark@chromium.org>
Wed, 18 Dec 2019 22:04:36 +0000 (14:04 -0800)
committerMarge Bot <eric+marge@anholt.net>
Sat, 1 Feb 2020 02:40:22 +0000 (02:40 +0000)
We want to do this only once.  If we have a post-RA sched pass, then we
don't want to do it pre-RA.  Since legalize is where we resolve the
branch/jumps, we might as well move this into legalize.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3569>

src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_delay.c
src/freedreno/ir3/ir3_legalize.c
src/freedreno/ir3/ir3_sched.c

index ac2949341338759e62b9d9c4c8b750913b6f6a70..bf0f92a478a42942fd747bec99c93cdfd475fd7b 100644 (file)
@@ -1116,8 +1116,6 @@ void ir3_print_instr(struct ir3_instruction *instr);
 /* delay calculation: */
 int ir3_delayslots(struct ir3_instruction *assigner,
                struct ir3_instruction *consumer, unsigned n);
-unsigned ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
-               unsigned maxd, bool pred);
 unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
                bool soft, bool pred);
 void ir3_remove_nops(struct ir3 *ir);
@@ -1359,7 +1357,7 @@ ir3_##name(struct ir3_block *block,                                      \
 #define INSTR4(name)        __INSTR4(0, name, OPC_##name)
 
 /* cat0 instructions: */
-INSTR0(BR)
+INSTR1(BR)
 INSTR0(JUMP)
 INSTR1(KILL)
 INSTR0(END)
index 207c8cb91ccf0ddf257d3466af47378e829d5bbb..0b796a4183a9b2f945a16b950d3cf676edd7ee2b 100644 (file)
@@ -126,8 +126,8 @@ count_instruction(struct ir3_instruction *n)
  *    find the worst case (shortest) distance (only possible after
  *    individual blocks are all scheduled)
  */
-unsigned
-ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
+static unsigned
+distance(struct ir3_block *block, struct ir3_instruction *instr,
                unsigned maxd, bool pred)
 {
        unsigned d = 0;
@@ -162,7 +162,7 @@ ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
                        struct ir3_block *pred = (struct ir3_block *)entry->key;
                        unsigned n;
 
-                       n = ir3_distance(pred, instr, min, pred);
+                       n = distance(pred, instr, min, pred);
 
                        min = MIN2(min, n);
                }
@@ -204,7 +204,7 @@ delay_calc_srcn(struct ir3_block *block,
                } else {
                        delay = ir3_delayslots(assigner, consumer, srcn);
                }
-               delay -= ir3_distance(block, assigner, delay, pred);
+               delay -= distance(block, assigner, delay, pred);
        }
 
        return delay;
index 4b95b905e20393362e190d40af0d102aafecd9a8..458629021a4caf0ab810ace270b8bdb90a6794b0 100644 (file)
@@ -553,6 +553,45 @@ mark_xvergence_points(struct ir3 *ir)
        }
 }
 
+/* Insert the branch/jump instructions for flow control between blocks.
+ * Initially this is done naively, without considering if the successor
+ * block immediately follows the current block (ie. so no jump required),
+ * but that is cleaned up in resolve_jumps().
+ *
+ * TODO what ensures that the last write to p0.x in a block is the
+ * branch condition?  Have we been getting lucky all this time?
+ */
+static void
+block_sched(struct ir3 *ir)
+{
+       foreach_block (block, &ir->block_list) {
+               if (block->successors[1]) {
+                       /* if/else, conditional branches to "then" or "else": */
+                       struct ir3_instruction *br;
+
+                       debug_assert(block->condition);
+
+                       /* create "else" branch first (since "then" block should
+                        * frequently/always end up being a fall-thru):
+                        */
+                       br = ir3_BR(block, block->condition, 0);
+                       br->cat0.inv = true;
+                       br->cat0.target = block->successors[1];
+
+                       /* "then" branch: */
+                       br = ir3_BR(block, block->condition, 0);
+                       br->cat0.target = block->successors[0];
+
+               } else if (block->successors[0]) {
+                       /* otherwise unconditional jump to next block: */
+                       struct ir3_instruction *jmp;
+
+                       jmp = ir3_JUMP(block);
+                       jmp->cat0.target = block->successors[0];
+               }
+       }
+}
+
 /* Insert nop's required to make this a legal/valid shader program: */
 static void
 nop_sched(struct ir3 *ir)
@@ -629,6 +668,7 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
 
        *max_bary = ctx->max_bary;
 
+       block_sched(ir);
        nop_sched(ir);
 
        do {
index 13ec6e023acce55a072d7b62433870414283c299..986c80e4940ac02431f8df403ae1ca00830e921f 100644 (file)
@@ -761,48 +761,6 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
                        }
                }
        }
-
-       /* And lastly, insert branch/jump instructions to take us to
-        * the next block.  Later we'll strip back out the branches
-        * that simply jump to next instruction.
-        */
-       if (block->successors[1]) {
-               /* if/else, conditional branches to "then" or "else": */
-               struct ir3_instruction *br;
-
-               debug_assert(ctx->pred);
-               debug_assert(block->condition);
-
-               /* create "else" branch first (since "then" block should
-                * frequently/always end up being a fall-thru):
-                */
-               br = ir3_BR(block);
-               br->cat0.inv = true;
-               br->cat0.target = block->successors[1];
-
-               /* NOTE: we have to hard code delay of 6 above, since
-                * we want to insert the nop's before constructing the
-                * branch.  Throw in an assert so we notice if this
-                * ever breaks on future generation:
-                */
-               debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6);
-
-               br = ir3_BR(block);
-               br->cat0.target = block->successors[0];
-
-       } else if (block->successors[0]) {
-               /* otherwise unconditional jump to next block: */
-               struct ir3_instruction *jmp;
-
-               jmp = ir3_JUMP(block);
-               jmp->cat0.target = block->successors[0];
-       }
-
-       /* NOTE: if we kept track of the predecessors, we could do a better
-        * job w/ (jp) flags.. every node w/ > predecessor is a join point.
-        * Note that as we eliminate blocks which contain only an unconditional
-        * jump we probably need to propagate (jp) flag..
-        */
 }
 
 int ir3_sched(struct ir3 *ir)