freedreno/ir3: account for arrays in delayslot calc
author: Rob Clark <robdclark@gmail.com>
Sun, 4 Feb 2018 17:42:19 +0000 (12:42 -0500)
committer: Rob Clark <robdclark@gmail.com>
Sat, 10 Feb 2018 19:54:58 +0000 (14:54 -0500)
Normally false-deps are not something to consider, since they mostly
exist for non-delay-slot related reasons:

 * barriers
 * ordering writes after read
 * SSBO/image access ordering

The exception is a false-dependency on an array store.

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/ir3/ir3_depth.c

index 55ca5333b4776e78ff6a21cdc260b5c74ad21fe5..b58bf8ff3aee74446fe71667f3fc564c55e9606c 100644 (file)
  * blocks depth sorted list, which is used by the scheduling pass.
  */
 
+/* generally don't count false dependencies, since this can just be
+ * something like a barrier, or SSBO store.  The exception is array
+ * dependencies if the assigner is an array write and the consumer
+ * reads the same array.
+ *
+ * Returns true if dependency 'n' of 'consumer' on 'assigner' should be
+ * ignored (ir3_delayslots() then reports zero delay slots for it),
+ * false if it must be counted:
+ *
+ *  - false when __is_false_dep(consumer, n) says it is not a false dep
+ *  - false when 'assigner' has IR3_BARRIER_ARRAY_W set and any src of
+ *    'consumer' is an IR3_REG_ARRAY register whose array.id matches
+ *    the assigner's dst (regs[0])
+ *  - true for every other false dependency
+ */
+static bool
+ignore_dep(struct ir3_instruction *assigner,
+               struct ir3_instruction *consumer, unsigned n)
+{
+       if (!__is_false_dep(consumer, n))
+               return false;
+
+       if (assigner->barrier_class & IR3_BARRIER_ARRAY_W) {
+               struct ir3_register *dst = assigner->regs[0];
+               struct ir3_register *src;
+
+               debug_assert(dst->flags & IR3_REG_ARRAY);
+
+               foreach_src(src, consumer) {
+                       if ((src->flags & IR3_REG_ARRAY) &&
+                                       (dst->array.id == src->array.id)) {
+                               return false;
+                       }
+               }
+       }
+
+       return true;
+}
+
 /* calculate required # of delay slots between the instruction that
  * assigns a value and the one that consumes
  */
 int ir3_delayslots(struct ir3_instruction *assigner,
                struct ir3_instruction *consumer, unsigned n)
 {
-       /* don't count false-dependencies: */
-       if (__is_false_dep(consumer, n))
+       if (ignore_dep(assigner, consumer, n))
                return 0;
 
        /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal