nir/sink: Rewrite loop handling logic

author Connor Abbott <cwabbott0@gmail.com>

Wed, 25 Sep 2019 12:02:48 +0000 (14:02 +0200)

committer Connor Abbott <cwabbott0@gmail.com>

Wed, 9 Oct 2019 17:55:25 +0000 (17:55 +0000)
author Connor Abbott <cwabbott0@gmail.com>
Wed, 25 Sep 2019 12:02:48 +0000 (14:02 +0200)
committer Connor Abbott <cwabbott0@gmail.com>
Wed, 9 Oct 2019 17:55:25 +0000 (17:55 +0000)
diff --git a/src/compiler/nir/nir_opt_sink.c b/src/compiler/nir/nir_opt_sink.c

index bdeedde5f4848adfc8553e5606514c4110ad264b..f6c7093a0d3134eb85a0d44e8aa354ee18d8989d 100644 (file)
--- a/src/compiler/nir/nir_opt_sink.c
+++ b/src/compiler/nir/nir_opt_sink.c
@@ -76,23 +76,36 @@ get_innermost_loop(nir_cf_node *node)
     return NULL;
  }
  
-/* return last block not after use_block with def_loop as it's innermost loop */
-static nir_block *
-adjust_block_for_loops(nir_block *use_block, nir_loop *def_loop)
+static bool
+loop_contains_block(nir_loop *loop, nir_block *block)
  {
-   nir_loop *use_loop = NULL;
+   nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
  
-   for (nir_cf_node *node = &use_block->cf_node; node != NULL; node = node->parent) {
-      if (def_loop && node == &def_loop->cf_node)
-         break;
-      if (node->type == nir_cf_node_loop)
-         use_loop = nir_cf_node_as_loop(node);
-   }
-   if (use_loop) {
-      return nir_block_cf_tree_prev(nir_loop_first_block(use_loop));
-   } else {
-      return use_block;
+   return block->index > before->index && block->index < after->index;
+}
+
+/* Given the LCA of all uses and the definition, find a block on the path
+ * between them in the dominance tree that is outside of as many loops as
+ * possible.
+ */
+
+static nir_block *
+adjust_block_for_loops(nir_block *use_block, nir_block *def_block)
+{
+   for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom;
+        cur_block = cur_block->imm_dom) {
+      nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node);
+      if (next && next->type == nir_cf_node_loop) {
+         nir_loop *following_loop = nir_cf_node_as_loop(next);
+         if (loop_contains_block(following_loop, use_block)) {
+             use_block = cur_block;
+             continue;
+         }
+      }
     }
+
+   return use_block;
  }
  
  /* iterate a ssa def's use's and try to find a more optimal block to
@@ -106,10 +119,6 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
  {
     nir_block *lca = NULL;
  
-   nir_loop *def_loop = NULL;
-   if (!sink_into_loops)
-      def_loop = get_innermost_loop(&def->parent_instr->block->cf_node);
-
     nir_foreach_use(use, def) {
        nir_instr *instr = use->parent_instr;
        nir_block *use_block = instr->block;
@@ -131,18 +140,6 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
           use_block = phi_lca;
        }
  
-      /* If we're moving a load_ubo or load_interpolated_input, we don't want to
-       * sink it down into loops, which may result in accessing memory or shared
-       * functions multiple times.  Sink it just above the start of the loop
-       * where it's used.  For load_consts, undefs, and comparisons, we expect
-       * the driver to be able to emit them as simple ALU ops, so sinking as far
-       * in as we can go is probably worth it for register pressure.
-       */
-      if (!sink_into_loops) {
-         use_block = adjust_block_for_loops(use_block, def_loop);
-         assert(nir_block_dominates(def->parent_instr->block, use_block));
-      }
-
        lca = nir_dominance_lca(lca, use_block);
     }
  
@@ -150,14 +147,22 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
        nir_block *use_block =
           nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node));
  
-      if (!sink_into_loops) {
-         use_block = adjust_block_for_loops(use_block, def_loop);
-         assert(nir_block_dominates(def->parent_instr->block, use_block));
-      }
-
        lca = nir_dominance_lca(lca, use_block);
     }
  
+   /* If we're moving a load_ubo or load_interpolated_input, we don't want to
+    * sink it down into loops, which may result in accessing memory or shared
+    * functions multiple times.  Sink it just above the start of the loop
+    * where it's used.  For load_consts, undefs, and comparisons, we expect
+    * the driver to be able to emit them as simple ALU ops, so sinking as far
+    * in as we can go is probably worth it for register pressure.
+    */
+   if (!sink_into_loops) {
+      lca = adjust_block_for_loops(lca, def->parent_instr->block);
+      assert(nir_block_dominates(def->parent_instr->block, lca));
+   }
+
+
     return lca;
  }
author	Connor Abbott <cwabbott0@gmail.com>
	Wed, 25 Sep 2019 12:02:48 +0000 (14:02 +0200)
committer	Connor Abbott <cwabbott0@gmail.com>
	Wed, 9 Oct 2019 17:55:25 +0000 (17:55 +0000)