nir/radv: remove restrictions on opt_if_loop_last_continue()
authorTimothy Arceri <tarceri@itsqueeze.com>
Mon, 8 Apr 2019 10:13:49 +0000 (20:13 +1000)
committerTimothy Arceri <tarceri@itsqueeze.com>
Tue, 9 Apr 2019 01:29:41 +0000 (11:29 +1000)
When I implemented opt_if_loop_last_continue() I had restricted
this pass from moving other if-statements inside the branch opposite
the continue. At the time it was causing a bunch of spilling in
shader-db for i965.

However Samuel Pitoiset noticed that making this pass more aggressive
significantly improved the performance of Doom on RADV. Below are
the statistics he gathered.

28717 shaders in 14931 tests
Totals:
SGPRS: 1267317 -> 1267549 (0.02 %)
VGPRS: 896876 -> 895920 (-0.11 %)
Spilled SGPRs: 24701 -> 26367 (6.74 %)
Code Size: 48379452 -> 48507880 (0.27 %) bytes
Max Waves: 241159 -> 241190 (0.01 %)

Totals from affected shaders:
SGPRS: 23584 -> 23816 (0.98 %)
VGPRS: 25908 -> 24952 (-3.69 %)
Spilled SGPRs: 503 -> 2169 (331.21 %)
Code Size: 2471392 -> 2599820 (5.20 %) bytes
Max Waves: 586 -> 617 (5.29 %)

The codesize increases is related to Wolfenstein II it seems largely
due to an increase in phis rather than the existing jumps.

This gives +10% FPS with Doom on my Vega56.

Rhys Perry also benchmarked Doom on his VEGA64:

Before: 72.53 FPS
After:  80.77 FPS

v2: disable pass on non-AMD drivers

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> (v1)
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/vulkan/radv_shader.c
src/compiler/nir/nir.h
src/compiler/nir/nir_opt_if.c
src/freedreno/ir3/ir3_nir.c
src/gallium/auxiliary/nir/tgsi_to_nir.c
src/gallium/drivers/freedreno/a2xx/ir2_nir.c
src/gallium/drivers/radeonsi/si_shader_nir.c
src/intel/compiler/brw_nir.c
src/mesa/state_tracker/st_glsl_to_nir.cpp

index d3d073d1db81a9e80049d7788eed23cc14bca7cb..7cde5e728e4e06a7b03f5539332ae8008fd7eee2 100644 (file)
@@ -158,7 +158,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
                        NIR_PASS(progress, shader, nir_opt_remove_phis);
                         NIR_PASS(progress, shader, nir_opt_dce);
                 }
-                NIR_PASS(progress, shader, nir_opt_if);
+                NIR_PASS(progress, shader, nir_opt_if, true);
                 NIR_PASS(progress, shader, nir_opt_dead_cf);
                 NIR_PASS(progress, shader, nir_opt_cse);
                 NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
index bc72d8f83f538d34e827b3e4c7aaa2a9a8710db6..c1ecf5ad56154afe776b180017f50bde1c6bc39f 100644 (file)
@@ -3434,7 +3434,7 @@ bool nir_opt_gcm(nir_shader *shader, bool value_number);
 
 bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
 
-bool nir_opt_if(nir_shader *shader);
+bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);
 
 bool nir_opt_intrinsics(nir_shader *shader);
 
index 4d3183ed151bd1b7bba01553cc218fe493ed7a3d..713bdf0c38a6d4eead1f823ff82184862b0603e4 100644 (file)
@@ -824,47 +824,60 @@ nir_block_ends_in_continue(nir_block *block)
  * The continue should then be removed by nir_opt_trivial_continues() and the
  * loop can potentially be unrolled.
  *
- * Note: do_work_2() is only ever blocks and nested loops. We could also nest
- * other if-statments in the branch which would allow further continues to
- * be removed. However in practice this can result in increased register
- * pressure.
+ * Note: Unless the function param aggressive_last_continue==true do_work_2()
+ * is only ever blocks and nested loops. We avoid nesting other if-statments
+ * in the branch as this can result in increased register pressure, and in
+ * the i965 driver it causes a large amount of spilling in shader-db.
+ * For RADV however nesting these if-statements allows further continues to be
+ * remove and provides a significant FPS boost in Doom, which is why we have
+ * opted for this special bool to enable more aggresive optimisations.
+ * TODO: The GCM pass solves most of the spilling regressions in i965, if it
+ * is ever enabled we should consider removing the aggressive_last_continue
+ * param.
  */
 static bool
-opt_if_loop_last_continue(nir_loop *loop)
+opt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue)
 {
-   /* Get the last if-stament in the loop */
+   nir_if *nif;
+   bool then_ends_in_continue;
+   bool else_ends_in_continue;
+
+   /* Scan the control flow of the loop from the last to the first node
+    * looking for an if-statement we can optimise.
+    */
    nir_block *last_block = nir_loop_last_block(loop);
    nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node);
    while (if_node) {
-      if (if_node->type == nir_cf_node_if)
-         break;
+      if (if_node->type == nir_cf_node_if) {
+         nif = nir_cf_node_as_if(if_node);
+         nir_block *then_block = nir_if_last_then_block(nif);
+         nir_block *else_block = nir_if_last_else_block(nif);
 
-      if_node = nir_cf_node_prev(if_node);
-   }
+         then_ends_in_continue = nir_block_ends_in_continue(then_block);
+         else_ends_in_continue = nir_block_ends_in_continue(else_block);
 
-   if (!if_node || if_node->type != nir_cf_node_if)
-      return false;
-
-   nir_if *nif = nir_cf_node_as_if(if_node);
-   nir_block *then_block = nir_if_last_then_block(nif);
-   nir_block *else_block = nir_if_last_else_block(nif);
+         /* If both branches end in a jump do nothing, this should be handled
+          * by nir_opt_dead_cf().
+          */
+         if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
+             (else_ends_in_continue || nir_block_ends_in_break(else_block)))
+            return false;
 
-   bool then_ends_in_continue = nir_block_ends_in_continue(then_block);
-   bool else_ends_in_continue = nir_block_ends_in_continue(else_block);
+         /* If continue found stop scanning and attempt optimisation, or
+          */
+         if (then_ends_in_continue || else_ends_in_continue ||
+             !aggressive_last_continue)
+            break;
+      }
 
-   /* If both branches end in a continue do nothing, this should be handled
-    * by nir_opt_dead_cf().
-    */
-   if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
-       (else_ends_in_continue || nir_block_ends_in_break(else_block)))
-      return false;
+      if_node = nir_cf_node_prev(if_node);
+   }
 
+   /* If we didn't find an if to optimise return */
    if (!then_ends_in_continue && !else_ends_in_continue)
       return false;
 
-   /* if the block after the if/else is empty we bail, otherwise we might end
-    * up looping forever
-    */
+   /* If there is nothing after the if-statement we bail */
    if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) &&
        exec_list_is_empty(&last_block->instr_list))
       return false;
@@ -1327,7 +1340,8 @@ opt_if_merge(nir_if *nif)
 }
 
 static bool
-opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
+opt_if_cf_list(nir_builder *b, struct exec_list *cf_list,
+               bool aggressive_last_continue)
 {
    bool progress = false;
    foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
@@ -1337,8 +1351,10 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
 
       case nir_cf_node_if: {
          nir_if *nif = nir_cf_node_as_if(cf_node);
-         progress |= opt_if_cf_list(b, &nif->then_list);
-         progress |= opt_if_cf_list(b, &nif->else_list);
+         progress |= opt_if_cf_list(b, &nif->then_list,
+                                    aggressive_last_continue);
+         progress |= opt_if_cf_list(b, &nif->else_list,
+                                    aggressive_last_continue);
          progress |= opt_if_loop_terminator(nif);
          progress |= opt_if_merge(nif);
          progress |= opt_if_simplification(b, nif);
@@ -1347,10 +1363,12 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
 
       case nir_cf_node_loop: {
          nir_loop *loop = nir_cf_node_as_loop(cf_node);
-         progress |= opt_if_cf_list(b, &loop->body);
+         progress |= opt_if_cf_list(b, &loop->body,
+                                    aggressive_last_continue);
          progress |= opt_simplify_bcsel_of_phi(b, loop);
          progress |= opt_peel_loop_initial_if(loop);
-         progress |= opt_if_loop_last_continue(loop);
+         progress |= opt_if_loop_last_continue(loop,
+                                               aggressive_last_continue);
          break;
       }
 
@@ -1399,7 +1417,7 @@ opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list)
 }
 
 bool
-nir_opt_if(nir_shader *shader)
+nir_opt_if(nir_shader *shader, bool aggressive_last_continue)
 {
    bool progress = false;
 
@@ -1416,7 +1434,8 @@ nir_opt_if(nir_shader *shader)
       nir_metadata_preserve(function->impl, nir_metadata_block_index |
                             nir_metadata_dominance);
 
-      if (opt_if_cf_list(&b, &function->impl->body)) {
+      if (opt_if_cf_list(&b, &function->impl->body,
+                         aggressive_last_continue)) {
          nir_metadata_preserve(function->impl, nir_metadata_none);
 
          /* If that made progress, we're no longer really in SSA form.  We
index 8b66615a6e01a0f35477d4568d39befdc3764bfb..7a8b975364300903b43312aef731d57437d4416d 100644 (file)
@@ -147,7 +147,7 @@ ir3_optimize_loop(nir_shader *s)
                        OPT(s, nir_copy_prop);
                        OPT(s, nir_opt_dce);
                }
-               progress |= OPT(s, nir_opt_if);
+               progress |= OPT(s, nir_opt_if, false);
                progress |= OPT(s, nir_opt_remove_phis);
                progress |= OPT(s, nir_opt_undef);
 
index 09e40977fd8f5ca2f2f11e02bb28502a4f76efb3..e3cc5560033104fb0a9746f5f865836387690706 100644 (file)
@@ -2066,7 +2066,7 @@ ttn_optimize_nir(nir_shader *nir, bool scalar)
          NIR_PASS(progress, nir, nir_opt_dce);
       }
 
-      NIR_PASS(progress, nir, nir_opt_if);
+      NIR_PASS(progress, nir, nir_opt_if, false);
       NIR_PASS(progress, nir, nir_opt_dead_cf);
       NIR_PASS(progress, nir, nir_opt_cse);
       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
index 6aaff393167c8098429c712a7899ad0fe50fc5dc..3d4145fccdcaf819e4e2565a089a3385cf259005 100644 (file)
@@ -94,7 +94,7 @@ ir2_optimize_loop(nir_shader *s)
                        OPT(s, nir_opt_dce);
                }
                progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
-               progress |= OPT(s, nir_opt_if);
+               progress |= OPT(s, nir_opt_if, false);
                progress |= OPT(s, nir_opt_remove_phis);
                progress |= OPT(s, nir_opt_undef);
 
index 5ac18e2ebc81fce020cca530f7d9e3d7b7b061b2..938b0efcb7679aa28ec4ede1b54a539d9a9c58d8 100644 (file)
@@ -880,7 +880,7 @@ si_lower_nir(struct si_shader_selector* sel)
                        NIR_PASS(progress, sel->nir, nir_copy_prop);
                        NIR_PASS(progress, sel->nir, nir_opt_dce);
                }
-               NIR_PASS(progress, sel->nir, nir_opt_if);
+               NIR_PASS(progress, sel->nir, nir_opt_if, true);
                NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
                NIR_PASS(progress, sel->nir, nir_opt_cse);
                NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true, true);
index 238db902b4769fcac28010bbaa62c8f847b52edb..2e63efdc4272cf73913ebd131d814e66a76e2509 100644 (file)
@@ -609,7 +609,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
          OPT(nir_copy_prop);
          OPT(nir_opt_dce);
       }
-      OPT(nir_opt_if);
+      OPT(nir_opt_if, false);
       if (nir->options->max_unroll_iterations != 0) {
          OPT(nir_opt_loop_unroll, indirect_mask);
       }
index 9a4e030413bf93a0841ebcadb48f7caab6f64c2e..fb10869c9f968f5660f267f9f0adf030fb7698d3 100644 (file)
@@ -324,7 +324,7 @@ st_nir_opts(nir_shader *nir, bool scalar)
          NIR_PASS(progress, nir, nir_copy_prop);
          NIR_PASS(progress, nir, nir_opt_dce);
       }
-      NIR_PASS(progress, nir, nir_opt_if);
+      NIR_PASS(progress, nir, nir_opt_if, false);
       NIR_PASS(progress, nir, nir_opt_dead_cf);
       NIR_PASS(progress, nir, nir_opt_cse);
       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);