ir3: Split out variant-specific lowering and optimizations
author: Connor Abbott <cwabbott0@gmail.com>
Wed, 10 Jun 2020 09:11:27 +0000 (11:11 +0200)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 19 Jun 2020 13:16:57 +0000 (13:16 +0000)
It seems a lot of the lowerings being run the second time were
unnecessary. In addition, when const_state is moved to the variant,
then it will become impossible to know ahead of time whether a variant
needs additional optimizing, which means that ir3_key_lowers_nir() needs
to go away. The new approach should have the same effect, since it skips
running lowerings that are unnecessary and then skips the opt loop if no
optimizations made progress, but it will work better when we move
ir3_nir_analyze_ubo_ranges() to be after variant creation.

The one maybe controversial thing I did is to make
nir_opt_algebraic_late() always happen during variant lowering. I wanted
to avoid code duplication, and it seems to me that we should push the
_late variants as far back as possible so that later opt_algebraic runs
don't miss out on optimization opportunities.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5508>

src/freedreno/ir3/ir3_context.c
src/freedreno/ir3/ir3_nir.c
src/freedreno/ir3/ir3_nir.h
src/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_cmdline.c

index b9905625b703703309ae117fbea3a3c1572af029..522c6f69aca976abd4f329d95bf6fd8d3d775f4f 100644 (file)
@@ -74,8 +74,7 @@ ir3_context_init(struct ir3_compiler *compiler,
         */
 
        ctx->s = nir_shader_clone(ctx, so->shader->nir);
-       if (ir3_key_lowers_nir(&so->key))
-               ir3_optimize_nir(so->shader, ctx->s, &so->key);
+       ir3_nir_lower_variant(so, ctx->s);
 
        /* this needs to be the last pass run, so do this here instead of
         * in ir3_optimize_nir():
index 0afe624a3552734bc6c970c4062a661639bc4f16..2df5715f51c0768a8c2c24f798b518e66741556d 100644 (file)
@@ -123,17 +123,6 @@ ir3_get_compiler_options(struct ir3_compiler *compiler)
        return &options;
 }
 
-/* for given shader key, are any steps handled in nir? */
-bool
-ir3_key_lowers_nir(const struct ir3_shader_key *key)
-{
-       return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r |
-                       key->vsaturate_s | key->vsaturate_t | key->vsaturate_r |
-                       key->ucp_enables | key->color_two_side |
-                       key->fclamp_color | key->vclamp_color |
-                       key->tessellation | key->has_gs;
-}
-
 #define OPT(nir, pass, ...) ({                             \
    bool this_progress = false;                             \
    NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
@@ -224,54 +213,13 @@ should_split_wrmask(const nir_instr *instr, const void *data)
 }
 
 void
-ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
-               const struct ir3_shader_key *key)
+ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s)
 {
        struct nir_lower_tex_options tex_options = {
                        .lower_rect = 0,
                        .lower_tg4_offsets = true,
        };
 
-       if (key && (key->has_gs || key->tessellation)) {
-               switch (shader->type) {
-               case MESA_SHADER_VERTEX:
-                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, shader, key->tessellation);
-                       break;
-               case MESA_SHADER_TESS_CTRL:
-                       NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, shader, key->tessellation);
-                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
-                       break;
-               case MESA_SHADER_TESS_EVAL:
-                       NIR_PASS_V(s, ir3_nir_lower_tess_eval, key->tessellation);
-                       if (key->has_gs)
-                               NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, shader, key->tessellation);
-                       break;
-               case MESA_SHADER_GEOMETRY:
-                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       if (key) {
-               switch (shader->type) {
-               case MESA_SHADER_FRAGMENT:
-                       tex_options.saturate_s = key->fsaturate_s;
-                       tex_options.saturate_t = key->fsaturate_t;
-                       tex_options.saturate_r = key->fsaturate_r;
-                       break;
-               case MESA_SHADER_VERTEX:
-                       tex_options.saturate_s = key->vsaturate_s;
-                       tex_options.saturate_t = key->vsaturate_t;
-                       tex_options.saturate_r = key->vsaturate_r;
-                       break;
-               default:
-                       /* TODO */
-                       break;
-               }
-       }
-
        if (shader->compiler->gpu_id >= 400) {
                /* a4xx seems to have *no* sam.p */
                tex_options.lower_txp = ~0;  /* lower all txp */
@@ -289,31 +237,10 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
        OPT_V(s, nir_lower_regs_to_ssa);
        OPT_V(s, nir_lower_wrmasks, should_split_wrmask, s);
 
-       if (key) {
-               if (s->info.stage == MESA_SHADER_VERTEX) {
-                       OPT_V(s, nir_lower_clip_vs, key->ucp_enables, false, false, NULL);
-                       if (key->vclamp_color)
-                               OPT_V(s, nir_lower_clamp_color_outputs);
-               } else if (s->info.stage == MESA_SHADER_FRAGMENT) {
-                       OPT_V(s, nir_lower_clip_fs, key->ucp_enables, false);
-                       if (key->fclamp_color)
-                               OPT_V(s, nir_lower_clamp_color_outputs);
-               }
-               if (key->color_two_side) {
-                       OPT_V(s, nir_lower_two_sided_color);
-               }
-       } else {
-               /* only want to do this the first time (when key is null)
-                * and not again on any potential 2nd variant lowering pass:
-                */
-               OPT_V(s, ir3_nir_apply_trig_workarounds);
-
-               /* This wouldn't hurt to run multiple times, but there is
-                * no need to:
-                */
-               if (shader->type == MESA_SHADER_FRAGMENT)
-                       OPT_V(s, nir_lower_fb_read);
-       }
+       OPT_V(s, ir3_nir_apply_trig_workarounds);
+
+       if (shader->type == MESA_SHADER_FRAGMENT)
+               OPT_V(s, nir_lower_fb_read);
 
        OPT_V(s, nir_lower_tex, &tex_options);
        OPT_V(s, nir_lower_load_const_to_scalar);
@@ -328,7 +255,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
         *
         * NOTE that UBO analysis pass should only be done once, before variants
         */
-       const bool ubo_progress = !key && OPT(s, ir3_nir_analyze_ubo_ranges, shader);
+       const bool ubo_progress = OPT(s, ir3_nir_analyze_ubo_ranges, shader);
        const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast);
        /* UBO offset lowering has to come after we've decided what will be left as load_ubo */
        OPT_V(s, ir3_nir_lower_io_offsets, shader->compiler->gpu_id);
@@ -336,6 +263,102 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
        if (ubo_progress || idiv_progress)
                ir3_optimize_loop(s);
 
+       OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
+
+       if (ir3_shader_debug & IR3_DBG_DISASM) {
+               debug_printf("----------------------\n");
+               nir_print_shader(s, stdout);
+               debug_printf("----------------------\n");
+       }
+
+       nir_sweep(s);
+
+       /* The first time thru, when not creating variant, do the one-time
+        * const_state layout setup.  This should be done after ubo range
+        * analysis.
+        */
+       ir3_setup_const_state(shader, s, &shader->const_state);
+}
+
+void
+ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
+{
+       if (ir3_shader_debug & IR3_DBG_DISASM) {
+               debug_printf("----------------------\n");
+               nir_print_shader(s, stdout);
+               debug_printf("----------------------\n");
+       }
+
+       bool progress = false;
+
+       if (so->key.has_gs || so->key.tessellation) {
+               switch (so->shader->type) {
+               case MESA_SHADER_VERTEX:
+                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so->shader, so->key.tessellation);
+                       progress = true;
+                       break;
+               case MESA_SHADER_TESS_CTRL:
+                       NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so->shader, so->key.tessellation);
+                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
+                       progress = true;
+                       break;
+               case MESA_SHADER_TESS_EVAL:
+                       NIR_PASS_V(s, ir3_nir_lower_tess_eval, so->key.tessellation);
+                       if (so->key.has_gs)
+                               NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so->shader, so->key.tessellation);
+                       progress = true;
+                       break;
+               case MESA_SHADER_GEOMETRY:
+                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
+                       progress = true;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       if (s->info.stage == MESA_SHADER_VERTEX) {
+               if (so->key.ucp_enables)
+                       progress |= OPT(s, nir_lower_clip_vs, so->key.ucp_enables, false, false, NULL);
+               if (so->key.vclamp_color)
+                       progress |= OPT(s, nir_lower_clamp_color_outputs);
+       } else if (s->info.stage == MESA_SHADER_FRAGMENT) {
+               if (so->key.ucp_enables)
+                       progress |= OPT(s, nir_lower_clip_fs, so->key.ucp_enables, false);
+               if (so->key.fclamp_color)
+                       progress |= OPT(s, nir_lower_clamp_color_outputs);
+       }
+       if (so->key.color_two_side) {
+               OPT_V(s, nir_lower_two_sided_color);
+               progress = true;
+       }
+
+       struct nir_lower_tex_options tex_options = { };
+
+       switch (so->shader->type) {
+       case MESA_SHADER_FRAGMENT:
+               tex_options.saturate_s = so->key.fsaturate_s;
+               tex_options.saturate_t = so->key.fsaturate_t;
+               tex_options.saturate_r = so->key.fsaturate_r;
+               break;
+       case MESA_SHADER_VERTEX:
+               tex_options.saturate_s = so->key.vsaturate_s;
+               tex_options.saturate_t = so->key.vsaturate_t;
+               tex_options.saturate_r = so->key.vsaturate_r;
+               break;
+       default:
+               /* TODO */
+               break;
+       }
+
+       if (tex_options.saturate_s || tex_options.saturate_t ||
+               tex_options.saturate_r) {
+               progress |= OPT(s, nir_lower_tex, &tex_options);
+       }
+
+       if (progress)
+               ir3_optimize_loop(s);
+
        /* Do late algebraic optimization to turn add(a, neg(b)) back into
        * subs, then the mandatory cleanup after algebraic.  Note that it may
        * produce fnegs, and if so then we need to keep running to squash
@@ -350,8 +373,6 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
                OPT_V(s, nir_opt_cse);
        }
 
-       OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
-
        OPT_V(s, nir_opt_sink, nir_move_const_undef);
 
        if (ir3_shader_debug & IR3_DBG_DISASM) {
@@ -361,14 +382,6 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
        }
 
        nir_sweep(s);
-
-       /* The first time thru, when not creating variant, do the one-time
-        * const_state layout setup.  This should be done after ubo range
-        * analysis.
-        */
-       if (!key) {
-               ir3_setup_const_state(shader, s, &shader->const_state);
-       }
 }
 
 static void
index 39930a4077819b8eab03e74d576835926ca9945b..67c56f931d7959c83304911bb93bd54fee6bc4bf 100644 (file)
@@ -52,9 +52,8 @@ void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology);
 void ir3_nir_lower_gs(nir_shader *shader);
 
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
-bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
-void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
-               const struct ir3_shader_key *key);
+void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s);
+void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
 
 void ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
                struct ir3_const_state *const_state);
index 4bd427b4dc845e1710b1fe3ba63ed3d2f172345e..78ed751a3f25d0c0724b0e39b2b6dddc9b0b695b 100644 (file)
@@ -391,7 +391,7 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
        NIR_PASS_V(nir, nir_lower_amul, ir3_glsl_type_size);
 
        /* do first pass optimization, ignoring the key: */
-       ir3_optimize_nir(shader, nir, NULL);
+       ir3_optimize_nir(shader, nir);
 
        shader->nir = nir;
        if (ir3_shader_debug & IR3_DBG_DISASM) {
index a16c06872c319303c90884ab8e34ed26dd065e6d..cd9950436c5cfcb3fb6eecbc66ae85078754bc27 100644 (file)
@@ -491,12 +491,14 @@ int main(int argc, char **argv)
        s.compiler = compiler;
        s.nir = nir;
 
-       ir3_optimize_nir(&s, nir, NULL);
+       ir3_optimize_nir(&s, nir);
 
        v.key = key;
        v.shader = &s;
        s.type = v.type = nir->info.stage;
 
+       ir3_nir_lower_variant(&v, nir);
+
        info = "NIR compiler";
        ret = ir3_compile_shader_nir(s.compiler, &v);
        if (ret) {