radeonsi/nir: implement pipe_screen::finalize_nir
author Marek Olšák <marek.olsak@amd.com>
Fri, 27 Sep 2019 00:24:17 +0000 (20:24 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 24 Oct 2019 01:12:52 +0000 (21:12 -0400)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_shader_nir.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index ba8271d3fe3f2e4420fa2cbd7b42208072d08ce3..19d4cca0dba63e6a2cc1b798b8c84cf4e8333ac9 100644 (file)
@@ -126,9 +126,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
        } else {
                assert(program->ir_type == PIPE_SHADER_IR_NIR);
 
-               si_nir_opts(sel->nir);
                si_nir_scan_shader(sel->nir, &sel->info);
-               si_lower_nir(sel->screen, sel->nir);
        }
 
        /* Store the declared LDS size into tgsi_shader_info for the shader
index d9cd4ff92e99bf71cfa6dfb5d90b4c5981f383bd..1b7fdf23c6026b12b3906a0b67d38c1a1d06d96c 100644 (file)
@@ -977,6 +977,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
                si_set_max_shader_compiler_threads;
        sscreen->b.is_parallel_shader_compilation_finished =
                si_is_parallel_shader_compilation_finished;
+       sscreen->b.finalize_nir = si_finalize_nir;
 
        si_init_screen_get_functions(sscreen);
        si_init_screen_buffer_functions(sscreen);
index cb8d6dbcced74b40b9eb1794cf4a83d55ec64d12..1d41b7aa042cb25d2affe6a12f2435adb573b715 100644 (file)
@@ -756,9 +756,8 @@ void si_nir_scan_shader(const struct nir_shader *nir,
                        struct tgsi_shader_info *info);
 void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
                           struct tgsi_tessctrl_info *out);
-void si_nir_lower_ps_inputs(struct nir_shader *nir);
-void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir);
-void si_nir_opts(struct nir_shader *nir);
+void si_nir_adjust_driver_locations(struct nir_shader *nir);
+void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize);
 
 /* si_state_shaders.c */
 void gfx9_get_gs_info(struct si_shader_selector *es,
index aa82a7bd371d6d9d69d9f2b44d13ef8cd3ce0a40..4df625ed274068e943367053cc95583fc3b25679 100644 (file)
@@ -801,7 +801,7 @@ void si_nir_scan_shader(const struct nir_shader *nir,
        }
 }
 
-void
+static void
 si_nir_opts(struct nir_shader *nir)
 {
        bool progress;
@@ -913,7 +913,7 @@ si_nir_lower_color(nir_shader *nir)
         }
 }
 
-void si_nir_lower_ps_inputs(struct nir_shader *nir)
+static void si_nir_lower_ps_inputs(struct nir_shader *nir)
 {
        if (nir->info.stage != MESA_SHADER_FRAGMENT)
                return;
@@ -938,11 +938,7 @@ void si_nir_lower_ps_inputs(struct nir_shader *nir)
                   nir_var_shader_in);
 }
 
-/**
- * Perform "lowering" operations on the NIR that are run once when the shader
- * selector is created.
- */
-void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
+void si_nir_adjust_driver_locations(struct nir_shader *nir)
 {
        /* Adjust the driver location of inputs and outputs. The state tracker
         * interprets them as slots, while the ac/nir backend interprets them
@@ -963,7 +959,14 @@ void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
                                variable->data.driver_location += 1;
                }
        }
+}
 
+/**
+ * Perform "lowering" operations on the NIR that are run once when the shader
+ * selector is created.
+ */
+static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
+{
        /* Perform lowerings (and optimizations) of code.
         *
         * Performance considerations aside, we must:
@@ -990,14 +993,20 @@ void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
        /* Lower load constants to scalar and then clean up the mess */
        NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
        NIR_PASS_V(nir, nir_lower_var_copies);
+       NIR_PASS_V(nir, nir_lower_pack);
+       NIR_PASS_V(nir, nir_opt_access);
        si_nir_opts(nir);
 
        /* Lower large variables that are always constant with load_constant
         * intrinsics, which get turned into PC-relative loads from a data
         * section next to the shader.
+        *
+        * st/mesa calls finalize_nir twice, but we can't call this pass twice.
         */
-       NIR_PASS_V(nir, nir_opt_large_constants,
-                  glsl_get_natural_size_align_bytes, 16);
+       if (!nir->constant_data) {
+               NIR_PASS_V(nir, nir_opt_large_constants,
+                          glsl_get_natural_size_align_bytes, 16);
+       }
 
        ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
 
@@ -1006,6 +1015,16 @@ void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
        NIR_PASS_V(nir, nir_lower_bool_to_int32);
 }
 
+void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize)
+{ /* Installed as pipe_screen::finalize_nir by the screen-create code; 'optimize' is not read in this body. */
+       struct si_screen *sscreen = (struct si_screen *)screen; /* driver screen, needed by si_lower_nir */
+       struct nir_shader *nir = (struct nir_shader *)nirptr; /* the hook passes the shader as an untyped pointer */
+
+       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); /* gather shader info from the entrypoint before lowering */
+       si_nir_lower_ps_inputs(nir); /* returns immediately unless the stage is MESA_SHADER_FRAGMENT */
+       si_lower_nir(sscreen, nir); /* lowering + opts; it skips nir_opt_large_constants when constant_data is already set */
+}
+
 static void declare_nir_input_vs(struct si_shader_context *ctx,
                                 struct nir_variable *variable,
                                 unsigned input_index,
index 04ff331444b3077552af5cb86c9a670570195870..e5e7d523cd44d7b2419a9f8ebe9b03d2bdd95ee9 100644 (file)
@@ -2472,9 +2472,6 @@ static void si_init_shader_selector_async(void *job, int thread_index)
        assert(thread_index < ARRAY_SIZE(sscreen->compiler));
        compiler = &sscreen->compiler[thread_index];
 
-       if (sel->nir)
-               si_lower_nir(sel->screen, sel->nir);
-
        /* Compile the main shader part for use with a prolog and/or epilog.
         * If this fails, the driver will try to compile a monolithic shader
         * on demand.
@@ -2715,10 +2712,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                        sel->nir = state->ir.nir;
                }
 
-               si_nir_lower_ps_inputs(sel->nir);
-               si_nir_opts(sel->nir);
                si_nir_scan_shader(sel->nir, &sel->info);
                si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
+               si_nir_adjust_driver_locations(sel->nir);
        }
 
        sel->type = sel->info.processor;