From: Marek Olšák Date: Fri, 27 Sep 2019 00:24:17 +0000 (-0400) Subject: radeonsi/nir: implement pipe_screen::finalize_nir X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fff884e09d2f1f560a1106d9b6f70eabec84b0bc;p=mesa.git radeonsi/nir: implement pipe_screen::finalize_nir Reviewed-by: Kenneth Graunke --- diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index ba8271d3fe3..19d4cca0dba 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -126,9 +126,7 @@ static void si_create_compute_state_async(void *job, int thread_index) } else { assert(program->ir_type == PIPE_SHADER_IR_NIR); - si_nir_opts(sel->nir); si_nir_scan_shader(sel->nir, &sel->info); - si_lower_nir(sel->screen, sel->nir); } /* Store the declared LDS size into tgsi_shader_info for the shader diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index d9cd4ff92e9..1b7fdf23c60 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -977,6 +977,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws, si_set_max_shader_compiler_threads; sscreen->b.is_parallel_shader_compilation_finished = si_is_parallel_shader_compilation_finished; + sscreen->b.finalize_nir = si_finalize_nir; si_init_screen_get_functions(sscreen); si_init_screen_buffer_functions(sscreen); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index cb8d6dbcced..1d41b7aa042 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -756,9 +756,8 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct tgsi_shader_info *info); void si_nir_scan_tess_ctrl(const struct nir_shader *nir, struct tgsi_tessctrl_info *out); -void si_nir_lower_ps_inputs(struct nir_shader *nir); -void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir); -void si_nir_opts(struct nir_shader *nir); +void si_nir_adjust_driver_locations(struct nir_shader *nir); +void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize); /* si_state_shaders.c */ void gfx9_get_gs_info(struct si_shader_selector *es, diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index aa82a7bd371..4df625ed274 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -801,7 +801,7 @@ void si_nir_scan_shader(const struct nir_shader *nir, } } -void +static void si_nir_opts(struct nir_shader *nir) { bool progress; @@ -913,7 +913,7 @@ si_nir_lower_color(nir_shader *nir) } } -void si_nir_lower_ps_inputs(struct nir_shader *nir) +static void si_nir_lower_ps_inputs(struct nir_shader *nir) { if (nir->info.stage != MESA_SHADER_FRAGMENT) return; @@ -938,11 +938,7 @@ void si_nir_lower_ps_inputs(struct nir_shader *nir) nir_var_shader_in); } -/** - * Perform "lowering" operations on the NIR that are run once when the shader - * selector is created. - */ -void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) +void si_nir_adjust_driver_locations(struct nir_shader *nir) { /* Adjust the driver location of inputs and outputs. The state tracker * interprets them as slots, while the ac/nir backend interprets them @@ -963,7 +959,14 @@ void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) variable->data.driver_location += 1; } } +} +/** + * Perform "lowering" operations on the NIR that are run once when the shader + * selector is created. + */ +static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) +{ /* Perform lowerings (and optimizations) of code. * * Performance considerations aside, we must: @@ -990,14 +993,20 @@ void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) /* Lower load constants to scalar and then clean up the mess */ NIR_PASS_V(nir, nir_lower_load_const_to_scalar); NIR_PASS_V(nir, nir_lower_var_copies); + NIR_PASS_V(nir, nir_lower_pack); + NIR_PASS_V(nir, nir_opt_access); si_nir_opts(nir); /* Lower large variables that are always constant with load_constant * intrinsics, which get turned into PC-relative loads from a data * section next to the shader. + * + * st/mesa calls finalize_nir twice, but we can't call this pass twice. */ - NIR_PASS_V(nir, nir_opt_large_constants, - glsl_get_natural_size_align_bytes, 16); + if (!nir->constant_data) { + NIR_PASS_V(nir, nir_opt_large_constants, + glsl_get_natural_size_align_bytes, 16); + } ac_lower_indirect_derefs(nir, sscreen->info.chip_class); @@ -1006,6 +1015,16 @@ void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) NIR_PASS_V(nir, nir_lower_bool_to_int32); } +void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize) +{ + struct si_screen *sscreen = (struct si_screen *)screen; + struct nir_shader *nir = (struct nir_shader *)nirptr; + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + si_nir_lower_ps_inputs(nir); + si_lower_nir(sscreen, nir); +} + static void declare_nir_input_vs(struct si_shader_context *ctx, struct nir_variable *variable, unsigned input_index, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 04ff331444b..e5e7d523cd4 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2472,9 +2472,6 @@ static void si_init_shader_selector_async(void *job, int thread_index) assert(thread_index < ARRAY_SIZE(sscreen->compiler)); compiler = &sscreen->compiler[thread_index]; - if (sel->nir) - si_lower_nir(sel->screen, sel->nir); - /* Compile the main shader part for use with a prolog and/or epilog. * If this fails, the driver will try to compile a monolithic shader * on demand. @@ -2715,10 +2712,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->nir = state->ir.nir; } - si_nir_lower_ps_inputs(sel->nir); - si_nir_opts(sel->nir); si_nir_scan_shader(sel->nir, &sel->info); si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info); + si_nir_adjust_driver_locations(sel->nir); } sel->type = sel->info.processor;