From ed29b576cbc1da8eb0d1fa3483104bbf61e73ccf Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Wed, 17 Jun 2020 15:18:06 +0200 Subject: [PATCH] nir/scheduler: Add an option to specify what stages share memory for I/O The scheduler has code to handle hardware that shares the same memory for inputs and outputs. Seeing as the specific stages that need this is probably hardware-dependent, this patch makes it a configurable option instead of hard-coding it to everything but fragment shaders. Reviewed-by: Eric Anholt Reviewed-by: Iago Toral Quiroga Part-of: --- src/broadcom/compiler/vir.c | 18 ++++++++++++++---- src/compiler/nir/nir.h | 15 ++++++++++++++- src/compiler/nir/nir_schedule.c | 22 +++++++++++++++------- 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index adfd587d534..4c9e99cfd77 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -1072,10 +1072,20 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler, NIR_PASS_V(c->s, nir_lower_bool_to_int32); NIR_PASS_V(c->s, nir_convert_from_ssa, true); - /* Schedule for about half our register space, to enable more shaders - * to hit 4 threads. - */ - NIR_PASS_V(c->s, nir_schedule, 24); + static const struct nir_schedule_options schedule_options = { + /* Schedule for about half our register space, to enable more + * shaders to hit 4 threads. + */ + .threshold = 24, + + /* Vertex shaders share the same memory for inputs and outputs, + * fragment and geometry shaders do not. 
+ */ + .stages_with_shared_io_memory = + (((1 << MESA_ALL_SHADER_STAGES) - 1) & + ~(1 << MESA_SHADER_FRAGMENT)), + }; + NIR_PASS_V(c->s, nir_schedule, &schedule_options); v3d_nir_to_vir(c); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index b0316df4ffa..463e5468fc2 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4579,7 +4579,20 @@ bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes, nir_should_vectorize_mem_func callback, nir_variable_mode robust_modes); -void nir_schedule(nir_shader *shader, int threshold); +typedef struct nir_schedule_options { + /* On some hardware with some stages the inputs and outputs to the shader + * share the same memory. In that case the scheduler needs to ensure that all + * output writes are scheduled after all of the input reads to avoid + * overwriting them. This is a bitmask of stages that need that. + */ + unsigned stages_with_shared_io_memory; + /* The approximate amount of register pressure at which point the scheduler + * will try to reduce register usage. + */ + int threshold; +} nir_schedule_options; + +void nir_schedule(nir_shader *shader, const nir_schedule_options *options); void nir_strip(nir_shader *shader); diff --git a/src/compiler/nir/nir_schedule.c b/src/compiler/nir/nir_schedule.c index 469ec95c946..9fa02547dc1 100644 --- a/src/compiler/nir/nir_schedule.c +++ b/src/compiler/nir/nir_schedule.c @@ -114,6 +114,9 @@ typedef struct { * pressure-prioritizing scheduling heuristic. */ int threshold; + + /* Mask of stages that share memory for inputs and outputs */ + unsigned stages_with_shared_io_memory; } nir_schedule_scoreboard; /* When walking the instructions in reverse, we use this flag to swap @@ -323,10 +326,11 @@ nir_schedule_intrinsic_deps(nir_deps_state *state, break; case nir_intrinsic_store_output: - /* For some non-FS shader stages, or for some hardware, output stores - * affect the same shared memory as input loads. 
+ /* For some hardware and stages, output stores affect the same shared + * memory as input loads. */ - if (state->scoreboard->shader->info.stage != MESA_SHADER_FRAGMENT) + if ((state->scoreboard->stages_with_shared_io_memory & + (1 << state->scoreboard->shader->info.stage))) add_write_dep(state, &state->load_input, n); /* Make sure that preceding discards stay before the store_output */ @@ -979,14 +983,17 @@ nir_schedule_ssa_def_init_scoreboard(nir_ssa_def *def, void *state) } static nir_schedule_scoreboard * -nir_schedule_get_scoreboard(nir_shader *shader, int threshold) +nir_schedule_get_scoreboard(nir_shader *shader, + const nir_schedule_options *options) { nir_schedule_scoreboard *scoreboard = rzalloc(NULL, nir_schedule_scoreboard); scoreboard->shader = shader; scoreboard->live_values = _mesa_pointer_set_create(scoreboard); scoreboard->remaining_uses = _mesa_pointer_hash_table_create(scoreboard); - scoreboard->threshold = threshold; + scoreboard->threshold = options->threshold; + scoreboard->stages_with_shared_io_memory = + options->stages_with_shared_io_memory; scoreboard->pressure = 0; nir_foreach_function(function, shader) { @@ -1063,10 +1070,11 @@ nir_schedule_validate_uses(nir_schedule_scoreboard *scoreboard) * tune. */ void -nir_schedule(nir_shader *shader, int threshold) +nir_schedule(nir_shader *shader, + const nir_schedule_options *options) { nir_schedule_scoreboard *scoreboard = nir_schedule_get_scoreboard(shader, - threshold); + options); if (debug) { fprintf(stderr, "NIR shader before scheduling:\n"); -- 2.30.2