From: Jason Ekstrand Date: Fri, 29 Sep 2017 17:37:40 +0000 (-0700) Subject: intel/cs: Grow prog_data::param on-demand for thread_local_id_index X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6bcc5c0c75226bb89f35d7529de11182051c729e;p=mesa.git intel/cs: Grow prog_data::param on-demand for thread_local_id_index Instead of making the caller of brw_compile_cs add something to the param array for thread_local_id_index, just add it on-demand in brw_nir_lower_cs_intrinsics and grow the array. This is now safe to do because everyone is now using ralloc for prog_data::param. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 371df710554..c08e28d166a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6758,14 +6758,6 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true); - /* Now that we cloned the nir_shader, we can update num_uniforms based on - * the thread_local_id_index. 
- */ - assert(prog_data->thread_local_id_index >= 0); - shader->num_uniforms = - MAX2(shader->num_uniforms, - (unsigned)4 * (prog_data->thread_local_id_index + 1)); - brw_nir_lower_cs_intrinsics(shader, prog_data); shader = brw_postprocess_nir(shader, compiler, true); diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c index 602ef2e1749..01718eb5dd1 100644 --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c @@ -30,12 +30,12 @@ struct lower_intrinsics_state { nir_function_impl *impl; bool progress; nir_builder builder; - bool cs_thread_id_used; }; static nir_ssa_def * read_thread_local_id(struct lower_intrinsics_state *state) { + struct brw_cs_prog_data *prog_data = state->prog_data; nir_builder *b = &state->builder; nir_shader *nir = state->nir; const unsigned *sizes = nir->info.cs.local_size; @@ -47,9 +47,12 @@ read_thread_local_id(struct lower_intrinsics_state *state) if (group_size <= 8) return nir_imm_int(b, 0); - assert(state->prog_data->thread_local_id_index >= 0); - state->cs_thread_id_used = true; - const int id_index = state->prog_data->thread_local_id_index; + if (prog_data->thread_local_id_index == -1) { + prog_data->thread_local_id_index = prog_data->base.nr_params; + brw_stage_prog_data_add_params(&prog_data->base, 1); + nir->num_uniforms += 4; + } + unsigned id_index = prog_data->thread_local_id_index; nir_intrinsic_instr *load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); @@ -162,6 +165,8 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir, state.nir = nir; state.prog_data = prog_data; + state.prog_data->thread_local_id_index = -1; + do { state.progress = false; nir_foreach_function(function, nir) { @@ -173,8 +178,5 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir, progress |= state.progress; } while (state.progress); - if (!state.cs_thread_id_used) - state.prog_data->thread_local_id_index = -1; - return progress; } diff 
--git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 1e81edd390c..148d080e9c3 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -409,10 +409,6 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, pipeline->needs_data_cache = true; } - if (stage == MESA_SHADER_COMPUTE) - ((struct brw_cs_prog_data *)prog_data)->thread_local_id_index = - prog_data->nr_params++; /* The CS Thread ID uniform */ - if (nir->info.num_ssbos > 0) pipeline->needs_data_cache = true; diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index 41a543138ba..dacb25e5eea 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -83,9 +83,6 @@ brw_codegen_cs_prog(struct brw_context *brw, */ int param_count = cp->program.nir->num_uniforms / 4; - /* The backend also sometimes add a param for the thread local id. */ - prog_data.thread_local_id_index = param_count++; - prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.nr_params = param_count;