intel/cs: Grow prog_data::param on-demand for thread_local_id_index
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 29 Sep 2017 17:37:40 +0000 (10:37 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 13 Oct 2017 05:39:30 +0000 (22:39 -0700)
Instead of making the caller of brw_compile_cs add something to the
param array for thread_local_id_index, just add it on demand in
brw_nir_lower_cs_intrinsics and grow the array.  This is now safe to do
because everyone is now using ralloc for prog_data::param.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_nir_lower_cs_intrinsics.c
src/intel/vulkan/anv_pipeline.c
src/mesa/drivers/dri/i965/brw_cs.c

index 371df71055443004a9a660dc76e29255e921b9bc..c08e28d166ab423c525a04b52b65db2557a85ac5 100644 (file)
@@ -6758,14 +6758,6 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
    shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
 
-   /* Now that we cloned the nir_shader, we can update num_uniforms based on
-    * the thread_local_id_index.
-    */
-   assert(prog_data->thread_local_id_index >= 0);
-   shader->num_uniforms =
-      MAX2(shader->num_uniforms,
-           (unsigned)4 * (prog_data->thread_local_id_index + 1));
-
    brw_nir_lower_cs_intrinsics(shader, prog_data);
    shader = brw_postprocess_nir(shader, compiler, true);
 
index 602ef2e17498151cd8e14cc498a64d3284a46045..01718eb5dd1b0f02366a41671220da844a30cea8 100644 (file)
@@ -30,12 +30,12 @@ struct lower_intrinsics_state {
    nir_function_impl *impl;
    bool progress;
    nir_builder builder;
-   bool cs_thread_id_used;
 };
 
 static nir_ssa_def *
 read_thread_local_id(struct lower_intrinsics_state *state)
 {
+   struct brw_cs_prog_data *prog_data = state->prog_data;
    nir_builder *b = &state->builder;
    nir_shader *nir = state->nir;
    const unsigned *sizes = nir->info.cs.local_size;
@@ -47,9 +47,12 @@ read_thread_local_id(struct lower_intrinsics_state *state)
    if (group_size <= 8)
       return nir_imm_int(b, 0);
 
-   assert(state->prog_data->thread_local_id_index >= 0);
-   state->cs_thread_id_used = true;
-   const int id_index = state->prog_data->thread_local_id_index;
+   if (prog_data->thread_local_id_index == -1) {
+      prog_data->thread_local_id_index = prog_data->base.nr_params;
+      brw_stage_prog_data_add_params(&prog_data->base, 1);
+      nir->num_uniforms += 4;
+   }
+   unsigned id_index = prog_data->thread_local_id_index;
 
    nir_intrinsic_instr *load =
       nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
@@ -162,6 +165,8 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,
    state.nir = nir;
    state.prog_data = prog_data;
 
+   state.prog_data->thread_local_id_index = -1;
+
    do {
       state.progress = false;
       nir_foreach_function(function, nir) {
@@ -173,8 +178,5 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,
       progress |= state.progress;
    } while (state.progress);
 
-   if (!state.cs_thread_id_used)
-      state.prog_data->thread_local_id_index = -1;
-
    return progress;
 }
index 1e81edd390cbfa43c21d01a1c5fba26ad3a1d0b0..148d080e9c31461638ad983b83469cecdbb5d9d4 100644 (file)
@@ -409,10 +409,6 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
       pipeline->needs_data_cache = true;
    }
 
-   if (stage == MESA_SHADER_COMPUTE)
-      ((struct brw_cs_prog_data *)prog_data)->thread_local_id_index =
-         prog_data->nr_params++; /* The CS Thread ID uniform */
-
    if (nir->info.num_ssbos > 0)
       pipeline->needs_data_cache = true;
 
index 41a543138ba49cb3ec8f39456c267e77b9f8cce3..dacb25e5eeafdd2c8aaf1dd370fd4f7de23af875 100644 (file)
@@ -83,9 +83,6 @@ brw_codegen_cs_prog(struct brw_context *brw,
     */
    int param_count = cp->program.nir->num_uniforms / 4;
 
-   /* The backend also sometimes add a param for the thread local id. */
-   prog_data.thread_local_id_index = param_count++;
-
    prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count);
    prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
    prog_data.base.nr_params = param_count;