From 79d403417cacd2728916e32ae55f4fc2a018515c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 29 Sep 2017 12:22:48 -0700 Subject: [PATCH] intel/cs: Make thread_local_id a regular builtin param This is a lot more natural than special-casing it all over the place. We still have to do a bit of special-casing in assign_constant_locations but it's not special-cased quite as badly as it was before. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_compiler.h | 3 +- src/intel/compiler/brw_fs.cpp | 42 +++++++++---------- .../compiler/brw_nir_lower_cs_intrinsics.c | 12 +++--- src/intel/vulkan/anv_cmd_buffer.c | 10 ++--- .../drivers/dri/i965/gen6_constant_state.c | 10 ++--- 5 files changed, 37 insertions(+), 40 deletions(-) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index e2046581278..014202d36ca 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -542,6 +542,8 @@ enum brw_param_builtin { BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W, BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X, BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y, + + BRW_PARAM_BUILTIN_THREAD_LOCAL_ID, }; #define BRW_PARAM_BUILTIN_CLIP_PLANE(idx, comp) \ @@ -738,7 +740,6 @@ struct brw_cs_prog_data { unsigned threads; bool uses_barrier; bool uses_num_work_groups; - int thread_local_id_index; struct { struct brw_push_const_block cross_thread; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index f9e7385afdf..66cb33131b6 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1935,6 +1935,20 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, } } +static int +get_thread_local_id_param_index(const brw_stage_prog_data *prog_data) +{ + if (prog_data->nr_params == 0) + return -1; + + /* The local thread id is always the last parameter in the list */ + uint32_t last_param = prog_data->param[prog_data->nr_params - 1]; + if (last_param == 
BRW_PARAM_BUILTIN_THREAD_LOCAL_ID) + return prog_data->nr_params - 1; + + return -1; +} + /** * Assign UNIFORM file registers to either push constants or pull constants. * @@ -1963,10 +1977,6 @@ fs_visitor::assign_constant_locations() bool contiguous[uniforms]; memset(contiguous, 0, sizeof(contiguous)); - int thread_local_id_index = - (stage == MESA_SHADER_COMPUTE) ? - brw_cs_prog_data(stage_prog_data)->thread_local_id_index : -1; - /* First, we walk through the instructions and do two things: * * 1) Figure out which uniforms are live. @@ -2009,8 +2019,7 @@ fs_visitor::assign_constant_locations() } } - if (thread_local_id_index >= 0 && !is_live[thread_local_id_index]) - thread_local_id_index = -1; + int thread_local_id_index = get_thread_local_id_param_index(stage_prog_data); /* Only allow 16 registers (128 uniform components) as push constants. * @@ -2118,22 +2127,15 @@ fs_visitor::assign_constant_locations() * push_constant_loc[i] <= i and we can do it in one smooth loop without * having to make a copy. 
*/ - int new_thread_local_id_index = -1; for (unsigned int i = 0; i < uniforms; i++) { uint32_t value = param[i]; if (pull_constant_loc[i] != -1) { stage_prog_data->pull_param[pull_constant_loc[i]] = value; } else if (push_constant_loc[i] != -1) { stage_prog_data->param[push_constant_loc[i]] = value; - if (thread_local_id_index == (int)i) - new_thread_local_id_index = push_constant_loc[i]; } } ralloc_free(param); - - if (stage == MESA_SHADER_COMPUTE) - brw_cs_prog_data(stage_prog_data)->thread_local_id_index = - new_thread_local_id_index; } bool @@ -6698,24 +6700,20 @@ cs_fill_push_const_info(const struct gen_device_info *devinfo, struct brw_cs_prog_data *cs_prog_data) { const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - bool fill_thread_id = - cs_prog_data->thread_local_id_index >= 0 && - cs_prog_data->thread_local_id_index < (int)prog_data->nr_params; + int thread_local_id_index = get_thread_local_id_param_index(prog_data); bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell; /* The thread ID should be stored in the last param dword */ - assert(prog_data->nr_params > 0 || !fill_thread_id); - assert(!fill_thread_id || - cs_prog_data->thread_local_id_index == - (int)prog_data->nr_params - 1); + assert(thread_local_id_index == -1 || + thread_local_id_index == (int)prog_data->nr_params - 1); unsigned cross_thread_dwords, per_thread_dwords; if (!cross_thread_supported) { cross_thread_dwords = 0u; per_thread_dwords = prog_data->nr_params; - } else if (fill_thread_id) { + } else if (thread_local_id_index >= 0) { /* Fill all but the last register with cross-thread payload */ - cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8); + cross_thread_dwords = 8 * (thread_local_id_index / 8); per_thread_dwords = prog_data->nr_params - cross_thread_dwords; assert(per_thread_dwords > 0 && per_thread_dwords <= 8); } else { diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c 
b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c index 01718eb5dd1..9b4a0fdf2eb 100644 --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c @@ -30,6 +30,7 @@ struct lower_intrinsics_state { nir_function_impl *impl; bool progress; nir_builder builder; + int thread_local_id_index; }; static nir_ssa_def * @@ -47,12 +48,13 @@ read_thread_local_id(struct lower_intrinsics_state *state) if (group_size <= 8) return nir_imm_int(b, 0); - if (prog_data->thread_local_id_index == -1) { - prog_data->thread_local_id_index = prog_data->base.nr_params; - brw_stage_prog_data_add_params(&prog_data->base, 1); + if (state->thread_local_id_index == -1) { + state->thread_local_id_index = prog_data->base.nr_params; + uint32_t *param = brw_stage_prog_data_add_params(&prog_data->base, 1); + *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID; nir->num_uniforms += 4; } - unsigned id_index = prog_data->thread_local_id_index; + unsigned id_index = state->thread_local_id_index; nir_intrinsic_instr *load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); @@ -165,7 +167,7 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir, state.nir = nir; state.prog_data = prog_data; - state.prog_data->thread_local_id_index = -1; + state.thread_local_id_index = -1; do { state.progress = false; diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 64d1417f5b1..b45f8f83757 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -707,12 +707,10 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) uint32_t *u32_map = state.map; if (cs_prog_data->push.cross_thread.size > 0) { - assert(cs_prog_data->thread_local_id_index < 0 || - cs_prog_data->thread_local_id_index >= - cs_prog_data->push.cross_thread.dwords); for (unsigned i = 0; i < cs_prog_data->push.cross_thread.dwords; i++) { + assert(prog_data->param[i] != BRW_PARAM_BUILTIN_THREAD_LOCAL_ID); u32_map[i] = 
anv_push_constant_value(data, prog_data->param[i]); } } @@ -724,11 +722,11 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) cs_prog_data->push.cross_thread.regs); unsigned src = cs_prog_data->push.cross_thread.dwords; for ( ; src < prog_data->nr_params; src++, dst++) { - if (src != cs_prog_data->thread_local_id_index) { + if (prog_data->param[src] == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID) { + u32_map[dst] = t * cs_prog_data->simd_size; + } else { u32_map[dst] = anv_push_constant_value(data, prog_data->param[src]); - } else { - u32_map[dst] = t * cs_prog_data->simd_size; } } } diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c index eb9e29187c4..62ad6b04976 100644 --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c @@ -305,12 +305,10 @@ brw_upload_cs_push_constants(struct brw_context *brw, if (cs_prog_data->push.cross_thread.size > 0) { uint32_t *param_copy = param; - assert(cs_prog_data->thread_local_id_index < 0 || - cs_prog_data->thread_local_id_index >= - cs_prog_data->push.cross_thread.dwords); for (unsigned i = 0; i < cs_prog_data->push.cross_thread.dwords; i++) { + assert(prog_data->param[i] != BRW_PARAM_BUILTIN_THREAD_LOCAL_ID); param_copy[i] = brw_param_value(brw, prog, stage_state, prog_data->param[i]); } @@ -323,11 +321,11 @@ brw_upload_cs_push_constants(struct brw_context *brw, cs_prog_data->push.cross_thread.regs); unsigned src = cs_prog_data->push.cross_thread.dwords; for ( ; src < prog_data->nr_params; src++, dst++) { - if (src != cs_prog_data->thread_local_id_index) { + if (prog_data->param[src] == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID) { + param[dst] = t * cs_prog_data->simd_size; + } else { param[dst] = brw_param_value(brw, prog, stage_state, prog_data->param[src]); - } else { - param[dst] = t * cs_prog_data->simd_size; } } } -- 2.30.2