From ec15e0d3015ccef4d6c60bd282cfb848118557ae Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Tue, 18 Apr 2017 05:23:47 +1000
Subject: [PATCH] radv: optimise compute shader grid size emission.

Reviewed-by: Bas Nieuwenhuizen
Signed-off-by: Dave Airlie
---
Review notes (text in this section is not part of the commit message):

 * What the patch does: gather_intrinsic_info() records the component
   count of the load_num_work_groups intrinsic in
   info->cs.grid_components_used.  create_function() then declares the
   grid-size argument / AC_UD_CS_GRID_SIZE user data with exactly that
   many components, and omits it when the count is zero.  The dispatch
   paths in radv_cmd_buffer.c emit only that many grid-size dwords.

 * Fix relative to the copy under review: grid_used is now read from
   cmd_buffer->state.compute_pipeline, i.e. the same pipeline object that
   the visible radv_lookup_user_sgpr() calls pass for the
   AC_UD_CS_GRID_SIZE lookup.  The reviewed copy read it from
   cmd_buffer->state.pipeline instead, so the SGPR location and the
   component count could come from two different pipelines.
   NOTE(review): state.pipeline is presumably the graphics pipeline,
   which would make the old expression a NULL/invalid dereference on
   compute-only command buffers - confirm against radv_private.h.

 * The "index" line for radv_cmd_buffer.c still carries the original
   post-image blob id, which no longer matches after the fix above.
   Indentation was reconstructed from a whitespace-flattened copy, so
   context lines may need whitespace-tolerant application.

 src/amd/common/ac_nir_to_llvm.c  | 13 ++++++++-----
 src/amd/common/ac_shader_info.c  |  3 +++
 src/amd/common/ac_shader_info.h  |  3 +++
 src/amd/vulkan/radv_cmd_buffer.c | 23 +++++++++++++++--------
 4 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 31b772f5669..f8543563293 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -604,7 +604,8 @@ static void create_function(struct nir_to_llvm_context *ctx)
 
 	switch (ctx->stage) {
 	case MESA_SHADER_COMPUTE:
-		arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
+		if (ctx->shader_info->info.cs.grid_components_used)
+			arg_types[arg_idx++] = LLVMVectorType(ctx->i32, ctx->shader_info->info.cs.grid_components_used); /* grid size */
 		user_sgpr_count = arg_idx;
 		arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
 		arg_types[arg_idx++] = ctx->i32;
@@ -762,10 +763,12 @@ static void create_function(struct nir_to_llvm_context *ctx)
 
 	switch (ctx->stage) {
 	case MESA_SHADER_COMPUTE:
-		set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
-		user_sgpr_idx += 3;
-		ctx->num_work_groups =
-			LLVMGetParam(ctx->main_function, arg_idx++);
+		if (ctx->shader_info->info.cs.grid_components_used) {
+			set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used);
+			user_sgpr_idx += ctx->shader_info->info.cs.grid_components_used;
+			ctx->num_work_groups =
+				LLVMGetParam(ctx->main_function, arg_idx++);
+		}
 		ctx->workgroup_ids =
 			LLVMGetParam(ctx->main_function, arg_idx++);
 		ctx->tg_size =
diff --git a/src/amd/common/ac_shader_info.c b/src/amd/common/ac_shader_info.c
index 6ad562505cf..ef3692563b2 100644
--- a/src/amd/common/ac_shader_info.c
+++ b/src/amd/common/ac_shader_info.c
@@ -33,6 +33,9 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, struct ac_shader_info *info)
 	case nir_intrinsic_load_draw_id:
 		info->vs.needs_draw_id = true;
 		break;
+	case nir_intrinsic_load_num_work_groups:
+		info->cs.grid_components_used = instr->num_components;
+		break;
 	default:
 		break;
 	}
diff --git a/src/amd/common/ac_shader_info.h b/src/amd/common/ac_shader_info.h
index 7e2b6c885a6..ed97d06d56b 100644
--- a/src/amd/common/ac_shader_info.h
+++ b/src/amd/common/ac_shader_info.h
@@ -35,6 +35,9 @@ struct ac_shader_info {
 	struct {
 		bool needs_sample_positions;
 	} ps;
+	struct {
+		uint8_t grid_components_used;
+	} cs;
 };
 
 void
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 52b74539068..7056d0d0266 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2781,11 +2781,14 @@ void radv_CmdDispatch(
 							     MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
 	if (loc->sgpr_idx != -1) {
 		assert(!loc->indirect);
-		assert(loc->num_sgprs == 3);
-		radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+		uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
+		assert(loc->num_sgprs == grid_used);
+		radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
 		radeon_emit(cmd_buffer->cs, x);
-		radeon_emit(cmd_buffer->cs, y);
-		radeon_emit(cmd_buffer->cs, z);
+		if (grid_used > 1)
+			radeon_emit(cmd_buffer->cs, y);
+		if (grid_used > 2)
+			radeon_emit(cmd_buffer->cs, z);
 	}
 
 	radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
@@ -2817,7 +2820,8 @@ void radv_CmdDispatchIndirect(
 	struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
 							     MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
 	if (loc->sgpr_idx != -1) {
-		for (unsigned i = 0; i < 3; ++i) {
+		uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
+		for (unsigned i = 0; i < grid_used; ++i) {
 			radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
 			radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
 				    COPY_DATA_DST_SEL(COPY_DATA_REG));
@@ -2888,10 +2892,13 @@ void radv_unaligned_dispatch(
 	struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
 							     MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
 	if (loc->sgpr_idx != -1) {
-		radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+		uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
+		radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
 		radeon_emit(cmd_buffer->cs, blocks[0]);
-		radeon_emit(cmd_buffer->cs, blocks[1]);
-		radeon_emit(cmd_buffer->cs, blocks[2]);
+		if (grid_used > 1)
+			radeon_emit(cmd_buffer->cs, blocks[1]);
+		if (grid_used > 2)
+			radeon_emit(cmd_buffer->cs, blocks[2]);
 	}
 	radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
 		    PKT3_SHADER_TYPE_S(1));
-- 
2.30.2