radv: optimise compute shader grid size emission.
author Dave Airlie <airlied@redhat.com>
Mon, 17 Apr 2017 19:23:47 +0000 (05:23 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 18 Apr 2017 23:00:42 +0000 (09:00 +1000)
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_shader_info.c
src/amd/common/ac_shader_info.h
src/amd/vulkan/radv_cmd_buffer.c

index 31b772f56691672fc4317c0f99aa8e7cdfd14a5b..f854356329321d70f9adc75ead1b1c2281426110 100644 (file)
@@ -604,7 +604,8 @@ static void create_function(struct nir_to_llvm_context *ctx)
 
        switch (ctx->stage) {
        case MESA_SHADER_COMPUTE:
-               arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
+               if (ctx->shader_info->info.cs.grid_components_used)
+                       arg_types[arg_idx++] = LLVMVectorType(ctx->i32, ctx->shader_info->info.cs.grid_components_used); /* grid size */
                user_sgpr_count = arg_idx;
                arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
                arg_types[arg_idx++] = ctx->i32;
@@ -762,10 +763,12 @@ static void create_function(struct nir_to_llvm_context *ctx)
 
        switch (ctx->stage) {
        case MESA_SHADER_COMPUTE:
-               set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
-               user_sgpr_idx += 3;
-               ctx->num_work_groups =
-                   LLVMGetParam(ctx->main_function, arg_idx++);
+               if (ctx->shader_info->info.cs.grid_components_used) {
+                       set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used);
+                       user_sgpr_idx += ctx->shader_info->info.cs.grid_components_used;
+                       ctx->num_work_groups =
+                               LLVMGetParam(ctx->main_function, arg_idx++);
+               }
                ctx->workgroup_ids =
                    LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tg_size =
index 6ad562505cf2e2510801000ad207a00021fc9ef8..ef3692563b2afac65ad4d5c0eb347fff6af762f7 100644 (file)
@@ -33,6 +33,9 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, struct ac_shader_info *info)
        case nir_intrinsic_load_draw_id:
                info->vs.needs_draw_id = true;
                break;
+       case nir_intrinsic_load_num_work_groups:
+               info->cs.grid_components_used = instr->num_components;
+               break;
        default:
                break;
        }
index 7e2b6c885a6d51807fd88a64b1abae7e5afd9a2d..ed97d06d56bb1cd166f580d13cfc1ac538f4f416 100644 (file)
@@ -35,6 +35,9 @@ struct ac_shader_info {
        struct {
                bool needs_sample_positions;
        } ps;
+       struct {
+               uint8_t grid_components_used;
+       } cs;
 };
 
 void
index 52b74539068f424009aa971d7ce776254f68770d..7056d0d0266a32e6b93d58a30b34395ba9576cf8 100644 (file)
@@ -2781,11 +2781,14 @@ void radv_CmdDispatch(
                                                             MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
        if (loc->sgpr_idx != -1) {
                assert(!loc->indirect);
-               assert(loc->num_sgprs == 3);
-               radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+               uint8_t grid_used = cmd_buffer->state.pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
+               assert(loc->num_sgprs == grid_used);
+               radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
                radeon_emit(cmd_buffer->cs, x);
-               radeon_emit(cmd_buffer->cs, y);
-               radeon_emit(cmd_buffer->cs, z);
+               if (grid_used > 1)
+                       radeon_emit(cmd_buffer->cs, y);
+               if (grid_used > 2)
+                       radeon_emit(cmd_buffer->cs, z);
        }
 
        radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
@@ -2817,7 +2820,8 @@ void radv_CmdDispatchIndirect(
        struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
                                                             MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
        if (loc->sgpr_idx != -1) {
-               for (unsigned i = 0; i < 3; ++i) {
+               uint8_t grid_used = cmd_buffer->state.pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
+               for (unsigned i = 0; i < grid_used; ++i) {
                        radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
                        radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
                                    COPY_DATA_DST_SEL(COPY_DATA_REG));
@@ -2888,10 +2892,13 @@ void radv_unaligned_dispatch(
        struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
                                                             MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
        if (loc->sgpr_idx != -1) {
-               radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+               uint8_t grid_used = cmd_buffer->state.pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
+               radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
                radeon_emit(cmd_buffer->cs, blocks[0]);
-               radeon_emit(cmd_buffer->cs, blocks[1]);
-               radeon_emit(cmd_buffer->cs, blocks[2]);
+               if (grid_used > 1)
+                       radeon_emit(cmd_buffer->cs, blocks[1]);
+               if (grid_used > 2)
+                       radeon_emit(cmd_buffer->cs, blocks[2]);
        }
        radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
                    PKT3_SHADER_TYPE_S(1));