radv: always emit all compute block components
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 14 Dec 2017 11:51:06 +0000 (12:51 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 14 Dec 2017 21:19:39 +0000 (22:19 +0100)
The number of grid components is always 3 when gl_NumWorkGroups
is declared, because it relies on the number of components of
nir_intrinsic_load_num_work_groups.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/common/ac_nir_to_llvm.c
src/amd/vulkan/radv_cmd_buffer.c

index f3602a267de5cb1cd3045b42ec49fb52d8de6eb9..ce25e57eba76c9721371c1f4a7064963c0eb9d5f 100644 (file)
@@ -745,8 +745,10 @@ static void create_function(struct nir_to_llvm_context *ctx,
        switch (stage) {
        case MESA_SHADER_COMPUTE:
                radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
-               if (ctx->shader_info->info.cs.grid_components_used)
-                       add_user_sgpr_argument(&args, LLVMVectorType(ctx->ac.i32, ctx->shader_info->info.cs.grid_components_used), &ctx->num_work_groups); /* grid size */
+               if (ctx->shader_info->info.cs.grid_components_used) {
+                       add_user_sgpr_argument(&args, ctx->ac.v3i32,
+                                              &ctx->num_work_groups);
+               }
                add_sgpr_argument(&args, ctx->ac.v3i32, &ctx->workgroup_ids);
                add_sgpr_argument(&args, ctx->ac.i32, &ctx->tg_size);
                add_vgpr_argument(&args, ctx->ac.v3i32, &ctx->local_invocation_ids);
@@ -950,7 +952,8 @@ static void create_function(struct nir_to_llvm_context *ctx,
        switch (stage) {
        case MESA_SHADER_COMPUTE:
                if (ctx->shader_info->info.cs.grid_components_used) {
-                       set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used);
+                       set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE,
+                                                    &user_sgpr_idx, 3);
                }
                break;
        case MESA_SHADER_VERTEX:
index 68371dbbe7c7c24b8326d498c5fc4a7bfeb36f49..e68c5a40388b01622ca712b7ed02baba2fa89847 100644 (file)
@@ -3487,9 +3487,6 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
        struct radeon_winsys_cs *cs = cmd_buffer->cs;
        struct ac_userdata_info *loc;
        unsigned dispatch_initiator;
-       uint8_t grid_used;
-
-       grid_used = compute_shader->info.info.cs.grid_components_used;
 
        loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE,
                                    AC_UD_CS_GRID_SIZE);
@@ -3514,7 +3511,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
                radv_cs_add_buffer(ws, cs, info->indirect->bo, 8);
 
                if (loc->sgpr_idx != -1) {
-                       for (unsigned i = 0; i < grid_used; ++i) {
+                       for (unsigned i = 0; i < 3; ++i) {
                                radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
                                radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
                                                COPY_DATA_DST_SEL(COPY_DATA_REG));
@@ -3581,15 +3578,13 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
 
                if (loc->sgpr_idx != -1) {
                        assert(!loc->indirect);
-                       assert(loc->num_sgprs == grid_used);
+                       assert(loc->num_sgprs == 3);
 
                        radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
-                                                 loc->sgpr_idx * 4, grid_used);
+                                                 loc->sgpr_idx * 4, 3);
                        radeon_emit(cs, blocks[0]);
-                       if (grid_used > 1)
-                               radeon_emit(cs, blocks[1]);
-                       if (grid_used > 2)
-                               radeon_emit(cs, blocks[2]);
+                       radeon_emit(cs, blocks[1]);
+                       radeon_emit(cs, blocks[2]);
                }
 
                radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |