radv: properly load unused gl_LocalInvocationID/gl_WorkGroupID components
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 18 Dec 2017 21:06:38 +0000 (22:06 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 19 Dec 2017 20:26:25 +0000 (21:26 +0100)
F1 2017 looks good now.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/common/ac_nir_to_llvm.c
src/amd/vulkan/radv_shader.c

index bd5748d80d526649b9a7019c1cc4eb60822fe1a5..cbc6d1b61b197b7f908e5bc24ae890d81496f96e 100644 (file)
@@ -94,7 +94,7 @@ struct nir_to_llvm_context {
        LLVMValueRef push_constants;
        LLVMValueRef view_index;
        LLVMValueRef num_work_groups;
-       LLVMValueRef workgroup_ids;
+       LLVMValueRef workgroup_ids[3];
        LLVMValueRef local_invocation_ids;
        LLVMValueRef tg_size;
 
@@ -751,7 +751,15 @@ static void create_function(struct nir_to_llvm_context *ctx,
                        add_user_sgpr_argument(&args, ctx->ac.v3i32,
                                               &ctx->num_work_groups);
                }
-               add_sgpr_argument(&args, ctx->ac.v3i32, &ctx->workgroup_ids);
+
+               for (int i = 0; i < 3; i++) {
+                       ctx->workgroup_ids[i] = NULL;
+                       if (ctx->shader_info->info.cs.uses_block_id[i]) {
+                               add_sgpr_argument(&args, ctx->ac.i32,
+                                                 &ctx->workgroup_ids[i]);
+                       }
+               }
+
                if (ctx->shader_info->info.cs.uses_local_invocation_idx)
                        add_sgpr_argument(&args, ctx->ac.i32, &ctx->tg_size);
                add_vgpr_argument(&args, ctx->ac.v3i32, &ctx->local_invocation_ids);
@@ -4050,7 +4058,14 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 
        switch (instr->intrinsic) {
        case nir_intrinsic_load_work_group_id: {
-               result = ctx->nctx->workgroup_ids;
+               LLVMValueRef values[3];
+
+               for (int i = 0; i < 3; i++) {
+                       values[i] = ctx->nctx->workgroup_ids[i] ?
+                                   ctx->nctx->workgroup_ids[i] : ctx->ac.i32_0;
+               }
+
+               result = ac_build_gather_values(&ctx->ac, values, 3);
                break;
        }
        case nir_intrinsic_load_base_vertex: {
index f96b0c07f1e6b03c20d8d84ae61e3be6e18aca11..ab8ba42511e1f8ad423dbf03ab4e367de4aa5ea8 100644 (file)
@@ -395,8 +395,11 @@ radv_fill_shader_variant(struct radv_device *device,
        case MESA_SHADER_COMPUTE: {
                struct ac_shader_info *info = &variant->info.info;
                variant->rsrc2 |=
-                       S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
-                       S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
+                       S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
+                       S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
+                       S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
+                       S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 :
+                                               info->cs.uses_thread_id[1] ? 1 : 0) |
                        S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) |
                        S_00B84C_LDS_SIZE(variant->config.lds_size);
                break;