ac/nir: set workgroup size attribute to correct value.
authorDave Airlie <airlied@redhat.com>
Mon, 5 Jun 2017 00:20:48 +0000 (01:20 +0100)
committerDave Airlie <airlied@redhat.com>
Mon, 5 Jun 2017 00:37:44 +0000 (01:37 +0100)
This ports: 55445ff1891724c78e6573d2f8c721e14c0449fc from radeonsi

    radeonsi: tell LLVM not to remove s_barrier instructions

    LLVM 5.0 removes s_barrier instructions if the max-work-group-size
    attribute is not set. What a surprise.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c

index 28ba47d502f7d73e8b6f398d68eb3453ed7866fc..4e5d19acb853fe8bbe05ce7d18929fe3698b20e0 100644 (file)
@@ -57,7 +57,7 @@ struct nir_to_llvm_context {
        struct ac_llvm_context ac;
        const struct ac_nir_compiler_options *options;
        struct ac_shader_variant_info *shader_info;
-
+       unsigned max_workgroup_size;
        LLVMContextRef context;
        LLVMModuleRef module;
        LLVMBuilderRef builder;
@@ -257,7 +257,8 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
                      LLVMBuilderRef builder, LLVMTypeRef *return_types,
                      unsigned num_return_elems, LLVMTypeRef *param_types,
                      unsigned param_count, unsigned array_params_mask,
-                     unsigned sgpr_params, bool unsafe_math)
+                     unsigned sgpr_params, unsigned max_workgroup_size,
+                    bool unsafe_math)
 {
        LLVMTypeRef main_function_type, ret_type;
        LLVMBasicBlockRef main_function_body;
@@ -289,6 +290,11 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
                }
        }
 
+       if (max_workgroup_size) {
+               ac_llvm_add_target_dep_function_attr(main_function,
+                                                    "amdgpu-max-work-group-size",
+                                                    max_workgroup_size);
+       }
        if (unsafe_math) {
                /* These were copied from some LLVM test. */
                LLVMAddTargetDependentFunctionAttr(main_function,
@@ -773,7 +779,8 @@ static void create_function(struct nir_to_llvm_context *ctx)
 
        ctx->main_function = create_llvm_function(
            ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
-           arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
+           arg_idx, array_params_mask, sgpr_count, ctx->max_workgroup_size,
+           ctx->options->unsafe_math);
        set_llvm_calling_convention(ctx->main_function, ctx->stage);
 
        ctx->shader_info->num_input_sgprs = 0;
@@ -5855,6 +5862,27 @@ ac_setup_rings(struct nir_to_llvm_context *ctx)
        }
 }
 
+static unsigned
+ac_nir_get_max_workgroup_size(enum chip_class chip_class,
+                             struct nir_shader *nir)
+{
+       switch (nir->stage) {
+       case MESA_SHADER_TESS_CTRL:
+               return chip_class >= CIK ? 128 : 64;
+       case MESA_SHADER_GEOMETRY:
+               return 64;
+       case MESA_SHADER_COMPUTE:
+               break;
+       default:
+               return 0;
+       }
+
+       unsigned max_workgroup_size = nir->info.cs.local_size[0] *
+               nir->info.cs.local_size[1] *
+               nir->info.cs.local_size[2];
+       return max_workgroup_size;
+}
+
 static
 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
                                        struct nir_shader *nir,
@@ -5891,6 +5919,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
        ctx.builder = LLVMCreateBuilderInContext(ctx.context);
        ctx.ac.builder = ctx.builder;
        ctx.stage = nir->stage;
+       ctx.max_workgroup_size = ac_nir_get_max_workgroup_size(ctx.options->chip_class, nir);
 
        for (i = 0; i < AC_UD_MAX_SETS; i++)
                shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;