ac/nir/radeonsi: add ARB_shader_ballot support
author Timothy Arceri <tarceri@itsqueeze.com>
Mon, 22 Jan 2018 01:53:45 +0000 (12:53 +1100)
committer Timothy Arceri <tarceri@itsqueeze.com>
Mon, 22 Jan 2018 22:11:22 +0000 (09:11 +1100)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/amd/common/ac_nir_to_llvm.c
src/gallium/drivers/radeonsi/si_shader_nir.c

index 53940e2c9e84a4518b3f28cf9460aae6018908d8..82f29c058809a4efcdbda89252861d2753ff2d02 100644 (file)
@@ -4247,6 +4247,43 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        LLVMValueRef result = NULL;
 
        switch (instr->intrinsic) {
+       case nir_intrinsic_ballot:
+               result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
+               break;
+       case nir_intrinsic_read_invocation:
+       case nir_intrinsic_read_first_invocation: {
+               LLVMValueRef args[2];
+
+               /* Value */
+               args[0] = get_src(ctx, instr->src[0]);
+
+               unsigned num_args;
+               const char *intr_name;
+               if (instr->intrinsic == nir_intrinsic_read_invocation) {
+                       num_args = 2;
+                       intr_name = "llvm.amdgcn.readlane";
+
+                       /* Invocation */
+                       args[1] = get_src(ctx, instr->src[1]);
+               } else {
+                       num_args = 1;
+                       intr_name = "llvm.amdgcn.readfirstlane";
+               }
+
+               /* We currently have no other way to prevent LLVM from lifting the icmp
+                * calls to a dominating basic block.
+                */
+               ac_build_optimization_barrier(&ctx->ac, &args[0]);
+
+               result = ac_build_intrinsic(&ctx->ac, intr_name,
+                                           ctx->ac.i32, args, num_args,
+                                           AC_FUNC_ATTR_READNONE |
+                                           AC_FUNC_ATTR_CONVERGENT);
+               break;
+       }
+       case nir_intrinsic_load_subgroup_invocation:
+               result = ac_get_thread_id(&ctx->ac);
+               break;
        case nir_intrinsic_load_work_group_id: {
                LLVMValueRef values[3];
 
index e26994c06b926885c284522f99573a5803554fcd..97b647434b6d37c188f0e8ed16ea69d340a44c59 100644 (file)
@@ -505,6 +505,15 @@ si_lower_nir(struct si_shader_selector* sel)
        };
        NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
 
+       const nir_lower_subgroups_options subgroups_options = {
+               .subgroup_size = 64,
+               .ballot_bit_size = 32,
+               .lower_to_scalar = true,
+               .lower_subgroup_masks = true,
+               .lower_vote_trivial = false,
+       };
+       NIR_PASS_V(sel->nir, nir_lower_subgroups, &subgroups_options);
+
        bool progress;
        do {
                progress = false;