radv: Add minimal subgroup support.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sun, 21 Jan 2018 14:06:10 +0000 (15:06 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Wed, 7 Mar 2018 20:18:35 +0000 (21:18 +0100)
Deliberately not implementing workgroup scopes, as they are not needed
for core Vulkan.

Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_shader_info.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_shader.c

index f6ad51a412a4815984f59bc5590dc53d25022d1d..d02238dba7d1c79a2d58cf38112591bb6866d4f6 100644 (file)
@@ -3883,6 +3883,46 @@ visit_load_local_invocation_index(struct ac_nir_context *ctx)
        return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
 }
 
+/* Build the value for nir_intrinsic_load_subgroup_id.
+ *
+ * In compute shaders the result is bits 6..11 of ctx->abi->tg_size,
+ * shifted down so they start at bit 0.  Every other stage gets the
+ * constant 0.
+ */
+static LLVMValueRef
+visit_load_subgroup_id(struct ac_nir_context *ctx)
+{
+       /* Non-compute stages: the id is always zero. */
+       if (ctx->stage != MESA_SHADER_COMPUTE) {
+               return LLVMConstInt(ctx->ac.i32, 0, false);
+       }
+
+       /* Isolate the 6-bit field at bits 6..11 ... */
+       LLVMValueRef field_mask = LLVMConstInt(ctx->ac.i32, 0xfc0, false);
+       LLVMValueRef field = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
+                                         field_mask, "");
+
+       /* ... and move it down to the low bits. */
+       LLVMValueRef field_shift = LLVMConstInt(ctx->ac.i32, 6, false);
+
+       return LLVMBuildLShr(ctx->ac.builder, field, field_shift, "");
+}
+
+/* Build the value for nir_intrinsic_load_num_subgroups.
+ *
+ * In compute shaders the result is the low six bits of
+ * ctx->abi->tg_size.  Every other stage gets the constant 1.
+ */
+static LLVMValueRef
+visit_load_num_subgroups(struct ac_nir_context *ctx)
+{
+       /* Non-compute stages: always exactly one subgroup. */
+       if (ctx->stage != MESA_SHADER_COMPUTE) {
+               return LLVMConstInt(ctx->ac.i32, 1, false);
+       }
+
+       /* Keep only bits 0..5 of tg_size. */
+       LLVMValueRef count_mask = LLVMConstInt(ctx->ac.i32, 0x3f, false);
+
+       return LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size, count_mask, "");
+}
+
+/* Build the value for nir_intrinsic_first_invocation.
+ *
+ * Takes a ballot of the constant i32 1 and returns the number of
+ * trailing zero bits in that 64-bit mask, i.e. the position of its
+ * lowest set bit, truncated to i32.
+ */
+static LLVMValueRef
+visit_first_invocation(struct ac_nir_context *ctx)
+{
+       LLVMValueRef ballot_mask = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
+
+       /* The second cttz operand is the flag controlling the result for a
+        * zero input; 0 is passed here and that case is not relied upon.
+        */
+       LLVMValueRef zero_input_flag = LLVMConstInt(ctx->ac.i1, 0, false);
+       LLVMValueRef cttz_args[2] = { ballot_mask, zero_input_flag };
+       LLVMValueRef lowest_set_bit;
+
+       lowest_set_bit = ac_build_intrinsic(&ctx->ac, "llvm.cttz.i64",
+                                           ctx->ac.i64, cttz_args, 2,
+                                           AC_FUNC_ATTR_NOUNWIND |
+                                           AC_FUNC_ATTR_READNONE);
+
+       /* cttz.i64 yields an i64; the NIR result is 32-bit. */
+       return LLVMBuildTrunc(ctx->ac.builder, lowest_set_bit, ctx->ac.i32, "");
+}
+
 static LLVMValueRef
 visit_load_shared(struct ac_nir_context *ctx,
                   const nir_intrinsic_instr *instr)
@@ -4411,6 +4451,15 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        case nir_intrinsic_load_local_invocation_index:
                result = visit_load_local_invocation_index(ctx);
                break;
+       case nir_intrinsic_load_subgroup_id:
+               result = visit_load_subgroup_id(ctx);
+               break;
+       case nir_intrinsic_load_num_subgroups:
+               result = visit_load_num_subgroups(ctx);
+               break;
+       case nir_intrinsic_first_invocation:
+               result = visit_first_invocation(ctx);
+               break;
        case nir_intrinsic_load_push_constant:
                result = visit_load_push_constant(ctx, instr);
                break;
index 98de963147b13d4f13eaad8851aa352d4c361a0c..883358faaae247742364c00f8952d6f9d7e22887 100644 (file)
@@ -61,6 +61,8 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
                break;
        }
        case nir_intrinsic_load_local_invocation_index:
+       case nir_intrinsic_load_subgroup_id:
+       case nir_intrinsic_load_num_subgroups:
                info->cs.uses_local_invocation_idx = true;
                break;
        case nir_intrinsic_load_sample_id:
index 0e584fc9dc99714aed8344f9eedbbc59ee060504..00bb70612eb206403a9d8770028e1e962e49ac5c 100644 (file)
@@ -866,6 +866,15 @@ void radv_GetPhysicalDeviceProperties2(
                        properties->minImportedHostPointerAlignment = 4096;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
+                       VkPhysicalDeviceSubgroupProperties *properties =
+                           (VkPhysicalDeviceSubgroupProperties*)ext;
+                       properties->subgroupSize = 64;
+                       properties->supportedStages = VK_SHADER_STAGE_ALL;
+                       properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
+                       properties->quadOperationsInAllStages = false;
+                       break;
+               }
                default:
                        break;
                }
index 8a241f8ec8ec85cf867421177a507558f4a53758..1e20aa6f9adf1a10f00435530fc16b0f301a8f4e 100644 (file)
@@ -210,6 +210,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
                                .tessellation = true,
                                .int64 = true,
                                .multiview = true,
+                               .subgroup_basic = true,
                                .variable_pointers = true,
                        },
                };
@@ -266,6 +267,15 @@ radv_shader_compile_to_nir(struct radv_device *device,
        nir_lower_global_vars_to_local(nir);
        nir_remove_dead_variables(nir, nir_var_local);
        ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
+       nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
+                       .subgroup_size = 64,
+                       .ballot_bit_size = 64,
+                       .lower_to_scalar = 1,
+                       .lower_subgroup_masks = 1,
+                       .lower_shuffle = 1,
+                       .lower_quad =  1,
+               });
+
        radv_optimize_nir(nir);
 
        return nir;