From 7ac8bb33cd6025f805a390e7647506e932f4db0d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 23 Mar 2020 12:02:15 +0100 Subject: [PATCH] radv/llvm: fix subgroup shuffle for chips without bpermute bpermute only exists on GFX8+ and only with Wave32 on GFX10. Instead we have to use readlane with a waterfall loop to defeat the LLVM backend. This fixes DOOM Eternal which requires subgroup shuffle. Cc: Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Tested-by: Marge Bot Part-of: --- src/amd/llvm/ac_nir_to_llvm.c | 29 +++++++++++++++++++++++++++-- src/amd/vulkan/radv_device.c | 6 +++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 49627990163..871c6abc17f 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3950,8 +3950,33 @@ static void visit_intrinsic(struct ac_nir_context *ctx, break; } case nir_intrinsic_shuffle: - result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), - get_src(ctx, instr->src[1])); + if (ctx->ac.chip_class == GFX8 || + ctx->ac.chip_class == GFX9 || + (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) { + result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), + get_src(ctx, instr->src[1])); + } else { + LLVMValueRef src = get_src(ctx, instr->src[0]); + LLVMValueRef index = get_src(ctx, instr->src[1]); + LLVMTypeRef type = LLVMTypeOf(src); + struct waterfall_context wctx; + LLVMValueRef index_val; + + index_val = enter_waterfall(ctx, &wctx, index, true); + + src = LLVMBuildZExt(ctx->ac.builder, src, + ctx->ac.i32, ""); + + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", + ctx->ac.i32, + (LLVMValueRef []) { src, index_val }, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_CONVERGENT); + + result = LLVMBuildTrunc(ctx->ac.builder, result, type, ""); + + result = exit_waterfall(ctx, &wctx, result); + } break; case nir_intrinsic_reduce: result = ac_build_reduce(&ctx->ac, diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 2f44b279ac1..1ecac0c1b55 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1481,9 +1481,9 @@ radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice, VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT; - if (pdevice->rad_info.chip_class == GFX8 || - pdevice->rad_info.chip_class == GFX9 || - (pdevice->rad_info.chip_class == GFX10 && pdevice->use_aco)) { + if (((pdevice->rad_info.chip_class == GFX6 || + pdevice->rad_info.chip_class == GFX7) && !pdevice->use_aco) || + pdevice->rad_info.chip_class >= GFX8) { p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT; } -- 2.30.2