From 9e467d111b2c9046c9b35b9e76891a8cfbb752c1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 12 Jul 2019 17:12:17 -0400 Subject: [PATCH] ac: initial Wave32 support in LLVM build helpers Reviewed-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 24 ++++++++++++------- src/amd/common/ac_llvm_build.h | 4 +++- src/amd/vulkan/radv_nir_to_llvm.c | 4 ++-- .../drivers/radeonsi/si_shader_tgsi_setup.c | 2 +- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 041b6cd797e..1551df07959 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -58,7 +58,8 @@ struct ac_llvm_flow { */ void ac_llvm_context_init(struct ac_llvm_context *ctx, - enum chip_class chip_class, enum radeon_family family) + enum chip_class chip_class, enum radeon_family family, + unsigned wave_size) { LLVMValueRef args[1]; @@ -66,6 +67,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, ctx->chip_class = chip_class; ctx->family = family; + ctx->wave_size = wave_size; ctx->module = NULL; ctx->builder = NULL; @@ -2225,10 +2227,14 @@ ac_get_thread_id(struct ac_llvm_context *ctx) "llvm.amdgcn.mbcnt.lo", ctx->i32, tid_args, 2, AC_FUNC_ATTR_READNONE); - tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", - ctx->i32, tid_args, - 2, AC_FUNC_ATTR_READNONE); - set_range_metadata(ctx, tid, 0, 64); + if (ctx->wave_size == 32) { + tid = tid_args[1]; + } else { + tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", + ctx->i32, tid_args, + 2, AC_FUNC_ATTR_READNONE); + } + set_range_metadata(ctx, tid, 0, ctx->wave_size); return tid; } @@ -4260,7 +4266,7 @@ ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), LLVMTypeOf(identity), ""); - result = ac_build_scan(ctx, op, result, identity, 64, true); + result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true); return ac_build_wwm(ctx, result); } @@ -4284,7 +4290,7 @@ ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), LLVMTypeOf(identity), ""); - result = ac_build_scan(ctx, op, result, identity, 64, false); + result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false); return ac_build_wwm(ctx, result); } @@ -4360,12 +4366,12 @@ ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) if (ws->maxwaves <= 1) return; - const LLVMValueRef i32_63 = LLVMConstInt(ctx->i32, 63, false); + const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false); LLVMBuilderRef builder = ctx->builder; LLVMValueRef tid = ac_get_thread_id(ctx); LLVMValueRef tmp; - tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, i32_63, ""); + tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, ""); ac_build_ifcc(ctx, tmp, 1000); LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, "")); ac_build_endif(ctx, 1000); diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 7f6139e5cdd..588ef242c20 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -103,13 +103,15 @@ struct ac_llvm_context { enum chip_class chip_class; enum radeon_family family; + unsigned wave_size; LLVMValueRef lds; }; void ac_llvm_context_init(struct ac_llvm_context *ctx, - enum chip_class chip_class, enum radeon_family family); + enum chip_class chip_class, enum radeon_family family, + unsigned wave_size); void ac_llvm_context_dispose(struct ac_llvm_context *ctx); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index f5fb54f2d11..a18718ef081 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -4319,7 +4319,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.options = options; ctx.shader_info = shader_info; - ac_llvm_context_init(&ctx.ac, options->chip_class, options->family); + ac_llvm_context_init(&ctx.ac, options->chip_class, options->family, 64); ctx.context = ctx.ac.context; ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context); @@ -4838,7 +4838,7 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, ctx.options = options; ctx.shader_info = shader_info; - ac_llvm_context_init(&ctx.ac, options->chip_class, options->family); + ac_llvm_context_init(&ctx.ac, options->chip_class, options->family, 64); ctx.context = ctx.ac.context; ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 1690c3cb8a8..9a9f3d63cc6 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -962,7 +962,7 @@ void si_llvm_context_init(struct si_shader_context *ctx, ctx->screen = sscreen; ctx->compiler = compiler; - ac_llvm_context_init(&ctx->ac, sscreen->info.chip_class, sscreen->info.family); + ac_llvm_context_init(&ctx->ac, sscreen->info.chip_class, sscreen->info.family, 64); ctx->ac.module = ac_create_module(compiler->tm, ctx->ac.context); enum ac_float_mode float_mode = -- 2.30.2