From 54e6900eded795fa6b63add92f31ee88674f783a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 16 Jul 2019 00:55:46 -0400 Subject: [PATCH] radeonsi/gfx10: use 32-bit wavemasks for Wave32 Acked-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 24 +++++++++++++++----- src/amd/common/ac_llvm_build.h | 1 + src/amd/common/ac_nir_to_llvm.c | 6 ++--- src/gallium/drivers/radeonsi/si_shader.c | 28 +++++++++++++++++------- 4 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 855ebb3d3dd..250bfc5229e 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -92,6 +92,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, ctx->v3f32 = LLVMVectorType(ctx->f32, 3); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); + ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size); ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false); ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false); @@ -447,7 +448,16 @@ LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value) { - const char *name = HAVE_LLVM >= 0x900 ? "llvm.amdgcn.icmp.i64.i32" : "llvm.amdgcn.icmp.i32"; + const char *name; + + if (HAVE_LLVM >= 0x900) { + if (ctx->wave_size == 64) + name = "llvm.amdgcn.icmp.i64.i32"; + else + name = "llvm.amdgcn.icmp.i32.i32"; + } else { + name = "llvm.amdgcn.icmp.i32"; + } LLVMValueRef args[3] = { value, ctx->i32_0, @@ -461,8 +471,7 @@ ac_build_ballot(struct ac_llvm_context *ctx, args[0] = ac_to_integer(ctx, args[0]); - return ac_build_intrinsic(ctx, name, - ctx->i64, args, 3, + return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3, AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); @@ -498,7 +507,7 @@ ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value) { LLVMValueRef vote_set = ac_build_ballot(ctx, value); return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, - LLVMConstInt(ctx->i64, 0, 0), ""); + LLVMConstInt(ctx->iN_wavemask, 0, 0), ""); } LLVMValueRef @@ -511,7 +520,7 @@ ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value) vote_set, active_set, ""); LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, - LLVMConstInt(ctx->i64, 0, 0), ""); + LLVMConstInt(ctx->iN_wavemask, 0, 0), ""); return LLVMBuildOr(ctx->builder, all, none, ""); } @@ -3848,6 +3857,11 @@ ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef v LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask) { + if (ctx->wave_size == 32) { + return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, + (LLVMValueRef []) { mask, ctx->i32_0 }, + 2, AC_FUNC_ATTR_READNONE); + } LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, LLVMVectorType(ctx->i32, 2), ""); diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index cc1807221b2..8fcede66fb2 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -74,6 +74,7 @@ struct ac_llvm_context { LLVMTypeRef v3f32; LLVMTypeRef v4f32; LLVMTypeRef v8i32; + LLVMTypeRef iN_wavemask; LLVMValueRef i8_0; LLVMValueRef i8_1; diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 96bf89a8bf9..f69e02f6d0a 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2809,12 +2809,12 @@ static LLVMValueRef visit_first_invocation(struct ac_nir_context *ctx) { LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1); + const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64"; /* The second argument is whether cttz(0) should be defined, but we do not care. */ LLVMValueRef args[] = {active_set, ctx->ac.i1false}; - LLVMValueRef result = ac_build_intrinsic(&ctx->ac, - "llvm.cttz.i64", - ctx->ac.i64, args, 2, + LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr, + ctx->ac.iN_wavemask, args, 2, AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b41e1e62522..23a6a7455ec 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2178,8 +2178,12 @@ void si_load_system_value(struct si_shader_context *ctx, case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: { LLVMValueRef id = ac_get_thread_id(&ctx->ac); - id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, ""); - value = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->i64, 1, 0), id, ""); + if (ctx->ac.wave_size == 64) + id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, ""); + value = LLVMBuildShl(ctx->ac.builder, + LLVMConstInt(ctx->ac.iN_wavemask, 1, 0), id, ""); + if (ctx->ac.wave_size == 32) + value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, ""); value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, ""); break; } @@ -2193,16 +2197,19 @@ void si_load_system_value(struct si_shader_context *ctx, if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK || decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) { /* All bits set except LSB */ - value = LLVMConstInt(ctx->i64, -2, 0); + value = LLVMConstInt(ctx->ac.iN_wavemask, -2, 0); } else { /* All bits set */ - value = LLVMConstInt(ctx->i64, -1, 0); + value = LLVMConstInt(ctx->ac.iN_wavemask, -1, 0); } - id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, ""); + if (ctx->ac.wave_size == 64) + id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, ""); value = LLVMBuildShl(ctx->ac.builder, value, id, ""); if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK || decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK) value = LLVMBuildNot(ctx->ac.builder, value, ""); + if (ctx->ac.wave_size == 32) + value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, ""); value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, ""); break; } @@ -4186,10 +4193,15 @@ static void ballot_emit( tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); tmp = ac_build_ballot(&ctx->ac, tmp); - tmp = LLVMBuildBitCast(builder, tmp, ctx->v2i32, ""); - emit_data->output[0] = LLVMBuildExtractElement(builder, tmp, ctx->i32_0, ""); - emit_data->output[1] = LLVMBuildExtractElement(builder, tmp, ctx->i32_1, ""); + emit_data->output[0] = LLVMBuildTrunc(builder, tmp, ctx->i32, ""); + + if (ctx->ac.wave_size == 32) { + emit_data->output[1] = ctx->i32_0; + } else { + tmp = LLVMBuildLShr(builder, tmp, LLVMConstInt(ctx->i64, 32, 0), ""); + emit_data->output[1] = LLVMBuildTrunc(builder, tmp, ctx->i32, ""); + } } static void read_lane_emit( -- 2.30.2