From 48a75e7af096f07d3d1984124bb29694c14ff61f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Fri, 25 Jan 2019 16:08:38 +0100 Subject: [PATCH] amd/common: lower bitfield_insert to bfm & bitfield_select Reviewed-by: Connor Abbott --- src/amd/common/ac_nir_to_llvm.c | 51 +++++++++++++-------------- src/amd/vulkan/radv_shader.c | 1 + src/gallium/drivers/radeonsi/si_get.c | 1 + 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index dffaeedfbb4..9b6e65db8b9 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -455,34 +455,30 @@ static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx, return result; } -static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx, - LLVMValueRef src0, LLVMValueRef src1, - LLVMValueRef src2, LLVMValueRef src3) +static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx, + LLVMValueRef bits, LLVMValueRef offset) { - LLVMValueRef bfi_args[3], result; - - bfi_args[0] = LLVMBuildShl(ctx->builder, - LLVMBuildSub(ctx->builder, - LLVMBuildShl(ctx->builder, - ctx->i32_1, - src3, ""), - ctx->i32_1, ""), - src2, ""); - bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, ""); - bfi_args[2] = src0; - - LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), ""); + /* mask = ((1 << bits) - 1) << offset */ + return LLVMBuildShl(ctx->builder, + LLVMBuildSub(ctx->builder, + LLVMBuildShl(ctx->builder, + ctx->i32_1, + bits, ""), + ctx->i32_1, ""), + offset, ""); +} +static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx, + LLVMValueRef mask, LLVMValueRef insert, + LLVMValueRef base) +{ /* Calculate: - * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) + * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base)) * Use the right-hand side, which the LLVM backend can convert to V_BFI. */ - result = LLVMBuildXor(ctx->builder, bfi_args[2], - LLVMBuildAnd(ctx->builder, bfi_args[0], - LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), ""); - - result = LLVMBuildSelect(ctx->builder, icond, src1, result, ""); - return result; + return LLVMBuildXor(ctx->builder, base, + LLVMBuildAnd(ctx->builder, mask, + LLVMBuildXor(ctx->builder, insert, base, ""), ""), ""); } static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx, @@ -835,15 +831,18 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) else result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE); break; + case nir_op_bfm: + result = emit_bfm(&ctx->ac, src[0], src[1]); + break; + case nir_op_bitfield_select: + result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]); + break; case nir_op_ibitfield_extract: result = emit_bitfield_extract(&ctx->ac, true, src); break; case nir_op_ubitfield_extract: result = emit_bitfield_extract(&ctx->ac, false, src); break; - case nir_op_bitfield_insert: - result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]); - break; case nir_op_bitfield_reverse: result = ac_build_bitfield_reverse(&ctx->ac, src[0]); break; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index c3bbf3fdda7..3452a0b2ad2 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -58,6 +58,7 @@ static const struct nir_shader_compiler_options nir_options = { .lower_device_index_to_zero = true, .lower_fsat = true, .lower_fdiv = true, + .lower_bitfield_insert_to_bitfield_select = true, .lower_sub = true, .lower_pack_snorm_2x16 = true, .lower_pack_snorm_4x8 = true, diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 0c561e7ca91..b7c7f1aa78c 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -487,6 +487,7 @@ static const struct nir_shader_compiler_options nir_options = { .lower_flrp64 = true, .lower_fsat = true, .lower_fdiv = true, + .lower_bitfield_insert_to_bitfield_select = true, .lower_sub = true, .lower_ffma = true, .lower_fmod = true, -- 2.30.2