radeonsi: fix BFE/BFI lowering for GLSL semantics

author Nicolai Hähnle <nicolai.haehnle@amd.com>

Thu, 27 Oct 2016 08:59:37 +0000 (10:59 +0200)

committer Nicolai Hähnle <nicolai.haehnle@amd.com>

Wed, 2 Nov 2016 11:30:11 +0000 (12:30 +0100)
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Thu, 27 Oct 2016 08:59:37 +0000 (10:59 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 2 Nov 2016 11:30:11 +0000 (12:30 +0100)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c

index 123ff5d86b076d7f7d9d1f469db50889271c8fe6..18e905b275bd6f07bd659a49151680f65218a1ee 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -459,6 +459,8 @@ static void emit_bfi(const struct lp_build_tgsi_action *action,
         struct gallivm_state *gallivm = bld_base->base.gallivm;
         LLVMBuilderRef builder = gallivm->builder;
         LLVMValueRef bfi_args[3];
+       LLVMValueRef bfi_sm5;
+       LLVMValueRef cond;
  
         // Calculate the bitmask: (((1 << src3) - 1) << src2
         bfi_args[0] = LLVMBuildShl(builder,
@@ -478,11 +480,40 @@ static void emit_bfi(const struct lp_build_tgsi_action *action,
          *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
          * Use the right-hand side, which the LLVM backend can convert to V_BFI.
          */
-       emit_data->output[emit_data->chan] =
+       bfi_sm5 =
                 LLVMBuildXor(builder, bfi_args[2],
                         LLVMBuildAnd(builder, bfi_args[0],
                                 LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
                                              ""), ""), "");
+
+       /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
+        * uses the convenient V_BFI lowering for the above, which follows SM5
+        * and disagrees with GLSL semantics when bits (src3) is 32.
+        */
+       cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
+                            lp_build_const_int32(gallivm, 32), "");
+       emit_data->output[emit_data->chan] =
+               LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
+}
+
+static void emit_bfe(const struct lp_build_tgsi_action *action,
+                    struct lp_build_tgsi_context *bld_base,
+                    struct lp_build_emit_data *emit_data)
+{
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
+       LLVMValueRef bfe_sm5;
+       LLVMValueRef cond;
+
+       bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
+                                    emit_data->dst_type, emit_data->args,
+                                    emit_data->arg_count, LLVMReadNoneAttribute);
+
+       /* Correct for GLSL semantics. */
+       cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
+                            lp_build_const_int32(gallivm, 32), "");
+       emit_data->output[emit_data->chan] =
+               LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
  }
  
  /* this is ffs in C */
@@ -783,7 +814,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
         bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
         bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
         bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
-       bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
+       bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
         bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
         bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
         bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
@@ -835,7 +866,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
         bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
         bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
         bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
-       bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
+       bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
         bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
         bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
         bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
author	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Thu, 27 Oct 2016 08:59:37 +0000 (10:59 +0200)
committer	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Wed, 2 Nov 2016 11:30:11 +0000 (12:30 +0100)