radeonsi/gfx10: add si_context::emit_cache_flush
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_alu.c
index 207f552d2d9d8d5d4ffd9b5e375a862e3ee7433f..adad3223d99a83a55800ae9fb6290554fb6e33b9 100644 (file)
  */
 
 #include "si_shader_internal.h"
-#include "gallivm/lp_bld_const.h"
-#include "gallivm/lp_bld_intr.h"
-#include "gallivm/lp_bld_gather.h"
-#include "tgsi/tgsi_parse.h"
-#include "amd/common/ac_llvm_build.h"
-
-static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
-                              struct lp_build_emit_data *emit_data)
-{
-       const struct tgsi_full_instruction *inst = emit_data->inst;
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       unsigned i;
-       LLVMValueRef conds[TGSI_NUM_CHANNELS];
-
-       for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-               LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
-               conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value,
-                                       ctx->ac.f32_0, "");
-       }
-
-       /* And the conditions together */
-       for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
-               conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
-       }
-
-       emit_data->dst_type = ctx->voidt;
-       emit_data->arg_count = 1;
-       emit_data->args[0] = conds[0];
-}
+#include "ac_llvm_util.h"
 
 void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
 {
@@ -60,11 +31,9 @@ void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
        LLVMBuilderRef builder = ctx->ac.builder;
 
        if (ctx->shader->selector->force_correct_derivs_after_kill) {
-               /* LLVM 6.0 can kill immediately while maintaining WQM. */
-               if (HAVE_LLVM >= 0x0600) {
-                       ac_build_kill_if_false(&ctx->ac,
-                                              ac_build_wqm_vote(&ctx->ac, visible));
-               }
+               /* Kill immediately while maintaining WQM. */
+               ac_build_kill_if_false(&ctx->ac,
+                                      ac_build_wqm_vote(&ctx->ac, visible));
 
                LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
                mask = LLVMBuildAnd(builder, mask, visible, "");
@@ -83,10 +52,27 @@ static void kil_emit(const struct lp_build_tgsi_action *action,
        LLVMValueRef visible;
 
        if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
-               visible = emit_data->args[0];
+               const struct tgsi_full_instruction *inst = emit_data->inst;
+               struct si_shader_context *ctx = si_shader_context(bld_base);
+               LLVMBuilderRef builder = ctx->ac.builder;
+               unsigned i;
+               LLVMValueRef conds[TGSI_NUM_CHANNELS];
+
+               for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+                       LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
+                       /* UGE because NaN shouldn't get killed */
+                       conds[i] = LLVMBuildFCmp(builder, LLVMRealUGE, value,
+                                               ctx->ac.f32_0, "");
+               }
+
+               /* And the conditions together */
+               for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
+                       conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
+               }
+               visible = conds[0];
        } else {
                assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
-               visible = LLVMConstInt(ctx->i1, false, 0);
+               visible = ctx->i1false;
        }
 
        si_llvm_emit_kill(&ctx->abi, visible);
@@ -248,7 +234,9 @@ static void emit_arl(const struct lp_build_tgsi_action *action,
                     struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef floor_index =  lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
+       LLVMValueRef floor_index =
+               ac_build_intrinsic(&ctx->ac, "llvm.floor.f32", ctx->f32,
+                                  &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
        emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
                        floor_index, ctx->i32, "");
 }
@@ -453,9 +441,9 @@ build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        emit_data->output[emit_data->chan] =
-               lp_build_intrinsic(ctx->ac.builder, action->intr_name,
+               ac_build_intrinsic(&ctx->ac, action->intr_name,
                                   emit_data->dst_type, emit_data->args,
-                                  emit_data->arg_count, LP_FUNC_ATTR_READNONE);
+                                  emit_data->arg_count, AC_FUNC_ATTR_READNONE);
 }
 
 static void emit_bfi(const struct lp_build_tgsi_action *action,
@@ -507,18 +495,24 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
                     struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef bfe_sm5;
-       LLVMValueRef cond;
 
-       bfe_sm5 = ac_build_bfe(&ctx->ac, emit_data->args[0],
-                              emit_data->args[1], emit_data->args[2],
-                              emit_data->info->opcode == TGSI_OPCODE_IBFE);
+       /* FIXME: LLVM 7 returns incorrect result when count is 0.
+        * https://bugs.freedesktop.org/show_bug.cgi?id=107276
+        */
+       LLVMValueRef zero = ctx->i32_0;
+       LLVMValueRef bfe_sm5 =
+               ac_build_bfe(&ctx->ac, emit_data->args[0],
+                            emit_data->args[1], emit_data->args[2],
+                            emit_data->info->opcode == TGSI_OPCODE_IBFE);
 
        /* Correct for GLSL semantics. */
-       cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
-                            LLVMConstInt(ctx->i32, 32, 0), "");
+       LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
+                                         LLVMConstInt(ctx->i32, 32, 0), "");
+       LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
+                                          zero, "");
+       bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
        emit_data->output[emit_data->chan] =
-               LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
+               LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
 }
 
 /* this is ffs in C */
@@ -560,10 +554,8 @@ static void emit_iabs(const struct lp_build_tgsi_action *action,
        struct si_shader_context *ctx = si_shader_context(bld_base);
 
        emit_data->output[emit_data->chan] =
-               lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
-                                         emit_data->args[0],
-                                         LLVMBuildNeg(ctx->ac.builder,
-                                                      emit_data->args[0], ""));
+               ac_build_imax(&ctx->ac,  emit_data->args[0],
+                             LLVMBuildNeg(ctx->ac.builder, emit_data->args[0], ""));
 }
 
 static void emit_minmax_int(const struct lp_build_tgsi_action *action,
@@ -602,34 +594,25 @@ static void emit_minmax_int(const struct lp_build_tgsi_action *action,
                                emit_data->args[1], "");
 }
 
-static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
-                           struct lp_build_emit_data *emit_data)
-{
-       emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
-                                                0, TGSI_CHAN_X);
-       emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
-                                                0, TGSI_CHAN_Y);
-}
-
 static void emit_pk2h(const struct lp_build_tgsi_action *action,
                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
 {
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMValueRef v[] = {
+               lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X),
+               lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y),
+       };
+
+
        /* From the GLSL 4.50 spec:
         *   "The rounding mode cannot be set and is undefined."
         *
         * v_cvt_pkrtz_f16 rounds to zero, but it's fastest.
         */
        emit_data->output[emit_data->chan] =
-               ac_build_cvt_pkrtz_f16(&si_shader_context(bld_base)->ac,
-                                      emit_data->args);
-}
-
-static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
-                           struct lp_build_emit_data *emit_data)
-{
-       emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
-                                                0, TGSI_CHAN_X);
+               LLVMBuildBitCast(ctx->ac.builder, ac_build_cvt_pkrtz_f16(&ctx->ac, v),
+                                ctx->i32, "");
 }
 
 static void emit_up2h(const struct lp_build_tgsi_action *action,
@@ -643,7 +626,7 @@ static void emit_up2h(const struct lp_build_tgsi_action *action,
 
        i16 = LLVMInt16TypeInContext(ctx->ac.context);
        const16 = LLVMConstInt(ctx->i32, 16, 0);
-       input = emit_data->args[0];
+       input = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
 
        for (i = 0; i < 2; i++) {
                val = i == 1 ? LLVMBuildLShr(ctx->ac.builder, input, const16, "") : input;
@@ -672,19 +655,11 @@ static void emit_rsq(const struct lp_build_tgsi_action *action,
        struct si_shader_context *ctx = si_shader_context(bld_base);
 
        LLVMValueRef sqrt =
-               lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
-                                        emit_data->args[0]);
+               ac_build_intrinsic(&ctx->ac, "llvm.sqrt.f32", ctx->f32,
+                                  &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
 
        emit_data->output[emit_data->chan] =
-               lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
-                                         ctx->ac.f32_1, sqrt);
-}
-
-static void dfracexp_fetch_args(struct lp_build_tgsi_context *bld_base,
-                               struct lp_build_emit_data *emit_data)
-{
-       emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
-       emit_data->arg_count = 1;
+               ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, sqrt);
 }
 
 static void dfracexp_emit(const struct lp_build_tgsi_action *action,
@@ -692,13 +667,14 @@ static void dfracexp_emit(const struct lp_build_tgsi_action *action,
                          struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMValueRef in = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
 
        emit_data->output[emit_data->chan] =
-               lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.frexp.mant.f64",
-                                  ctx->ac.f64, &emit_data->args[0], 1, 0);
+               ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64",
+                                  ctx->ac.f64, &in, 1, 0);
        emit_data->output1[emit_data->chan] =
-               lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.frexp.exp.i32.f64",
-                                  ctx->ac.i32, &emit_data->args[0], 1, 0);
+               ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.exp.i32.f64",
+                                  ctx->ac.i32, &in, 1, 0);
 }
 
 void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
@@ -739,7 +715,6 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
        bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
        bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem;
        bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64";
-       bld_base->op_actions[TGSI_OPCODE_DFRACEXP].fetch_args = dfracexp_fetch_args;
        bld_base->op_actions[TGSI_OPCODE_DFRACEXP].emit = dfracexp_emit;
        bld_base->op_actions[TGSI_OPCODE_DLDEXP].emit = build_tgsi_intrinsic_nomem;
        bld_base->op_actions[TGSI_OPCODE_DLDEXP].intr_name = "llvm.amdgcn.ldexp.f64";
@@ -768,7 +743,6 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
        bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
        bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
        bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
-       bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
        bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
        bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit;
        bld_base->op_actions[TGSI_OPCODE_LDEXP].emit = build_tgsi_intrinsic_nomem;
@@ -784,7 +758,6 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
        bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
        bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
        bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
-       bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
        bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
        bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
        bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
@@ -821,7 +794,6 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
        bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
        bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
        bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
-       bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
        bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
 
        bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;