radeonsi: Adapt to sample intrinsics changes.
[mesa.git] / src / gallium / drivers / radeonsi / radeonsi_shader.c
index ce2095c8652654998d7ebdc31beff1139c74b475..575cc67383a79e5eda4a301553dca91e67837f4e 100644 (file)
@@ -31,6 +31,7 @@
 #include "gallivm/lp_bld_const.h"
 #include "gallivm/lp_bld_gather.h"
 #include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_tgsi.h"
 #include "radeon_llvm.h"
 #include "radeon_llvm_emit.h"
@@ -477,6 +478,13 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                if (cbuf >= 0 && cbuf < 8) {
                        struct r600_context *rctx = si_shader_ctx->rctx;
                        compressed = (si_shader_ctx->key.export_16bpc >> cbuf) & 0x1;
+
+                       if (compressed)
+                               si_shader_ctx->shader->spi_shader_col_format |=
+                                       V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf);
+                       else
+                               si_shader_ctx->shader->spi_shader_col_format |=
+                                       V_028714_SPI_SHADER_32_ABGR << (4 * cbuf);
                }
        }
 
@@ -495,7 +503,11 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                                                LLVMInt32TypeInContext(base->gallivm->context),
                                                args, 2,
                                                LLVMReadNoneAttribute);
-                       args[chan + 7] = args[chan + 5];
+                       args[chan + 7] = args[chan + 5] =
+                               LLVMBuildBitCast(base->gallivm->builder,
+                                                args[chan + 5],
+                                                LLVMFloatTypeInContext(base->gallivm->context),
+                                                "");
                }
 
                /* Set COMPR flag */
@@ -546,6 +558,37 @@ static void si_llvm_emit_prologue(struct lp_build_tgsi_context *bld_base)
 }
 
 
+static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
+                         unsigned index)
+{
+       struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+
+       if (si_shader_ctx->key.alpha_func != PIPE_FUNC_NEVER) {
+               LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3];
+               LLVMValueRef alpha_pass =
+                       lp_build_cmp(&bld_base->base,
+                                    si_shader_ctx->key.alpha_func,
+                                    LLVMBuildLoad(gallivm->builder, out_ptr, ""),
+                                    lp_build_const_float(gallivm, si_shader_ctx->key.alpha_ref));
+               LLVMValueRef arg =
+                       lp_build_select(&bld_base->base,
+                                       alpha_pass,
+                                       lp_build_const_float(gallivm, 1.0f),
+                                       lp_build_const_float(gallivm, -1.0f));
+
+               build_intrinsic(gallivm->builder,
+                               "llvm.AMDGPU.kill",
+                               LLVMVoidTypeInContext(gallivm->context),
+                               &arg, 1, 0);
+       } else {
+               build_intrinsic(gallivm->builder,
+                               "llvm.AMDGPU.kilp",
+                               LLVMVoidTypeInContext(gallivm->context),
+                               NULL, 0, 0);
+       }
+}
+
 /* XXX: This is partially implemented for VS only at this point.  It is not complete */
 static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 {
@@ -555,14 +598,15 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
        struct lp_build_context * uint =
                                &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
        struct tgsi_parse_context *parse = &si_shader_ctx->parse;
+       LLVMValueRef args[9];
        LLVMValueRef last_args[9] = { 0 };
        unsigned color_count = 0;
        unsigned param_count = 0;
+       int depth_index = -1, stencil_index = -1;
 
        while (!tgsi_parse_end_of_tokens(parse)) {
                struct tgsi_full_declaration *d =
                                        &parse->FullToken.FullDeclaration;
-               LLVMValueRef args[9];
                unsigned target;
                unsigned index;
                int i;
@@ -595,9 +639,19 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
                        /* Select the correct target */
                        switch(d->Semantic.Name) {
                        case TGSI_SEMANTIC_PSIZE:
-                       case TGSI_SEMANTIC_POSITION:
                                target = V_008DFC_SQ_EXP_POS;
                                break;
+                       case TGSI_SEMANTIC_POSITION:
+                               if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
+                                       target = V_008DFC_SQ_EXP_POS;
+                                       break;
+                               } else {
+                                       depth_index = index;
+                                       continue;
+                               }
+                       case TGSI_SEMANTIC_STENCIL:
+                               stencil_index = index;
+                               continue;
                        case TGSI_SEMANTIC_COLOR:
                                if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
                        case TGSI_SEMANTIC_BCOLOR:
@@ -606,6 +660,10 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
                                        param_count++;
                                } else {
                                        target = V_008DFC_SQ_EXP_MRT + color_count;
+                                       if (color_count == 0 &&
+                                           si_shader_ctx->key.alpha_func != PIPE_FUNC_ALWAYS)
+                                               si_alpha_test(bld_base, index);
+
                                        color_count++;
                                }
                                break;
@@ -645,6 +703,52 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
                }
        }
 
+       if (depth_index >= 0 || stencil_index >= 0) {
+               LLVMValueRef out_ptr;
+               unsigned mask = 0;
+
+               /* Specify the target we are exporting */
+               args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
+
+               if (depth_index >= 0) {
+                       out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2];
+                       args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
+                       mask |= 0x1;
+
+                       if (stencil_index < 0) {
+                               args[6] =
+                               args[7] =
+                               args[8] = args[5];
+                       }
+               }
+
+               if (stencil_index >= 0) {
+                       out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1];
+                       args[7] =
+                       args[8] =
+                       args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
+                       mask |= 0x2;
+
+                       if (depth_index < 0)
+                               args[5] = args[6];
+               }
+
+               /* Specify which components to enable */
+               args[0] = lp_build_const_int32(base->gallivm, mask);
+
+               args[1] =
+               args[2] =
+               args[4] = uint->zero;
+
+               if (last_args[0])
+                       lp_build_intrinsic(base->gallivm->builder,
+                                          "llvm.SI.export",
+                                          LLVMVoidTypeInContext(base->gallivm->context),
+                                          args, 9);
+               else
+                       memcpy(last_args, args, sizeof(args));
+       }
+
        if (!last_args[0]) {
                assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);
 
@@ -662,6 +766,9 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
                last_args[6]= uint->zero;
                last_args[7]= uint->zero;
                last_args[8]= uint->zero;
+
+               si_shader_ctx->shader->spi_shader_col_format |=
+                       V_028714_SPI_SHADER_32_ABGR;
        }
 
        /* Specify whether the EXEC mask represents the valid mask */
@@ -684,9 +791,12 @@ static void tex_fetch_args(
        struct lp_build_tgsi_context * bld_base,
        struct lp_build_emit_data * emit_data)
 {
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        LLVMValueRef ptr;
        LLVMValueRef offset;
+       LLVMValueRef coords[5];
+       unsigned chan;
 
        /* WriteMask */
        /* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask*/
@@ -694,27 +804,48 @@ static void tex_fetch_args(
 
        /* Coordinates */
        /* XXX: Not all sample instructions need 4 address arguments. */
-       if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
-               LLVMValueRef src_w;
-               unsigned chan;
-               LLVMValueRef coords[4];
-
-               emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-               src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
-
-               for (chan = 0; chan < 3; chan++ ) {
-                       LLVMValueRef arg = lp_build_emit_fetch(bld_base,
-                                                              emit_data->inst, 0, chan);
+       if (inst->Instruction.Opcode == TGSI_OPCODE_TXP)
+               coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W)
+;
+
+       for (chan = 0; chan < 3; chan++ ) {
+               coords[chan] = lp_build_emit_fetch(bld_base,
+                                                  emit_data->inst, 0,
+                                                  chan);
+               if (inst->Instruction.Opcode == TGSI_OPCODE_TXP)
                        coords[chan] = lp_build_emit_llvm_binary(bld_base,
                                                                 TGSI_OPCODE_DIV,
-                                                                arg, src_w);
-               }
-               coords[3] = bld_base->base.one;
-               emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm,
-                                                           coords, 4);
-       } else
-               emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
-                                                        0, LP_CHAN_ALL);
+                                                                coords[chan],
+                                                                coords[3]);
+       }
+
+       coords[3] = bld_base->base.one;
+
+       if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
+               inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
+               inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
+               /* These instructions have additional operand that should be packed
+                * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
+                * That operand should be passed as a float value in the args array
+                * right after the coord vector. After packing it's not used anymore,
+                * that's why arg_count is not increased */
+               coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+       }
+
+       if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+            inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
+           inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
+               radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
+       }
+
+       for (chan = 0; chan < 4; chan++ ) {
+               coords[chan] = LLVMBuildBitCast(gallivm->builder,
+                                               coords[chan],
+                                               LLVMInt32TypeInContext(gallivm->context),
+                                               "");
+       }
+
+       emit_data->args[1] = lp_build_gather_values(gallivm, coords, 4);
 
        /* Resource */
        ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, SI_SGPR_RESOURCE);
@@ -731,12 +862,10 @@ static void tex_fetch_args(
                                                ptr, offset);
 
        /* Dimensions */
-       /* XXX: We might want to pass this information to the shader at some. */
-/*     emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
+       emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
                                        emit_data->inst->Texture.Texture);
-*/
 
-       emit_data->arg_count = 4;
+       emit_data->arg_count = 5;
        /* XXX: To optimize, we could use a float or v2f32, if the last bits of
         * the writemask are clear */
        emit_data->dst_type = LLVMVectorType(
@@ -747,7 +876,19 @@ static void tex_fetch_args(
 static const struct lp_build_tgsi_action tex_action = {
        .fetch_args = tex_fetch_args,
        .emit = lp_build_tgsi_intrinsic,
-       .intr_name = "llvm.SI.sample"
+       .intr_name = "llvm.SI.sample."
+};
+
+static const struct lp_build_tgsi_action txb_action = {
+       .fetch_args = tex_fetch_args,
+       .emit = lp_build_tgsi_intrinsic,
+       .intr_name = "llvm.SI.sampleb."
+};
+
+static const struct lp_build_tgsi_action txl_action = {
+       .fetch_args = tex_fetch_args,
+       .emit = lp_build_tgsi_intrinsic,
+       .intr_name = "llvm.SI.samplel."
 };
 
 
@@ -779,6 +920,11 @@ int si_pipe_shader_create(
        bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
 
        tgsi_scan_shader(sel->tokens, &shader_info);
+       if (shader_info.indirect_files != 0) {
+               fprintf(stderr, "Indirect addressing not fully handled yet\n");
+               return -ENOSYS;
+       }
+
        shader->shader.uses_kill = shader_info.uses_kill;
        bld_base->info = &shader_info;
        bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
@@ -786,6 +932,8 @@ int si_pipe_shader_create(
        bld_base->emit_epilogue = si_llvm_emit_epilogue;
 
        bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
+       bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action;
+       bld_base->op_actions[TGSI_OPCODE_TXL] = txl_action;
        bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
 
        si_shader_ctx.radeon_bld.load_input = declare_input;