r600g: fix RSQ of negative value on Cayman
[mesa.git] / src / gallium / drivers / r600 / r600_llvm.c
index f916604db7bd132ed9f2c84206ff0f15038c2fa6..71ea57812204384fecf0fabbd6c5a948e20bc7c8 100644 (file)
@@ -5,6 +5,7 @@
 #include "gallivm/lp_bld_gather.h"
 #include "tgsi/tgsi_parse.h"
 #include "util/u_double_list.h"
+#include "util/u_memory.h"
 
 #include "r600.h"
 #include "r600_asm.h"
 
 #include <stdio.h>
 
+#if defined R600_USE_LLVM || defined HAVE_OPENCL
+
 static LLVMValueRef llvm_fetch_const(
        struct lp_build_tgsi_context * bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type,
        unsigned swizzle)
 {
-       LLVMValueRef cval = lp_build_intrinsic_unary(bld_base->base.gallivm->builder,
+       LLVMValueRef idx = lp_build_const_int32(bld_base->base.gallivm,
+                       radeon_llvm_reg_index_soa(reg->Register.Index, swizzle));
+       LLVMValueRef cval = build_intrinsic(bld_base->base.gallivm->builder,
                "llvm.AMDGPU.load.const", bld_base->base.elem_type,
-               lp_build_const_int32(bld_base->base.gallivm,
-               radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)));
+               &idx, 1, LLVMReadNoneAttribute);
 
        return bitcast(bld_base, type, cval);
 }
@@ -44,10 +48,11 @@ static void llvm_load_system_value(
 
        LLVMValueRef reg = lp_build_const_int32(
                        ctx->soa.bld_base.base.gallivm, chan);
-       ctx->system_values[index] = lp_build_intrinsic_unary(
+       ctx->system_values[index] = build_intrinsic(
                        ctx->soa.bld_base.base.gallivm->builder,
                        "llvm.R600.load.input",
-                       ctx->soa.bld_base.base.elem_type, reg);
+                       ctx->soa.bld_base.base.elem_type, &reg, 1,
+                       LLVMReadNoneAttribute);
 }
 
 static LLVMValueRef llvm_fetch_system_value(
@@ -76,10 +81,11 @@ static void llvm_load_input(
                LLVMValueRef reg = lp_build_const_int32(
                                ctx->soa.bld_base.base.gallivm,
                                soa_index + (ctx->reserved_reg_count * 4));
-               ctx->inputs[soa_index] = lp_build_intrinsic_unary(
+               ctx->inputs[soa_index] = build_intrinsic(
                                ctx->soa.bld_base.base.gallivm->builder,
                                "llvm.R600.load.input",
-                               ctx->soa.bld_base.base.elem_type, reg);
+                               ctx->soa.bld_base.base.elem_type, &reg, 1,
+                               LLVMReadNoneAttribute);
        }
 }
 
@@ -139,16 +145,38 @@ static void llvm_emit_tex(
        struct lp_build_emit_data * emit_data)
 {
        struct gallivm_state * gallivm = bld_base->base.gallivm;
-       LLVMValueRef args[3];
+       LLVMValueRef args[6];
+       unsigned c, sampler_src;
+
+       assert(emit_data->arg_count + 2 <= Elements(args));
+
+       for (c = 0; c < emit_data->arg_count; ++c)
+               args[c] = emit_data->args[c];
 
-       args[0] = emit_data->args[0];
-       args[1] = lp_build_const_int32(gallivm,
-                                       emit_data->inst->Src[1].Register.Index);
-       args[2] = lp_build_const_int32(gallivm,
+       sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;
+
+       args[c++] = lp_build_const_int32(gallivm,
+                                       emit_data->inst->Src[sampler_src].Register.Index);
+       args[c++] = lp_build_const_int32(gallivm,
                                        emit_data->inst->Texture.Texture);
-       emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
+
+       emit_data->output[0] = build_intrinsic(gallivm->builder,
                                        action->intr_name,
-                                       emit_data->dst_type, args, 3);
+                                       emit_data->dst_type, args, c, LLVMReadNoneAttribute);
+}
+
+static void emit_cndlt(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       LLVMValueRef float_zero = lp_build_const_float(
+               bld_base->base.gallivm, 0.0f);
+       LLVMValueRef cmp = LLVMBuildFCmp(
+               builder, LLVMRealULT, emit_data->args[0], float_zero, "");
+       emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
+               cmp, emit_data->args[1], emit_data->args[2], "");
 }
 
 static void dp_fetch_args(
@@ -189,7 +217,7 @@ static void dp_fetch_args(
 
 static struct lp_build_tgsi_action dot_action = {
        .fetch_args = dp_fetch_args,
-       .emit = lp_build_tgsi_intrinsic,
+       .emit = build_tgsi_intrinsic_nomem,
        .intr_name = "llvm.AMDGPU.dp4"
 };
 
@@ -227,6 +255,7 @@ LLVMModuleRef r600_tgsi_llvm(
        bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
        bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
        bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
+       bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
 
        lp_build_tgsi_llvm(bld_base, tokens);
 
@@ -247,6 +276,8 @@ const char * r600_llvm_gpu_string(enum radeon_family family)
        case CHIP_RV635:
        case CHIP_RS780:
        case CHIP_RS880:
+               gpu_family = "r600";
+               break;
        case CHIP_RV710:
                gpu_family = "rv710";
                break;
@@ -307,3 +338,5 @@ unsigned r600_llvm_compile(
        return radeon_llvm_compile(mod, inst_bytes, inst_byte_count,
                                                        gpu_family, dump);
 }
+
+#endif