radeon/llvm: add SET_GRADIENTS*, fix SAMPLE_G
author Vadim Girlin <vadimgirlin@gmail.com>
Tue, 15 May 2012 14:53:06 +0000 (18:53 +0400)
committer Vadim Girlin <vadimgirlin@gmail.com>
Tue, 15 May 2012 14:53:06 +0000 (18:53 +0400)
Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
src/gallium/drivers/r600/r600_llvm.c
src/gallium/drivers/radeon/AMDGPUIntrinsics.td
src/gallium/drivers/radeon/AMDGPUUtil.cpp
src/gallium/drivers/radeon/R600ISelLowering.cpp
src/gallium/drivers/radeon/R600Instructions.td
src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c

index adcc24f0ca309ed1962db2085c11eae0416ce966..2ca838f87fa130c7fd6ba0acdd9a7cdf0009acc0 100644 (file)
@@ -5,6 +5,7 @@
 #include "gallivm/lp_bld_gather.h"
 #include "tgsi/tgsi_parse.h"
 #include "util/u_double_list.h"
+#include "util/u_memory.h"
 
 #include "r600.h"
 #include "r600_asm.h"
@@ -142,16 +143,22 @@ static void llvm_emit_tex(
        struct lp_build_emit_data * emit_data)
 {
        struct gallivm_state * gallivm = bld_base->base.gallivm;
-       LLVMValueRef args[3];
+       LLVMValueRef args[6];
+       unsigned c;
 
-       args[0] = emit_data->args[0];
-       args[1] = lp_build_const_int32(gallivm,
+       assert(emit_data->arg_count + 2 <= Elements(args));
+
+       for (c = 0; c < emit_data->arg_count; ++c)
+               args[c] = emit_data->args[c];
+
+       args[c++] = lp_build_const_int32(gallivm,
                                        emit_data->inst->Src[1].Register.Index);
-       args[2] = lp_build_const_int32(gallivm,
+       args[c++] = lp_build_const_int32(gallivm,
                                        emit_data->inst->Texture.Texture);
+
        emit_data->output[0] = build_intrinsic(gallivm->builder,
                                        action->intr_name,
-                                       emit_data->dst_type, args, 3, LLVMReadNoneAttribute);
+                                       emit_data->dst_type, args, c, LLVMReadNoneAttribute);
 }
 
 static void dp_fetch_args(
index 1b6d8023baeb3a82088deb9c9328e67e4695bffa..a7c5e96e3f1f654e8432a4e4103988b2b6bbebf1 100644 (file)
@@ -44,7 +44,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
   def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
   def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
index f0621384dbc0899a55440f220076993254bcfaa5..20831a613b7d54bfd7f3b5d198b042fa365fd86f 100644 (file)
@@ -83,6 +83,8 @@ bool AMDGPU::isTexOp(unsigned opcode)
   case AMDIL::TEX_SAMPLE_C_G:
   case AMDIL::TEX_GET_GRADIENTS_H:
   case AMDIL::TEX_GET_GRADIENTS_V:
+  case AMDIL::TEX_SET_GRADIENTS_H:
+  case AMDIL::TEX_SET_GRADIENTS_V:
     return true;
   }
 }
index e85ac31b34c59681f9b5b1ac11d11824f3c8ebdc..4db40f799ede6e79f7e44d55ac6d07e6d0014d54 100644 (file)
@@ -131,6 +131,53 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
       MFI->ReservedRegs.push_back(ReservedReg);
       break;
     }
+
+  case AMDIL::TXD:
+    {
+      unsigned t0 = MRI.createVirtualRegister(AMDIL::R600_Reg128RegisterClass);
+      unsigned t1 = MRI.createVirtualRegister(AMDIL::R600_Reg128RegisterClass);
+
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SET_GRADIENTS_H), t0)
+              .addOperand(MI->getOperand(3))
+              .addOperand(MI->getOperand(4))
+              .addOperand(MI->getOperand(5));
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SET_GRADIENTS_V), t1)
+              .addOperand(MI->getOperand(2))
+              .addOperand(MI->getOperand(4))
+              .addOperand(MI->getOperand(5));
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SAMPLE_G))
+              .addOperand(MI->getOperand(0))
+              .addOperand(MI->getOperand(1))
+              .addOperand(MI->getOperand(4))
+              .addOperand(MI->getOperand(5))
+              .addReg(t0, RegState::Implicit)
+              .addReg(t1, RegState::Implicit);
+      break;
+    }
+  case AMDIL::TXD_SHADOW:
+    {
+      unsigned t0 = MRI.createVirtualRegister(AMDIL::R600_Reg128RegisterClass);
+      unsigned t1 = MRI.createVirtualRegister(AMDIL::R600_Reg128RegisterClass);
+
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SET_GRADIENTS_H), t0)
+              .addOperand(MI->getOperand(3))
+              .addOperand(MI->getOperand(4))
+              .addOperand(MI->getOperand(5));
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SET_GRADIENTS_V), t1)
+              .addOperand(MI->getOperand(2))
+              .addOperand(MI->getOperand(4))
+              .addOperand(MI->getOperand(5));
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SAMPLE_C_G))
+              .addOperand(MI->getOperand(0))
+              .addOperand(MI->getOperand(1))
+              .addOperand(MI->getOperand(4))
+              .addOperand(MI->getOperand(5))
+              .addReg(t0, RegState::Implicit)
+              .addReg(t1, RegState::Implicit);
+      break;
+    }
+
+
   }
 
   MI->eraseFromParent();
index e145b9c8e942b9bd7f1c01a3e104c88545a8f29d..c323578886780f06d923bb95549fba2ad7e257c6 100644 (file)
@@ -448,6 +448,16 @@ def TEX_GET_GRADIENTS_V : R600_TEX <
   [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))]
 >;
 
+def TEX_SET_GRADIENTS_H : R600_TEX <
+  0x0B, "TEX_SET_GRADIENTS_H",
+  []
+>;
+
+def TEX_SET_GRADIENTS_V : R600_TEX <
+  0x0C, "TEX_SET_GRADIENTS_V",
+  []
+>;
+
 def TEX_SAMPLE : R600_TEX <
   0x10, "TEX_SAMPLE",
   [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))]
@@ -480,12 +490,12 @@ def TEX_SAMPLE_C_LB : R600_TEX <
 
 def TEX_SAMPLE_G : R600_TEX <
   0x14, "TEX_SAMPLE_G",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, imm:$src1, imm:$src2))]
+  []
 >;
 
 def TEX_SAMPLE_C_G : R600_TEX <
   0x1C, "TEX_SAMPLE_C_G",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
+  []
 >;
 
 } // End Gen R600_CAYMAN
@@ -1009,6 +1019,20 @@ def STORE_OUTPUT: AMDGPUShaderInst <
   [(int_AMDGPU_store_output R600_Reg32:$src0, imm:$src1)]
 >;
 
+def TXD: AMDGPUShaderInst <
+  (outs R600_Reg128:$dst),
+  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
+  "TXD $dst, $src0, $src1, $src2, $src3, $src4",
+  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, imm:$src4))]
+>;
+
+def TXD_SHADOW: AMDGPUShaderInst <
+  (outs R600_Reg128:$dst),
+  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
+  "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4",
+  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))]
+>;
+
 } // End usesCustomInserter = 1, isPseudo = 1
 
 } // End isCodeGenOnly = 1
index cbe052d74b6ee0b28b30316a29b40a4dd2330761..44de04295d848092ecb0080528e179258f1fe1d2 100644 (file)
@@ -562,6 +562,25 @@ static void emit_prepare_cube_coords(
                                                coords, 4);
 }
 
+static void txd_fetch_args(
+       struct lp_build_tgsi_context * bld_base,
+       struct lp_build_emit_data * emit_data)
+{
+       const struct tgsi_full_instruction * inst = emit_data->inst;
+
+       LLVMValueRef coords[4];
+       unsigned chan, src;
+       for (src = 0; src < 3; src++) {
+               for (chan = 0; chan < 4; chan++)
+                       coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
+
+               emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
+                               coords, 4);
+       }
+       emit_data->arg_count = 3;
+       emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+}
+
 static void txp_fetch_args(
        struct lp_build_tgsi_context * bld_base,
        struct lp_build_emit_data * emit_data)
@@ -1086,7 +1105,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
        bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
        bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
        bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
-       bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = tex_fetch_args;
+       bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
        bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
        bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
        bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";