radeon/llvm: Move lowering of SETCC node to R600ISelLowering

[mesa.git] / src / gallium / drivers / radeon / R600ISelLowering.cpp
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp

index 4db40f799ede6e79f7e44d55ac6d07e6d0014d54..9c92498bfdccbe4e412e1f529a8fd261fa4e3cab 100644 (file)
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -7,12 +7,13 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// Most of the DAG lowering is handled in AMDILISelLowering.cpp.  This file
+// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
  // is mostly EmitInstrWithCustomInserter().
  //
  //===----------------------------------------------------------------------===//
  
  #include "R600ISelLowering.h"
+#include "AMDGPUUtil.h"
  #include "R600InstrInfo.h"
  #include "R600MachineFunctionInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -24,16 +25,22 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
      TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
  {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
-//  setSchedulingPreference(Sched::VLIW);
-  addRegisterClass(MVT::v4f32, &AMDIL::R600_Reg128RegClass);
-  addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass);
-  addRegisterClass(MVT::v4i32, &AMDIL::R600_Reg128RegClass);
-  addRegisterClass(MVT::i32, &AMDIL::R600_Reg32RegClass);
-
-  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
-  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
-  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
-  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
+  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
+  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
+  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
+  computeRegisterProperties();
+
+  setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+  setOperationAction(ISD::ROTL, MVT::i32, Custom);
+
+  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+  setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+  setSchedulingPreference(Sched::VLIW);
  }
  
  MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
@@ -45,75 +52,131 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
  
    switch (MI->getOpcode()) {
    default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
-  case AMDIL::TGID_X:
-    addLiveIn(MI, MF, MRI, TII, AMDIL::T1_X);
+  case AMDGPU::TGID_X:
+    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
      break;
-  case AMDIL::TGID_Y:
-    addLiveIn(MI, MF, MRI, TII, AMDIL::T1_Y);
+  case AMDGPU::TGID_Y:
+    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
      break;
-  case AMDIL::TGID_Z:
-    addLiveIn(MI, MF, MRI, TII, AMDIL::T1_Z);
+  case AMDGPU::TGID_Z:
+    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
      break;
-  case AMDIL::TIDIG_X:
-    addLiveIn(MI, MF, MRI, TII, AMDIL::T0_X);
+  case AMDGPU::TIDIG_X:
+    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
      break;
-  case AMDIL::TIDIG_Y:
-    addLiveIn(MI, MF, MRI, TII, AMDIL::T0_Y);
+  case AMDGPU::TIDIG_Y:
+    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
      break;
-  case AMDIL::TIDIG_Z:
-    addLiveIn(MI, MF, MRI, TII, AMDIL::T0_Z);
+  case AMDGPU::TIDIG_Z:
+    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
      break;
-  case AMDIL::NGROUPS_X:
+  case AMDGPU::NGROUPS_X:
      lowerImplicitParameter(MI, *BB, MRI, 0);
      break;
-  case AMDIL::NGROUPS_Y:
+  case AMDGPU::NGROUPS_Y:
      lowerImplicitParameter(MI, *BB, MRI, 1);
      break;
-  case AMDIL::NGROUPS_Z:
+  case AMDGPU::NGROUPS_Z:
      lowerImplicitParameter(MI, *BB, MRI, 2);
      break;
-  case AMDIL::GLOBAL_SIZE_X:
+  case AMDGPU::GLOBAL_SIZE_X:
      lowerImplicitParameter(MI, *BB, MRI, 3);
      break;
-  case AMDIL::GLOBAL_SIZE_Y:
+  case AMDGPU::GLOBAL_SIZE_Y:
      lowerImplicitParameter(MI, *BB, MRI, 4);
      break;
-  case AMDIL::GLOBAL_SIZE_Z:
+  case AMDGPU::GLOBAL_SIZE_Z:
      lowerImplicitParameter(MI, *BB, MRI, 5);
      break;
-  case AMDIL::LOCAL_SIZE_X:
+  case AMDGPU::LOCAL_SIZE_X:
      lowerImplicitParameter(MI, *BB, MRI, 6);
      break;
-  case AMDIL::LOCAL_SIZE_Y:
+  case AMDGPU::LOCAL_SIZE_Y:
      lowerImplicitParameter(MI, *BB, MRI, 7);
      break;
-  case AMDIL::LOCAL_SIZE_Z:
+  case AMDGPU::LOCAL_SIZE_Z:
      lowerImplicitParameter(MI, *BB, MRI, 8);
      break;
  
-  case AMDIL::R600_LOAD_CONST:
+  case AMDGPU::CLAMP_R600:
+    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
+           .addOperand(MI->getOperand(0))
+           .addOperand(MI->getOperand(1));
+    break;
+
+  case AMDGPU::FABS_R600:
+    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
+           .addOperand(MI->getOperand(0))
+           .addOperand(MI->getOperand(1));
+    break;
+
+  case AMDGPU::FNEG_R600:
+    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
+            .addOperand(MI->getOperand(0))
+            .addOperand(MI->getOperand(1));
+    break;
+
+  case AMDGPU::R600_LOAD_CONST:
      {
        int64_t RegIndex = MI->getOperand(1).getImm();
-      unsigned ConstantReg = AMDIL::R600_CReg32RegClass.getRegister(RegIndex);
-      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY))
+      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                    .addOperand(MI->getOperand(0))
                    .addReg(ConstantReg);
        break;
      }
  
-  case AMDIL::LOAD_INPUT:
+  case AMDGPU::LOAD_INPUT:
      {
        int64_t RegIndex = MI->getOperand(1).getImm();
        addLiveIn(MI, MF, MRI, TII,
-                AMDIL::R600_TReg32RegClass.getRegister(RegIndex));
+                AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
        break;
      }
-  case AMDIL::STORE_OUTPUT:
+
+  case AMDGPU::MASK_WRITE:
+    {
+      unsigned maskedRegister = MI->getOperand(0).getReg();
+      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
+      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
+      def->addTargetFlag(MO_FLAG_MASK);
+      // Return early so the instruction is not erased
+      return BB;
+    }
+
+  case AMDGPU::RAT_WRITE_CACHELESS_eg:
+    {
+      // Convert to DWORD address
+      unsigned NewAddr = MRI.createVirtualRegister(
+                                             AMDGPU::R600_TReg32_XRegisterClass);
+      unsigned ShiftValue = MRI.createVirtualRegister(
+                                              AMDGPU::R600_TReg32RegisterClass);
+
+      // XXX In theory, we should be able to pass ShiftValue directly to
+      // the LSHR_eg instruction as an inline literal, but I tried doing it
+      // this way and it didn't produce the correct results.
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
+              .addReg(AMDGPU::ALU_LITERAL_X)
+              .addImm(2);
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
+              .addOperand(MI->getOperand(1))
+              .addReg(ShiftValue);
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+              .addOperand(MI->getOperand(0))
+              .addReg(NewAddr);
+      break;
+    }
+
+  case AMDGPU::STORE_OUTPUT:
      {
        int64_t OutputIndex = MI->getOperand(1).getImm();
-      unsigned OutputReg = AMDIL::R600_TReg32RegClass.getRegister(OutputIndex);
+      unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);
  
-      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY), OutputReg)
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg)
                    .addOperand(MI->getOperand(0));
  
        if (!MRI.isLiveOut(OutputReg)) {
@@ -122,30 +185,30 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
        break;
      }
  
-  case AMDIL::RESERVE_REG:
+  case AMDGPU::RESERVE_REG:
      {
        R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
        int64_t ReservedIndex = MI->getOperand(0).getImm();
        unsigned ReservedReg =
-                          AMDIL::R600_TReg32RegClass.getRegister(ReservedIndex);
+                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
        MFI->ReservedRegs.push_back(ReservedReg);
        break;
      }
  
-  case AMDIL::TXD:
+  case AMDGPU::TXD:
      {
-      unsigned t0 = MRI.createVirtualRegister(AMDIL::R600_Reg128RegisterClass);
-      unsigned t1 = MRI.createVirtualRegister(AMDIL::R600_Reg128RegisterClass);
+      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
+      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
  
-      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SET_GRADIENTS_H), t0)
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
                .addOperand(MI->getOperand(3))
                .addOperand(MI->getOperand(4))
                .addOperand(MI->getOperand(5));
-      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SET_GRADIENTS_V), t1)
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
                .addOperand(MI->getOperand(2))
                .addOperand(MI->getOperand(4))
                .addOperand(MI->getOperand(5));
-      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SAMPLE_G))
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
                .addOperand(MI->getOperand(0))
                .addOperand(MI->getOperand(1))
                .addOperand(MI->getOperand(4))
@@ -154,20 +217,20 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
                .addReg(t1, RegState::Implicit);
        break;
      }
-  case AMDIL::TXD_SHADOW:
+  case AMDGPU::TXD_SHADOW:
      {
-      unsigned t0 = MRI.createVirtualRegister(AMDIL::R600_Reg128RegisterClass);
-      unsigned t1 = MRI.createVirtualRegister(AMDIL::R600_Reg128RegisterClass);
+      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
+      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
  
-      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SET_GRADIENTS_H), t0)
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
                .addOperand(MI->getOperand(3))
                .addOperand(MI->getOperand(4))
                .addOperand(MI->getOperand(5));
-      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SET_GRADIENTS_V), t1)
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
                .addOperand(MI->getOperand(2))
                .addOperand(MI->getOperand(4))
                .addOperand(MI->getOperand(5));
-      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::TEX_SAMPLE_C_G))
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
                .addOperand(MI->getOperand(0))
                .addOperand(MI->getOperand(1))
                .addOperand(MI->getOperand(4))
@@ -188,15 +251,174 @@ void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBl
      MachineRegisterInfo & MRI, unsigned dword_offset) const
  {
    MachineBasicBlock::iterator I = *MI;
-  unsigned offsetReg = MRI.createVirtualRegister(&AMDIL::R600_TReg32_XRegClass);
-  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDIL::R600_TReg32_XRegClass);
+  unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
+  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);
  
-  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::MOV), offsetReg)
-          .addReg(AMDIL::ALU_LITERAL_X)
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::MOV), PtrReg)
+          .addReg(AMDGPU::ALU_LITERAL_X)
            .addImm(dword_offset * 4);
  
-  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::VTX_READ_eg))
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
            .addOperand(MI->getOperand(0))
-          .addReg(offsetReg)
+          .addReg(PtrReg)
            .addImm(0);
  }
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+
+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+  switch (Op.getOpcode()) {
+  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+  case ISD::ROTL: return LowerROTL(Op, DAG);
+  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+  case ISD::SETCC: return LowerSETCC(Op, DAG);
+  }
+}
+
+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+
+  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
+                     Op.getOperand(0),
+                     Op.getOperand(0),
+                     DAG.getNode(ISD::SUB, DL, VT,
+                                 DAG.getConstant(32, MVT::i32),
+                                 Op.getOperand(1)));
+}
+
+SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue True = Op.getOperand(2);
+  SDValue False = Op.getOperand(3);
+  SDValue CC = Op.getOperand(4);
+  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+  SDValue Temp;
+
+  // LHS and RHS are guaranteed to be the same value type
+  EVT CompareVT = LHS.getValueType();
+
+  // We need all the operands of SELECT_CC to have the same value type, so if
+  // necessary we need to convert LHS and RHS to be the same type as True and
+  // False.  True and False are guaranteed to have the same type as this
+  // SELECT_CC node.
+
+  if (CompareVT !=  VT) {
+    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
+    if (VT == MVT::f32 && CompareVT == MVT::i32) {
+      if (isUnsignedIntSetCC(CCOpcode)) {
+        ConversionOp = ISD::UINT_TO_FP;
+      } else {
+        ConversionOp = ISD::SINT_TO_FP;
+      }
+    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
+      ConversionOp = ISD::FP_TO_SINT;
+    } else {
+      // I don't think there will be any other type pairings.
+      assert(!"Unhandled operand type parings in SELECT_CC");
+    }
+    // XXX Check the value of LHS and RHS and avoid creating sequences like
+    // (FTOI (ITOF))
+    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
+    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
+  }
+
+  // If True is a hardware TRUE value and False is a hardware FALSE value, we
+  // can handle this directly with a native instruction (SET* instructions).
+  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
+    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+  }
+
+  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
+  // we can handle this with a native instruction, but we need to swap true
+  // and false and change the conditional.
+  if (isHWTrueValue(False) && isHWFalseValue(True)) {
+  }
+
+  // XXX Check if we can lower this to a SELECT or if it is supported by a native
+  // operation. (The code below does this, but we don't have the instruction
+  // selection patterns to do this yet.)
+#if 0
+  if (isZero(LHS) || isZero(RHS)) {
+    SDValue Cond = (isZero(LHS) ? RHS : LHS);
+    bool SwapTF = false;
+    switch (CCOpcode) {
+    case ISD::SETOEQ:
+    case ISD::SETUEQ:
+    case ISD::SETEQ:
+      SwapTF = true;
+      // Fall through
+    case ISD::SETONE:
+    case ISD::SETUNE:
+    case ISD::SETNE:
+      // We can lower to select
+      if (SwapTF) {
+        Temp = True;
+        True = False;
+        False = Temp;
+      }
+      // CNDE
+      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+    default:
+      // Supported by a native operation (CNDGE, CNDGT)
+      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+    }
+  }
+#endif
+
+  // If we make it this far, it means we have no native instructions to handle
+  // this SELECT_CC, so we must lower it.
+  SDValue HWTrue, HWFalse;
+
+  if (VT == MVT::f32) {
+    HWTrue = DAG.getConstantFP(1.0f, VT);
+    HWFalse = DAG.getConstantFP(0.0f, VT);
+  } else if (VT == MVT::i32) {
+    HWTrue = DAG.getConstant(-1, VT);
+    HWFalse = DAG.getConstant(0, VT);
+  }
+  else {
+    assert(!"Unhandled value type in LowerSELECT_CC");
+  }
+
+  // Lower this unsupported SELECT_CC into a supported SELECT_CC (producing a
+  // hardware TRUE/FALSE value) followed by a SELECT on that result.
+  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
+
+  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+}
+
+SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Cond;
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue CC  = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+  assert(Op.getValueType() == MVT::i32);
+  Cond = DAG.getNode(
+      ISD::SELECT_CC,
+      Op.getDebugLoc(),
+      MVT::i32,
+      LHS, RHS,
+      DAG.getConstant(-1, MVT::i32),
+      DAG.getConstant(0, MVT::i32),
+      CC);
+  Cond = DAG.getNode(
+      ISD::AND,
+      DL,
+      MVT::i32,
+      DAG.getConstant(1, MVT::i32),
+      Cond);
+  return Cond;
+}