radeonsi: Fix sampler views for depth textures.
[mesa.git] / src / gallium / drivers / radeon / R600ISelLowering.cpp
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
// mostly implements EmitInstrWithCustomInserter() and the R600-specific
// custom lowering of SDNode operations.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "R600ISelLowering.h"
16 #include "R600Defines.h"
17 #include "R600InstrInfo.h"
18 #include "R600MachineFunctionInfo.h"
19 #include "llvm/Argument.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
23
24 using namespace llvm;
25
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);

  setSchedulingPreference(Sched::VLIW);
}

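// Lower pseudo-instructions that are easier to expand after instruction
// selection, e.g. operations that map to R600 instruction flag bits or to
// short multi-instruction sequences with no SelectionDAG representation.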
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::SHADER_TYPE: break;
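  // CLAMP, FABS and FNEG all expand to a plain MOV; the actual operation is
  // encoded as an instruction flag on the destination or source operand.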
  case AMDGPU::CLAMP_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }
  case AMDGPU::FABS_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }

  case AMDGPU::FNEG_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_NEG);
      break;
    }

  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
          .addOperand(MI->getOperand(0))
          .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->addFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased.
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
    {
      // Convert to DWORD address.
      unsigned NewAddr = MRI.createVirtualRegister(
          &AMDGPU::R600_TReg32_XRegClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
          &AMDGPU::R600_TReg32RegClass);
      unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
              ShiftValue)
          .addReg(AMDGPU::ALU_LITERAL_X)
          .addReg(AMDGPU::PRED_SEL_OFF)
          .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
          .addOperand(MI->getOperand(1))
          .addReg(ShiftValue)
          .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
          .addOperand(MI->getOperand(0))
          .addReg(NewAddr)
          .addImm(EOP); // Set End of program bit
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

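  // TXD/TXD_SHADOW: texture sampling with user-supplied gradients. The
  // horizontal and vertical gradients are loaded first with dedicated
  // pseudo-instructions; listing their results as implicit uses of the
  // sample instruction keeps the gradient writes live and ordered before it.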
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(0);
      break;
  case AMDGPU::BRANCH_COND_f32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::BRANCH_COND_i32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::input_perspective:
    {
      R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();

      // XXX Be more fine-grained about register reservation.
      for (unsigned i = 0; i < 4; i++) {
        unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
        MFI->ReservedRegs.push_back(ReservedReg);
      }

      switch (MI->getOperand(1).getImm()) {
      case 0: // Perspective
        MFI->HasPerspectiveInterpolation = true;
        break;
      case 1: // Linear
        MFI->HasLinearInterpolation = true;
        break;
      default:
        assert(0 && "Unknown ij index");
      }

      return BB;
    }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    case AMDGPUIntrinsic::R600_load_input_perspective: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_linear: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_constant: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP_P0,
          DL, MVT::v4f32,
          DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_position: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(slot);
      SDValue RegValue = CreateLiveInRegister(DAG,
                                              &AMDGPU::R600_TReg32RegClass,
                                              Reg, MVT::f32);
      if ((slot % 4) == 3) {
        return DAG.getNode(ISD::FDIV,
                           DL, VT,
                           DAG.getConstantFP(1.0f, MVT::f32),
                           RegValue);
      } else {
        return RegValue;
      }
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const
{
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT:
    Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    // Return here; falling through to the intrinsic case would misread
    // operand 0 as an intrinsic ID.
    return;
  case ISD::INTRINSIC_WO_CHAIN:
    {
      unsigned IntrinsicID =
          cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
      if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) {
        Results.push_back(LowerInputFace(N, DAG));
      } else {
        return;
      }
    }
  }
}

SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
{
  unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(slot);
  SDValue RegValue = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                          Reg, MVT::f32);
  return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
                     RegValue, DAG.getConstantFP(0.0f, MVT::f32),
                     DAG.getCondCode(ISD::SETUGT));
}

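// An i1 fp_to_uint only has to distinguish zero from non-zero, so it can be
// lowered to a single floating-point comparison against 0.0.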
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
{
  return DAG.getNode(
      ISD::SETCC,
      Op.getDebugLoc(),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE));
}

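// BR_CC is lowered by first materializing the comparison result as a
// hardware boolean (all-ones/0 for i32, 1.0f/0.0f for f32) via SELECT_CC,
// then branching on that value with BRANCH_COND.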
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;

  if (LHS.getValueType() == MVT::i32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
  } else {
    assert(0 && "Not valid type for br_cc");
  }
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

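// Implicit parameters (the ngroups/global-size/local-size values above) are
// laid out as consecutive dwords at the start of the PARAM_I_ADDRESS space,
// so each one can be fetched with a plain constant-offset load.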
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

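  // BITALIGN selects a 32-bit window from the 64-bit concatenation of its
  // first two operands, shifted right by the third, so
  // rotl(x, n) == bitalign(x, x, 32 - n).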
  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

bool R600TargetLowering::isZero(SDValue Op) const
{
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

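// The native CND* instructions can implement SELECT_CC directly, but only
// when all operands share one type and the comparison is against zero;
// everything else has to be massaged into that form first.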
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type as True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                      LHS, RHS,
                                      DAG.getConstant(-1, MVT::i32),
                                      DAG.getConstant(0, MVT::i32),
                                      CC);
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                        LHS, RHS,
                                        DAG.getConstantFP(1.0f, MVT::f32),
                                        DAG.getConstantFP(0.0f, MVT::f32),
                                        CC);
        return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type pairings in SELECT_CC");
      }
    } else {
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we could handle this with a native instruction by swapping True and False
  // and inverting the condition.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // Check if we can lower this to a native operation.
  // CND* instructions require all operands to have the same type,
  // and RHS to be zero.

  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS,
                             HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

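// SETCC is lowered as a SELECT_CC that produces the hardware boolean
// encoding (-1/0 for i32, 1.0f/0.0f for f32); the result is then masked
// with 1 so the final value is the 0/1 boolean SETCC is defined to return.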
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  if (LHS.getValueType() == MVT::i32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
    Cond = DAG.getNode(
        ISD::FP_TO_SINT,
        DL,
        MVT::i32,
        Cond);
  } else {
    assert(0 && "Not valid type for set_cc");
  }
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}

// XXX Only kernel functions are supported, so we can assume for now that
// every function is a kernel function, but in the future we should use
// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
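  // The first 36 bytes (9 dwords) of the parameter space hold the implicit
  // parameters fetched by LowerImplicitParameter above, so explicit kernel
  // arguments start at byte offset 36.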
  unsigned ParamOffsetBytes = 36;
  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    EVT VT = Ins[i].VT;
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getLoad(VT, DL, DAG.getRoot(),
                              DAG.getConstant(ParamOffsetBytes, MVT::i32),
                              MachinePointerInfo(new Argument(PtrTy)),
                              false, false, false, 4);
    InVals.push_back(Arg);
    ParamOffsetBytes += VT.getStoreSize();
  }
  return Chain;
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const
{
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }
  }
  return SDValue();
}