radeon/llvm: Merge AMDILTargetLowering class into AMDGPUTargetLowering
[mesa.git] / src/gallium/drivers/radeon/AMDILISelLowering.cpp
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
namespace llvm {
namespace AMDGPU {
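// Coerce Src to the scalar type of Dst. Same-kind conversions are widened,
// rounded, or truncated as needed; for mixed int/float pairs, asType picks a
// bit reinterpretation (BITCAST) over a signed value conversion.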
static SDValue
getConversionNode(SelectionDAG &DAG, SDValue &Src, SDValue &Dst, bool asType)
{
  DebugLoc DL = Src.getDebugLoc();
  EVT svt = Src.getValueType().getScalarType();
  EVT dvt = Dst.getValueType().getScalarType();
  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
    if (dvt.bitsGT(svt)) {
      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
    } else if (dvt.bitsLT(svt)) {
      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
          DAG.getConstant(1, MVT::i32));
    }
  } else if (svt.isInteger() && dvt.isInteger()) {
    if (!svt.bitsEq(dvt)) {
      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
    }
  } else if (svt.isInteger()) {
    unsigned opcode = (asType) ? ISD::BITCAST : ISD::SINT_TO_FP;
    if (!svt.bitsEq(dvt)) {
      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
      } else {
        assert(0 && "We only support 32- and 64-bit fp types");
      }
    }
    Src = DAG.getNode(opcode, DL, dvt, Src);
  } else if (dvt.isInteger()) {
    unsigned opcode = (asType) ? ISD::BITCAST : ISD::FP_TO_SINT;
    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
    } else {
      assert(0 && "We only support 32- and 64-bit fp types");
    }
    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  }
  return Src;
}

} // End namespace AMDGPU
} // End namespace llvm

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering()
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // Set the operation actions for all the types the backend
  // currently supports.

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg directly, so lower it to shifts
    // (see LowerSIGN_EXTEND_INREG).
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also has no divrem function for signed or unsigned division.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU has no [S|U]MUL_LOHI as a single instruction.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU has no rotl, rotr, or byteswap instruction.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU has no counting operators.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  // Allow very large inline expansions of memcpy/memmove/memset.
  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;
}

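// No intrinsics are lowered to target-specific memory intrinsics, so this
// hook always declines.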
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
// The backend supports 32- and 64-bit floating point immediates.
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  return VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64;
}

bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  // f32 and f64 immediates are both legal, so never shrink a constant.
  return VT.getScalarType().getSimpleVT().SimpleTy != MVT::f32
      && VT.getScalarType().getSimpleVT().SimpleTy != MVT::f64;
}


// computeMaskedBitsForTargetNode - Determine which bits of 'Op' are known to
// be zero or one. Op is expected to be a target-specific node. Used by the
// DAG combiner.

void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    default: break;
    case ISD::SELECT_CC:
      // The result is one of the two value operands (2 and 3), so a bit is
      // known only if it is known in both of them.
      DAG.ComputeMaskedBits(
          Op.getOperand(2),
          KnownZero,
          KnownOne,
          Depth + 1
          );
      DAG.ComputeMaskedBits(
          Op.getOperand(3),
          KnownZero2,
          KnownOne2,
          Depth + 1
          );
      assert((KnownZero & KnownOne) == 0
          && "Bits known to be one AND zero?");
      assert((KnownZero2 & KnownOne2) == 0
          && "Bits known to be one AND zero?");
      // Only known if known in both the LHS and RHS
      KnownOne &= KnownOne2;
      KnownZero &= KnownZero2;
      break;
  }
}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch (Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // Fall through to insert the remaining elements.
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // Fall through.
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  }
  return Nodes1;
}

SDValue
AMDGPUTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = AMDGPU::getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDGPUISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}

SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
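  // For example, sign-extending the low 8 bits of an i32 value shifts left
  // by 24 (0x000000AB -> 0xAB000000), then arithmetic-shifts right by 24,
  // replicating bit 7 into the upper bits (0xAB000000 -> 0xFFFFFFAB).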
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
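// Build the integer type, scalar or vector of i32/i64, whose total width
// matches size * numEle bits; e.g. genIntType(16, 4) covers 64 bits and
// yields v2i32, while anything narrower than one element becomes i32/i64.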
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  // Note that BRANCH_COND takes the jump target before the condition.
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  // Only scalar, two-element, and four-element types are expected here.
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
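  // The quotient is computed in floating point, where the operands (at most
  // 16 bits) fit exactly in the 24-bit significand of f32. Because the
  // truncated float quotient may still be off by one in magnitude, jq holds
  // the quotient's sign (+/-1) and is added back below whenever the absolute
  // remainder |fr| reaches |fb|.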
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  // The scalar and vector cases take the same path, so no branch on INTTY
  // is needed.
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(AMDGPUISD::CMOVLOG, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10
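  // The iadd/ixor pairs take absolute values: for negative x with mask
  // m = -1, (x + m) ^ m == ~(x - 1) == -x. After the unsigned divide, the
  // same identity with the combined sign mask r10 ^ r11 restores the sign
  // of the quotient. For example, -6 / 2: masks are -1 and 0, 6 udiv 2
  // gives 3, and (3 + -1) ^ -1 == -3.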

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

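// 64-bit signed division is not lowered here; the node is returned unchanged
// and must be handled (or further expanded) elsewhere.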
SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}

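// 8- and 16-bit signed remainders are promoted to 32 bits, computed with a
// 32-bit SREM, and truncated back to the original type.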
SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10
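  // As in LowerSDIV32, the iadd/ixor pairs take absolute values up front,
  // and the remainder inherits the sign of the dividend (mask r10 alone).
  // For example, -7 % 2: |r0| = 7, 7 - (7 / 2) * 2 = 1, and
  // (1 + -1) ^ -1 == -1.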

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  // (ISD::UDIV, not UREM: the remainder is reconstructed below as
  // r0 - (r0 / r1) * r1, matching the IL listing above.)
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}