radeonsi: Fix sampler views for depth textures.
[mesa.git] / src / gallium / drivers / radeon / AMDILISelLowering.cpp
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering()
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are supported.

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg, expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
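    // Editorial note: ISD::SETOLT and the other entries below are
    // ISD::CondCode values rather than opcodes; feeding them to
    // setOperationAction is a quirk inherited from the original AMDIL code.
    // setCondCodeAction would be the conventional hook for condition codes.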
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also does not have a divrem instruction for signed or
    // unsigned division.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU does not have [S|U]MUL_LOHI as single instructions.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU doesn't have a rotl, rotr, or byteswap instruction.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU doesn't have any bit-counting instructions.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works correctly for these types;
  // it needs vector comparisons to do so.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

}

// No target memory intrinsics need special handling here.
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
// The backend supports 32-bit and 64-bit floating point immediates.
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
}

bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  // f32 and f64 immediates are legal, so there is no benefit to shrinking
  // their constants.
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
}

// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one. Op is expected to be a target-specific node. Used by the
// DAG combiner.
void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
  switch (Op.getOpcode()) {
  default: break;
  case ISD::SELECT_CC:
    // The result of a SELECT_CC is one of its two value operands (operands
    // 2 and 3), so a bit is known only if it is known in both of them.
    DAG.ComputeMaskedBits(
        Op.getOperand(2),
        KnownZero,
        KnownOne,
        Depth + 1
        );
    DAG.ComputeMaskedBits(
        Op.getOperand(3),
        KnownZero2,
        KnownOne2,
        Depth + 1
        );
    assert((KnownZero & KnownOne) == 0
        && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0
        && "Bits known to be one AND zero?");
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  }
}
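
// Illustrative example (not from the original source): for
//   t = select_cc(a, b, x & 0xFF, y & 0x3F, setlt)
// both value operands have bits [31:8] known to be zero, so t has bits
// [31:8] known zero as well, whichever way the comparison resolves.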

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
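  // Worked example (illustrative, not in the original): sign-extending an
  // i8 held in an i32 register gives shiftBits = 32 - 8 = 24, so the shifts
  // below compute (Data << 24) >> 24, where the right shift is arithmetic
  // and replicates bit 7 into the upper 24 bits.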
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
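
// genIntType builds an i32- or i64-element type (scalar or vector) whose
// total width covers size * numEle bits. Illustrative examples (not from
// the original source): genIntType(32, 2) yields v2i32 and genIntType(64, 1)
// yields i64.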
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
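  // Overview (editorial note, not from the original source): 8- and 16-bit
  // signed division is evaluated in f32, which represents such operands
  // exactly. fq = fa / fb is truncated toward zero to get the candidate
  // quotient iq; fr = mad(-fq, fb, fa) is the corresponding remainder. If
  // the approximate divide rounded fq down by one whole divisor, then
  // |fr| >= |fb|, and jq (+1 or -1, matching the sign of ia ^ ib) corrects
  // the result.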
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // Default to the scalar types and widen them for the vector cases; this
  // also keeps INTTY/FLTTY initialized for unexpected vector widths.
  MVT INTTY = MVT::i32;
  MVT FLTTY = MVT::f32;
  if (OVT.isVector() && OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.isVector() && OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, INTTY, cv, jq,
      DAG.getConstant(0, INTTY));
  jq = DAG.getSExtOrTrunc(jq, DL, OVT);

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10
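  //
  // Editorial note: with mask m = (x < 0 ? -1 : 0), the identity
  // (x + m) ^ m computes |x|; applying the same form with the combined
  // mask r10 ^ r11 restores the quotient's sign, which is negative exactly
  // when the operand signs differ. For example, -7 / 2: r10 = -1, r11 = 0,
  // so |LHS| = (-7 + -1) ^ -1 = 7, then 7 u/ 2 = 3, and (3 + -1) ^ -1 = -3.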

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, OVT),
      DAG.getConstant(0, OVT),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, OVT),
      DAG.getConstant(0, OVT),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  // There is no custom lowering for 64-bit signed division yet; return the
  // node unchanged.
  return SDValue(Op.getNode(), 0);
}

SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  // Promote the operands to 32 bits, take the 32-bit remainder, and
  // truncate the result back to 8 bits.
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  // Promote the operands to 32 bits, take the 32-bit remainder, and
  // truncate the result back to 16 bits.
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10
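  //
  // Editorial note: the remainder takes the sign of the dividend only, so
  // the final correction reuses r10 (the sign mask of LHS) rather than
  // r10 ^ r11. For example, -7 % 2: |-7| = 7, 7 - (7 u/ 2) * 2 = 1, and
  // (1 + -1) ^ -1 = -1, matching C's truncated-division semantics.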

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  // There is no custom lowering for the 64-bit signed remainder yet; return
  // the node unchanged.
  return SDValue(Op.getNode(), 0);
}