src/gallium/drivers/radeon/AMDILISelLowering.cpp

   1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
// This file contains TargetLowering functions borrowed from AMDIL.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AMDGPUISelLowering.h"
  15 #include "AMDGPURegisterInfo.h"
  16 #include "AMDILDevices.h"
  17 #include "AMDILIntrinsicInfo.h"
  18 #include "AMDGPUSubtarget.h"
  19 #include "AMDILUtilityFunctions.h"
  20 #include "llvm/CallingConv.h"
  21 #include "llvm/CodeGen/MachineFrameInfo.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/PseudoSourceValue.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/CodeGen/SelectionDAGNodes.h"
  26 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  27 #include "llvm/DerivedTypes.h"
  28 #include "llvm/Instructions.h"
  29 #include "llvm/Intrinsics.h"
  30 #include "llvm/Support/raw_ostream.h"
  31 #include "llvm/Target/TargetInstrInfo.h"
  32 #include "llvm/Target/TargetOptions.h"
  33
  34 using namespace llvm;
  35 //===----------------------------------------------------------------------===//
  36 // Calling Convention Implementation
  37 //===----------------------------------------------------------------------===//
  38 #include "AMDGPUGenCallingConv.inc"
  39
  40 //===----------------------------------------------------------------------===//
  41 // TargetLowering Implementation Help Functions End
  42 //===----------------------------------------------------------------------===//
  43
  44 //===----------------------------------------------------------------------===//
  45 // TargetLowering Class Implementation Begins
  46 //===----------------------------------------------------------------------===//
  47 void AMDGPUTargetLowering::InitAMDILLowering()
  48 {
  49   int types[] =
  50   {
  51     (int)MVT::i8,
  52     (int)MVT::i16,
  53     (int)MVT::i32,
  54     (int)MVT::f32,
  55     (int)MVT::f64,
  56     (int)MVT::i64,
  57     (int)MVT::v2i8,
  58     (int)MVT::v4i8,
  59     (int)MVT::v2i16,
  60     (int)MVT::v4i16,
  61     (int)MVT::v4f32,
  62     (int)MVT::v4i32,
  63     (int)MVT::v2f32,
  64     (int)MVT::v2i32,
  65     (int)MVT::v2f64,
  66     (int)MVT::v2i64
  67   };
  68
  69   int IntTypes[] =
  70   {
  71     (int)MVT::i8,
  72     (int)MVT::i16,
  73     (int)MVT::i32,
  74     (int)MVT::i64
  75   };
  76
  77   int FloatTypes[] =
  78   {
  79     (int)MVT::f32,
  80     (int)MVT::f64
  81   };
  82
  83   int VectorTypes[] =
  84   {
  85     (int)MVT::v2i8,
  86     (int)MVT::v4i8,
  87     (int)MVT::v2i16,
  88     (int)MVT::v4i16,
  89     (int)MVT::v4f32,
  90     (int)MVT::v4i32,
  91     (int)MVT::v2f32,
  92     (int)MVT::v2i32,
  93     (int)MVT::v2f64,
  94     (int)MVT::v2i64
  95   };
  96   size_t numTypes = sizeof(types) / sizeof(*types);
  97   size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  98   size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  99   size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
 100
 101   const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
 102   // These are the current register classes that are
 103   // supported
 104
 105   for (unsigned int x  = 0; x < numTypes; ++x) {
 106     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
 107
 108     //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
 109     // We cannot sextinreg, expand to shifts
 110     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
 111     setOperationAction(ISD::SUBE, VT, Expand);
 112     setOperationAction(ISD::SUBC, VT, Expand);
 113     setOperationAction(ISD::ADDE, VT, Expand);
 114     setOperationAction(ISD::ADDC, VT, Expand);
 115     setOperationAction(ISD::BRCOND, VT, Custom);
 116     setOperationAction(ISD::BR_JT, VT, Expand);
 117     setOperationAction(ISD::BRIND, VT, Expand);
 118     // TODO: Implement custom UREM/SREM routines
 119     setOperationAction(ISD::SREM, VT, Expand);
 120     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 121     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 122     if (VT != MVT::i64 && VT != MVT::v2i64) {
 123       setOperationAction(ISD::SDIV, VT, Custom);
 124     }
 125   }
 126   for (unsigned int x = 0; x < numFloatTypes; ++x) {
 127     MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
 128
 129     // IL does not have these operations for floating point types
 130     setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
 131     setOperationAction(ISD::SETOLT, VT, Expand);
 132     setOperationAction(ISD::SETOGE, VT, Expand);
 133     setOperationAction(ISD::SETOGT, VT, Expand);
 134     setOperationAction(ISD::SETOLE, VT, Expand);
 135     setOperationAction(ISD::SETULT, VT, Expand);
 136     setOperationAction(ISD::SETUGE, VT, Expand);
 137     setOperationAction(ISD::SETUGT, VT, Expand);
 138     setOperationAction(ISD::SETULE, VT, Expand);
 139   }
 140
 141   for (unsigned int x = 0; x < numIntTypes; ++x) {
 142     MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
 143
 144     // GPU also does not have divrem function for signed or unsigned
 145     setOperationAction(ISD::SDIVREM, VT, Expand);
 146
 147     // GPU does not have [S|U]MUL_LOHI functions as a single instruction
 148     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 149     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 150
 151     // GPU doesn't have a rotl, rotr, or byteswap instruction
 152     setOperationAction(ISD::ROTR, VT, Expand);
 153     setOperationAction(ISD::BSWAP, VT, Expand);
 154
 155     // GPU doesn't have any counting operators
 156     setOperationAction(ISD::CTPOP, VT, Expand);
 157     setOperationAction(ISD::CTTZ, VT, Expand);
 158     setOperationAction(ISD::CTLZ, VT, Expand);
 159   }
 160
 161   for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
 162   {
 163     MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
 164
 165     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 166     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
 167     setOperationAction(ISD::SDIVREM, VT, Expand);
 168     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 169     // setOperationAction(ISD::VSETCC, VT, Expand);
 170     setOperationAction(ISD::SELECT_CC, VT, Expand);
 171
 172   }
 173   if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
 174     setOperationAction(ISD::MULHU, MVT::i64, Expand);
 175     setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
 176     setOperationAction(ISD::MULHS, MVT::i64, Expand);
 177     setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
 178     setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 179     setOperationAction(ISD::SREM, MVT::v2i64, Expand);
 180     setOperationAction(ISD::Constant          , MVT::i64  , Legal);
 181     setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
 182     setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
 183     setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
 184     setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
 185     setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
 186   }
 187   if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
 188     // we support loading/storing v2f64 but not operations on the type
 189     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
 190     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
 191     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
 192     setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
 193     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
 194     setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
 195     // We want to expand vector conversions into their scalar
 196     // counterparts.
 197     setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
 198     setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
 199     setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
 200     setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
 201     setOperationAction(ISD::FABS, MVT::f64, Expand);
 202     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
 203   }
 204   // TODO: Fix the UDIV24 algorithm so it works for these
 205   // types correctly. This needs vector comparisons
 206   // for this to work correctly.
 207   setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
 208   setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
 209   setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
 210   setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
 211   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
 212   setOperationAction(ISD::SUBC, MVT::Other, Expand);
 213   setOperationAction(ISD::ADDE, MVT::Other, Expand);
 214   setOperationAction(ISD::ADDC, MVT::Other, Expand);
 215   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 216   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 217   setOperationAction(ISD::BRIND, MVT::Other, Expand);
 218   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
 219
 220   setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
 221
 222   // Use the default implementation.
 223   setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
 224   setOperationAction(ISD::Constant          , MVT::i32    , Legal);
 225
 226   setSchedulingPreference(Sched::RegPressure);
 227   setPow2DivIsCheap(false);
 228   setPrefLoopAlignment(16);
 229   setSelectIsExpensive(true);
 230   setJumpIsExpensive(true);
 231
 232   maxStoresPerMemcpy  = 4096;
 233   maxStoresPerMemmove = 4096;
 234   maxStoresPerMemset  = 4096;
 235
 236 #undef numTypes
 237 #undef numIntTypes
 238 #undef numVectorTypes
 239 #undef numFloatTypes
 240 }
 241
 242 bool
 243 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 244     const CallInst &I, unsigned Intrinsic) const
 245 {
 246   return false;
 247 }
 248 // The backend supports 32 and 64 bit floating point immediates
 249 bool
 250 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
 251 {
 252   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 253       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 254     return true;
 255   } else {
 256     return false;
 257   }
 258 }
 259
 260 bool
 261 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
 262 {
 263   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 264       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 265     return false;
 266   } else {
 267     return true;
 268   }
 269 }
 270
 271
 272 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
 273 // be zero. Op is expected to be a target specific node. Used by DAG
 274 // combiner.
 275
 276 void
 277 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
 278     const SDValue Op,
 279     APInt &KnownZero,
 280     APInt &KnownOne,
 281     const SelectionDAG &DAG,
 282     unsigned Depth) const
 283 {
 284   APInt KnownZero2;
 285   APInt KnownOne2;
 286   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
 287   switch (Op.getOpcode()) {
 288     default: break;
 289     case ISD::SELECT_CC:
 290              DAG.ComputeMaskedBits(
 291                  Op.getOperand(1),
 292                  KnownZero,
 293                  KnownOne,
 294                  Depth + 1
 295                  );
 296              DAG.ComputeMaskedBits(
 297                  Op.getOperand(0),
 298                  KnownZero2,
 299                  KnownOne2
 300                  );
 301              assert((KnownZero & KnownOne) == 0
 302                  && "Bits known to be one AND zero?");
 303              assert((KnownZero2 & KnownOne2) == 0
 304                  && "Bits known to be one AND zero?");
 305              // Only known if known in both the LHS and RHS
 306              KnownOne &= KnownOne2;
 307              KnownZero &= KnownZero2;
 308              break;
 309   };
 310 }
 311
 312 //===----------------------------------------------------------------------===//
 313 //                           Other Lowering Hooks
 314 //===----------------------------------------------------------------------===//
 315
 316 SDValue
 317 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
 318 {
 319   EVT OVT = Op.getValueType();
 320   SDValue DST;
 321   if (OVT.getScalarType() == MVT::i64) {
 322     DST = LowerSDIV64(Op, DAG);
 323   } else if (OVT.getScalarType() == MVT::i32) {
 324     DST = LowerSDIV32(Op, DAG);
 325   } else if (OVT.getScalarType() == MVT::i16
 326       || OVT.getScalarType() == MVT::i8) {
 327     DST = LowerSDIV24(Op, DAG);
 328   } else {
 329     DST = SDValue(Op.getNode(), 0);
 330   }
 331   return DST;
 332 }
 333
 334 SDValue
 335 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
 336 {
 337   EVT OVT = Op.getValueType();
 338   SDValue DST;
 339   if (OVT.getScalarType() == MVT::i64) {
 340     DST = LowerSREM64(Op, DAG);
 341   } else if (OVT.getScalarType() == MVT::i32) {
 342     DST = LowerSREM32(Op, DAG);
 343   } else if (OVT.getScalarType() == MVT::i16) {
 344     DST = LowerSREM16(Op, DAG);
 345   } else if (OVT.getScalarType() == MVT::i8) {
 346     DST = LowerSREM8(Op, DAG);
 347   } else {
 348     DST = SDValue(Op.getNode(), 0);
 349   }
 350   return DST;
 351 }
 352
 353 SDValue
 354 AMDGPUTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
 355 {
 356   EVT VT = Op.getValueType();
 357   SDValue Nodes1;
 358   SDValue second;
 359   SDValue third;
 360   SDValue fourth;
 361   DebugLoc DL = Op.getDebugLoc();
 362   Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
 363       DL,
 364       VT, Op.getOperand(0));
 365 #if 0
 366   bool allEqual = true;
 367   for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
 368     if (Op.getOperand(0) != Op.getOperand(x)) {
 369       allEqual = false;
 370       break;
 371     }
 372   }
 373   if (allEqual) {
 374     return Nodes1;
 375   }
 376 #endif
 377   switch(Op.getNumOperands()) {
 378     default:
 379     case 1:
 380       break;
 381     case 4:
 382       fourth = Op.getOperand(3);
 383       if (fourth.getOpcode() != ISD::UNDEF) {
 384         Nodes1 = DAG.getNode(
 385             ISD::INSERT_VECTOR_ELT,
 386             DL,
 387             Op.getValueType(),
 388             Nodes1,
 389             fourth,
 390             DAG.getConstant(7, MVT::i32));
 391       }
 392     case 3:
 393       third = Op.getOperand(2);
 394       if (third.getOpcode() != ISD::UNDEF) {
 395         Nodes1 = DAG.getNode(
 396             ISD::INSERT_VECTOR_ELT,
 397             DL,
 398             Op.getValueType(),
 399             Nodes1,
 400             third,
 401             DAG.getConstant(6, MVT::i32));
 402       }
 403     case 2:
 404       second = Op.getOperand(1);
 405       if (second.getOpcode() != ISD::UNDEF) {
 406         Nodes1 = DAG.getNode(
 407             ISD::INSERT_VECTOR_ELT,
 408             DL,
 409             Op.getValueType(),
 410             Nodes1,
 411             second,
 412             DAG.getConstant(5, MVT::i32));
 413       }
 414       break;
 415   };
 416   return Nodes1;
 417 }
 418
 419 SDValue
 420 AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
 421 {
 422   SDValue Data = Op.getOperand(0);
 423   VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
 424   DebugLoc DL = Op.getDebugLoc();
 425   EVT DVT = Data.getValueType();
 426   EVT BVT = BaseType->getVT();
 427   unsigned baseBits = BVT.getScalarType().getSizeInBits();
 428   unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
 429   unsigned shiftBits = srcBits - baseBits;
 430   if (srcBits < 32) {
 431     // If the op is less than 32 bits, then it needs to extend to 32bits
 432     // so it can properly keep the upper bits valid.
 433     EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
 434     Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
 435     shiftBits = 32 - baseBits;
 436     DVT = IVT;
 437   }
 438   SDValue Shift = DAG.getConstant(shiftBits, DVT);
 439   // Shift left by 'Shift' bits.
 440   Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
 441   // Signed shift Right by 'Shift' bits.
 442   Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
 443   if (srcBits < 32) {
 444     // Once the sign extension is done, the op needs to be converted to
 445     // its original type.
 446     Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
 447   }
 448   return Data;
 449 }
 450 EVT
 451 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
 452 {
 453   int iSize = (size * numEle);
 454   int vEle = (iSize >> ((size == 64) ? 6 : 5));
 455   if (!vEle) {
 456     vEle = 1;
 457   }
 458   if (size == 64) {
 459     if (vEle == 1) {
 460       return EVT(MVT::i64);
 461     } else {
 462       return EVT(MVT::getVectorVT(MVT::i64, vEle));
 463     }
 464   } else {
 465     if (vEle == 1) {
 466       return EVT(MVT::i32);
 467     } else {
 468       return EVT(MVT::getVectorVT(MVT::i32, vEle));
 469     }
 470   }
 471 }
 472
 473 SDValue
 474 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
 475 {
 476   SDValue Chain = Op.getOperand(0);
 477   SDValue Cond  = Op.getOperand(1);
 478   SDValue Jump  = Op.getOperand(2);
 479   SDValue Result;
 480   Result = DAG.getNode(
 481       AMDGPUISD::BRANCH_COND,
 482       Op.getDebugLoc(),
 483       Op.getValueType(),
 484       Chain, Jump, Cond);
 485   return Result;
 486 }
 487
 488 SDValue
 489 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
 490 {
 491   DebugLoc DL = Op.getDebugLoc();
 492   EVT OVT = Op.getValueType();
 493   SDValue LHS = Op.getOperand(0);
 494   SDValue RHS = Op.getOperand(1);
 495   MVT INTTY;
 496   MVT FLTTY;
 497   if (!OVT.isVector()) {
 498     INTTY = MVT::i32;
 499     FLTTY = MVT::f32;
 500   } else if (OVT.getVectorNumElements() == 2) {
 501     INTTY = MVT::v2i32;
 502     FLTTY = MVT::v2f32;
 503   } else if (OVT.getVectorNumElements() == 4) {
 504     INTTY = MVT::v4i32;
 505     FLTTY = MVT::v4f32;
 506   }
 507   unsigned bitsize = OVT.getScalarType().getSizeInBits();
 508   // char|short jq = ia ^ ib;
 509   SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
 510
 511   // jq = jq >> (bitsize - 2)
 512   jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
 513
 514   // jq = jq | 0x1
 515   jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
 516
 517   // jq = (int)jq
 518   jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
 519
 520   // int ia = (int)LHS;
 521   SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
 522
 523   // int ib, (int)RHS;
 524   SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
 525
 526   // float fa = (float)ia;
 527   SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
 528
 529   // float fb = (float)ib;
 530   SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
 531
 532   // float fq = native_divide(fa, fb);
 533   SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
 534
 535   // fq = trunc(fq);
 536   fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
 537
 538   // float fqneg = -fq;
 539   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
 540
 541   // float fr = mad(fqneg, fb, fa);
 542   SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
 543
 544   // int iq = (int)fq;
 545   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
 546
 547   // fr = fabs(fr);
 548   fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
 549
 550   // fb = fabs(fb);
 551   fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
 552
 553   // int cv = fr >= fb;
 554   SDValue cv;
 555   if (INTTY == MVT::i32) {
 556     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 557   } else {
 558     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 559   }
 560   // jq = (cv ? jq : 0);
 561   jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
 562       DAG.getConstant(0, OVT));
 563   // dst = iq + jq;
 564   iq = DAG.getSExtOrTrunc(iq, DL, OVT);
 565   iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
 566   return iq;
 567 }
 568
 569 SDValue
 570 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
 571 {
 572   DebugLoc DL = Op.getDebugLoc();
 573   EVT OVT = Op.getValueType();
 574   SDValue LHS = Op.getOperand(0);
 575   SDValue RHS = Op.getOperand(1);
 576   // The LowerSDIV32 function generates equivalent to the following IL.
 577   // mov r0, LHS
 578   // mov r1, RHS
 579   // ilt r10, r0, 0
 580   // ilt r11, r1, 0
 581   // iadd r0, r0, r10
 582   // iadd r1, r1, r11
 583   // ixor r0, r0, r10
 584   // ixor r1, r1, r11
 585   // udiv r0, r0, r1
 586   // ixor r10, r10, r11
 587   // iadd r0, r0, r10
 588   // ixor DST, r0, r10
 589
 590   // mov r0, LHS
 591   SDValue r0 = LHS;
 592
 593   // mov r1, RHS
 594   SDValue r1 = RHS;
 595
 596   // ilt r10, r0, 0
 597   SDValue r10 = DAG.getSelectCC(DL,
 598       r0, DAG.getConstant(0, OVT),
 599       DAG.getConstant(-1, MVT::i32),
 600       DAG.getConstant(0, MVT::i32),
 601       ISD::SETLT);
 602
 603   // ilt r11, r1, 0
 604   SDValue r11 = DAG.getSelectCC(DL,
 605       r1, DAG.getConstant(0, OVT),
 606       DAG.getConstant(-1, MVT::i32),
 607       DAG.getConstant(0, MVT::i32),
 608       ISD::SETLT);
 609
 610   // iadd r0, r0, r10
 611   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 612
 613   // iadd r1, r1, r11
 614   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 615
 616   // ixor r0, r0, r10
 617   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 618
 619   // ixor r1, r1, r11
 620   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 621
 622   // udiv r0, r0, r1
 623   r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
 624
 625   // ixor r10, r10, r11
 626   r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
 627
 628   // iadd r0, r0, r10
 629   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 630
 631   // ixor DST, r0, r10
 632   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 633   return DST;
 634 }
 635
 636 SDValue
 637 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
 638 {
 639   return SDValue(Op.getNode(), 0);
 640 }
 641
 642 SDValue
 643 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
 644 {
 645   DebugLoc DL = Op.getDebugLoc();
 646   EVT OVT = Op.getValueType();
 647   MVT INTTY = MVT::i32;
 648   if (OVT == MVT::v2i8) {
 649     INTTY = MVT::v2i32;
 650   } else if (OVT == MVT::v4i8) {
 651     INTTY = MVT::v4i32;
 652   }
 653   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 654   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 655   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 656   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 657   return LHS;
 658 }
 659
 660 SDValue
 661 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
 662 {
 663   DebugLoc DL = Op.getDebugLoc();
 664   EVT OVT = Op.getValueType();
 665   MVT INTTY = MVT::i32;
 666   if (OVT == MVT::v2i16) {
 667     INTTY = MVT::v2i32;
 668   } else if (OVT == MVT::v4i16) {
 669     INTTY = MVT::v4i32;
 670   }
 671   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 672   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 673   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 674   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 675   return LHS;
 676 }
 677
 678 SDValue
 679 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
 680 {
 681   DebugLoc DL = Op.getDebugLoc();
 682   EVT OVT = Op.getValueType();
 683   SDValue LHS = Op.getOperand(0);
 684   SDValue RHS = Op.getOperand(1);
 685   // The LowerSREM32 function generates equivalent to the following IL.
 686   // mov r0, LHS
 687   // mov r1, RHS
 688   // ilt r10, r0, 0
 689   // ilt r11, r1, 0
 690   // iadd r0, r0, r10
 691   // iadd r1, r1, r11
 692   // ixor r0, r0, r10
 693   // ixor r1, r1, r11
 694   // udiv r20, r0, r1
 695   // umul r20, r20, r1
 696   // sub r0, r0, r20
 697   // iadd r0, r0, r10
 698   // ixor DST, r0, r10
 699
 700   // mov r0, LHS
 701   SDValue r0 = LHS;
 702
 703   // mov r1, RHS
 704   SDValue r1 = RHS;
 705
 706   // ilt r10, r0, 0
 707   SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
 708
 709   // ilt r11, r1, 0
 710   SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
 711
 712   // iadd r0, r0, r10
 713   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 714
 715   // iadd r1, r1, r11
 716   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 717
 718   // ixor r0, r0, r10
 719   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 720
 721   // ixor r1, r1, r11
 722   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 723
 724   // udiv r20, r0, r1
 725   SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
 726
 727   // umul r20, r20, r1
 728   r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
 729
 730   // sub r0, r0, r20
 731   r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
 732
 733   // iadd r0, r0, r10
 734   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 735
 736   // ixor DST, r0, r10
 737   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 738   return DST;
 739 }
 740
 741 SDValue
 742 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
 743 {
 744   return SDValue(Op.getNode(), 0);
 745 }