//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file implements the interfaces that AMDIL uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILRegisterInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;
#define ISDBITCAST  ISD::BITCAST
#define MVTGLUE     MVT::Glue
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
static SDValue
getConversionNode(SelectionDAG &DAG, SDValue &Src, SDValue &Dst, bool asType)
{
  DebugLoc DL = Src.getDebugLoc();
  EVT svt = Src.getValueType().getScalarType();
  EVT dvt = Dst.getValueType().getScalarType();
  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
    if (dvt.bitsGT(svt)) {
      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
    } else if (svt.bitsGT(dvt)) {
      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
          DAG.getConstant(1, MVT::i32));
    }
  } else if (svt.isInteger() && dvt.isInteger()) {
    if (!svt.bitsEq(dvt)) {
      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
    }
  } else if (svt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
    if (!svt.bitsEq(dvt)) {
      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
      } else {
        assert(0 && "We only support 32- and 64-bit fp types");
      }
    }
    Src = DAG.getNode(opcode, DL, dvt, Src);
  } else if (dvt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
    } else {
      assert(0 && "We only support 32- and 64-bit fp types");
    }
    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  }
  return Src;
}
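
// For illustration: with asType == false, an i16 -> f32 conversion first
// sign-extends the i16 to i32 (the only integer width fed to SINT_TO_FP
// for f32 here) and then emits SINT_TO_FP; f32 -> i16 goes the other way,
// FP_TO_SINT to i32 followed by getSExtOrTrunc down to i16.
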
// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC
// condition.
static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
{
  switch (CC) {
  default:
    {
      errs() << "Condition Code: " << (unsigned int)CC << "\n";
      assert(0 && "Unknown condition code!");
    }
  case ISD::SETO:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_O;
    case MVT::f64:
      return AMDILCC::IL_CC_D_O;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUO:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_UO;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UO;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETGT:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_GT;
    case MVT::f32:
      return AMDILCC::IL_CC_F_GT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_GT;
    case MVT::i64:
      return AMDILCC::IL_CC_L_GT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETGE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_GE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_GE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_GE;
    case MVT::i64:
      return AMDILCC::IL_CC_L_GE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETLT:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_LT;
    case MVT::f32:
      return AMDILCC::IL_CC_F_LT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_LT;
    case MVT::i64:
      return AMDILCC::IL_CC_L_LT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETLE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_LE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_LE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_LE;
    case MVT::i64:
      return AMDILCC::IL_CC_L_LE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETNE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_NE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_NE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_NE;
    case MVT::i64:
      return AMDILCC::IL_CC_L_NE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETEQ:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_EQ;
    case MVT::f32:
      return AMDILCC::IL_CC_F_EQ;
    case MVT::f64:
      return AMDILCC::IL_CC_D_EQ;
    case MVT::i64:
      return AMDILCC::IL_CC_L_EQ;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUGT:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_GT;
    case MVT::f32:
      return AMDILCC::IL_CC_F_UGT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UGT;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_GT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUGE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_GE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_UGE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UGE;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_GE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETULT:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_LT;
    case MVT::f32:
      return AMDILCC::IL_CC_F_ULT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_ULT;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_LT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETULE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_LE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_ULE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_ULE;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_LE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUNE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_NE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_UNE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UNE;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_NE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUEQ:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_EQ;
    case MVT::f32:
      return AMDILCC::IL_CC_F_UEQ;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UEQ;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_EQ;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOGT:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OGT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OGT;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOGE:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OGE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OGE;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOLT:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OLT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OLT;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOLE:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OLE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OLE;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETONE:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_ONE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_ONE;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOEQ:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OEQ;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OEQ;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  }
}

SDValue
AMDILTargetLowering::LowerMemArgument(
    SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
    unsigned i) const
{
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;

  bool AlwaysUseMutable = (CallConv == CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since
  // they could be overwritten by the lowering of the arguments of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
      VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(FI),
      false, false, false, 0);
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the current register classes that are
  // supported

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also does not have a divrem instruction for
    // signed or unsigned operands
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setStackPointerRegisterToSaveRestore(AMDGPU::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

}
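
// For illustration: the three action kinds configured above behave as
// follows during legalization. Legal nodes are selected directly (e.g.
// ISD::Constant on i32); Custom nodes are routed to LowerOperation()
// below (e.g. ISD::SDIV on i32, which lands in LowerSDIV32); Expand
// nodes are rewritten in terms of other operations by the legalizer
// (e.g. ISD::BSWAP, which has no IL equivalent here).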

const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
  default: return 0;
  case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
  case AMDILISD::MAD: return "AMDILISD::MAD";
  case AMDILISD::CALL: return "AMDILISD::CALL";
  case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
  case AMDILISD::UMUL: return "AMDILISD::UMUL";
  case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
  case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
  case AMDILISD::CMP: return "AMDILISD::CMP";
  case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
  case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
  case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
  case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
  case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
  case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
  case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
  case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
  }
}
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
// The backend supports 32- and 64-bit floating-point immediates
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}

bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}


// computeMaskedBitsForTargetNode - Determine which bits of Op are known
// to be zero or one. Op is expected to be a target-specific node. Used
// by the DAG combiner.

void
AMDILTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
  default: break;
  case AMDILISD::SELECT_CC:
    DAG.ComputeMaskedBits(
        Op.getOperand(1),
        KnownZero,
        KnownOne,
        Depth + 1);
    DAG.ComputeMaskedBits(
        Op.getOperand(0),
        KnownZero2,
        KnownOne2);
    assert((KnownZero & KnownOne) == 0
        && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0
        && "Bits known to be one AND zero?");
    // Only known if known in both the LHS and RHS
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  }
}
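
// For illustration: for a select_cc the known bits are the intersection
// of what is known about the two selectable operands - if one operand
// has its upper 24 bits known zero and the other only its upper 16, the
// merged KnownZero guarantees just the upper 16 bits, since either value
// may be chosen at run time.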

// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention.
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}

// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers. This assumes
// that Chain/InFlag are the input chain/flag to use, and that TheCall is
// the call being lowered. It returns an SDNode with the same number of
// values as the ISD::CALL.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes. This is
// necessary to maintain source ordering of instructions under -O0 to
// avoid odd-looking "skipping around" issues.
static const SDValue
Ordered(SelectionDAG &DAG, unsigned order, const SDValue New)
{
  if (order != 0 && DAG.GetOrdering(New.getNode()) == 0) {
    DAG.AssignOrdering(New.getNode(), order);
    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
      Ordered(DAG, order, New.getOperand(i));
  }
  return New;
}

#define LOWER(A) \
  case ISD:: A: \
    return Ordered(DAG, DAG.GetOrdering(Op.getNode()), Lower##A(Op, DAG))

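// For illustration, LOWER(SDIV) expands to:
//   case ISD::SDIV:
//     return Ordered(DAG, DAG.GetOrdering(Op.getNode()), LowerSDIV(Op, DAG));
// so each custom-lowered opcode dispatches to its Lower* routine and the
// replacement nodes inherit the original node's ordering.
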
SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default:
    Op.getNode()->dump();
    assert(0 && "Custom lowering code for this "
           "instruction is not implemented yet!");
    break;
  LOWER(GlobalAddress);
  LOWER(JumpTable);
  LOWER(ConstantPool);
  LOWER(ExternalSymbol);
  LOWER(SDIV);
  LOWER(SREM);
  LOWER(BUILD_VECTOR);
  LOWER(SELECT);
  LOWER(SETCC);
  LOWER(SIGN_EXTEND_INREG);
  LOWER(DYNAMIC_STACKALLOC);
  LOWER(BRCOND);
  LOWER(BR_CC);
  }
  return Op;
}

#undef LOWER

SDValue
AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = Op;
  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *G = GADN->getGlobal();
  DebugLoc DL = Op.getDebugLoc();
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV) {
    DST = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
  } else {
    if (GV->hasInitializer()) {
      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
        DST = DAG.getConstantFP(CF->getValueAPF(),
            Op.getValueType());
      } else if (dyn_cast<ConstantAggregateZero>(C)) {
        EVT VT = Op.getValueType();
        if (VT.isInteger()) {
          DST = DAG.getConstant(0, VT);
        } else {
          DST = DAG.getConstantFP(0, VT);
        }
      } else {
        assert(!"lowering this type of Global Address "
            "not implemented yet!");
        C->dump();
        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
      }
    } else {
      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
    }
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
  return Result;
}
SDValue
AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  EVT PtrVT = Op.getValueType();
  SDValue Result;
  if (CP->isMachineConstantPoolEntry()) {
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  } else {
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  }
  return Result;
}

SDValue
AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
{
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
  return Result;
}

/// LowerFORMAL_ARGUMENTS - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFor(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if (VA.isMemLoc()) {
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
                                        dl, DAG, VA, MFI, i));
    } else {
1015 assert(0 && "found a Value Assign that is "
1016 "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
    // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame); CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool &isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  isTailCall = false;
  MachineFunction &MF = DAG.getMachineFunction();
  // FIXME: Do we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands; this needs to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the first 5 operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value if needed
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND,
          dl,
          VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND,
          dl,
          VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND,
          dl,
          VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());

      // Emit an ISD::STORE which stores the
      // parameter value to a stack location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
          MachinePointerInfo::getFixedStack(FI),
          false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
        RegsToPass[i].first,
        RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
                         InVals);
}

SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch (Op.getNumOperands()) {
  default:
  case 1:
    break;
  case 4:
    fourth = Op.getOperand(3);
    if (fourth.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          Op.getValueType(),
          Nodes1,
          fourth,
          DAG.getConstant(7, MVT::i32));
    }
  case 3:
    third = Op.getOperand(2);
    if (third.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          Op.getValueType(),
          Nodes1,
          third,
          DAG.getConstant(6, MVT::i32));
    }
  case 2:
    second = Op.getOperand(1);
    if (second.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          Op.getValueType(),
          Nodes1,
          second,
          DAG.getConstant(5, MVT::i32));
    }
    break;
  }
  return Nodes1;
}

SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}
SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}
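
// For illustration: setcc is lowered as a select_cc that produces an
// all-ones (-1) or all-zeros mask, getConversionNode then moves that
// mask to the result type, and the final AND with 1 normalizes it to
// the 0/1 boolean value that ISD::SETCC is defined to produce.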

SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
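
// For illustration: sign-extending an i8 value held in an i32 register
// uses shiftBits = 32 - 8 = 24, so 0x000000AB becomes 0xAB000000 after
// the SHL and 0xFFFFFFAB after the arithmetic SRA - the usual
// shift-left/arithmetic-shift-right idiom for in-register sign extension.
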
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}
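
// For illustration: genIntType(32, 4) yields v4i32 and genIntType(64, 2)
// yields v2i64, while genIntType(8, 1) rounds up to a single i32 (the
// narrowest integer register width modeled here) because iSize >> 5 == 0
// forces vEle back to 1.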

SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDGPU::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2, DL);
  return Chain;
}
SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

// LowerRET - Lower an ISD::RET node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG) const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::RET => ret chain, (regnum1, val1), ...
    // Copy each result value into its assigned physical register.
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // Guarantee that all emitted copies are stuck together,
    // avoiding something bad.
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
    // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}

unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}

SDValue
AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
  // jq = (cv ? jq : 0);
  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}
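
// For illustration (scalar i8 case, exact float math assumed): LHS = 7,
// RHS = -2 gives jq = ((7 ^ -2) >> 6) | 1 = -1, fq = trunc(-3.5) = -3,
// and fr = |mad(3, -2, 7)| = 1 < |fb| = 2, so no correction is applied
// and the result is iq = -3, matching C truncating division. The jq
// term only kicks in when the approximate reciprocal leaves fq one unit
// short of the true quotient.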

SDValue
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSDIV32 function generates equivalent to the following IL.
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
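
// For illustration: LHS = -7, RHS = 2 gives r10 = -1 and r11 = 0;
// (-7 + -1) ^ -1 = 7 recovers the magnitude, 7 udiv 2 = 3, and since
// r10 ^ r11 = -1 the final (3 + -1) ^ -1 = -3 restores the sign,
// matching C truncating division. (x + m) ^ m with m = x >> 31 is the
// branch-free absolute-value idiom; reapplying it with m = -1 negates.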

SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}

SDValue
AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}
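
// For illustration: an i8 srem is handled by sign-extending both
// operands to i32 (or v2i8/v4i8 to the matching i32 vector), doing the
// 32-bit SREM that LowerSREM32 knows how to lower, and truncating the
// result back down - safe because the remainder of two 8-bit values
// always fits in 8 bits.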

SDValue
AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSREM32 function generates equivalent to the following IL.
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r0, DAG.getConstant(0, OVT));

  // ilt r11, r1, 0
  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r1, DAG.getConstant(0, OVT));

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
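
// For illustration: LHS = -7, RHS = 3 gives r10 = -1 and r11 = 0; the
// magnitudes become 7 and 3, r20 = (7 / 3) * 3 = 6, 7 - 6 = 1, and
// (1 + -1) ^ -1 = -1, matching C's remainder, whose sign follows the
// dividend (-7 % 3 == -1).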

SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}