radeon/llvm: Remove AMDIL bitwise instructions (OR, AND, XOR, NOT)
[mesa.git] / src / gallium / drivers / radeon / AMDILISelLowering.cpp
1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 // This file implements the interfaces that AMDIL uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDILISelLowering.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDILSubtarget.h"
19 #include "AMDILTargetMachine.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/CallingConv.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/DerivedTypes.h"
29 #include "llvm/Instructions.h"
30 #include "llvm/Intrinsics.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetOptions.h"
33
34 using namespace llvm;
35 #define ISDBITCAST ISD::BITCAST
36 #define MVTGLUE MVT::Glue
37 //===----------------------------------------------------------------------===//
38 // Calling Convention Implementation
39 //===----------------------------------------------------------------------===//
40 #include "AMDILGenCallingConv.inc"
41
42 //===----------------------------------------------------------------------===//
43 // TargetLowering Implementation Help Functions Begin
44 //===----------------------------------------------------------------------===//
45 static SDValue
46 getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
47 {
48 DebugLoc DL = Src.getDebugLoc();
49 EVT svt = Src.getValueType().getScalarType();
50 EVT dvt = Dst.getValueType().getScalarType();
51 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
52 if (dvt.bitsGT(svt)) {
53 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
54 } else if (dvt.bitsLT(svt)) {
55 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
56 DAG.getConstant(1, MVT::i32));
57 }
58 } else if (svt.isInteger() && dvt.isInteger()) {
59 if (!svt.bitsEq(dvt)) {
60 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
61 } else {
62 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
63 }
64 } else if (svt.isInteger()) {
65 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
66 if (!svt.bitsEq(dvt)) {
67 if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
68 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
69 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
70 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
71 } else {
72 assert(0 && "We only support 32 and 64-bit fp types");
73 }
74 }
75 Src = DAG.getNode(opcode, DL, dvt, Src);
76 } else if (dvt.isInteger()) {
77 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
78 if (svt.getSimpleVT().SimpleTy == MVT::f32) {
79 Src = DAG.getNode(opcode, DL, MVT::i32, Src);
80 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
81 Src = DAG.getNode(opcode, DL, MVT::i64, Src);
82 } else {
83 assert(0 && "We only support 32 and 64-bit fp types");
84 }
85 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
86 }
87 return Src;
88 }
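// Illustrative example (not from the original source): coercing an i16 value
// to f32 with getConversionNode() takes the integer->float branch, first
// widening the source to the matching 32-bit integer, then emitting either a
// value conversion or a bit-pattern conversion depending on 'asType':
//
//   SDValue Conv = getConversionNode(DAG, Src16, DstF32, /*asType=*/false);
//   // Conv == (f32 (sint_to_fp (i32 (sign_extend Src16))))
//   // with asType == true the final node would be a BITCAST instead.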
89 // CondCCodeToCC - Convert a DAG condition code to an AMDIL CC
90 // condition.
91 static AMDILCC::CondCodes
92 CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
93 {
94 switch (CC) {
95 default:
96 {
97 errs() << "Condition Code: " << (unsigned int)CC << "\n";
98 assert(0 && "Unknown condition code!");
99 }
100 case ISD::SETO:
101 switch(type) {
102 case MVT::f32:
103 return AMDILCC::IL_CC_F_O;
104 case MVT::f64:
105 return AMDILCC::IL_CC_D_O;
106 default:
107 assert(0 && "Opcode combination not generated correctly!");
108 return AMDILCC::COND_ERROR;
109 };
110 case ISD::SETUO:
111 switch(type) {
112 case MVT::f32:
113 return AMDILCC::IL_CC_F_UO;
114 case MVT::f64:
115 return AMDILCC::IL_CC_D_UO;
116 default:
117 assert(0 && "Opcode combination not generated correctly!");
118 return AMDILCC::COND_ERROR;
119 };
120 case ISD::SETGT:
121 switch (type) {
122 case MVT::i1:
123 case MVT::i8:
124 case MVT::i16:
125 case MVT::i32:
126 return AMDILCC::IL_CC_I_GT;
127 case MVT::f32:
128 return AMDILCC::IL_CC_F_GT;
129 case MVT::f64:
130 return AMDILCC::IL_CC_D_GT;
131 case MVT::i64:
132 return AMDILCC::IL_CC_L_GT;
133 default:
134 assert(0 && "Opcode combination not generated correctly!");
135 return AMDILCC::COND_ERROR;
136 };
137 case ISD::SETGE:
138 switch (type) {
139 case MVT::i1:
140 case MVT::i8:
141 case MVT::i16:
142 case MVT::i32:
143 return AMDILCC::IL_CC_I_GE;
144 case MVT::f32:
145 return AMDILCC::IL_CC_F_GE;
146 case MVT::f64:
147 return AMDILCC::IL_CC_D_GE;
148 case MVT::i64:
149 return AMDILCC::IL_CC_L_GE;
150 default:
151 assert(0 && "Opcode combination not generated correctly!");
152 return AMDILCC::COND_ERROR;
153 };
154 case ISD::SETLT:
155 switch (type) {
156 case MVT::i1:
157 case MVT::i8:
158 case MVT::i16:
159 case MVT::i32:
160 return AMDILCC::IL_CC_I_LT;
161 case MVT::f32:
162 return AMDILCC::IL_CC_F_LT;
163 case MVT::f64:
164 return AMDILCC::IL_CC_D_LT;
165 case MVT::i64:
166 return AMDILCC::IL_CC_L_LT;
167 default:
168 assert(0 && "Opcode combination not generated correctly!");
169 return AMDILCC::COND_ERROR;
170 };
171 case ISD::SETLE:
172 switch (type) {
173 case MVT::i1:
174 case MVT::i8:
175 case MVT::i16:
176 case MVT::i32:
177 return AMDILCC::IL_CC_I_LE;
178 case MVT::f32:
179 return AMDILCC::IL_CC_F_LE;
180 case MVT::f64:
181 return AMDILCC::IL_CC_D_LE;
182 case MVT::i64:
183 return AMDILCC::IL_CC_L_LE;
184 default:
185 assert(0 && "Opcode combination not generated correctly!");
186 return AMDILCC::COND_ERROR;
187 };
188 case ISD::SETNE:
189 switch (type) {
190 case MVT::i1:
191 case MVT::i8:
192 case MVT::i16:
193 case MVT::i32:
194 return AMDILCC::IL_CC_I_NE;
195 case MVT::f32:
196 return AMDILCC::IL_CC_F_NE;
197 case MVT::f64:
198 return AMDILCC::IL_CC_D_NE;
199 case MVT::i64:
200 return AMDILCC::IL_CC_L_NE;
201 default:
202 assert(0 && "Opcode combination not generated correctly!");
203 return AMDILCC::COND_ERROR;
204 };
205 case ISD::SETEQ:
206 switch (type) {
207 case MVT::i1:
208 case MVT::i8:
209 case MVT::i16:
210 case MVT::i32:
211 return AMDILCC::IL_CC_I_EQ;
212 case MVT::f32:
213 return AMDILCC::IL_CC_F_EQ;
214 case MVT::f64:
215 return AMDILCC::IL_CC_D_EQ;
216 case MVT::i64:
217 return AMDILCC::IL_CC_L_EQ;
218 default:
219 assert(0 && "Opcode combination not generated correctly!");
220 return AMDILCC::COND_ERROR;
221 };
222 case ISD::SETUGT:
223 switch (type) {
224 case MVT::i1:
225 case MVT::i8:
226 case MVT::i16:
227 case MVT::i32:
228 return AMDILCC::IL_CC_U_GT;
229 case MVT::f32:
230 return AMDILCC::IL_CC_F_UGT;
231 case MVT::f64:
232 return AMDILCC::IL_CC_D_UGT;
233 case MVT::i64:
234 return AMDILCC::IL_CC_UL_GT;
235 default:
236 assert(0 && "Opcode combination not generated correctly!");
237 return AMDILCC::COND_ERROR;
238 };
239 case ISD::SETUGE:
240 switch (type) {
241 case MVT::i1:
242 case MVT::i8:
243 case MVT::i16:
244 case MVT::i32:
245 return AMDILCC::IL_CC_U_GE;
246 case MVT::f32:
247 return AMDILCC::IL_CC_F_UGE;
248 case MVT::f64:
249 return AMDILCC::IL_CC_D_UGE;
250 case MVT::i64:
251 return AMDILCC::IL_CC_UL_GE;
252 default:
253 assert(0 && "Opcode combination not generated correctly!");
254 return AMDILCC::COND_ERROR;
255 };
256 case ISD::SETULT:
257 switch (type) {
258 case MVT::i1:
259 case MVT::i8:
260 case MVT::i16:
261 case MVT::i32:
262 return AMDILCC::IL_CC_U_LT;
263 case MVT::f32:
264 return AMDILCC::IL_CC_F_ULT;
265 case MVT::f64:
266 return AMDILCC::IL_CC_D_ULT;
267 case MVT::i64:
268 return AMDILCC::IL_CC_UL_LT;
269 default:
270 assert(0 && "Opcode combination not generated correctly!");
271 return AMDILCC::COND_ERROR;
272 };
273 case ISD::SETULE:
274 switch (type) {
275 case MVT::i1:
276 case MVT::i8:
277 case MVT::i16:
278 case MVT::i32:
279 return AMDILCC::IL_CC_U_LE;
280 case MVT::f32:
281 return AMDILCC::IL_CC_F_ULE;
282 case MVT::f64:
283 return AMDILCC::IL_CC_D_ULE;
284 case MVT::i64:
285 return AMDILCC::IL_CC_UL_LE;
286 default:
287 assert(0 && "Opcode combination not generated correctly!");
288 return AMDILCC::COND_ERROR;
289 };
290 case ISD::SETUNE:
291 switch (type) {
292 case MVT::i1:
293 case MVT::i8:
294 case MVT::i16:
295 case MVT::i32:
296 return AMDILCC::IL_CC_U_NE;
297 case MVT::f32:
298 return AMDILCC::IL_CC_F_UNE;
299 case MVT::f64:
300 return AMDILCC::IL_CC_D_UNE;
301 case MVT::i64:
302 return AMDILCC::IL_CC_UL_NE;
303 default:
304 assert(0 && "Opcode combination not generated correctly!");
305 return AMDILCC::COND_ERROR;
306 };
307 case ISD::SETUEQ:
308 switch (type) {
309 case MVT::i1:
310 case MVT::i8:
311 case MVT::i16:
312 case MVT::i32:
313 return AMDILCC::IL_CC_U_EQ;
314 case MVT::f32:
315 return AMDILCC::IL_CC_F_UEQ;
316 case MVT::f64:
317 return AMDILCC::IL_CC_D_UEQ;
318 case MVT::i64:
319 return AMDILCC::IL_CC_UL_EQ;
320 default:
321 assert(0 && "Opcode combination not generated correctly!");
322 return AMDILCC::COND_ERROR;
323 };
324 case ISD::SETOGT:
325 switch (type) {
326 case MVT::f32:
327 return AMDILCC::IL_CC_F_OGT;
328 case MVT::f64:
329 return AMDILCC::IL_CC_D_OGT;
330 case MVT::i1:
331 case MVT::i8:
332 case MVT::i16:
333 case MVT::i32:
334 case MVT::i64:
335 default:
336 assert(0 && "Opcode combination not generated correctly!");
337 return AMDILCC::COND_ERROR;
338 };
339 case ISD::SETOGE:
340 switch (type) {
341 case MVT::f32:
342 return AMDILCC::IL_CC_F_OGE;
343 case MVT::f64:
344 return AMDILCC::IL_CC_D_OGE;
345 case MVT::i1:
346 case MVT::i8:
347 case MVT::i16:
348 case MVT::i32:
349 case MVT::i64:
350 default:
351 assert(0 && "Opcode combination not generated correctly!");
352 return AMDILCC::COND_ERROR;
353 };
354 case ISD::SETOLT:
355 switch (type) {
356 case MVT::f32:
357 return AMDILCC::IL_CC_F_OLT;
358 case MVT::f64:
359 return AMDILCC::IL_CC_D_OLT;
360 case MVT::i1:
361 case MVT::i8:
362 case MVT::i16:
363 case MVT::i32:
364 case MVT::i64:
365 default:
366 assert(0 && "Opcode combination not generated correctly!");
367 return AMDILCC::COND_ERROR;
368 };
369 case ISD::SETOLE:
370 switch (type) {
371 case MVT::f32:
372 return AMDILCC::IL_CC_F_OLE;
373 case MVT::f64:
374 return AMDILCC::IL_CC_D_OLE;
375 case MVT::i1:
376 case MVT::i8:
377 case MVT::i16:
378 case MVT::i32:
379 case MVT::i64:
380 default:
381 assert(0 && "Opcode combination not generated correctly!");
382 return AMDILCC::COND_ERROR;
383 };
384 case ISD::SETONE:
385 switch (type) {
386 case MVT::f32:
387 return AMDILCC::IL_CC_F_ONE;
388 case MVT::f64:
389 return AMDILCC::IL_CC_D_ONE;
390 case MVT::i1:
391 case MVT::i8:
392 case MVT::i16:
393 case MVT::i32:
394 case MVT::i64:
395 default:
396 assert(0 && "Opcode combination not generated correctly!");
397 return AMDILCC::COND_ERROR;
398 };
399 case ISD::SETOEQ:
400 switch (type) {
401 case MVT::f32:
402 return AMDILCC::IL_CC_F_OEQ;
403 case MVT::f64:
404 return AMDILCC::IL_CC_D_OEQ;
405 case MVT::i1:
406 case MVT::i8:
407 case MVT::i16:
408 case MVT::i32:
409 case MVT::i64:
410 default:
411 assert(0 && "Opcode combination not generated correctly!");
412 return AMDILCC::COND_ERROR;
413 };
414 };
415 }
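// Illustrative mapping (derived from the switch above): the same DAG
// condition code selects a different AMDIL compare depending on the type,
// e.g.
//
//   CondCCodeToCC(ISD::SETUGT, MVT::f32) == AMDILCC::IL_CC_F_UGT // unordered fp >
//   CondCCodeToCC(ISD::SETUGT, MVT::i32) == AMDILCC::IL_CC_U_GT  // unsigned int >
//   CondCCodeToCC(ISD::SETUGT, MVT::i64) == AMDILCC::IL_CC_UL_GT // unsigned long >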
416
417 /// Helper function used by LowerFormalArguments
418 static const TargetRegisterClass*
419 getRegClassFromType(unsigned int type) {
420 switch (type) {
421 default:
422 assert(0 && "Passed in type does not match any register classes.");
423 case MVT::i8:
424 return &AMDIL::GPRI8RegClass;
425 case MVT::i16:
426 return &AMDIL::GPRI16RegClass;
427 case MVT::i32:
428 return &AMDIL::GPRI32RegClass;
429 case MVT::f32:
430 return &AMDIL::GPRF32RegClass;
431 case MVT::i64:
432 return &AMDIL::GPRI64RegClass;
433 case MVT::f64:
434 return &AMDIL::GPRF64RegClass;
435 case MVT::v4f32:
436 return &AMDIL::GPRV4F32RegClass;
437 case MVT::v4i8:
438 return &AMDIL::GPRV4I8RegClass;
439 case MVT::v4i16:
440 return &AMDIL::GPRV4I16RegClass;
441 case MVT::v4i32:
442 return &AMDIL::GPRV4I32RegClass;
443 case MVT::v2f32:
444 return &AMDIL::GPRV2F32RegClass;
445 case MVT::v2i8:
446 return &AMDIL::GPRV2I8RegClass;
447 case MVT::v2i16:
448 return &AMDIL::GPRV2I16RegClass;
449 case MVT::v2i32:
450 return &AMDIL::GPRV2I32RegClass;
451 case MVT::v2f64:
452 return &AMDIL::GPRV2F64RegClass;
453 case MVT::v2i64:
454 return &AMDIL::GPRV2I64RegClass;
455 }
456 }
457
458 SDValue
459 AMDILTargetLowering::LowerMemArgument(
460 SDValue Chain,
461 CallingConv::ID CallConv,
462 const SmallVectorImpl<ISD::InputArg> &Ins,
463 DebugLoc dl, SelectionDAG &DAG,
464 const CCValAssign &VA,
465 MachineFrameInfo *MFI,
466 unsigned i) const
467 {
468 // Create the nodes corresponding to a load from this parameter slot.
469 ISD::ArgFlagsTy Flags = Ins[i].Flags;
470
471 bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
472 getTargetMachine().Options.GuaranteedTailCallOpt;
473 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
474
475 // FIXME: For now, all byval parameter objects are marked mutable. This can
476 // be changed with more analysis.
477 // In the case of tail call optimization, mark all arguments mutable, since
478 // they could be overwritten when the arguments are lowered for a tail call.
479 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
480 VA.getLocMemOffset(), isImmutable);
481 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
482
483 if (Flags.isByVal())
484 return FIN;
485 return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
486 MachinePointerInfo::getFixedStack(FI),
487 false, false, false, 0);
488 }
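// Illustrative walk-through (offsets are hypothetical): for a non-byval i32
// argument assigned to stack offset 16, the code above produces
//
//   int FI = MFI->CreateFixedObject(/*Size=*/4, /*Offset=*/16, isImmutable);
//   SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
//   return DAG.getLoad(MVT::i32, dl, Chain, FIN,
//                      MachinePointerInfo::getFixedStack(FI),
//                      false, false, false, 0);
//
// whereas a byval argument skips the load and hands back the frame-index
// address itself.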
489 //===----------------------------------------------------------------------===//
490 // TargetLowering Implementation Help Functions End
491 //===----------------------------------------------------------------------===//
492 //===----------------------------------------------------------------------===//
493 // Instruction generation functions
494 //===----------------------------------------------------------------------===//
495 uint32_t
496 AMDILTargetLowering::addExtensionInstructions(
497 uint32_t reg, bool signedShift,
498 unsigned int simpleVT) const
499 {
500 int shiftSize = 0;
501 uint32_t LShift, RShift;
502 switch(simpleVT)
503 {
504 default:
505 return reg;
506 case AMDIL::GPRI8RegClassID:
507 shiftSize = 24;
508 LShift = AMDIL::SHL_i8;
509 if (signedShift) {
510 RShift = AMDIL::SHR_i8;
511 } else {
512 RShift = AMDIL::USHR_i8;
513 }
514 break;
515 case AMDIL::GPRV2I8RegClassID:
516 shiftSize = 24;
517 LShift = AMDIL::SHL_v2i8;
518 if (signedShift) {
519 RShift = AMDIL::SHR_v2i8;
520 } else {
521 RShift = AMDIL::USHR_v2i8;
522 }
523 break;
524 case AMDIL::GPRV4I8RegClassID:
525 shiftSize = 24;
526 LShift = AMDIL::SHL_v4i8;
527 if (signedShift) {
528 RShift = AMDIL::SHR_v4i8;
529 } else {
530 RShift = AMDIL::USHR_v4i8;
531 }
532 break;
533 case AMDIL::GPRI16RegClassID:
534 shiftSize = 16;
535 LShift = AMDIL::SHL_i16;
536 if (signedShift) {
537 RShift = AMDIL::SHR_i16;
538 } else {
539 RShift = AMDIL::USHR_i16;
540 }
541 break;
542 case AMDIL::GPRV2I16RegClassID:
543 shiftSize = 16;
544 LShift = AMDIL::SHL_v2i16;
545 if (signedShift) {
546 RShift = AMDIL::SHR_v2i16;
547 } else {
548 RShift = AMDIL::USHR_v2i16;
549 }
550 break;
551 case AMDIL::GPRV4I16RegClassID:
552 shiftSize = 16;
553 LShift = AMDIL::SHL_v4i16;
554 if (signedShift) {
555 RShift = AMDIL::SHR_v4i16;
556 } else {
557 RShift = AMDIL::USHR_v4i16;
558 }
559 break;
560 };
561 uint32_t LoadReg = genVReg(simpleVT);
562 uint32_t tmp1 = genVReg(simpleVT);
563 uint32_t tmp2 = genVReg(simpleVT);
564 generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
565 generateMachineInst(LShift, tmp1, reg, LoadReg);
566 generateMachineInst(RShift, tmp2, tmp1, LoadReg);
567 return tmp2;
568 }
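// Worked example (hypothetical registers): sign-extending an i8 value held
// in a 32-bit GPR uses shiftSize == 24 to move the i8 sign bit up to bit 31
// and arithmetically back down, replicating it across the top 24 bits:
//
//   LOADCONST_i32 %c, 24
//   SHL_i8        %t1, %reg, %c   ; 0x000000AB -> 0xAB000000
//   SHR_i8        %t2, %t1,  %c   ; 0xAB000000 -> 0xFFFFFFAB
//
// With signedShift == false the USHR_i8 form zero-fills instead, giving
// 0x000000AB back.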
569
570 MachineOperand
571 AMDILTargetLowering::convertToReg(MachineOperand op) const
572 {
573 if (op.isReg()) {
574 return op;
575 } else if (op.isImm()) {
576 uint32_t loadReg
577 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
578 generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
579 .addImm(op.getImm());
580 op.ChangeToRegister(loadReg, false);
581 } else if (op.isFPImm()) {
582 uint32_t loadReg
583 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
584 generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
585 .addFPImm(op.getFPImm());
586 op.ChangeToRegister(loadReg, false);
587 } else if (op.isMBB()) {
588 op.ChangeToRegister(0, false);
589 } else if (op.isFI()) {
590 op.ChangeToRegister(0, false);
591 } else if (op.isCPI()) {
592 op.ChangeToRegister(0, false);
593 } else if (op.isJTI()) {
594 op.ChangeToRegister(0, false);
595 } else if (op.isGlobal()) {
596 op.ChangeToRegister(0, false);
597 } else if (op.isSymbol()) {
598 op.ChangeToRegister(0, false);
599 }/* else if (op.isMetadata()) {
600 op.ChangeToRegister(0, false);
601 }*/
602 return op;
603 }
604
605 //===----------------------------------------------------------------------===//
606 // TargetLowering Class Implementation Begins
607 //===----------------------------------------------------------------------===//
608 AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
609 : TargetLowering(TM, new TargetLoweringObjectFileELF())
610 {
611 int types[] =
612 {
613 (int)MVT::i8,
614 (int)MVT::i16,
615 (int)MVT::i32,
616 (int)MVT::f32,
617 (int)MVT::f64,
618 (int)MVT::i64,
619 (int)MVT::v2i8,
620 (int)MVT::v4i8,
621 (int)MVT::v2i16,
622 (int)MVT::v4i16,
623 (int)MVT::v4f32,
624 (int)MVT::v4i32,
625 (int)MVT::v2f32,
626 (int)MVT::v2i32,
627 (int)MVT::v2f64,
628 (int)MVT::v2i64
629 };
630
631 int IntTypes[] =
632 {
633 (int)MVT::i8,
634 (int)MVT::i16,
635 (int)MVT::i32,
636 (int)MVT::i64
637 };
638
639 int FloatTypes[] =
640 {
641 (int)MVT::f32,
642 (int)MVT::f64
643 };
644
645 int VectorTypes[] =
646 {
647 (int)MVT::v2i8,
648 (int)MVT::v4i8,
649 (int)MVT::v2i16,
650 (int)MVT::v4i16,
651 (int)MVT::v4f32,
652 (int)MVT::v4i32,
653 (int)MVT::v2f32,
654 (int)MVT::v2i32,
655 (int)MVT::v2f64,
656 (int)MVT::v2i64
657 };
658 size_t numTypes = sizeof(types) / sizeof(*types);
659 size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
660 size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
661 size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
662
663 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
664 &this->getTargetMachine())->getSubtargetImpl();
665 // These are the register classes that are
666 // currently supported.
667
668 addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
669 addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
670
671 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
672 addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
673 addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
674 }
675 if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
676 addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
677 addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
678 addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
679 setOperationAction(ISD::Constant , MVT::i8 , Legal);
680 }
681 if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
682 addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
683 addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
684 addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
685 setOperationAction(ISD::Constant , MVT::i16 , Legal);
686 }
687 addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
688 addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
689 addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
690 addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
691 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
692 addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
693 addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
694 }
695
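// Reminder of the standard LLVM operation-action semantics relied on below:
//   Legal  - the node is natively selectable for that value type
//   Custom - the node is routed through AMDILTargetLowering::LowerOperation()
//   Expand - the legalizer rewrites the node in terms of other, legal nodes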
696 for (unsigned int x = 0; x < numTypes; ++x) {
697 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
698
699 // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
700 // We cannot sextinreg directly, so it is custom-lowered to a pair of shifts.
701 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
702 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
703 setOperationAction(ISD::FP_ROUND, VT, Expand);
704 setOperationAction(ISD::SUBE, VT, Expand);
705 setOperationAction(ISD::SUBC, VT, Expand);
706 setOperationAction(ISD::ADD, VT, Custom);
707 setOperationAction(ISD::ADDE, VT, Expand);
708 setOperationAction(ISD::ADDC, VT, Expand);
709 setOperationAction(ISD::SETCC, VT, Custom);
710 setOperationAction(ISD::BRCOND, VT, Custom);
711 setOperationAction(ISD::BR_CC, VT, Custom);
712 setOperationAction(ISD::BR_JT, VT, Expand);
713 setOperationAction(ISD::BRIND, VT, Expand);
714 // TODO: Implement custom UREM/SREM routines
715 setOperationAction(ISD::UREM, VT, Expand);
716 setOperationAction(ISD::SREM, VT, Expand);
717 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
718 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
719 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
720 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
721 setOperationAction(ISDBITCAST, VT, Custom);
722 setOperationAction(ISD::GlobalAddress, VT, Custom);
723 setOperationAction(ISD::JumpTable, VT, Custom);
724 setOperationAction(ISD::ConstantPool, VT, Custom);
725 setOperationAction(ISD::SELECT_CC, VT, Custom);
726 setOperationAction(ISD::SELECT, VT, Custom);
727 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
728 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
729 if (VT != MVT::i64 && VT != MVT::v2i64) {
730 setOperationAction(ISD::SDIV, VT, Custom);
731 setOperationAction(ISD::UDIV, VT, Custom);
732 }
733 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
734 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
735 }
736 for (unsigned int x = 0; x < numFloatTypes; ++x) {
737 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
738
739 // IL does not have these operations for floating point types
740 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
741 setOperationAction(ISD::FP_ROUND, VT, Custom);
742 setOperationAction(ISD::SETOLT, VT, Expand);
743 setOperationAction(ISD::SETOGE, VT, Expand);
744 setOperationAction(ISD::SETOGT, VT, Expand);
745 setOperationAction(ISD::SETOLE, VT, Expand);
746 setOperationAction(ISD::SETULT, VT, Expand);
747 setOperationAction(ISD::SETUGE, VT, Expand);
748 setOperationAction(ISD::SETUGT, VT, Expand);
749 setOperationAction(ISD::SETULE, VT, Expand);
750 }
751
752 for (unsigned int x = 0; x < numIntTypes; ++x) {
753 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
754
755 // The GPU also has no divrem instruction, signed or unsigned
756 setOperationAction(ISD::SDIVREM, VT, Expand);
757 setOperationAction(ISD::UDIVREM, VT, Expand);
758 setOperationAction(ISD::FP_ROUND, VT, Expand);
759
760 // The GPU has no single-instruction [S|U]MUL_LOHI
761 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
762 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
763
764 // GPU doesn't have a rotl, rotr, or byteswap instruction
765 setOperationAction(ISD::ROTR, VT, Expand);
766 setOperationAction(ISD::ROTL, VT, Expand);
767 setOperationAction(ISD::BSWAP, VT, Expand);
768
769 // GPU doesn't have any bit-counting instructions (CTPOP/CTTZ/CTLZ)
770 setOperationAction(ISD::CTPOP, VT, Expand);
771 setOperationAction(ISD::CTTZ, VT, Expand);
772 setOperationAction(ISD::CTLZ, VT, Expand);
773 }
774
775 for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
776 {
777 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
778
779 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
780 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
781 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
782 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
783 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
784 setOperationAction(ISD::FP_ROUND, VT, Expand);
785 setOperationAction(ISD::SDIVREM, VT, Expand);
786 setOperationAction(ISD::UDIVREM, VT, Expand);
787 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
788 // setOperationAction(ISD::VSETCC, VT, Expand);
789 setOperationAction(ISD::SETCC, VT, Expand);
790 setOperationAction(ISD::SELECT_CC, VT, Expand);
791 setOperationAction(ISD::SELECT, VT, Expand);
792
793 }
794 setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
795 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
796 if (stm->calVersion() < CAL_VERSION_SC_139
797 || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
798 setOperationAction(ISD::MUL, MVT::i64, Custom);
799 }
800 setOperationAction(ISD::SUB, MVT::i64, Custom);
801 setOperationAction(ISD::ADD, MVT::i64, Custom);
802 setOperationAction(ISD::MULHU, MVT::i64, Expand);
803 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
804 setOperationAction(ISD::MULHS, MVT::i64, Expand);
805 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
806 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
807 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
808 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
809 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
810 setOperationAction(ISD::Constant , MVT::i64 , Legal);
811 setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
812 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
813 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
814 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
815 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
816 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
817 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
818 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
819 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
820 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
821 }
822 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
823 // we support loading/storing v2f64 but not operations on the type
824 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
825 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
826 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
827 setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
828 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
829 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
830 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
831 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
832 // We want to expand vector conversions into their scalar
833 // counterparts.
834 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
835 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
836 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
837 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
838 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
839 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
840 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
841 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
842 setOperationAction(ISD::FABS, MVT::f64, Expand);
843 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
844 }
845 // TODO: Fix the UDIV24 algorithm so it handles these vector
846 // types correctly; that requires working vector comparisons.
847 // Until then, these divisions are expanded.
848 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
849 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
850 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
851 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
852 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
853 setOperationAction(ISD::SUBC, MVT::Other, Expand);
854 setOperationAction(ISD::ADDE, MVT::Other, Expand);
855 setOperationAction(ISD::ADDC, MVT::Other, Expand);
856 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
857 setOperationAction(ISD::BR_CC, MVT::Other, Custom);
858 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
859 setOperationAction(ISD::BRIND, MVT::Other, Expand);
860 setOperationAction(ISD::SETCC, MVT::Other, Custom);
861 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
862 setOperationAction(ISD::FDIV, MVT::f32, Custom);
863 setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
864 setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
865
866 setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
867 // Use the default implementation.
868 setOperationAction(ISD::VAARG , MVT::Other, Expand);
869 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
870 setOperationAction(ISD::VAEND , MVT::Other, Expand);
871 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
872 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
873 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
874 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
875 setOperationAction(ISD::Constant , MVT::i32 , Legal);
876 setOperationAction(ISD::TRAP , MVT::Other , Legal);
877
878 setStackPointerRegisterToSaveRestore(AMDIL::SP);
879 setSchedulingPreference(Sched::RegPressure);
880 setPow2DivIsCheap(false);
881 setPrefLoopAlignment(16);
882 setSelectIsExpensive(true);
883 setJumpIsExpensive(true);
884 computeRegisterProperties();
885
886 maxStoresPerMemcpy = 4096;
887 maxStoresPerMemmove = 4096;
888 maxStoresPerMemset = 4096;
889
894 }
895
896 const char *
897 AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
898 {
899 switch (Opcode) {
900 default: return 0;
901 case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
902 case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
903 case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
904 case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
905 case AMDILISD::CMOV: return "AMDILISD::CMOV";
906 case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
907 case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
908 case AMDILISD::MAD: return "AMDILISD::MAD";
909 case AMDILISD::UMAD: return "AMDILISD::UMAD";
910 case AMDILISD::CALL: return "AMDILISD::CALL";
911 case AMDILISD::RET: return "AMDILISD::RET";
912 case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
913 case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
914 case AMDILISD::ADD: return "AMDILISD::ADD";
915 case AMDILISD::UMUL: return "AMDILISD::UMUL";
916 case AMDILISD::AND: return "AMDILISD::AND";
917 case AMDILISD::OR: return "AMDILISD::OR";
918 case AMDILISD::NOT: return "AMDILISD::NOT";
919 case AMDILISD::XOR: return "AMDILISD::XOR";
920 case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
921 case AMDILISD::SMAX: return "AMDILISD::SMAX";
922 case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
923 case AMDILISD::MOVE: return "AMDILISD::MOVE";
924 case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
925 case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
926 case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
927 case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
928 case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
929 case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
930 case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
931 case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
932 case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
933 case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
934 case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
935 case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
936 case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
937 case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
938 case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
939 case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
940 case AMDILISD::CMP: return "AMDILISD::CMP";
941 case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
942 case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
943 case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
944 case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
945 case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
946 case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
947 case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
948 case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
949 case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
950 case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
951 case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
952 case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
953 case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
954 case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
955 case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
956 case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
957 case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
958 case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
959 case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
960 case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
961 case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
962 case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
963 case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
964 case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
965 case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
966 case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
967 case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
968 case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
969 case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
970 case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
971 case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
972 case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
973 case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
974 case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
975 case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
976 case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
977 case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
978 case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
979 case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
980 case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
981 case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
982 case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
983 case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
984 case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
985 case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
986 case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
987 case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
988 case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
989 case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
990 case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
991 case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
992 case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
993 case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
994 case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
995 case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
996 case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
997 case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
998 case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
999 case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
1000 case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
1001 case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
1002 case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
1003 case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
1004 case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
1005 case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
1006 case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
1007 case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
1008 case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
1009 case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
1010 case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
1011 case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
1012 case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
1013 case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
1014 case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
1015 case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
1016 case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
1017 case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
1018 case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
1019 case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
1020 case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
1021 case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
1022 case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
1023 case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
1024 case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
1025 case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
1026 case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
1027 case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
1028 case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
1029 case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
1030 case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
1031 case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
1032 case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
1033 case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
1034 case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
1035 case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
1036 case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
1037 case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
1038 case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
1039 case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
1040 case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
1041 case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
1042 case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
1043 case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
1044 case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
1045 case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
1046 case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
1047 case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
1048 case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
1049 case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
1050
1051 };
1052 }
1053 bool
1054 AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1055 const CallInst &I, unsigned Intrinsic) const
1056 {
1057 if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
1058 || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
1059 return false;
1060 }
1061 bool bitCastToInt = false;
1062 unsigned IntNo;
1063 bool isRet = true;
1064 const AMDILSubtarget *STM = &this->getTargetMachine()
1065 .getSubtarget<AMDILSubtarget>();
1066 switch (Intrinsic) {
1067 default: return false; // Don't custom lower most intrinsics.
1068 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
1069 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
1070 IntNo = AMDILISD::ATOM_G_ADD; break;
1071 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
1072 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
1073 isRet = false;
1074 IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
1075 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
1076 case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
1077 IntNo = AMDILISD::ATOM_L_ADD; break;
1078 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
1079 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
1080 isRet = false;
1081 IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
1082 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
1083 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
1084 IntNo = AMDILISD::ATOM_R_ADD; break;
1085 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
1086 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
1087 isRet = false;
1088 IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
1089 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
1090 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
1091 IntNo = AMDILISD::ATOM_G_AND; break;
1092 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
1093 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
1094 isRet = false;
1095 IntNo = AMDILISD::ATOM_G_AND_NORET; break;
1096 case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
1097 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
1098 IntNo = AMDILISD::ATOM_L_AND; break;
1099 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
1100 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
1101 isRet = false;
1102 IntNo = AMDILISD::ATOM_L_AND_NORET; break;
1103 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
1104 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
1105 IntNo = AMDILISD::ATOM_R_AND; break;
1106 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
1107 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
1108 isRet = false;
1109 IntNo = AMDILISD::ATOM_R_AND_NORET; break;
1110 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
1111 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
1112 IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
1113 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
1114 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
1115 isRet = false;
1116 IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
1117 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
1118 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
1119 IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
1120 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
1121 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
1122 isRet = false;
1123 IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
1124 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
1125 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
1126 IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
1127 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
1128 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
1129 isRet = false;
1130 IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
1131 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
1132 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
1133 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1134 IntNo = AMDILISD::ATOM_G_DEC;
1135 } else {
1136 IntNo = AMDILISD::ATOM_G_SUB;
1137 }
1138 break;
1139 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
1140 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
1141 isRet = false;
1142 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1143 IntNo = AMDILISD::ATOM_G_DEC_NORET;
1144 } else {
1145 IntNo = AMDILISD::ATOM_G_SUB_NORET;
1146 }
1147 break;
1148 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
1149 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
1150 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1151 IntNo = AMDILISD::ATOM_L_DEC;
1152 } else {
1153 IntNo = AMDILISD::ATOM_L_SUB;
1154 }
1155 break;
1156 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
1157 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
1158 isRet = false;
1159 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1160 IntNo = AMDILISD::ATOM_L_DEC_NORET;
1161 } else {
1162 IntNo = AMDILISD::ATOM_L_SUB_NORET;
1163 }
1164 break;
1165 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
1166 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
1167 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1168 IntNo = AMDILISD::ATOM_R_DEC;
1169 } else {
1170 IntNo = AMDILISD::ATOM_R_SUB;
1171 }
1172 break;
1173 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
1174 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
1175 isRet = false;
1176 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1177 IntNo = AMDILISD::ATOM_R_DEC_NORET;
1178 } else {
1179 IntNo = AMDILISD::ATOM_R_SUB_NORET;
1180 }
1181 break;
1182 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
1183 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
1184 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1185 IntNo = AMDILISD::ATOM_G_INC;
1186 } else {
1187 IntNo = AMDILISD::ATOM_G_ADD;
1188 }
1189 break;
1190 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
1191 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
1192 isRet = false;
1193 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1194 IntNo = AMDILISD::ATOM_G_INC_NORET;
1195 } else {
1196 IntNo = AMDILISD::ATOM_G_ADD_NORET;
1197 }
1198 break;
1199 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
1200 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
1201 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1202 IntNo = AMDILISD::ATOM_L_INC;
1203 } else {
1204 IntNo = AMDILISD::ATOM_L_ADD;
1205 }
1206 break;
1207 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
1208 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
1209 isRet = false;
1210 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1211 IntNo = AMDILISD::ATOM_L_INC_NORET;
1212 } else {
1213 IntNo = AMDILISD::ATOM_L_ADD_NORET;
1214 }
1215 break;
1216 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
1217 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
1218 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1219 IntNo = AMDILISD::ATOM_R_INC;
1220 } else {
1221 IntNo = AMDILISD::ATOM_R_ADD;
1222 }
1223 break;
1224 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
1225 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
1226 isRet = false;
1227 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1228 IntNo = AMDILISD::ATOM_R_INC_NORET;
1229 } else {
1230 IntNo = AMDILISD::ATOM_R_ADD_NORET;
1231 }
1232 break;
1233 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
1234 IntNo = AMDILISD::ATOM_G_MAX; break;
1235 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
1236 IntNo = AMDILISD::ATOM_G_UMAX; break;
1237 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
1238 isRet = false;
1239 IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
1240 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
1241 isRet = false;
1242 IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
1243 case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
1244 IntNo = AMDILISD::ATOM_L_MAX; break;
1245 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
1246 IntNo = AMDILISD::ATOM_L_UMAX; break;
1247 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
1248 isRet = false;
1249 IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
1250 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
1251 isRet = false;
1252 IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
1253 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
1254 IntNo = AMDILISD::ATOM_R_MAX; break;
1255 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
1256 IntNo = AMDILISD::ATOM_R_UMAX; break;
1257 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
1258 isRet = false;
1259 IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
1260 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
1261 isRet = false;
1262 IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
1263 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
1264 IntNo = AMDILISD::ATOM_G_MIN; break;
1265 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
1266 IntNo = AMDILISD::ATOM_G_UMIN; break;
1267 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
1268 isRet = false;
1269 IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
1270 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
1271 isRet = false;
1272 IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
1273 case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
1274 IntNo = AMDILISD::ATOM_L_MIN; break;
1275 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
1276 IntNo = AMDILISD::ATOM_L_UMIN; break;
1277 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
1278 isRet = false;
1279 IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
1280 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
1281 isRet = false;
1282 IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
1283 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
1284 IntNo = AMDILISD::ATOM_R_MIN; break;
1285 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
1286 IntNo = AMDILISD::ATOM_R_UMIN; break;
1287 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
1288 isRet = false;
1289 IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
1290 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
1291 isRet = false;
1292 IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
1293 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
1294 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
1295 IntNo = AMDILISD::ATOM_G_OR; break;
1296 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
1297 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
1298 isRet = false;
1299 IntNo = AMDILISD::ATOM_G_OR_NORET; break;
1300 case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
1301 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
1302 IntNo = AMDILISD::ATOM_L_OR; break;
1303 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
1304 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
1305 isRet = false;
1306 IntNo = AMDILISD::ATOM_L_OR_NORET; break;
1307 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
1308 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
1309 IntNo = AMDILISD::ATOM_R_OR; break;
1310 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
1311 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
1312 isRet = false;
1313 IntNo = AMDILISD::ATOM_R_OR_NORET; break;
1314 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
1315 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
1316 IntNo = AMDILISD::ATOM_G_SUB; break;
1317 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
1318 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
1319 isRet = false;
1320 IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
1321 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
1322 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
1323 IntNo = AMDILISD::ATOM_L_SUB; break;
1324 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
1325 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
1326 isRet = false;
1327 IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
1328 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
1329 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
1330 IntNo = AMDILISD::ATOM_R_SUB; break;
1331 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
1332 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
1333 isRet = false;
1334 IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
1335 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
1336 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
1337 IntNo = AMDILISD::ATOM_G_RSUB; break;
1338 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
1339 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
1340 isRet = false;
1341 IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
1342 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
1343 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
1344 IntNo = AMDILISD::ATOM_L_RSUB; break;
1345 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
1346 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
1347 isRet = false;
1348 IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
1349 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
1350 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
1351 IntNo = AMDILISD::ATOM_R_RSUB; break;
1352 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
1353 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
1354 isRet = false;
1355 IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
1356 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
1357 bitCastToInt = true;
1358 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
1359 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
1360 IntNo = AMDILISD::ATOM_G_XCHG; break;
1361 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
1362 bitCastToInt = true;
1363 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
1364 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
1365 isRet = false;
1366 IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
1367 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
1368 bitCastToInt = true;
1369 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
1370 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
1371 IntNo = AMDILISD::ATOM_L_XCHG; break;
1372 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
1373 bitCastToInt = true;
1374 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
1375 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
1376 isRet = false;
1377 IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
1378 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
1379 bitCastToInt = true;
1380 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
1381 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
1382 IntNo = AMDILISD::ATOM_R_XCHG; break;
1383 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
1384 bitCastToInt = true;
1385 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
1386 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
1387 isRet = false;
1388 IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
1389 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
1390 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
1391 IntNo = AMDILISD::ATOM_G_XOR; break;
1392 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
1393 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
1394 isRet = false;
1395 IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
1396 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
1397 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
1398 IntNo = AMDILISD::ATOM_L_XOR; break;
1399 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
1400 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
1401 isRet = false;
1402 IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
1403 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
1404 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
1405 IntNo = AMDILISD::ATOM_R_XOR; break;
1406 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
1407 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
1408 isRet = false;
1409 IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
1410 case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
1411 IntNo = AMDILISD::APPEND_ALLOC; break;
1412 case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
1413 isRet = false;
1414 IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
1415 case AMDGPUIntrinsic::AMDIL_append_consume_i32:
1416 IntNo = AMDILISD::APPEND_CONSUME; break;
1417 case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
1418 isRet = false;
1419 IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
1420 };
1421
1422 Info.opc = IntNo;
1423 Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
1424 Info.ptrVal = I.getOperand(0);
1425 Info.offset = 0;
1426 Info.align = 4;
1427 Info.vol = true;
1428 Info.readMem = isRet;
1429 Info.writeMem = true;
1430 return true;
1431 }
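// Illustrative result (derived from the code above): for a call to
// AMDIL_atomic_add_gi32 the selector receives
//
//   Info.opc      = AMDILISD::ATOM_G_ADD;
//   Info.memVT    = MVT::i32;      // f32 only for the bit-cast xchg forms
//   Info.ptrVal   = I.getOperand(0);
//   Info.align    = 4;  Info.vol = true;
//   Info.readMem  = true;          // false for the *_noret variants
//   Info.writeMem = true;
//
// so the intrinsic is treated as a volatile 32-bit read-modify-write access.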
1432 // The backend supports 32- and 64-bit floating point immediates
1433 bool
1434 AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1435 {
1436 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1437 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1438 return true;
1439 } else {
1440 return false;
1441 }
1442 }
1443
1444 bool
1445 AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1446 {
1447 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1448 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1449 return false;
1450 } else {
1451 return true;
1452 }
1453 }
1454
1455
1456 // computeMaskedBitsForTargetNode - Determine which bits of 'Op' are known
1457 // to be zero or one. Op is expected to be a target-specific node. Used by
1458 // the DAG combiner.
1459
1460 void
1461 AMDILTargetLowering::computeMaskedBitsForTargetNode(
1462 const SDValue Op,
1463 APInt &KnownZero,
1464 APInt &KnownOne,
1465 const SelectionDAG &DAG,
1466 unsigned Depth) const
1467 {
1468 APInt KnownZero2;
1469 APInt KnownOne2;
1470 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1471 switch (Op.getOpcode()) {
1472 default: break;
1473 case AMDILISD::SELECT_CC:
1474 DAG.ComputeMaskedBits(
1475 Op.getOperand(1),
1476 KnownZero,
1477 KnownOne,
1478 Depth + 1
1479 );
1480 DAG.ComputeMaskedBits(
1481 Op.getOperand(0),
1482 KnownZero2,
1483 KnownOne2,
1484 Depth + 1);
1485 assert((KnownZero & KnownOne) == 0
1486 && "Bits known to be one AND zero?");
1487 assert((KnownZero2 & KnownOne2) == 0
1488 && "Bits known to be one AND zero?");
1489 // Only known if known in both the LHS and RHS
1490 KnownOne &= KnownOne2;
1491 KnownZero &= KnownZero2;
1492 break;
1493 };
1494 }
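// Example of the intersection rule above (hypothetical masks): if one arm of
// the SELECT_CC is known to have its top 16 bits zero (KnownZero =
// 0xFFFF0000) and the other arm only its top 8 (KnownZero2 = 0xFF000000),
// the result is known zero only in the common bits:
//   KnownZero &= KnownZero2;   // == 0xFF000000
// A bit must be known on both arms before it is known for the select.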
1495
1496 // This is the function that determines which calling convention should
1497 // be used. Currently there is only one calling convention.
1498 CCAssignFn*
1499 AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
1500 {
1501 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1502 return CC_AMDIL32;
1503 }
1504
// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of the appropriate physical registers. This assumes
// that Chain/InFlag are the input chain/flag to use, and that TheCall is the
// call being lowered. It returns an SDNode with the same number of values as
// the ISD::CALL.
1510 SDValue
1511 AMDILTargetLowering::LowerCallResult(
1512 SDValue Chain,
1513 SDValue InFlag,
1514 CallingConv::ID CallConv,
1515 bool isVarArg,
1516 const SmallVectorImpl<ISD::InputArg> &Ins,
1517 DebugLoc dl,
1518 SelectionDAG &DAG,
1519 SmallVectorImpl<SDValue> &InVals) const
1520 {
1521 // Assign locations to each value returned by this call
1522 SmallVector<CCValAssign, 16> RVLocs;
1523 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1524 getTargetMachine(), RVLocs, *DAG.getContext());
1525 CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
1526
1527 // Copy all of the result registers out of their specified physreg.
1528 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1529 EVT CopyVT = RVLocs[i].getValVT();
1530 if (RVLocs[i].isRegLoc()) {
1531 Chain = DAG.getCopyFromReg(
1532 Chain,
1533 dl,
1534 RVLocs[i].getLocReg(),
1535 CopyVT,
1536 InFlag
1537 ).getValue(1);
1538 SDValue Val = Chain.getValue(0);
1539 InFlag = Chain.getValue(2);
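      // getCopyFromReg produces (value, chain, glue): the chain (result 1)
      // was kept above, and the copied value (result 0) and glue (result 2)
      // are read off the same node here.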
1540 InVals.push_back(Val);
1541 }
1542 }
1543
1544 return Chain;
1545
1546 }
1547
1548 //===----------------------------------------------------------------------===//
1549 // Other Lowering Hooks
1550 //===----------------------------------------------------------------------===//
1551
1552 // Recursively assign SDNodeOrdering to any unordered nodes
1553 // This is necessary to maintain source ordering of instructions
1554 // under -O0 to avoid odd-looking "skipping around" issues.
1555 static const SDValue
1556 Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
1557 {
1558 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
1559 DAG.AssignOrdering( New.getNode(), order );
1560 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
1561 Ordered( DAG, order, New.getOperand(i) );
1562 }
1563 return New;
1564 }
1565
1566 #define LOWER(A) \
1567 case ISD:: A: \
1568 return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
1569
1570 SDValue
1571 AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
1572 {
1573 switch (Op.getOpcode()) {
1574 default:
1575 Op.getNode()->dump();
      assert(0 && "Custom lowering code for this "
          "instruction is not implemented yet!");
1578 break;
1579 LOWER(GlobalAddress);
1580 LOWER(JumpTable);
1581 LOWER(ConstantPool);
1582 LOWER(ExternalSymbol);
1583 LOWER(FP_TO_SINT);
1584 LOWER(FP_TO_UINT);
1585 LOWER(SINT_TO_FP);
1586 LOWER(UINT_TO_FP);
1587 LOWER(ADD);
1588 LOWER(MUL);
1589 LOWER(SUB);
1590 LOWER(FDIV);
1591 LOWER(SDIV);
1592 LOWER(SREM);
1593 LOWER(UDIV);
1594 LOWER(UREM);
1595 LOWER(BUILD_VECTOR);
1596 LOWER(INSERT_VECTOR_ELT);
1597 LOWER(EXTRACT_VECTOR_ELT);
1598 LOWER(EXTRACT_SUBVECTOR);
1599 LOWER(SCALAR_TO_VECTOR);
1600 LOWER(CONCAT_VECTORS);
1601 LOWER(SELECT);
1602 LOWER(SETCC);
1603 LOWER(SIGN_EXTEND_INREG);
1604 LOWER(BITCAST);
1605 LOWER(DYNAMIC_STACKALLOC);
1606 LOWER(BRCOND);
1607 LOWER(BR_CC);
1608 LOWER(FP_ROUND);
1609 }
1610 return Op;
1611 }
1612
1613 int
1614 AMDILTargetLowering::getVarArgsFrameOffset() const
1615 {
1616 return VarArgsFrameOffset;
1617 }
1618 #undef LOWER
1619
1620 SDValue
1621 AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
1622 {
1623 SDValue DST = Op;
1624 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
1625 const GlobalValue *G = GADN->getGlobal();
1626 DebugLoc DL = Op.getDebugLoc();
1627 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV) {
    DST = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
1630 } else {
1631 if (GV->hasInitializer()) {
1632 const Constant *C = dyn_cast<Constant>(GV->getInitializer());
1633 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
1634 DST = DAG.getConstant(CI->getValue(), Op.getValueType());
1635 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
1636 DST = DAG.getConstantFP(CF->getValueAPF(),
1637 Op.getValueType());
      } else if (isa<ConstantAggregateZero>(C)) {
1639 EVT VT = Op.getValueType();
1640 if (VT.isInteger()) {
1641 DST = DAG.getConstant(0, VT);
1642 } else {
1643 DST = DAG.getConstantFP(0, VT);
1644 }
1645 } else {
1646 assert(!"lowering this type of Global Address "
1647 "not implemented yet!");
1648 C->dump();
1649 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1650 }
1651 } else {
1652 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1653 }
1654 }
1655 return DST;
1656 }
1657
1658 SDValue
1659 AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
1660 {
1661 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1662 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
1663 return Result;
1664 }
1665 SDValue
1666 AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
1667 {
1668 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1669 EVT PtrVT = Op.getValueType();
1670 SDValue Result;
1671 if (CP->isMachineConstantPoolEntry()) {
1672 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1673 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1674 } else {
1675 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1676 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1677 }
1678 return Result;
1679 }
1680
1681 SDValue
1682 AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
1683 {
1684 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
1685 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
1686 return Result;
1687 }
1688
/// LowerFormalArguments - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
1693 SDValue
1694 AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
1695 CallingConv::ID CallConv,
1696 bool isVarArg,
1697 const SmallVectorImpl<ISD::InputArg> &Ins,
1698 DebugLoc dl,
1699 SelectionDAG &DAG,
1700 SmallVectorImpl<SDValue> &InVals)
1701 const
1702 {
1703
1704 MachineFunction &MF = DAG.getMachineFunction();
1705 MachineFrameInfo *MFI = MF.getFrameInfo();
1706 //const Function *Fn = MF.getFunction();
1707 //MachineRegisterInfo &RegInfo = MF.getRegInfo();
1708
1709 SmallVector<CCValAssign, 16> ArgLocs;
1710 CallingConv::ID CC = MF.getFunction()->getCallingConv();
1711 //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
1712
1713 CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
1714 getTargetMachine(), ArgLocs, *DAG.getContext());
1715
1716 // When more calling conventions are added, they need to be chosen here
1717 CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
1718 SDValue StackPtr;
1719
1720 //unsigned int FirstStackArgLoc = 0;
1721
1722 for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
1723 CCValAssign &VA = ArgLocs[i];
1724 if (VA.isRegLoc()) {
1725 EVT RegVT = VA.getLocVT();
1726 const TargetRegisterClass *RC = getRegClassFromType(
1727 RegVT.getSimpleVT().SimpleTy);
1728
1729 unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
1730 SDValue ArgValue = DAG.getCopyFromReg(
1731 Chain,
1732 dl,
1733 Reg,
1734 RegVT);
1735 // If this is an 8 or 16-bit value, it is really passed
1736 // promoted to 32 bits. Insert an assert[sz]ext to capture
1737 // this, then truncate to the right size.
1738
1739 if (VA.getLocInfo() == CCValAssign::SExt) {
1740 ArgValue = DAG.getNode(
1741 ISD::AssertSext,
1742 dl,
1743 RegVT,
1744 ArgValue,
1745 DAG.getValueType(VA.getValVT()));
1746 } else if (VA.getLocInfo() == CCValAssign::ZExt) {
1747 ArgValue = DAG.getNode(
1748 ISD::AssertZext,
1749 dl,
1750 RegVT,
1751 ArgValue,
1752 DAG.getValueType(VA.getValVT()));
1753 }
1754 if (VA.getLocInfo() != CCValAssign::Full) {
1755 ArgValue = DAG.getNode(
1756 ISD::TRUNCATE,
1757 dl,
1758 VA.getValVT(),
1759 ArgValue);
1760 }
1761 // Add the value to the list of arguments
1762 // to be passed in registers
1763 InVals.push_back(ArgValue);
1764 if (isVarArg) {
1765 assert(0 && "Variable arguments are not yet supported");
1766 // See MipsISelLowering.cpp for ideas on how to implement
1767 }
1768 } else if(VA.isMemLoc()) {
1769 InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
1770 dl, DAG, VA, MFI, i));
1771 } else {
      assert(0 && "found a Value Assign that is "
          "neither a register nor a memory location");
1774 }
1775 }
1776 /*if (hasStructRet) {
1777 assert(0 && "Has struct return is not yet implemented");
1778 // See MipsISelLowering.cpp for ideas on how to implement
1779 }*/
1780
1781 if (isVarArg) {
1782 assert(0 && "Variable arguments are not yet supported");
1783 // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
1784 }
  // This needs to be changed to non-zero if the callee needs to pop bytes
  // off the stack on return
1787 return Chain;
1788 }
1789 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1790 /// by "Src" to address "Dst" with size and alignment information specified by
1791 /// the specific parameter attribute. The copy will be passed as a byval
1792 /// function parameter.
1793 static SDValue
1794 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1795 ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
1796 assert(0 && "MemCopy does not exist yet");
1797 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1798
1799 return DAG.getMemcpy(Chain,
1800 Src.getDebugLoc(),
1801 Dst, Src, SizeNode, Flags.getByValAlign(),
1802 /*IsVol=*/false, /*AlwaysInline=*/true,
1803 MachinePointerInfo(), MachinePointerInfo());
1804 }
1805
1806 SDValue
1807 AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
1808 SDValue StackPtr, SDValue Arg,
1809 DebugLoc dl, SelectionDAG &DAG,
1810 const CCValAssign &VA,
1811 ISD::ArgFlagsTy Flags) const
1812 {
1813 unsigned int LocMemOffset = VA.getLocMemOffset();
1814 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1815 PtrOff = DAG.getNode(ISD::ADD,
1816 dl,
1817 getPointerTy(), StackPtr, PtrOff);
1818 if (Flags.isByVal()) {
1819 PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
1820 } else {
1821 PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
1822 MachinePointerInfo::getStack(LocMemOffset),
1823 false, false, 0);
1824 }
1825 return PtrOff;
1826 }
/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame); CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
1831 SDValue
1832 AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1833 CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
1834 bool& isTailCall,
1835 const SmallVectorImpl<ISD::OutputArg> &Outs,
1836 const SmallVectorImpl<SDValue> &OutVals,
1837 const SmallVectorImpl<ISD::InputArg> &Ins,
1838 DebugLoc dl, SelectionDAG &DAG,
1839 SmallVectorImpl<SDValue> &InVals)
1840 const
1841 {
1842 isTailCall = false;
1843 MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: Do we need to handle fast calling conventions and tail call
  // optimizations? See X86/PPC ISelLowering.
1846 /*bool hasStructRet = (TheCall->getNumArgs())
1847 ? TheCall->getArgFlags(0).device()->isSRet()
1848 : false;*/
1849
1850 MachineFrameInfo *MFI = MF.getFrameInfo();
1851
1852 // Analyze operands of the call, assigning locations to each operand
1853 SmallVector<CCValAssign, 16> ArgLocs;
1854 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1855 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands; this needs to change
  // if we have more than one calling convention
1858 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
1859
1860 unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(0 && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }
1865
1866 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1867
1868 SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
1869 SmallVector<SDValue, 8> MemOpChains;
1870 SDValue StackPtr;
1871 //unsigned int FirstStacArgLoc = 0;
1872 //int LastArgStackLoc = 0;
1873
1874 // Walk the register/memloc assignments, insert copies/loads
1875 for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
1876 CCValAssign &VA = ArgLocs[i];
1877 //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the first 5 operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value if needed
1881 switch(VA.getLocInfo()) {
1882 default: assert(0 && "Unknown loc info!");
1883 case CCValAssign::Full:
1884 break;
1885 case CCValAssign::SExt:
1886 Arg = DAG.getNode(ISD::SIGN_EXTEND,
1887 dl,
1888 VA.getLocVT(), Arg);
1889 break;
1890 case CCValAssign::ZExt:
1891 Arg = DAG.getNode(ISD::ZERO_EXTEND,
1892 dl,
1893 VA.getLocVT(), Arg);
1894 break;
1895 case CCValAssign::AExt:
1896 Arg = DAG.getNode(ISD::ANY_EXTEND,
1897 dl,
1898 VA.getLocVT(), Arg);
1899 break;
1900 }
1901
1902 if (VA.isRegLoc()) {
1903 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1904 } else if (VA.isMemLoc()) {
      // Create the frame index object for this outgoing parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());

      // emit an ISD::STORE which stores the
      // parameter value to a stack location
1912 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
1913 MachinePointerInfo::getFixedStack(FI),
1914 false, false, 0));
1915 } else {
1916 assert(0 && "Not a Reg/Mem Loc, major error!");
1917 }
1918 }
1919 if (!MemOpChains.empty()) {
1920 Chain = DAG.getNode(ISD::TokenFactor,
1921 dl,
1922 MVT::Other,
1923 &MemOpChains[0],
1924 MemOpChains.size());
1925 }
1926 SDValue InFlag;
1927 if (!isTailCall) {
1928 for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
1929 Chain = DAG.getCopyToReg(Chain,
1930 dl,
1931 RegsToPass[i].first,
1932 RegsToPass[i].second,
1933 InFlag);
1934 InFlag = Chain.getValue(1);
1935 }
1936 }
1937
1938 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
1939 // every direct call is) turn it into a TargetGlobalAddress/
1940 // TargetExternalSymbol
1941 // node so that legalize doesn't hack it.
1942 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1943 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
1944 }
1945 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1946 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1947 }
1948 else if (isTailCall) {
1949 assert(0 && "Tail calls are not handled yet");
1950 // see X86 ISelLowering for ideas on implementation: 1708
1951 }
1952
1953 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
1954 SmallVector<SDValue, 8> Ops;
1955
1956 if (isTailCall) {
1957 assert(0 && "Tail calls are not handled yet");
1958 // see X86 ISelLowering for ideas on implementation: 1721
1959 }
1960 // If this is a direct call, pass the chain and the callee
1961 if (Callee.getNode()) {
1962 Ops.push_back(Chain);
1963 Ops.push_back(Callee);
1964 }
1965
1966 if (isTailCall) {
1967 assert(0 && "Tail calls are not handled yet");
1968 // see X86 ISelLowering for ideas on implementation: 1739
1969 }
1970
1971 // Add argument registers to the end of the list so that they are known
1972 // live into the call
1973 for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
1974 Ops.push_back(DAG.getRegister(
1975 RegsToPass[i].first,
1976 RegsToPass[i].second.getValueType()));
1977 }
1978 if (InFlag.getNode()) {
1979 Ops.push_back(InFlag);
1980 }
1981
1982 // Emit Tail Call
1983 if (isTailCall) {
1984 assert(0 && "Tail calls are not handled yet");
1985 // see X86 ISelLowering for ideas on implementation: 1762
1986 }
1987
1988 Chain = DAG.getNode(AMDILISD::CALL,
1989 dl,
1990 NodeTys, &Ops[0], Ops.size());
1991 InFlag = Chain.getValue(1);
1992
1993 // Create the CALLSEQ_END node
1994 Chain = DAG.getCALLSEQ_END(
1995 Chain,
1996 DAG.getIntPtrConstant(NumBytes, true),
1997 DAG.getIntPtrConstant(0, true),
1998 InFlag);
1999 InFlag = Chain.getValue(1);
2000 // Handle result values, copying them out of physregs into vregs that
2001 // we return
2002 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2003 InVals);
2004 }
2005 static void checkMADType(
2006 SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
2007 {
2008 bool globalLoadStore = false;
2009 is24bitMAD = false;
2010 is32bitMAD = false;
  // NOTE: this early return disables the MAD detection below, so both flags
  // are always reported as false.
  return;
  assert(Op.getOpcode() == ISD::ADD && "The opcode must be an add in order for "
      "this to work correctly!");
2014 if (Op.getNode()->use_empty()) {
2015 return;
2016 }
2017 for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
2018 nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
2019 SDNode *ptr = *nBegin;
2020 const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
2021 // If we are not a LSBaseSDNode then we don't do this
2022 // optimization.
2023 // If we are a LSBaseSDNode, but the op is not the offset
2024 // or base pointer, then we don't do this optimization
2025 // (i.e. we are the value being stored)
2026 if (!lsNode ||
2027 (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
2028 return;
2029 }
2030 const PointerType *PT =
2031 dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
2032 unsigned as = PT->getAddressSpace();
2033 switch(as) {
2034 default:
2035 globalLoadStore = true;
2036 case AMDILAS::PRIVATE_ADDRESS:
2037 if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
2038 globalLoadStore = true;
2039 }
2040 break;
2041 case AMDILAS::CONSTANT_ADDRESS:
2042 if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
2043 globalLoadStore = true;
2044 }
2045 break;
2046 case AMDILAS::LOCAL_ADDRESS:
2047 if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
2048 globalLoadStore = true;
2049 }
2050 break;
2051 case AMDILAS::REGION_ADDRESS:
2052 if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
2053 globalLoadStore = true;
2054 }
2055 break;
2056 }
2057 }
2058 if (globalLoadStore) {
2059 is32bitMAD = true;
2060 } else {
2061 is24bitMAD = true;
2062 }
2063 }
2064
2065 SDValue
2066 AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
2067 {
2068 SDValue LHS = Op.getOperand(0);
2069 SDValue RHS = Op.getOperand(1);
2070 DebugLoc DL = Op.getDebugLoc();
2071 EVT OVT = Op.getValueType();
2072 SDValue DST;
2073 const AMDILSubtarget *stm = &this->getTargetMachine()
2074 .getSubtarget<AMDILSubtarget>();
2075 bool isVec = OVT.isVector();
2076 if (OVT.getScalarType() == MVT::i64) {
2077 MVT INTTY = MVT::i32;
2078 if (OVT == MVT::v2i64) {
2079 INTTY = MVT::v2i32;
2080 }
2081 if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
2082 && INTTY == MVT::i32) {
2083 DST = DAG.getNode(AMDILISD::ADD,
2084 DL,
2085 OVT,
2086 LHS, RHS);
2087 } else {
2088 SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
2089 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
2090 LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
2091 RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
2092 LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
2093 RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
2094 INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
2095 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
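      // Propagate the carry: after the low add, INTLO <u RHSLO exactly when
      // the 32-bit add wrapped. AMDILISD::CMP yields an all-ones mask on
      // true, so negating it produces the +1 to add into the high half.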
2096 SDValue cmp;
2097 cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2098 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
2099 INTLO, RHSLO);
2100 cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
2101 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
2102 DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
2103 INTLO, INTHI);
2104 }
2105 } else {
2106 if (LHS.getOpcode() == ISD::FrameIndex ||
2107 RHS.getOpcode() == ISD::FrameIndex) {
2108 DST = DAG.getNode(AMDILISD::ADDADDR,
2109 DL,
2110 OVT,
2111 LHS, RHS);
2112 } else {
2113 if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
2114 && LHS.getNumOperands()
2115 && RHS.getNumOperands()) {
2116 bool is24bitMAD = false;
2117 bool is32bitMAD = false;
2118 const ConstantSDNode *LHSConstOpCode =
2119 dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
2120 const ConstantSDNode *RHSConstOpCode =
2121 dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
2122 if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
2123 || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
2124 || LHS.getOpcode() == ISD::MUL
2125 || RHS.getOpcode() == ISD::MUL) {
2126 SDValue Op1, Op2, Op3;
2127 // FIXME: Fix this so that it works for unsigned 24bit ops.
2128 if (LHS.getOpcode() == ISD::MUL) {
2129 Op1 = LHS.getOperand(0);
2130 Op2 = LHS.getOperand(1);
2131 Op3 = RHS;
2132 } else if (RHS.getOpcode() == ISD::MUL) {
2133 Op1 = RHS.getOperand(0);
2134 Op2 = RHS.getOperand(1);
2135 Op3 = LHS;
2136 } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
2137 Op1 = LHS.getOperand(0);
2138 Op2 = DAG.getConstant(
2139 1 << LHSConstOpCode->getZExtValue(), MVT::i32);
2140 Op3 = RHS;
2141 } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
2142 Op1 = RHS.getOperand(0);
2143 Op2 = DAG.getConstant(
2144 1 << RHSConstOpCode->getZExtValue(), MVT::i32);
2145 Op3 = LHS;
2146 }
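          // At this point the add has been rewritten as Op1 * Op2 + Op3:
          // a shift-by-constant on either side was strength-reduced to a
          // multiply by (1 << c) so both patterns can feed a single MAD.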
2147 checkMADType(Op, stm, is24bitMAD, is32bitMAD);
2148 // We can possibly do a MAD transform!
2149 if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
2150 uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
2151 SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2152 DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2153 DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
                Op1, Op2, Op3);
            // The MAD fully replaces the ADD; don't fall through to the
            // unconditional ADD below, which would overwrite DST.
            return DST;
2155 } else if(is32bitMAD) {
2156 SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2157 DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2158 DL, Tys, DAG.getEntryNode(),
2159 DAG.getConstant(
2160 AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
                Op1, Op2, Op3);
            return DST;
2162 }
2163 }
2164 }
2165 DST = DAG.getNode(AMDILISD::ADD,
2166 DL,
2167 OVT,
2168 LHS, RHS);
2169 }
2170 }
2171 return DST;
2172 }
2173 SDValue
2174 AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
2175 uint32_t bits) const
2176 {
2177 DebugLoc DL = Op.getDebugLoc();
2178 EVT INTTY = Op.getValueType();
2179 EVT FPTY;
2180 if (INTTY.isVector()) {
2181 FPTY = EVT(MVT::getVectorVT(MVT::f32,
2182 INTTY.getVectorNumElements()));
2183 } else {
2184 FPTY = EVT(MVT::f32);
2185 }
  /* static inline uint
     __clz_Nbit(uint x)
     {
       uint ix = 0x3f800000U | x;
       float tp = as_float(ix);
       float t = tp + -1.0f;
       uint tint = as_uint(t);
       int cmp = (x != 0);
       uint tsrc = tint >> 23;
       uint tmask = tsrc & 0xffU;
       uint cst = (103 + N) - tmask;
       return cmp ? cst : N;
     }
  */
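  // Illustrative walk-through (not in the original source): for x = 1 and
  // N = 32, 0x3f800000 | 1 reinterprets as 1.0f + 2^-23; subtracting 1.0f
  // leaves 2^-23, whose biased exponent field is 127 - 23 = 104, so
  // cst = (103 + 32) - 104 = 31 == clz(1).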
  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
      && "genCLZuN only works on 32-bit types");
2202 // uint x = Op
2203 SDValue x = Op;
2204 // xornode = 0x3f800000 | x
2205 SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
2206 DAG.getConstant(0x3f800000, INTTY), x);
2207 // float tp = as_float(xornode)
2208 SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
2209 // float t = tp + -1.0f
2210 SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
2211 DAG.getConstantFP(-1.0f, FPTY));
2212 // uint tint = as_uint(t)
2213 SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
2214 // int cmp = (x != 0)
2215 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2216 DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
2217 DAG.getConstant(0, INTTY));
2218 // uint tsrc = tint >> 23
2219 SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
2220 DAG.getConstant(23, INTTY));
2221 // uint tmask = tsrc & 0xFF
2222 SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
2223 DAG.getConstant(0xFFU, INTTY));
2224 // uint cst = (103 + bits) - tmask
2225 SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
2226 DAG.getConstant((103U + bits), INTTY), tmask);
2227 // return cmp ? cst : N
2228 cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
2229 DAG.getConstant(bits, INTTY));
2230 return cst;
2231 }
2232
2233 SDValue
2234 AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
2235 {
2236 SDValue DST = SDValue();
2237 DebugLoc DL = Op.getDebugLoc();
2238 EVT INTTY = Op.getValueType();
2239 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2240 &this->getTargetMachine())->getSubtargetImpl();
2241 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
2242 //__clz_32bit(uint u)
2243 //{
2244 // int z = __amdil_ffb_hi(u) ;
2245 // return z < 0 ? 32 : z;
2246 // }
2247 // uint u = op
2248 SDValue u = Op;
2249 // int z = __amdil_ffb_hi(u)
2250 SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
2251 // int cmp = z < 0
2252 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2253 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
2254 z, DAG.getConstant(0, INTTY));
2255 // return cmp ? 32 : z
2256 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
2257 DAG.getConstant(32, INTTY), z);
2258 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
2259 // static inline uint
2260 //__clz_32bit(uint x)
2261 //{
2262 // uint zh = __clz_16bit(x >> 16);
2263 // uint zl = __clz_16bit(x & 0xffffU);
2264 // return zh == 16U ? 16U + zl : zh;
2265 //}
2266 // uint x = Op
2267 SDValue x = Op;
2268 // uint xs16 = x >> 16
2269 SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
2270 DAG.getConstant(16, INTTY));
2271 // uint zh = __clz_16bit(xs16)
2272 SDValue zh = genCLZuN(xs16, DAG, 16);
2273 // uint xa16 = x & 0xFFFF
2274 SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
2275 DAG.getConstant(0xFFFFU, INTTY));
2276 // uint zl = __clz_16bit(xa16)
2277 SDValue zl = genCLZuN(xa16, DAG, 16);
2278 // uint cmp = zh == 16U
2279 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2280 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
2281 zh, DAG.getConstant(16U, INTTY));
2282 // uint zl16 = zl + 16
2283 SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
2284 DAG.getConstant(16, INTTY), zl);
2285 // return cmp ? zl16 : zh
2286 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
2287 cmp, zl16, zh);
2288 } else {
2289 assert(0 && "Attempting to generate a CLZ function with an"
2290 " unknown graphics card");
2291 }
2292 return DST;
2293 }
2294 SDValue
2295 AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
2296 {
2297 SDValue DST = SDValue();
2298 DebugLoc DL = Op.getDebugLoc();
2299 EVT INTTY;
2300 EVT LONGTY = Op.getValueType();
2301 bool isVec = LONGTY.isVector();
2302 if (isVec) {
2303 INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
2304 .getVectorNumElements()));
2305 } else {
2306 INTTY = EVT(MVT::i32);
2307 }
2308 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2309 &this->getTargetMachine())->getSubtargetImpl();
2310 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
2311 // Evergreen:
2312 // static inline uint
2313 // __clz_u64(ulong x)
2314 // {
2315 //uint zhi = __clz_32bit((uint)(x >> 32));
2316 //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
2317 //return zhi == 32U ? 32U + zlo : zhi;
2318 //}
2319 //ulong x = op
2320 SDValue x = Op;
    // uint xlo = x & 0xFFFFFFFF
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = x >> 32
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
2325 // uint zhi = __clz_32bit(xhi)
2326 SDValue zhi = genCLZu32(xhi, DAG);
2327 // uint zlo = __clz_32bit(xlo)
2328 SDValue zlo = genCLZu32(xlo, DAG);
2329 // uint cmp = zhi == 32
2330 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2331 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
2332 zhi, DAG.getConstant(32U, INTTY));
2333 // uint zlop32 = 32 + zlo
2334 SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
2335 DAG.getConstant(32U, INTTY), zlo);
2336 // return cmp ? zlop32: zhi
2337 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
2338 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
2339 // HD4XXX:
2340 // static inline uint
2341 //__clz_64bit(ulong x)
2342 //{
2343 //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
2344 //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
2345 //uint zl = __clz_23bit((uint)x & 0x7fffffU);
2346 //uint r = zh == 18U ? 18U + zm : zh;
2347 //return zh + zm == 41U ? 41U + zl : r;
2348 //}
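    // The operand is split into 18 + 23 + 23 bit chunks because the
    // float-normalization trick in genCLZuN can only handle inputs that fit
    // in the 23-bit single-precision mantissa field.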
2349 //ulong x = Op
2350 SDValue x = Op;
2351 // ulong xs46 = x >> 46
2352 SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
2353 DAG.getConstant(46, LONGTY));
2354 // uint ixs46 = (uint)xs46
2355 SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
2356 // ulong xs23 = x >> 23
2357 SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
2358 DAG.getConstant(23, LONGTY));
2359 // uint ixs23 = (uint)xs23
2360 SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
2361 // uint xs23m23 = ixs23 & 0x7FFFFF
2362 SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
2363 DAG.getConstant(0x7fffffU, INTTY));
2364 // uint ix = (uint)x
2365 SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
2366 // uint xm23 = ix & 0x7FFFFF
2367 SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
2368 DAG.getConstant(0x7fffffU, INTTY));
2369 // uint zh = __clz_23bit(ixs46)
2370 SDValue zh = genCLZuN(ixs46, DAG, 23);
2371 // uint zm = __clz_23bit(xs23m23)
2372 SDValue zm = genCLZuN(xs23m23, DAG, 23);
2373 // uint zl = __clz_23bit(xm23)
2374 SDValue zl = genCLZuN(xm23, DAG, 23);
2375 // uint zhm5 = zh - 5
2376 SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
2377 DAG.getConstant(-5U, INTTY));
2378 SDValue const18 = DAG.getConstant(18, INTTY);
2379 SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = zhm5 == 18
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = zhm5zm == 41
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
2397 // return cmp2 ? zlp41 : r
2398 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
2399 } else {
2400 assert(0 && "Attempting to generate a CLZ function with an"
2401 " unknown graphics card");
2402 }
2403 return DST;
2404 }
2405 SDValue
2406 AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
2407 bool includeSign) const
2408 {
2409 EVT INTVT;
2410 EVT LONGVT;
2411 SDValue DST;
2412 DebugLoc DL = RHS.getDebugLoc();
2413 EVT RHSVT = RHS.getValueType();
2414 bool isVec = RHSVT.isVector();
2415 if (isVec) {
2416 LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
2417 .getVectorNumElements()));
2418 INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
2419 .getVectorNumElements()));
2420 } else {
2421 LONGVT = EVT(MVT::i64);
2422 INTVT = EVT(MVT::i32);
2423 }
2424 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2425 &this->getTargetMachine())->getSubtargetImpl();
2426 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2427 // unsigned version:
2428 // uint uhi = (uint)(d * 0x1.0p-32);
2429 // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
2430 // return as_ulong2((uint2)(ulo, uhi));
2431 //
2432 // signed version:
2433 // double ad = fabs(d);
2434 // long l = unsigned_version(ad);
2435 // long nl = -l;
2436 // return d == ad ? l : nl;
2437 SDValue d = RHS;
2438 if (includeSign) {
2439 d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
2440 }
2441 SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
2442 DAG.getConstantFP(0x2f800000, RHSVT));
2443 SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
2444 SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
2445 ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
2446 DAG.getConstantFP(0xcf800000, RHSVT), d);
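    // 0x2f800000 and 0xcf800000 are the IEEE-754 single-precision bit
    // patterns of 0x1.0p-32f and -0x1.0p+32f, the scale factors from the
    // pseudocode above.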
2447 SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
2448 SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
2449 if (includeSign) {
2450 SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
2451 SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
2452 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
2453 RHS, d);
2454 l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
2455 }
2456 DST = l;
2457 } else {
2458 /*
2459 __attribute__((always_inline)) long
2460 cast_f64_to_i64(double d)
2461 {
2462 // Convert d in to 32-bit components
2463 long x = as_long(d);
2464 xhi = LCOMPHI(x);
2465 xlo = LCOMPLO(x);
2466
2467 // Generate 'normalized' mantissa
2468 mhi = xhi | 0x00100000; // hidden bit
2469 mhi <<= 11;
2470 temp = xlo >> (32 - 11);
       mhi |= temp;
2472 mlo = xlo << 11;
2473
2474 // Compute shift right count from exponent
2475 e = (xhi >> (52-32)) & 0x7ff;
2476 sr = 1023 + 63 - e;
2477 srge64 = sr >= 64;
2478 srge32 = sr >= 32;
2479
2480 // Compute result for 0 <= sr < 32
2481 rhi0 = mhi >> (sr &31);
2482 rlo0 = mlo >> (sr &31);
2483 temp = mhi << (32 - sr);
2484 temp |= rlo0;
2485 rlo0 = sr ? temp : rlo0;
2486
2487 // Compute result for 32 <= sr
2488 rhi1 = 0;
2489 rlo1 = srge64 ? 0 : rhi0;
2490
2491 // Pick between the 2 results
2492 rhi = srge32 ? rhi1 : rhi0;
2493 rlo = srge32 ? rlo1 : rlo0;
2494
2495 // Optional saturate on overflow
2496 srlt0 = sr < 0;
2497 rhi = srlt0 ? MAXVALUE : rhi;
2498 rlo = srlt0 ? MAXVALUE : rlo;
2499
2500 // Create long
2501 res = LCREATE( rlo, rhi );
2502
2503 // Deal with sign bit (ignoring whether result is signed or unsigned value)
2504 if (includeSign) {
         sign = ((signed int) xhi) >> 31;  // fill with sign bit
2506 sign = LCREATE( sign, sign );
2507 res += sign;
2508 res ^= sign;
2509 }
2510
2511 return res;
2512 }
2513 */
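    // Illustrative check (not in the original source): d = 2.0 has
    // xhi = 0x40000000, so e = 0x400 = 1024 and sr = 1023 + 63 - 1024 = 62.
    // The normalized mantissa is mhi = 0x80000000, and since 32 <= sr < 64
    // the result collapses to mhi >> (sr & 31) = 2, as expected.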
2514 SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
2515 SDValue c32 = DAG.getConstant( 32, INTVT );
2516
2517 // Convert d in to 32-bit components
2518 SDValue d = RHS;
2519 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
2520 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
2521 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
2522
2523 // Generate 'normalized' mantissa
2524 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
2525 xhi, DAG.getConstant( 0x00100000, INTVT ) );
2526 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
2527 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
2528 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
2529 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
2530 SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );
2531
2532 // Compute shift right count from exponent
2533 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
2534 xhi, DAG.getConstant( 52-32, INTVT ) );
2535 e = DAG.getNode( ISD::AND, DL, INTVT,
2536 e, DAG.getConstant( 0x7ff, INTVT ) );
2537 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
2538 DAG.getConstant( 1023 + 63, INTVT ), e );
2539 SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2540 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
2541 sr, DAG.getConstant(64, INTVT));
2542 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2543 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
2544 sr, DAG.getConstant(32, INTVT));
2545
2546 // Compute result for 0 <= sr < 32
2547 SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
2548 SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
2549 temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
2550 temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
2551 temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
2552 rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );
2553
2554 // Compute result for 32 <= sr
2555 SDValue rhi1 = DAG.getConstant( 0, INTVT );
2556 SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2557 srge64, rhi1, rhi0 );
2558
2559 // Pick between the 2 results
2560 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2561 srge32, rhi1, rhi0 );
2562 SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2563 srge32, rlo1, rlo0 );
2564
2565 // Create long
2566 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
2567
2568 // Deal with sign bit
2569 if (includeSign) {
2570 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
2571 xhi, DAG.getConstant( 31, INTVT ) );
2572 sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
2573 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
2574 res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
2575 }
2576 DST = res;
2577 }
2578 return DST;
2579 }
2580 SDValue
2581 AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
2582 bool includeSign) const
2583 {
2584 EVT INTVT;
2585 EVT LONGVT;
2586 DebugLoc DL = RHS.getDebugLoc();
2587 EVT RHSVT = RHS.getValueType();
2588 bool isVec = RHSVT.isVector();
2589 if (isVec) {
2590 LONGVT = EVT(MVT::getVectorVT(MVT::i64,
2591 RHSVT.getVectorNumElements()));
2592 INTVT = EVT(MVT::getVectorVT(MVT::i32,
2593 RHSVT.getVectorNumElements()));
2594 } else {
2595 LONGVT = EVT(MVT::i64);
2596 INTVT = EVT(MVT::i32);
2597 }
2598 /*
2599 __attribute__((always_inline)) int
2600 cast_f64_to_[u|i]32(double d)
2601 {
2602 // Convert d in to 32-bit components
2603 long x = as_long(d);
2604 xhi = LCOMPHI(x);
2605 xlo = LCOMPLO(x);
2606
2607 // Generate 'normalized' mantissa
2608 mhi = xhi | 0x00100000; // hidden bit
2609 mhi <<= 11;
2610 temp = xlo >> (32 - 11);
      mhi |= temp;
2612
2613 // Compute shift right count from exponent
2614 e = (xhi >> (52-32)) & 0x7ff;
2615 sr = 1023 + 31 - e;
2616 srge32 = sr >= 32;
2617
2618 // Compute result for 0 <= sr < 32
2619 res = mhi >> (sr &31);
2620 res = srge32 ? 0 : res;
2621
2622 // Optional saturate on overflow
2623 srlt0 = sr < 0;
2624 res = srlt0 ? MAXVALUE : res;
2625
2626 // Deal with sign bit (ignoring whether result is signed or unsigned value)
2627 if (includeSign) {
        sign = ((signed int) xhi) >> 31;  // fill with sign bit
2629 res += sign;
2630 res ^= sign;
2631 }
2632
2633 return res;
2634 }
2635 */
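  // Illustrative check (not in the original source): d = 2.0 gives
  // e = 1024, sr = 1023 + 31 - 1024 = 30, and a normalized mantissa of
  // mhi = 0x80000000, so res = mhi >> 30 = 2.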
2636 SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
2637
2638 // Convert d in to 32-bit components
2639 SDValue d = RHS;
2640 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
2641 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
2642 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
2643
2644 // Generate 'normalized' mantissa
2645 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
2646 xhi, DAG.getConstant( 0x00100000, INTVT ) );
2647 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
2648 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
2649 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
2650 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
2651
2652 // Compute shift right count from exponent
2653 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
2654 xhi, DAG.getConstant( 52-32, INTVT ) );
2655 e = DAG.getNode( ISD::AND, DL, INTVT,
2656 e, DAG.getConstant( 0x7ff, INTVT ) );
2657 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
2658 DAG.getConstant( 1023 + 31, INTVT ), e );
2659 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2660 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
2661 sr, DAG.getConstant(32, INTVT));
2662
2663 // Compute result for 0 <= sr < 32
2664 SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
2665 res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2666 srge32, DAG.getConstant(0,INTVT), res );
2667
2668 // Deal with sign bit
2669 if (includeSign) {
2670 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
2671 xhi, DAG.getConstant( 31, INTVT ) );
2672 res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
2673 res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
2674 }
2675 return res;
2676 }
2677 SDValue
2678 AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
2679 {
2680 SDValue RHS = Op.getOperand(0);
2681 EVT RHSVT = RHS.getValueType();
2682 MVT RST = RHSVT.getScalarType().getSimpleVT();
2683 EVT LHSVT = Op.getValueType();
2684 MVT LST = LHSVT.getScalarType().getSimpleVT();
2685 DebugLoc DL = Op.getDebugLoc();
2686 SDValue DST;
2687 const AMDILTargetMachine*
2688 amdtm = reinterpret_cast<const AMDILTargetMachine*>
2689 (&this->getTargetMachine());
2690 const AMDILSubtarget*
2691 stm = static_cast<const AMDILSubtarget*>(
2692 amdtm->getSubtargetImpl());
2693 if (RST == MVT::f64 && RHSVT.isVector()
2694 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We don't support vector 64-bit floating point conversions.
2696 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
2697 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2698 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2699 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
2700 if (!x) {
2701 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2702 } else {
2703 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
2704 DST, op, DAG.getTargetConstant(x, MVT::i32));
2705 }
2706 }
2707 } else {
2708 if (RST == MVT::f64
2709 && LST == MVT::i32) {
2710 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2711 DST = SDValue(Op.getNode(), 0);
2712 } else {
2713 DST = genf64toi32(RHS, DAG, true);
2714 }
2715 } else if (RST == MVT::f64
2716 && LST == MVT::i64) {
2717 DST = genf64toi64(RHS, DAG, true);
2718 } else if (RST == MVT::f64
2719 && (LST == MVT::i8 || LST == MVT::i16)) {
2720 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2721 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
2722 } else {
2723 SDValue ToInt = genf64toi32(RHS, DAG, true);
2724 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
2725 }
2726
2727 } else {
2728 DST = SDValue(Op.getNode(), 0);
2729 }
2730 }
2731 return DST;
2732 }
2733
2734 SDValue
2735 AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
2736 {
2737 SDValue DST;
2738 SDValue RHS = Op.getOperand(0);
2739 EVT RHSVT = RHS.getValueType();
2740 MVT RST = RHSVT.getScalarType().getSimpleVT();
2741 EVT LHSVT = Op.getValueType();
2742 MVT LST = LHSVT.getScalarType().getSimpleVT();
2743 DebugLoc DL = Op.getDebugLoc();
2744 const AMDILTargetMachine*
2745 amdtm = reinterpret_cast<const AMDILTargetMachine*>
2746 (&this->getTargetMachine());
2747 const AMDILSubtarget*
2748 stm = static_cast<const AMDILSubtarget*>(
2749 amdtm->getSubtargetImpl());
2750 if (RST == MVT::f64 && RHSVT.isVector()
2751 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We don't support vector 64-bit floating point conversions.
2753 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
2754 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2755 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
      op = DAG.getNode(ISD::FP_TO_UINT, DL, LST, op);
2757 if (!x) {
2758 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2759 } else {
2760 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
2761 DST, op, DAG.getTargetConstant(x, MVT::i32));
2762 }
2763
2764 }
2765 } else {
2766 if (RST == MVT::f64
2767 && LST == MVT::i32) {
2768 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2769 DST = SDValue(Op.getNode(), 0);
2770 } else {
2771 DST = genf64toi32(RHS, DAG, false);
2772 }
2773 } else if (RST == MVT::f64
2774 && LST == MVT::i64) {
2775 DST = genf64toi64(RHS, DAG, false);
2776 } else if (RST == MVT::f64
2777 && (LST == MVT::i8 || LST == MVT::i16)) {
2778 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2779 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
2780 } else {
2781 SDValue ToInt = genf64toi32(RHS, DAG, false);
2782 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
2783 }
2784
2785 } else {
2786 DST = SDValue(Op.getNode(), 0);
2787 }
2788 }
2789 return DST;
2790 }
2791 SDValue
2792 AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
2793 SelectionDAG &DAG) const
2794 {
2795 EVT RHSVT = RHS.getValueType();
2796 DebugLoc DL = RHS.getDebugLoc();
2797 EVT INTVT;
2798 EVT LONGVT;
2799 bool isVec = RHSVT.isVector();
2800 if (isVec) {
2801 LONGVT = EVT(MVT::getVectorVT(MVT::i64,
2802 RHSVT.getVectorNumElements()));
2803 INTVT = EVT(MVT::getVectorVT(MVT::i32,
2804 RHSVT.getVectorNumElements()));
2805 } else {
2806 LONGVT = EVT(MVT::i64);
2807 INTVT = EVT(MVT::i32);
2808 }
2809 SDValue x = RHS;
2810 const AMDILTargetMachine*
2811 amdtm = reinterpret_cast<const AMDILTargetMachine*>
2812 (&this->getTargetMachine());
2813 const AMDILSubtarget*
2814 stm = static_cast<const AMDILSubtarget*>(
2815 amdtm->getSubtargetImpl());
2816 if (stm->calVersion() >= CAL_VERSION_SC_135) {
2817 // unsigned x = RHS;
    // ulong xd = ((ulong)0x4330_0000 << 32) | x;
2819 // double d = as_double( xd );
2820 // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
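    // This works because any 32-bit x fits in the 52-bit mantissa: OR-ing x
    // into the low word of the double 0x1.0p+52 yields exactly 2^52 + x, so
    // subtracting 2^52 recovers x converted to double.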
2821 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
2822 DAG.getConstant( 0x43300000, INTVT ) );
2823 SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
2824 SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
2825 DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
2826 return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
2827 } else {
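    // No fast path: assemble the IEEE-754 double by hand from exponent and
    // fraction. Illustrative check (not in the original source): x = 1 has
    // clz = 31, so exp = 1023; the shifted fraction loses its hidden bit
    // and becomes 0, giving the bit pattern 0x3FF00000_00000000 == 1.0.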
2828 SDValue clz = genCLZu32(x, DAG);
2829
2830 // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
2831 // Except for an input 0... which requires a 0 exponent
2832 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
2833 DAG.getConstant( (1023+31), INTVT), clz );
2834 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );
2835
2836 // Normalize frac
2837 SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );
2838
2839 // Eliminate hidden bit
2840 rhi = DAG.getNode( ISD::AND, DL, INTVT,
2841 rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
2842
2843 // Pack exponent and frac
2844 SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
2845 rhi, DAG.getConstant( (32 - 11), INTVT ) );
2846 rhi = DAG.getNode( ISD::SRL, DL, INTVT,
2847 rhi, DAG.getConstant( 11, INTVT ) );
2848 exp = DAG.getNode( ISD::SHL, DL, INTVT,
2849 exp, DAG.getConstant( 20, INTVT ) );
2850 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
2851
2852 // Convert 2 x 32 in to 1 x 64, then to double precision float type
2853 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
2854 return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
2855 }
2856 }
2857 SDValue
2858 AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
2859 SelectionDAG &DAG) const
2860 {
2861 EVT RHSVT = RHS.getValueType();
2862 DebugLoc DL = RHS.getDebugLoc();
2863 EVT INTVT;
2864 EVT LONGVT;
2865 bool isVec = RHSVT.isVector();
2866 if (isVec) {
2867 INTVT = EVT(MVT::getVectorVT(MVT::i32,
2868 RHSVT.getVectorNumElements()));
2869 } else {
2870 INTVT = EVT(MVT::i32);
2871 }
2872 LONGVT = RHSVT;
2873 SDValue x = RHS;
2874 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2875 &this->getTargetMachine())->getSubtargetImpl();
2876 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2877 // double dhi = (double)(as_uint2(x).y);
2878 // double dlo = (double)(as_uint2(x).x);
2879 // return mad(dhi, 0x1.0p+32, dlo)
2880 SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
2881 dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
2882 SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
2883 dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
2884 return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
2885 DAG.getConstantFP(0x4f800000, LHSVT), dlo);
2886 } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
2887 // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
2888 // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
2889 // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
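    // Here lo == 2^52 + (u & 0xffffffff) and hi == 2^84 + (u >> 32) * 2^32,
    // so hi - (0x1.0p+84 + 0x1.0p+52) + lo ==
    // (u >> 32) * 2^32 + (u & 0xffffffff) == u, correctly rounded by the
    // final add.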
2890 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
2891 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
2892 SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
2893 SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
2894 SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
2895 SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
2896 SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
2897 DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
2898 hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
2899 return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
2900
2901 } else {
2902 SDValue clz = genCLZu64(x, DAG);
2903 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
2904 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
2905
2906 // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
2907 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
2908 DAG.getConstant( (1023+63), INTVT), clz );
2909 SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
2910 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2911 mash, exp, mash ); // exp = exp, or 0 if input was 0
2912
2913 // Normalize frac
2914 SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
2915 clz, DAG.getConstant( 31, INTVT ) );
2916 SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
2917 DAG.getConstant( 32, INTVT ), clz31 );
2918 SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
2919 SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
2920 t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
2921 SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
2922 SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2923 SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2924 SDValue rlo2 = DAG.getConstant( 0, INTVT );
2925 SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
2926 clz, DAG.getConstant( 32, INTVT ) );
2927 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2928 clz32, rhi2, rhi1 );
2929 SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2930 clz32, rlo2, rlo1 );
2931
2932 // Eliminate hidden bit
2933 rhi = DAG.getNode( ISD::AND, DL, INTVT,
2934 rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
2935
2936 // Save bits needed to round properly
2937 SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
2938 rlo, DAG.getConstant( 0x7ff, INTVT ) );
2939
2940 // Pack exponent and frac
2941 rlo = DAG.getNode( ISD::SRL, DL, INTVT,
2942 rlo, DAG.getConstant( 11, INTVT ) );
2943 SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
2944 rhi, DAG.getConstant( (32 - 11), INTVT ) );
2945 rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
2946 rhi = DAG.getNode( ISD::SRL, DL, INTVT,
2947 rhi, DAG.getConstant( 11, INTVT ) );
2948 exp = DAG.getNode( ISD::SHL, DL, INTVT,
2949 exp, DAG.getConstant( 20, INTVT ) );
2950 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
2951
2952 // Compute rounding bit
2953 SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
2954 rlo, DAG.getConstant( 1, INTVT ) );
2955 SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
2956 round, DAG.getConstant( 0x3ff, INTVT ) );
2957 grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2958 DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
2959 grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
2960 grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
2961 round = DAG.getNode( ISD::SRL, DL, INTVT,
2962 round, DAG.getConstant( 10, INTVT ) );
2963 round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
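    // Round-to-nearest-even: 'round' now holds the guard bit (bit 10 of the
    // 11 discarded bits) masked by 'grs', which is nonzero when any sticky
    // bit is set or the kept LSB is odd; adding it below rounds up exactly
    // in those cases.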
2964
2965 // Add rounding bit
2966 SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
2967 round, DAG.getConstant( 0, INTVT ) );
2968 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
2969 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
2970 return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
2971 }
2972 }
2973 SDValue
2974 AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
2975 {
2976 SDValue RHS = Op.getOperand(0);
2977 EVT RHSVT = RHS.getValueType();
2978 MVT RST = RHSVT.getScalarType().getSimpleVT();
2979 EVT LHSVT = Op.getValueType();
2980 MVT LST = LHSVT.getScalarType().getSimpleVT();
2981 DebugLoc DL = Op.getDebugLoc();
2982 SDValue DST;
2983 EVT INTVT;
2984 EVT LONGVT;
2985 const AMDILTargetMachine*
2986 amdtm = reinterpret_cast<const AMDILTargetMachine*>
2987 (&this->getTargetMachine());
2988 const AMDILSubtarget*
2989 stm = static_cast<const AMDILSubtarget*>(
2990 amdtm->getSubtargetImpl());
2991 if (LST == MVT::f64 && LHSVT.isVector()
2992 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We don't support vector 64-bit floating point conversions.
2994 DST = Op;
2995 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
2996 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2997 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2998 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
2999 if (!x) {
3000 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3001 } else {
3002 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3003 op, DAG.getTargetConstant(x, MVT::i32));
3004 }
3005
3006 }
3007 } else {
3008
3009 if (RST == MVT::i32
3010 && LST == MVT::f64) {
3011 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3012 DST = SDValue(Op.getNode(), 0);
3013 } else {
3014 DST = genu32tof64(RHS, LHSVT, DAG);
3015 }
3016 } else if (RST == MVT::i64
3017 && LST == MVT::f64) {
3018 DST = genu64tof64(RHS, LHSVT, DAG);
3019 } else {
3020 DST = SDValue(Op.getNode(), 0);
3021 }
3022 }
3023 return DST;
3024 }
3025
3026 SDValue
3027 AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3028 {
3029 SDValue RHS = Op.getOperand(0);
3030 EVT RHSVT = RHS.getValueType();
3031 MVT RST = RHSVT.getScalarType().getSimpleVT();
3032 EVT INTVT;
3033 EVT LONGVT;
3034 SDValue DST;
3035 bool isVec = RHSVT.isVector();
3036 DebugLoc DL = Op.getDebugLoc();
3037 EVT LHSVT = Op.getValueType();
3038 MVT LST = LHSVT.getScalarType().getSimpleVT();
3039 const AMDILTargetMachine*
3040 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3041 (&this->getTargetMachine());
3042 const AMDILSubtarget*
3043 stm = static_cast<const AMDILSubtarget*>(
3044 amdtm->getSubtargetImpl());
3045 if (LST == MVT::f64 && LHSVT.isVector()
3046 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We don't support vector 64-bit floating point conversions.
3048 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3049 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3050 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
      op = DAG.getNode(ISD::SINT_TO_FP, DL, LST, op);
3052 if (!x) {
3053 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3054 } else {
3055 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3056 op, DAG.getTargetConstant(x, MVT::i32));
3057 }
3058
3059 }
3060 } else {
3061
3062 if (isVec) {
3063 LONGVT = EVT(MVT::getVectorVT(MVT::i64,
3064 RHSVT.getVectorNumElements()));
3065 INTVT = EVT(MVT::getVectorVT(MVT::i32,
3066 RHSVT.getVectorNumElements()));
3067 } else {
3068 LONGVT = EVT(MVT::i64);
3069 INTVT = EVT(MVT::i32);
3070 }
3071 MVT RST = RHSVT.getScalarType().getSimpleVT();
3072 if ((RST == MVT::i32 || RST == MVT::i64)
3073 && LST == MVT::f64) {
3074 if (RST == MVT::i32) {
3075 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3076 DST = SDValue(Op.getNode(), 0);
3077 return DST;
3078 }
3079 }
3080 SDValue c31 = DAG.getConstant( 31, INTVT );
3081 SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
3082
3083 SDValue S; // Sign, as 0 or -1
3084 SDValue Sbit; // Sign bit, as one bit, MSB only.
3085 if (RST == MVT::i32) {
3086 Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
3087 S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
3088 } else { // 64-bit case... SRA of 64-bit values is slow
3089 SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
3090 Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
3091 SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
3092 S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
3093 }
3094
3095 // get abs() of input value, given sign as S (0 or -1)
3096 // SpI = RHS + S
3097 SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
3098 // SpIxS = SpI ^ S
3099 SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
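      // A worked example of the abs() trick above, for 32-bit x = -5:
      //   S     = x >> 31   = -1 (all ones)
      //   SpI   = x + S     = -6
      //   SpIxS = SpI ^ S   = ~(-6) = 5 = |x|
      // For x >= 0, S is 0 and both steps are identities.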
3100
3101 // Convert unsigned value to double precision
3102 SDValue R;
3103 if (RST == MVT::i32) {
3104 // r = cast_u32_to_f64(SpIxS)
3105 R = genu32tof64(SpIxS, LHSVT, DAG);
3106 } else {
3107 // r = cast_u64_to_f64(SpIxS)
3108 R = genu64tof64(SpIxS, LHSVT, DAG);
3109 }
3110
3111 // drop in the sign bit
3112 SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
3113 SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
3114 SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
3115 thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
3116 t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
3117 DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
3118 } else {
3119 DST = SDValue(Op.getNode(), 0);
3120 }
3121 }
3122 return DST;
3123 }
3124 SDValue
3125 AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
3126 {
3127 SDValue LHS = Op.getOperand(0);
3128 SDValue RHS = Op.getOperand(1);
3129 DebugLoc DL = Op.getDebugLoc();
3130 EVT OVT = Op.getValueType();
3131 SDValue DST;
3132 bool isVec = RHS.getValueType().isVector();
3133 if (OVT.getScalarType() == MVT::i64) {
3134 /*const AMDILTargetMachine*
3135 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3136 (&this->getTargetMachine());
3137 const AMDILSubtarget*
3138 stm = dynamic_cast<const AMDILSubtarget*>(
3139 amdtm->getSubtargetImpl());*/
3140 MVT INTTY = MVT::i32;
3141 if (OVT == MVT::v2i64) {
3142 INTTY = MVT::v2i32;
3143 }
3144 SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
3145 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
3146 LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
3147 RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
3148 LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
3149 RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
3150 INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
3151 INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
3152 //TODO: need to use IBORROW on HD5XXX and later hardware
3153 SDValue cmp;
3154 if (OVT == MVT::i64) {
3155 cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
3156 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
3157 LHSLO, RHSLO);
3158 } else {
3159 SDValue cmplo;
3160 SDValue cmphi;
3161 SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3162 DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
3163 SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3164 DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
3165 SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3166 DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
3167 SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3168 DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
3169 cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
3170 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
3171 LHSRLO, RHSRLO);
3172 cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
3173 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
3174 LHSRHI, RHSRHI);
3175 cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
3176 cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
3177 cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
3178 }
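    // AMDILISD::CMP returns all ones (-1) when LHSLO < RHSLO, i.e. when the
    // low-dword subtraction borrows, so adding cmp to INTHI below subtracts
    // the borrow from the high dword.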
3179 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
3180 DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
3181 INTLO, INTHI);
3182 } else {
3183 DST = SDValue(Op.getNode(), 0);
3184 }
3185 return DST;
3186 }
3187 SDValue
3188 AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
3189 {
3190 EVT OVT = Op.getValueType();
3191 SDValue DST;
3192 if (OVT.getScalarType() == MVT::f64) {
3193 DST = LowerFDIV64(Op, DAG);
3194 } else if (OVT.getScalarType() == MVT::f32) {
3195 DST = LowerFDIV32(Op, DAG);
3196 } else {
3197 DST = SDValue(Op.getNode(), 0);
3198 }
3199 return DST;
3200 }
3201
3202 SDValue
3203 AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
3204 {
3205 EVT OVT = Op.getValueType();
3206 SDValue DST;
3207 if (OVT.getScalarType() == MVT::i64) {
3208 DST = LowerSDIV64(Op, DAG);
3209 } else if (OVT.getScalarType() == MVT::i32) {
3210 DST = LowerSDIV32(Op, DAG);
3211 } else if (OVT.getScalarType() == MVT::i16
3212 || OVT.getScalarType() == MVT::i8) {
3213 DST = LowerSDIV24(Op, DAG);
3214 } else {
3215 DST = SDValue(Op.getNode(), 0);
3216 }
3217 return DST;
3218 }
3219
3220 SDValue
3221 AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
3222 {
3223 EVT OVT = Op.getValueType();
3224 SDValue DST;
3225 if (OVT.getScalarType() == MVT::i64) {
3226 DST = LowerUDIV64(Op, DAG);
3227 } else if (OVT.getScalarType() == MVT::i32) {
3228 DST = LowerUDIV32(Op, DAG);
3229 } else if (OVT.getScalarType() == MVT::i16
3230 || OVT.getScalarType() == MVT::i8) {
3231 DST = LowerUDIV24(Op, DAG);
3232 } else {
3233 DST = SDValue(Op.getNode(), 0);
3234 }
3235 return DST;
3236 }
3237
3238 SDValue
3239 AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
3240 {
3241 EVT OVT = Op.getValueType();
3242 SDValue DST;
3243 if (OVT.getScalarType() == MVT::i64) {
3244 DST = LowerSREM64(Op, DAG);
3245 } else if (OVT.getScalarType() == MVT::i32) {
3246 DST = LowerSREM32(Op, DAG);
3247 } else if (OVT.getScalarType() == MVT::i16) {
3248 DST = LowerSREM16(Op, DAG);
3249 } else if (OVT.getScalarType() == MVT::i8) {
3250 DST = LowerSREM8(Op, DAG);
3251 } else {
3252 DST = SDValue(Op.getNode(), 0);
3253 }
3254 return DST;
3255 }
3256
3257 SDValue
3258 AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
3259 {
3260 EVT OVT = Op.getValueType();
3261 SDValue DST;
3262 if (OVT.getScalarType() == MVT::i64) {
3263 DST = LowerUREM64(Op, DAG);
3264 } else if (OVT.getScalarType() == MVT::i32) {
3265 DST = LowerUREM32(Op, DAG);
3266 } else if (OVT.getScalarType() == MVT::i16) {
3267 DST = LowerUREM16(Op, DAG);
3268 } else if (OVT.getScalarType() == MVT::i8) {
3269 DST = LowerUREM8(Op, DAG);
3270 } else {
3271 DST = SDValue(Op.getNode(), 0);
3272 }
3273 return DST;
3274 }
3275
3276 SDValue
3277 AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
3278 {
3279 DebugLoc DL = Op.getDebugLoc();
3280 EVT OVT = Op.getValueType();
3281 SDValue DST;
3282 bool isVec = OVT.isVector();
3283 if (OVT.getScalarType() != MVT::i64)
3284 {
3285 DST = SDValue(Op.getNode(), 0);
3286 } else {
3287 assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
3288 // TODO: This needs to be turned into a tablegen pattern
3289 SDValue LHS = Op.getOperand(0);
3290 SDValue RHS = Op.getOperand(1);
3291
3292 MVT INTTY = MVT::i32;
3293 if (OVT == MVT::v2i64) {
3294 INTTY = MVT::v2i32;
3295 }
3296 // mul64(h1, l1, h0, l0)
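    // A short sketch of the identity used below (all arithmetic mod 2^64):
    //   (h1*2^32 + l1) * (h0*2^32 + l0)
    //       = mullo(l1, l0)
    //       + 2^32 * (h0*l1 + h1*l0 + mulhi(l1, l0))
    // The h1*h0 term overflows out of the low 64 bits and is dropped, so the
    // low dword is mullo(l1, l0) and the high dword is the sum of the two
    // cross products plus the high half of the low product.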
3297 SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
3298 DL,
3299 INTTY, LHS);
3300 SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
3301 DL,
3302 INTTY, LHS);
3303 SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
3304 DL,
3305 INTTY, RHS);
3306 SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
3307 DL,
3308 INTTY, RHS);
3309 // MULLO_UINT_1 r1, h0, l1
3310 SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
3311 DL,
3312 INTTY, RHSHI, LHSLO);
3313 // MULLO_UINT_1 r2, h1, l0
3314 SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
3315 DL,
3316 INTTY, RHSLO, LHSHI);
3317 // ADD_INT hr, r1, r2
3318 SDValue ADDHI = DAG.getNode(ISD::ADD,
3319 DL,
3320 INTTY, RHILLO, RLOHHI);
3321 // MULHI_UINT_1 r3, l1, l0
3322 SDValue RLOLLO = DAG.getNode(ISD::MULHU,
3323 DL,
3324 INTTY, RHSLO, LHSLO);
3325 // ADD_INT hr, hr, r3
3326 SDValue HIGH = DAG.getNode(ISD::ADD,
3327 DL,
3328 INTTY, ADDHI, RLOLLO);
3329 // MULLO_UINT_1 l3, l1, l0
3330 SDValue LOW = DAG.getNode(AMDILISD::UMUL,
3331 DL,
3332 INTTY, LHSLO, RHSLO);
3333 DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
3334 DL,
3335 OVT, LOW, HIGH);
3336 }
3337 return DST;
3338 }
3339 SDValue
3340 AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
3341 {
3342 EVT VT = Op.getValueType();
3343 SDValue Nodes1;
3344 SDValue second;
3345 SDValue third;
3346 SDValue fourth;
3347 DebugLoc DL = Op.getDebugLoc();
3348 Nodes1 = DAG.getNode(AMDILISD::VBUILD,
3349 DL,
3350 VT, Op.getOperand(0));
3351 #if 0
3352 bool allEqual = true;
3353 for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
3354 if (Op.getOperand(0) != Op.getOperand(x)) {
3355 allEqual = false;
3356 break;
3357 }
3358 }
3359 if (allEqual) {
3360 return Nodes1;
3361 }
3362 #endif
3363 switch(Op.getNumOperands()) {
3364 default:
3365 case 1:
3366 break;
3367 case 4:
3368 fourth = Op.getOperand(3);
3369 if (fourth.getOpcode() != ISD::UNDEF) {
3370 Nodes1 = DAG.getNode(
3371 ISD::INSERT_VECTOR_ELT,
3372 DL,
3373 Op.getValueType(),
3374 Nodes1,
3375 fourth,
3376 DAG.getConstant(7, MVT::i32));
3377 }
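    // fall through to insert the remaining elements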
3378 case 3:
3379 third = Op.getOperand(2);
3380 if (third.getOpcode() != ISD::UNDEF) {
3381 Nodes1 = DAG.getNode(
3382 ISD::INSERT_VECTOR_ELT,
3383 DL,
3384 Op.getValueType(),
3385 Nodes1,
3386 third,
3387 DAG.getConstant(6, MVT::i32));
3388 }
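    // fall through to insert the remaining elements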
3389 case 2:
3390 second = Op.getOperand(1);
3391 if (second.getOpcode() != ISD::UNDEF) {
3392 Nodes1 = DAG.getNode(
3393 ISD::INSERT_VECTOR_ELT,
3394 DL,
3395 Op.getValueType(),
3396 Nodes1,
3397 second,
3398 DAG.getConstant(5, MVT::i32));
3399 }
3400 break;
3401 };
3402 return Nodes1;
3403 }
3404
3405 SDValue
3406 AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
3407 SelectionDAG &DAG) const
3408 {
3409 DebugLoc DL = Op.getDebugLoc();
3410 EVT VT = Op.getValueType();
3411 const SDValue *ptr = NULL;
3412 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3413 uint32_t swizzleNum = 0;
3414 SDValue DST;
3415 if (!VT.isVector()) {
3416 SDValue Res = Op.getOperand(0);
3417 return Res;
3418 }
3419
3420 if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
3421 ptr = &Op.getOperand(1);
3422 } else {
3423 ptr = &Op.getOperand(0);
3424 }
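  // A hedged note on the VINSERT masks built below: 0x04030201 packs the
  // per-element source lanes 4..1 one byte each, so clearing byte N of
  // mask2 and setting the low bit of byte N in mask3 appears to mark
  // element N as the lane that receives the inserted scalar. This is an
  // inference from the usage here; the encoding is not documented in this
  // file.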
3425 if (CSDN) {
3426 swizzleNum = (uint32_t)CSDN->getZExtValue();
3427 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
3428 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
3429 DST = DAG.getNode(AMDILISD::VINSERT,
3430 DL,
3431 VT,
3432 Op.getOperand(0),
3433 *ptr,
3434 DAG.getTargetConstant(mask2, MVT::i32),
3435 DAG.getTargetConstant(mask3, MVT::i32));
3436 } else {
3437 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
3438 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
3439 SDValue res = DAG.getNode(AMDILISD::VINSERT,
3440 DL, VT, Op.getOperand(0), *ptr,
3441 DAG.getTargetConstant(mask2, MVT::i32),
3442 DAG.getTargetConstant(mask3, MVT::i32));
3443 for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
3444 mask2 = 0x04030201 & ~(0xFF << (x * 8));
3445 mask3 = 0x01010101 & (0xFF << (x * 8));
3446 SDValue t = DAG.getNode(AMDILISD::VINSERT,
3447 DL, VT, Op.getOperand(0), *ptr,
3448 DAG.getTargetConstant(mask2, MVT::i32),
3449 DAG.getTargetConstant(mask3, MVT::i32));
3450 SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
3451 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
3452 Op.getOperand(2), DAG.getConstant(x, MVT::i32));
3453 c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
3454 res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
3455 }
3456 DST = res;
3457 }
3458 return DST;
3459 }
3460
3461 SDValue
3462 AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
3463 SelectionDAG &DAG) const
3464 {
3465 EVT VT = Op.getValueType();
3466 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3467 uint64_t swizzleNum = 0;
3468 DebugLoc DL = Op.getDebugLoc();
3469 SDValue Res;
3470 if (!Op.getOperand(0).getValueType().isVector()) {
3471 Res = Op.getOperand(0);
3472 return Res;
3473 }
3474 if (CSDN) {
3475 // Static vector extraction
3476 swizzleNum = CSDN->getZExtValue() + 1;
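    // VEXTRACT component indices appear to be 1-based, hence the +1 here
    // and the 1..vecSize probing loop in the dynamic case below.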
3477 Res = DAG.getNode(AMDILISD::VEXTRACT,
3478 DL, VT,
3479 Op.getOperand(0),
3480 DAG.getTargetConstant(swizzleNum, MVT::i32));
3481 } else {
3482 SDValue Op1 = Op.getOperand(1);
3483 uint32_t vecSize = 4;
3484 SDValue Op0 = Op.getOperand(0);
3485 SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
3486 DL, VT, Op0,
3487 DAG.getTargetConstant(1, MVT::i32));
3488 if (Op0.getValueType().isVector()) {
3489 vecSize = Op0.getValueType().getVectorNumElements();
3490 }
3491 for (uint32_t x = 2; x <= vecSize; ++x) {
3492 SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
3493 DL, VT, Op0,
3494 DAG.getTargetConstant(x, MVT::i32));
3495 SDValue c = DAG.getNode(AMDILISD::CMP,
3496 DL, Op1.getValueType(),
3497 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
3498 Op1, DAG.getConstant(x, MVT::i32));
3499 res = DAG.getNode(AMDILISD::CMOVLOG, DL,
3500 VT, c, t, res);
3501
3502 }
3503 Res = res;
3504 }
3505 return Res;
3506 }
3507
3508 SDValue
3509 AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
3510 SelectionDAG &DAG) const
3511 {
3512 uint32_t vecSize = Op.getValueType().getVectorNumElements();
3513 SDValue src = Op.getOperand(0);
3514 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3515 uint64_t offset = 0;
3516 EVT vecType = Op.getValueType().getVectorElementType();
3517 DebugLoc DL = Op.getDebugLoc();
3518 SDValue Result;
3519 if (CSDN) {
3520 offset = CSDN->getZExtValue();
3521 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3522 DL,vecType, src, DAG.getConstant(offset, MVT::i32));
3523 Result = DAG.getNode(AMDILISD::VBUILD, DL,
3524 Op.getValueType(), Result);
3525 for (uint32_t x = 1; x < vecSize; ++x) {
3526 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3527 src, DAG.getConstant(offset + x, MVT::i32));
3528 if (elt.getOpcode() != ISD::UNDEF) {
3529 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3530 Op.getValueType(), Result, elt,
3531 DAG.getConstant(x, MVT::i32));
3532 }
3533 }
3534 } else {
3535 SDValue idx = Op.getOperand(1);
3536 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3537 DL, vecType, src, idx);
3538 Result = DAG.getNode(AMDILISD::VBUILD, DL,
3539 Op.getValueType(), Result);
3540 for (uint32_t x = 1; x < vecSize; ++x) {
3541       idx = DAG.getNode(ISD::ADD, DL, idx.getValueType(),
3542           idx, DAG.getConstant(1, MVT::i32));
3543 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3544 src, idx);
3545 if (elt.getOpcode() != ISD::UNDEF) {
3546 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3547 Op.getValueType(), Result, elt, idx);
3548 }
3549 }
3550 }
3551 return Result;
3552 }
3553 SDValue
3554 AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
3555 SelectionDAG &DAG) const
3556 {
3557 SDValue Res = DAG.getNode(AMDILISD::VBUILD,
3558 Op.getDebugLoc(),
3559 Op.getValueType(),
3560 Op.getOperand(0));
3561 return Res;
3562 }
3563 SDValue
3564 AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
3565 {
3566 SDValue Cond = Op.getOperand(0);
3567 SDValue LHS = Op.getOperand(1);
3568 SDValue RHS = Op.getOperand(2);
3569 DebugLoc DL = Op.getDebugLoc();
3570 Cond = getConversionNode(DAG, Cond, Op, true);
3571 Cond = DAG.getNode(AMDILISD::CMOVLOG,
3572 DL,
3573 Op.getValueType(), Cond, LHS, RHS);
3574 return Cond;
3575 }
3576 SDValue
3577 AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
3578 {
3579 SDValue Cond;
3580 SDValue LHS = Op.getOperand(0);
3581 SDValue RHS = Op.getOperand(1);
3582 SDValue CC = Op.getOperand(2);
3583 DebugLoc DL = Op.getDebugLoc();
3584 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3585 unsigned int AMDILCC = CondCCodeToCC(
3586 SetCCOpcode,
3587 LHS.getValueType().getSimpleVT().SimpleTy);
3588 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
3589 Cond = DAG.getNode(
3590 ISD::SELECT_CC,
3591 Op.getDebugLoc(),
3592 LHS.getValueType(),
3593 LHS, RHS,
3594 DAG.getConstant(-1, MVT::i32),
3595 DAG.getConstant(0, MVT::i32),
3596 CC);
3597 Cond = getConversionNode(DAG, Cond, Op, true);
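  // The SELECT_CC above produces all ones (-1) for true, so AND'ing with
  // 1 normalizes the result to the 0/1 value setcc consumers expect.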
3598 Cond = DAG.getNode(
3599 ISD::AND,
3600 DL,
3601 Cond.getValueType(),
3602 DAG.getConstant(1, Cond.getValueType()),
3603 Cond);
3604 return Cond;
3605 }
3606
3607 SDValue
3608 AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
3609 {
3610 SDValue Data = Op.getOperand(0);
3611 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
3612 DebugLoc DL = Op.getDebugLoc();
3613 EVT DVT = Data.getValueType();
3614 EVT BVT = BaseType->getVT();
3615 unsigned baseBits = BVT.getScalarType().getSizeInBits();
3616 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
3617 unsigned shiftBits = srcBits - baseBits;
3618 if (srcBits < 32) {
3619 // If the op is less than 32 bits, then it needs to extend to 32bits
3620 // so it can properly keep the upper bits valid.
3621 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
3622 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
3623 shiftBits = 32 - baseBits;
3624 DVT = IVT;
3625 }
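  // Example: for an i8 base type held in 32 bits, shiftBits is 24, so
  // (Data << 24) >> 24 (arithmetic) replicates bit 7 into bits 31..8.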
3626 SDValue Shift = DAG.getConstant(shiftBits, DVT);
3627 // Shift left by 'Shift' bits.
3628 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
3629 // Signed shift Right by 'Shift' bits.
3630 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
3631 if (srcBits < 32) {
3632 // Once the sign extension is done, the op needs to be converted to
3633 // its original type.
3634 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
3635 }
3636 return Data;
3637 }
3638 EVT
3639 AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
3640 {
3641 int iSize = (size * numEle);
3642 int vEle = (iSize >> ((size == 64) ? 6 : 5));
3643 if (!vEle) {
3644 vEle = 1;
3645 }
3646 if (size == 64) {
3647 if (vEle == 1) {
3648 return EVT(MVT::i64);
3649 } else {
3650 return EVT(MVT::getVectorVT(MVT::i64, vEle));
3651 }
3652 } else {
3653 if (vEle == 1) {
3654 return EVT(MVT::i32);
3655 } else {
3656 return EVT(MVT::getVectorVT(MVT::i32, vEle));
3657 }
3658 }
3659 }
3660
3661 SDValue
3662 AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
3663 {
3664 SDValue Src = Op.getOperand(0);
3665 SDValue Dst = Op;
3666 SDValue Res;
3667 DebugLoc DL = Op.getDebugLoc();
3668 EVT SrcVT = Src.getValueType();
3669 EVT DstVT = Dst.getValueType();
3670   // Let's bitcast the floating point types to an
3671   // equivalent integer type before converting to vectors.
3672 if (SrcVT.getScalarType().isFloatingPoint()) {
3673 Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
3674 SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
3675 SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
3676 Src);
3677 SrcVT = Src.getValueType();
3678 }
3679 uint32_t ScalarSrcSize = SrcVT.getScalarType()
3680 .getSimpleVT().getSizeInBits();
3681 uint32_t ScalarDstSize = DstVT.getScalarType()
3682 .getSimpleVT().getSizeInBits();
3683 uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
3684 uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
3685 bool isVec = SrcVT.isVector();
3686 if (DstVT.getScalarType().isInteger() &&
3687 (SrcVT.getScalarType().isInteger()
3688 || SrcVT.getScalarType().isFloatingPoint())) {
3689 if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
3690 || (ScalarSrcSize == 64
3691 && DstNumEle == 4
3692 && ScalarDstSize == 16)) {
3693 // This is the problematic case when bitcasting i64 <-> <4 x i16>
3694 // This approach is a little different as we cannot generate a
3695 // <4 x i64> vector
3696 // as that is illegal in our backend and we are already past
3697 // the DAG legalizer.
3698 // So, in this case, we will do the following conversion.
3699 // Case 1:
3700 // %dst = <4 x i16> %src bitconvert i64 ==>
3701 // %tmp = <4 x i16> %src convert <4 x i32>
3702 // %tmp = <4 x i32> %tmp and 0xFFFF
3703 // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
3704 // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
3705 // %dst = <2 x i32> %tmp bitcast i64
3706 // case 2:
3707 // %dst = i64 %src bitconvert <4 x i16> ==>
3708 // %tmp = i64 %src bitcast <2 x i32>
3709 // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
3710 // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
3711 // %tmp = <4 x i32> %tmp and 0xFFFF
3712 // %dst = <4 x i16> %tmp bitcast <4 x i32>
3713 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
3714 DAG.getConstant(0xFFFF, MVT::i32));
3715 SDValue const16 = DAG.getConstant(16, MVT::i32);
3716 if (ScalarDstSize == 64) {
3717 // case 1
3718 Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
3719 Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
3720 SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3721 Op, DAG.getConstant(0, MVT::i32));
3722 SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3723 Op, DAG.getConstant(1, MVT::i32));
3724 y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
3725 SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3726 Op, DAG.getConstant(2, MVT::i32));
3727 SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3728 Op, DAG.getConstant(3, MVT::i32));
3729 w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
3730 x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
3731 y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
3732 Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
3733 return Res;
3734 } else {
3735 // case 2
3736 SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
3737 SDValue lor16
3738 = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
3739 SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
3740 SDValue hir16
3741 = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
3742 SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
3743 MVT::v4i32, lo);
3744 SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
3745 getPointerTy(), DAG.getConstant(1, MVT::i32));
3746 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
3747 resVec, lor16, idxVal);
3748 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
3749 getPointerTy(), DAG.getConstant(2, MVT::i32));
3750 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
3751 resVec, hi, idxVal);
3752 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
3753 getPointerTy(), DAG.getConstant(3, MVT::i32));
3754 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
3755 resVec, hir16, idxVal);
3756 resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
3757 Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
3758 return Res;
3759 }
3760 } else {
3761       // There are four cases we need to worry about for bitcasts
3762       // where the sizes of the source, intermediates, and result
3763       // are all <= 128 bits, unlike the case above:
3765 // 1) Sub32bit bitcast 32bitAlign
3766 // %dst = <4 x i8> bitcast i32
3767 // (also <[2|4] x i16> to <[2|4] x i32>)
3768 // 2) 32bitAlign bitcast Sub32bit
3769 // %dst = i32 bitcast <4 x i8>
3770 // 3) Sub32bit bitcast LargerSub32bit
3771 // %dst = <2 x i8> bitcast i16
3772 // (also <4 x i8> to <2 x i16>)
3773 // 4) Sub32bit bitcast SmallerSub32bit
3774 // %dst = i16 bitcast <2 x i8>
3775 // (also <2 x i16> to <4 x i8>)
3776 // This also only handles types that are powers of two
3777 if ((ScalarDstSize & (ScalarDstSize - 1))
3778 || (ScalarSrcSize & (ScalarSrcSize - 1))) {
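        // Non-power-of-two scalar sizes fall through to the generic
        // BITCONV at the bottom of this function.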
3779 } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
3780 // case 1:
3781 EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
3782 #if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
3783 SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
3784 #else
3785 SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
3786 DAG.getUNDEF(IntTy.getScalarType()));
3787 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3788 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3789 getPointerTy(), DAG.getConstant(x, MVT::i32));
3790 SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3791 SrcVT.getScalarType(), Src,
3792 DAG.getConstant(x, MVT::i32));
3793 temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
3794 res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
3795 res, temp, idx);
3796 }
3797 #endif
3798 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
3799 DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
3800 SDValue *newEle = new SDValue[SrcNumEle];
3801 res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
3802 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3803 newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3804 IntTy.getScalarType(), res,
3805 DAG.getConstant(x, MVT::i32));
3806 }
3807 uint32_t Ratio = SrcNumEle / DstNumEle;
3808 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3809 if (x % Ratio) {
3810 newEle[x] = DAG.getNode(ISD::SHL, DL,
3811 IntTy.getScalarType(), newEle[x],
3812 DAG.getConstant(ScalarSrcSize * (x % Ratio),
3813 MVT::i32));
3814 }
3815 }
3816 for (uint32_t x = 0; x < SrcNumEle; x += 2) {
3817 newEle[x] = DAG.getNode(ISD::OR, DL,
3818 IntTy.getScalarType(), newEle[x], newEle[x + 1]);
3819 }
3820 if (ScalarSrcSize == 8) {
3821 for (uint32_t x = 0; x < SrcNumEle; x += 4) {
3822 newEle[x] = DAG.getNode(ISD::OR, DL,
3823 IntTy.getScalarType(), newEle[x], newEle[x + 2]);
3824 }
3825 if (DstNumEle == 1) {
3826 Dst = newEle[0];
3827 } else {
3828 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
3829 newEle[0]);
3830 for (uint32_t x = 1; x < DstNumEle; ++x) {
3831 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3832 getPointerTy(), DAG.getConstant(x, MVT::i32));
3833 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3834 DstVT, Dst, newEle[x * 4], idx);
3835 }
3836 }
3837 } else {
3838 if (DstNumEle == 1) {
3839 Dst = newEle[0];
3840 } else {
3841 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
3842 newEle[0]);
3843 for (uint32_t x = 1; x < DstNumEle; ++x) {
3844 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3845 getPointerTy(), DAG.getConstant(x, MVT::i32));
3846 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3847 DstVT, Dst, newEle[x * 2], idx);
3848 }
3849 }
3850 }
3851 delete [] newEle;
3852 return Dst;
3853 } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
3854 // case 2:
3855 EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
3856 SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
3857 DAG.getUNDEF(IntTy.getScalarType()));
3858 uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
3859 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3860 for (uint32_t y = 0; y < mult; ++y) {
3861 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3862 getPointerTy(),
3863 DAG.getConstant(x * mult + y, MVT::i32));
3864 SDValue t;
3865 if (SrcNumEle > 1) {
3866 t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3867 DL, SrcVT.getScalarType(), Src,
3868 DAG.getConstant(x, MVT::i32));
3869 } else {
3870 t = Src;
3871 }
3872 if (y != 0) {
3873 t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
3874 t, DAG.getConstant(y * ScalarDstSize,
3875 MVT::i32));
3876 }
3877 vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
3878 DL, IntTy, vec, t, idx);
3879 }
3880 }
3881 Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
3882 return Dst;
3883 } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
3884 // case 3:
3885 SDValue *numEle = new SDValue[SrcNumEle];
3886 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3887 numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3888 MVT::i8, Src, DAG.getConstant(x, MVT::i32));
3889 numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
3890 numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
3891 DAG.getConstant(0xFF, MVT::i16));
3892 }
3893 for (uint32_t x = 1; x < SrcNumEle; x += 2) {
3894 numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
3895 DAG.getConstant(8, MVT::i16));
3896 numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
3897 numEle[x-1], numEle[x]);
3898 }
3899 if (DstNumEle > 1) {
3900         // If the result is not a scalar i16, the only other case is
3901         // v2i16: v8i8 cannot reach this point, so v4i16 is never
3902         // generated.
3903 Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
3904 numEle[0]);
3905 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3906 getPointerTy(), DAG.getConstant(1, MVT::i32));
3907 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
3908 Dst, numEle[2], idx);
3909 } else {
3910 Dst = numEle[0];
3911 }
3912 delete [] numEle;
3913 return Dst;
3914 } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
3915 // case 4:
3916 SDValue *numEle = new SDValue[DstNumEle];
3917 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3918 numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3919 MVT::i16, Src, DAG.getConstant(x, MVT::i32));
3920 numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
3921 numEle[x * 2], DAG.getConstant(8, MVT::i16));
3922 }
3923 MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
3924 Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
3925 for (uint32_t x = 1; x < DstNumEle; ++x) {
3926 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3927 getPointerTy(), DAG.getConstant(x, MVT::i32));
3928 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
3929 Dst, numEle[x], idx);
3930 }
3931 delete [] numEle;
3932 ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
3933 Res = DAG.getSExtOrTrunc(Dst, DL, ty);
3934 return Res;
3935 }
3936 }
3937 }
3938 Res = DAG.getNode(AMDILISD::BITCONV,
3939 Dst.getDebugLoc(),
3940 Dst.getValueType(), Src);
3941 return Res;
3942 }
3943
3944 SDValue
3945 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
3946 SelectionDAG &DAG) const
3947 {
3948 SDValue Chain = Op.getOperand(0);
3949 SDValue Size = Op.getOperand(1);
3950 unsigned int SPReg = AMDIL::SP;
3951 DebugLoc DL = Op.getDebugLoc();
3952 SDValue SP = DAG.getCopyFromReg(Chain,
3953 DL,
3954 SPReg, MVT::i32);
3955 SDValue NewSP = DAG.getNode(ISD::ADD,
3956 DL,
3957 MVT::i32, SP, Size);
3958 Chain = DAG.getCopyToReg(SP.getValue(1),
3959 DL,
3960 SPReg, NewSP);
3961 SDValue Ops[2] = {NewSP, Chain};
3962   Chain = DAG.getMergeValues(Ops, 2, DL);
3963 return Chain;
3964 }
3965 SDValue
3966 AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
3967 {
3968 SDValue Chain = Op.getOperand(0);
3969 SDValue Cond = Op.getOperand(1);
3970 SDValue Jump = Op.getOperand(2);
3971 SDValue Result;
3972 Result = DAG.getNode(
3973 AMDILISD::BRANCH_COND,
3974 Op.getDebugLoc(),
3975 Op.getValueType(),
3976 Chain, Jump, Cond);
3977 return Result;
3978 }
3979
3980 SDValue
3981 AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
3982 {
3983 SDValue Chain = Op.getOperand(0);
3984 SDValue CC = Op.getOperand(1);
3985 SDValue LHS = Op.getOperand(2);
3986 SDValue RHS = Op.getOperand(3);
3987 SDValue JumpT = Op.getOperand(4);
3988 SDValue CmpValue;
3989 SDValue Result;
3990 CmpValue = DAG.getNode(
3991 ISD::SELECT_CC,
3992 Op.getDebugLoc(),
3993 LHS.getValueType(),
3994 LHS, RHS,
3995 DAG.getConstant(-1, MVT::i32),
3996 DAG.getConstant(0, MVT::i32),
3997 CC);
3998 Result = DAG.getNode(
3999 AMDILISD::BRANCH_COND,
4000 CmpValue.getDebugLoc(),
4001 MVT::Other, Chain,
4002 JumpT, CmpValue);
4003 return Result;
4004 }
4005
4006 SDValue
4007 AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
4008 {
4009 SDValue Result = DAG.getNode(
4010 AMDILISD::DP_TO_FP,
4011 Op.getDebugLoc(),
4012 Op.getValueType(),
4013 Op.getOperand(0),
4014 Op.getOperand(1));
4015 return Result;
4016 }
4017
4018 SDValue
4019 AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
4020 {
4021 SDValue Result = DAG.getNode(
4022 AMDILISD::VCONCAT,
4023 Op.getDebugLoc(),
4024 Op.getValueType(),
4025 Op.getOperand(0),
4026 Op.getOperand(1));
4027 return Result;
4028 }
4029 // LowerReturn - Lower an ISD::RET node.
4030 SDValue
4031 AMDILTargetLowering::LowerReturn(SDValue Chain,
4032 CallingConv::ID CallConv, bool isVarArg,
4033 const SmallVectorImpl<ISD::OutputArg> &Outs,
4034 const SmallVectorImpl<SDValue> &OutVals,
4035 DebugLoc dl, SelectionDAG &DAG)
4036 const
4037 {
4038 //MachineFunction& MF = DAG.getMachineFunction();
4039 // CCValAssign - represent the assignment of the return value
4040 // to a location
4041 SmallVector<CCValAssign, 16> RVLocs;
4042
4043 // CCState - Info about the registers and stack slot
4044 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4045 getTargetMachine(), RVLocs, *DAG.getContext());
4046
4047 // Analyze return values of ISD::RET
4048 CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
4049 // If this is the first return lowered for this function, add
4050 // the regs to the liveout set for the function
4051 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
4052 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
4053 if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
4054 MRI.addLiveOut(RVLocs[i].getLocReg());
4055 }
4056 }
4057 // FIXME: implement this when tail call is implemented
4058 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
4059 // both x86 and ppc implement this in ISelLowering
4060
4061 // Regular return here
4062 SDValue Flag;
4063 SmallVector<SDValue, 6> RetOps;
4064 RetOps.push_back(Chain);
4065 RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
4066 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
4067 CCValAssign &VA = RVLocs[i];
4068 SDValue ValToCopy = OutVals[i];
4069 assert(VA.isRegLoc() && "Can only return in registers!");
4070 // ISD::Ret => ret chain, (regnum1, val1), ...
4071     // So i * 2 + 1 indexes only the regnums.
4072 Chain = DAG.getCopyToReg(Chain,
4073 dl,
4074 VA.getLocReg(),
4075 ValToCopy,
4076 Flag);
4077     // Chain the copies through the glue value so that all emitted
4078     // copies stay together and cannot be scheduled apart.
4079 Flag = Chain.getValue(1);
4080 }
4081 /*if (MF.getFunction()->hasStructRetAttr()) {
4082 assert(0 && "Struct returns are not yet implemented!");
4083 // Both MIPS and X86 have this
4084 }*/
4085 RetOps[0] = Chain;
4086 if (Flag.getNode())
4087 RetOps.push_back(Flag);
4088
4089 Flag = DAG.getNode(AMDILISD::RET_FLAG,
4090 dl,
4091 MVT::Other, &RetOps[0], RetOps.size());
4092 return Flag;
4093 }
4094
4095 unsigned int
4096 AMDILTargetLowering::getFunctionAlignment(const Function *) const
4097 {
4098 return 0;
4099 }
4100
4101 void
4102 AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
4103 MachineBasicBlock::iterator &BBI,
4104 DebugLoc *DL, const TargetInstrInfo *TII) const
4105 {
4106 mBB = BB;
4107 mBBI = BBI;
4108 mDL = DL;
4109 mTII = TII;
4110 }
4111 uint32_t
4112 AMDILTargetLowering::genVReg(uint32_t regType) const
4113 {
4114 return mBB->getParent()->getRegInfo().createVirtualRegister(
4115 getTargetMachine().getRegisterInfo()->getRegClass(regType));
4116 }
4117
4118 MachineInstrBuilder
4119 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
4120 {
4121 return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
4122 }
4123
4124 MachineInstrBuilder
4125 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4126 uint32_t src1) const
4127 {
4128 return generateMachineInst(opcode, dst).addReg(src1);
4129 }
4130
4131 MachineInstrBuilder
4132 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4133 uint32_t src1, uint32_t src2) const
4134 {
4135 return generateMachineInst(opcode, dst, src1).addReg(src2);
4136 }
4137
4138 MachineInstrBuilder
4139 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4140 uint32_t src1, uint32_t src2, uint32_t src3) const
4141 {
4142 return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
4143 }
4144
4145
4146 SDValue
4147 AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
4148 {
4149 DebugLoc DL = Op.getDebugLoc();
4150 EVT OVT = Op.getValueType();
4151 SDValue LHS = Op.getOperand(0);
4152 SDValue RHS = Op.getOperand(1);
4153 MVT INTTY;
4154 MVT FLTTY;
4155 if (!OVT.isVector()) {
4156 INTTY = MVT::i32;
4157 FLTTY = MVT::f32;
4158 } else if (OVT.getVectorNumElements() == 2) {
4159 INTTY = MVT::v2i32;
4160 FLTTY = MVT::v2f32;
4161 } else if (OVT.getVectorNumElements() == 4) {
4162 INTTY = MVT::v4i32;
4163 FLTTY = MVT::v4f32;
4164 }
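  // A sketch of the idea, as implemented below: 8/16-bit operands fit
  // exactly in an f32 mantissa, so trunc(fa / fb) is the quotient up to a
  // possible off-by-one. jq (+1 or -1, the sign of ia ^ ib) is added back
  // only when the remainder test fabs(mad(-fq, fb, fa)) >= fabs(fb) shows
  // the truncated quotient fell short.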
4165 unsigned bitsize = OVT.getScalarType().getSizeInBits();
4166 // char|short jq = ia ^ ib;
4167 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
4168
4169 // jq = jq >> (bitsize - 2)
4170 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
4171
4172 // jq = jq | 0x1
4173 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
4174
4175 // jq = (int)jq
4176 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
4177
4178 // int ia = (int)LHS;
4179 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4180
4181   // int ib = (int)RHS;
4182 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4183
4184 // float fa = (float)ia;
4185 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
4186
4187 // float fb = (float)ib;
4188 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
4189
4190 // float fq = native_divide(fa, fb);
4191 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
4192
4193 // fq = trunc(fq);
4194 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
4195
4196 // float fqneg = -fq;
4197 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
4198
4199 // float fr = mad(fqneg, fb, fa);
4200 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
4201
4202 // int iq = (int)fq;
4203 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
4204
4205 // fr = fabs(fr);
4206 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
4207
4208 // fb = fabs(fb);
4209 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
4210
4211 // int cv = fr >= fb;
4212   SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4218 // jq = (cv ? jq : 0);
4219 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
4220 DAG.getConstant(0, OVT));
4221 // dst = iq + jq;
4222 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
4223 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
4224 return iq;
4225 }
4226
4227 SDValue
4228 AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
4229 {
4230 DebugLoc DL = Op.getDebugLoc();
4231 EVT OVT = Op.getValueType();
4232 SDValue LHS = Op.getOperand(0);
4233 SDValue RHS = Op.getOperand(1);
4234   // The LowerSDIV32 function generates the equivalent of the following IL.
4235 // mov r0, LHS
4236 // mov r1, RHS
4237 // ilt r10, r0, 0
4238 // ilt r11, r1, 0
4239 // iadd r0, r0, r10
4240 // iadd r1, r1, r11
4241 // ixor r0, r0, r10
4242 // ixor r1, r1, r11
4243 // udiv r0, r0, r1
4244 // ixor r10, r10, r11
4245 // iadd r0, r0, r10
4246 // ixor DST, r0, r10
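  // r10 and r11 below are 0 or -1 sign masks. For s = x >> 31,
  // (x + s) ^ s yields |x|; the udiv then runs on magnitudes, and the
  // same add/xor pattern with r10 ^ r11 (the sign of the quotient)
  // restores the signed result.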
4247
4248 // mov r0, LHS
4249 SDValue r0 = LHS;
4250
4251 // mov r1, RHS
4252 SDValue r1 = RHS;
4253
4254 // ilt r10, r0, 0
4255 SDValue r10 = DAG.getSelectCC(DL,
4256 r0, DAG.getConstant(0, OVT),
4257 DAG.getConstant(-1, MVT::i32),
4258 DAG.getConstant(0, MVT::i32),
4259 ISD::SETLT);
4260
4261 // ilt r11, r1, 0
4262 SDValue r11 = DAG.getSelectCC(DL,
4263 r1, DAG.getConstant(0, OVT),
4264 DAG.getConstant(-1, MVT::i32),
4265 DAG.getConstant(0, MVT::i32),
4266 ISD::SETLT);
4267
4268 // iadd r0, r0, r10
4269 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4270
4271 // iadd r1, r1, r11
4272 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4273
4274 // ixor r0, r0, r10
4275 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4276
4277 // ixor r1, r1, r11
4278 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4279
4280 // udiv r0, r0, r1
4281 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
4282
4283 // ixor r10, r10, r11
4284 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
4285
4286 // iadd r0, r0, r10
4287 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4288
4289 // ixor DST, r0, r10
4290 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4291 return DST;
4292 }
4293
4294 SDValue
4295 AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
4296 {
4297 return SDValue(Op.getNode(), 0);
4298 }
4299
4300 SDValue
4301 AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
4302 {
4303 DebugLoc DL = Op.getDebugLoc();
4304 EVT OVT = Op.getValueType();
4305 SDValue LHS = Op.getOperand(0);
4306 SDValue RHS = Op.getOperand(1);
4307 MVT INTTY;
4308 MVT FLTTY;
4309 if (!OVT.isVector()) {
4310 INTTY = MVT::i32;
4311 FLTTY = MVT::f32;
4312 } else if (OVT.getVectorNumElements() == 2) {
4313 INTTY = MVT::v2i32;
4314 FLTTY = MVT::v2f32;
4315 } else if (OVT.getVectorNumElements() == 4) {
4316 INTTY = MVT::v4i32;
4317 FLTTY = MVT::v4f32;
4318 }
4319
4320 // The LowerUDIV24 function implements the following CL.
4321 // int ia = (int)LHS
4322 // float fa = (float)ia
4323 // int ib = (int)RHS
4324 // float fb = (float)ib
4325 // float fq = native_divide(fa, fb)
4326 // fq = trunc(fq)
4327 // float t = mad(fq, fb, fb)
4328 // int iq = (int)fq - (t <= fa)
4329 // return (type)iq
4330
4331 // int ia = (int)LHS
4332 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
4333
4334 // float fa = (float)ia
4335 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
4336
4337 // int ib = (int)RHS
4338 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
4339
4340 // float fb = (float)ib
4341 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
4342
4343 // float fq = native_divide(fa, fb)
4344 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
4345
4346 // fq = trunc(fq)
4347 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
4348
4349 // float t = mad(fq, fb, fb)
4350 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
4351
4352   // int iq = (int)fq - (t <= fa); the setcc below returns 0 or -1
4352   // rather than 0 or 1, so an add implements the subtraction.
4353 SDValue iq;
4354 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
4355   iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
4360 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
4361
4362
4363 // return (type)iq
4364 iq = DAG.getZExtOrTrunc(iq, DL, OVT);
4365 return iq;
4366
4367 }
4368
4369 SDValue
4370 AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
4371 {
4372 return SDValue(Op.getNode(), 0);
4373 }
4374
4375 SDValue
4376 AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
4377 {
4378 return SDValue(Op.getNode(), 0);
4379 }
4380 SDValue
4381 AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
4382 {
4383 DebugLoc DL = Op.getDebugLoc();
4384 EVT OVT = Op.getValueType();
4385 MVT INTTY = MVT::i32;
4386 if (OVT == MVT::v2i8) {
4387 INTTY = MVT::v2i32;
4388 } else if (OVT == MVT::v4i8) {
4389 INTTY = MVT::v4i32;
4390 }
4391 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
4392 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
4393 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
4394 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
4395 return LHS;
4396 }
4397
4398 SDValue
4399 AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
4400 {
4401 DebugLoc DL = Op.getDebugLoc();
4402 EVT OVT = Op.getValueType();
4403 MVT INTTY = MVT::i32;
4404 if (OVT == MVT::v2i16) {
4405 INTTY = MVT::v2i32;
4406 } else if (OVT == MVT::v4i16) {
4407 INTTY = MVT::v4i32;
4408 }
4409 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
4410 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
4411 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
4412 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
4413 return LHS;
4414 }
4415
4416 SDValue
4417 AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
4418 {
4419 DebugLoc DL = Op.getDebugLoc();
4420 EVT OVT = Op.getValueType();
4421 SDValue LHS = Op.getOperand(0);
4422 SDValue RHS = Op.getOperand(1);
4423   // The LowerSREM32 function generates the equivalent of the following IL.
4424 // mov r0, LHS
4425 // mov r1, RHS
4426 // ilt r10, r0, 0
4427 // ilt r11, r1, 0
4428 // iadd r0, r0, r10
4429 // iadd r1, r1, r11
4430 // ixor r0, r0, r10
4431 // ixor r1, r1, r11
4432 // udiv r20, r0, r1
4433 // umul r20, r20, r1
4434 // sub r0, r0, r20
4435 // iadd r0, r0, r10
4436 // ixor DST, r0, r10
4437
4438 // mov r0, LHS
4439 SDValue r0 = LHS;
4440
4441 // mov r1, RHS
4442 SDValue r1 = RHS;
4443
4444 // ilt r10, r0, 0
4445 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4446 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4447 r0, DAG.getConstant(0, OVT));
4448
4449 // ilt r11, r1, 0
4450 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4451 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4452 r1, DAG.getConstant(0, OVT));
4453
4454 // iadd r0, r0, r10
4455 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4456
4457 // iadd r1, r1, r11
4458 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4459
4460 // ixor r0, r0, r10
4461 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4462
4463 // ixor r1, r1, r11
4464 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4465
4466 // udiv r20, r0, r1
4467   SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
4468
4469 // umul r20, r20, r1
4470 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
4471
4472 // sub r0, r0, r20
4473 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
4474
4475 // iadd r0, r0, r10
4476 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4477
4478 // ixor DST, r0, r10
4479 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4480 return DST;
4481 }
4482
4483 SDValue
4484 AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
4485 {
4486 return SDValue(Op.getNode(), 0);
4487 }
4488
4489 SDValue
4490 AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
4491 {
4492 DebugLoc DL = Op.getDebugLoc();
4493 EVT OVT = Op.getValueType();
4494 MVT INTTY = MVT::i32;
4495 if (OVT == MVT::v2i8) {
4496 INTTY = MVT::v2i32;
4497 } else if (OVT == MVT::v4i8) {
4498 INTTY = MVT::v4i32;
4499 }
4500 SDValue LHS = Op.getOperand(0);
4501 SDValue RHS = Op.getOperand(1);
4502   // The LowerUREM8 function generates the equivalent of the following IL.
4503 // mov r0, as_u32(LHS)
4504 // mov r1, as_u32(RHS)
4505 // and r10, r0, 0xFF
4506 // and r11, r1, 0xFF
4507 // cmov_logical r3, r11, r11, 0x1
4508 // udiv r3, r10, r3
4509 // cmov_logical r3, r11, r3, 0
4510 // umul r3, r3, r11
4511 // sub r3, r10, r3
4512 // and as_u8(DST), r3, 0xFF
4513
4514 // mov r0, as_u32(LHS)
4515 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4516
4517 // mov r1, as_u32(RHS)
4518 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4519
4520 // and r10, r0, 0xFF
4521 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
4522 DAG.getConstant(0xFF, INTTY));
4523
4524 // and r11, r1, 0xFF
4525 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
4526 DAG.getConstant(0xFF, INTTY));
4527
4528 // cmov_logical r3, r11, r11, 0x1
4529 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
4530 DAG.getConstant(0x01, INTTY));
4531
4532 // udiv r3, r10, r3
4533   r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
4534
4535 // cmov_logical r3, r11, r3, 0
4536 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
4537 DAG.getConstant(0, INTTY));
4538
4539 // umul r3, r3, r11
4540 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
4541
4542 // sub r3, r10, r3
4543 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
4544
4545 // and as_u8(DST), r3, 0xFF
4546 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
4547 DAG.getConstant(0xFF, INTTY));
4548 DST = DAG.getZExtOrTrunc(DST, DL, OVT);
4549 return DST;
4550 }
4551
4552 SDValue
4553 AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
4554 {
4555 DebugLoc DL = Op.getDebugLoc();
4556 EVT OVT = Op.getValueType();
4557 MVT INTTY = MVT::i32;
4558 if (OVT == MVT::v2i16) {
4559 INTTY = MVT::v2i32;
4560 } else if (OVT == MVT::v4i16) {
4561 INTTY = MVT::v4i32;
4562 }
4563 SDValue LHS = Op.getOperand(0);
4564 SDValue RHS = Op.getOperand(1);
4565   // The LowerUREM16 function generates the equivalent of the following IL.
4566 // mov r0, LHS
4567 // mov r1, RHS
4568 // DIV = LowerUDIV16(LHS, RHS)
4569 // and r10, r0, 0xFFFF
4570 // and r11, r1, 0xFFFF
4571 // cmov_logical r3, r11, r11, 0x1
4572 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
4573 // and r3, r3, 0xFFFF
4574 // cmov_logical r3, r11, r3, 0
4575 // umul r3, r3, r11
4576 // sub r3, r10, r3
4577 // and DST, r3, 0xFFFF
4578
4579 // mov r0, LHS
4580 SDValue r0 = LHS;
4581
4582 // mov r1, RHS
4583 SDValue r1 = RHS;
4584
4585 // and r10, r0, 0xFFFF
4586 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
4587 DAG.getConstant(0xFFFF, OVT));
4588
4589 // and r11, r1, 0xFFFF
4590 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
4591 DAG.getConstant(0xFFFF, OVT));
4592
4593 // cmov_logical r3, r11, r11, 0x1
4594 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
4595 DAG.getConstant(0x01, OVT));
4596
4597 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
4598 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
4599 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
4600   r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
4601 r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
4602 r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
4603
4604 // and r3, r3, 0xFFFF
4605 r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
4606 DAG.getConstant(0xFFFF, OVT));
4607
4608 // cmov_logical r3, r11, r3, 0
4609 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
4610 DAG.getConstant(0, OVT));
4611 // umul r3, r3, r11
4612 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
4613
4614 // sub r3, r10, r3
4615 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
4616
4617 // and DST, r3, 0xFFFF
4618 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
4619 DAG.getConstant(0xFFFF, OVT));
4620 return DST;
4621 }
4622
4623 SDValue
4624 AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
4625 {
4626 DebugLoc DL = Op.getDebugLoc();
4627 EVT OVT = Op.getValueType();
4628 SDValue LHS = Op.getOperand(0);
4629 SDValue RHS = Op.getOperand(1);
4630   // The LowerUREM32 function generates the equivalent of the following IL.
4631 // udiv r20, LHS, RHS
4632 // umul r20, r20, RHS
4633 // sub DST, LHS, r20
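  // i.e. the standard identity a % b == a - (a / b) * b.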
4634
4635 // udiv r20, LHS, RHS
4636 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
4637
4638 // umul r20, r20, RHS
4639 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
4640
4641 // sub DST, LHS, r20
4642 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
4643 return DST;
4644 }
4645
4646 SDValue
4647 AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
4648 {
4649 return SDValue(Op.getNode(), 0);
4650 }
4651
4652
4653 SDValue
4654 AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
4655 {
4656 DebugLoc DL = Op.getDebugLoc();
4657 EVT OVT = Op.getValueType();
4658 MVT INTTY = MVT::i32;
4659 if (OVT == MVT::v2f32) {
4660 INTTY = MVT::v2i32;
4661 } else if (OVT == MVT::v4f32) {
4662 INTTY = MVT::v4i32;
4663 }
4664 SDValue LHS = Op.getOperand(0);
4665 SDValue RHS = Op.getOperand(1);
4666 SDValue DST;
4667 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
4668 &this->getTargetMachine())->getSubtargetImpl();
4669 if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
4670 // TODO: This doesn't work for vector types yet
4671     // The LowerFDIV32 function generates the equivalent of the following
4672     // IL:
4673 // mov r20, as_int(LHS)
4674 // mov r21, as_int(RHS)
4675 // and r30, r20, 0x7f800000
4676     // and r31, r21, 0x7f800000
4677     // and r32, r20, 0x807FFFFF
4678 // and r33, r21, 0x807FFFFF
4679 // ieq r40, r30, 0x7F800000
4680 // ieq r41, r31, 0x7F800000
4681     // ieq r42, r30, 0
4682     // ieq r43, r31, 0
4683 // and r50, r20, 0x80000000
4684 // and r51, r21, 0x80000000
4685 // ior r32, r32, 0x3f800000
4686 // ior r33, r33, 0x3f800000
4687 // cmov_logical r32, r42, r50, r32
4688 // cmov_logical r33, r43, r51, r33
4689 // cmov_logical r32, r40, r20, r32
4690 // cmov_logical r33, r41, r21, r33
4691 // ior r50, r40, r41
4692 // ior r51, r42, r43
4693 // ior r50, r50, r51
4694 // inegate r52, r31
4695 // iadd r30, r30, r52
4696 // cmov_logical r30, r50, 0, r30
4697 // div_zeroop(infinity) r21, 1.0, r33
4698 // mul_ieee r20, r32, r21
4699 // and r22, r20, 0x7FFFFFFF
4700 // and r23, r20, 0x80000000
4701 // ishr r60, r22, 0x00000017
4702 // ishr r61, r30, 0x00000017
4703 // iadd r20, r20, r30
4704 // iadd r21, r22, r30
4705 // iadd r60, r60, r61
4706 // ige r42, 0, R60
4707 // ior r41, r23, 0x7F800000
4708 // ige r40, r60, 0x000000FF
4709 // cmov_logical r40, r50, 0, r40
4710 // cmov_logical r20, r42, r23, r20
4711 // cmov_logical DST, r40, r41, r20
4712 // as_float(DST)
4713
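    // Roughly: both operands are split into exponent (r30/r31) and
    // sign+mantissa (r32/r33) fields, the mantissa parts are divided as
    // values near 1.0, the exponent difference (r30 - r31) is added back
    // into the result, and the cmov_logical chains patch in the special
    // cases (zeros, infinities, and exponent overflow).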
4714 // mov r20, as_int(LHS)
4715 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
4716
4717 // mov r21, as_int(RHS)
4718 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
4719
4720 // and r30, r20, 0x7f800000
4721 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4722 DAG.getConstant(0x7F800000, INTTY));
4723
4724 // and r31, r21, 0x7f800000
4725 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4726 DAG.getConstant(0x7f800000, INTTY));
4727
4728 // and r32, r20, 0x807FFFFF
4729 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4730 DAG.getConstant(0x807FFFFF, INTTY));
4731
4732 // and r33, r21, 0x807FFFFF
4733 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4734 DAG.getConstant(0x807FFFFF, INTTY));
4735
4736 // ieq r40, r30, 0x7F800000
4737 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4738 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4739 R30, DAG.getConstant(0x7F800000, INTTY));
4740
4741 // ieq r41, r31, 0x7F800000
4742 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4743 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4744 R31, DAG.getConstant(0x7F800000, INTTY));
4745
4746 // ieq r42, r30, 0
4747 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4748 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4749 R30, DAG.getConstant(0, INTTY));
4750
4751 // ieq r43, r31, 0
4752 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4753 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4754 R31, DAG.getConstant(0, INTTY));
4755
4756 // and r50, r20, 0x80000000
4757 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4758 DAG.getConstant(0x80000000, INTTY));
4759
4760 // and r51, r21, 0x80000000
4761 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4762 DAG.getConstant(0x80000000, INTTY));
4763
4764 // ior r32, r32, 0x3f800000
4765 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
4766 DAG.getConstant(0x3F800000, INTTY));
4767
4768 // ior r33, r33, 0x3f800000
4769 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
4770 DAG.getConstant(0x3F800000, INTTY));
4771
4772 // cmov_logical r32, r42, r50, r32
4773 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
4774
4775 // cmov_logical r33, r43, r51, r33
4776 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
4777
4778 // cmov_logical r32, r40, r20, r32
4779 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
4780
4781 // cmov_logical r33, r41, r21, r33
4782 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
4783
4784 // ior r50, r40, r41
4785 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
4786
4787 // ior r51, r42, r43
4788 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
4789
4790 // ior r50, r50, r51
4791 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
4792
4793 // inegate r52, r31
4794 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
4795
4796 // iadd r30, r30, r52
4797 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
4798
4799 // cmov_logical r30, r50, 0, r30
4800 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4801 DAG.getConstant(0, INTTY), R30);
4802
4803 // div_zeroop(infinity) r21, 1.0, as_float(r33)
4804 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
4805 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
4806 DAG.getConstantFP(1.0f, OVT), R33);
4807
4808 // mul_ieee as_int(r20), as_float(r32), r21
4809 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
4810 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
4811 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
4812
4823 // and r22, r20, 0x7FFFFFFF
4824 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4825 DAG.getConstant(0x7FFFFFFF, INTTY));
4826
4827 // and r23, r20, 0x80000000
4828 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4829 DAG.getConstant(0x80000000, INTTY));
4830
4831 // ishr r60, r22, 0x00000017
4832 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
4833 DAG.getConstant(0x00000017, INTTY));
4834
4835 // ishr r61, r30, 0x00000017
4836 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
4837 DAG.getConstant(0x00000017, INTTY));
4838
4839 // iadd r20, r20, r30
4840 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
4841
4842 // iadd r21, r22, r30
4843 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
4844
4845 // iadd r60, r60, r61
4846 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
4847
4848 // ige r42, 0, R60
4849 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4850 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4851 DAG.getConstant(0, INTTY),
4852 R60);
4853
4854 // ior r41, r23, 0x7F800000
4855 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
4856 DAG.getConstant(0x7F800000, INTTY));
4857
4858 // ige r40, r60, 0x000000FF
4859 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4860 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4861 R60,
4862         DAG.getConstant(0x000000FF, INTTY));
4863
4864 // cmov_logical r40, r50, 0, r40
4865 R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4866 DAG.getConstant(0, INTTY),
4867 R40);
4868
4869 // cmov_logical r20, r42, r23, r20
4870 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
4871
4872 // cmov_logical DST, r40, r41, r20
4873 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
4874
4875 // as_float(DST)
4876 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
4877 } else {
4878     // The following sequence of DAG nodes produces the following IL:
4879 // fabs r1, RHS
4880 // lt r2, 0x1.0p+96f, r1
4881     // cmov_logical r3, r2, 0x1.0p-32f, 1.0f
4882 // mul_ieee r1, RHS, r3
4883 // div_zeroop(infinity) r0, LHS, r1
4884 // mul_ieee DST, r0, r3
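    // The integer constants below are f32 bit patterns: 0x6f800000 is
    // 0x1.0p+96f, 0x2f800000 is 0x1.0p-32f, and 0x3f800000 is 1.0f. When
    // |RHS| is huge, RHS is pre-scaled down so the reciprocal cannot flush
    // to zero; multiplying the result by the same factor cancels the
    // scale, since (LHS / (RHS * s)) * s == LHS / RHS.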
4885
4886 // fabs r1, RHS
4887 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
4888 // lt r2, 0x1.0p+96f, r1
4889 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4890 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
4891 DAG.getConstant(0x6f800000, INTTY), r1);
4892     // cmov_logical r3, r2, 0x1.0p-32f, 1.0f
4893 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
4894 DAG.getConstant(0x2f800000, INTTY),
4895 DAG.getConstant(0x3f800000, INTTY));
4896 // mul_ieee r1, RHS, r3
4897 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
4898 // div_zeroop(infinity) r0, LHS, r1
4899 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
4900 // mul_ieee DST, r0, r3
4901 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
4902 }
4903 return DST;
4904 }
4905
4906 SDValue
4907 AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
4908 {
4909 return SDValue(Op.getNode(), 0);
4910 }