radeon/llvm: use correct intrinsic for CEIL
[mesa.git] / src / gallium / drivers / radeon / AMDILISelLowering.cpp
1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 // This file implements the interfaces that AMDIL uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDILISelLowering.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDILSubtarget.h"
19 #include "AMDILTargetMachine.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/CallingConv.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/DerivedTypes.h"
29 #include "llvm/Instructions.h"
30 #include "llvm/Intrinsics.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetOptions.h"
33
34 using namespace llvm;
35 #define ISDBITCAST ISD::BITCAST
36 #define MVTGLUE MVT::Glue
37 //===----------------------------------------------------------------------===//
38 // Calling Convention Implementation
39 //===----------------------------------------------------------------------===//
40 #include "AMDILGenCallingConv.inc"
41
42 //===----------------------------------------------------------------------===//
43 // TargetLowering Implementation Help Functions Begin
44 //===----------------------------------------------------------------------===//
45 static SDValue
46 getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
47 {
48 DebugLoc DL = Src.getDebugLoc();
49 EVT svt = Src.getValueType().getScalarType();
50 EVT dvt = Dst.getValueType().getScalarType();
51 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
52 if (dvt.bitsGT(svt)) {
53 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
54 } else if (svt.bitsLT(svt)) {
55 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
56 DAG.getConstant(1, MVT::i32));
57 }
58 } else if (svt.isInteger() && dvt.isInteger()) {
59 if (!svt.bitsEq(dvt)) {
60 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
61 } else {
62 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
63 }
64 } else if (svt.isInteger()) {
65 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
66 if (!svt.bitsEq(dvt)) {
67 if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
68 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
69 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
70 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
71 } else {
72 assert(0 && "We only support 32 and 64bit fp types");
73 }
74 }
75 Src = DAG.getNode(opcode, DL, dvt, Src);
76 } else if (dvt.isInteger()) {
77 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
78 if (svt.getSimpleVT().SimpleTy == MVT::f32) {
79 Src = DAG.getNode(opcode, DL, MVT::i32, Src);
80 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
81 Src = DAG.getNode(opcode, DL, MVT::i64, Src);
82 } else {
83 assert(0 && "We only support 32 and 64bit fp types");
84 }
85 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
86 }
87 return Src;
88 }
89 // CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
90 // condition.
91 static AMDILCC::CondCodes
92 CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
93 {
94 switch (CC) {
95 default:
96 {
97 errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
98 assert(0 && "Unknown condition code!");
99 }
100 case ISD::SETO:
101 switch(type) {
102 case MVT::f32:
103 return AMDILCC::IL_CC_F_O;
104 case MVT::f64:
105 return AMDILCC::IL_CC_D_O;
106 default:
107 assert(0 && "Opcode combination not generated correctly!");
108 return AMDILCC::COND_ERROR;
109 };
110 case ISD::SETUO:
111 switch(type) {
112 case MVT::f32:
113 return AMDILCC::IL_CC_F_UO;
114 case MVT::f64:
115 return AMDILCC::IL_CC_D_UO;
116 default:
117 assert(0 && "Opcode combination not generated correctly!");
118 return AMDILCC::COND_ERROR;
119 };
120 case ISD::SETGT:
121 switch (type) {
122 case MVT::i1:
123 case MVT::i8:
124 case MVT::i16:
125 case MVT::i32:
126 return AMDILCC::IL_CC_I_GT;
127 case MVT::f32:
128 return AMDILCC::IL_CC_F_GT;
129 case MVT::f64:
130 return AMDILCC::IL_CC_D_GT;
131 case MVT::i64:
132 return AMDILCC::IL_CC_L_GT;
133 default:
134 assert(0 && "Opcode combination not generated correctly!");
135 return AMDILCC::COND_ERROR;
136 };
137 case ISD::SETGE:
138 switch (type) {
139 case MVT::i1:
140 case MVT::i8:
141 case MVT::i16:
142 case MVT::i32:
143 return AMDILCC::IL_CC_I_GE;
144 case MVT::f32:
145 return AMDILCC::IL_CC_F_GE;
146 case MVT::f64:
147 return AMDILCC::IL_CC_D_GE;
148 case MVT::i64:
149 return AMDILCC::IL_CC_L_GE;
150 default:
151 assert(0 && "Opcode combination not generated correctly!");
152 return AMDILCC::COND_ERROR;
153 };
154 case ISD::SETLT:
155 switch (type) {
156 case MVT::i1:
157 case MVT::i8:
158 case MVT::i16:
159 case MVT::i32:
160 return AMDILCC::IL_CC_I_LT;
161 case MVT::f32:
162 return AMDILCC::IL_CC_F_LT;
163 case MVT::f64:
164 return AMDILCC::IL_CC_D_LT;
165 case MVT::i64:
166 return AMDILCC::IL_CC_L_LT;
167 default:
168 assert(0 && "Opcode combination not generated correctly!");
169 return AMDILCC::COND_ERROR;
170 };
171 case ISD::SETLE:
172 switch (type) {
173 case MVT::i1:
174 case MVT::i8:
175 case MVT::i16:
176 case MVT::i32:
177 return AMDILCC::IL_CC_I_LE;
178 case MVT::f32:
179 return AMDILCC::IL_CC_F_LE;
180 case MVT::f64:
181 return AMDILCC::IL_CC_D_LE;
182 case MVT::i64:
183 return AMDILCC::IL_CC_L_LE;
184 default:
185 assert(0 && "Opcode combination not generated correctly!");
186 return AMDILCC::COND_ERROR;
187 };
188 case ISD::SETNE:
189 switch (type) {
190 case MVT::i1:
191 case MVT::i8:
192 case MVT::i16:
193 case MVT::i32:
194 return AMDILCC::IL_CC_I_NE;
195 case MVT::f32:
196 return AMDILCC::IL_CC_F_NE;
197 case MVT::f64:
198 return AMDILCC::IL_CC_D_NE;
199 case MVT::i64:
200 return AMDILCC::IL_CC_L_NE;
201 default:
202 assert(0 && "Opcode combination not generated correctly!");
203 return AMDILCC::COND_ERROR;
204 };
205 case ISD::SETEQ:
206 switch (type) {
207 case MVT::i1:
208 case MVT::i8:
209 case MVT::i16:
210 case MVT::i32:
211 return AMDILCC::IL_CC_I_EQ;
212 case MVT::f32:
213 return AMDILCC::IL_CC_F_EQ;
214 case MVT::f64:
215 return AMDILCC::IL_CC_D_EQ;
216 case MVT::i64:
217 return AMDILCC::IL_CC_L_EQ;
218 default:
219 assert(0 && "Opcode combination not generated correctly!");
220 return AMDILCC::COND_ERROR;
221 };
222 case ISD::SETUGT:
223 switch (type) {
224 case MVT::i1:
225 case MVT::i8:
226 case MVT::i16:
227 case MVT::i32:
228 return AMDILCC::IL_CC_U_GT;
229 case MVT::f32:
230 return AMDILCC::IL_CC_F_UGT;
231 case MVT::f64:
232 return AMDILCC::IL_CC_D_UGT;
233 case MVT::i64:
234 return AMDILCC::IL_CC_UL_GT;
235 default:
236 assert(0 && "Opcode combination not generated correctly!");
237 return AMDILCC::COND_ERROR;
238 };
239 case ISD::SETUGE:
240 switch (type) {
241 case MVT::i1:
242 case MVT::i8:
243 case MVT::i16:
244 case MVT::i32:
245 return AMDILCC::IL_CC_U_GE;
246 case MVT::f32:
247 return AMDILCC::IL_CC_F_UGE;
248 case MVT::f64:
249 return AMDILCC::IL_CC_D_UGE;
250 case MVT::i64:
251 return AMDILCC::IL_CC_UL_GE;
252 default:
253 assert(0 && "Opcode combination not generated correctly!");
254 return AMDILCC::COND_ERROR;
255 };
256 case ISD::SETULT:
257 switch (type) {
258 case MVT::i1:
259 case MVT::i8:
260 case MVT::i16:
261 case MVT::i32:
262 return AMDILCC::IL_CC_U_LT;
263 case MVT::f32:
264 return AMDILCC::IL_CC_F_ULT;
265 case MVT::f64:
266 return AMDILCC::IL_CC_D_ULT;
267 case MVT::i64:
268 return AMDILCC::IL_CC_UL_LT;
269 default:
270 assert(0 && "Opcode combination not generated correctly!");
271 return AMDILCC::COND_ERROR;
272 };
273 case ISD::SETULE:
274 switch (type) {
275 case MVT::i1:
276 case MVT::i8:
277 case MVT::i16:
278 case MVT::i32:
279 return AMDILCC::IL_CC_U_LE;
280 case MVT::f32:
281 return AMDILCC::IL_CC_F_ULE;
282 case MVT::f64:
283 return AMDILCC::IL_CC_D_ULE;
284 case MVT::i64:
285 return AMDILCC::IL_CC_UL_LE;
286 default:
287 assert(0 && "Opcode combination not generated correctly!");
288 return AMDILCC::COND_ERROR;
289 };
290 case ISD::SETUNE:
291 switch (type) {
292 case MVT::i1:
293 case MVT::i8:
294 case MVT::i16:
295 case MVT::i32:
296 return AMDILCC::IL_CC_U_NE;
297 case MVT::f32:
298 return AMDILCC::IL_CC_F_UNE;
299 case MVT::f64:
300 return AMDILCC::IL_CC_D_UNE;
301 case MVT::i64:
302 return AMDILCC::IL_CC_UL_NE;
303 default:
304 assert(0 && "Opcode combination not generated correctly!");
305 return AMDILCC::COND_ERROR;
306 };
307 case ISD::SETUEQ:
308 switch (type) {
309 case MVT::i1:
310 case MVT::i8:
311 case MVT::i16:
312 case MVT::i32:
313 return AMDILCC::IL_CC_U_EQ;
314 case MVT::f32:
315 return AMDILCC::IL_CC_F_UEQ;
316 case MVT::f64:
317 return AMDILCC::IL_CC_D_UEQ;
318 case MVT::i64:
319 return AMDILCC::IL_CC_UL_EQ;
320 default:
321 assert(0 && "Opcode combination not generated correctly!");
322 return AMDILCC::COND_ERROR;
323 };
324 case ISD::SETOGT:
325 switch (type) {
326 case MVT::f32:
327 return AMDILCC::IL_CC_F_OGT;
328 case MVT::f64:
329 return AMDILCC::IL_CC_D_OGT;
330 case MVT::i1:
331 case MVT::i8:
332 case MVT::i16:
333 case MVT::i32:
334 case MVT::i64:
335 default:
336 assert(0 && "Opcode combination not generated correctly!");
337 return AMDILCC::COND_ERROR;
338 };
339 case ISD::SETOGE:
340 switch (type) {
341 case MVT::f32:
342 return AMDILCC::IL_CC_F_OGE;
343 case MVT::f64:
344 return AMDILCC::IL_CC_D_OGE;
345 case MVT::i1:
346 case MVT::i8:
347 case MVT::i16:
348 case MVT::i32:
349 case MVT::i64:
350 default:
351 assert(0 && "Opcode combination not generated correctly!");
352 return AMDILCC::COND_ERROR;
353 };
354 case ISD::SETOLT:
355 switch (type) {
356 case MVT::f32:
357 return AMDILCC::IL_CC_F_OLT;
358 case MVT::f64:
359 return AMDILCC::IL_CC_D_OLT;
360 case MVT::i1:
361 case MVT::i8:
362 case MVT::i16:
363 case MVT::i32:
364 case MVT::i64:
365 default:
366 assert(0 && "Opcode combination not generated correctly!");
367 return AMDILCC::COND_ERROR;
368 };
369 case ISD::SETOLE:
370 switch (type) {
371 case MVT::f32:
372 return AMDILCC::IL_CC_F_OLE;
373 case MVT::f64:
374 return AMDILCC::IL_CC_D_OLE;
375 case MVT::i1:
376 case MVT::i8:
377 case MVT::i16:
378 case MVT::i32:
379 case MVT::i64:
380 default:
381 assert(0 && "Opcode combination not generated correctly!");
382 return AMDILCC::COND_ERROR;
383 };
384 case ISD::SETONE:
385 switch (type) {
386 case MVT::f32:
387 return AMDILCC::IL_CC_F_ONE;
388 case MVT::f64:
389 return AMDILCC::IL_CC_D_ONE;
390 case MVT::i1:
391 case MVT::i8:
392 case MVT::i16:
393 case MVT::i32:
394 case MVT::i64:
395 default:
396 assert(0 && "Opcode combination not generated correctly!");
397 return AMDILCC::COND_ERROR;
398 };
399 case ISD::SETOEQ:
400 switch (type) {
401 case MVT::f32:
402 return AMDILCC::IL_CC_F_OEQ;
403 case MVT::f64:
404 return AMDILCC::IL_CC_D_OEQ;
405 case MVT::i1:
406 case MVT::i8:
407 case MVT::i16:
408 case MVT::i32:
409 case MVT::i64:
410 default:
411 assert(0 && "Opcode combination not generated correctly!");
412 return AMDILCC::COND_ERROR;
413 };
414 };
415 }
416
417 static unsigned int
418 translateToOpcode(uint64_t CCCode, unsigned int regClass)
419 {
420 switch (CCCode) {
421 case AMDILCC::IL_CC_D_EQ:
422 case AMDILCC::IL_CC_D_OEQ:
423 if (regClass == AMDIL::GPRV2F64RegClassID) {
424 return (unsigned int)AMDIL::DEQ_v2f64;
425 } else {
426 return (unsigned int)AMDIL::DEQ;
427 }
428 case AMDILCC::IL_CC_D_LE:
429 case AMDILCC::IL_CC_D_OLE:
430 case AMDILCC::IL_CC_D_ULE:
431 case AMDILCC::IL_CC_D_GE:
432 case AMDILCC::IL_CC_D_OGE:
433 case AMDILCC::IL_CC_D_UGE:
434 return (unsigned int)AMDIL::DGE;
435 case AMDILCC::IL_CC_D_LT:
436 case AMDILCC::IL_CC_D_OLT:
437 case AMDILCC::IL_CC_D_ULT:
438 case AMDILCC::IL_CC_D_GT:
439 case AMDILCC::IL_CC_D_OGT:
440 case AMDILCC::IL_CC_D_UGT:
441 return (unsigned int)AMDIL::DLT;
442 case AMDILCC::IL_CC_D_NE:
443 case AMDILCC::IL_CC_D_UNE:
444 return (unsigned int)AMDIL::DNE;
445 case AMDILCC::IL_CC_F_EQ:
446 case AMDILCC::IL_CC_F_OEQ:
447 return (unsigned int)AMDIL::FEQ;
448 case AMDILCC::IL_CC_F_LE:
449 case AMDILCC::IL_CC_F_ULE:
450 case AMDILCC::IL_CC_F_OLE:
451 case AMDILCC::IL_CC_F_GE:
452 case AMDILCC::IL_CC_F_UGE:
453 case AMDILCC::IL_CC_F_OGE:
454 return (unsigned int)AMDIL::FGE;
455 case AMDILCC::IL_CC_F_LT:
456 case AMDILCC::IL_CC_F_OLT:
457 case AMDILCC::IL_CC_F_ULT:
458 case AMDILCC::IL_CC_F_GT:
459 case AMDILCC::IL_CC_F_OGT:
460 case AMDILCC::IL_CC_F_UGT:
461 if (regClass == AMDIL::GPRV2F32RegClassID) {
462 return (unsigned int)AMDIL::FLT_v2f32;
463 } else if (regClass == AMDIL::GPRV4F32RegClassID) {
464 return (unsigned int)AMDIL::FLT_v4f32;
465 } else {
466 return (unsigned int)AMDIL::FLT;
467 }
468 case AMDILCC::IL_CC_F_NE:
469 case AMDILCC::IL_CC_F_UNE:
470 return (unsigned int)AMDIL::FNE;
471 case AMDILCC::IL_CC_I_EQ:
472 case AMDILCC::IL_CC_U_EQ:
473 if (regClass == AMDIL::GPRI32RegClassID
474 || regClass == AMDIL::GPRI8RegClassID
475 || regClass == AMDIL::GPRI16RegClassID) {
476 return (unsigned int)AMDIL::IEQ;
477 } else if (regClass == AMDIL::GPRV2I32RegClassID
478 || regClass == AMDIL::GPRV2I8RegClassID
479 || regClass == AMDIL::GPRV2I16RegClassID) {
480 return (unsigned int)AMDIL::IEQ_v2i32;
481 } else if (regClass == AMDIL::GPRV4I32RegClassID
482 || regClass == AMDIL::GPRV4I8RegClassID
483 || regClass == AMDIL::GPRV4I16RegClassID) {
484 return (unsigned int)AMDIL::IEQ_v4i32;
485 } else {
486 assert(!"Unknown reg class!");
487 }
488 case AMDILCC::IL_CC_L_EQ:
489 case AMDILCC::IL_CC_UL_EQ:
490 return (unsigned int)AMDIL::LEQ;
491 case AMDILCC::IL_CC_I_GE:
492 case AMDILCC::IL_CC_I_LE:
493 if (regClass == AMDIL::GPRI32RegClassID
494 || regClass == AMDIL::GPRI8RegClassID
495 || regClass == AMDIL::GPRI16RegClassID) {
496 return (unsigned int)AMDIL::IGE;
497 } else if (regClass == AMDIL::GPRV2I32RegClassID
498 || regClass == AMDIL::GPRI8RegClassID
499 || regClass == AMDIL::GPRI16RegClassID) {
500 return (unsigned int)AMDIL::IGE_v2i32;
501 } else if (regClass == AMDIL::GPRV4I32RegClassID
502 || regClass == AMDIL::GPRI8RegClassID
503 || regClass == AMDIL::GPRI16RegClassID) {
504 return (unsigned int)AMDIL::IGE_v4i32;
505 } else {
506 assert(!"Unknown reg class!");
507 }
508 case AMDILCC::IL_CC_I_LT:
509 case AMDILCC::IL_CC_I_GT:
510 if (regClass == AMDIL::GPRI32RegClassID
511 || regClass == AMDIL::GPRI8RegClassID
512 || regClass == AMDIL::GPRI16RegClassID) {
513 return (unsigned int)AMDIL::ILT;
514 } else if (regClass == AMDIL::GPRV2I32RegClassID
515 || regClass == AMDIL::GPRI8RegClassID
516 || regClass == AMDIL::GPRI16RegClassID) {
517 return (unsigned int)AMDIL::ILT_v2i32;
518 } else if (regClass == AMDIL::GPRV4I32RegClassID
519 || regClass == AMDIL::GPRI8RegClassID
520 || regClass == AMDIL::GPRI16RegClassID) {
521 return (unsigned int)AMDIL::ILT_v4i32;
522 } else {
523 assert(!"Unknown reg class!");
524 }
525 case AMDILCC::IL_CC_L_GE:
526 return (unsigned int)AMDIL::LGE;
527 case AMDILCC::IL_CC_L_LE:
528 return (unsigned int)AMDIL::LLE;
529 case AMDILCC::IL_CC_L_LT:
530 return (unsigned int)AMDIL::LLT;
531 case AMDILCC::IL_CC_L_GT:
532 return (unsigned int)AMDIL::LGT;
533 case AMDILCC::IL_CC_I_NE:
534 case AMDILCC::IL_CC_U_NE:
535 if (regClass == AMDIL::GPRI32RegClassID
536 || regClass == AMDIL::GPRI8RegClassID
537 || regClass == AMDIL::GPRI16RegClassID) {
538 return (unsigned int)AMDIL::INE;
539 } else if (regClass == AMDIL::GPRV2I32RegClassID
540 || regClass == AMDIL::GPRI8RegClassID
541 || regClass == AMDIL::GPRI16RegClassID) {
542 return (unsigned int)AMDIL::INE_v2i32;
543 } else if (regClass == AMDIL::GPRV4I32RegClassID
544 || regClass == AMDIL::GPRI8RegClassID
545 || regClass == AMDIL::GPRI16RegClassID) {
546 return (unsigned int)AMDIL::INE_v4i32;
547 } else {
548 assert(!"Unknown reg class!");
549 }
550 case AMDILCC::IL_CC_U_GE:
551 case AMDILCC::IL_CC_U_LE:
552 if (regClass == AMDIL::GPRI32RegClassID
553 || regClass == AMDIL::GPRI8RegClassID
554 || regClass == AMDIL::GPRI16RegClassID) {
555 return (unsigned int)AMDIL::UGE;
556 } else if (regClass == AMDIL::GPRV2I32RegClassID
557 || regClass == AMDIL::GPRI8RegClassID
558 || regClass == AMDIL::GPRI16RegClassID) {
559 return (unsigned int)AMDIL::UGE_v2i32;
560 } else if (regClass == AMDIL::GPRV4I32RegClassID
561 || regClass == AMDIL::GPRI8RegClassID
562 || regClass == AMDIL::GPRI16RegClassID) {
563 return (unsigned int)AMDIL::UGE_v4i32;
564 } else {
565 assert(!"Unknown reg class!");
566 }
567 case AMDILCC::IL_CC_L_NE:
568 case AMDILCC::IL_CC_UL_NE:
569 return (unsigned int)AMDIL::LNE;
570 case AMDILCC::IL_CC_UL_GE:
571 return (unsigned int)AMDIL::ULGE;
572 case AMDILCC::IL_CC_UL_LE:
573 return (unsigned int)AMDIL::ULLE;
574 case AMDILCC::IL_CC_U_LT:
575 if (regClass == AMDIL::GPRI32RegClassID
576 || regClass == AMDIL::GPRI8RegClassID
577 || regClass == AMDIL::GPRI16RegClassID) {
578 return (unsigned int)AMDIL::ULT;
579 } else if (regClass == AMDIL::GPRV2I32RegClassID
580 || regClass == AMDIL::GPRI8RegClassID
581 || regClass == AMDIL::GPRI16RegClassID) {
582 return (unsigned int)AMDIL::ULT_v2i32;
583 } else if (regClass == AMDIL::GPRV4I32RegClassID
584 || regClass == AMDIL::GPRI8RegClassID
585 || regClass == AMDIL::GPRI16RegClassID) {
586 return (unsigned int)AMDIL::ULT_v4i32;
587 } else {
588 assert(!"Unknown reg class!");
589 }
590 case AMDILCC::IL_CC_U_GT:
591 if (regClass == AMDIL::GPRI32RegClassID
592 || regClass == AMDIL::GPRI8RegClassID
593 || regClass == AMDIL::GPRI16RegClassID) {
594 return (unsigned int)AMDIL::UGT;
595 } else if (regClass == AMDIL::GPRV2I32RegClassID
596 || regClass == AMDIL::GPRI8RegClassID
597 || regClass == AMDIL::GPRI16RegClassID) {
598 return (unsigned int)AMDIL::UGT_v2i32;
599 } else if (regClass == AMDIL::GPRV4I32RegClassID
600 || regClass == AMDIL::GPRI8RegClassID
601 || regClass == AMDIL::GPRI16RegClassID) {
602 return (unsigned int)AMDIL::UGT_v4i32;
603 } else {
604 assert(!"Unknown reg class!");
605 }
606 case AMDILCC::IL_CC_UL_LT:
607 return (unsigned int)AMDIL::ULLT;
608 case AMDILCC::IL_CC_UL_GT:
609 return (unsigned int)AMDIL::ULGT;
610 case AMDILCC::IL_CC_F_UEQ:
611 case AMDILCC::IL_CC_D_UEQ:
612 case AMDILCC::IL_CC_F_ONE:
613 case AMDILCC::IL_CC_D_ONE:
614 case AMDILCC::IL_CC_F_O:
615 case AMDILCC::IL_CC_F_UO:
616 case AMDILCC::IL_CC_D_O:
617 case AMDILCC::IL_CC_D_UO:
618 // we don't care
619 return 0;
620
621 }
622 errs()<<"Opcode: "<<CCCode<<"\n";
623 assert(0 && "Unknown opcode retrieved");
624 return 0;
625 }
626
627 /// Helper function used by LowerFormalArguments
628 static const TargetRegisterClass*
629 getRegClassFromType(unsigned int type) {
630 switch (type) {
631 default:
632 assert(0 && "Passed in type does not match any register classes.");
633 case MVT::i8:
634 return &AMDIL::GPRI8RegClass;
635 case MVT::i16:
636 return &AMDIL::GPRI16RegClass;
637 case MVT::i32:
638 return &AMDIL::GPRI32RegClass;
639 case MVT::f32:
640 return &AMDIL::GPRF32RegClass;
641 case MVT::i64:
642 return &AMDIL::GPRI64RegClass;
643 case MVT::f64:
644 return &AMDIL::GPRF64RegClass;
645 case MVT::v4f32:
646 return &AMDIL::GPRV4F32RegClass;
647 case MVT::v4i8:
648 return &AMDIL::GPRV4I8RegClass;
649 case MVT::v4i16:
650 return &AMDIL::GPRV4I16RegClass;
651 case MVT::v4i32:
652 return &AMDIL::GPRV4I32RegClass;
653 case MVT::v2f32:
654 return &AMDIL::GPRV2F32RegClass;
655 case MVT::v2i8:
656 return &AMDIL::GPRV2I8RegClass;
657 case MVT::v2i16:
658 return &AMDIL::GPRV2I16RegClass;
659 case MVT::v2i32:
660 return &AMDIL::GPRV2I32RegClass;
661 case MVT::v2f64:
662 return &AMDIL::GPRV2F64RegClass;
663 case MVT::v2i64:
664 return &AMDIL::GPRV2I64RegClass;
665 }
666 }
667
668 SDValue
669 AMDILTargetLowering::LowerMemArgument(
670 SDValue Chain,
671 CallingConv::ID CallConv,
672 const SmallVectorImpl<ISD::InputArg> &Ins,
673 DebugLoc dl, SelectionDAG &DAG,
674 const CCValAssign &VA,
675 MachineFrameInfo *MFI,
676 unsigned i) const
677 {
678 // Create the nodes corresponding to a load from this parameter slot.
679 ISD::ArgFlagsTy Flags = Ins[i].Flags;
680
681 bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
682 getTargetMachine().Options.GuaranteedTailCallOpt;
683 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
684
685 // FIXME: For now, all byval parameter objects are marked mutable. This can
686 // be changed with more analysis.
687 // In case of tail call optimization mark all arguments mutable. Since they
688 // could be overwritten by lowering of arguments in case of a tail call.
689 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
690 VA.getLocMemOffset(), isImmutable);
691 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
692
693 if (Flags.isByVal())
694 return FIN;
695 return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
696 MachinePointerInfo::getFixedStack(FI),
697 false, false, false, 0);
698 }
699 //===----------------------------------------------------------------------===//
700 // TargetLowering Implementation Help Functions End
701 //===----------------------------------------------------------------------===//
702 //===----------------------------------------------------------------------===//
703 // Instruction generation functions
704 //===----------------------------------------------------------------------===//
705 uint32_t
706 AMDILTargetLowering::addExtensionInstructions(
707 uint32_t reg, bool signedShift,
708 unsigned int simpleVT) const
709 {
710 int shiftSize = 0;
711 uint32_t LShift, RShift;
712 switch(simpleVT)
713 {
714 default:
715 return reg;
716 case AMDIL::GPRI8RegClassID:
717 shiftSize = 24;
718 LShift = AMDIL::SHL_i8;
719 if (signedShift) {
720 RShift = AMDIL::SHR_i8;
721 } else {
722 RShift = AMDIL::USHR_i8;
723 }
724 break;
725 case AMDIL::GPRV2I8RegClassID:
726 shiftSize = 24;
727 LShift = AMDIL::SHL_v2i8;
728 if (signedShift) {
729 RShift = AMDIL::SHR_v2i8;
730 } else {
731 RShift = AMDIL::USHR_v2i8;
732 }
733 break;
734 case AMDIL::GPRV4I8RegClassID:
735 shiftSize = 24;
736 LShift = AMDIL::SHL_v4i8;
737 if (signedShift) {
738 RShift = AMDIL::SHR_v4i8;
739 } else {
740 RShift = AMDIL::USHR_v4i8;
741 }
742 break;
743 case AMDIL::GPRI16RegClassID:
744 shiftSize = 16;
745 LShift = AMDIL::SHL_i16;
746 if (signedShift) {
747 RShift = AMDIL::SHR_i16;
748 } else {
749 RShift = AMDIL::USHR_i16;
750 }
751 break;
752 case AMDIL::GPRV2I16RegClassID:
753 shiftSize = 16;
754 LShift = AMDIL::SHL_v2i16;
755 if (signedShift) {
756 RShift = AMDIL::SHR_v2i16;
757 } else {
758 RShift = AMDIL::USHR_v2i16;
759 }
760 break;
761 case AMDIL::GPRV4I16RegClassID:
762 shiftSize = 16;
763 LShift = AMDIL::SHL_v4i16;
764 if (signedShift) {
765 RShift = AMDIL::SHR_v4i16;
766 } else {
767 RShift = AMDIL::USHR_v4i16;
768 }
769 break;
770 };
771 uint32_t LoadReg = genVReg(simpleVT);
772 uint32_t tmp1 = genVReg(simpleVT);
773 uint32_t tmp2 = genVReg(simpleVT);
774 generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
775 generateMachineInst(LShift, tmp1, reg, LoadReg);
776 generateMachineInst(RShift, tmp2, tmp1, LoadReg);
777 return tmp2;
778 }
779
780 MachineOperand
781 AMDILTargetLowering::convertToReg(MachineOperand op) const
782 {
783 if (op.isReg()) {
784 return op;
785 } else if (op.isImm()) {
786 uint32_t loadReg
787 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
788 generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
789 .addImm(op.getImm());
790 op.ChangeToRegister(loadReg, false);
791 } else if (op.isFPImm()) {
792 uint32_t loadReg
793 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
794 generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
795 .addFPImm(op.getFPImm());
796 op.ChangeToRegister(loadReg, false);
797 } else if (op.isMBB()) {
798 op.ChangeToRegister(0, false);
799 } else if (op.isFI()) {
800 op.ChangeToRegister(0, false);
801 } else if (op.isCPI()) {
802 op.ChangeToRegister(0, false);
803 } else if (op.isJTI()) {
804 op.ChangeToRegister(0, false);
805 } else if (op.isGlobal()) {
806 op.ChangeToRegister(0, false);
807 } else if (op.isSymbol()) {
808 op.ChangeToRegister(0, false);
809 }/* else if (op.isMetadata()) {
810 op.ChangeToRegister(0, false);
811 }*/
812 return op;
813 }
814
815 void
816 AMDILTargetLowering::generateCMPInstr(
817 MachineInstr *MI,
818 MachineBasicBlock *BB,
819 const TargetInstrInfo& TII)
820 const
821 {
822 MachineOperand DST = MI->getOperand(0);
823 MachineOperand CC = MI->getOperand(1);
824 MachineOperand LHS = MI->getOperand(2);
825 MachineOperand RHS = MI->getOperand(3);
826 int64_t ccCode = CC.getImm();
827 unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
828 unsigned int opCode = translateToOpcode(ccCode, simpleVT);
829 DebugLoc DL = MI->getDebugLoc();
830 MachineBasicBlock::iterator BBI = MI;
831 setPrivateData(BB, BBI, &DL, &TII);
832 if (!LHS.isReg()) {
833 LHS = convertToReg(LHS);
834 }
835 if (!RHS.isReg()) {
836 RHS = convertToReg(RHS);
837 }
838 switch (ccCode) {
839 case AMDILCC::IL_CC_I_EQ:
840 case AMDILCC::IL_CC_I_NE:
841 case AMDILCC::IL_CC_I_GE:
842 case AMDILCC::IL_CC_I_LT:
843 {
844 uint32_t lhsreg = addExtensionInstructions(
845 LHS.getReg(), true, simpleVT);
846 uint32_t rhsreg = addExtensionInstructions(
847 RHS.getReg(), true, simpleVT);
848 generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
849 }
850 break;
851 case AMDILCC::IL_CC_U_EQ:
852 case AMDILCC::IL_CC_U_NE:
853 case AMDILCC::IL_CC_U_GE:
854 case AMDILCC::IL_CC_U_LT:
855 case AMDILCC::IL_CC_D_EQ:
856 case AMDILCC::IL_CC_F_EQ:
857 case AMDILCC::IL_CC_F_OEQ:
858 case AMDILCC::IL_CC_D_OEQ:
859 case AMDILCC::IL_CC_D_NE:
860 case AMDILCC::IL_CC_F_NE:
861 case AMDILCC::IL_CC_F_UNE:
862 case AMDILCC::IL_CC_D_UNE:
863 case AMDILCC::IL_CC_D_GE:
864 case AMDILCC::IL_CC_F_GE:
865 case AMDILCC::IL_CC_D_OGE:
866 case AMDILCC::IL_CC_F_OGE:
867 case AMDILCC::IL_CC_D_LT:
868 case AMDILCC::IL_CC_F_LT:
869 case AMDILCC::IL_CC_F_OLT:
870 case AMDILCC::IL_CC_D_OLT:
871 generateMachineInst(opCode, DST.getReg(),
872 LHS.getReg(), RHS.getReg());
873 break;
874 case AMDILCC::IL_CC_I_GT:
875 case AMDILCC::IL_CC_I_LE:
876 {
877 uint32_t lhsreg = addExtensionInstructions(
878 LHS.getReg(), true, simpleVT);
879 uint32_t rhsreg = addExtensionInstructions(
880 RHS.getReg(), true, simpleVT);
881 generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
882 }
883 break;
884 case AMDILCC::IL_CC_U_GT:
885 case AMDILCC::IL_CC_U_LE:
886 case AMDILCC::IL_CC_F_GT:
887 case AMDILCC::IL_CC_D_GT:
888 case AMDILCC::IL_CC_F_OGT:
889 case AMDILCC::IL_CC_D_OGT:
890 case AMDILCC::IL_CC_F_LE:
891 case AMDILCC::IL_CC_D_LE:
892 case AMDILCC::IL_CC_D_OLE:
893 case AMDILCC::IL_CC_F_OLE:
894 generateMachineInst(opCode, DST.getReg(),
895 RHS.getReg(), LHS.getReg());
896 break;
897 case AMDILCC::IL_CC_F_UGT:
898 case AMDILCC::IL_CC_F_ULE:
899 {
900 uint32_t VReg[4] = {
901 genVReg(simpleVT), genVReg(simpleVT),
902 genVReg(simpleVT), genVReg(simpleVT)
903 };
904 generateMachineInst(opCode, VReg[0],
905 RHS.getReg(), LHS.getReg());
906 generateMachineInst(AMDIL::FNE, VReg[1],
907 RHS.getReg(), RHS.getReg());
908 generateMachineInst(AMDIL::FNE, VReg[2],
909 LHS.getReg(), LHS.getReg());
910 generateMachineInst(AMDIL::BINARY_OR_f32,
911 VReg[3], VReg[0], VReg[1]);
912 generateMachineInst(AMDIL::BINARY_OR_f32,
913 DST.getReg(), VReg[2], VReg[3]);
914 }
915 break;
916 case AMDILCC::IL_CC_F_ULT:
917 case AMDILCC::IL_CC_F_UGE:
918 {
919 uint32_t VReg[4] = {
920 genVReg(simpleVT), genVReg(simpleVT),
921 genVReg(simpleVT), genVReg(simpleVT)
922 };
923 generateMachineInst(opCode, VReg[0],
924 LHS.getReg(), RHS.getReg());
925 generateMachineInst(AMDIL::FNE, VReg[1],
926 RHS.getReg(), RHS.getReg());
927 generateMachineInst(AMDIL::FNE, VReg[2],
928 LHS.getReg(), LHS.getReg());
929 generateMachineInst(AMDIL::BINARY_OR_f32,
930 VReg[3], VReg[0], VReg[1]);
931 generateMachineInst(AMDIL::BINARY_OR_f32,
932 DST.getReg(), VReg[2], VReg[3]);
933 }
934 break;
935 case AMDILCC::IL_CC_D_UGT:
936 case AMDILCC::IL_CC_D_ULE:
937 {
938 uint32_t regID = AMDIL::GPRF64RegClassID;
939 uint32_t VReg[4] = {
940 genVReg(regID), genVReg(regID),
941 genVReg(regID), genVReg(regID)
942 };
943 // The result of a double comparison is a 32bit result
944 generateMachineInst(opCode, VReg[0],
945 RHS.getReg(), LHS.getReg());
946 generateMachineInst(AMDIL::DNE, VReg[1],
947 RHS.getReg(), RHS.getReg());
948 generateMachineInst(AMDIL::DNE, VReg[2],
949 LHS.getReg(), LHS.getReg());
950 generateMachineInst(AMDIL::BINARY_OR_f32,
951 VReg[3], VReg[0], VReg[1]);
952 generateMachineInst(AMDIL::BINARY_OR_f32,
953 DST.getReg(), VReg[2], VReg[3]);
954 }
955 break;
956 case AMDILCC::IL_CC_D_UGE:
957 case AMDILCC::IL_CC_D_ULT:
958 {
959 uint32_t regID = AMDIL::GPRF64RegClassID;
960 uint32_t VReg[4] = {
961 genVReg(regID), genVReg(regID),
962 genVReg(regID), genVReg(regID)
963 };
964 // The result of a double comparison is a 32bit result
965 generateMachineInst(opCode, VReg[0],
966 LHS.getReg(), RHS.getReg());
967 generateMachineInst(AMDIL::DNE, VReg[1],
968 RHS.getReg(), RHS.getReg());
969 generateMachineInst(AMDIL::DNE, VReg[2],
970 LHS.getReg(), LHS.getReg());
971 generateMachineInst(AMDIL::BINARY_OR_f32,
972 VReg[3], VReg[0], VReg[1]);
973 generateMachineInst(AMDIL::BINARY_OR_f32,
974 DST.getReg(), VReg[2], VReg[3]);
975 }
976 break;
977 case AMDILCC::IL_CC_F_UEQ:
978 {
979 uint32_t VReg[4] = {
980 genVReg(simpleVT), genVReg(simpleVT),
981 genVReg(simpleVT), genVReg(simpleVT)
982 };
983 generateMachineInst(AMDIL::FEQ, VReg[0],
984 LHS.getReg(), RHS.getReg());
985 generateMachineInst(AMDIL::FNE, VReg[1],
986 LHS.getReg(), LHS.getReg());
987 generateMachineInst(AMDIL::FNE, VReg[2],
988 RHS.getReg(), RHS.getReg());
989 generateMachineInst(AMDIL::BINARY_OR_f32,
990 VReg[3], VReg[0], VReg[1]);
991 generateMachineInst(AMDIL::BINARY_OR_f32,
992 DST.getReg(), VReg[2], VReg[3]);
993 }
994 break;
995 case AMDILCC::IL_CC_F_ONE:
996 {
997 uint32_t VReg[4] = {
998 genVReg(simpleVT), genVReg(simpleVT),
999 genVReg(simpleVT), genVReg(simpleVT)
1000 };
1001 generateMachineInst(AMDIL::FNE, VReg[0],
1002 LHS.getReg(), RHS.getReg());
1003 generateMachineInst(AMDIL::FEQ, VReg[1],
1004 LHS.getReg(), LHS.getReg());
1005 generateMachineInst(AMDIL::FEQ, VReg[2],
1006 RHS.getReg(), RHS.getReg());
1007 generateMachineInst(AMDIL::BINARY_AND_f32,
1008 VReg[3], VReg[0], VReg[1]);
1009 generateMachineInst(AMDIL::BINARY_AND_f32,
1010 DST.getReg(), VReg[2], VReg[3]);
1011 }
1012 break;
1013 case AMDILCC::IL_CC_D_UEQ:
1014 {
1015 uint32_t regID = AMDIL::GPRF64RegClassID;
1016 uint32_t VReg[4] = {
1017 genVReg(regID), genVReg(regID),
1018 genVReg(regID), genVReg(regID)
1019 };
1020 // The result of a double comparison is a 32bit result
1021 generateMachineInst(AMDIL::DEQ, VReg[0],
1022 LHS.getReg(), RHS.getReg());
1023 generateMachineInst(AMDIL::DNE, VReg[1],
1024 LHS.getReg(), LHS.getReg());
1025 generateMachineInst(AMDIL::DNE, VReg[2],
1026 RHS.getReg(), RHS.getReg());
1027 generateMachineInst(AMDIL::BINARY_OR_f32,
1028 VReg[3], VReg[0], VReg[1]);
1029 generateMachineInst(AMDIL::BINARY_OR_f32,
1030 DST.getReg(), VReg[2], VReg[3]);
1031
1032 }
1033 break;
1034 case AMDILCC::IL_CC_D_ONE:
1035 {
1036 uint32_t regID = AMDIL::GPRF64RegClassID;
1037 uint32_t VReg[4] = {
1038 genVReg(regID), genVReg(regID),
1039 genVReg(regID), genVReg(regID)
1040 };
1041 // The result of a double comparison is a 32bit result
1042 generateMachineInst(AMDIL::DNE, VReg[0],
1043 LHS.getReg(), RHS.getReg());
1044 generateMachineInst(AMDIL::DEQ, VReg[1],
1045 LHS.getReg(), LHS.getReg());
1046 generateMachineInst(AMDIL::DEQ, VReg[2],
1047 RHS.getReg(), RHS.getReg());
1048 generateMachineInst(AMDIL::BINARY_AND_f32,
1049 VReg[3], VReg[0], VReg[1]);
1050 generateMachineInst(AMDIL::BINARY_AND_f32,
1051 DST.getReg(), VReg[2], VReg[3]);
1052
1053 }
1054 break;
1055 case AMDILCC::IL_CC_F_O:
1056 {
1057 uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1058 generateMachineInst(AMDIL::FEQ, VReg[0],
1059 RHS.getReg(), RHS.getReg());
1060 generateMachineInst(AMDIL::FEQ, VReg[1],
1061 LHS.getReg(), LHS.getReg());
1062 generateMachineInst(AMDIL::BINARY_AND_f32,
1063 DST.getReg(), VReg[0], VReg[1]);
1064 }
1065 break;
1066 case AMDILCC::IL_CC_D_O:
1067 {
1068 uint32_t regID = AMDIL::GPRF64RegClassID;
1069 uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1070 // The result of a double comparison is a 32bit result
1071 generateMachineInst(AMDIL::DEQ, VReg[0],
1072 RHS.getReg(), RHS.getReg());
1073 generateMachineInst(AMDIL::DEQ, VReg[1],
1074 LHS.getReg(), LHS.getReg());
1075 generateMachineInst(AMDIL::BINARY_AND_f32,
1076 DST.getReg(), VReg[0], VReg[1]);
1077 }
1078 break;
1079 case AMDILCC::IL_CC_F_UO:
1080 {
1081 uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1082 generateMachineInst(AMDIL::FNE, VReg[0],
1083 RHS.getReg(), RHS.getReg());
1084 generateMachineInst(AMDIL::FNE, VReg[1],
1085 LHS.getReg(), LHS.getReg());
1086 generateMachineInst(AMDIL::BINARY_OR_f32,
1087 DST.getReg(), VReg[0], VReg[1]);
1088 }
1089 break;
1090 case AMDILCC::IL_CC_D_UO:
1091 {
1092 uint32_t regID = AMDIL::GPRF64RegClassID;
1093 uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1094 // The result of a double comparison is a 32bit result
1095 generateMachineInst(AMDIL::DNE, VReg[0],
1096 RHS.getReg(), RHS.getReg());
1097 generateMachineInst(AMDIL::DNE, VReg[1],
1098 LHS.getReg(), LHS.getReg());
1099 generateMachineInst(AMDIL::BINARY_OR_f32,
1100 DST.getReg(), VReg[0], VReg[1]);
1101 }
1102 break;
1103 case AMDILCC::IL_CC_L_LE:
1104 case AMDILCC::IL_CC_L_GE:
1105 case AMDILCC::IL_CC_L_EQ:
1106 case AMDILCC::IL_CC_L_NE:
1107 case AMDILCC::IL_CC_L_LT:
1108 case AMDILCC::IL_CC_L_GT:
1109 case AMDILCC::IL_CC_UL_LE:
1110 case AMDILCC::IL_CC_UL_GE:
1111 case AMDILCC::IL_CC_UL_EQ:
1112 case AMDILCC::IL_CC_UL_NE:
1113 case AMDILCC::IL_CC_UL_LT:
1114 case AMDILCC::IL_CC_UL_GT:
1115 {
1116 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1117 &this->getTargetMachine())->getSubtargetImpl();
1118 if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
1119 generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
1120 } else {
1121 generateLongRelational(MI, opCode);
1122 }
1123 }
1124 break;
1125 case AMDILCC::COND_ERROR:
1126 assert(0 && "Invalid CC code");
1127 break;
1128 };
1129 }
1130
1131 //===----------------------------------------------------------------------===//
1132 // TargetLowering Class Implementation Begins
1133 //===----------------------------------------------------------------------===//
1134 AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
1135 : TargetLowering(TM, new TargetLoweringObjectFileELF())
1136 {
1137 int types[] =
1138 {
1139 (int)MVT::i8,
1140 (int)MVT::i16,
1141 (int)MVT::i32,
1142 (int)MVT::f32,
1143 (int)MVT::f64,
1144 (int)MVT::i64,
1145 (int)MVT::v2i8,
1146 (int)MVT::v4i8,
1147 (int)MVT::v2i16,
1148 (int)MVT::v4i16,
1149 (int)MVT::v4f32,
1150 (int)MVT::v4i32,
1151 (int)MVT::v2f32,
1152 (int)MVT::v2i32,
1153 (int)MVT::v2f64,
1154 (int)MVT::v2i64
1155 };
1156
1157 int IntTypes[] =
1158 {
1159 (int)MVT::i8,
1160 (int)MVT::i16,
1161 (int)MVT::i32,
1162 (int)MVT::i64
1163 };
1164
1165 int FloatTypes[] =
1166 {
1167 (int)MVT::f32,
1168 (int)MVT::f64
1169 };
1170
1171 int VectorTypes[] =
1172 {
1173 (int)MVT::v2i8,
1174 (int)MVT::v4i8,
1175 (int)MVT::v2i16,
1176 (int)MVT::v4i16,
1177 (int)MVT::v4f32,
1178 (int)MVT::v4i32,
1179 (int)MVT::v2f32,
1180 (int)MVT::v2i32,
1181 (int)MVT::v2f64,
1182 (int)MVT::v2i64
1183 };
1184 size_t numTypes = sizeof(types) / sizeof(*types);
1185 size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
1186 size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
1187 size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
1188
1189 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1190 &this->getTargetMachine())->getSubtargetImpl();
1191 // These are the current register classes that are
1192 // supported
1193
1194 addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
1195 addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
1196
1197 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1198 addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
1199 addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
1200 }
1201 if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
1202 addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
1203 addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
1204 addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
1205 setOperationAction(ISD::Constant , MVT::i8 , Legal);
1206 }
1207 if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
1208 addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
1209 addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
1210 addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
1211 setOperationAction(ISD::Constant , MVT::i16 , Legal);
1212 }
1213 addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
1214 addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
1215 addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
1216 addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
1217 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1218 addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
1219 addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
1220 }
1221
1222 for (unsigned int x = 0; x < numTypes; ++x) {
1223 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
1224
1225 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
1226 // We cannot sextinreg, expand to shifts
1227 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1228 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1229 setOperationAction(ISD::FP_ROUND, VT, Expand);
1230 setOperationAction(ISD::OR, VT, Custom);
1231 setOperationAction(ISD::SUBE, VT, Expand);
1232 setOperationAction(ISD::SUBC, VT, Expand);
1233 setOperationAction(ISD::ADD, VT, Custom);
1234 setOperationAction(ISD::ADDE, VT, Expand);
1235 setOperationAction(ISD::ADDC, VT, Expand);
1236 setOperationAction(ISD::SETCC, VT, Custom);
1237 setOperationAction(ISD::BRCOND, VT, Custom);
1238 setOperationAction(ISD::BR_CC, VT, Custom);
1239 setOperationAction(ISD::BR_JT, VT, Expand);
1240 setOperationAction(ISD::BRIND, VT, Expand);
1241 // TODO: Implement custom UREM/SREM routines
1242 setOperationAction(ISD::UREM, VT, Expand);
1243 setOperationAction(ISD::SREM, VT, Expand);
1244 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1245 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1246 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1247 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1248 setOperationAction(ISDBITCAST, VT, Custom);
1249 setOperationAction(ISD::GlobalAddress, VT, Custom);
1250 setOperationAction(ISD::JumpTable, VT, Custom);
1251 setOperationAction(ISD::ConstantPool, VT, Custom);
1252 setOperationAction(ISD::SELECT_CC, VT, Custom);
1253 setOperationAction(ISD::SELECT, VT, Custom);
1254 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1255 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1256 if (VT != MVT::i64 && VT != MVT::v2i64) {
1257 setOperationAction(ISD::SDIV, VT, Custom);
1258 setOperationAction(ISD::UDIV, VT, Custom);
1259 }
1260 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1261 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1262 }
1263 for (unsigned int x = 0; x < numFloatTypes; ++x) {
1264 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
1265
1266 // IL does not have these operations for floating point types
1267 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
1268 setOperationAction(ISD::FP_ROUND, VT, Custom);
1269 setOperationAction(ISD::SETOLT, VT, Expand);
1270 setOperationAction(ISD::SETOGE, VT, Expand);
1271 setOperationAction(ISD::SETOGT, VT, Expand);
1272 setOperationAction(ISD::SETOLE, VT, Expand);
1273 setOperationAction(ISD::SETULT, VT, Expand);
1274 setOperationAction(ISD::SETUGE, VT, Expand);
1275 setOperationAction(ISD::SETUGT, VT, Expand);
1276 setOperationAction(ISD::SETULE, VT, Expand);
1277 }
1278
1279 for (unsigned int x = 0; x < numIntTypes; ++x) {
1280 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
1281
1282 // GPU also does not have divrem function for signed or unsigned
1283 setOperationAction(ISD::SDIVREM, VT, Expand);
1284 setOperationAction(ISD::UDIVREM, VT, Expand);
1285 setOperationAction(ISD::FP_ROUND, VT, Expand);
1286
1287 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
1288 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1289 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1290
1291 // GPU doesn't have a rotl, rotr, or byteswap instruction
1292 setOperationAction(ISD::ROTR, VT, Expand);
1293 setOperationAction(ISD::ROTL, VT, Expand);
1294 setOperationAction(ISD::BSWAP, VT, Expand);
1295
1296 // GPU doesn't have any counting operators
1297 setOperationAction(ISD::CTPOP, VT, Expand);
1298 setOperationAction(ISD::CTTZ, VT, Expand);
1299 setOperationAction(ISD::CTLZ, VT, Expand);
1300 }
1301
1302 for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
1303 {
1304 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
1305
1306 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1307 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1308 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1309 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
1310 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1311 setOperationAction(ISD::FP_ROUND, VT, Expand);
1312 setOperationAction(ISD::SDIVREM, VT, Expand);
1313 setOperationAction(ISD::UDIVREM, VT, Expand);
1314 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1315 // setOperationAction(ISD::VSETCC, VT, Expand);
1316 setOperationAction(ISD::SETCC, VT, Expand);
1317 setOperationAction(ISD::SELECT_CC, VT, Expand);
1318 setOperationAction(ISD::SELECT, VT, Expand);
1319
1320 }
1321 setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
1322 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1323 if (stm->calVersion() < CAL_VERSION_SC_139
1324 || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
1325 setOperationAction(ISD::MUL, MVT::i64, Custom);
1326 }
1327 setOperationAction(ISD::SUB, MVT::i64, Custom);
1328 setOperationAction(ISD::ADD, MVT::i64, Custom);
1329 setOperationAction(ISD::MULHU, MVT::i64, Expand);
1330 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
1331 setOperationAction(ISD::MULHS, MVT::i64, Expand);
1332 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
1333 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1334 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1335 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1336 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
1337 setOperationAction(ISD::Constant , MVT::i64 , Legal);
1338 setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
1339 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
1340 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
1341 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
1342 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
1343 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
1344 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
1345 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
1346 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
1347 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
1348 }
1349 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1350 // we support loading/storing v2f64 but not operations on the type
1351 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
1352 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
1353 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
1354 setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
1355 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
1356 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
1357 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
1358 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
1359 // We want to expand vector conversions into their scalar
1360 // counterparts.
1361 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
1362 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
1363 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
1364 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
1365 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
1366 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
1367 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
1368 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
1369 setOperationAction(ISD::FABS, MVT::f64, Expand);
1370 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
1371 }
1372 // TODO: Fix the UDIV24 algorithm so it works for these
1373 // types correctly. This needs vector comparisons
1374 // for this to work correctly.
1375 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
1376 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
1377 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
1378 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
1379 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
1380 setOperationAction(ISD::SUBC, MVT::Other, Expand);
1381 setOperationAction(ISD::ADDE, MVT::Other, Expand);
1382 setOperationAction(ISD::ADDC, MVT::Other, Expand);
1383 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1384 setOperationAction(ISD::BR_CC, MVT::Other, Custom);
1385 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1386 setOperationAction(ISD::BRIND, MVT::Other, Expand);
1387 setOperationAction(ISD::SETCC, MVT::Other, Custom);
1388 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
1389 setOperationAction(ISD::FDIV, MVT::f32, Custom);
1390 setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
1391 setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
1392
1393 setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
1394 // Use the default implementation.
1395 setOperationAction(ISD::VAARG , MVT::Other, Expand);
1396 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
1397 setOperationAction(ISD::VAEND , MVT::Other, Expand);
1398 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
1399 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
1400 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
1401 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
1402 setOperationAction(ISD::Constant , MVT::i32 , Legal);
1403 setOperationAction(ISD::TRAP , MVT::Other , Legal);
1404
1405 setStackPointerRegisterToSaveRestore(AMDIL::SP);
1406 setSchedulingPreference(Sched::RegPressure);
1407 setPow2DivIsCheap(false);
1408 setPrefLoopAlignment(16);
1409 setSelectIsExpensive(true);
1410 setJumpIsExpensive(true);
1411 computeRegisterProperties();
1412
1413 maxStoresPerMemcpy = 4096;
1414 maxStoresPerMemmove = 4096;
1415 maxStoresPerMemset = 4096;
1416
1417 #undef numTypes
1418 #undef numIntTypes
1419 #undef numVectorTypes
1420 #undef numFloatTypes
1421 }
1422
/// getTargetNodeName - Map an AMDIL-specific SelectionDAG node opcode to a
/// human-readable name for SDNode dumps and DAG-viewer output.  Each entry
/// simply echoes the enumerator's qualified name.  Returns 0 for opcodes not
/// listed here, which makes generic SelectionDAG printing fall back to its
/// default "Unknown node" formatting.
const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
    default: return 0;
    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
    case AMDILISD::DP_TO_FP:  return "AMDILISD::DP_TO_FP";
    case AMDILISD::FP_TO_DP:  return "AMDILISD::FP_TO_DP";
    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
    case AMDILISD::CMOV:  return "AMDILISD::CMOV";
    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
    case AMDILISD::INEGATE:  return "AMDILISD::INEGATE";
    case AMDILISD::MAD:  return "AMDILISD::MAD";
    case AMDILISD::UMAD:  return "AMDILISD::UMAD";
    case AMDILISD::CALL:  return "AMDILISD::CALL";
    case AMDILISD::RET:   return "AMDILISD::RET";
    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
    case AMDILISD::ADD: return "AMDILISD::ADD";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case AMDILISD::AND: return "AMDILISD::AND";
    case AMDILISD::OR: return "AMDILISD::OR";
    case AMDILISD::NOT: return "AMDILISD::NOT";
    case AMDILISD::XOR: return "AMDILISD::XOR";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::SMAX: return "AMDILISD::SMAX";
    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
    case AMDILISD::MOVE: return "AMDILISD::MOVE";
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
    // 64-bit long / double value construction and component extraction.
    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
    // Comparison and control-flow nodes.
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
    // Atomics: _G_ = global memory, _L_ = local memory, _R_ = region
    // memory; the _NORET variants discard the fetched value.
    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
    // NOTE(review): ATOM_L_XOR_NORET has no entry in this table, unlike its
    // _G_ and _R_ counterparts -- presumably an omission; confirm against
    // the AMDILISD opcode enum before relying on its printed name.
    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
    // Append/consume buffer and image intrinsic nodes.
    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";

  };
}
1580 bool
1581 AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1582 const CallInst &I, unsigned Intrinsic) const
1583 {
1584 if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
1585 || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
1586 return false;
1587 }
1588 bool bitCastToInt = false;
1589 unsigned IntNo;
1590 bool isRet = true;
1591 const AMDILSubtarget *STM = &this->getTargetMachine()
1592 .getSubtarget<AMDILSubtarget>();
1593 switch (Intrinsic) {
1594 default: return false; // Don't custom lower most intrinsics.
1595 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
1596 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
1597 IntNo = AMDILISD::ATOM_G_ADD; break;
1598 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
1599 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
1600 isRet = false;
1601 IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
1602 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
1603 case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
1604 IntNo = AMDILISD::ATOM_L_ADD; break;
1605 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
1606 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
1607 isRet = false;
1608 IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
1609 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
1610 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
1611 IntNo = AMDILISD::ATOM_R_ADD; break;
1612 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
1613 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
1614 isRet = false;
1615 IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
1616 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
1617 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
1618 IntNo = AMDILISD::ATOM_G_AND; break;
1619 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
1620 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
1621 isRet = false;
1622 IntNo = AMDILISD::ATOM_G_AND_NORET; break;
1623 case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
1624 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
1625 IntNo = AMDILISD::ATOM_L_AND; break;
1626 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
1627 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
1628 isRet = false;
1629 IntNo = AMDILISD::ATOM_L_AND_NORET; break;
1630 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
1631 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
1632 IntNo = AMDILISD::ATOM_R_AND; break;
1633 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
1634 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
1635 isRet = false;
1636 IntNo = AMDILISD::ATOM_R_AND_NORET; break;
1637 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
1638 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
1639 IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
1640 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
1641 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
1642 isRet = false;
1643 IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
1644 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
1645 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
1646 IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
1647 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
1648 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
1649 isRet = false;
1650 IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
1651 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
1652 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
1653 IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
1654 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
1655 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
1656 isRet = false;
1657 IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
1658 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
1659 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
1660 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1661 IntNo = AMDILISD::ATOM_G_DEC;
1662 } else {
1663 IntNo = AMDILISD::ATOM_G_SUB;
1664 }
1665 break;
1666 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
1667 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
1668 isRet = false;
1669 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1670 IntNo = AMDILISD::ATOM_G_DEC_NORET;
1671 } else {
1672 IntNo = AMDILISD::ATOM_G_SUB_NORET;
1673 }
1674 break;
1675 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
1676 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
1677 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1678 IntNo = AMDILISD::ATOM_L_DEC;
1679 } else {
1680 IntNo = AMDILISD::ATOM_L_SUB;
1681 }
1682 break;
1683 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
1684 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
1685 isRet = false;
1686 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1687 IntNo = AMDILISD::ATOM_L_DEC_NORET;
1688 } else {
1689 IntNo = AMDILISD::ATOM_L_SUB_NORET;
1690 }
1691 break;
1692 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
1693 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
1694 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1695 IntNo = AMDILISD::ATOM_R_DEC;
1696 } else {
1697 IntNo = AMDILISD::ATOM_R_SUB;
1698 }
1699 break;
1700 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
1701 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
1702 isRet = false;
1703 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1704 IntNo = AMDILISD::ATOM_R_DEC_NORET;
1705 } else {
1706 IntNo = AMDILISD::ATOM_R_SUB_NORET;
1707 }
1708 break;
1709 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
1710 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
1711 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1712 IntNo = AMDILISD::ATOM_G_INC;
1713 } else {
1714 IntNo = AMDILISD::ATOM_G_ADD;
1715 }
1716 break;
1717 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
1718 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
1719 isRet = false;
1720 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1721 IntNo = AMDILISD::ATOM_G_INC_NORET;
1722 } else {
1723 IntNo = AMDILISD::ATOM_G_ADD_NORET;
1724 }
1725 break;
1726 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
1727 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
1728 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1729 IntNo = AMDILISD::ATOM_L_INC;
1730 } else {
1731 IntNo = AMDILISD::ATOM_L_ADD;
1732 }
1733 break;
1734 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
1735 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
1736 isRet = false;
1737 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1738 IntNo = AMDILISD::ATOM_L_INC_NORET;
1739 } else {
1740 IntNo = AMDILISD::ATOM_L_ADD_NORET;
1741 }
1742 break;
1743 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
1744 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
1745 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1746 IntNo = AMDILISD::ATOM_R_INC;
1747 } else {
1748 IntNo = AMDILISD::ATOM_R_ADD;
1749 }
1750 break;
1751 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
1752 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
1753 isRet = false;
1754 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1755 IntNo = AMDILISD::ATOM_R_INC_NORET;
1756 } else {
1757 IntNo = AMDILISD::ATOM_R_ADD_NORET;
1758 }
1759 break;
1760 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
1761 IntNo = AMDILISD::ATOM_G_MAX; break;
1762 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
1763 IntNo = AMDILISD::ATOM_G_UMAX; break;
1764 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
1765 isRet = false;
1766 IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
1767 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
1768 isRet = false;
1769 IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
1770 case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
1771 IntNo = AMDILISD::ATOM_L_MAX; break;
1772 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
1773 IntNo = AMDILISD::ATOM_L_UMAX; break;
1774 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
1775 isRet = false;
1776 IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
1777 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
1778 isRet = false;
1779 IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
1780 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
1781 IntNo = AMDILISD::ATOM_R_MAX; break;
1782 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
1783 IntNo = AMDILISD::ATOM_R_UMAX; break;
1784 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
1785 isRet = false;
1786 IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
1787 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
1788 isRet = false;
1789 IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
1790 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
1791 IntNo = AMDILISD::ATOM_G_MIN; break;
1792 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
1793 IntNo = AMDILISD::ATOM_G_UMIN; break;
1794 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
1795 isRet = false;
1796 IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
1797 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
1798 isRet = false;
1799 IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
1800 case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
1801 IntNo = AMDILISD::ATOM_L_MIN; break;
1802 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
1803 IntNo = AMDILISD::ATOM_L_UMIN; break;
1804 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
1805 isRet = false;
1806 IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
1807 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
1808 isRet = false;
1809 IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
1810 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
1811 IntNo = AMDILISD::ATOM_R_MIN; break;
1812 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
1813 IntNo = AMDILISD::ATOM_R_UMIN; break;
1814 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
1815 isRet = false;
1816 IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
1817 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
1818 isRet = false;
1819 IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
1820 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
1821 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
1822 IntNo = AMDILISD::ATOM_G_OR; break;
1823 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
1824 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
1825 isRet = false;
1826 IntNo = AMDILISD::ATOM_G_OR_NORET; break;
1827 case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
1828 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
1829 IntNo = AMDILISD::ATOM_L_OR; break;
1830 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
1831 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
1832 isRet = false;
1833 IntNo = AMDILISD::ATOM_L_OR_NORET; break;
1834 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
1835 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
1836 IntNo = AMDILISD::ATOM_R_OR; break;
1837 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
1838 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
1839 isRet = false;
1840 IntNo = AMDILISD::ATOM_R_OR_NORET; break;
1841 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
1842 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
1843 IntNo = AMDILISD::ATOM_G_SUB; break;
1844 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
1845 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
1846 isRet = false;
1847 IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
1848 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
1849 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
1850 IntNo = AMDILISD::ATOM_L_SUB; break;
1851 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
1852 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
1853 isRet = false;
1854 IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
1855 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
1856 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
1857 IntNo = AMDILISD::ATOM_R_SUB; break;
1858 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
1859 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
1860 isRet = false;
1861 IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
1862 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
1863 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
1864 IntNo = AMDILISD::ATOM_G_RSUB; break;
1865 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
1866 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
1867 isRet = false;
1868 IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
1869 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
1870 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
1871 IntNo = AMDILISD::ATOM_L_RSUB; break;
1872 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
1873 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
1874 isRet = false;
1875 IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
1876 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
1877 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
1878 IntNo = AMDILISD::ATOM_R_RSUB; break;
1879 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
1880 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
1881 isRet = false;
1882 IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
1883 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
1884 bitCastToInt = true;
1885 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
1886 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
1887 IntNo = AMDILISD::ATOM_G_XCHG; break;
1888 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
1889 bitCastToInt = true;
1890 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
1891 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
1892 isRet = false;
1893 IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
1894 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
1895 bitCastToInt = true;
1896 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
1897 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
1898 IntNo = AMDILISD::ATOM_L_XCHG; break;
1899 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
1900 bitCastToInt = true;
1901 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
1902 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
1903 isRet = false;
1904 IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
1905 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
1906 bitCastToInt = true;
1907 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
1908 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
1909 IntNo = AMDILISD::ATOM_R_XCHG; break;
1910 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
1911 bitCastToInt = true;
1912 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
1913 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
1914 isRet = false;
1915 IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
1916 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
1917 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
1918 IntNo = AMDILISD::ATOM_G_XOR; break;
1919 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
1920 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
1921 isRet = false;
1922 IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
1923 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
1924 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
1925 IntNo = AMDILISD::ATOM_L_XOR; break;
1926 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
1927 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
1928 isRet = false;
1929 IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
1930 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
1931 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
1932 IntNo = AMDILISD::ATOM_R_XOR; break;
1933 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
1934 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
1935 isRet = false;
1936 IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
1937 case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
1938 IntNo = AMDILISD::APPEND_ALLOC; break;
1939 case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
1940 isRet = false;
1941 IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
1942 case AMDGPUIntrinsic::AMDIL_append_consume_i32:
1943 IntNo = AMDILISD::APPEND_CONSUME; break;
1944 case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
1945 isRet = false;
1946 IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
1947 };
1948
1949 Info.opc = IntNo;
1950 Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
1951 Info.ptrVal = I.getOperand(0);
1952 Info.offset = 0;
1953 Info.align = 4;
1954 Info.vol = true;
1955 Info.readMem = isRet;
1956 Info.writeMem = true;
1957 return true;
1958 }
1959 // The backend supports 32 and 64 bit floating point immediates
1960 bool
1961 AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1962 {
1963 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1964 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1965 return true;
1966 } else {
1967 return false;
1968 }
1969 }
1970
1971 bool
1972 AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1973 {
1974 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1975 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1976 return false;
1977 } else {
1978 return true;
1979 }
1980 }
1981
1982
1983 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1984 // be zero. Op is expected to be a target specific node. Used by DAG
1985 // combiner.
1986
1987 void
1988 AMDILTargetLowering::computeMaskedBitsForTargetNode(
1989 const SDValue Op,
1990 APInt &KnownZero,
1991 APInt &KnownOne,
1992 const SelectionDAG &DAG,
1993 unsigned Depth) const
1994 {
1995 APInt KnownZero2;
1996 APInt KnownOne2;
1997 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1998 switch (Op.getOpcode()) {
1999 default: break;
2000 case AMDILISD::SELECT_CC:
2001 DAG.ComputeMaskedBits(
2002 Op.getOperand(1),
2003 KnownZero,
2004 KnownOne,
2005 Depth + 1
2006 );
2007 DAG.ComputeMaskedBits(
2008 Op.getOperand(0),
2009 KnownZero2,
2010 KnownOne2
2011 );
2012 assert((KnownZero & KnownOne) == 0
2013 && "Bits known to be one AND zero?");
2014 assert((KnownZero2 & KnownOne2) == 0
2015 && "Bits known to be one AND zero?");
2016 // Only known if known in both the LHS and RHS
2017 KnownOne &= KnownOne2;
2018 KnownZero &= KnownZero2;
2019 break;
2020 };
2021 }
2022
2023 // This is the function that determines which calling convention should
2024 // be used. Currently there is only one calling convention
2025 CCAssignFn*
2026 AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
2027 {
2028 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2029 return CC_AMDIL32;
2030 }
2031
2032 // LowerCallResult - Lower the result values of an ISD::CALL into the
2033 // appropriate copies out of appropriate physical registers. This assumes that
2034 // Chain/InFlag are the input chain/flag to use, and that TheCall is the call
2035 // being lowered. The returns a SDNode with the same number of values as the
2036 // ISD::CALL.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      // getCopyFromReg yields (value:0, chain:1, glue:2). Keep the
      // chain result here, then peel the value and glue off the same
      // node below so successive copies are glued in order.
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);  // the copied result value
      InFlag = Chain.getValue(2);       // glue feeding the next copy
      InVals.push_back(Val);
    }
    // NOTE(review): a non-register location is silently skipped,
    // leaving InVals shorter than Ins — presumably all AMDIL results
    // are returned in registers; confirm against RetCC_AMDIL32.
  }

  return Chain;

}
2074
2075 //===----------------------------------------------------------------------===//
2076 // Other Lowering Hooks
2077 //===----------------------------------------------------------------------===//
2078
// Custom-insert pseudo instructions during scheduling. Only the CMP
// pseudo (in every typed variant) is handled; everything else is left
// in place.
MachineBasicBlock *
AMDILTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const
{
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
    // ExpandCaseToAllTypes emits one case label per typed CMP variant.
    ExpandCaseToAllTypes(AMDIL::CMP);
    generateCMPInstr(MI, BB, TII);
    // The pseudo has been replaced by real compare instructions above.
    MI->eraseFromParent();
    break;
    default:
    break;
  }
  return BB;
}
2094
2095 // Recursively assign SDNodeOrdering to any unordered nodes
2096 // This is necessary to maintain source ordering of instructions
2097 // under -O0 to avoid odd-looking "skipping around" issues.
2098 static const SDValue
2099 Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
2100 {
2101 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
2102 DAG.AssignOrdering( New.getNode(), order );
2103 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
2104 Ordered( DAG, order, New.getOperand(i) );
2105 }
2106 return New;
2107 }
2108
2109 #define LOWER(A) \
2110 case ISD:: A: \
2111 return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
2112
2113 SDValue
2114 AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2115 {
2116 switch (Op.getOpcode()) {
2117 default:
2118 Op.getNode()->dump();
2119 assert(0 && "Custom lowering code for this"
2120 "instruction is not implemented yet!");
2121 break;
2122 LOWER(GlobalAddress);
2123 LOWER(JumpTable);
2124 LOWER(ConstantPool);
2125 LOWER(ExternalSymbol);
2126 LOWER(FP_TO_SINT);
2127 LOWER(FP_TO_UINT);
2128 LOWER(SINT_TO_FP);
2129 LOWER(UINT_TO_FP);
2130 LOWER(ADD);
2131 LOWER(MUL);
2132 LOWER(SUB);
2133 LOWER(FDIV);
2134 LOWER(SDIV);
2135 LOWER(SREM);
2136 LOWER(UDIV);
2137 LOWER(UREM);
2138 LOWER(BUILD_VECTOR);
2139 LOWER(INSERT_VECTOR_ELT);
2140 LOWER(EXTRACT_VECTOR_ELT);
2141 LOWER(EXTRACT_SUBVECTOR);
2142 LOWER(SCALAR_TO_VECTOR);
2143 LOWER(CONCAT_VECTORS);
2144 LOWER(AND);
2145 LOWER(OR);
2146 LOWER(SELECT);
2147 LOWER(SELECT_CC);
2148 LOWER(SETCC);
2149 LOWER(SIGN_EXTEND_INREG);
2150 LOWER(BITCAST);
2151 LOWER(DYNAMIC_STACKALLOC);
2152 LOWER(BRCOND);
2153 LOWER(BR_CC);
2154 LOWER(FP_ROUND);
2155 }
2156 return Op;
2157 }
2158
2159 int
2160 AMDILTargetLowering::getVarArgsFrameOffset() const
2161 {
2162 return VarArgsFrameOffset;
2163 }
2164 #undef LOWER
2165
2166 SDValue
2167 AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
2168 {
2169 SDValue DST = Op;
2170 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
2171 const GlobalValue *G = GADN->getGlobal();
2172 DebugLoc DL = Op.getDebugLoc();
2173 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
2174 if (!GV) {
2175 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2176 } else {
2177 if (GV->hasInitializer()) {
2178 const Constant *C = dyn_cast<Constant>(GV->getInitializer());
2179 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
2180 DST = DAG.getConstant(CI->getValue(), Op.getValueType());
2181 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
2182 DST = DAG.getConstantFP(CF->getValueAPF(),
2183 Op.getValueType());
2184 } else if (dyn_cast<ConstantAggregateZero>(C)) {
2185 EVT VT = Op.getValueType();
2186 if (VT.isInteger()) {
2187 DST = DAG.getConstant(0, VT);
2188 } else {
2189 DST = DAG.getConstantFP(0, VT);
2190 }
2191 } else {
2192 assert(!"lowering this type of Global Address "
2193 "not implemented yet!");
2194 C->dump();
2195 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2196 }
2197 } else {
2198 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2199 }
2200 }
2201 return DST;
2202 }
2203
2204 SDValue
2205 AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
2206 {
2207 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2208 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
2209 return Result;
2210 }
2211 SDValue
2212 AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
2213 {
2214 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2215 EVT PtrVT = Op.getValueType();
2216 SDValue Result;
2217 if (CP->isMachineConstantPoolEntry()) {
2218 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2219 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2220 } else {
2221 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2222 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2223 }
2224 return Result;
2225 }
2226
2227 SDValue
2228 AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
2229 {
2230 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
2231 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
2232 return Result;
2233 }
2234
2235 /// LowerFORMAL_ARGUMENTS - transform physical registers into
2236 /// virtual registers and generate load operations for
2237 /// arguments places on the stack.
2238 /// TODO: isVarArg, hasStructRet, isMemReg
SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  // NOTE(review): the convention is read from the Function rather than
  // from the CallConv parameter — presumably they always agree; verify.
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register argument: mark the physreg live-in and copy it into a
      // fresh virtual register of the matching register class.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        // Narrow the promoted value back down to its declared type.
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack argument: load it from its fixed frame slot.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
2335 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
2336 /// by "Src" to address "Dst" with size and alignment information specified by
2337 /// the specific parameter attribute. The copy will be passed as a byval
2338 /// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  // Deliberately unfinished: aborts in asserts builds. In builds where
  // assert compiles away, control falls through and a memcpy node is
  // emitted with the byval size/alignment from the argument flags.
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}
2351
2352 SDValue
2353 AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
2354 SDValue StackPtr, SDValue Arg,
2355 DebugLoc dl, SelectionDAG &DAG,
2356 const CCValAssign &VA,
2357 ISD::ArgFlagsTy Flags) const
2358 {
2359 unsigned int LocMemOffset = VA.getLocMemOffset();
2360 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2361 PtrOff = DAG.getNode(ISD::ADD,
2362 dl,
2363 getPointerTy(), StackPtr, PtrOff);
2364 if (Flags.isByVal()) {
2365 PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
2366 } else {
2367 PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
2368 MachinePointerInfo::getStack(LocMemOffset),
2369 false, false, 0);
2370 }
2371 return PtrOff;
2372 }
2373 /// LowerCAL - functions arguments are copied from virtual
2374 /// regs to (physical regs)/(stack frame), CALLSEQ_START and
2375 /// CALLSEQ_END are emitted.
2376 /// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are never generated; report that back through the
  // in/out flag so the caller does not expect one.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyize the calling operands, but need to change
  // if we have more than one calling convetion
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  // Open the call sequence, reserving NumBytes of outgoing-arg stack.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    //Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
    }

    if (VA.isRegLoc()) {
      // Register argument: defer the physreg copy until after all
      // stack stores so the copies can be glued to the call.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Join all of the independent argument stores into one chain token.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Copy register arguments into their physregs, glueing each copy
    // to the previous one so they stay adjacent to the call node.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  // The call produces a chain and a glue result.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  // Emit the call node itself.
  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
// checkMADType - Classify whether an ADD whose result feeds load/store
// addressing could be converted into a 24-bit or 32-bit MAD, based on
// the address spaces of its users. Both out-flags default to false.
static void checkMADType(
    SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
{
  bool globalLoadStore = false;
  is24bitMAD = false;
  is32bitMAD = false;
  // The early return below deliberately disables this analysis:
  // callers always observe false/false and everything after this
  // point is dead code, kept for a future re-enable.
  return;
  assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
      "this to work correctly!");
  if (Op.getNode()->use_empty()) {
    return;
  }
  for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
      nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
    SDNode *ptr = *nBegin;
    const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
    // If we are not a LSBaseSDNode then we don't do this
    // optimization.
    // If we are a LSBaseSDNode, but the op is not the offset
    // or base pointer, then we don't do this optimization
    // (i.e. we are the value being stored)
    if (!lsNode ||
        (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
      return;
    }
    const PointerType *PT =
      dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
    unsigned as = PT->getAddressSpace();
    switch(as) {
      default:
        // Unknown address space counts as global.
        // NOTE(review): no break — this falls through into the
        // PRIVATE_ADDRESS case; looks intentional but confirm.
        globalLoadStore = true;
      case AMDILAS::PRIVATE_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::CONSTANT_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::LOCAL_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::REGION_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
          globalLoadStore = true;
        }
        break;
    }
  }
  // Any emulated-memory user forces the wider 32-bit MAD form.
  if (globalLoadStore) {
    is32bitMAD = true;
  } else {
    is24bitMAD = true;
  }
}
2610
// LowerADD - Custom-lower ISD::ADD. 64-bit adds either pass through
// (when the device has hardware long ops, scalar only) or are split
// into a lo/hi 32-bit add pair with manual carry; 32-bit adds involving
// a FrameIndex become ADDADDR, otherwise a plain AMDILISD::ADD.
SDValue
AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  const AMDILSubtarget *stm = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() == MVT::i64) {
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
        && INTTY == MVT::i32) {
      // Hardware 64-bit integer add (scalar i64 only).
      DST = DAG.getNode(AMDILISD::ADD,
          DL,
          OVT,
          LHS, RHS);
    } else {
      SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
      // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
      LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
      RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
      LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
      RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
      INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
      // Carry detection: the low sum being unsigned-less-than one of
      // its addends means the add wrapped; the CMP mask is negated to
      // produce +1, which is folded into the high word.
      SDValue cmp;
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          INTLO, RHSLO);
      cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
      // Reassemble the 64-bit result from the lo/hi halves.
      DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
          INTLO, INTHI);
    }
  } else {
    if (LHS.getOpcode() == ISD::FrameIndex ||
        RHS.getOpcode() == ISD::FrameIndex) {
      // Frame-index arithmetic uses the dedicated ADDADDR node.
      DST = DAG.getNode(AMDILISD::ADDADDR,
          DL,
          OVT,
          LHS, RHS);
    } else {
      if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
          && LHS.getNumOperands()
          && RHS.getNumOperands()) {
        bool is24bitMAD = false;
        bool is32bitMAD = false;
        const ConstantSDNode *LHSConstOpCode =
          dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
        const ConstantSDNode *RHSConstOpCode =
          dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
        // (x << c) + y and (x * y) + z are MAD candidates.
        if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
            || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
            || LHS.getOpcode() == ISD::MUL
            || RHS.getOpcode() == ISD::MUL) {
          SDValue Op1, Op2, Op3;
          // FIXME: Fix this so that it works for unsigned 24bit ops.
          if (LHS.getOpcode() == ISD::MUL) {
            Op1 = LHS.getOperand(0);
            Op2 = LHS.getOperand(1);
            Op3 = RHS;
          } else if (RHS.getOpcode() == ISD::MUL) {
            Op1 = RHS.getOperand(0);
            Op2 = RHS.getOperand(1);
            Op3 = LHS;
          } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
            // shl x, c is rewritten as mul x, (1 << c).
            Op1 = LHS.getOperand(0);
            Op2 = DAG.getConstant(
                1 << LHSConstOpCode->getZExtValue(), MVT::i32);
            Op3 = RHS;
          } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
            Op1 = RHS.getOperand(0);
            Op2 = DAG.getConstant(
                1 << RHSConstOpCode->getZExtValue(), MVT::i32);
            Op3 = LHS;
          }
          // NOTE(review): checkMADType currently always reports
          // false/false (it returns early), so both MAD branches below
          // are dead. Even if re-enabled, DST is unconditionally
          // overwritten by the plain ADD after this block — confirm
          // whether a return/else is missing before re-enabling.
          checkMADType(Op, stm, is24bitMAD, is32bitMAD);
          // We can possibly do a MAD transform!
          if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
            uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
                DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
                Op1, Op2, Op3);
          } else if(is32bitMAD) {
            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
                DL, Tys, DAG.getEntryNode(),
                DAG.getConstant(
                  AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
                Op1, Op2, Op3);
          }
        }
      }
      DST = DAG.getNode(AMDILISD::ADD,
          DL,
          OVT,
          LHS, RHS);
    }
  }
  return DST;
}
2719 SDValue
2720 AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
2721 uint32_t bits) const
2722 {
2723 DebugLoc DL = Op.getDebugLoc();
2724 EVT INTTY = Op.getValueType();
2725 EVT FPTY;
2726 if (INTTY.isVector()) {
2727 FPTY = EVT(MVT::getVectorVT(MVT::f32,
2728 INTTY.getVectorNumElements()));
2729 } else {
2730 FPTY = EVT(MVT::f32);
2731 }
2732 /* static inline uint
2733 __clz_Nbit(uint x)
2734 {
2735 int xor = 0x3f800000U | x;
2736 float tp = as_float(xor);
2737 float t = tp + -1.0f;
2738 uint tint = as_uint(t);
2739 int cmp = (x != 0);
2740 uint tsrc = tint >> 23;
2741 uint tmask = tsrc & 0xffU;
2742 uint cst = (103 + N)U - tmask;
2743 return cmp ? cst : N;
2744 }
2745 */
2746 assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
2747 && "genCLZu16 only works on 32bit types");
2748 // uint x = Op
2749 SDValue x = Op;
2750 // xornode = 0x3f800000 | x
2751 SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
2752 DAG.getConstant(0x3f800000, INTTY), x);
2753 // float tp = as_float(xornode)
2754 SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
2755 // float t = tp + -1.0f
2756 SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
2757 DAG.getConstantFP(-1.0f, FPTY));
2758 // uint tint = as_uint(t)
2759 SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
2760 // int cmp = (x != 0)
2761 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2762 DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
2763 DAG.getConstant(0, INTTY));
2764 // uint tsrc = tint >> 23
2765 SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
2766 DAG.getConstant(23, INTTY));
2767 // uint tmask = tsrc & 0xFF
2768 SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
2769 DAG.getConstant(0xFFU, INTTY));
2770 // uint cst = (103 + bits) - tmask
2771 SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
2772 DAG.getConstant((103U + bits), INTTY), tmask);
2773 // return cmp ? cst : N
2774 cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
2775 DAG.getConstant(bits, INTTY));
2776 return cst;
2777 }
2778
2779 SDValue
2780 AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
2781 {
2782 SDValue DST = SDValue();
2783 DebugLoc DL = Op.getDebugLoc();
2784 EVT INTTY = Op.getValueType();
2785 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2786 &this->getTargetMachine())->getSubtargetImpl();
2787 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
2788 //__clz_32bit(uint u)
2789 //{
2790 // int z = __amdil_ffb_hi(u) ;
2791 // return z < 0 ? 32 : z;
2792 // }
2793 // uint u = op
2794 SDValue u = Op;
2795 // int z = __amdil_ffb_hi(u)
2796 SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
2797 // int cmp = z < 0
2798 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2799 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
2800 z, DAG.getConstant(0, INTTY));
2801 // return cmp ? 32 : z
2802 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
2803 DAG.getConstant(32, INTTY), z);
2804 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
2805 // static inline uint
2806 //__clz_32bit(uint x)
2807 //{
2808 // uint zh = __clz_16bit(x >> 16);
2809 // uint zl = __clz_16bit(x & 0xffffU);
2810 // return zh == 16U ? 16U + zl : zh;
2811 //}
2812 // uint x = Op
2813 SDValue x = Op;
2814 // uint xs16 = x >> 16
2815 SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
2816 DAG.getConstant(16, INTTY));
2817 // uint zh = __clz_16bit(xs16)
2818 SDValue zh = genCLZuN(xs16, DAG, 16);
2819 // uint xa16 = x & 0xFFFF
2820 SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
2821 DAG.getConstant(0xFFFFU, INTTY));
2822 // uint zl = __clz_16bit(xa16)
2823 SDValue zl = genCLZuN(xa16, DAG, 16);
2824 // uint cmp = zh == 16U
2825 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2826 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
2827 zh, DAG.getConstant(16U, INTTY));
2828 // uint zl16 = zl + 16
2829 SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
2830 DAG.getConstant(16, INTTY), zl);
2831 // return cmp ? zl16 : zh
2832 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
2833 cmp, zl16, zh);
2834 } else {
2835 assert(0 && "Attempting to generate a CLZ function with an"
2836 " unknown graphics card");
2837 }
2838 return DST;
2839 }
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  // Count the leading zeros of a 64-bit scalar or i64-element vector
  // value, returning an i32 (or i32-vector) count in [0, 64].
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = x & 0xFFFFFFFF (low 32 bits)
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = x >> 32 (high 32 bits)
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32 (high word was all zeros)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32: zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX: split the 64-bit value into three 23-bit chunks, since the
    // float-trick clz primitive (genCLZuN) handles at most 23 bits.
    // static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (top chunk holds only 64-46 = 18 real bits)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)  (top 18 bits all zero)
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = zhm5zm == 41  (top 41 bits all zero)
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  // Software expansion of f64 -> i64 (includeSign) or f64 -> u64
  // (!includeSign) conversion, scalar or per-lane for vectors.
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // Newer hardware can do the 32-bit conversions natively, so split
    // the double into two 32-bit halves:
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000 / 0xcf800000 are the IEEE-754 single
    // precision bit patterns of 0x1.0p-32 / -0x1.0p+32, yet they are
    // passed to getConstantFP as plain numeric values - confirm the
    // backend materializes the intended powers of two here.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Restore the sign: pick between l and -l by comparing the
      // original input against its absolute value.
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    // Older hardware: decode the double manually from its bit pattern.
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
       // Convert d in to 32-bit components
       long x = as_long(d);
       xhi = LCOMPHI(x);
       xlo = LCOMPLO(x);

       // Generate 'normalized' mantissa
       mhi = xhi | 0x00100000; // hidden bit
       mhi <<= 11;
       temp = xlo >> (32 - 11);
       mhi |= temp
       mlo = xlo << 11;

       // Compute shift right count from exponent
       e = (xhi >> (52-32)) & 0x7ff;
       sr = 1023 + 63 - e;
       srge64 = sr >= 64;
       srge32 = sr >= 32;

       // Compute result for 0 <= sr < 32
       rhi0 = mhi >> (sr &31);
       rlo0 = mlo >> (sr &31);
       temp = mhi << (32 - sr);
       temp |= rlo0;
       rlo0 = sr ? temp : rlo0;

       // Compute result for 32 <= sr
       rhi1 = 0;
       rlo1 = srge64 ? 0 : rhi0;

       // Pick between the 2 results
       rhi = srge32 ? rhi1 : rhi0;
       rlo = srge32 ? rlo1 : rlo0;

       // Optional saturate on overflow
       srlt0 = sr < 0;
       rhi = srlt0 ? MAXVALUE : rhi;
       rlo = srlt0 ? MAXVALUE : rlo;

       // Create long
       res = LCREATE( rlo, rhi );

       // Deal with sign bit (ignoring whether result is signed or unsigned value)
       if (includeSign) {
       sign = ((signed int) xhi) >> 31; fill with sign bit
       sign = LCREATE( sign, sign );
       res += sign;
       res ^= sign;
       }

       return res;
       }
     */
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa: set the hidden bit and left-align
    // the 53-bit mantissa in a 64-bit (hi:lo) pair.
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
    // When sr == 0 the (mhi << 32) term is invalid, so keep plain rlo0.
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: two's-complement negate via add/xor with a
    // 64-bit value made of the replicated sign bit.
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  // Software expansion of f64 -> i32 (includeSign) or f64 -> u32
  // (!includeSign) conversion by decoding the double's bit pattern,
  // scalar or per-lane for vectors.
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
     // Convert d in to 32-bit components
     long x = as_long(d);
     xhi = LCOMPHI(x);
     xlo = LCOMPLO(x);

     // Generate 'normalized' mantissa
     mhi = xhi | 0x00100000; // hidden bit
     mhi <<= 11;
     temp = xlo >> (32 - 11);
     mhi |= temp

     // Compute shift right count from exponent
     e = (xhi >> (52-32)) & 0x7ff;
     sr = 1023 + 31 - e;
     srge32 = sr >= 32;

     // Compute result for 0 <= sr < 32
     res = mhi >> (sr &31);
     res = srge32 ? 0 : res;

     // Optional saturate on overflow
     srlt0 = sr < 0;
     res = srlt0 ? MAXVALUE : res;

     // Deal with sign bit (ignoring whether result is signed or unsigned value)
     if (includeSign) {
     sign = ((signed int) xhi) >> 31; fill with sign bit
     res += sign;
     res ^= sign;
     }

     return res;
     }
   */
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa: set the hidden bit and left-align
  // the top of the mantissa in a single 32-bit word.
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32; values too small (sr >= 32)
  // collapse to 0.
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit: two's-complement negate via add/xor with the
  // replicated sign bit.
  if (includeSign) {
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
3223 SDValue
3224 AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
3225 {
3226 SDValue RHS = Op.getOperand(0);
3227 EVT RHSVT = RHS.getValueType();
3228 MVT RST = RHSVT.getScalarType().getSimpleVT();
3229 EVT LHSVT = Op.getValueType();
3230 MVT LST = LHSVT.getScalarType().getSimpleVT();
3231 DebugLoc DL = Op.getDebugLoc();
3232 SDValue DST;
3233 const AMDILTargetMachine*
3234 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3235 (&this->getTargetMachine());
3236 const AMDILSubtarget*
3237 stm = static_cast<const AMDILSubtarget*>(
3238 amdtm->getSubtargetImpl());
3239 if (RST == MVT::f64 && RHSVT.isVector()
3240 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3241 // We dont support vector 64bit floating point convertions.
3242 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3243 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3244 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3245 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3246 if (!x) {
3247 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3248 } else {
3249 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3250 DST, op, DAG.getTargetConstant(x, MVT::i32));
3251 }
3252 }
3253 } else {
3254 if (RST == MVT::f64
3255 && LST == MVT::i32) {
3256 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3257 DST = SDValue(Op.getNode(), 0);
3258 } else {
3259 DST = genf64toi32(RHS, DAG, true);
3260 }
3261 } else if (RST == MVT::f64
3262 && LST == MVT::i64) {
3263 DST = genf64toi64(RHS, DAG, true);
3264 } else if (RST == MVT::f64
3265 && (LST == MVT::i8 || LST == MVT::i16)) {
3266 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3267 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3268 } else {
3269 SDValue ToInt = genf64toi32(RHS, DAG, true);
3270 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3271 }
3272
3273 } else {
3274 DST = SDValue(Op.getNode(), 0);
3275 }
3276 }
3277 return DST;
3278 }
3279
3280 SDValue
3281 AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
3282 {
3283 SDValue DST;
3284 SDValue RHS = Op.getOperand(0);
3285 EVT RHSVT = RHS.getValueType();
3286 MVT RST = RHSVT.getScalarType().getSimpleVT();
3287 EVT LHSVT = Op.getValueType();
3288 MVT LST = LHSVT.getScalarType().getSimpleVT();
3289 DebugLoc DL = Op.getDebugLoc();
3290 const AMDILTargetMachine*
3291 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3292 (&this->getTargetMachine());
3293 const AMDILSubtarget*
3294 stm = static_cast<const AMDILSubtarget*>(
3295 amdtm->getSubtargetImpl());
3296 if (RST == MVT::f64 && RHSVT.isVector()
3297 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3298 // We dont support vector 64bit floating point convertions.
3299 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3300 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3301 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3302 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3303 if (!x) {
3304 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3305 } else {
3306 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3307 DST, op, DAG.getTargetConstant(x, MVT::i32));
3308 }
3309
3310 }
3311 } else {
3312 if (RST == MVT::f64
3313 && LST == MVT::i32) {
3314 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3315 DST = SDValue(Op.getNode(), 0);
3316 } else {
3317 DST = genf64toi32(RHS, DAG, false);
3318 }
3319 } else if (RST == MVT::f64
3320 && LST == MVT::i64) {
3321 DST = genf64toi64(RHS, DAG, false);
3322 } else if (RST == MVT::f64
3323 && (LST == MVT::i8 || LST == MVT::i16)) {
3324 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3325 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3326 } else {
3327 SDValue ToInt = genf64toi32(RHS, DAG, false);
3328 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3329 }
3330
3331 } else {
3332 DST = SDValue(Op.getNode(), 0);
3333 }
3334 }
3335 return DST;
3336 }
SDValue
AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  // Software expansion of u32 -> f64 conversion, scalar or per-lane
  // for vectors.
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = static_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // Build the bit pattern of (2^52 + x) directly and subtract 2^52;
    // the mantissa then holds exactly x.
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    // Older runtimes: construct the double manually from a computed
    // exponent and a normalized mantissa.
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
SDValue
AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  // Software expansion of u64 -> f64 conversion, scalar or per-lane for
  // vectors.  Three strategies, selected newest hardware first.
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    INTVT = EVT(MVT::i32);
  }
  LONGVT = RHSVT;
  SDValue x = RHS;
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // Newer hardware converts each 32-bit half natively and recombines:
    // double dhi = (double)(as_uint2(x).y);
    // double dlo = (double)(as_uint2(x).x);
    // return mad(dhi, 0x1.0p+32, dlo)
    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
    // NOTE(review): 0x4f800000 is the IEEE-754 single-precision bit
    // pattern of 0x1.0p+32, yet it is passed to getConstantFP as a
    // plain numeric value - confirm the backend materializes the
    // intended 2^32 constant.
    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // Exponent-offset trick on each half:
    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );

  } else {
    // Oldest path: construct the double manually with rounding from a
    // computed exponent and normalized 64-bit mantissa.
    SDValue clz = genCLZu64(x, DAG);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+63), INTVT), clz );
    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        mash, exp, mash );  // exp = exp, or 0 if input was 0

    // Normalize frac: shift the 64-bit value left by clz, computed from
    // 32-bit pieces (clz31 = clz mod 32, clz32 = the >=32 case flag).
    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 31, INTVT ) );
    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 32, INTVT ), clz31 );
    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
    // When clz31 == 0, (xlo >> 32) is invalid; keep t1 instead so the
    // subsequent OR is a no-op.
    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    // clz >= 32: the high result word comes entirely from xlo.
    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    SDValue rlo2 = DAG.getConstant( 0, INTVT );
    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 32, INTVT ) );
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rhi2, rhi1 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rlo2, rlo1 );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Save bits needed to round properly
    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 0x7ff, INTVT ) );

    // Pack exponent and frac
    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
        rlo, DAG.getConstant( 11, INTVT ) );
    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Compute rounding bit (round-to-nearest-even on the 11 dropped bits)
    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 1, INTVT ) );
    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
        round, DAG.getConstant( 0x3ff, INTVT ) );
    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
    round = DAG.getNode( ISD::SRL, DL, INTVT,
        round, DAG.getConstant( 10, INTVT ) );
    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1

    // Add rounding bit
    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
        round, DAG.getConstant( 0, INTVT ) );
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
// Custom lowering for ISD::UINT_TO_FP.
//
// Vector u* -> f64 conversions are always scalarized on HD7XXX+ hardware
// ("vector 64bit floating point convertions" are unsupported).  Scalar
// u32/u64 -> f64 go through the genu32tof64/genu64tof64 software expansions,
// except u32 -> f64 on HD7XXX+ which the hardware handles natively.
// Everything else is returned unchanged for default handling.
SDValue
AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
{
  SDValue RHS = Op.getOperand(0);                    // integer source
  EVT RHSVT = RHS.getValueType();
  MVT RST = RHSVT.getScalarType().getSimpleVT();     // source scalar type
  EVT LHSVT = Op.getValueType();
  MVT LST = LHSVT.getScalarType().getSimpleVT();     // result scalar type
  DebugLoc DL = Op.getDebugLoc();
  SDValue DST;
  // NOTE(review): INTVT/LONGVT are declared but never assigned or read in
  // this routine (unlike the sibling LowerSINT_TO_FP).
  EVT INTVT;
  EVT LONGVT;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = static_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (LST == MVT::f64 && LHSVT.isVector()
      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We dont support vector 64bit floating point convertions.
    // Scalarize: convert each element and rebuild the result vector.
    DST = Op;
    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
      if (!x) {
        // First element seeds the vector via the target VBUILD node.
        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
      } else {
        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
            op, DAG.getTargetConstant(x, MVT::i32));
      }

    }
  } else {

    if (RST == MVT::i32
        && LST == MVT::f64) {
      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
        // HD7XXX+ handles u32 -> f64 natively; leave the node as-is.
        DST = SDValue(Op.getNode(), 0);
      } else {
        DST = genu32tof64(RHS, LHSVT, DAG);
      }
    } else if (RST == MVT::i64
        && LST == MVT::f64) {
      DST = genu64tof64(RHS, LHSVT, DAG);
    } else {
      // No custom lowering for this type combination.
      DST = SDValue(Op.getNode(), 0);
    }
  }
  return DST;
}
3571
3572 SDValue
3573 AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3574 {
3575 SDValue RHS = Op.getOperand(0);
3576 EVT RHSVT = RHS.getValueType();
3577 MVT RST = RHSVT.getScalarType().getSimpleVT();
3578 EVT INTVT;
3579 EVT LONGVT;
3580 SDValue DST;
3581 bool isVec = RHSVT.isVector();
3582 DebugLoc DL = Op.getDebugLoc();
3583 EVT LHSVT = Op.getValueType();
3584 MVT LST = LHSVT.getScalarType().getSimpleVT();
3585 const AMDILTargetMachine*
3586 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3587 (&this->getTargetMachine());
3588 const AMDILSubtarget*
3589 stm = static_cast<const AMDILSubtarget*>(
3590 amdtm->getSubtargetImpl());
3591 if (LST == MVT::f64 && LHSVT.isVector()
3592 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3593 // We dont support vector 64bit floating point convertions.
3594 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3595 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3596 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3597 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
3598 if (!x) {
3599 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3600 } else {
3601 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3602 op, DAG.getTargetConstant(x, MVT::i32));
3603 }
3604
3605 }
3606 } else {
3607
3608 if (isVec) {
3609 LONGVT = EVT(MVT::getVectorVT(MVT::i64,
3610 RHSVT.getVectorNumElements()));
3611 INTVT = EVT(MVT::getVectorVT(MVT::i32,
3612 RHSVT.getVectorNumElements()));
3613 } else {
3614 LONGVT = EVT(MVT::i64);
3615 INTVT = EVT(MVT::i32);
3616 }
3617 MVT RST = RHSVT.getScalarType().getSimpleVT();
3618 if ((RST == MVT::i32 || RST == MVT::i64)
3619 && LST == MVT::f64) {
3620 if (RST == MVT::i32) {
3621 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3622 DST = SDValue(Op.getNode(), 0);
3623 return DST;
3624 }
3625 }
3626 SDValue c31 = DAG.getConstant( 31, INTVT );
3627 SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
3628
3629 SDValue S; // Sign, as 0 or -1
3630 SDValue Sbit; // Sign bit, as one bit, MSB only.
3631 if (RST == MVT::i32) {
3632 Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
3633 S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
3634 } else { // 64-bit case... SRA of 64-bit values is slow
3635 SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
3636 Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
3637 SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
3638 S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
3639 }
3640
3641 // get abs() of input value, given sign as S (0 or -1)
3642 // SpI = RHS + S
3643 SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
3644 // SpIxS = SpI ^ S
3645 SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
3646
3647 // Convert unsigned value to double precision
3648 SDValue R;
3649 if (RST == MVT::i32) {
3650 // r = cast_u32_to_f64(SpIxS)
3651 R = genu32tof64(SpIxS, LHSVT, DAG);
3652 } else {
3653 // r = cast_u64_to_f64(SpIxS)
3654 R = genu64tof64(SpIxS, LHSVT, DAG);
3655 }
3656
3657 // drop in the sign bit
3658 SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
3659 SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
3660 SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
3661 thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
3662 t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
3663 DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
3664 } else {
3665 DST = SDValue(Op.getNode(), 0);
3666 }
3667 }
3668 return DST;
3669 }
// Custom lowering for ISD::SUB.  Only i64/v2i64 needs expansion: the
// subtraction runs on 32-bit halves, and the borrow out of the low half is
// recovered with an unsigned-less-than compare and folded into the high
// half.  All other types pass through unchanged.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    /*const AMDILTargetMachine*
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
      const AMDILSubtarget*
      stm = dynamic_cast<const AMDILSubtarget*>(
          amdtm->getSubtargetImpl());*/
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    // Split both operands into 32-bit low/high components.
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // Borrow: AMDILISD::CMP produces an all-ones mask (-1) when the
    // condition holds (see the GRS comment in genu64tof64), so
    // (LHSLO u< RHSLO) yields exactly the -1 that must be ADDed to the
    // high half to account for the borrow.
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // v2i64: compare the low halves of each lane individually, then
      // rebuild the v2i32 borrow mask.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    // Reassemble the 64-bit (or v2i64) result from the two halves.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
3733 SDValue
3734 AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
3735 {
3736 EVT OVT = Op.getValueType();
3737 SDValue DST;
3738 if (OVT.getScalarType() == MVT::f64) {
3739 DST = LowerFDIV64(Op, DAG);
3740 } else if (OVT.getScalarType() == MVT::f32) {
3741 DST = LowerFDIV32(Op, DAG);
3742 } else {
3743 DST = SDValue(Op.getNode(), 0);
3744 }
3745 return DST;
3746 }
3747
3748 SDValue
3749 AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
3750 {
3751 EVT OVT = Op.getValueType();
3752 SDValue DST;
3753 if (OVT.getScalarType() == MVT::i64) {
3754 DST = LowerSDIV64(Op, DAG);
3755 } else if (OVT.getScalarType() == MVT::i32) {
3756 DST = LowerSDIV32(Op, DAG);
3757 } else if (OVT.getScalarType() == MVT::i16
3758 || OVT.getScalarType() == MVT::i8) {
3759 DST = LowerSDIV24(Op, DAG);
3760 } else {
3761 DST = SDValue(Op.getNode(), 0);
3762 }
3763 return DST;
3764 }
3765
3766 SDValue
3767 AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
3768 {
3769 EVT OVT = Op.getValueType();
3770 SDValue DST;
3771 if (OVT.getScalarType() == MVT::i64) {
3772 DST = LowerUDIV64(Op, DAG);
3773 } else if (OVT.getScalarType() == MVT::i32) {
3774 DST = LowerUDIV32(Op, DAG);
3775 } else if (OVT.getScalarType() == MVT::i16
3776 || OVT.getScalarType() == MVT::i8) {
3777 DST = LowerUDIV24(Op, DAG);
3778 } else {
3779 DST = SDValue(Op.getNode(), 0);
3780 }
3781 return DST;
3782 }
3783
3784 SDValue
3785 AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
3786 {
3787 EVT OVT = Op.getValueType();
3788 SDValue DST;
3789 if (OVT.getScalarType() == MVT::i64) {
3790 DST = LowerSREM64(Op, DAG);
3791 } else if (OVT.getScalarType() == MVT::i32) {
3792 DST = LowerSREM32(Op, DAG);
3793 } else if (OVT.getScalarType() == MVT::i16) {
3794 DST = LowerSREM16(Op, DAG);
3795 } else if (OVT.getScalarType() == MVT::i8) {
3796 DST = LowerSREM8(Op, DAG);
3797 } else {
3798 DST = SDValue(Op.getNode(), 0);
3799 }
3800 return DST;
3801 }
3802
3803 SDValue
3804 AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
3805 {
3806 EVT OVT = Op.getValueType();
3807 SDValue DST;
3808 if (OVT.getScalarType() == MVT::i64) {
3809 DST = LowerUREM64(Op, DAG);
3810 } else if (OVT.getScalarType() == MVT::i32) {
3811 DST = LowerUREM32(Op, DAG);
3812 } else if (OVT.getScalarType() == MVT::i16) {
3813 DST = LowerUREM16(Op, DAG);
3814 } else if (OVT.getScalarType() == MVT::i8) {
3815 DST = LowerUREM8(Op, DAG);
3816 } else {
3817 DST = SDValue(Op.getNode(), 0);
3818 }
3819 return DST;
3820 }
3821
// Custom lowering for ISD::MUL.  Only i64/v2i64 is expanded here; the
// 64-bit product is assembled from 32-bit multiplies:
//   low  = mullo_u(l1, l0)
//   high = h0*l1 + h1*l0 + mulhi_u(l1, l0)
// Other types pass through unchanged.
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    // Non-64-bit multiplies need no custom lowering.
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    // Split both operands into 32-bit low/high components.
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    // Reassemble the 64-bit (or v2i64) result from the two halves.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}
// Custom lowering for ISD::BUILD_VECTOR.  Element 0 seeds a target VBUILD
// node; the remaining defined (non-undef) elements are inserted one at a
// time.  The switch intentionally falls through (case 4 -> 3 -> 2), so a
// four-operand build inserts operands 3, 2, then 1.
// NOTE(review): the insert positions 7/6/5 appear to be an offset encoding
// of element indices 3/2/1 used by the target's INSERT_VECTOR_ELT
// handling — confirm against the ISel patterns before changing.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Seed the result vector from operand 0.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // intentional fallthrough
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // intentional fallthrough
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
3950
// Custom lowering for ISD::INSERT_VECTOR_ELT via the target VINSERT node.
// VINSERT takes two immediate masks derived from the insert position.
// With a constant index a single VINSERT suffices; with a dynamic index
// every possible position is materialized and the matching one is picked
// with compare + cmov.
// NOTE(review): mask2/mask3 are per-byte lane selectors built from the
// patterns 0x04030201 / 0x01010101; their exact VINSERT semantics are
// defined by the target's ISel/printer — confirm there before editing.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  if (!VT.isVector()) {
    // Inserting into a scalar is the identity on operand 0.
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Insert the new element unless it is undef, in which case reuse the
  // destination vector's value instead.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static insertion at a known element index.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic index: build the insertion result for position 0
    // (swizzleNum is still 0 here), then for every other position, and
    // select the one whose position equals the runtime index operand.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      // c = (index == x), splatted across the vector for the cmov.
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}
4006
4007 SDValue
4008 AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
4009 SelectionDAG &DAG) const
4010 {
4011 EVT VT = Op.getValueType();
4012 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4013 uint64_t swizzleNum = 0;
4014 DebugLoc DL = Op.getDebugLoc();
4015 SDValue Res;
4016 if (!Op.getOperand(0).getValueType().isVector()) {
4017 Res = Op.getOperand(0);
4018 return Res;
4019 }
4020 if (CSDN) {
4021 // Static vector extraction
4022 swizzleNum = CSDN->getZExtValue() + 1;
4023 Res = DAG.getNode(AMDILISD::VEXTRACT,
4024 DL, VT,
4025 Op.getOperand(0),
4026 DAG.getTargetConstant(swizzleNum, MVT::i32));
4027 } else {
4028 SDValue Op1 = Op.getOperand(1);
4029 uint32_t vecSize = 4;
4030 SDValue Op0 = Op.getOperand(0);
4031 SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
4032 DL, VT, Op0,
4033 DAG.getTargetConstant(1, MVT::i32));
4034 if (Op0.getValueType().isVector()) {
4035 vecSize = Op0.getValueType().getVectorNumElements();
4036 }
4037 for (uint32_t x = 2; x <= vecSize; ++x) {
4038 SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
4039 DL, VT, Op0,
4040 DAG.getTargetConstant(x, MVT::i32));
4041 SDValue c = DAG.getNode(AMDILISD::CMP,
4042 DL, Op1.getValueType(),
4043 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
4044 Op1, DAG.getConstant(x, MVT::i32));
4045 res = DAG.getNode(AMDILISD::CMOVLOG, DL,
4046 VT, c, t, res);
4047
4048 }
4049 Res = res;
4050 }
4051 return Res;
4052 }
4053
// Custom lowering for ISD::EXTRACT_SUBVECTOR: extract vecSize consecutive
// elements starting at the (constant or dynamic) offset and rebuild them
// into the result vector one element at a time.
SDValue
AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
    SelectionDAG &DAG) const
{
  uint32_t vecSize = Op.getValueType().getVectorNumElements();
  SDValue src = Op.getOperand(0);
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  uint64_t offset = 0;
  EVT vecType = Op.getValueType().getVectorElementType();
  DebugLoc DL = Op.getDebugLoc();
  SDValue Result;
  if (CSDN) {
    // Static offset: extract elements offset .. offset + vecSize - 1.
    offset = CSDN->getZExtValue();
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
    Result = DAG.getNode(AMDILISD::VBUILD, DL,
        Op.getValueType(), Result);
    for (uint32_t x = 1; x < vecSize; ++x) {
      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
          src, DAG.getConstant(offset + x, MVT::i32));
      // Undef elements are skipped; VBUILD's lanes remain as seeded.
      if (elt.getOpcode() != ISD::UNDEF) {
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
            Op.getValueType(), Result, elt,
            DAG.getConstant(x, MVT::i32));
      }
    }
  } else {
    // Dynamic offset: advance the runtime index for each element.
    // NOTE(review): the ADD advancing `idx` uses `vecType` (the *element*
    // type) as its result type, which looks wrong for floating-point
    // element types — confirm intended behavior before relying on it.
    SDValue idx = Op.getOperand(1);
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
        DL, vecType, src, idx);
    Result = DAG.getNode(AMDILISD::VBUILD, DL,
        Op.getValueType(), Result);
    for (uint32_t x = 1; x < vecSize; ++x) {
      idx = DAG.getNode(ISD::ADD, DL, vecType,
          idx, DAG.getConstant(1, MVT::i32));
      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
          src, idx);
      if (elt.getOpcode() != ISD::UNDEF) {
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
            Op.getValueType(), Result, elt, idx);
      }
    }
  }
  return Result;
}
4099 SDValue
4100 AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
4101 SelectionDAG &DAG) const
4102 {
4103 SDValue Res = DAG.getNode(AMDILISD::VBUILD,
4104 Op.getDebugLoc(),
4105 Op.getValueType(),
4106 Op.getOperand(0));
4107 return Res;
4108 }
4109 SDValue
4110 AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
4111 {
4112 SDValue andOp;
4113 andOp = DAG.getNode(
4114 AMDILISD::AND,
4115 Op.getDebugLoc(),
4116 Op.getValueType(),
4117 Op.getOperand(0),
4118 Op.getOperand(1));
4119 return andOp;
4120 }
4121 SDValue
4122 AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
4123 {
4124 SDValue orOp;
4125 orOp = DAG.getNode(AMDILISD::OR,
4126 Op.getDebugLoc(),
4127 Op.getValueType(),
4128 Op.getOperand(0),
4129 Op.getOperand(1));
4130 return orOp;
4131 }
4132 SDValue
4133 AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
4134 {
4135 SDValue Cond = Op.getOperand(0);
4136 SDValue LHS = Op.getOperand(1);
4137 SDValue RHS = Op.getOperand(2);
4138 DebugLoc DL = Op.getDebugLoc();
4139 Cond = getConversionNode(DAG, Cond, Op, true);
4140 Cond = DAG.getNode(AMDILISD::CMOVLOG,
4141 DL,
4142 Op.getValueType(), Cond, LHS, RHS);
4143 return Cond;
4144 }
// Custom lowering for ISD::SELECT_CC: compare LHS/RHS with the target CMP
// node, then select TRUE/FALSE with a conditional move.  When both select
// results are the i32 constants (-1, 0) the compare mask itself is the
// answer and the cmov is elided; for (0, -1) a NOT of the mask suffices.
SDValue
AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TRUE = Op.getOperand(2);
  SDValue FALSE = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  DebugLoc DL = Op.getDebugLoc();
  bool skipCMov = false;  // true when the cmov can be elided entirely
  bool genINot = false;   // true when the compare mask must be inverted
  EVT OVT = Op.getValueType();

  // Check for possible elimination of cmov
  if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
    const ConstantSDNode *trueConst
      = dyn_cast<ConstantSDNode>( TRUE.getNode() );
    const ConstantSDNode *falseConst
      = dyn_cast<ConstantSDNode>( FALSE.getNode() );
    if (trueConst && falseConst) {
      // both possible result values are constants
      if (trueConst->isAllOnesValue()
          && falseConst->isNullValue()) { // and convenient constants
        skipCMov = true;
      }
      else if (trueConst->isNullValue()
          && falseConst->isAllOnesValue()) { // less convenient
        skipCMov = true;
        genINot = true;
      }
    }
  }
  // Translate the ISD condition code into the AMDIL encoding for the
  // compared operand type.
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      AMDILISD::CMP,
      DL,
      LHS.getValueType(),
      DAG.getConstant(AMDILCC, MVT::i32),
      LHS,
      RHS);
  // Reshape the compare result to the select's result type.
  Cond = getConversionNode(DAG, Cond, Op, true);
  if (genINot) {
    Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
  }
  if (!skipCMov) {
    Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
  }
  return Cond;
}
4199 SDValue
4200 AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
4201 {
4202 SDValue Cond;
4203 SDValue LHS = Op.getOperand(0);
4204 SDValue RHS = Op.getOperand(1);
4205 SDValue CC = Op.getOperand(2);
4206 DebugLoc DL = Op.getDebugLoc();
4207 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
4208 unsigned int AMDILCC = CondCCodeToCC(
4209 SetCCOpcode,
4210 LHS.getValueType().getSimpleVT().SimpleTy);
4211 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
4212 Cond = DAG.getNode(
4213 AMDILISD::CMP,
4214 DL,
4215 LHS.getValueType(),
4216 DAG.getConstant(AMDILCC, MVT::i32),
4217 LHS,
4218 RHS);
4219 Cond = getConversionNode(DAG, Cond, Op, true);
4220 Cond = DAG.getNode(
4221 ISD::AND,
4222 DL,
4223 Cond.getValueType(),
4224 DAG.getConstant(1, Cond.getValueType()),
4225 Cond);
4226 return Cond;
4227 }
4228
4229 SDValue
4230 AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
4231 {
4232 SDValue Data = Op.getOperand(0);
4233 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
4234 DebugLoc DL = Op.getDebugLoc();
4235 EVT DVT = Data.getValueType();
4236 EVT BVT = BaseType->getVT();
4237 unsigned baseBits = BVT.getScalarType().getSizeInBits();
4238 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
4239 unsigned shiftBits = srcBits - baseBits;
4240 if (srcBits < 32) {
4241 // If the op is less than 32 bits, then it needs to extend to 32bits
4242 // so it can properly keep the upper bits valid.
4243 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
4244 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
4245 shiftBits = 32 - baseBits;
4246 DVT = IVT;
4247 }
4248 SDValue Shift = DAG.getConstant(shiftBits, DVT);
4249 // Shift left by 'Shift' bits.
4250 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
4251 // Signed shift Right by 'Shift' bits.
4252 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
4253 if (srcBits < 32) {
4254 // Once the sign extension is done, the op needs to be converted to
4255 // its original type.
4256 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
4257 }
4258 return Data;
4259 }
4260 EVT
4261 AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
4262 {
4263 int iSize = (size * numEle);
4264 int vEle = (iSize >> ((size == 64) ? 6 : 5));
4265 if (!vEle) {
4266 vEle = 1;
4267 }
4268 if (size == 64) {
4269 if (vEle == 1) {
4270 return EVT(MVT::i64);
4271 } else {
4272 return EVT(MVT::getVectorVT(MVT::i64, vEle));
4273 }
4274 } else {
4275 if (vEle == 1) {
4276 return EVT(MVT::i32);
4277 } else {
4278 return EVT(MVT::getVectorVT(MVT::i32, vEle));
4279 }
4280 }
4281 }
4282
4283 SDValue
4284 AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
4285 {
4286 SDValue Src = Op.getOperand(0);
4287 SDValue Dst = Op;
4288 SDValue Res;
4289 DebugLoc DL = Op.getDebugLoc();
4290 EVT SrcVT = Src.getValueType();
4291 EVT DstVT = Dst.getValueType();
4292 // Lets bitcast the floating point types to an
4293 // equivalent integer type before converting to vectors.
4294 if (SrcVT.getScalarType().isFloatingPoint()) {
4295 Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
4296 SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
4297 SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
4298 Src);
4299 SrcVT = Src.getValueType();
4300 }
4301 uint32_t ScalarSrcSize = SrcVT.getScalarType()
4302 .getSimpleVT().getSizeInBits();
4303 uint32_t ScalarDstSize = DstVT.getScalarType()
4304 .getSimpleVT().getSizeInBits();
4305 uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
4306 uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
4307 bool isVec = SrcVT.isVector();
4308 if (DstVT.getScalarType().isInteger() &&
4309 (SrcVT.getScalarType().isInteger()
4310 || SrcVT.getScalarType().isFloatingPoint())) {
4311 if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
4312 || (ScalarSrcSize == 64
4313 && DstNumEle == 4
4314 && ScalarDstSize == 16)) {
4315 // This is the problematic case when bitcasting i64 <-> <4 x i16>
4316 // This approach is a little different as we cannot generate a
4317 // <4 x i64> vector
4318 // as that is illegal in our backend and we are already past
4319 // the DAG legalizer.
4320 // So, in this case, we will do the following conversion.
4321 // Case 1:
4322 // %dst = <4 x i16> %src bitconvert i64 ==>
4323 // %tmp = <4 x i16> %src convert <4 x i32>
4324 // %tmp = <4 x i32> %tmp and 0xFFFF
4325 // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
4326 // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
4327 // %dst = <2 x i32> %tmp bitcast i64
4328 // case 2:
4329 // %dst = i64 %src bitconvert <4 x i16> ==>
4330 // %tmp = i64 %src bitcast <2 x i32>
4331 // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
4332 // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
4333 // %tmp = <4 x i32> %tmp and 0xFFFF
4334 // %dst = <4 x i16> %tmp bitcast <4 x i32>
4335 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
4336 DAG.getConstant(0xFFFF, MVT::i32));
4337 SDValue const16 = DAG.getConstant(16, MVT::i32);
4338 if (ScalarDstSize == 64) {
4339 // case 1
4340 Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
4341 Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
4342 SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4343 Op, DAG.getConstant(0, MVT::i32));
4344 SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4345 Op, DAG.getConstant(1, MVT::i32));
4346 y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
4347 SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4348 Op, DAG.getConstant(2, MVT::i32));
4349 SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4350 Op, DAG.getConstant(3, MVT::i32));
4351 w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
4352 x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
4353 y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
4354 Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
4355 return Res;
4356 } else {
4357 // case 2
4358 SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
4359 SDValue lor16
4360 = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
4361 SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
4362 SDValue hir16
4363 = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
4364 SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
4365 MVT::v4i32, lo);
4366 SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
4367 getPointerTy(), DAG.getConstant(1, MVT::i32));
4368 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
4369 resVec, lor16, idxVal);
4370 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
4371 getPointerTy(), DAG.getConstant(2, MVT::i32));
4372 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
4373 resVec, hi, idxVal);
4374 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
4375 getPointerTy(), DAG.getConstant(3, MVT::i32));
4376 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
4377 resVec, hir16, idxVal);
4378 resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
4379 Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
4380 return Res;
4381 }
4382 } else {
4383 // There are four cases we need to worry about for bitcasts
4384 // where the size of all
4385 // source, intermediates and result is <= 128 bits, unlike
4386 // the above case
4387 // 1) Sub32bit bitcast 32bitAlign
4388 // %dst = <4 x i8> bitcast i32
4389 // (also <[2|4] x i16> to <[2|4] x i32>)
4390 // 2) 32bitAlign bitcast Sub32bit
4391 // %dst = i32 bitcast <4 x i8>
4392 // 3) Sub32bit bitcast LargerSub32bit
4393 // %dst = <2 x i8> bitcast i16
4394 // (also <4 x i8> to <2 x i16>)
4395 // 4) Sub32bit bitcast SmallerSub32bit
4396 // %dst = i16 bitcast <2 x i8>
4397 // (also <2 x i16> to <4 x i8>)
4398 // This also only handles types that are powers of two
4399 if ((ScalarDstSize & (ScalarDstSize - 1))
4400 || (ScalarSrcSize & (ScalarSrcSize - 1))) {
4401 } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
4402 // case 1:
4403 EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
4404 #if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
4405 SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
4406 #else
4407 SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
4408 DAG.getUNDEF(IntTy.getScalarType()));
4409 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4410 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4411 getPointerTy(), DAG.getConstant(x, MVT::i32));
4412 SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
4413 SrcVT.getScalarType(), Src,
4414 DAG.getConstant(x, MVT::i32));
4415 temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
4416 res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
4417 res, temp, idx);
4418 }
4419 #endif
4420 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
4421 DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
4422 SDValue *newEle = new SDValue[SrcNumEle];
4423 res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
4424 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4425 newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
4426 IntTy.getScalarType(), res,
4427 DAG.getConstant(x, MVT::i32));
4428 }
4429 uint32_t Ratio = SrcNumEle / DstNumEle;
4430 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4431 if (x % Ratio) {
4432 newEle[x] = DAG.getNode(ISD::SHL, DL,
4433 IntTy.getScalarType(), newEle[x],
4434 DAG.getConstant(ScalarSrcSize * (x % Ratio),
4435 MVT::i32));
4436 }
4437 }
4438 for (uint32_t x = 0; x < SrcNumEle; x += 2) {
4439 newEle[x] = DAG.getNode(ISD::OR, DL,
4440 IntTy.getScalarType(), newEle[x], newEle[x + 1]);
4441 }
4442 if (ScalarSrcSize == 8) {
4443 for (uint32_t x = 0; x < SrcNumEle; x += 4) {
4444 newEle[x] = DAG.getNode(ISD::OR, DL,
4445 IntTy.getScalarType(), newEle[x], newEle[x + 2]);
4446 }
4447 if (DstNumEle == 1) {
4448 Dst = newEle[0];
4449 } else {
4450 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
4451 newEle[0]);
4452 for (uint32_t x = 1; x < DstNumEle; ++x) {
4453 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4454 getPointerTy(), DAG.getConstant(x, MVT::i32));
4455 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4456 DstVT, Dst, newEle[x * 4], idx);
4457 }
4458 }
4459 } else {
4460 if (DstNumEle == 1) {
4461 Dst = newEle[0];
4462 } else {
4463 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
4464 newEle[0]);
4465 for (uint32_t x = 1; x < DstNumEle; ++x) {
4466 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4467 getPointerTy(), DAG.getConstant(x, MVT::i32));
4468 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4469 DstVT, Dst, newEle[x * 2], idx);
4470 }
4471 }
4472 }
4473 delete [] newEle;
4474 return Dst;
4475 } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
4476 // case 2:
4477 EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
4478 SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
4479 DAG.getUNDEF(IntTy.getScalarType()));
4480 uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
4481 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4482 for (uint32_t y = 0; y < mult; ++y) {
4483 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4484 getPointerTy(),
4485 DAG.getConstant(x * mult + y, MVT::i32));
4486 SDValue t;
4487 if (SrcNumEle > 1) {
4488 t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
4489 DL, SrcVT.getScalarType(), Src,
4490 DAG.getConstant(x, MVT::i32));
4491 } else {
4492 t = Src;
4493 }
4494 if (y != 0) {
4495 t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
4496 t, DAG.getConstant(y * ScalarDstSize,
4497 MVT::i32));
4498 }
4499 vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
4500 DL, IntTy, vec, t, idx);
4501 }
4502 }
4503 Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
4504 return Dst;
4505 } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
4506 // case 3:
4507 SDValue *numEle = new SDValue[SrcNumEle];
4508 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4509 numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
4510 MVT::i8, Src, DAG.getConstant(x, MVT::i32));
4511 numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
4512 numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
4513 DAG.getConstant(0xFF, MVT::i16));
4514 }
4515 for (uint32_t x = 1; x < SrcNumEle; x += 2) {
4516 numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
4517 DAG.getConstant(8, MVT::i16));
4518 numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
4519 numEle[x-1], numEle[x]);
4520 }
4521 if (DstNumEle > 1) {
4522 // If we are not a scalar i16, the only other case is a
4523 // v2i16 since we can't have v8i8 at this point, v4i16
4524 // cannot be generated
4525 Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
4526 numEle[0]);
4527 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4528 getPointerTy(), DAG.getConstant(1, MVT::i32));
4529 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
4530 Dst, numEle[2], idx);
4531 } else {
4532 Dst = numEle[0];
4533 }
4534 delete [] numEle;
4535 return Dst;
4536 } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
4537 // case 4:
4538 SDValue *numEle = new SDValue[DstNumEle];
4539 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4540 numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
4541 MVT::i16, Src, DAG.getConstant(x, MVT::i32));
4542 numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
4543 numEle[x * 2], DAG.getConstant(8, MVT::i16));
4544 }
4545 MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
4546 Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
4547 for (uint32_t x = 1; x < DstNumEle; ++x) {
4548 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4549 getPointerTy(), DAG.getConstant(x, MVT::i32));
4550 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
4551 Dst, numEle[x], idx);
4552 }
4553 delete [] numEle;
4554 ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
4555 Res = DAG.getSExtOrTrunc(Dst, DL, ty);
4556 return Res;
4557 }
4558 }
4559 }
4560 Res = DAG.getNode(AMDILISD::BITCONV,
4561 Dst.getDebugLoc(),
4562 Dst.getValueType(), Src);
4563 return Res;
4564 }
4565
4566 SDValue
4567 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4568 SelectionDAG &DAG) const
4569 {
4570 SDValue Chain = Op.getOperand(0);
4571 SDValue Size = Op.getOperand(1);
4572 unsigned int SPReg = AMDIL::SP;
4573 DebugLoc DL = Op.getDebugLoc();
4574 SDValue SP = DAG.getCopyFromReg(Chain,
4575 DL,
4576 SPReg, MVT::i32);
4577 SDValue NewSP = DAG.getNode(ISD::ADD,
4578 DL,
4579 MVT::i32, SP, Size);
4580 Chain = DAG.getCopyToReg(SP.getValue(1),
4581 DL,
4582 SPReg, NewSP);
4583 SDValue Ops[2] = {NewSP, Chain};
4584 Chain = DAG.getMergeValues(Ops, 2 ,DL);
4585 return Chain;
4586 }
4587 SDValue
4588 AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
4589 {
4590 SDValue Chain = Op.getOperand(0);
4591 SDValue Cond = Op.getOperand(1);
4592 SDValue Jump = Op.getOperand(2);
4593 SDValue Result;
4594 Result = DAG.getNode(
4595 AMDILISD::BRANCH_COND,
4596 Op.getDebugLoc(),
4597 Op.getValueType(),
4598 Chain, Jump, Cond);
4599 return Result;
4600 }
4601
4602 SDValue
4603 AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
4604 {
4605 SDValue Chain = Op.getOperand(0);
4606 CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
4607 SDValue LHS = Op.getOperand(2);
4608 SDValue RHS = Op.getOperand(3);
4609 SDValue JumpT = Op.getOperand(4);
4610 SDValue CmpValue;
4611 ISD::CondCode CC = CCNode->get();
4612 SDValue Result;
4613 unsigned int cmpOpcode = CondCCodeToCC(
4614 CC,
4615 LHS.getValueType().getSimpleVT().SimpleTy);
4616 CmpValue = DAG.getNode(
4617 AMDILISD::CMP,
4618 Op.getDebugLoc(),
4619 LHS.getValueType(),
4620 DAG.getConstant(cmpOpcode, MVT::i32),
4621 LHS, RHS);
4622 Result = DAG.getNode(
4623 AMDILISD::BRANCH_COND,
4624 CmpValue.getDebugLoc(),
4625 MVT::Other, Chain,
4626 JumpT, CmpValue);
4627 return Result;
4628 }
4629
4630 SDValue
4631 AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
4632 {
4633 SDValue Result = DAG.getNode(
4634 AMDILISD::DP_TO_FP,
4635 Op.getDebugLoc(),
4636 Op.getValueType(),
4637 Op.getOperand(0),
4638 Op.getOperand(1));
4639 return Result;
4640 }
4641
4642 SDValue
4643 AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
4644 {
4645 SDValue Result = DAG.getNode(
4646 AMDILISD::VCONCAT,
4647 Op.getDebugLoc(),
4648 Op.getValueType(),
4649 Op.getOperand(0),
4650 Op.getOperand(1));
4651 return Result;
4652 }
// LowerRET - Lower an ISD::RET node: assign each return value to a
// register per the AMDIL calling convention, emit glued CopyToReg nodes
// for them, and terminate with a RET_FLAG node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  // Copy every return value into its assigned register, threading the
  // chain and glue through each copy.
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
                             dl,
                             VA.getLocReg(),
                             ValToCopy,
                             Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
    // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  // Emit the return, glued to the register copies above.
  Flag = DAG.getNode(AMDILISD::RET_FLAG,
                     dl,
                     MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
// Expand a 64-bit integer relational (eq/ne/lt/le/gt/ge, signed and
// unsigned variants) into 32-bit compares on the high and low halves of
// each operand, then combine the partial results into the destination.
void
AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
    unsigned int opCode) const
{
  MachineOperand DST = MI->getOperand(0);
  MachineOperand LHS = MI->getOperand(2);
  MachineOperand RHS = MI->getOperand(3);
  // opi32Code compares the high halves, si32Code the low halves.
  unsigned int opi32Code = 0, si32Code = 0;
  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
  uint32_t REGS[12];
  // All the relationals can be generated with with 6 temp registers
  for (int x = 0; x < 12; ++x) {
    REGS[x] = genVReg(simpleVT);
  }
  // Pull out the high and low components of each 64 bit register
  generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
  generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
  // Determine the correct opcode that we should use
  switch(opCode) {
  default:
    assert(!"comparison case not handled!");
    break;
  case AMDIL::LEQ:
    si32Code = opi32Code = AMDIL::IEQ;
    break;
  case AMDIL::LNE:
    si32Code = opi32Code = AMDIL::INE;
    break;
  case AMDIL::LLE:
  case AMDIL::ULLE:
  case AMDIL::LGE:
  case AMDIL::ULGE:
    // For >= the high halves are swapped; for <= the low halves are
    // swapped, reducing all four cases to one compare pattern.
    if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
      std::swap(REGS[0], REGS[2]);
    } else {
      std::swap(REGS[1], REGS[3]);
    }
    // High halves compare signed or unsigned per the original opcode.
    if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
      opi32Code = AMDIL::ILT;
    } else {
      opi32Code = AMDIL::ULT;
    }
    si32Code = AMDIL::UGE;
    break;
  case AMDIL::LGT:
  case AMDIL::ULGT:
    // a > b is handled as b < a: swap both halves, then FALL THROUGH to
    // the less-than cases below (intentional fallthrough).
    std::swap(REGS[0], REGS[2]);
    std::swap(REGS[1], REGS[3]);
  case AMDIL::LLT:
  case AMDIL::ULLT:
    if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
      opi32Code = AMDIL::ILT;
    } else {
      opi32Code = AMDIL::ULT;
    }
    si32Code = AMDIL::ULT;
    break;
  };
  // Do the initial opcode on the high and low components.
  // This leaves the following:
  // REGS[4] = L_HI OP R_HI
  // REGS[5] = L_LO OP R_LO
  generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
  generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
  switch(opi32Code) {
  case AMDIL::IEQ:
  case AMDIL::INE:
    {
      // combine the results with an and or or depending on if
      // we are eq or ne
      uint32_t combineOp = (opi32Code == AMDIL::IEQ)
        ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
      generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
    }
    break;
  default:
    // this finishes codegen for the following pattern
    // REGS[4] || (REGS[5] && (L_HI == R_HI))
    generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
    generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
        REGS[9]);
    generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
        REGS[10]);
    break;
  }
  // Pack the 32-bit result into both halves of the 64-bit destination.
  generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
}
4807
// Return the function alignment override; 0 requests no extra alignment
// beyond the target default.
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}
4813
4814 void
4815 AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
4816 MachineBasicBlock::iterator &BBI,
4817 DebugLoc *DL, const TargetInstrInfo *TII) const
4818 {
4819 mBB = BB;
4820 mBBI = BBI;
4821 mDL = DL;
4822 mTII = TII;
4823 }
4824 uint32_t
4825 AMDILTargetLowering::genVReg(uint32_t regType) const
4826 {
4827 return mBB->getParent()->getRegInfo().createVirtualRegister(
4828 getTargetMachine().getRegisterInfo()->getRegClass(regType));
4829 }
4830
// Emit a machine instruction with a single destination register at the
// cached insertion point (see setPrivateData()).
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
{
  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
}
4836
4837 MachineInstrBuilder
4838 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4839 uint32_t src1) const
4840 {
4841 return generateMachineInst(opcode, dst).addReg(src1);
4842 }
4843
4844 MachineInstrBuilder
4845 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4846 uint32_t src1, uint32_t src2) const
4847 {
4848 return generateMachineInst(opcode, dst, src1).addReg(src2);
4849 }
4850
4851 MachineInstrBuilder
4852 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4853 uint32_t src1, uint32_t src2, uint32_t src3) const
4854 {
4855 return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
4856 }
4857
4858
4859 SDValue
4860 AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
4861 {
4862 DebugLoc DL = Op.getDebugLoc();
4863 EVT OVT = Op.getValueType();
4864 SDValue LHS = Op.getOperand(0);
4865 SDValue RHS = Op.getOperand(1);
4866 MVT INTTY;
4867 MVT FLTTY;
4868 if (!OVT.isVector()) {
4869 INTTY = MVT::i32;
4870 FLTTY = MVT::f32;
4871 } else if (OVT.getVectorNumElements() == 2) {
4872 INTTY = MVT::v2i32;
4873 FLTTY = MVT::v2f32;
4874 } else if (OVT.getVectorNumElements() == 4) {
4875 INTTY = MVT::v4i32;
4876 FLTTY = MVT::v4f32;
4877 }
4878 unsigned bitsize = OVT.getScalarType().getSizeInBits();
4879 // char|short jq = ia ^ ib;
4880 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
4881
4882 // jq = jq >> (bitsize - 2)
4883 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
4884
4885 // jq = jq | 0x1
4886 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
4887
4888 // jq = (int)jq
4889 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
4890
4891 // int ia = (int)LHS;
4892 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4893
4894 // int ib, (int)RHS;
4895 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4896
4897 // float fa = (float)ia;
4898 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
4899
4900 // float fb = (float)ib;
4901 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
4902
4903 // float fq = native_divide(fa, fb);
4904 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
4905
4906 // fq = trunc(fq);
4907 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
4908
4909 // float fqneg = -fq;
4910 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
4911
4912 // float fr = mad(fqneg, fb, fa);
4913 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
4914
4915 // int iq = (int)fq;
4916 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
4917
4918 // fr = fabs(fr);
4919 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
4920
4921 // fb = fabs(fb);
4922 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
4923
4924 // int cv = fr >= fb;
4925 SDValue cv;
4926 if (INTTY == MVT::i32) {
4927 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4928 } else {
4929 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4930 }
4931 // jq = (cv ? jq : 0);
4932 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
4933 DAG.getConstant(0, OVT));
4934 // dst = iq + jq;
4935 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
4936 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
4937 return iq;
4938 }
4939
4940 SDValue
4941 AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
4942 {
4943 DebugLoc DL = Op.getDebugLoc();
4944 EVT OVT = Op.getValueType();
4945 SDValue LHS = Op.getOperand(0);
4946 SDValue RHS = Op.getOperand(1);
4947 // The LowerSDIV32 function generates equivalent to the following IL.
4948 // mov r0, LHS
4949 // mov r1, RHS
4950 // ilt r10, r0, 0
4951 // ilt r11, r1, 0
4952 // iadd r0, r0, r10
4953 // iadd r1, r1, r11
4954 // ixor r0, r0, r10
4955 // ixor r1, r1, r11
4956 // udiv r0, r0, r1
4957 // ixor r10, r10, r11
4958 // iadd r0, r0, r10
4959 // ixor DST, r0, r10
4960
4961 // mov r0, LHS
4962 SDValue r0 = LHS;
4963
4964 // mov r1, RHS
4965 SDValue r1 = RHS;
4966
4967 // ilt r10, r0, 0
4968 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4969 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4970 r0, DAG.getConstant(0, OVT));
4971
4972 // ilt r11, r1, 0
4973 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4974 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4975 r1, DAG.getConstant(0, OVT));
4976
4977 // iadd r0, r0, r10
4978 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4979
4980 // iadd r1, r1, r11
4981 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4982
4983 // ixor r0, r0, r10
4984 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4985
4986 // ixor r1, r1, r11
4987 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4988
4989 // udiv r0, r0, r1
4990 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
4991
4992 // ixor r10, r10, r11
4993 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
4994
4995 // iadd r0, r0, r10
4996 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4997
4998 // ixor DST, r0, r10
4999 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5000 return DST;
5001 }
5002
// 64-bit signed division: no custom expansion is provided here; the node
// is returned unchanged.
SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5008
5009 SDValue
5010 AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
5011 {
5012 DebugLoc DL = Op.getDebugLoc();
5013 EVT OVT = Op.getValueType();
5014 SDValue LHS = Op.getOperand(0);
5015 SDValue RHS = Op.getOperand(1);
5016 MVT INTTY;
5017 MVT FLTTY;
5018 if (!OVT.isVector()) {
5019 INTTY = MVT::i32;
5020 FLTTY = MVT::f32;
5021 } else if (OVT.getVectorNumElements() == 2) {
5022 INTTY = MVT::v2i32;
5023 FLTTY = MVT::v2f32;
5024 } else if (OVT.getVectorNumElements() == 4) {
5025 INTTY = MVT::v4i32;
5026 FLTTY = MVT::v4f32;
5027 }
5028
5029 // The LowerUDIV24 function implements the following CL.
5030 // int ia = (int)LHS
5031 // float fa = (float)ia
5032 // int ib = (int)RHS
5033 // float fb = (float)ib
5034 // float fq = native_divide(fa, fb)
5035 // fq = trunc(fq)
5036 // float t = mad(fq, fb, fb)
5037 // int iq = (int)fq - (t <= fa)
5038 // return (type)iq
5039
5040 // int ia = (int)LHS
5041 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
5042
5043 // float fa = (float)ia
5044 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
5045
5046 // int ib = (int)RHS
5047 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
5048
5049 // float fb = (float)ib
5050 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
5051
5052 // float fq = native_divide(fa, fb)
5053 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
5054
5055 // fq = trunc(fq)
5056 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
5057
5058 // float t = mad(fq, fb, fb)
5059 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
5060
5061 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
5062 SDValue iq;
5063 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
5064 if (INTTY == MVT::i32) {
5065 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5066 } else {
5067 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5068 }
5069 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
5070
5071
5072 // return (type)iq
5073 iq = DAG.getZExtOrTrunc(iq, DL, OVT);
5074 return iq;
5075
5076 }
5077
// 32-bit unsigned division: no custom expansion is provided here; the
// node is returned unchanged.
SDValue
AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5083
// 64-bit unsigned division: no custom expansion is provided here; the
// node is returned unchanged.
SDValue
AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5089 SDValue
5090 AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
5091 {
5092 DebugLoc DL = Op.getDebugLoc();
5093 EVT OVT = Op.getValueType();
5094 MVT INTTY = MVT::i32;
5095 if (OVT == MVT::v2i8) {
5096 INTTY = MVT::v2i32;
5097 } else if (OVT == MVT::v4i8) {
5098 INTTY = MVT::v4i32;
5099 }
5100 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5101 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5102 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5103 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5104 return LHS;
5105 }
5106
5107 SDValue
5108 AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
5109 {
5110 DebugLoc DL = Op.getDebugLoc();
5111 EVT OVT = Op.getValueType();
5112 MVT INTTY = MVT::i32;
5113 if (OVT == MVT::v2i16) {
5114 INTTY = MVT::v2i32;
5115 } else if (OVT == MVT::v4i16) {
5116 INTTY = MVT::v4i32;
5117 }
5118 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5119 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5120 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5121 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5122 return LHS;
5123 }
5124
5125 SDValue
5126 AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
5127 {
5128 DebugLoc DL = Op.getDebugLoc();
5129 EVT OVT = Op.getValueType();
5130 SDValue LHS = Op.getOperand(0);
5131 SDValue RHS = Op.getOperand(1);
5132 // The LowerSREM32 function generates equivalent to the following IL.
5133 // mov r0, LHS
5134 // mov r1, RHS
5135 // ilt r10, r0, 0
5136 // ilt r11, r1, 0
5137 // iadd r0, r0, r10
5138 // iadd r1, r1, r11
5139 // ixor r0, r0, r10
5140 // ixor r1, r1, r11
5141 // udiv r20, r0, r1
5142 // umul r20, r20, r1
5143 // sub r0, r0, r20
5144 // iadd r0, r0, r10
5145 // ixor DST, r0, r10
5146
5147 // mov r0, LHS
5148 SDValue r0 = LHS;
5149
5150 // mov r1, RHS
5151 SDValue r1 = RHS;
5152
5153 // ilt r10, r0, 0
5154 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5155 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5156 r0, DAG.getConstant(0, OVT));
5157
5158 // ilt r11, r1, 0
5159 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5160 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5161 r1, DAG.getConstant(0, OVT));
5162
5163 // iadd r0, r0, r10
5164 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5165
5166 // iadd r1, r1, r11
5167 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
5168
5169 // ixor r0, r0, r10
5170 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5171
5172 // ixor r1, r1, r11
5173 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
5174
5175 // udiv r20, r0, r1
5176 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
5177
5178 // umul r20, r20, r1
5179 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
5180
5181 // sub r0, r0, r20
5182 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
5183
5184 // iadd r0, r0, r10
5185 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5186
5187 // ixor DST, r0, r10
5188 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5189 return DST;
5190 }
5191
// 64-bit signed remainder: no custom expansion is provided here; the
// node is returned unchanged.
SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5197
5198 SDValue
5199 AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
5200 {
5201 DebugLoc DL = Op.getDebugLoc();
5202 EVT OVT = Op.getValueType();
5203 MVT INTTY = MVT::i32;
5204 if (OVT == MVT::v2i8) {
5205 INTTY = MVT::v2i32;
5206 } else if (OVT == MVT::v4i8) {
5207 INTTY = MVT::v4i32;
5208 }
5209 SDValue LHS = Op.getOperand(0);
5210 SDValue RHS = Op.getOperand(1);
5211 // The LowerUREM8 function generates equivalent to the following IL.
5212 // mov r0, as_u32(LHS)
5213 // mov r1, as_u32(RHS)
5214 // and r10, r0, 0xFF
5215 // and r11, r1, 0xFF
5216 // cmov_logical r3, r11, r11, 0x1
5217 // udiv r3, r10, r3
5218 // cmov_logical r3, r11, r3, 0
5219 // umul r3, r3, r11
5220 // sub r3, r10, r3
5221 // and as_u8(DST), r3, 0xFF
5222
5223 // mov r0, as_u32(LHS)
5224 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
5225
5226 // mov r1, as_u32(RHS)
5227 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
5228
5229 // and r10, r0, 0xFF
5230 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
5231 DAG.getConstant(0xFF, INTTY));
5232
5233 // and r11, r1, 0xFF
5234 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
5235 DAG.getConstant(0xFF, INTTY));
5236
5237 // cmov_logical r3, r11, r11, 0x1
5238 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
5239 DAG.getConstant(0x01, INTTY));
5240
5241 // udiv r3, r10, r3
5242 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5243
5244 // cmov_logical r3, r11, r3, 0
5245 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
5246 DAG.getConstant(0, INTTY));
5247
5248 // umul r3, r3, r11
5249 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
5250
5251 // sub r3, r10, r3
5252 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
5253
5254 // and as_u8(DST), r3, 0xFF
5255 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
5256 DAG.getConstant(0xFF, INTTY));
5257 DST = DAG.getZExtOrTrunc(DST, DL, OVT);
5258 return DST;
5259 }
5260
5261 SDValue
5262 AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
5263 {
5264 DebugLoc DL = Op.getDebugLoc();
5265 EVT OVT = Op.getValueType();
5266 MVT INTTY = MVT::i32;
5267 if (OVT == MVT::v2i16) {
5268 INTTY = MVT::v2i32;
5269 } else if (OVT == MVT::v4i16) {
5270 INTTY = MVT::v4i32;
5271 }
5272 SDValue LHS = Op.getOperand(0);
5273 SDValue RHS = Op.getOperand(1);
5274 // The LowerUREM16 function generatest equivalent to the following IL.
5275 // mov r0, LHS
5276 // mov r1, RHS
5277 // DIV = LowerUDIV16(LHS, RHS)
5278 // and r10, r0, 0xFFFF
5279 // and r11, r1, 0xFFFF
5280 // cmov_logical r3, r11, r11, 0x1
5281 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5282 // and r3, r3, 0xFFFF
5283 // cmov_logical r3, r11, r3, 0
5284 // umul r3, r3, r11
5285 // sub r3, r10, r3
5286 // and DST, r3, 0xFFFF
5287
5288 // mov r0, LHS
5289 SDValue r0 = LHS;
5290
5291 // mov r1, RHS
5292 SDValue r1 = RHS;
5293
5294 // and r10, r0, 0xFFFF
5295 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
5296 DAG.getConstant(0xFFFF, OVT));
5297
5298 // and r11, r1, 0xFFFF
5299 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
5300 DAG.getConstant(0xFFFF, OVT));
5301
5302 // cmov_logical r3, r11, r11, 0x1
5303 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
5304 DAG.getConstant(0x01, OVT));
5305
5306 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5307 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
5308 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
5309 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5310 r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
5311 r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
5312
5313 // and r3, r3, 0xFFFF
5314 r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
5315 DAG.getConstant(0xFFFF, OVT));
5316
5317 // cmov_logical r3, r11, r3, 0
5318 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
5319 DAG.getConstant(0, OVT));
5320 // umul r3, r3, r11
5321 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
5322
5323 // sub r3, r10, r3
5324 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
5325
5326 // and DST, r3, 0xFFFF
5327 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
5328 DAG.getConstant(0xFFFF, OVT));
5329 return DST;
5330 }
5331
5332 SDValue
5333 AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
5334 {
5335 DebugLoc DL = Op.getDebugLoc();
5336 EVT OVT = Op.getValueType();
5337 SDValue LHS = Op.getOperand(0);
5338 SDValue RHS = Op.getOperand(1);
5339 // The LowerUREM32 function generates equivalent to the following IL.
5340 // udiv r20, LHS, RHS
5341 // umul r20, r20, RHS
5342 // sub DST, LHS, r20
5343
5344 // udiv r20, LHS, RHS
5345 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
5346
5347 // umul r20, r20, RHS
5348 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
5349
5350 // sub DST, LHS, r20
5351 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
5352 return DST;
5353 }
5354
// 64-bit unsigned remainder: no custom expansion is provided here; the
// node is returned unchanged.
SDValue
AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5360
5361
5362 SDValue
5363 AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
5364 {
5365 DebugLoc DL = Op.getDebugLoc();
5366 EVT OVT = Op.getValueType();
5367 MVT INTTY = MVT::i32;
5368 if (OVT == MVT::v2f32) {
5369 INTTY = MVT::v2i32;
5370 } else if (OVT == MVT::v4f32) {
5371 INTTY = MVT::v4i32;
5372 }
5373 SDValue LHS = Op.getOperand(0);
5374 SDValue RHS = Op.getOperand(1);
5375 SDValue DST;
5376 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
5377 &this->getTargetMachine())->getSubtargetImpl();
5378 if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
5379 // TODO: This doesn't work for vector types yet
5380 // The LowerFDIV32 function generates equivalent to the following
5381 // IL:
5382 // mov r20, as_int(LHS)
5383 // mov r21, as_int(RHS)
5384 // and r30, r20, 0x7f800000
5385 // and r31, r20, 0x807FFFFF
5386 // and r32, r21, 0x7f800000
5387 // and r33, r21, 0x807FFFFF
5388 // ieq r40, r30, 0x7F800000
5389 // ieq r41, r31, 0x7F800000
5390 // ieq r42, r32, 0
5391 // ieq r43, r33, 0
5392 // and r50, r20, 0x80000000
5393 // and r51, r21, 0x80000000
5394 // ior r32, r32, 0x3f800000
5395 // ior r33, r33, 0x3f800000
5396 // cmov_logical r32, r42, r50, r32
5397 // cmov_logical r33, r43, r51, r33
5398 // cmov_logical r32, r40, r20, r32
5399 // cmov_logical r33, r41, r21, r33
5400 // ior r50, r40, r41
5401 // ior r51, r42, r43
5402 // ior r50, r50, r51
5403 // inegate r52, r31
5404 // iadd r30, r30, r52
5405 // cmov_logical r30, r50, 0, r30
5406 // div_zeroop(infinity) r21, 1.0, r33
5407 // mul_ieee r20, r32, r21
5408 // and r22, r20, 0x7FFFFFFF
5409 // and r23, r20, 0x80000000
5410 // ishr r60, r22, 0x00000017
5411 // ishr r61, r30, 0x00000017
5412 // iadd r20, r20, r30
5413 // iadd r21, r22, r30
5414 // iadd r60, r60, r61
5415 // ige r42, 0, R60
5416 // ior r41, r23, 0x7F800000
5417 // ige r40, r60, 0x000000FF
5418 // cmov_logical r40, r50, 0, r40
5419 // cmov_logical r20, r42, r23, r20
5420 // cmov_logical DST, r40, r41, r20
5421 // as_float(DST)
5422
5423 // mov r20, as_int(LHS)
5424 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
5425
5426 // mov r21, as_int(RHS)
5427 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
5428
5429 // and r30, r20, 0x7f800000
5430 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5431 DAG.getConstant(0x7F800000, INTTY));
5432
5433 // and r31, r21, 0x7f800000
5434 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5435 DAG.getConstant(0x7f800000, INTTY));
5436
5437 // and r32, r20, 0x807FFFFF
5438 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5439 DAG.getConstant(0x807FFFFF, INTTY));
5440
5441 // and r33, r21, 0x807FFFFF
5442 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5443 DAG.getConstant(0x807FFFFF, INTTY));
5444
5445 // ieq r40, r30, 0x7F800000
5446 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5447 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5448 R30, DAG.getConstant(0x7F800000, INTTY));
5449
5450 // ieq r41, r31, 0x7F800000
5451 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5452 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5453 R31, DAG.getConstant(0x7F800000, INTTY));
5454
5455 // ieq r42, r30, 0
5456 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5457 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5458 R30, DAG.getConstant(0, INTTY));
5459
5460 // ieq r43, r31, 0
5461 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5462 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5463 R31, DAG.getConstant(0, INTTY));
5464
5465 // and r50, r20, 0x80000000
5466 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5467 DAG.getConstant(0x80000000, INTTY));
5468
5469 // and r51, r21, 0x80000000
5470 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5471 DAG.getConstant(0x80000000, INTTY));
5472
5473 // ior r32, r32, 0x3f800000
5474 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
5475 DAG.getConstant(0x3F800000, INTTY));
5476
5477 // ior r33, r33, 0x3f800000
5478 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
5479 DAG.getConstant(0x3F800000, INTTY));
5480
5481 // cmov_logical r32, r42, r50, r32
5482 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
5483
5484 // cmov_logical r33, r43, r51, r33
5485 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
5486
5487 // cmov_logical r32, r40, r20, r32
5488 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
5489
5490 // cmov_logical r33, r41, r21, r33
5491 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
5492
5493 // ior r50, r40, r41
5494 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
5495
5496 // ior r51, r42, r43
5497 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
5498
5499 // ior r50, r50, r51
5500 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
5501
5502 // inegate r52, r31
5503 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
5504
5505 // iadd r30, r30, r52
5506 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
5507
5508 // cmov_logical r30, r50, 0, r30
5509 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5510 DAG.getConstant(0, INTTY), R30);
5511
5512 // div_zeroop(infinity) r21, 1.0, as_float(r33)
5513 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5514 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5515 DAG.getConstantFP(1.0f, OVT), R33);
5516
5517 // mul_ieee as_int(r20), as_float(r32), r21
5518 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5519 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5520 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5521
5522 // div_zeroop(infinity) r21, 1.0, as_float(r33)
5523 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5524 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5525 DAG.getConstantFP(1.0f, OVT), R33);
5526
5527 // mul_ieee as_int(r20), as_float(r32), r21
5528 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5529 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5530 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5531
5532 // and r22, r20, 0x7FFFFFFF
5533 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5534 DAG.getConstant(0x7FFFFFFF, INTTY));
5535
5536 // and r23, r20, 0x80000000
5537 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5538 DAG.getConstant(0x80000000, INTTY));
5539
5540 // ishr r60, r22, 0x00000017
5541 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
5542 DAG.getConstant(0x00000017, INTTY));
5543
5544 // ishr r61, r30, 0x00000017
5545 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
5546 DAG.getConstant(0x00000017, INTTY));
5547
5548 // iadd r20, r20, r30
5549 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
5550
5551 // iadd r21, r22, r30
5552 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
5553
5554 // iadd r60, r60, r61
5555 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
5556
5557 // ige r42, 0, R60
5558 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5559 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5560 DAG.getConstant(0, INTTY),
5561 R60);
5562
5563 // ior r41, r23, 0x7F800000
5564 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
5565 DAG.getConstant(0x7F800000, INTTY));
5566
5567 // ige r40, r60, 0x000000FF
5568 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5569 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5570 R60,
5571 DAG.getConstant(0x0000000FF, INTTY));
5572
5573 // cmov_logical r40, r50, 0, r40
5574 R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5575 DAG.getConstant(0, INTTY),
5576 R40);
5577
5578 // cmov_logical r20, r42, r23, r20
5579 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
5580
5581 // cmov_logical DST, r40, r41, r20
5582 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
5583
5584 // as_float(DST)
5585 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
5586 } else {
5587 // The following sequence of DAG nodes produce the following IL:
5588 // fabs r1, RHS
5589 // lt r2, 0x1.0p+96f, r1
5590 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5591 // mul_ieee r1, RHS, r3
5592 // div_zeroop(infinity) r0, LHS, r1
5593 // mul_ieee DST, r0, r3
5594
5595 // fabs r1, RHS
5596 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
5597 // lt r2, 0x1.0p+96f, r1
5598 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5599 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
5600 DAG.getConstant(0x6f800000, INTTY), r1);
5601 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5602 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
5603 DAG.getConstant(0x2f800000, INTTY),
5604 DAG.getConstant(0x3f800000, INTTY));
5605 // mul_ieee r1, RHS, r3
5606 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
5607 // div_zeroop(infinity) r0, LHS, r1
5608 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
5609 // mul_ieee DST, r0, r3
5610 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
5611 }
5612 return DST;
5613 }
5614
5615 SDValue
5616 AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
5617 {
5618 return SDValue(Op.getNode(), 0);
5619 }