src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp

   1 /*
   2  * Copyright 2011 Christoph Bumiller
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
  19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 #include "nv50_ir_target_nvc0.h"
  24
  25 namespace nv50_ir {
  26
  27 Target *getTargetNVC0(unsigned int chipset)
  28 {
  29    return new TargetNVC0(chipset);
  30 }
  31
  32 TargetNVC0::TargetNVC0(unsigned int card)
  33 {
  34    chipset = card;
  35    initOpInfo();
  36 }
  37
// BUILTINS / LIBRARY FUNCTIONS:
  39
// laziness -> will just hardcode everything for the time being
  41
  42 // Will probably make this nicer once we support subroutines properly,
  43 // i.e. when we have an input IR that provides function declarations.
  44
// Hard-coded code words for the built-in library routines, stored back to back
// as 32-bit words. TargetNVC0::getBuiltinCode() exposes the whole array and
// nvc0_builtin_offsets holds the byte offset at which each routine starts.
//
// The SIZE figure in each routine's header is given in units of 8 bytes, i.e.
// two 32-bit words; rows holding a single word contribute 4 bytes each.
static const uint32_t nvc0_builtin_code[] =
{
// DIV U32: slow unsigned integer division
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT:   $r0: dividend, $r1: divisor
// OUTPUT:  $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// SIZE:    22 / 14 * 8 bytes
//
// The first variant (22 * 8 bytes) is the one compiled in; the alternative in
// the #else branch is 14 * 8 bytes. nvc0_builtin_offsets hard-codes 22, so it
// must be updated if the variant is ever switched.
#if 1
   0x04009c03, 0x78000000,
   0x7c209cdd,
   0x0010dd18,
   0x08309c03, 0x60000000,
   0x05605c18,
   0x0810dc2a,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0000dde4, 0x28000000,
   0x08001c43, 0x50000000,
   0x05609c18,
   0x0010430d,
   0x0811dc03, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x04000002, 0x08000000,
   0x0811c003, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x040000ac,
   0x90001dff,
#else
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005c28,
   0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x0420446c,
   0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0x0ffc5dff,
   0x90001dff,
#endif

// DIV S32: slow signed integer division
//
// INPUT:   $r0: dividend, $r1: divisor
// OUTPUT:  $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
// SIZE:    18 * 8 bytes
//
   0xfc05dc23, 0x188e0000,
   0xfc17dc23, 0x18c40000,
   0x03301e18,
   0x07305e18,
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005c28,
   0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x0420446c,
   0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0x0ffc5dff,
   0x01700e18,
   0x05704a18,
   0x90001dff,

// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
// INPUT:   $r0d (x)
// OUTPUT:  $r0d (rcp(x))
// CLOBBER: $r2 - $r7
// SIZE:    9 * 8 bytes
//
   0x9810dc08,
   0x00009c28,
   0x4001df18,
   0x00019d18,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,

// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
//
// INPUT:   $r0d (x)
// OUTPUT:  $r0d (rsqrt(x))
// CLOBBER: $r2 - $r7
// SIZE:    14 * 8 bytes
//
   0x9c10dc08,
   0x00009c28,
   0x00019d18,
   0x3fe1df18,
   0x18001c01, 0x50000000,
   0x0001dde2, 0x18ffe000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,
};
 179
 180 static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
 181 {
 182    0,
 183    8 * (22),
 184    8 * (22 + 18),
 185    8 * (22 + 18 + 9)
 186 };
 187
 188 void
 189 TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
 190 {
 191    *code = &nvc0_builtin_code[0];
 192    *size = sizeof(nvc0_builtin_code);
 193 }
 194
 195 uint32_t
 196 TargetNVC0::getBuiltinOffset(int builtin) const
 197 {
 198    assert(builtin < NVC0_BUILTIN_COUNT);
 199    return nvc0_builtin_offsets[builtin];
 200 }
 201
// One row of per-operation capabilities, consumed by TargetNVC0::initOpInfo().
// In the per-source masks, initOpInfo() tests bit s (for s = 0..2) to decide
// whether source s has the capability.
struct opProperties
{
   operation op;
   unsigned int mNeg   : 4; // sources that accept the NEG modifier
   unsigned int mAbs   : 4; // sources that accept the ABS modifier
   unsigned int mNot   : 4; // sources that accept the NOT modifier
   unsigned int mSat   : 4; // bit 0x8 set: destination supports saturation
   unsigned int fConst : 3; // sources that may come from FILE_MEMORY_CONST
   unsigned int fImmd  : 4; // sources that may be immediates; last bit (0x8) indicates if full immediate is supported
};
 212
// Capability table applied on top of the defaults set up in
// TargetNVC0::initOpInfo(). Operations that do not appear here keep the
// defaults (no source modifiers, GPR-only sources, no saturation).
//
// Columns neg/abs/not/c[]/imm are per-source bit masks (bit 0 = source 0,
// bit 1 = source 1, bit 2 = source 2). In the sat column only 0x8 is tested
// (destination saturation). In the imm column 0x8 additionally marks support
// for full 32-bit immediates.
static const struct opProperties _initProps[] =
{
   //           neg  abs  not  sat  c[]  imm
   { OP_ADD,    0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 },
   { OP_SUB,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 },
   { OP_MUL,    0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
   { OP_MAX,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_MIN,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_MAD,    0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
   { OP_ABS,    0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
   { OP_NEG,    0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
   { OP_CVT,    0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
   { OP_AND,    0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
   { OP_OR,     0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
   { OP_XOR,    0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
   { OP_SHL,    0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
   { OP_SHR,    0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
   { OP_SET,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_SLCT,   0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint
   { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
   { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
   { OP_COS,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_SIN,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_EX2,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_LG2,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_RCP,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_RSQ,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_DFDX,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
   { OP_DFDY,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
   { OP_CALL,   0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
   { OP_INSBF,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
   { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   // saturate only:
   { OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
   { OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
};
 251
 252 void TargetNVC0::initOpInfo()
 253 {
 254    unsigned int i, j;
 255
 256    static const uint32_t commutative[(OP_LAST + 31) / 32] =
 257    {
 258       // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
 259       0x0670ca00, 0x0000003f, 0x00000000
 260    };
 261
 262    static const uint32_t shortForm[(OP_LAST + 31) / 32] =
 263    {
 264       // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
 265       0x0670ca00, 0x00000000, 0x00000000
 266    };
 267
 268    static const operation noDest[] =
 269    {
 270       OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
 271       OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
 272       OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
 273       OP_QUADON, OP_QUADPOP
 274    };
 275
 276    joinAnterior = false;
 277
 278    for (i = 0; i < DATA_FILE_COUNT; ++i)
 279       nativeFileMap[i] = (DataFile)i;
 280    nativeFileMap[FILE_ADDRESS] = FILE_GPR;
 281
 282    for (i = 0; i < OP_LAST; ++i) {
 283       opInfo[i].variants = NULL;
 284       opInfo[i].op = (operation)i;
 285       opInfo[i].srcTypes = 1 << (int)TYPE_F32;
 286       opInfo[i].dstTypes = 1 << (int)TYPE_F32;
 287       opInfo[i].immdBits = 0;
 288       opInfo[i].srcNr = operationSrcNr[i];
 289
 290       for (j = 0; j < opInfo[i].srcNr; ++j) {
 291          opInfo[i].srcMods[j] = 0;
 292          opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
 293       }
 294       opInfo[i].dstMods = 0;
 295       opInfo[i].dstFiles = 1 << (int)FILE_GPR;
 296
 297       opInfo[i].hasDest = 1;
 298       opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
 299       opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
 300       opInfo[i].pseudo = (i < OP_MOV);
 301       opInfo[i].predicate = !opInfo[i].pseudo;
 302       opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
 303       opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
 304    }
 305    for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)
 306       opInfo[noDest[i]].hasDest = 0;
 307
 308    for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
 309       const struct opProperties *prop = &_initProps[i];
 310
 311       for (int s = 0; s < 3; ++s) {
 312          if (prop->mNeg & (1 << s))
 313             opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
 314          if (prop->mAbs & (1 << s))
 315             opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
 316          if (prop->mNot & (1 << s))
 317             opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
 318          if (prop->fConst & (1 << s))
 319             opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
 320          if (prop->fImmd & (1 << s))
 321             opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
 322          if (prop->fImmd & 8)
 323             opInfo[prop->op].immdBits = 0xffffffff;
 324       }
 325       if (prop->mSat & 8)
 326          opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
 327    }
 328 }
 329
 330 unsigned int
 331 TargetNVC0::getFileSize(DataFile file) const
 332 {
 333    switch (file) {
 334    case FILE_NULL:          return 0;
 335    case FILE_GPR:           return 63;
 336    case FILE_PREDICATE:     return 7;
 337    case FILE_FLAGS:         return 1;
 338    case FILE_ADDRESS:       return 0;
 339    case FILE_IMMEDIATE:     return 0;
 340    case FILE_MEMORY_CONST:  return 65536;
 341    case FILE_SHADER_INPUT:  return 0x400;
 342    case FILE_SHADER_OUTPUT: return 0x400;
 343    case FILE_MEMORY_GLOBAL: return 0xffffffff;
 344    case FILE_MEMORY_SHARED: return 16 << 10;
 345    case FILE_MEMORY_LOCAL:  return 48 << 10;
 346    case FILE_SYSTEM_VALUE:  return 32;
 347    default:
 348       assert(!"invalid file");
 349       return 0;
 350    }
 351 }
 352
 353 unsigned int
 354 TargetNVC0::getFileUnit(DataFile file) const
 355 {
 356    if (file == FILE_GPR || file == FILE_ADDRESS || file == FILE_SYSTEM_VALUE)
 357       return 2;
 358    return 0;
 359 }
 360
 361 uint32_t
 362 TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
 363 {
 364    const int idx = sym->reg.data.sv.index;
 365    const SVSemantic sv = sym->reg.data.sv.sv;
 366
 367    const bool isInput = shaderFile == FILE_SHADER_INPUT;
 368
 369    switch (sv) {
 370    case SV_POSITION:       return 0x070 + idx * 4;
 371    case SV_INSTANCE_ID:    return 0x2f8;
 372    case SV_VERTEX_ID:      return 0x2fc;
 373    case SV_PRIMITIVE_ID:   return isInput ? 0x060 : 0x040;
 374    case SV_LAYER:          return 0x064;
 375    case SV_VIEWPORT_INDEX: return 0x068;
 376    case SV_POINT_SIZE:     return 0x06c;
 377    case SV_CLIP_DISTANCE:  return 0x2c0 + idx * 4;
 378    case SV_POINT_COORD:    return 0x2e0 + idx * 4;
 379    case SV_FACE:           return 0x3fc;
 380    case SV_TESS_FACTOR:    return 0x000 + idx * 4;
 381    case SV_TESS_COORD:     return 0x2f0 + idx * 4;
 382    default:
 383       return 0xffffffff;
 384    }
 385 }
 386
 387 bool
 388 TargetNVC0::insnCanLoad(const Instruction *i, int s,
 389                         const Instruction *ld) const
 390 {
 391    DataFile sf = ld->src[0].getFile();
 392
 393    // immediate 0 can be represented by GPR $r63
 394    if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
 395       return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE);
 396
 397    if (s > opInfo[i->op].srcNr)
 398       return false;
 399    if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
 400       return false;
 401
 402    // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
 403    if (ld->src[0].isIndirect(0))
 404       return false;
 405
 406    for (int k = 0; i->srcExists(k); ++k) {
 407       if (i->src[k].getFile() == FILE_IMMEDIATE) {
 408          if (i->getSrc(k)->reg.data.u64 != 0)
 409             return false;
 410       } else
 411       if (i->src[k].getFile() != FILE_GPR &&
 412           i->src[k].getFile() != FILE_PREDICATE) {
 413          return false;
 414       }
 415    }
 416
 417    // not all instructions support full 32 bit immediates
 418    if (sf == FILE_IMMEDIATE) {
 419       Storage &reg = ld->getSrc(0)->asImm()->reg;
 420
 421       if (opInfo[i->op].immdBits != 0xffffffff) {
 422          if (i->sType == TYPE_F32) {
 423             if (reg.data.u32 & 0xfff)
 424                return false;
 425          } else
 426          if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
 427             // with u32, 0xfffff counts as 0xffffffff as well
 428             if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
 429                return false;
 430          }
 431       } else
 432       if (i->op == OP_MAD || i->op == OP_FMA) {
 433          // requires src == dst, cannot decide before RA
 434          // (except if we implement more constraints)
 435          if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff)
 436             return false;
 437       }
 438    }
 439
 440    return true;
 441 }
 442
 443 bool
 444 TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
 445 {
 446    if (ty == TYPE_NONE)
 447       return false;
 448    if (ty == TYPE_B96)
 449       return (file == FILE_SHADER_INPUT) || (file == FILE_SHADER_OUTPUT);
 450    return true;
 451 }
 452
 453 bool
 454 TargetNVC0::isOpSupported(operation op, DataType ty) const
 455 {
 456    if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
 457       return false;
 458    if (op == OP_SAD && ty != TYPE_S32)
 459       return false;
 460    if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
 461       return false;
 462    return true;
 463 }
 464
 465 bool
 466 TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
 467 {
 468    if (!isFloatType(insn->dType)) {
 469       switch (insn->op) {
 470       case OP_ABS:
 471       case OP_NEG:
 472       case OP_CVT:
 473       case OP_CEIL:
 474       case OP_FLOOR:
 475       case OP_TRUNC:
 476       case OP_AND:
 477       case OP_OR:
 478       case OP_XOR:
 479          break;
 480       case OP_ADD:
 481          if (insn->src[s ? 0 : 1].mod.neg())
 482             return false;
 483          break;
 484       case OP_SUB:
 485          if (s == 0)
 486             return insn->src[1].mod.neg() ? false : true;
 487          break;
 488       default:
 489          return false;
 490       }
 491    }
 492    if (s > 3)
 493       return false;
 494    return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
 495 }
 496
 497 bool
 498 TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const
 499 {
 500    if (insn->getPredicate())
 501       return false;
 502    return opInfo[insn->op].predicate;
 503 }
 504
 505 bool
 506 TargetNVC0::isSatSupported(const Instruction *insn) const
 507 {
 508    if (insn->op == OP_CVT)
 509       return true;
 510    if (!(opInfo[insn->op].dstMods & NV50_IR_MOD_SAT))
 511       return false;
 512
 513    if (insn->dType == TYPE_U32)
 514       return (insn->op == OP_ADD) || (insn->op == OP_MAD);
 515
 516    return insn->dType == TYPE_F32;
 517 }
 518
 519 bool
 520 TargetNVC0::isPostMultiplySupported(operation op, float f, int& e) const
 521 {
 522    if (op != OP_MUL)
 523       return false;
 524    f = fabsf(f);
 525    e = static_cast<int>(log2f(f));
 526    if (e < -3 || e > 3)
 527       return false;
 528    return f == exp2f(static_cast<float>(e));
 529 }
 530
 531 // TODO: better values
 532 int TargetNVC0::getLatency(const Instruction *i) const
 533 {
 534    if (i->op == OP_LOAD) {
 535       if (i->cache == CACHE_CV)
 536          return 700;
 537       return 48;
 538    }
 539    return 24;
 540 }
 541
 542 // These are "inverse" throughput values, i.e. the number of cycles required
 543 // to issue a specific instruction for a full warp (32 threads).
 544 //
 545 // Assuming we have more than 1 warp in flight, a higher issue latency results
 546 // in a lower result latency since the MP will have spent more time with other
 547 // warps.
 548 // This also helps to determine the number of cycles between instructions in
 549 // a single warp.
 550 //
 551 int TargetNVC0::getThroughput(const Instruction *i) const
 552 {
 553    // TODO: better values
 554    if (i->dType == TYPE_F32) {
 555       switch (i->op) {
 556       case OP_ADD:
 557       case OP_MUL:
 558       case OP_MAD:
 559       case OP_FMA:
 560          return 1;
 561       case OP_CVT:
 562       case OP_CEIL:
 563       case OP_FLOOR:
 564       case OP_TRUNC:
 565       case OP_SET:
 566       case OP_SLCT:
 567       case OP_MIN:
 568       case OP_MAX:
 569          return 2;
 570       case OP_RCP:
 571       case OP_RSQ:
 572       case OP_LG2:
 573       case OP_SIN:
 574       case OP_COS:
 575       case OP_PRESIN:
 576       case OP_PREEX2:
 577       default:
 578          return 8;
 579       }
 580    } else
 581    if (i->dType == TYPE_U32 || i->dType == TYPE_S32) {
 582       switch (i->op) {
 583       case OP_ADD:
 584       case OP_AND:
 585       case OP_OR:
 586       case OP_XOR:
 587       case OP_NOT:
 588          return 1;
 589       case OP_MUL:
 590       case OP_MAD:
 591       case OP_CVT:
 592       case OP_SET:
 593       case OP_SLCT:
 594       case OP_SHL:
 595       case OP_SHR:
 596       case OP_NEG:
 597       case OP_ABS:
 598       case OP_MIN:
 599       case OP_MAX:
 600       default:
 601          return 2;
 602       }
 603    } else
 604    if (i->dType == TYPE_F64) {
 605       return 2;
 606    } else {
 607       return 1;
 608    }
 609 }
 610
 611 } // namespace nv50_ir