src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp

   1 /*
   2  * Copyright 2011 Christoph Bumiller
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
  19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 #include "nv50_ir_target_nvc0.h"
  24
  25 namespace nv50_ir {
  26
  27 Target *getTargetNVC0(unsigned int chipset)
  28 {
  29    return new TargetNVC0(chipset);
  30 }
  31
  32 TargetNVC0::TargetNVC0(unsigned int card)
  33 {
  34    chipset = card;
  35    initOpInfo();
  36 }
  37
// BUILTINS / LIBRARY FUNCTIONS:

// laziness -> will just hardcode everything for the time being

// Will probably make this nicer once we support subroutines properly,
// i.e. when we have an input IR that provides function declarations.
  44
// Hard-coded code for the builtin library routines, stored as 32-bit words.
//
// The routines are laid out back to back in the order DIV U32, DIV S32,
// RCP F64, RSQ F64. nvc0_builtin_offsets hard-codes the byte offset of each
// routine from the SIZE figures below, so any change to the length of a
// routine must be mirrored there.
//
// Each SIZE figure counts units of 8 bytes, i.e. two words of this array.
// Lines holding a single word therefore contribute half a unit each.
// NOTE(review): single-word entries are presumably 4-byte short-form
// encodings - confirm against the ISA documentation.
static const uint32_t nvc0_builtin_code[] =
{
// DIV U32: slow unsigned integer division
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT:   $r0: dividend, $r1: divisor
// OUTPUT:  $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// SIZE:    22 / 14 * 8 bytes
//          (22 units for the "#if 1" variant that is compiled in, 14 units
//          for the "#else" variant; nvc0_builtin_offsets assumes 22)
//
#if 1
   0x04009c03, 0x78000000,
   0x7c209cdd,
   0x0010dd18,
   0x08309c03, 0x60000000,
   0x05605c18,
   0x0810dc2a,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0000dde4, 0x28000000,
   0x08001c43, 0x50000000,
   0x05609c18,
   0x0010430d,
   0x0811dc03, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x04000002, 0x08000000,
   0x0811c003, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x040000ac,
   0x90001dff,
#else
   // Alternative variant, currently compiled out. Its words are the same as
   // words 6..33 of the DIV S32 routine below.
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005c28,
   0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x0420446c,
   0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0x0ffc5dff,
   0x90001dff,
#endif

// DIV S32: slow signed integer division
//
// INPUT:   $r0: dividend, $r1: divisor
// OUTPUT:  $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
// SIZE:    18 * 8 bytes
//
   0xfc05dc23, 0x188e0000,
   0xfc17dc23, 0x18c40000,
   0x03301e18,
   0x07305e18,
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005c28,
   0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x0420446c,
   0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0x0ffc5dff,
   0x01700e18,
   0x05704a18,
   0x90001dff,

// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
// INPUT:   $r0d (x)
// OUTPUT:  $r0d (rcp(x))
// CLOBBER: $r2 - $r7
// SIZE:    9 * 8 bytes
//
   0x9810dc08,
   0x00009c28,
   0x4001df18,
   0x00019d18,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,

// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
//
// INPUT:   $r0d (x)
// OUTPUT:  $r0d (rsqrt(x))
// CLOBBER: $r2 - $r7
// SIZE:    14 * 8 bytes
//
   0x9c10dc08,
   0x00009c28,
   0x00019d18,
   0x3fe1df18,
   0x18001c01, 0x50000000,
   0x0001dde2, 0x18ffe000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,
};
 179
 180 static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
 181 {
 182    0,
 183    8 * (22),
 184    8 * (22 + 18),
 185    8 * (22 + 18 + 9)
 186 };
 187
 188 void
 189 TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
 190 {
 191    *code = &nvc0_builtin_code[0];
 192    *size = sizeof(nvc0_builtin_code);
 193 }
 194
 195 uint32_t
 196 TargetNVC0::getBuiltinOffset(int builtin) const
 197 {
 198    assert(builtin < NVC0_BUILTIN_COUNT);
 199    return nvc0_builtin_offsets[builtin];
 200 }
 201
// One row of the _initProps table: the per-operation exceptions that
// initOpInfo applies on top of its defaults.
//
// mNeg, mAbs, mNot, fConst and fImmd are bit masks over the source index:
// initOpInfo tests bit s (for s = 0, 1, 2) to decide whether source s gets
// the corresponding modifier or may come from the corresponding file.
struct opProperties
{
   operation op;            // operation this row applies to
   unsigned int mNeg   : 4; // sources that accept the NEG modifier
   unsigned int mAbs   : 4; // sources that accept the ABS modifier
   unsigned int mNot   : 4; // sources that accept the NOT modifier
   unsigned int mSat   : 4; // bit 0x8 set: destination accepts the SAT modifier
   unsigned int fConst : 3; // sources that may be read from FILE_MEMORY_CONST
   unsigned int fImmd  : 4; // last bit indicates if full immediate is supported
};
 212
// Per-operation modifier / source-file capabilities consumed by initOpInfo.
//
// Columns follow the field order of struct opProperties. In the neg, abs,
// not, c[] and imm columns bit s refers to source s (0x1 = source 0,
// 0x2 = source 1, 0x4 = source 2). 0x8 in the sat column enables the
// saturate destination modifier; 0x8 in the imm column makes initOpInfo set
// immdBits to 0xffffffff for the operation.
// Operations not listed here keep the defaults set by initOpInfo.
static const struct opProperties _initProps[] =
{
   //           neg  abs  not  sat  c[]  imm
   { OP_ADD,    0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 },
   { OP_SUB,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 },
   { OP_MUL,    0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
   { OP_MAX,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_MIN,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_MAD,    0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
   { OP_ABS,    0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
   { OP_NEG,    0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
   { OP_CVT,    0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
   { OP_AND,    0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
   { OP_OR,     0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
   { OP_XOR,    0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
   { OP_SHL,    0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
   { OP_SHR,    0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
   { OP_SET,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_SLCT,   0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint
   { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
   { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
   { OP_COS,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_SIN,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_EX2,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_LG2,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_RCP,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_RSQ,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_DFDX,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
   { OP_DFDY,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
   { OP_CALL,   0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
   { OP_INSBF,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
   { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   // saturate only:
   { OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
   { OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
};
 251
 252 void TargetNVC0::initOpInfo()
 253 {
 254    unsigned int i, j;
 255
 256    static const uint32_t commutative[(OP_LAST + 31) / 32] =
 257    {
 258       // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
 259       0x0670ca00, 0x0000003f, 0x00000000
 260    };
 261
 262    static const uint32_t shortForm[(OP_LAST + 31) / 32] =
 263    {
 264       // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
 265       0x0670ca00, 0x00000000, 0x00000000
 266    };
 267
 268    static const operation noDest[] =
 269    {
 270       OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
 271       OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
 272       OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
 273       OP_QUADON, OP_QUADPOP
 274    };
 275
 276    joinAnterior = false;
 277
 278    for (i = 0; i < DATA_FILE_COUNT; ++i)
 279       nativeFileMap[i] = (DataFile)i;
 280    nativeFileMap[FILE_ADDRESS] = FILE_GPR;
 281
 282    for (i = 0; i < OP_LAST; ++i) {
 283       opInfo[i].variants = NULL;
 284       opInfo[i].op = (operation)i;
 285       opInfo[i].srcTypes = 1 << (int)TYPE_F32;
 286       opInfo[i].dstTypes = 1 << (int)TYPE_F32;
 287       opInfo[i].immdBits = 0;
 288       opInfo[i].srcNr = operationSrcNr[i];
 289
 290       for (j = 0; j < opInfo[i].srcNr; ++j) {
 291          opInfo[i].srcMods[j] = 0;
 292          opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
 293       }
 294       opInfo[i].dstMods = 0;
 295       opInfo[i].dstFiles = 1 << (int)FILE_GPR;
 296
 297       opInfo[i].hasDest = 1;
 298       opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
 299       opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
 300       opInfo[i].pseudo = (i < OP_MOV);
 301       opInfo[i].predicate = !opInfo[i].pseudo;
 302       opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
 303       opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
 304    }
 305    for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)
 306       opInfo[noDest[i]].hasDest = 0;
 307
 308    for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
 309       const struct opProperties *prop = &_initProps[i];
 310
 311       for (int s = 0; s < 3; ++s) {
 312          if (prop->mNeg & (1 << s))
 313             opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
 314          if (prop->mAbs & (1 << s))
 315             opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
 316          if (prop->mNot & (1 << s))
 317             opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
 318          if (prop->fConst & (1 << s))
 319             opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
 320          if (prop->fImmd & (1 << s))
 321             opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
 322          if (prop->fImmd & 8)
 323             opInfo[prop->op].immdBits = 0xffffffff;
 324       }
 325       if (prop->mSat & 8)
 326          opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
 327    }
 328 }
 329
 330 unsigned int
 331 TargetNVC0::getFileSize(DataFile file) const
 332 {
 333    switch (file) {
 334    case FILE_NULL:          return 0;
 335    case FILE_GPR:           return 63;
 336    case FILE_PREDICATE:     return 7;
 337    case FILE_FLAGS:         return 1;
 338    case FILE_ADDRESS:       return 0;
 339    case FILE_IMMEDIATE:     return 0;
 340    case FILE_MEMORY_CONST:  return 65536;
 341    case FILE_SHADER_INPUT:  return 0x400;
 342    case FILE_SHADER_OUTPUT: return 0x400;
 343    case FILE_MEMORY_GLOBAL: return 0xffffffff;
 344    case FILE_MEMORY_SHARED: return 16 << 10;
 345    case FILE_MEMORY_LOCAL:  return 48 << 10;
 346    case FILE_SYSTEM_VALUE:  return 32;
 347    default:
 348       assert(!"invalid file");
 349       return 0;
 350    }
 351 }
 352
 353 unsigned int
 354 TargetNVC0::getFileUnit(DataFile file) const
 355 {
 356    if (file == FILE_GPR || file == FILE_ADDRESS || file == FILE_SYSTEM_VALUE)
 357       return 2;
 358    return 0;
 359 }
 360
 361 uint32_t
 362 TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
 363 {
 364    const int idx = sym->reg.data.sv.index;
 365    const SVSemantic sv = sym->reg.data.sv.sv;
 366
 367    const bool isInput = shaderFile == FILE_SHADER_INPUT;
 368
 369    switch (sv) {
 370    case SV_POSITION:       return 0x070 + idx * 4;
 371    case SV_INSTANCE_ID:    return 0x2f8;
 372    case SV_VERTEX_ID:      return 0x2fc;
 373    case SV_PRIMITIVE_ID:   return isInput ? 0x060 : 0x040;
 374    case SV_LAYER:          return 0x064;
 375    case SV_VIEWPORT_INDEX: return 0x068;
 376    case SV_POINT_SIZE:     return 0x06c;
 377    case SV_CLIP_DISTANCE:  return 0x2c0 + idx * 4;
 378    case SV_POINT_COORD:    return 0x2e0 + idx * 4;
 379    case SV_FACE:           return 0x3fc;
 380    case SV_TESS_FACTOR:    return 0x000 + idx * 4;
 381    case SV_TESS_COORD:     return 0x2f0 + idx * 4;
 382    default:
 383       return 0xffffffff;
 384    }
 385 }
 386
 387 bool
 388 TargetNVC0::insnCanLoad(const Instruction *i, int s,
 389                         const Instruction *ld) const
 390 {
 391    DataFile sf = ld->src[0].getFile();
 392
 393    // immediate 0 can be represented by GPR $r63
 394    if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
 395       return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE);
 396
 397    if (s > opInfo[i->op].srcNr)
 398       return false;
 399    if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
 400       return false;
 401
 402    // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
 403    if (ld->src[0].isIndirect(0))
 404       return false;
 405
 406    for (int k = 0; i->srcExists(k); ++k) {
 407       if (i->src[k].getFile() == FILE_IMMEDIATE) {
 408          if (i->getSrc(k)->reg.data.u64 != 0)
 409             return false;
 410       } else
 411       if (i->src[k].getFile() != FILE_GPR &&
 412           i->src[k].getFile() != FILE_PREDICATE) {
 413          return false;
 414       }
 415    }
 416
 417    // not all instructions support full 32 bit immediates
 418    if (sf == FILE_IMMEDIATE) {
 419       Storage &reg = ld->getSrc(0)->asImm()->reg;
 420
 421       if (opInfo[i->op].immdBits != 0xffffffff) {
 422          if (i->sType == TYPE_F32) {
 423             if (reg.data.u32 & 0xfff)
 424                return false;
 425          } else
 426          if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
 427             // with u32, 0xfffff counts as 0xffffffff as well
 428             if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
 429                return false;
 430          }
 431       } else
 432       if (i->op == OP_MAD || i->op == OP_FMA) {
 433          // requires src == dst, cannot decide before RA
 434          // (except if we implement more constraints)
 435          if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff)
 436             return false;
 437       }
 438    }
 439
 440    return true;
 441 }
 442
 443 bool
 444 TargetNVC0::isOpSupported(operation op, DataType ty) const
 445 {
 446    if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
 447       return false;
 448    if (op == OP_SAD && ty != TYPE_S32)
 449       return false;
 450    if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
 451       return false;
 452    return true;
 453 }
 454
 455 bool
 456 TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
 457 {
 458    if (!isFloatType(insn->dType)) {
 459       switch (insn->op) {
 460       case OP_ABS:
 461       case OP_NEG:
 462       case OP_CVT:
 463       case OP_CEIL:
 464       case OP_FLOOR:
 465       case OP_TRUNC:
 466       case OP_AND:
 467       case OP_OR:
 468       case OP_XOR:
 469          break;
 470       case OP_ADD:
 471          if (insn->src[s ? 0 : 1].mod.neg())
 472             return false;
 473          break;
 474       case OP_SUB:
 475          if (s == 0)
 476             return insn->src[1].mod.neg() ? false : true;
 477          break;
 478       default:
 479          return false;
 480       }
 481    }
 482    if (s > 3)
 483       return false;
 484    return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
 485 }
 486
 487 bool
 488 TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const
 489 {
 490    if (insn->getPredicate())
 491       return false;
 492    return opInfo[insn->op].predicate;
 493 }
 494
 495 bool
 496 TargetNVC0::isSatSupported(const Instruction *insn) const
 497 {
 498    if (insn->op == OP_CVT)
 499       return true;
 500    if (!(opInfo[insn->op].dstMods & NV50_IR_MOD_SAT))
 501       return false;
 502
 503    if (insn->dType == TYPE_U32)
 504       return (insn->op == OP_ADD) || (insn->op == OP_MAD);
 505
 506    return insn->dType == TYPE_F32;
 507 }
 508
 509 // TODO: better values
 510 int TargetNVC0::getLatency(const Instruction *i) const
 511 {
 512    if (i->op == OP_LOAD) {
 513       if (i->cache == CACHE_CV)
 514          return 700;
 515       return 48;
 516    }
 517    return 24;
 518 }
 519
 520 // These are "inverse" throughput values, i.e. the number of cycles required
 521 // to issue a specific instruction for a full warp (32 threads).
 522 //
 523 // Assuming we have more than 1 warp in flight, a higher issue latency results
 524 // in a lower result latency since the MP will have spent more time with other
 525 // warps.
 526 // This also helps to determine the number of cycles between instructions in
 527 // a single warp.
 528 //
 529 int TargetNVC0::getThroughput(const Instruction *i) const
 530 {
 531    // TODO: better values
 532    if (i->dType == TYPE_F32) {
 533       switch (i->op) {
 534       case OP_ADD:
 535       case OP_MUL:
 536       case OP_MAD:
 537       case OP_FMA:
 538          return 1;
 539       case OP_CVT:
 540       case OP_CEIL:
 541       case OP_FLOOR:
 542       case OP_TRUNC:
 543       case OP_SET:
 544       case OP_SLCT:
 545       case OP_MIN:
 546       case OP_MAX:
 547          return 2;
 548       case OP_RCP:
 549       case OP_RSQ:
 550       case OP_LG2:
 551       case OP_SIN:
 552       case OP_COS:
 553       case OP_PRESIN:
 554       case OP_PREEX2:
 555       default:
 556          return 8;
 557       }
 558    } else
 559    if (i->dType == TYPE_U32 || i->dType == TYPE_S32) {
 560       switch (i->op) {
 561       case OP_ADD:
 562       case OP_AND:
 563       case OP_OR:
 564       case OP_XOR:
 565       case OP_NOT:
 566          return 1;
 567       case OP_MUL:
 568       case OP_MAD:
 569       case OP_CVT:
 570       case OP_SET:
 571       case OP_SLCT:
 572       case OP_SHL:
 573       case OP_SHR:
 574       case OP_NEG:
 575       case OP_ABS:
 576       case OP_MIN:
 577       case OP_MAX:
 578       default:
 579          return 2;
 580       }
 581    } else
 582    if (i->dType == TYPE_F64) {
 583       return 2;
 584    } else {
 585       return 1;
 586    }
 587 }
 588
 589 } // namespace nv50_ir