src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp

   1 /*
   2  * Copyright 2011 Christoph Bumiller
   3  *           2014 Red Hat Inc.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in
  13  * all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21  * OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "codegen/nv50_ir_target_gm107.h"
  25 #include "codegen/nv50_ir_lowering_gm107.h"
  26
  27 namespace nv50_ir {
  28
  29 Target *getTargetGM107(unsigned int chipset)
  30 {
  31    return new TargetGM107(chipset);
  32 }
  33
// BUILTINS / LIBRARY FUNCTIONS:
  35
// laziness -> will just hardcode everything for the time being
  37
  38 #include "lib/gm107.asm.h"
  39
  40 void
  41 TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
  42 {
  43    *code = (const uint32_t *)&gm107_builtin_code[0];
  44    *size = sizeof(gm107_builtin_code);
  45 }
  46
  47 uint32_t
  48 TargetGM107::getBuiltinOffset(int builtin) const
  49 {
  50    assert(builtin < NVC0_BUILTIN_COUNT);
  51    return gm107_builtin_offsets[builtin];
  52 }
  53
  54 bool
  55 TargetGM107::isOpSupported(operation op, DataType ty) const
  56 {
  57    switch (op) {
  58    case OP_SAD:
  59    case OP_POW:
  60    case OP_DIV:
  61    case OP_MOD:
  62       return false;
  63    case OP_SQRT:
  64       if (ty == TYPE_F64)
  65          return false;
  66       return chipset >= NVISA_GM200_CHIPSET;
  67    default:
  68       break;
  69    }
  70
  71    return true;
  72 }
  73
  74 // Return true when an instruction supports the reuse flag. When supported, the
  75 // hardware will use the operand reuse cache introduced since Maxwell, which
  76 // should try to reduce bank conflicts by caching values for the subsequent
  77 // instructions. Note that the next instructions have to use the same GPR id in
  78 // the same operand slot.
  79 bool
  80 TargetGM107::isReuseSupported(const Instruction *insn) const
  81 {
  82    const OpClass cl = getOpClass(insn->op);
  83
  84    // TODO: double-check!
  85    switch (cl) {
  86    case OPCLASS_ARITH:
  87    case OPCLASS_COMPARE:
  88    case OPCLASS_LOGIC:
  89    case OPCLASS_MOVE:
  90    case OPCLASS_SHIFT:
  91       return true;
  92    case OPCLASS_BITFIELD:
  93       if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
  94          return true;
  95       break;
  96    default:
  97       break;
  98    }
  99    return false;
 100 }
 101
 102 // Return true when an instruction requires to set up a barrier because it
 103 // doesn't operate at a fixed latency. Variable latency instructions are memory
 104 // operations, double precision operations, special function unit operations
 105 // and other low throughput instructions.
 106 bool
 107 TargetGM107::isBarrierRequired(const Instruction *insn) const
 108 {
 109    const OpClass cl = getOpClass(insn->op);
 110
 111    if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
 112       return true;
 113
 114    switch (cl) {
 115    case OPCLASS_ATOMIC:
 116    case OPCLASS_LOAD:
 117    case OPCLASS_STORE:
 118    case OPCLASS_SURFACE:
 119    case OPCLASS_TEXTURE:
 120       return true;
 121    case OPCLASS_SFU:
 122       switch (insn->op) {
 123       case OP_COS:
 124       case OP_EX2:
 125       case OP_LG2:
 126       case OP_LINTERP:
 127       case OP_PINTERP:
 128       case OP_RCP:
 129       case OP_RSQ:
 130       case OP_SIN:
 131       case OP_SQRT:
 132          return true;
 133       default:
 134          break;
 135       }
 136       break;
 137    case OPCLASS_BITFIELD:
 138       switch (insn->op) {
 139       case OP_BFIND:
 140       case OP_POPCNT:
 141          return true;
 142       default:
 143          break;
 144       }
 145       break;
 146    case OPCLASS_CONTROL:
 147       switch (insn->op) {
 148       case OP_EMIT:
 149       case OP_RESTART:
 150          return true;
 151       default:
 152          break;
 153       }
 154       break;
 155    case OPCLASS_OTHER:
 156       switch (insn->op) {
 157       case OP_AFETCH:
 158       case OP_PFETCH:
 159       case OP_PIXLD:
 160       case OP_SHFL:
 161          return true;
 162       case OP_RDSV:
 163          return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
 164       default:
 165          break;
 166       }
 167       break;
 168    case OPCLASS_ARITH:
 169       // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
 170       if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
 171           !isFloatType(insn->dType))
 172          return true;
 173       break;
 174    case OPCLASS_CONVERT:
 175       if (insn->def(0).getFile() != FILE_PREDICATE &&
 176           insn->src(0).getFile() != FILE_PREDICATE)
 177          return true;
 178       break;
 179    default:
 180       break;
 181    }
 182    return false;
 183 }
 184
 185 bool
 186 TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
 187 {
 188    // TODO
 189    return false;
 190 }
 191
 192 // Return the number of stall counts needed to complete a single instruction.
 193 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require
 194 // different number of stall counts like memory operations.
 195 int
 196 TargetGM107::getLatency(const Instruction *insn) const
 197 {
 198    // TODO: better values! This should be good enough for now though.
 199    switch (insn->op) {
 200    case OP_EMIT:
 201    case OP_EXPORT:
 202    case OP_PIXLD:
 203    case OP_RESTART:
 204    case OP_STORE:
 205    case OP_SUSTB:
 206    case OP_SUSTP:
 207       return 1;
 208    case OP_SHFL:
 209       return 2;
 210    case OP_ADD:
 211    case OP_AND:
 212    case OP_EXTBF:
 213    case OP_FMA:
 214    case OP_INSBF:
 215    case OP_MAD:
 216    case OP_MAX:
 217    case OP_MIN:
 218    case OP_MOV:
 219    case OP_MUL:
 220    case OP_NOT:
 221    case OP_OR:
 222    case OP_PREEX2:
 223    case OP_PRESIN:
 224    case OP_QUADOP:
 225    case OP_SELP:
 226    case OP_SET:
 227    case OP_SET_AND:
 228    case OP_SET_OR:
 229    case OP_SET_XOR:
 230    case OP_SHL:
 231    case OP_SHLADD:
 232    case OP_SHR:
 233    case OP_SLCT:
 234    case OP_SUB:
 235    case OP_VOTE:
 236    case OP_XOR:
 237       if (insn->dType != TYPE_F64)
 238          return 6;
 239       break;
 240    case OP_RDSV:
 241       return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
 242    case OP_ABS:
 243    case OP_CEIL:
 244    case OP_CVT:
 245    case OP_FLOOR:
 246    case OP_NEG:
 247    case OP_SAT:
 248    case OP_TRUNC:
 249       if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
 250                                  insn->src(0).getFile() == FILE_PREDICATE))
 251          return 6;
 252       break;
 253    case OP_BFIND:
 254    case OP_COS:
 255    case OP_EX2:
 256    case OP_LG2:
 257    case OP_POPCNT:
 258    case OP_QUADON:
 259    case OP_QUADPOP:
 260    case OP_RCP:
 261    case OP_RSQ:
 262    case OP_SIN:
 263    case OP_SQRT:
 264       return 13;
 265    default:
 266       break;
 267    }
 268    // Use the maximum number of stall counts for other instructions.
 269    return 15;
 270 }
 271
 272 // Return the operand read latency which is the number of stall counts before
 273 // an instruction can read its sources. For memory operations like ATOM, LOAD
 274 // and STORE, the memory access has to be indirect.
 275 int
 276 TargetGM107::getReadLatency(const Instruction *insn) const
 277 {
 278    switch (insn->op) {
 279    case OP_ABS:
 280    case OP_BFIND:
 281    case OP_CEIL:
 282    case OP_COS:
 283    case OP_EX2:
 284    case OP_FLOOR:
 285    case OP_LG2:
 286    case OP_NEG:
 287    case OP_POPCNT:
 288    case OP_RCP:
 289    case OP_RSQ:
 290    case OP_SAT:
 291    case OP_SIN:
 292    case OP_SQRT:
 293    case OP_SULDB:
 294    case OP_SULDP:
 295    case OP_SUREDB:
 296    case OP_SUREDP:
 297    case OP_SUSTB:
 298    case OP_SUSTP:
 299    case OP_TRUNC:
 300       return 4;
 301    case OP_CVT:
 302       if (insn->def(0).getFile() != FILE_PREDICATE &&
 303           insn->src(0).getFile() != FILE_PREDICATE)
 304          return 4;
 305       break;
 306    case OP_ATOM:
 307    case OP_LOAD:
 308    case OP_STORE:
 309       if (insn->src(0).isIndirect(0)) {
 310          switch (insn->src(0).getFile()) {
 311          case FILE_MEMORY_SHARED:
 312          case FILE_MEMORY_CONST:
 313             return 2;
 314          case FILE_MEMORY_GLOBAL:
 315          case FILE_MEMORY_LOCAL:
 316             return 4;
 317          default:
 318             break;
 319          }
 320       }
 321       break;
 322    case OP_EXPORT:
 323    case OP_PFETCH:
 324    case OP_SHFL:
 325    case OP_VFETCH:
 326       return 2;
 327    default:
 328       break;
 329    }
 330    return 0;
 331 }
 332
 333 bool
 334 TargetGM107::isCS2RSV(SVSemantic sv) const
 335 {
 336    return sv == SV_CLOCK;
 337 }
 338
 339 bool
 340 TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
 341 {
 342    if (stage == CG_STAGE_PRE_SSA) {
 343       GM107LoweringPass pass(prog);
 344       return pass.run(prog, false, true);
 345    } else
 346    if (stage == CG_STAGE_POST_RA) {
 347       NVC0LegalizePostRA pass(prog);
 348       return pass.run(prog, false, true);
 349    } else
 350    if (stage == CG_STAGE_SSA) {
 351       GM107LegalizeSSA pass;
 352       return pass.run(prog, false, true);
 353    }
 354    return false;
 355 }
 356
 357 CodeEmitter *
 358 TargetGM107::getCodeEmitter(Program::Type type)
 359 {
 360    return createCodeEmitterGM107(type);
 361 }
 362
 363 } // namespace nv50_ir