nvc0: add maxwell (sm50) compiler backend
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_lowering_gm107.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 * 2014 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "codegen/nv50_ir.h"
25 #include "codegen/nv50_ir_build_util.h"
26
27 #include "codegen/nv50_ir_target_nvc0.h"
28 #include "codegen/nv50_ir_lowering_gm107.h"
29
30 #include <limits>
31
32 namespace nv50_ir {
33
34 #define QOP_ADD 0
35 #define QOP_SUBR 1
36 #define QOP_SUB 2
37 #define QOP_MOV2 3
38
39 // UL UR LL LR
40 #define QUADOP(q, r, s, t) \
41 ((QOP_##q << 6) | (QOP_##r << 4) | \
42 (QOP_##s << 2) | (QOP_##t << 0))
43
// Lower TXD (texture fetch with explicit derivatives) manually.
//
// For each of the 4 lanes of a quad: broadcast that lane's coordinates and
// derivatives to all lanes (via SHFL), apply the per-lane quad-op
// add/subtract mask to offset the coordinates by dPdx/dPdy, issue a plain
// TEX inside a QUADON/QUADPOP region, and keep the result only in the
// originating lane (mov->lanes = 1 << l). The four per-lane results are
// finally recombined with OP_UNION into the original defs.
bool
GM107LoweringPass::handleManualTXD(TexInstruction *i)
{
   // qOps[l][0] is the quad-op mask used to add dPdx for lane l,
   // qOps[l][1] the one used to add dPdy (see QUADOP/QOP_* above).
   static const uint8_t qOps[4][2] =
   {
      { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
      { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
      { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
      { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
   };
   Value *def[4][4];            // def[c][l]: component c of lane l's result
                                //  (assumes at most 4 defs — TODO confirm)
   Value *crd[3];               // per-component working coordinates
   Value *tmp;
   Instruction *tex, *add;
   Value *zero = bld.loadImm(bld.getSSA(), 0);
   int l, c;
   const int dim = i->tex.target.getDim();

   i->op = OP_TEX; // no need to clone dPdx/dPdy later

   for (c = 0; c < dim; ++c)
      crd[c] = bld.getScratch();
   tmp = bld.getScratch();

   for (l = 0; l < 4; ++l) {
      // mov coordinates from lane l to all lanes
      bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
      for (c = 0; c < dim; ++c) {
         bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c), bld.mkImm(l));
         // quadop with subOp 0 (all-ADD of zero) just propagates the value
         add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
         add->subOp = 0x00;
         add->lanes = 1; /* abused for .ndv */
      }

      // add dPdx from lane l to lanes dx
      for (c = 0; c < dim; ++c) {
         bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l));
         add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
         add->subOp = qOps[l][0];
         add->lanes = 1; /* abused for .ndv */
      }

      // add dPdy from lane l to lanes dy
      for (c = 0; c < dim; ++c) {
         bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l));
         add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
         add->subOp = qOps[l][1];
         add->lanes = 1; /* abused for .ndv */
      }

      // texture: clone the (now OP_TEX) instruction with the adjusted coords
      bld.insert(tex = cloneForward(func, i));
      for (c = 0; c < dim; ++c)
         tex->setSrc(c, crd[c]);
      bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);

      // save results: keep lane l's values only (fixed mov, lane mask 1<<l)
      for (c = 0; i->defExists(c); ++c) {
         Instruction *mov;
         def[c][l] = bld.getSSA();
         mov = bld.mkMov(def[c][l], tex->getDef(c));
         mov->fixed = 1;
         mov->lanes = 1 << l;
      }
   }

   // recombine the four per-lane results into the original defs
   for (c = 0; i->defExists(c); ++c) {
      Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
      for (l = 0; l < 4; ++l)
         u->setSrc(l, def[c][l]);
   }

   i->bb->remove(i);
   return true;
}
119
120 bool
121 GM107LoweringPass::handleDFDX(Instruction *insn)
122 {
123 Instruction *shfl;
124 int qop = 0, xid = 0;
125
126 switch (insn->op) {
127 case OP_DFDX:
128 qop = QUADOP(SUB, SUBR, SUB, SUBR);
129 xid = 1;
130 break;
131 case OP_DFDY:
132 qop = QUADOP(SUB, SUB, SUBR, SUBR);
133 xid = 2;
134 break;
135 default:
136 assert(!"invalid dfdx opcode");
137 break;
138 }
139
140 shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
141 insn->getSrc(0), bld.mkImm(xid));
142 shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
143 insn->op = OP_QUADOP;
144 insn->subOp = qop;
145 insn->lanes = 0; /* abused for !.ndv */
146 insn->setSrc(1, insn->getSrc(0));
147 insn->setSrc(0, shfl->getDef(0));
148 return true;
149 }
150
151 bool
152 GM107LoweringPass::handlePFETCH(Instruction *i)
153 {
154 Value *tmp0 = bld.getScratch();
155 Value *tmp1 = bld.getScratch();
156 Value *tmp2 = bld.getScratch();
157 bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
158 bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
159 bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
160 bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
161 bld.mkOp1(OP_MOV , TYPE_U32, tmp2, bld.mkImm(i->getSrc(0)->reg.data.u32));
162 bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
163 i->setSrc(0, tmp0);
164 i->setSrc(1, NULL);
165 return true;
166 }
167
168 bool
169 GM107LoweringPass::handlePOPCNT(Instruction *i)
170 {
171 Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
172 i->getSrc(0), i->getSrc(1));
173 i->setSrc(0, tmp);
174 i->setSrc(1, NULL);
175 return TRUE;
176 }
177
178 //
179 // - add quadop dance for texturing
180 // - put FP outputs in GPRs
181 // - convert instruction sequences
182 //
// Main per-instruction lowering dispatcher: routes each opcode to its
// dedicated handler or rewrites it inline. Returns true in all paths
// (handlers' return values are propagated where used directly).
bool
GM107LoweringPass::visit(Instruction *i)
{
   // new instructions are emitted *before* i so they execute first
   bld.setPosition(i, false);

   if (i->cc != CC_ALWAYS)
      checkPredicate(i);

   switch (i->op) {
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXF:
   case OP_TXG:
      return handleTEX(i->asTex());
   case OP_TXD:
      return handleTXD(i->asTex());
   case OP_TXLQ:
      return handleTXLQ(i->asTex());
   case OP_TXQ:
      return handleTXQ(i->asTex());
   case OP_EX2:
      // split EX2 into PREEX2 + EX2, feeding EX2 from its own def
      bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
      i->setSrc(0, i->getDef(0));
      break;
   case OP_POW:
      return handlePOW(i);
   case OP_DIV:
      return handleDIV(i);
   case OP_MOD:
      return handleMOD(i);
   case OP_SQRT:
      return handleSQRT(i);
   case OP_EXPORT:
      return handleEXPORT(i);
   case OP_PFETCH:
      return handlePFETCH(i);
   case OP_EMIT:
   case OP_RESTART:
      return handleOUT(i);
   case OP_RDSV:
      return handleRDSV(i);
   case OP_WRSV:
      return handleWRSV(i);
   case OP_LOAD:
      // shader-input loads need per-shader-type rewriting
      if (i->src(0).getFile() == FILE_SHADER_INPUT) {
         if (prog->getType() == Program::TYPE_COMPUTE) {
            // compute reads its "inputs" from const buffer 0
            i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
            i->getSrc(0)->reg.fileIndex = 0;
         } else
         if (prog->getType() == Program::TYPE_GEOMETRY &&
             i->src(0).isIndirect(0)) {
            // XXX: this assumes vec4 units
            Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                                    i->getIndirect(0, 0), bld.mkImm(4));
            i->setIndirect(0, 0, ptr);
         } else {
            i->op = OP_VFETCH;
            assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
         }
      }
      break;
   case OP_ATOM:
   {
      // global-memory atomics additionally need cache-control handling
      const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
      handleATOM(i);
      handleCasExch(i, cctl);
   }
      break;
   case OP_SULDB:
   case OP_SULDP:
   case OP_SUSTB:
   case OP_SUSTP:
   case OP_SUREDB:
   case OP_SUREDP:
      handleSurfaceOpNVE4(i->asTex());
      break;
   case OP_DFDX:
   case OP_DFDY:
      handleDFDX(i);
      break;
   case OP_POPCNT:
      handlePOPCNT(i);
      break;
   default:
      break;
   }
   return true;
}
272
273 } // namespace nv50_ir