src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp

   1 /*
   2  * Copyright 2011 Christoph Bumiller
   3  *           2014 Red Hat Inc.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in
  13  * all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21  * OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "codegen/nv50_ir.h"
  25 #include "codegen/nv50_ir_build_util.h"
  26
  27 #include "codegen/nv50_ir_target_nvc0.h"
  28 #include "codegen/nv50_ir_lowering_gm107.h"
  29
  30 #include <limits>
  31
  32 namespace nv50_ir {
  33
  34 #define QOP_ADD  0
  35 #define QOP_SUBR 1
  36 #define QOP_SUB  2
  37 #define QOP_MOV2 3
  38
  39 //             UL UR LL LR
  40 #define QUADOP(q, r, s, t)                      \
  41    ((QOP_##q << 6) | (QOP_##r << 4) |           \
  42     (QOP_##s << 2) | (QOP_##t << 0))
  43
  44 bool
  45 GM107LoweringPass::handleManualTXD(TexInstruction *i)
  46 {
  47    static const uint8_t qOps[4][2] =
  48    {
  49       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // l0
  50       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // l1
  51       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
  52       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
  53    };
  54    Value *def[4][4];
  55    Value *crd[3];
  56    Value *tmp;
  57    Instruction *tex, *add;
  58    Value *zero = bld.loadImm(bld.getSSA(), 0);
  59    int l, c;
  60    const int dim = i->tex.target.getDim();
  61    const int array = i->tex.target.isArray();
  62
  63    i->op = OP_TEX; // no need to clone dPdx/dPdy later
  64
  65    for (c = 0; c < dim; ++c)
  66       crd[c] = bld.getScratch();
  67    tmp = bld.getScratch();
  68
  69    for (l = 0; l < 4; ++l) {
  70       // mov coordinates from lane l to all lanes
  71       bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
  72       for (c = 0; c < dim; ++c) {
  73          bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l));
  74          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
  75          add->subOp = 0x00;
  76          add->lanes = 1; /* abused for .ndv */
  77       }
  78
  79       // add dPdx from lane l to lanes dx
  80       for (c = 0; c < dim; ++c) {
  81          bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l));
  82          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
  83          add->subOp = qOps[l][0];
  84          add->lanes = 1; /* abused for .ndv */
  85       }
  86
  87       // add dPdy from lane l to lanes dy
  88       for (c = 0; c < dim; ++c) {
  89          bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l));
  90          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
  91          add->subOp = qOps[l][1];
  92          add->lanes = 1; /* abused for .ndv */
  93       }
  94
  95       // texture
  96       bld.insert(tex = cloneForward(func, i));
  97       for (c = 0; c < dim; ++c)
  98          tex->setSrc(c + array, crd[c]);
  99       bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
 100
 101       // save results
 102       for (c = 0; i->defExists(c); ++c) {
 103          Instruction *mov;
 104          def[c][l] = bld.getSSA();
 105          mov = bld.mkMov(def[c][l], tex->getDef(c));
 106          mov->fixed = 1;
 107          mov->lanes = 1 << l;
 108       }
 109    }
 110
 111    for (c = 0; i->defExists(c); ++c) {
 112       Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
 113       for (l = 0; l < 4; ++l)
 114          u->setSrc(l, def[c][l]);
 115    }
 116
 117    i->bb->remove(i);
 118    return true;
 119 }
 120
 121 bool
 122 GM107LoweringPass::handleDFDX(Instruction *insn)
 123 {
 124    Instruction *shfl;
 125    int qop = 0, xid = 0;
 126
 127    switch (insn->op) {
 128    case OP_DFDX:
 129       qop = QUADOP(SUB, SUBR, SUB, SUBR);
 130       xid = 1;
 131       break;
 132    case OP_DFDY:
 133       qop = QUADOP(SUB, SUB, SUBR, SUBR);
 134       xid = 2;
 135       break;
 136    default:
 137       assert(!"invalid dfdx opcode");
 138       break;
 139    }
 140
 141    shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
 142                     insn->getSrc(0), bld.mkImm(xid));
 143    shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
 144    insn->op = OP_QUADOP;
 145    insn->subOp = qop;
 146    insn->lanes = 0; /* abused for !.ndv */
 147    insn->setSrc(1, insn->getSrc(0));
 148    insn->setSrc(0, shfl->getDef(0));
 149    return true;
 150 }
 151
 152 bool
 153 GM107LoweringPass::handlePFETCH(Instruction *i)
 154 {
 155    Value *tmp0 = bld.getScratch();
 156    Value *tmp1 = bld.getScratch();
 157    Value *tmp2 = bld.getScratch();
 158    bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
 159    bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
 160    bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
 161    bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
 162    if (i->getSrc(1))
 163       bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
 164    else
 165       bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0));
 166    bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
 167    i->setSrc(0, tmp0);
 168    i->setSrc(1, NULL);
 169    return true;
 170 }
 171
 172 bool
 173 GM107LoweringPass::handlePOPCNT(Instruction *i)
 174 {
 175    Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
 176                            i->getSrc(0), i->getSrc(1));
 177    i->setSrc(0, tmp);
 178    i->setSrc(1, NULL);
 179    return true;
 180 }
 181
 182 //
 183 // - add quadop dance for texturing
 184 // - put FP outputs in GPRs
 185 // - convert instruction sequences
 186 //
 187 bool
 188 GM107LoweringPass::visit(Instruction *i)
 189 {
 190    bld.setPosition(i, false);
 191
 192    if (i->cc != CC_ALWAYS)
 193       checkPredicate(i);
 194
 195    switch (i->op) {
 196    case OP_PFETCH:
 197       return handlePFETCH(i);
 198    case OP_DFDX:
 199    case OP_DFDY:
 200       return handleDFDX(i);
 201    case OP_POPCNT:
 202       return handlePOPCNT(i);
 203    default:
 204       return NVC0LoweringPass::visit(i);
 205    }
 206 }
 207
 208 } // namespace nv50_ir