void emitInterpMode(const Instruction *);
void emitLoadStoreType(DataType ty);
void emitSUGType(DataType);
+ void emitSUAddr(const TexInstruction *);
+ void emitSUDim(const TexInstruction *);
void emitCachingMode(CacheMode c);
void emitShortSrc2(const ValueRef&);
void emitDMUL(const Instruction *);
void emitIMAD(const Instruction *);
void emitISAD(const Instruction *);
+ void emitSHLADD(const Instruction *a);
void emitFMAD(const Instruction *);
void emitDMAD(const Instruction *);
void emitMADSP(const Instruction *);
void emitSET(const CmpInstruction *);
void emitSLCT(const CmpInstruction *);
- void emitSELP(const CmpInstruction *);
+ void emitSELP(const Instruction *);
void emitTEXBAR(const Instruction *);
void emitTEX(const TexInstruction *);
void emitSULDGB(const TexInstruction *);
void emitSUSTGx(const TexInstruction *);
+ void emitSULDB(const TexInstruction *);
+ void emitSUSTx(const TexInstruction *);
+ void emitSULEA(const TexInstruction *);
+
void emitVSHL(const Instruction *);
void emitVectorSubOp(const Instruction *);
void emitPIXLD(const Instruction *);
+ void emitVOTE(const Instruction *);
+
inline void defId(const ValueDef&, const int pos);
inline void defId(const Instruction *, int d, const int pos);
inline void srcId(const ValueRef&, const int pos);
if (i->saturate)
code[0] |= 1 << 5;
+
+ if (i->dnz)
+ code[0] |= 1 << 7;
+ else
if (i->ftz)
code[0] |= 1 << 6;
} else {
uint32_t addOp = 0;
assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
- assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
if (i->src(0).mod.neg())
addOp |= 0x200;
if (i->src(1).mod.neg())
addOp |= 0x100;
- if (i->op == OP_SUB) {
+ if (i->op == OP_SUB)
addOp ^= 0x100;
- assert(addOp != 0x300); // would be add-plus-one
- }
+
+ assert(addOp != 0x300); // would be add-plus-one
if (i->encSize == 8) {
if (isLIMM(i->src(1), TYPE_U32)) {
}
}
-// TODO: shl-add
void
CodeEmitterNVC0::emitIMAD(const Instruction *i)
{
+ uint8_t addOp =
+ i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
+
assert(i->encSize == 8);
emitForm_A(i, HEX64(20000000, 00000003));
+ assert(addOp != 3);
+ code[0] |= addOp << 8;
+
if (isSignedType(i->dType))
code[0] |= 1 << 7;
if (isSignedType(i->sType))
if (i->flagsDef >= 0) code[1] |= 1 << 16;
if (i->flagsSrc >= 0) code[1] |= 1 << 23;
- if (i->src(2).mod.neg()) code[0] |= 0x10;
- if (i->src(1).mod.neg() ^
- i->src(0).mod.neg()) code[0] |= 0x20;
-
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
code[0] |= 1 << 6;
}
+void /**
+ * Encode an OP_SHLADD instruction into code[0] and code[1].
+ *
+ * Source 1 must be an immediate that fits in 5 bits (presumably the shift
+ * amount; confirm). Source 2 may be a GPR, a constant-buffer value or an
+ * immediate. Negation of source 0 and of source 2 is folded into a 2-bit
+ * field of code[1].
+ */
+CodeEmitterNVC0::emitSHLADD(const Instruction *i)
+{
+ uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg(); // bit 1: src0 negated, bit 0: src2 negated
+ const ImmediateValue *imm = i->src(1).get()->asImm();
+ assert(imm);
+
+ code[0] = 0x00000003;
+ code[1] = 0x40000000 | addOp << 23; // << binds tighter than |, so addOp starts at bit 23. NOTE(review): emitIMAD asserts addOp != 3 but nothing checks it here; confirm both negations are encodable
+
+ emitPredicate(i);
+
+ defId(i->def(0), 14);
+ srcId(i->src(0), 20);
+
+ if (i->flagsDef >= 0)
+ code[1] |= 1 << 16; // instruction also defines flags
+
+ assert(!(imm->reg.data.u32 & 0xffffffe0)); // only the low 5 bits of the immediate may be set
+ code[0] |= imm->reg.data.u32 << 5;
+
+ switch (i->src(2).getFile()) { // the encoding of source 2 depends on its register file
+ case FILE_GPR:
+ srcId(i->src(2), 26);
+ break;
+ case FILE_MEMORY_CONST:
+ code[1] |= 0x4000;
+ code[1] |= i->getSrc(2)->reg.fileIndex << 10; // constant-buffer index
+ setAddress16(i->src(2));
+ break;
+ case FILE_IMMEDIATE:
+ setImmediate(i, 2);
+ break;
+ default:
+ assert(!"bad src2 file");
+ break;
+ }
+}
+
void
CodeEmitterNVC0::emitMADSP(const Instruction *i)
{
// (a OP b) OP c
if (i->predSrc != 2 && i->srcExists(2)) {
code[1] |= subOp << 21;
- srcId(i->src(2), 17);
- if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
+ srcId(i->src(2), 49);
+ if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
} else {
code[1] |= 0x000e0000;
}
if (i->ftz)
op |= 1 << 5;
else
- if (!isFloatType(i->dType))
+ if (!isFloatType(i->dType)) {
op |= isSignedType(i->dType) ? 0x23 : 0x03;
+ op |= i->subOp << 6;
+ }
if (i->dType == TYPE_F64)
op |= 0x01;
emitForm_A(i, op);
emitNegAbs12(i);
+
+ if (i->flagsDef >= 0)
+ code[1] |= 1 << 16;
}
void
code[0] |= 1 << 5;
}
-void CodeEmitterNVC0::emitSELP(const CmpInstruction *i)
+/**
+ * Fixup callback that emitSELP() registers through addInterp().
+ *
+ * Makes bit 20 of code[entry->loc + 1] mirror data.force_persample_interp:
+ * the bit is set when the flag is true and cleared otherwise. This is the
+ * same bit emitSELP() sets for a NOT modifier on source 2.
+ */
+static void
+selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+{
+   const uint32_t flipBit = 1 << 20;
+   const int hiWord = entry->loc + 1;
+
+   if (!data.force_persample_interp) {
+      code[hiWord] &= ~flipBit;
+      return;
+   }
+   code[hiWord] |= flipBit;
+}
+
+void CodeEmitterNVC0::emitSELP(const Instruction *i)
{
emitForm_A(i, HEX64(20000000, 00000004));
- if (i->setCond == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
+ if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 20;
+
+ if (i->subOp == 1) {
+ addInterp(0, 0, selpFlip);
+ }
}
void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
void
CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
{
- code[0] = 0x00000000 | (laneMask << 6);
+ code[0] = 0x00000200 | (laneMask << 6); // dall
code[1] = 0x48000000 | qOp;
defId(i->def(0), 14);
srcId(i->src(0), 20);
srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
- if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
- code[0] |= 1 << 9; // dall
-
emitPredicate(i);
}
} else {
ImmediateValue *imm = i->getSrc(1)->asImm();
assert(imm);
+ assert(imm->reg.data.u32 <= 0xfff);
code[0] |= imm->reg.data.u32 << 26;
code[1] |= imm->reg.data.u32 >> 6;
code[1] |= 0x4000;
}
static void
-interpApply(const InterpEntry *entry, uint32_t *code,
- bool force_persample_interp, bool flatshade)
+interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
{
int ipa = entry->ipa;
int reg = entry->reg;
int loc = entry->loc;
- if (flatshade &&
+ if (data.flatshade &&
(ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
ipa = NV50_IR_INTERP_FLAT;
reg = 0x3f;
- } else if (force_persample_interp &&
+ } else if (data.force_persample_interp &&
(ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
ipa |= NV50_IR_INTERP_CENTROID;
case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
case FILE_MEMORY_SHARED:
- opc = 0xc8000000;
- if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
- opc |= (1 << 26);
- else
- opc |= (1 << 24);
+ if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ opc = 0xb8000000;
+ else
+ opc = 0xcc000000;
+ } else {
+ opc = 0xc9000000;
+ }
break;
default:
assert(!"invalid memory file");
code[0] = 0x00000005;
code[1] = opc;
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
+ // Unlocked store on shared memory can fail.
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
+ i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
+ assert(i->defExists(0));
+ defId(i->def(0), 8);
+ }
+ }
+
setAddressByFile(i->src(0));
srcId(i->src(1), 14);
srcId(i->src(0).getIndirect(0), 20);
case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
case FILE_MEMORY_SHARED:
- opc = 0xc0000000;
- if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
- opc |= (1 << 26);
- else
- opc |= (1 << 24);
+ if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ opc = 0xa8000000;
+ else
+ opc = 0xc4000000;
+ } else {
+ opc = 0xc1000000;
+ }
break;
case FILE_MEMORY_CONST:
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
}
code[1] = opc;
- defId(i->def(0), 14);
+ int r = 0, p = -1;
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
+ if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
+ r = -1;
+ p = 0;
+ } else if (i->defExists(1)) { // r, p
+ p = 1;
+ } else {
+ assert(!"Expected predicate dest for load locked");
+ }
+ }
+ }
+
+ if (r >= 0)
+ defId(i->def(r), 14);
+ else
+ code[0] |= 63 << 14;
+
+ if (p >= 0) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ defId(i->def(p), 8);
+ else
+ defId(i->def(p), 32 + 18);
+ }
setAddressByFile(i->src(0));
srcId(i->src(0).getIndirect(0), 20);
else
opc = HEX64(28000000, 00000004);
- opc |= i->lanes << 5;
+ if (i->src(0).getFile() != FILE_PREDICATE)
+ opc |= i->lanes << 5;
emitForm_B(i, opc);
+
+ // Explicitly emit the predicate source as emitForm_B skips it.
+ if (i->src(0).getFile() == FILE_PREDICATE)
+ srcId(i->src(0), 20);
} else {
uint32_t imm;
setSUPred(i, 2);
}
+/**
+ * Encode the surface resource operand of a surface instruction.
+ *
+ * Only valid for chipsets older than GK104. When tex.rIndirectSrc is
+ * non-negative, the source it selects is encoded at position 26. Otherwise
+ * the fixed index tex.r is written at bit 26 of code[0] and bit 14 of
+ * code[1] is set.
+ */
+void
+CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)
+{
+   assert(targ->getChipset() < NVISA_GK104_CHIPSET);
+
+   const bool isIndirect = i->tex.rIndirectSrc >= 0;
+   if (isIndirect) {
+      srcId(i, i->tex.rIndirectSrc, 26);
+      return;
+   }
+
+   // Direct addressing: the resource index is embedded in the instruction.
+   code[0] |= i->tex.r << 26;
+   code[1] |= 0x00004000;
+}
+
+/**
+ * Encode the dimensionality field of a surface instruction and its first
+ * source operand.
+ *
+ * Only valid for chipsets older than GK104. (dim - 1) is ORed into code[1]
+ * at bit 12; 3-dim images, arrays and cubes additionally force that field
+ * to 3 (e2d mode). Source 0 is encoded at position 20.
+ */
+void
+CodeEmitterNVC0::emitSUDim(const TexInstruction *i)
+{
+   assert(targ->getChipset() < NVISA_GK104_CHIPSET);
+
+   // use e2d mode for 3-dim images, arrays and cubes.
+   const bool forceE2D = i->tex.target.isArray() ||
+                         i->tex.target.isCube() ||
+                         i->tex.target.getDim() == 3;
+
+   code[1] |= (i->tex.target.getDim() - 1) << 12;
+   if (forceE2D)
+      code[1] |= 3 << 12;
+
+   srcId(i->src(0), 20);
+}
+
+void /**
+ * Encode an OP_SULEA instruction for chipsets older than GK104.
+ *
+ * Writes the base opcode words, then the predicate, the load/store type,
+ * up to two destinations, and finally the surface resource operand and
+ * dimensionality via emitSUAddr() and emitSUDim().
+ */
+CodeEmitterNVC0::emitSULEA(const TexInstruction *i)
+{
+ assert(targ->getChipset() < NVISA_GK104_CHIPSET);
+
+ code[0] = 0x5;
+ code[1] = 0xf0000000;
+
+ emitPredicate(i);
+ emitLoadStoreType(i->sType); // keyed on the source type, whereas emitSULDB and emitSUSTx pass dType
+
+ defId(i->def(0), 14);
+
+ if (i->defExists(1)) {
+ defId(i->def(1), 32 + 22); // position 32 + 22: presumably bit 22 of code[1]; confirm against defId
+ } else {
+ code[1] |= 7 << 22; // no second def: fill the 3-bit field at bit 22 of code[1] with all ones
+ }
+
+ emitSUAddr(i);
+ emitSUDim(i);
+}
+
+void /**
+ * Encode a surface load (the pre-GK104 path of OP_SULDB in
+ * emitInstruction) for chipsets older than GK104.
+ *
+ * Writes the base opcode words with the sub-operation, then the predicate,
+ * the load/store type, the destination, the caching mode, and finally the
+ * surface resource operand and dimensionality.
+ */
+CodeEmitterNVC0::emitSULDB(const TexInstruction *i)
+{
+ assert(targ->getChipset() < NVISA_GK104_CHIPSET);
+
+ code[0] = 0x5;
+ code[1] = 0xd4000000 | (i->subOp << 15); // sub-operation starts at bit 15 of code[1]
+
+ emitPredicate(i);
+ emitLoadStoreType(i->dType);
+
+ defId(i->def(0), 14); // loaded value is encoded at position 14
+
+ emitCachingMode(i->cache);
+ emitSUAddr(i);
+ emitSUDim(i);
+}
+
+void /**
+ * Encode a surface store (the pre-GK104 path of OP_SUSTB and OP_SUSTP in
+ * emitInstruction) for chipsets older than GK104.
+ *
+ * OP_SUSTP encodes the component mask tex.mask; any other op encodes the
+ * load/store type derived from dType instead.
+ */
+CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)
+{
+ assert(targ->getChipset() < NVISA_GK104_CHIPSET);
+
+ code[0] = 0x5;
+ code[1] = 0xdc000000 | (i->subOp << 15); // sub-operation starts at bit 15 of code[1]
+
+ if (i->op == OP_SUSTP)
+ code[1] |= i->tex.mask << 17; // OP_SUSTP: component mask starts at bit 17 of code[1]
+ else
+ emitLoadStoreType(i->dType);
+
+ emitPredicate(i);
+
+ srcId(i->src(1), 14); // source 1 is encoded at position 14
+
+ emitCachingMode(i->cache);
+ emitSUAddr(i);
+ emitSUDim(i);
+}
+
void
CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
{
code[1] |= 0x00e00000;
}
+/**
+ * Encode an OP_VOTE instruction.
+ *
+ * Source 0 must be a predicate. At most one GPR destination (position 14)
+ * and one predicate destination (position 32 + 22) are accepted; a missing
+ * destination has its field filled with all ones (63 for the GPR field,
+ * 7 for the predicate field). A NOT modifier on source 0 sets bit 23 of
+ * code[0].
+ */
+void
+CodeEmitterNVC0::emitVOTE(const Instruction *i)
+{
+   assert(i->src(0).getFile() == FILE_PREDICATE);
+
+   code[0] = 0x00000004 | (i->subOp << 5);
+   code[1] = 0x48000000;
+
+   emitPredicate(i);
+
+   // Track which destination kinds have been encoded so far.
+   bool haveGprDef = false;
+   bool havePredDef = false;
+
+   for (int d = 0; i->defExists(d); ++d) {
+      switch (i->def(d).getFile()) {
+      case FILE_PREDICATE:
+         assert(!havePredDef);
+         havePredDef = true;
+         defId(i->def(d), 32 + 22);
+         break;
+      case FILE_GPR:
+         assert(!haveGprDef);
+         haveGprDef = true;
+         defId(i->def(d), 14);
+         break;
+      default:
+         assert(!"Unhandled def");
+         break;
+      }
+   }
+
+   // Fill the field of any destination that was not provided.
+   if (!haveGprDef)
+      code[0] |= 63 << 14;
+   if (!havePredDef)
+      code[1] |= 7 << 22;
+
+   if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
+      code[0] |= 1 << 23;
+   srcId(i->src(0), 20);
+}
+
bool
CodeEmitterNVC0::emitInstruction(Instruction *insn)
{
case OP_SAD:
emitISAD(insn);
break;
+ case OP_SHLADD:
+ emitSHLADD(insn);
+ break;
case OP_NOT:
emitNOT(insn);
break;
emitSET(insn->asCmp());
break;
case OP_SELP:
- emitSELP(insn->asCmp());
+ emitSELP(insn);
break;
case OP_SLCT:
emitSLCT(insn->asCmp());
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
emitSULDGB(insn->asTex());
else
- ERROR("SULDB not yet supported on < nve4\n");
+ emitSULDB(insn->asTex());
break;
case OP_SUSTB:
case OP_SUSTP:
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
emitSUSTGx(insn->asTex());
else
- ERROR("SUSTx not yet supported on < nve4\n");
+ emitSUSTx(insn->asTex());
+ break;
+ case OP_SULEA:
+ emitSULEA(insn->asTex());
break;
case OP_ATOM:
emitATOM(insn);
case OP_PIXLD:
emitPIXLD(insn);
break;
+ case OP_VOTE:
+ emitVOTE(insn);
+ break;
case OP_PHI:
case OP_UNION:
case OP_CONSTRAINT: