void emitLOAD(const Instruction *);
void emitSTORE(const Instruction *);
void emitMOV(const Instruction *);
+ void emitATOM(const Instruction *);
+ void emitCCTL(const Instruction *);
void emitINTERP(const Instruction *);
+ void emitAFETCH(const Instruction *);
void emitPFETCH(const Instruction *);
void emitVFETCH(const Instruction *);
void emitEXPORT(const Instruction *);
void emitUADD(const Instruction *);
void emitFADD(const Instruction *);
+ void emitDADD(const Instruction *);
void emitIMUL(const Instruction *);
void emitFMUL(const Instruction *);
+ void emitDMUL(const Instruction *);
void emitIMAD(const Instruction *);
void emitISAD(const Instruction *);
void emitFMAD(const Instruction *);
+ void emitDMAD(const Instruction *);
void emitNOT(const Instruction *);
void emitLogicOp(const Instruction *, uint8_t subOp);
void emitPOPC(const Instruction *);
void emitINSBF(const Instruction *);
+ void emitEXTBF(const Instruction *);
+ void emitBFIND(const Instruction *);
void emitShift(const Instruction *);
void emitSFnOp(const Instruction *, uint8_t subOp);
void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
+ void emitPIXLD(const Instruction *);
+
+ void emitBAR(const Instruction *);
+ void emitMEMBAR(const Instruction *);
+
void emitFlow(const Instruction *);
+ void emitVOTE(const Instruction *);
+
inline void defId(const ValueDef&, const int pos);
inline void srcId(const ValueRef&, const int pos);
inline void srcId(const ValueRef *, const int pos);
code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
#define FTZ_(b) if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
+#define DNZ_(b) if (i->dnz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
#define SAT_(b) if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
// Encode a constant-buffer source: a 14-bit word address (byte offset / 4)
// split across code[0] and code[1], plus the symbol's file index.
void
CodeEmitterGK110::setCAddress14(const ValueRef& src)
{
- const int32_t addr = src.get()->asSym()->reg.data.offset / 4;
+ const Storage& res = src.get()->asSym()->reg;
+ const int32_t addr = res.data.offset / 4;
code[0] |= (addr & 0x01ff) << 23; // low 9 address bits -> code[0] bits 23..31
code[1] |= (addr & 0x3e00) >> 9; // next 5 address bits -> code[1] bits 0..4
+ code[1] |= res.fileIndex << 5; // file index is now written here instead of by the caller
}
void
case FILE_MEMORY_CONST:
code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
setCAddress14(i->src(s));
- code[1] |= i->getSrc(s)->reg.fileIndex << 5;
break;
case FILE_IMMEDIATE:
setShortImmediate(i, s);
srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
break;
default:
+ if (i->op == OP_SELP) {
+ assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
+ srcId(i->src(s), 42);
+ }
// ignore here, can be predicate or flags, but must not be address
break;
}
SAT_(35);
RND_(36, F);
FTZ_(38);
+ DNZ_(39);
+
+ bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
+
+ if (code[0] & 0x1) {
+ if (neg1)
+ code[1] ^= 1 << 27;
+ } else
+ if (neg1) {
+ code[1] |= 1 << 19;
+ }
+}
+
+void
+CodeEmitterGK110::emitDMAD(const Instruction *i)
+{
+ assert(!i->saturate);
+ assert(!i->ftz);
+
+ emitForm_21(i, 0x1b8, 0xb38);
+
+ NEG_(34, 2);
+ RND_(36, F);
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
emitForm_L(i, 0x200, 0x2, Modifier(0));
FTZ_(38);
+ DNZ_(39);
SAT_(3a);
if (neg)
code[1] ^= 1 << 22;
assert(i->postFactor == 0);
} else {
emitForm_21(i, 0x234, 0xc34);
+ code[1] |= ((i->postFactor > 0) ?
+ (7 - i->postFactor) : (0 - i->postFactor)) << 12;
RND_(2a, F);
FTZ_(2f);
+ DNZ_(30);
SAT_(35);
if (code[0] & 0x1) {
}
}
+// Emit a double-precision multiply.
+// The instruction must not carry postFactor, saturate, ftz or dnz.
+void
+CodeEmitterGK110::emitDMUL(const Instruction *i)
+{
+   assert(!i->postFactor);
+   assert(!i->saturate);
+   assert(!i->ftz);
+   assert(!i->dnz);
+
+   // Negation on exactly one of the two factors negates the product;
+   // negation on both cancels out.
+   const bool negateProduct = (i->src(0).mod ^ i->src(1).mod).neg();
+
+   emitForm_21(i, 0x240, 0xc40);
+
+   RND_(2a, F);
+
+   if (negateProduct) {
+      // The negate bit sits at a different position depending on bit 0 of
+      // code[0] as left by the calls above.
+      if (code[0] & 0x1)
+         code[1] ^= 1 << 27;
+      else
+         code[1] |= 1 << 19;
+   }
+}
+
void
CodeEmitterGK110::emitIMUL(const Instruction *i)
{
assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
- if (isLIMM(i->src(1), TYPE_S32)) {
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
emitForm_L(i, 0x280, 2, Modifier(0));
- assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH);
-
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
+ code[1] |= 1 << 24;
if (i->sType == TYPE_S32)
code[1] |= 3 << 25;
} else {
assert(i->rnd == ROUND_N);
assert(!i->saturate);
- emitForm_L(i, 0x400, 0, i->src(1).mod);
+ Modifier mod = i->src(1).mod ^
+ Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
+
+ emitForm_L(i, 0x400, 0, mod);
FTZ_(3a);
NEG_(3b, 0);
RND_(2a, F);
ABS_(31, 0);
NEG_(33, 0);
+ SAT_(35);
if (code[0] & 0x1) {
modNegAbsF32_3b(i, 1);
+ if (i->op == OP_SUB) code[1] ^= 1 << 27;
} else {
ABS_(34, 1);
NEG_(30, 1);
+ if (i->op == OP_SUB) code[1] ^= 1 << 16;
}
}
}
+// Emit a double-precision add; OP_SUB is encoded as an add with the
+// negate bit of the second source toggled.
+void
+CodeEmitterGK110::emitDADD(const Instruction *i)
+{
+   const bool subtract = (i->op == OP_SUB);
+
+   assert(!i->saturate);
+   assert(!i->ftz);
+
+   emitForm_21(i, 0x238, 0xc38);
+
+   RND_(2a, F);
+   ABS_(31, 0);
+   NEG_(33, 0);
+
+   // Source 1 modifiers (and the subtract toggle) live at different bit
+   // positions depending on bit 0 of code[0].
+   if (code[0] & 0x1) {
+      modNegAbsF32_3b(i, 1);
+      if (subtract) {
+         code[1] ^= 1 << 27;
+      }
+   } else {
+      NEG_(30, 1);
+      ABS_(34, 1);
+      if (subtract) {
+         code[1] ^= 1 << 16;
+      }
+   }
+}
+
void
CodeEmitterGK110::emitUADD(const Instruction *i)
{
if (i->sType == TYPE_S32)
code[1] |= (1 << 19) | (1 << 24);
- if (code[0] & 0x1) {
- assert(!i->subOp);
- SAT_(39);
- } else {
- if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
- code[1] |= 1 << 25;
- SAT_(35);
- }
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
+ code[1] |= 1 << 25;
+
+ if (i->flagsDef >= 0) code[1] |= 1 << 18;
+ if (i->flagsSrc >= 0) code[1] |= 1 << 20;
+
+ SAT_(35);
}
void
{
assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
- emitForm_21(i, 0x1fc, 0xb74);
+ emitForm_21(i, 0x1f4, 0xb74);
if (i->dType == TYPE_S32)
code[1] |= 1 << 19;
// Emit a logic operation selected by subOp. A predicate destination uses a
// hand-built encoding; otherwise the long-immediate form (emitForm_L) or the
// regular form (emitForm_21) is used depending on source 1.
void
CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
{
- assert(!(i->src(0).mod & Modifier(NV50_IR_MOD_NOT))); // XXX: find me
+ if (i->def(0).getFile() == FILE_PREDICATE) {
+ code[0] = 0x00000002 | (subOp << 27);
+ code[1] = 0x84800000;
+ emitPredicate(i);
+
+ defId(i->def(0), 5);
+ srcId(i->src(0), 14);
+ if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17; // invert source 0
+ srcId(i->src(1), 32);
+ if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3; // invert source 1
+
+ if (i->defExists(1)) {
+ defId(i->def(1), 2);
+ } else {
+ code[0] |= 7 << 2; // no second def: fill its slot with 7 (presumably the "unused predicate" id - confirm)
+ }
+ // (a OP b) OP c
+ if (i->predSrc != 2 && i->srcExists(2)) { // source 2 counts only when it is not the instruction predicate
+ code[1] |= subOp << 16; // the same subOp is applied for the second stage
+ srcId(i->src(2), 42);
+ if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13; // invert source 2
+ } else {
+ code[1] |= 7 << 10; // no third source: fill its slot with 7
+ }
+ } else
if (isLIMM(i->src(1), TYPE_S32)) {
emitForm_L(i, 0x200, 0, i->src(1).mod);
code[1] |= subOp << 24;
+ NOT_(3a, 0);
} else {
emitForm_21(i, 0x220, 0xc20);
code[1] |= subOp << 12;
+ NOT_(2a, 0);
NOT_(2b, 1);
}
- assert(!(code[0] & 0x1) || !(i->src(1).mod & Modifier(NV50_IR_MOD_NOT)));
}
void
}
// Emit a bit-field extract through emitForm_21, then add the signedness
// and reverse flags.
void
-CodeEmitterGK110::emitShift(const Instruction *i)
+CodeEmitterGK110::emitEXTBF(const Instruction *i)
{
- const bool sar = i->op == OP_SHR && isSignedType(i->sType);
+ emitForm_21(i, 0x600, 0xc00);
- if (sar) {
- emitForm_21(i, 0x214, 0x014);
- code[1] |= 1 << 19;
- } else
+ if (i->dType == TYPE_S32)
+ code[1] |= 0x80000; // bit 19: signed destination type
+ if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
+ code[1] |= 0x800; // bit 11: set for the EXTBF_REV sub-operation
+}
+
+// Emit BFIND via emitForm_C and OR in the option flags derived from the
+// destination type, the source NOT modifier and the sub-operation.
+void
+CodeEmitterGK110::emitBFIND(const Instruction *i)
+{
+   emitForm_C(i, 0x218, 0x2);
+
+   uint32_t flags = 0;
+
+   if (i->dType == TYPE_S32) {
+      flags |= 1 << 19; // signed
+   }
+   if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) {
+      flags |= 1 << 11; // source carries the NOT modifier
+   }
+   if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT) {
+      flags |= 1 << 12; // BFIND_SAMT sub-operation
+   }
+
+   code[1] |= flags;
+}
+
// Emit a shift: OP_SHR uses one opcode pair (with a flag for signed types),
// every other op reaching here uses the second pair.
+void
+CodeEmitterGK110::emitShift(const Instruction *i)
+{
if (i->op == OP_SHR) {
- // this is actually RSHF
- emitForm_21(i, 0x27c, 0x87c);
- code[1] |= GK110_GPR_ZERO << 10;
+ emitForm_21(i, 0x214, 0xc14);
+ if (isSignedType(i->dType))
+ code[1] |= 1 << 19; // signed destination type: set bit 19
} else {
- // this is actually LSHF
- emitForm_21(i, 0x1fc, 0xb7c);
- code[1] |= GK110_GPR_ZERO << 10;
+ emitForm_21(i, 0x224, 0xc24);
}
- if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP) {
- if (!sar)
- code[1] |= 1 << 21;
- // XXX: find wrap modifier for SHR S32
- }
+ if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
+ code[1] |= 1 << 10; // wrap flag, applied to both shift directions
}
void
CodeEmitterGK110::emitPreOp(const Instruction *i)
{
- emitForm_21(i, 0x248, -1);
+ emitForm_C(i, 0x248, 0x2);
if (i->op == OP_PREEX2)
code[1] |= 1 << 10;
code[0] |= typeSizeofLog2(dType) << 10;
code[0] |= typeSizeofLog2(i->sType) << 12;
+ code[1] |= i->subOp << 12;
if (isSignedIntType(dType))
code[0] |= 0x4000;
code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
if (i->defExists(1))
defId(i->def(1), 2);
- else
- code[0] |= 0x1c;
+ else
+ code[0] |= 0x1c;
} else {
switch (i->sType) {
- case TYPE_F32: op2 = 0x000; op1 = 0x820; break;
+ case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
default:
op2 = 0x1a8;
modNegAbsF32_3b(i, 1);
}
FTZ_(3a);
+
+ if (i->dType == TYPE_F32) {
+ if (isFloatType(i->sType))
+ code[1] |= 1 << 23;
+ else
+ code[1] |= 1 << 15;
+ }
}
if (i->sType == TYPE_S32)
code[1] |= 1 << 19;
FTZ_(32);
emitCondCode(cc, 0x33, 0xf);
} else {
- emitForm_21(i, 0x1a4, 0xb20);
+ emitForm_21(i, 0x1a0, 0xb20);
emitCondCode(cc, 0x34, 0x7);
}
}
{
emitForm_21(i, 0x250, 0x050);
- if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)))
+ if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 13;
}
void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
{
- code[0] = 0x00000002 | (i->subOp << 23);
+ code[0] = 0x0000003e | (i->subOp << 23);
code[1] = 0x77000000;
emitPredicate(i);
case OP_TXD:
code[1] = 0x7e000000;
break;
+ case OP_TXLQ:
+ code[1] = 0x7e800000;
+ break;
case OP_TXF:
code[1] = 0x78000000;
break;
+ case OP_TXG:
+ code[1] = 0x7dc00000;
+ break;
default:
code[1] = 0x7d800000;
break;
code[1] = 0x76000000;
code[1] |= i->tex.r << 9;
break;
+ case OP_TXLQ:
+ code[0] = 0x00000002;
+ code[1] = 0x76800000;
+ code[1] |= i->tex.r << 9;
+ break;
case OP_TXF:
code[0] = 0x00000002;
code[1] = 0x70000000;
code[1] |= i->tex.r << 13;
break;
+ case OP_TXG:
+ code[0] = 0x00000001;
+ code[1] = 0x70000000;
+ code[1] |= i->tex.r << 15;
+ break;
default:
code[0] = 0x00000001;
code[1] = 0x60000000;
case OP_TXB: code[1] |= 0x2000; break;
case OP_TXL: code[1] |= 0x3000; break;
case OP_TXF: break;
- case OP_TXG: break; // XXX
+ case OP_TXG: break;
case OP_TXD: break;
+ case OP_TXLQ: break;
default:
assert(!"invalid texture op");
break;
srcId(i->src(0), 10);
srcId(i, src1, 23);
- // if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
+ if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;
// texture target:
code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
// ?
}
- if (i->tex.useOffsets) {
+ if (i->tex.useOffsets == 1) {
switch (i->op) {
case OP_TXF: code[1] |= 0x200; break;
+ case OP_TXD: code[1] |= 0x00400000; break;
default: code[1] |= 0x800; break;
}
}
+ if (i->tex.useOffsets == 4)
+ code[1] |= 0x1000;
}
void
// Emit a quad operation. qOp is split across the two code words and
// laneMask is placed at bit 12 of code[1].
void
CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
{
- emitNOP(i); // TODO
+ code[0] = 0x00000002 | ((qOp & 1) << 31); // lowest qOp bit goes to the top of code[0]
+ code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12); // remaining qOp bits start at bit 0 of code[1]
+
+ defId(i->def(0), 2);
+ srcId(i->src(0), 10);
+ srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23); // reuse source 0 when there is no real second source
+
+ if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
+ code[1] |= 1 << 9; // dall
+
+ emitPredicate(i);
+}
+
+// Emit PIXLD through the long form, then add the sub-operation selector
+// (at bit 2 of code[1]) together with the fixed 0x00070000 field.
+void
+CodeEmitterGK110::emitPIXLD(const Instruction *i)
+{
+   emitForm_L(i, 0x7f4, 2, Modifier(0));
+
+   code[1] |= (i->subOp << 2) | 0x00070000;
+}
+
+// Emit a barrier instruction.
+//
+// Source 0 is the barrier id and source 1 the thread count; each may be a
+// GPR or an immediate. An optional source 2 (when it is not the
+// instruction predicate) is written at bit 42, with bit 13 of code[1]
+// marking a NOT modifier.
+void
+CodeEmitterGK110::emitBAR(const Instruction *i)
+{
+   code[0] = 0x00000002;
+   code[1] = 0x85400000;
+
+   switch (i->subOp) {
+   case NV50_IR_SUBOP_BAR_ARRIVE:   code[1] |= 0x08; break;
+   case NV50_IR_SUBOP_BAR_RED_AND:  code[1] |= 0x50; break;
+   case NV50_IR_SUBOP_BAR_RED_OR:   code[1] |= 0x90; break;
+   case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;
+   default:
+      // Anything else must be a plain sync, which needs no extra bits.
+      assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
+      break;
+   }
+
+   emitPredicate(i);
+
+   // barrier id
+   if (i->src(0).getFile() == FILE_GPR) {
+      srcId(i->src(0), 10);
+   } else {
+      ImmediateValue *imm = i->getSrc(0)->asImm();
+      assert(imm);
+      code[0] |= imm->reg.data.u32 << 10;
+      code[1] |= 0x8000; // set only when the barrier id is an immediate
+   }
+
+   // thread count
+   if (i->src(1).getFile() == FILE_GPR) {
+      srcId(i->src(1), 23);
+   } else {
+      // The thread count is source 1. Reading source 0 here would encode
+      // the barrier id a second time, or hit the assert when source 0 is
+      // a GPR.
+      ImmediateValue *imm = i->getSrc(1)->asImm();
+      assert(imm);
+      assert(imm->reg.data.u32 <= 0xfff);
+      // The 12-bit count straddles the two code words.
+      code[0] |= imm->reg.data.u32 << 23;
+      code[1] |= imm->reg.data.u32 >> 9;
+      code[1] |= 0x4000; // set only when the thread count is an immediate
+   }
+
+   if (i->srcExists(2) && (i->predSrc != 2)) {
+      srcId(i->src(2), 32 + 10);
+      if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
+         code[1] |= 1 << 13;
+   } else {
+      // No third source: fill its slot with 7.
+      code[1] |= 7 << 10;
+   }
+}
+
+// Emit a memory barrier; the scope extracted from subOp is placed at
+// bit 8 of code[0].
+void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
+{
+   code[1] = 0x7cc00000;
+   code[0] = (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8) | 0x00000002;
+
+   emitPredicate(i);
}
void
case OP_PRECONT: code[1] = 0x15800000; mask = 2; break;
case OP_PRERET: code[1] = 0x13800000; mask = 2; break;
- case OP_QUADON: code[1] = 0x1b000000; mask = 0; break;
+ case OP_QUADON: code[1] = 0x1b800000; mask = 0; break;
case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
case OP_BRKPT: code[1] = 0x00000000; mask = 0; break;
default:
} else
if (mask & 2) {
int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
+ if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
+ pcRel += 8;
// currently we don't want absolute branches
assert(!f->absolute);
code[0] |= (pcRel & 0x1ff) << 23;
}
}
+// Emit a vote instruction. Source 0 and the second def must both be
+// predicates; subOp selects the vote variant.
+void
+CodeEmitterGK110::emitVOTE(const Instruction *i)
+{
+   assert(i->src(0).getFile() == FILE_PREDICATE &&
+          i->def(1).getFile() == FILE_PREDICATE);
+
+   code[0] = 0x00000002;
+   code[1] = (i->subOp << 19) | 0x86c00000;
+
+   emitPredicate(i);
+
+   defId(i->def(0), 2);
+   defId(i->def(1), 48);
+
+   const bool invertSource = (i->src(0).mod == Modifier(NV50_IR_MOD_NOT));
+   if (invertSource) {
+      code[1] |= 1 << 13;
+   }
+   srcId(i->src(0), 42);
+}
+
+// Emit AFETCH: the low 11 bits of the source offset are split across both
+// code words, and bit 3 of code[1] is set when the source symbol is a
+// shader output.
+void
+CodeEmitterGK110::emitAFETCH(const Instruction *i)
+{
+   const uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
+   const bool fromOutput = (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
+
+   code[0] = (offset << 23) | 0x00000002;
+   code[1] = (offset >> 9) | 0x7d000000;
+   if (fromOutput) {
+      code[1] |= 0x8;
+   }
+
+   emitPredicate(i);
+
+   defId(i->def(0), 2);
+   srcId(i->src(0).getIndirect(0), 10);
+}
+
void
CodeEmitterGK110::emitPFETCH(const Instruction *i)
{
emitPredicate(i);
+ const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
+
defId(i->def(0), 2);
- srcId(i->src(1), 10);
+ srcId(i, src1, 10);
}
void
CodeEmitterGK110::emitVFETCH(const Instruction *i)
{
+ unsigned int size = typeSizeof(i->dType);
uint32_t offset = i->src(0).get()->reg.data.offset;
code[0] = 0x00000002 | (offset << 23);
code[1] = 0x7ec00000 | (offset >> 9);
+ code[1] |= (size / 4 - 1) << 18;
-#if 0
if (i->perPatch)
- code[0] |= 0x100;
+ code[1] |= 0x4;
if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
- code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
-#endif
+ code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
emitPredicate(i);
void
CodeEmitterGK110::emitEXPORT(const Instruction *i)
{
+ unsigned int size = typeSizeof(i->dType);
uint32_t offset = i->src(0).get()->reg.data.offset;
code[0] = 0x00000002 | (offset << 23);
code[1] = 0x7f000000 | (offset >> 9);
+ code[1] |= (size / 4 - 1) << 18;
-#if 0
if (i->perPatch)
- code[0] |= 0x100;
-#endif
+ code[1] |= 0x4;
emitPredicate(i);
// Write the interpolation mode bits of i->ipa into code[1].
void
CodeEmitterGK110::emitInterpMode(const Instruction *i)
{
- code[1] |= i->ipa << 21; // TODO: INTERP_SAMPLEID
+ code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
+ code[1] |= (i->ipa & 0xc) << (19 - 2); // ipa bits 2..3 land in code[1] bits 19..20
+}
+
+// Re-encode the interpolation mode and register of the instruction located
+// at code[entry->loc], adjusting the mode for flat shading or forced
+// per-sample interpolation.
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+            bool force_persample_interp, bool flatshade)
+{
+   uint32_t *const insn = code + entry->loc;
+   int mode = entry->ipa;
+   int srcReg = entry->reg;
+
+   if (flatshade &&
+       (mode & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
+      // Flat shading replaces SC interpolation and drops the register.
+      mode = NV50_IR_INTERP_FLAT;
+      srcReg = 0xff;
+   } else if (force_persample_interp) {
+      const bool defaultSample =
+         (mode & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT;
+      const bool notFlat =
+         (mode & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT;
+      if (defaultSample && notFlat)
+         mode |= NV50_IR_INTERP_CENTROID;
+   }
+
+   // Replace the 4 mode bits (19..22) of the second word.
+   insn[1] = (insn[1] & ~(0xf << 19)) |
+             ((mode & 0x3) << 21) |
+             ((mode & 0xc) << (19 - 2));
+   // Replace the 8-bit register field (23..30) of the first word.
+   insn[0] = (insn[0] & ~(0xff << 23)) | (srcReg << 23);
}
void
if (i->saturate)
code[1] |= 1 << 18;
- if (i->op == OP_PINTERP)
+ if (i->op == OP_PINTERP) {
srcId(i->src(1), 23);
- else
+ addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+ } else {
code[0] |= 0xff << 23;
+ addInterp(i->ipa, 0xff, interpApply);
+ }
srcId(i->src(0).getIndirect(0), 10);
emitInterpMode(i);
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break;
- case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
+ case FILE_MEMORY_SHARED:
+ code[0] = 0x00000002;
+ if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
+ code[1] = 0x78400000;
+ else
+ code[1] = 0x7ac00000;
+ break;
default:
assert(!"invalid memory file");
break;
}
- if (i->src(0).getFile() != FILE_MEMORY_GLOBAL)
- offset &= 0xffffff;
-
if (code[0] & 0x2) {
+ offset &= 0xffffff;
emitLoadStoreType(i->dType, 0x33);
if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
emitCachingMode(i->cache, 0x2f);
code[0] |= offset << 23;
code[1] |= offset >> 9;
+ // Unlocked store on shared memory can fail.
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
+ i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
+ assert(i->defExists(0));
+ defId(i->def(0), 32 + 16);
+ }
+
emitPredicate(i);
srcId(i->src(1), 2);
srcId(i->src(0).getIndirect(0), 10);
+ if (i->src(0).getFile() == FILE_MEMORY_GLOBAL &&
+ i->src(0).isIndirect(0) &&
+ i->getIndirect(0, 0)->reg.size == 8)
+ code[1] |= 1 << 23;
}
void
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break;
- case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
+ case FILE_MEMORY_SHARED:
+ code[0] = 0x00000002;
+ if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
+ code[1] = 0x77400000;
+ else
+ code[1] = 0x7a400000;
+ break;
case FILE_MEMORY_CONST:
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
emitMOV(i);
offset &= 0xffff;
code[0] = 0x00000002;
code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
+ code[1] |= i->subOp << 15;
break;
default:
assert(!"invalid memory file");
code[0] |= offset << 23;
code[1] |= offset >> 9;
+ // Locked load on shared memory can fail.
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
+ i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ assert(i->defExists(1));
+ defId(i->def(1), 32 + 16);
+ }
+
emitPredicate(i);
defId(i->def(0), 2);
- srcId(i->src(0).getIndirect(0), 10);
+ if (i->getIndirect(0, 0)) {
+ srcId(i->src(0).getIndirect(0), 10);
+ if (i->getIndirect(0, 0)->reg.size == 8)
+ code[1] |= 1 << 23;
+ } else {
+ code[0] |= 255 << 10;
+ }
}
uint8_t
case SV_VERTEX_COUNT: return 0x10;
case SV_INVOCATION_ID: return 0x11;
case SV_YDIR: return 0x12;
+ case SV_THREAD_KILL: return 0x13;
case SV_TID: return 0x21 + SDATA(ref).sv.index;
case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
case SV_NTID: return 0x29 + SDATA(ref).sv.index;
setImmediate32(i, 0, Modifier(0));
} else
if (i->src(0).getFile() == FILE_PREDICATE) {
- // TODO
+ code[0] = 0x00000002;
+ code[1] = 0x84401c07;
+ emitPredicate(i);
+ defId(i->def(0), 2);
+ srcId(i->src(0), 14);
} else {
emitForm_C(i, 0x24c, 2);
code[1] |= i->lanes << 10;
}
}
+// Returns true when the instruction addresses global memory indirectly
+// through an 8-byte register.
+static inline bool
+uses64bitAddress(const Instruction *ldst)
+{
+   if (ldst->src(0).getFile() != FILE_MEMORY_GLOBAL)
+      return false;
+   if (!ldst->src(0).isIndirect(0))
+      return false;
+   return ldst->getIndirect(0, 0)->reg.size == 8;
+}
+
+// Emit an atomic memory operation. subOp selects the operation, dType the
+// data type; source 0 is the address and source 1 the operand.
+void
+CodeEmitterGK110::emitATOM(const Instruction *i)
+{
+   const bool writesResult = i->defExists(0);
+   const bool isExchange = (i->subOp == NV50_IR_SUBOP_ATOM_EXCH);
+
+   code[0] = 0x00000002;
+
+   // Opcode plus operation selector.
+   switch (i->subOp) {
+   case NV50_IR_SUBOP_ATOM_CAS:
+      code[1] = 0x77800000;
+      break;
+   case NV50_IR_SUBOP_ATOM_EXCH:
+      code[1] = 0x68000000 | 0x04000000;
+      break;
+   default:
+      code[1] = 0x68000000 | (i->subOp << 23);
+      break;
+   }
+
+   // Data type field.
+   uint32_t typeBits = 0;
+   switch (i->dType) {
+   case TYPE_U32: break;
+   case TYPE_S32: typeBits = 0x00100000; break;
+   case TYPE_U64: typeBits = 0x00200000; break;
+   case TYPE_F32: typeBits = 0x00300000; break;
+   case TYPE_B128: typeBits = 0x00400000; break; /* TODO: U128 */
+   case TYPE_S64: typeBits = 0x00500000; break;
+   default: assert(!"unsupported type"); break;
+   }
+   code[1] |= typeBits;
+
+   emitPredicate(i);
+
+   /* TODO: cas: check that src regs line up */
+   /* TODO: cas: flip bits if $r255 is used */
+   srcId(i->src(1), 23);
+
+   if (writesResult)
+      defId(i->def(0), 2);
+   else if (!isExchange)
+      code[0] |= 255 << 2; // no result wanted: fill the def slot with 255
+
+   if (!writesResult && isExchange) {
+      srcAddr32(i->src(0), 31);
+   } else {
+      // 20-bit signed offset: bit 0 goes to the top of code[0], the rest
+      // to the bottom of code[1].
+      const int32_t offset = SDATA(i->src(0)).offset;
+      assert(offset < 0x80000 && offset >= -0x80000);
+      code[0] |= (offset & 1) << 31;
+      code[1] |= (offset & 0xffffe) >> 1;
+   }
+
+   if (i->getIndirect(0, 0)) {
+      srcId(i->getIndirect(0, 0), 10);
+      // An 8-byte address register sets bit 19 of code[1].
+      if (i->getIndirect(0, 0)->reg.size == 8)
+         code[1] |= 1 << 19;
+   } else {
+      code[0] |= 255 << 10; // no address register: fill the slot with 255
+   }
+}
+
+// Emit a cache-control instruction for the address in source 0. Global
+// memory uses one opcode with the full offset; every other file uses a
+// second opcode with the offset truncated to 24 bits.
+void
+CodeEmitterGK110::emitCCTL(const Instruction *i)
+{
+   const bool isGlobal = (i->src(0).getFile() == FILE_MEMORY_GLOBAL);
+   int32_t offset = SDATA(i->src(0)).offset;
+
+   if (!isGlobal)
+      offset &= 0xffffff;
+
+   code[0] = 0x00000002 | (i->subOp << 2);
+   code[1] = isGlobal ? 0x7b000000 : 0x7c000000;
+
+   // The offset straddles the two code words.
+   code[0] |= offset << 23;
+   code[1] |= offset >> 9;
+
+   if (uses64bitAddress(i)) {
+      code[1] |= 1 << 23;
+   }
+   srcId(i->src(0).getIndirect(0), 10);
+
+   emitPredicate(i);
+}
+
bool
CodeEmitterGK110::emitInstruction(Instruction *insn)
{
case OP_EXPORT:
emitEXPORT(insn);
break;
+ case OP_AFETCH:
+ emitAFETCH(insn);
+ break;
case OP_PFETCH:
emitPFETCH(insn);
break;
break;
case OP_ADD:
case OP_SUB:
- if (isFloatType(insn->dType))
+ if (insn->dType == TYPE_F64)
+ emitDADD(insn);
+ else if (isFloatType(insn->dType))
emitFADD(insn);
else
emitUADD(insn);
break;
case OP_MUL:
- if (isFloatType(insn->dType))
+ if (insn->dType == TYPE_F64)
+ emitDMUL(insn);
+ else if (isFloatType(insn->dType))
emitFMUL(insn);
else
emitIMUL(insn);
break;
case OP_MAD:
case OP_FMA:
- if (isFloatType(insn->dType))
+ if (insn->dType == TYPE_F64)
+ emitDMAD(insn);
+ else if (isFloatType(insn->dType))
emitFMAD(insn);
else
emitIMAD(insn);
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
- case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
+ case OP_CVT:
+ if (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)
+ emitMOV(insn);
+ else
+ emitCVT(insn);
+ break;
case OP_RSQ:
- emitSFnOp(insn, 5);
+ emitSFnOp(insn, 5 + 2 * insn->subOp);
break;
case OP_RCP:
- emitSFnOp(insn, 4);
+ emitSFnOp(insn, 4 + 2 * insn->subOp);
break;
case OP_LG2:
emitSFnOp(insn, 3);
case OP_TXL:
case OP_TXD:
case OP_TXF:
+ case OP_TXG:
+ case OP_TXLQ:
emitTEX(insn->asTex());
break;
case OP_TXQ:
case OP_TEXBAR:
emitTEXBAR(insn);
break;
+ case OP_PIXLD:
+ emitPIXLD(insn);
+ break;
case OP_BRA:
case OP_CALL:
case OP_PRERET:
case OP_POPCNT:
emitPOPC(insn);
break;
+ case OP_INSBF:
+ emitINSBF(insn);
+ break;
+ case OP_EXTBF:
+ emitEXTBF(insn);
+ break;
+ case OP_BFIND:
+ emitBFIND(insn);
+ break;
case OP_JOIN:
emitNOP(insn);
insn->join = 1;
break;
+ case OP_BAR:
+ emitBAR(insn);
+ break;
+ case OP_MEMBAR:
+ emitMEMBAR(insn);
+ break;
+ case OP_ATOM:
+ emitATOM(insn);
+ break;
+ case OP_CCTL:
+ emitCCTL(insn);
+ break;
+ case OP_VOTE:
+ emitVOTE(insn);
+ break;
case OP_PHI:
case OP_UNION:
case OP_CONSTRAINT:
ERROR("operation should have been lowered\n");
return false;
default:
- ERROR("unknow op\n");
+ ERROR("unknown op: %u\n", insn->op);
return false;
}