void emitPIXLD(const Instruction *);
+ void emitVOTE(const Instruction *);
+
inline void defId(const ValueDef&, const int pos);
inline void defId(const Instruction *, int d, const int pos);
inline void srcId(const ValueRef&, const int pos);
srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
break;
default:
+ if (i->op == OP_SELP) {
+ // OP_SELP is used to implement shared+atomics on Fermi.
+ assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
+ srcId(i->src(s), 49);
+ }
// ignore here, can be predicate or flags, but must not be address
break;
}
// (a OP b) OP c
if (i->predSrc != 2 && i->srcExists(2)) {
code[1] |= subOp << 21;
- srcId(i->src(2), 17);
- if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
+ srcId(i->src(2), 49);
+ if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
} else {
code[1] |= 0x000e0000;
}
// for 8/16 source types, the byte/word is in subOp. word 1 is
// represented as 2.
- code[1] |= i->subOp << 0x17;
+ if (!isFloatType(i->sType))
+ code[1] |= i->subOp << 0x17;
+ else
+ code[1] |= i->subOp << 0x18;
if (sat)
code[0] |= 0x20;
{
emitForm_A(i, HEX64(20000000, 00000004));
- if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
+ if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 20;
}
defId(i->def(0), 14);
srcId(i->src(0), 20);
- srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
+ srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
code[0] |= 1 << 9; // dall
} else {
ImmediateValue *imm = i->getSrc(1)->asImm();
assert(imm);
+ assert(imm->reg.data.u32 <= 0xfff);
code[0] |= imm->reg.data.u32 << 26;
code[1] |= imm->reg.data.u32 >> 6;
code[1] |= 0x4000;
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
- case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
+ case FILE_MEMORY_SHARED:
+ if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ opc = 0xb8000000;
+ else
+ opc = 0xcc000000;
+ } else {
+ opc = 0xc9000000;
+ }
+ break;
default:
assert(!"invalid memory file");
opc = 0;
code[0] = 0x00000005;
code[1] = opc;
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
+ // Unlocked store on shared memory can fail.
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
+ i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
+ assert(i->defExists(0));
+ defId(i->def(0), 8);
+ }
+ }
+
setAddressByFile(i->src(0));
srcId(i->src(1), 14);
srcId(i->src(0).getIndirect(0), 20);
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
- case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
+ case FILE_MEMORY_SHARED:
+ if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ opc = 0xa8000000;
+ else
+ opc = 0xc4000000;
+ } else {
+ opc = 0xc1000000;
+ }
+ break;
case FILE_MEMORY_CONST:
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
emitMOV(i); // not sure if this is any better
}
code[1] = opc;
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
+ if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ assert(i->defExists(1));
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ defId(i->def(1), 8);
+ else
+ defId(i->def(1), 32 + 18);
+ }
+ }
+
defId(i->def(0), 14);
setAddressByFile(i->src(0));
code[0] |= 63 << 20;
}
- if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
- srcId(i->src(2), 32 + 17);
+ if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
+ code[1] |= (SDATA(i->src(1)).id + 1) << 17;
+ }
}
void
code[1] |= 0x00e00000;
}
+// Emit the two-word encoding for a VOTE instruction.
+//
+// Expects source 0 and definition 1 to live in the predicate file (checked
+// by the assert below). Definition 0 is encoded as well; its register file
+// is not checked here.
+void
+CodeEmitterNVC0::emitVOTE(const Instruction *i)
+{
+ assert(i->src(0).getFile() == FILE_PREDICATE &&
+ i->def(1).getFile() == FILE_PREDICATE);
+
+ // Base opcode words; the instruction's subOp is placed starting at bit 5
+ // of the first word.
+ code[0] = 0x00000004 | (i->subOp << 5);
+ code[1] = 0x48000000;
+
+ emitPredicate(i);
+
+ // Destination operands: def 0 at position 14, def 1 (the predicate
+ // result) at position 32 + 22.
+ // NOTE(review): positions >= 32 presumably address bits of code[1] --
+ // confirm against defId().
+ defId(i->def(0), 14);
+ defId(i->def(1), 32 + 22);
+ // A NOT modifier on the source predicate is encoded as bit 23 of the
+ // first word.
+ if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
+ code[0] |= 1 << 23;
+ srcId(i->src(0), 20);
+}
+
bool
CodeEmitterNVC0::emitInstruction(Instruction *insn)
{
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
- case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
+ case OP_CVT:
+ if (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)
+ emitMOV(insn);
+ else
+ emitCVT(insn);
+ break;
case OP_RSQ:
emitSFnOp(insn, 5 + 2 * insn->subOp);
break;
case OP_PIXLD:
emitPIXLD(insn);
break;
+ case OP_VOTE:
+ emitVOTE(insn);
+ break;
case OP_PHI:
case OP_UNION:
case OP_CONSTRAINT: