void emitMOV();
void emitS2R();
+ void emitCS2R();
void emitF2F();
void emitF2I();
void emitI2F();
void emitIMUL();
void emitIMAD();
void emitISCADD();
+ void emitXMAD();
void emitIMNMX();
void emitICMP();
void emitISET();
void emitTEXs(int);
void emitTEX();
+ void emitTEXS();
void emitTLD();
void emitTLD4();
void emitTXD();
case SV_INVOCATION_ID : id = 0x11; break;
case SV_THREAD_KILL : id = 0x13; break;
case SV_INVOCATION_INFO: id = 0x1d; break;
+ case SV_COMBINED_TID : id = 0x20; break;
case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
+ case SV_LANEMASK_EQ : id = 0x38; break;
+ case SV_LANEMASK_LT : id = 0x39; break;
+ case SV_LANEMASK_LE : id = 0x3a; break;
+ case SV_LANEMASK_GT : id = 0x3b; break;
+ case SV_LANEMASK_GE : id = 0x3c; break;
case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
default:
assert(!"invalid system value");
{
if (ref.getFile() == FILE_IMMEDIATE) {
const ImmediateValue *imm = ref.get()->asImm();
- if (isFloatType(insn->sType)) {
- if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
- return true;
- } else {
- if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
- (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
- return true;
- }
+ if (isFloatType(insn->sType))
+ return imm->reg.data.u32 & 0xfff;
+ else
+ return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
}
return false;
}
} else if (insn->sType == TYPE_F64) {
assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
val = imm->reg.data.u64 >> 44;
+ } else {
+ assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
}
- assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
emitField( 56, 1, (val & 0x80000) >> 19);
emitField(pos, len, (val & 0x7ffff));
} else {
emitGPR (0x00, insn->def(0));
}
+void
+CodeEmitterGM107::emitCS2R() // encodes a system-value read in its CS2R form; the OP_RDSV dispatch picks this over emitS2R() when targGM107->isCS2RSV() accepts the value
+{
+ emitInsn(0x50c80000); // opcode word for this instruction form
+ emitSYS (0x14, insn->src(0)); // system-value operand taken from src(0); 0x14 is presumably its bit position - confirm against emitSYS()
+ emitGPR (0x00, insn->def(0)); // destination register from def(0)
+}
+
void
CodeEmitterGM107::emitF2F()
{
case OP_LG2: mufu = 3; break;
case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
+ case OP_SQRT: mufu = 8; break;
default:
assert(!"invalid mufu");
break;
emitSAT (0x32);
emitNEG (0x30, insn->src(0));
emitABS (0x2e, insn->src(0));
- emitField(0x14, 3, mufu);
+ emitField(0x14, 4, mufu);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
break;
}
- if (insn->src(1).getFile() != FILE_IMMEDIATE) {
+ if (!longIMMD(insn->src(1))) {
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5c400000);
void
CodeEmitterGM107::emitIADD()
{
- if (insn->src(1).getFile() != FILE_IMMEDIATE) {
+ if (!longIMMD(insn->src(1))) {
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5c100000);
void
CodeEmitterGM107::emitIMUL()
{
- if (insn->src(1).getFile() != FILE_IMMEDIATE) {
+ if (!longIMMD(insn->src(1))) {
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5c380000);
void
CodeEmitterGM107::emitISCADD()
{
+ assert(insn->src(1).get()->asImm());
+
switch (insn->src(2).getFile()) {
case FILE_GPR:
emitInsn(0x5c180000);
emitGPR (0x00, insn->def(0));
}
+void
+CodeEmitterGM107::emitXMAD() // encodes OP_XMAD; one of four opcode forms is chosen from the register files of src(1) and src(2)
+{
+ assert(insn->src(0).getFile() == FILE_GPR); // src(0) is a GPR in every form
+
+ bool constbuf = false; // true when an operand comes from FILE_MEMORY_CONST; relocates several fields below
+ bool psl_mrg = true; // false for the form that does not emit the low two subOp bits
+ bool immediate = false; // true for the FILE_IMMEDIATE form, which emits no H1 flag for src(1)
+ if (insn->src(2).getFile() == FILE_MEMORY_CONST) { // form: GPR src(1), const-buffer src(2)
+ assert(insn->src(1).getFile() == FILE_GPR);
+ constbuf = true;
+ psl_mrg = false; // the only form that skips the subOp & 0x3 field
+ emitInsn(0x51000000);
+ emitGPR(0x27, insn->src(1)); // note: src(1) goes to 0x27 here, where the other forms place src(2)
+ emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
+ } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) { // form: const-buffer src(1), GPR src(2)
+ assert(insn->src(2).getFile() == FILE_GPR);
+ constbuf = true;
+ emitInsn(0x4e000000);
+ emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
+ emitGPR(0x27, insn->src(2));
+ } else if (insn->src(1).getFile() == FILE_IMMEDIATE) { // form: immediate src(1), GPR src(2)
+ assert(insn->src(2).getFile() == FILE_GPR);
+ assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1))); // the H1 flag for src(1) is never emitted in this form, so it must be clear
+ immediate = true;
+ emitInsn(0x36000000);
+ emitIMMD(0x14, 16, insn->src(1)); // 16-bit immediate field
+ emitGPR(0x27, insn->src(2));
+ } else { // form: all operands in GPRs
+ assert(insn->src(1).getFile() == FILE_GPR);
+ assert(insn->src(2).getFile() == FILE_GPR);
+ emitInsn(0x5b000000);
+ emitGPR(0x14, insn->src(1));
+ emitGPR(0x27, insn->src(2));
+ }
+
+ if (psl_mrg) // low two subOp bits; NOTE(review): presumably the PSL/MRG modifiers - confirm against the NV50_IR_SUBOP_XMAD_* definitions
+ emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
+
+ unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK); // isolate the cmode bits of subOp
+ cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
+ emitField(0x32, constbuf ? 2 : 3, cmode); // const-buffer forms only have a 2-bit cmode field
+
+ emitX(constbuf ? 0x36 : 0x26); // position differs between const-buffer and other forms
+ emitCC(0x2f);
+
+ emitGPR(0x0, insn->def(0)); // destination register
+ emitGPR(0x8, insn->src(0)); // first source register
+
+ // source flags
+ if (isSignedType(insn->sType)) { // for signed source types the H1-mask bits of subOp are also written as a 2-bit field at 0x30
+ uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
+ emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT); // NOTE(review): presumably per-source signedness bits - confirm
+ }
+ emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0); // H1 flag for src(0)
+ if (!immediate) { // H1 flag for src(1); skipped for the immediate form (asserted clear above)
+ bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
+ emitField(constbuf ? 0x34 : 0x23, 1, h1);
+ }
+}
+
void
CodeEmitterGM107::emitIMNMX()
{
emitGPR(pos);
}
+static uint8_t
+getTEXSMask(uint8_t mask) // translates a tex.mask value into the 3-bit code that emitTEXS() writes at position 0x32
+{
+ switch (mask) {
+ case 0x1: return 0x0;
+ case 0x2: return 0x1;
+ case 0x3: return 0x4;
+ case 0x4: return 0x2;
+ case 0x7: return 0x0; // from here on some codes repeat (0x7 shares 0x0 with 0x1); NOTE(review): presumably disambiguated by another part of the encoding - confirm
+ case 0x8: return 0x3;
+ case 0x9: return 0x5;
+ case 0xa: return 0x6;
+ case 0xb: return 0x1;
+ case 0xc: return 0x7;
+ case 0xd: return 0x2;
+ case 0xe: return 0x3;
+ case 0xf: return 0x4;
+ default: // any mask without a case above (for example 0x0, 0x5, 0x6) is rejected
+ assert(!"invalid mask");
+ return 0; // only reached if the assert does not abort
+ }
+}
+
+static uint8_t
+getTEXSTarget(const TexInstruction *tex) // returns the target code emitTEXS() writes for OP_TEX / OP_TXL, chosen from texture target, levelZero and op
+{
+ assert(tex->op == OP_TEX || tex->op == OP_TXL); // no other op is handled by this table
+
+ switch (tex->tex.target.getEnum()) {
+ case TEX_TARGET_1D:
+ assert(tex->tex.levelZero); // 1D has a code only for the levelZero variant
+ return 0x0;
+ case TEX_TARGET_2D:
+ case TEX_TARGET_RECT: // RECT shares the 2D codes
+ if (tex->tex.levelZero)
+ return 0x2;
+ if (tex->op == OP_TXL)
+ return 0x3;
+ return 0x1; // OP_TEX without levelZero
+ case TEX_TARGET_2D_SHADOW:
+ case TEX_TARGET_RECT_SHADOW: // RECT_SHADOW shares the 2D_SHADOW codes
+ if (tex->tex.levelZero)
+ return 0x6;
+ if (tex->op == OP_TXL)
+ return 0x5;
+ return 0x4; // OP_TEX without levelZero
+ case TEX_TARGET_2D_ARRAY:
+ if (tex->tex.levelZero)
+ return 0x8;
+ return 0x7; // returned for both OP_TEX and OP_TXL when levelZero is not set
+ case TEX_TARGET_2D_ARRAY_SHADOW:
+ assert(tex->tex.levelZero); // only the levelZero variant has a code
+ return 0x9;
+ case TEX_TARGET_3D:
+ if (tex->tex.levelZero)
+ return 0xb;
+ assert(tex->op != OP_TXL); // 3D without levelZero is only valid for OP_TEX
+ return 0xa;
+ case TEX_TARGET_CUBE:
+ assert(!tex->tex.levelZero); // CUBE has no levelZero code
+ if (tex->op == OP_TXL)
+ return 0xd;
+ return 0xc;
+ default: // every other texture target is rejected
+ assert(false);
+ return 0x0; // only reached if the assert does not abort
+ }
+}
+
+static uint8_t
+getTLDSTarget(const TexInstruction *tex) // returns the target code emitTEXS() writes for OP_TXF, chosen from texture target, levelZero and useOffsets
+{
+ switch (tex->tex.target.getEnum()) {
+ case TEX_TARGET_1D:
+ if (tex->tex.levelZero)
+ return 0x0;
+ return 0x1;
+ case TEX_TARGET_2D:
+ case TEX_TARGET_RECT: // RECT shares the 2D codes
+ if (tex->tex.levelZero)
+ return tex->tex.useOffsets ? 0x4 : 0x2; // levelZero: code depends on whether offsets are used
+ return tex->tex.useOffsets ? 0xc : 0x5; // without levelZero: likewise split on useOffsets
+ case TEX_TARGET_2D_MS:
+ assert(tex->tex.levelZero); // only the levelZero variant has a code
+ return 0x6;
+ case TEX_TARGET_3D:
+ assert(tex->tex.levelZero); // only the levelZero variant has a code
+ return 0x7;
+ case TEX_TARGET_2D_ARRAY:
+ assert(tex->tex.levelZero); // only the levelZero variant has a code
+ return 0x8;
+
+ default: // every other texture target is rejected
+ assert(false);
+ return 0x0; // only reached if the assert does not abort
+ }
+}
+
void
CodeEmitterGM107::emitTEX()
{
emitGPR (0x00, insn->def(0));
}
+void
+CodeEmitterGM107::emitTEXS() // encodes the form used when tex.scalar is set (see the OP_TEX/OP_TXL/OP_TXF/OP_TXG dispatch)
+{
+ const TexInstruction *insn = this->insn->asTex(); // local view of the current instruction as a TexInstruction; shadows the member
+ assert(!insn->tex.derivAll); // derivAll is not supported by this form
+
+ switch (insn->op) {
+ case OP_TEX:
+ case OP_TXL:
+ emitInsn (0xd8000000);
+ emitField(0x35, 4, getTEXSTarget(insn)); // 4-bit target code
+ emitField(0x32, 3, getTEXSMask(insn->tex.mask)); // 3-bit component-mask code
+ break;
+ case OP_TXF:
+ emitInsn (0xda000000);
+ emitField(0x35, 4, getTLDSTarget(insn)); // same field as above, but from the TXF target table
+ emitField(0x32, 3, getTEXSMask(insn->tex.mask));
+ break;
+ case OP_TXG:
+ assert(insn->tex.useOffsets != 4); // four offsets cannot be encoded in this form
+ emitInsn (0xdf000000);
+ emitField(0x34, 2, insn->tex.gatherComp); // component to gather
+ emitField(0x33, 1, insn->tex.useOffsets == 1); // set only when exactly one offset is used
+ emitField(0x32, 1, insn->tex.target.isShadow()); // shadow-target bit; no mask code is emitted for this op
+ break;
+ default:
+ unreachable("unknown op in emitTEXS()");
+ break;
+ }
+
+ emitField(0x31, 1, insn->tex.liveOnly);
+ emitField(0x24, 13, insn->tex.r); // 13-bit field holding tex.r
+ if (insn->defExists(1)) // optional second destination register
+ emitGPR(0x1c, insn->def(1));
+ else
+ emitGPR(0x1c); // no second def; NOTE(review): presumably encodes a "no register" placeholder - confirm against emitGPR(int)
+ if (insn->srcExists(1)) // optional second source register
+ emitGPR(0x14, insn->getSrc(1));
+ else
+ emitGPR(0x14); // no second source; same presumed placeholder encoding
+ emitGPR (0x08, insn->src(0)); // first source register
+ emitGPR (0x00, insn->def(0)); // first destination register
+}
+
void
CodeEmitterGM107::emitTLD()
{
emitMOV();
break;
case OP_RDSV:
- emitS2R();
+ if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
+ emitCS2R();
+ else
+ emitS2R();
break;
case OP_ABS:
case OP_NEG:
case OP_SHLADD:
emitISCADD();
break;
+ case OP_XMAD:
+ emitXMAD();
+ break;
case OP_MIN:
case OP_MAX:
if (isFloatType(insn->dType)) {
case OP_LG2:
case OP_RCP:
case OP_RSQ:
+ case OP_SQRT:
emitMUFU();
break;
case OP_AND:
emitPIXLD();
break;
case OP_TEX:
- case OP_TXB:
case OP_TXL:
+ if (insn->asTex()->tex.scalar)
+ emitTEXS();
+ else
+ emitTEX();
+ break;
+ case OP_TXB:
emitTEX();
break;
case OP_TXF:
- emitTLD();
+ if (insn->asTex()->tex.scalar)
+ emitTEXS();
+ else
+ emitTLD();
break;
case OP_TXG:
- emitTLD4();
+ if (insn->asTex()->tex.scalar)
+ emitTEXS();
+ else
+ emitTLD4();
break;
case OP_TXD:
emitTXD();
bool insertBarriers(BasicBlock *);
+ bool doesInsnWriteTo(const Instruction *insn, const Value *val) const;
Instruction *findFirstUse(const Instruction *) const;
Instruction *findFirstDef(const Instruction *) const;
for (int d = 0; insn->defExists(d); ++d) {
if (insn->def(d).getFile() == FILE_GPR ||
+ insn->def(d).getFile() == FILE_FLAGS ||
insn->def(d).getFile() == FILE_PREDICATE)
return true;
}
return false;
}
-// Find the next instruction inside the same basic block which uses the output
-// of the given instruction in order to avoid RaW hazards.
+// Helper function for findFirstUse() and findFirstDef(): returns true when one of insn's defs overlaps (GPR) or equals (predicate/flags) the register named by val
+bool
+SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
+ const Value *val) const
+{
+ if (val->reg.file != FILE_GPR && // only GPR, predicate and flags registers are tracked
+ val->reg.file != FILE_PREDICATE &&
+ val->reg.file != FILE_FLAGS)
+ return false;
+
+ for (int d = 0; insn->defExists(d); ++d) {
+ const Value* def = insn->getDef(d);
+ int minGPR = def->reg.data.id; // despite the name, also holds the id compared for predicate and flags defs
+ int maxGPR = minGPR + def->reg.size / 4 - 1; // last register covered, counting reg.size / 4 registers from minGPR
+
+ if (def->reg.file != val->reg.file) // a def in a different register file cannot alias val
+ continue;
+
+ if (def->reg.file == FILE_GPR) {
+ if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR || // val ends before the def starts
+ val->reg.data.id > maxGPR) // val starts after the def ends
+ continue;
+ return true; // the two register ranges overlap
+ } else
+ if (def->reg.file == FILE_PREDICATE) {
+ if (val->reg.data.id != minGPR) // predicates match on exact id only
+ continue;
+ return true;
+ } else
+ if (def->reg.file == FILE_FLAGS) {
+ if (val->reg.data.id != minGPR) // flags match on exact id only
+ continue;
+ return true;
+ }
+ }
+
+ return false; // no def of insn touches val
+}
+
+// Find the next instruction inside the same basic block which uses (reads from
+// or writes to) the output of the given instruction in order to avoid RaW and
+// WaW hazards.
Instruction *
SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
{
Instruction *insn, *next;
- int minGPR, maxGPR;
if (!bari->defExists(0))
return NULL;
- minGPR = bari->def(0).rep()->reg.data.id;
- maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
-
for (insn = bari->next; insn != NULL; insn = next) {
next = insn->next;
- for (int s = 0; insn->srcExists(s); ++s) {
- const Value *src = insn->src(s).rep();
- if (bari->def(0).getFile() == FILE_GPR) {
- if (insn->src(s).getFile() != FILE_GPR ||
- src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
- src->reg.data.id > maxGPR)
- continue;
+ for (int s = 0; insn->srcExists(s); ++s) // RaW: does this later instruction read a register that bari defines?
+ if (doesInsnWriteTo(bari, insn->getSrc(s))) // checks every def of bari, not only def(0)
return insn;
- } else
- if (bari->def(0).getFile() == FILE_PREDICATE) {
- if (insn->src(s).getFile() != FILE_PREDICATE ||
- src->reg.data.id != minGPR)
- continue;
+
+ for (int d = 0; insn->defExists(d); ++d) // WaW: does this later instruction overwrite a register that bari defines?
+ if (doesInsnWriteTo(bari, insn->getDef(d))) // same overlap test, applied to the later instruction's defs
return insn;
- }
- }
}
return NULL;
}
SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
{
Instruction *insn, *next;
- int minGPR, maxGPR;
+
+ if (!bari->srcExists(0))
+ return NULL;
for (insn = bari->next; insn != NULL; insn = next) {
next = insn->next;
- for (int d = 0; insn->defExists(d); ++d) {
- const Value *def = insn->def(d).rep();
- if (insn->def(d).getFile() != FILE_GPR)
- continue;
-
- minGPR = def->reg.data.id;
- maxGPR = minGPR + def->reg.size / 4 - 1;
-
- for (int s = 0; bari->srcExists(s); ++s) {
- const Value *src = bari->src(s).rep();
- if (bari->src(s).getFile() != FILE_GPR ||
- src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
- src->reg.data.id > maxGPR)
- continue;
+ for (int s = 0; bari->srcExists(s); ++s)
+ if (doesInsnWriteTo(insn, bari->getSrc(s)))
return insn;
- }
- }
}
return NULL;
}
if (need_wr_bar) {
// When the instruction requires to emit a write dependency barrier
// (all which write something at a variable latency), find the next
- // instruction which reads the outputs.
+ // instruction which reads the outputs (or writes to them, potentially
+ // completing before this insn).
usei = findFirstUse(insn);
// Allocate and emit a new barrier.