X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fcodegen%2Fnv50_ir_ra.cpp;h=2d3486ba2bc2dbf903f4be3e9cc627b909bfde14;hb=3abe68b8282496688186157b51da5600ac540906;hp=7859c8e79bd6a220afb7bf1264e9a8a439918b90;hpb=d31005e3e5588b20760c774f14ac0ea80375a181;p=mesa.git diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 7859c8e79bd..2d3486ba2bc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -23,6 +23,7 @@ #include "codegen/nv50_ir.h" #include "codegen/nv50_ir_target.h" +#include #include #include #if __cplusplus >= 201103L @@ -100,7 +101,9 @@ public: return (size < 4) ? u : ((u << unit[f]) / 4); } - void print() const; + void print(DataFile f) const; + + const bool restrictedGPR16Range; private: BitSet bits[LAST_REGISTER_FILE + 1]; @@ -109,8 +112,6 @@ private: int last[LAST_REGISTER_FILE + 1]; int fill[LAST_REGISTER_FILE + 1]; - - const bool restrictedGPR16Range; }; void @@ -155,10 +156,10 @@ RegisterSet::intersect(DataFile f, const RegisterSet *set) } void -RegisterSet::print() const +RegisterSet::print(DataFile f) const { INFO("GPR:"); - bits[FILE_GPR].print(); + bits[f].print(); INFO("\n"); } @@ -839,6 +840,32 @@ GCRA::printNodeInfo() const } } +static bool +isShortRegOp(Instruction *insn) +{ + // Immediates are always in src1. Every other situation can be resolved by + // using a long encoding. + return insn->srcExists(1) && insn->src(1).getFile() == FILE_IMMEDIATE; +} + +// Check if this LValue is ever used in an instruction that can't be encoded +// with long registers (i.e. > r63) +static bool +isShortRegVal(LValue *lval) +{ + if (lval->getInsn() == NULL) + return false; + for (Value::DefCIterator def = lval->defs.begin(); + def != lval->defs.end(); ++def) + if (isShortRegOp((*def)->getInsn())) + return true; + for (Value::UseCIterator use = lval->uses.begin(); + use != lval->uses.end(); ++use) + if (isShortRegOp((*use)->getInsn())) + return true; + return false; +} + void GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval) { @@ -854,7 +881,12 @@ GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval) weight = std::numeric_limits::infinity(); degree = 0; - degreeLimit = regs.getFileSize(f, lval->reg.size); + int size = regs.getFileSize(f, lval->reg.size); + // On nv50, we lose a bit of gpr encoding when there's an embedded + // immediate. + if (regs.restrictedGPR16Range && f == FILE_GPR && isShortRegVal(lval)) + size /= 2; + degreeLimit = size; degreeLimit -= relDegree[1][colors] - 1; livei.insert(lval->livei); @@ -936,6 +968,8 @@ GCRA::coalesce(ArrayList& insns) case 0xf0: case 0x100: case 0x110: + case 0x120: + case 0x130: ret = doCoalesce(insns, JOIN_MASK_UNION); break; default: @@ -1391,7 +1425,7 @@ GCRA::selectRegisters() continue; LValue *lval = node->getValue(); if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) - regs.print(); + regs.print(node->f); bool ret = regs.assign(node->reg, node->f, node->colors); if (ret) { INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg); @@ -1433,6 +1467,19 @@ GCRA::allocateRegisters(ArrayList& insns) if (lval) { nodes[i].init(regs, lval); RIG.insert(&nodes[i]); + + if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL && + prog->getTarget()->getChipset() < 0xc0) { + Instruction *insn = lval->getInsn(); + if (insn->op == OP_MAD || insn->op == OP_SAD) + // Short encoding only possible if they're all GPRs, no need to + // affect them otherwise. + if (insn->flagsDef < 0 && + insn->src(0).getFile() == FILE_GPR && + insn->src(1).getFile() == FILE_GPR && + insn->src(2).getFile() == FILE_GPR) + nodes[i].addRegPreference(getNode(insn->getSrc(2)->asLValue())); + } } } @@ -1499,6 +1546,9 @@ GCRA::cleanup(const bool success) delete[] nodes; nodes = NULL; + hi.next = hi.prev = &hi; + lo[0].next = lo[0].prev = &lo[0]; + lo[1].next = lo[1].prev = &lo[1]; } Symbol * @@ -1573,14 +1623,34 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval) Instruction *st; if (slot->reg.file == FILE_MEMORY_LOCAL) { - st = new_Instruction(func, OP_STORE, ty); - st->setSrc(0, slot); - st->setSrc(1, lval); lval->noSpill = 1; + if (ty != TYPE_B96) { + st = new_Instruction(func, OP_STORE, ty); + st->setSrc(0, slot); + st->setSrc(1, lval); + } else { + st = new_Instruction(func, OP_SPLIT, ty); + st->setSrc(0, lval); + for (int d = 0; d < lval->reg.size / 4; ++d) + st->setDef(d, new_LValue(func, FILE_GPR)); + + for (int d = lval->reg.size / 4 - 1; d >= 0; --d) { + Value *tmp = cloneShallow(func, slot); + tmp->reg.size = 4; + tmp->reg.data.offset += 4 * d; + + Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32); + s->setSrc(0, tmp); + s->setSrc(1, st->getDef(d)); + defi->bb->insertAfter(defi, s); + } + } } else { st = new_Instruction(func, OP_CVT, ty); st->setDef(0, slot); st->setSrc(0, lval); + if (lval->reg.file == FILE_FLAGS) + st->flagsSrc = 0; } defi->bb->insertAfter(defi, st); } @@ -1596,17 +1666,46 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot) Instruction *ld; if (slot->reg.file == FILE_MEMORY_LOCAL) { lval->noSpill = 1; - ld = new_Instruction(func, OP_LOAD, ty); + if (ty != TYPE_B96) { + ld = new_Instruction(func, OP_LOAD, ty); + } else { + ld = new_Instruction(func, OP_MERGE, ty); + for (int d = 0; d < lval->reg.size / 4; ++d) { + Value *tmp = cloneShallow(func, slot); + LValue *val; + tmp->reg.size = 4; + tmp->reg.data.offset += 4 * d; + + Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32); + l->setDef(0, (val = new_LValue(func, FILE_GPR))); + l->setSrc(0, tmp); + usei->bb->insertBefore(usei, l); + ld->setSrc(d, val); + val->noSpill = 1; + } + ld->setDef(0, lval); + usei->bb->insertBefore(usei, ld); + return lval; + } } else { ld = new_Instruction(func, OP_CVT, ty); } ld->setDef(0, lval); ld->setSrc(0, slot); + if (lval->reg.file == FILE_FLAGS) + ld->flagsDef = 0; usei->bb->insertBefore(usei, ld); return lval; } +static bool +value_cmp(ValueRef *a, ValueRef *b) { + Instruction *ai = a->getInsn(), *bi = b->getInsn(); + if (ai->bb != bi->bb) + return ai->bb->getId() < bi->bb->getId(); + return ai->serial < bi->serial; +} // For each value that is to be spilled, go through all its definitions. // A value can have multiple definitions if it has been coalesced before. @@ -1640,18 +1739,25 @@ SpillCodeInserter::run(const std::list& lst) LValue *dval = (*d)->get()->asLValue(); Instruction *defi = (*d)->getInsn(); + // Sort all the uses by BB/instruction so that we don't unspill + // multiple times in a row, and also remove a source of + // non-determinism. + std::vector refs(dval->uses.begin(), dval->uses.end()); + std::sort(refs.begin(), refs.end(), value_cmp); + // Unspill at each use *before* inserting spill instructions, // we don't want to have the spill instructions in the use list here. - while (!dval->uses.empty()) { - ValueRef *u = *dval->uses.begin(); + for (std::vector::const_iterator it = refs.begin(); + it != refs.end(); ++it) { + ValueRef *u = *it; Instruction *usei = u->getInsn(); assert(usei); if (usei->isPseudo()) { tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot; last = NULL; - } else - if (!last || usei != last->next) { // TODO: sort uses - tmp = unspill(usei, dval, slot); + } else { + if (!last || (usei != last->next && usei != last)) + tmp = unspill(usei, dval, slot); last = usei; } u->set(tmp); @@ -1968,14 +2074,9 @@ RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, merge->setDef(0, lval); for (int s = a, i = 0; s <= b; ++s, ++i) { merge->setSrc(i, insn->getSrc(s)); - insn->setSrc(s, NULL); } + insn->moveSources(b + 1, a - b); insn->setSrc(a, lval); - - for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) { - insn->setSrc(k, insn->getSrc(s)); - insn->setSrc(s, NULL); - } insn->bb->insertBefore(insn, merge); insn->putExtraSources(0, save); @@ -1992,8 +2093,29 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) textureMask(tex); condenseDefs(tex); - if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { - condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); + if (isSurfaceOp(tex->op)) { + int s = tex->tex.target.getDim() + + (tex->tex.target.isArray() || tex->tex.target.isCube()); + int n = 0; + + switch (tex->op) { + case OP_SUSTB: + case OP_SUSTP: + n = 4; + break; + case OP_SUREDB: + case OP_SUREDP: + if (tex->subOp == NV50_IR_SUBOP_ATOM_CAS) + n = 2; + break; + default: + break; + } + + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 1) + condenseSrcs(tex, 1, n); // do not condense the tex handle } else if (isTextureOp(tex->op)) { if (tex->op != OP_TXQ) { @@ -2026,7 +2148,7 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) condenseDefs(tex); if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { - condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); + condenseSrcs(tex, 3, 6); } else if (isTextureOp(tex->op)) { int n = tex->srcCount(0xff, true); @@ -2046,11 +2168,18 @@ RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex) { int n, s; - textureMask(tex); + if (isTextureOp(tex->op)) + textureMask(tex); if (tex->op == OP_TXQ) { s = tex->srcCount(0xff); n = 0; + } else if (isSurfaceOp(tex->op)) { + s = tex->tex.target.getDim() + (tex->tex.target.isArray() || tex->tex.target.isCube()); + if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) + n = 4; + else + n = 0; } else { s = tex->tex.target.getArgCount() - tex->tex.target.isMS(); if (!tex->tex.target.isArray() && @@ -2126,6 +2255,8 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) texConstraintNVE0(tex); break; case 0x110: + case 0x120: + case 0x130: texConstraintGM107(tex); break; default: