X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fcodegen%2Fnv50_ir_ra.cpp;h=3a0e56e1385d3b1fea8c47bcd4af6fc2e69b0b56;hb=0bd83d04612520ff97e21d41bcc3ad2e68e160df;hp=b32bc13f755e9fad96c0c690df53ecdcdb1501fc;hpb=44260d908062a4771c30ab635dd527f4266dbaec;p=mesa.git diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index b32bc13f755..3a0e56e1385 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -37,11 +37,9 @@ namespace nv50_ir { #if __cplusplus >= 201103L using std::hash; using std::unordered_map; -#elif !defined(ANDROID) +#else using std::tr1::hash; using std::tr1::unordered_map; -#else -#error Android release before Lollipop is not supported! #endif #define MAX_REGISTER_FILE_SIZE 256 @@ -101,7 +99,7 @@ public: return (size < 4) ? u : ((u << unit[f]) / 4); } - void print() const; + void print(DataFile f) const; const bool restrictedGPR16Range; @@ -156,10 +154,10 @@ RegisterSet::intersect(DataFile f, const RegisterSet *set) } void -RegisterSet::print() const +RegisterSet::print(DataFile f) const { INFO("GPR:"); - bits[FILE_GPR].print(); + bits[f].print(); INFO("\n"); } @@ -771,7 +769,7 @@ private: bool coalesce(ArrayList&); bool doCoalesce(ArrayList&, unsigned int mask); void calculateSpillWeights(); - void simplify(); + bool simplify(); bool selectRegisters(); void cleanup(const bool success); @@ -853,7 +851,7 @@ isShortRegOp(Instruction *insn) static bool isShortRegVal(LValue *lval) { - if (lval->defs.size() == 0) + if (lval->getInsn() == NULL) return false; for (Value::DefCIterator def = lval->defs.begin(); def != lval->defs.end(); ++def) @@ -968,6 +966,8 @@ GCRA::coalesce(ArrayList& insns) case 0xf0: case 0x100: case 0x110: + case 0x120: + case 0x130: ret = doCoalesce(insns, JOIN_MASK_UNION); break; default: @@ -1303,7 +1303,7 @@ GCRA::simplifyNode(RIG_Node *node) (node->degree < node->degreeLimit) ? "" : "(spill)"); } -void +bool GCRA::simplify() { for (;;) { @@ -1328,11 +1328,11 @@ GCRA::simplify() } if (isinf(bestScore)) { ERROR("no viable spill candidates left\n"); - break; + return false; } simplifyNode(best); } else { - break; + return true; } } } @@ -1423,7 +1423,7 @@ GCRA::selectRegisters() continue; LValue *lval = node->getValue(); if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) - regs.print(); + regs.print(node->f); bool ret = regs.assign(node->reg, node->f, node->colors); if (ret) { INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg); @@ -1466,14 +1466,13 @@ GCRA::allocateRegisters(ArrayList& insns) nodes[i].init(regs, lval); RIG.insert(&nodes[i]); - if (lval->inFile(FILE_GPR) && lval->defs.size() > 0 && + if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL && prog->getTarget()->getChipset() < 0xc0) { Instruction *insn = lval->getInsn(); - if (insn->op == OP_MAD || insn->op == OP_SAD) + if (insn->op == OP_MAD || insn->op == OP_FMA || insn->op == OP_SAD) // Short encoding only possible if they're all GPRs, no need to // affect them otherwise. if (insn->flagsDef < 0 && - isFloatType(insn->dType) && insn->src(0).getFile() == FILE_GPR && insn->src(1).getFile() == FILE_GPR && insn->src(2).getFile() == FILE_GPR) @@ -1492,7 +1491,9 @@ GCRA::allocateRegisters(ArrayList& insns) buildRIG(insns); calculateSpillWeights(); - simplify(); + ret = simplify(); + if (!ret) + goto out; ret = selectRegisters(); if (!ret) { @@ -1545,6 +1546,9 @@ GCRA::cleanup(const bool success) delete[] nodes; nodes = NULL; + hi.next = hi.prev = &hi; + lo[0].next = lo[0].prev = &lo[0]; + lo[1].next = lo[1].prev = &lo[1]; } Symbol * @@ -1899,8 +1903,10 @@ GCRA::resolveSplitsAndMerges() // their registers should be identical. if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) { Instruction *phi = v->getInsn(); - for (int phis = 0; phi->srcExists(phis); ++phis) + for (int phis = 0; phi->srcExists(phis); ++phis) { phi->getSrc(phis)->join = v; + phi->getSrc(phis)->reg.data.id = v->reg.data.id; + } } reg += v->reg.size; } @@ -2070,14 +2076,9 @@ RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, merge->setDef(0, lval); for (int s = a, i = 0; s <= b; ++s, ++i) { merge->setSrc(i, insn->getSrc(s)); - insn->setSrc(s, NULL); } + insn->moveSources(b + 1, a - b); insn->setSrc(a, lval); - - for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) { - insn->setSrc(k, insn->getSrc(s)); - insn->setSrc(s, NULL); - } insn->bb->insertBefore(insn, merge); insn->putExtraSources(0, save); @@ -2094,8 +2095,29 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) textureMask(tex); condenseDefs(tex); - if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { - condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); + if (isSurfaceOp(tex->op)) { + int s = tex->tex.target.getDim() + + (tex->tex.target.isArray() || tex->tex.target.isCube()); + int n = 0; + + switch (tex->op) { + case OP_SUSTB: + case OP_SUSTP: + n = 4; + break; + case OP_SUREDB: + case OP_SUREDP: + if (tex->subOp == NV50_IR_SUBOP_ATOM_CAS) + n = 2; + break; + default: + break; + } + + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 1) + condenseSrcs(tex, 1, n); // do not condense the tex handle } else if (isTextureOp(tex->op)) { if (tex->op != OP_TXQ) { @@ -2128,7 +2150,7 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) condenseDefs(tex); if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { - condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); + condenseSrcs(tex, 3, 6); } else if (isTextureOp(tex->op)) { int n = tex->srcCount(0xff, true); @@ -2154,6 +2176,12 @@ RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex) if (tex->op == OP_TXQ) { s = tex->srcCount(0xff); n = 0; + } else if (isSurfaceOp(tex->op)) { + s = tex->tex.target.getDim() + (tex->tex.target.isArray() || tex->tex.target.isCube()); + if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) + n = 4; + else + n = 0; } else { s = tex->tex.target.getArgCount() - tex->tex.target.isMS(); if (!tex->tex.target.isArray() && @@ -2229,6 +2257,8 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) texConstraintNVE0(tex); break; case 0x110: + case 0x120: + case 0x130: texConstraintGM107(tex); break; default: @@ -2301,9 +2331,21 @@ RegAlloc::InsertConstraintsPass::insertConstraintMoves() assert(cst->getSrc(s)->defs.size() == 1); // still SSA Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + bool imm = defi->op == OP_MOV && + defi->src(0).getFile() == FILE_IMMEDIATE; + bool load = defi->op == OP_LOAD && + defi->src(0).getFile() == FILE_MEMORY_CONST && + !defi->src(0).isIndirect(0); // catch some cases where don't really need MOVs - if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) + if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) { + if (imm || load) { + // Move the defi right before the cst. No point in expanding + // the range. + defi->bb->remove(defi); + cst->bb->insertBefore(cst, defi); + } continue; + } LValue *lval = new_LValue(func, cst->src(s).getFile()); lval->reg.size = size; @@ -2311,6 +2353,14 @@ RegAlloc::InsertConstraintsPass::insertConstraintMoves() mov = new_Instruction(func, OP_MOV, typeOfSize(size)); mov->setDef(0, lval); mov->setSrc(0, cst->getSrc(s)); + + if (load) { + mov->op = OP_LOAD; + mov->setSrc(0, defi->getSrc(0)); + } else if (imm) { + mov->setSrc(0, defi->getSrc(0)); + } + cst->setSrc(s, mov->getDef(0)); cst->bb->insertBefore(cst, mov);