X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnv50%2Fcodegen%2Fnv50_ir_ra.cpp;h=e0fea4b933754e6f5e8d3be5b3d47f20c35c1528;hb=c82714c593ac38ea87e061b92d10b34853784723;hp=7e3c44d3b15780f8fb0513d6104ae4451e0f063c;hpb=57594065c30feec9376be9b2132659f7d87362ee;p=mesa.git diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp index 7e3c44d3b15..e0fea4b9337 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp @@ -1,8 +1,30 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ #include "nv50_ir.h" #include "nv50_ir_target.h" -#include "nv50/nv50_debug.h" +#include +#include namespace nv50_ir { @@ -11,41 +33,77 @@ namespace nv50_ir { class RegisterSet { public: - RegisterSet(); RegisterSet(const Target *); void init(const Target *); - void reset(); // reset allocation status, but not max assigned regs + void reset(DataFile, bool resetMax = false); void periodicMask(DataFile f, uint32_t lock, uint32_t unlock); void intersect(DataFile f, const RegisterSet *); - bool assign(Value **, int nr); - void release(const Value *); + bool assign(int32_t& reg, DataFile f, unsigned int size); + void release(DataFile f, int32_t reg, unsigned int size); + void occupy(DataFile f, int32_t reg, unsigned int size); void occupy(const Value *); + void occupyMask(DataFile f, int32_t reg, uint8_t mask); + bool isOccupied(DataFile f, int32_t reg, unsigned int size) const; + bool testOccupy(const Value *); + bool testOccupy(DataFile f, int32_t reg, unsigned int size); + + inline int getMaxAssigned(DataFile f) const { return fill[f]; } + + inline unsigned int getFileSize(DataFile f, uint8_t regSize) const + { + if (restrictedGPR16Range && f == FILE_GPR && regSize == 2) + return (last[f] + 1) / 2; + return last[f] + 1; + } - int getMaxAssigned(DataFile f) const { return fill[f]; } + inline unsigned int units(DataFile f, unsigned int size) const + { + return size >> unit[f]; + } + // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary) + inline unsigned int idToBytes(const Value *v) const + { + return v->reg.data.id * MIN2(v->reg.size, 4); + } + inline unsigned int idToUnits(const Value *v) const + { + return units(v->reg.file, idToBytes(v)); + } + inline int bytesToId(Value *v, unsigned int bytes) const + { + if (v->reg.size < 4) + return units(v->reg.file, bytes); + return bytes / 4; + } + inline int 
unitsToId(DataFile f, int u, uint8_t size) const + { + if (u < 0) + return -1; + return (size < 4) ? u : ((u << unit[f]) / 4); + } void print() const; private: - uint32_t bits[FILE_ADDRESS + 1][(MAX_REGISTER_FILE_SIZE + 31) / 32]; + BitSet bits[LAST_REGISTER_FILE + 1]; - int unit[FILE_ADDRESS + 1]; // log2 of allocation granularity + int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity - int last[FILE_ADDRESS + 1]; - int fill[FILE_ADDRESS + 1]; + int last[LAST_REGISTER_FILE + 1]; + int fill[LAST_REGISTER_FILE + 1]; + + const bool restrictedGPR16Range; }; void -RegisterSet::reset() +RegisterSet::reset(DataFile f, bool resetMax) { - memset(bits, 0, sizeof(bits)); -} - -RegisterSet::RegisterSet() -{ - reset(); + bits[f].fill(0); + if (resetMax) + fill[f] = -1; } void @@ -57,115 +115,100 @@ RegisterSet::init(const Target *targ) unit[rf] = targ->getFileUnit(f); fill[rf] = -1; assert(last[rf] < MAX_REGISTER_FILE_SIZE); + bits[rf].allocate(last[rf] + 1, true); } } RegisterSet::RegisterSet(const Target *targ) + : restrictedGPR16Range(targ->getChipset() < 0xc0) { - reset(); init(targ); + for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i) + reset(static_cast(i)); } void RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock) { - for (int i = 0; i < (last[f] + 31) / 32; ++i) - bits[f][i] = (bits[f][i] | lock) & ~unlock; + bits[f].periodicMask32(lock, unlock); } void RegisterSet::intersect(DataFile f, const RegisterSet *set) { - for (int i = 0; i < (last[f] + 31) / 32; ++i) - bits[f][i] |= set->bits[f][i]; + bits[f] |= set->bits[f]; } void RegisterSet::print() const { INFO("GPR:"); - for (int i = 0; i < (last[FILE_GPR] + 31) / 32; ++i) - INFO(" %08x", bits[FILE_GPR][i]); + bits[FILE_GPR].print(); INFO("\n"); } bool -RegisterSet::assign(Value **def, int nr) +RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size) { - DataFile f = def[0]->reg.file; - int n = nr; - if (n == 3) - n = 4; - int s = (n * def[0]->reg.size) >> unit[f]; - uint32_t m = (1 << s) - 1; - - int id = last[f] + 1; - int i; - - for (i = 0; (i * 32) < last[f]; ++i) { - if (bits[f][i] == 0xffffffff) - continue; - - for (id = 0; id < 32; id += s) - if (!(bits[f][i] & (m << id))) - break; - if (id < 32) - break; - } - id += i * 32; - if (id > last[f]) + reg = bits[f].findFreeRange(size); + if (reg < 0) return false; + fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); + return true; +} - bits[f][id / 32] |= m << (id % 32); +bool +RegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const +{ + return bits[f].testRange(reg, size); +} - if (id + (s - 1) > fill[f]) - fill[f] = id + (s - 1); +void +RegisterSet::occupy(const Value *v) +{ + occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]); +} - for (i = 0; i < nr; ++i, ++id) - if (!def[i]->livei.isEmpty()) // XXX: really increased id if empty ? 
- def[i]->reg.data.id = id; - return true; +void +RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask) +{ + bits[f].setMask(reg & ~31, static_cast(mask) << (reg % 32)); } void -RegisterSet::occupy(const Value *val) +RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size) { - int id = val->reg.data.id; - if (id < 0) - return; - unsigned int f = val->reg.file; + bits[f].setRange(reg, size); - uint32_t m = (1 << (val->reg.size >> unit[f])) - 1; + INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size); - INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %x\n", f, id, m); + fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); +} - bits[f][id / 32] |= m << (id % 32); +bool +RegisterSet::testOccupy(const Value *v) +{ + return testOccupy(v->reg.file, + idToUnits(v), v->reg.size >> unit[v->reg.file]); +} - if (fill[f] < id) - fill[f] = id; +bool +RegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size) +{ + if (isOccupied(f, reg, size)) + return false; + occupy(f, reg, size); + return true; } void -RegisterSet::release(const Value *val) +RegisterSet::release(DataFile f, int32_t reg, unsigned int size) { - int id = val->reg.data.id; - if (id < 0) - return; - unsigned int f = val->reg.file; + bits[f].clrRange(reg, size); - uint32_t m = (1 << (val->reg.size >> unit[f])) - 1; - - INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %x\n", f, id, m); - - bits[f][id / 32] &= ~(m << (id % 32)); + INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size); } -#define JOIN_MASK_PHI (1 << 0) -#define JOIN_MASK_UNION (1 << 1) -#define JOIN_MASK_MOV (1 << 2) -#define JOIN_MASK_TEX (1 << 3) -#define JOIN_MASK_CONSTRAINT (1 << 4) - class RegAlloc { public: @@ -174,11 +217,6 @@ public: bool exec(); bool execFunc(); -private: - bool coalesceValues(unsigned int mask); - bool linearScan(); - bool allocateConstrainedValues(); - private: class PhiMovesPass : public Pass { private: @@ -186,6 +224,11 @@ private: inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p); }; + class ArgumentMovesPass : public Pass { + private: + virtual bool visit(BasicBlock *); + }; + class BuildIntervalsPass : public Pass { private: virtual bool visit(BasicBlock *); @@ -201,19 +244,25 @@ private: bool insertConstraintMoves(); + void condenseDefs(Instruction *); + void condenseSrcs(Instruction *, const int first, const int last); + void addHazard(Instruction *i, const ValueRef *src); void textureMask(TexInstruction *); void addConstraint(Instruction *, int s, int n); bool detectConflict(Instruction *, int s); - DLList constrList; + // target specific functions, TODO: put in subclass or Target + void texConstraintNV50(TexInstruction *); + void texConstraintNVC0(TexInstruction *); + void texConstraintNVE0(TexInstruction *); + + std::list constrList; + + const Target *targ; }; bool buildLiveSets(BasicBlock *); - void collectLValues(DLList&, bool assignedOnly); - - void insertOrderedTail(DLList&, Value *); - inline Instruction *insnBySerial(int); private: Program *prog; @@ -225,11 +274,36 @@ private: int sequence; // for manual passes through CFG }; -Instruction * -RegAlloc::insnBySerial(int serial) +typedef std::pair ValuePair; + +class SpillCodeInserter { - return reinterpret_cast(insns.get(serial)); -} +public: + SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { } + + bool run(const std::list&); + + Symbol *assignSlot(const Interval&, const unsigned int size); + inline int32_t getStackSize() const { return stackSize; } + +private: + Function *func; + + struct SpillSlot + { + Interval 
occup; + std::list residents; // needed to recalculate occup + Symbol *sym; + int32_t offset; + inline uint8_t size() const { return sym->reg.size; } + }; + std::list slots; + int32_t stackSize; + int32_t stackBase; + + LValue *unspill(Instruction *usei, LValue *, Value *slot); + void spill(Instruction *defi, Value *slot, LValue *); +}; void RegAlloc::BuildIntervalsPass::addLiveRange(Value *val, @@ -239,7 +313,8 @@ RegAlloc::BuildIntervalsPass::addLiveRange(Value *val, Instruction *insn = val->getUniqueInsn(); if (!insn) - return; + insn = bb->getFirst(); + assert(bb->getFirst()->serial <= bb->getExit()->serial); assert(bb->getExit()->serial + 1 >= end); @@ -282,26 +357,29 @@ RegAlloc::PhiMovesPass::visit(BasicBlock *bb) Instruction *phi, *mov; BasicBlock *pb, *pn; + std::stack stack; + for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { - pb = pn = BasicBlock::get(ei.getNode()); + pb = BasicBlock::get(ei.getNode()); assert(pb); - - if (needNewElseBlock(bb, pb)) { - pn = new BasicBlock(func); - - // deletes an edge, iterator is invalid after this: - pb->cfg.detach(&bb->cfg); - pb->cfg.attach(&pn->cfg, Graph::Edge::TREE); - pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); // XXX: check order ! - - assert(pb->getExit()->op != OP_CALL); - if (pb->getExit()->asFlow()->target.bb == bb) - pb->getExit()->asFlow()->target.bb = pn; - break; - } + if (needNewElseBlock(bb, pb)) + stack.push(pb); + } + while (!stack.empty()) { + pb = stack.top(); + pn = new BasicBlock(func); + stack.pop(); + + pb->cfg.detach(&bb->cfg); + pb->cfg.attach(&pn->cfg, Graph::Edge::TREE); + pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); + + assert(pb->getExit()->op != OP_CALL); + if (pb->getExit()->asFlow()->target.bb == bb) + pb->getExit()->asFlow()->target.bb = pn; } - // insert MOVs (phi->src[j] should stem from j-th in-BB) + // insert MOVs (phi->src(j) should stem from j-th in-BB) int j = 0; for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { pb = BasicBlock::get(ei.getNode()); @@ -323,10 +401,75 @@ RegAlloc::PhiMovesPass::visit(BasicBlock *bb) return true; } +bool +RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb) +{ + // Bind function call inputs/outputs to the same physical register + // the callee uses, inserting moves as appropriate for the case a + // conflict arises. + for (Instruction *i = bb->getEntry(); i; i = i->next) { + FlowInstruction *cal = i->asFlow(); + if (!cal || cal->op != OP_CALL || cal->builtin) + continue; + RegisterSet clobberSet(prog->getTarget()); + + // Bind input values. + for (int s = 0; cal->srcExists(s); ++s) { + LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue()); + tmp->reg.data.id = cal->target.fn->ins[s].rep()->reg.data.id; + + Instruction *mov = + new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); + mov->setDef(0, tmp); + mov->setSrc(0, cal->getSrc(s)); + cal->setSrc(s, tmp); + + bb->insertBefore(cal, mov); + } + + // Bind output values. + for (int d = 0; cal->defExists(d); ++d) { + LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue()); + tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id; + + Instruction *mov = + new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); + mov->setSrc(0, tmp); + mov->setDef(0, cal->getDef(d)); + cal->setDef(d, tmp); + + bb->insertAfter(cal, mov); + clobberSet.occupy(tmp); + } + + // Bind clobbered values. 
+ for (std::deque::iterator it = cal->target.fn->clobbers.begin(); + it != cal->target.fn->clobbers.end(); + ++it) { + if (clobberSet.testOccupy(*it)) { + Value *tmp = new_LValue(func, (*it)->asLValue()); + tmp->reg.data.id = (*it)->reg.data.id; + cal->setDef(cal->defCount(), tmp); + } + } + } + + // Update the clobber set of the function. + if (BasicBlock::get(func->cfgExit) == bb) { + func->buildDefSets(); + for (unsigned int i = 0; i < bb->defSet.getSize(); ++i) + if (bb->defSet.test(i)) + func->clobbers.push_back(func->getLValue(i)); + } + + return true; +} + // Build the set of live-in variables of bb. bool RegAlloc::buildLiveSets(BasicBlock *bb) { + Function *f = bb->getFunction(); BasicBlock *bn; Instruction *i; unsigned int s, d; @@ -343,10 +486,10 @@ RegAlloc::buildLiveSets(BasicBlock *bb) if (bn->cfg.visit(sequence)) if (!buildLiveSets(bn)) return false; - if (n++ == 0) - bb->liveSet = bn->liveSet; - else + if (n++ || bb->liveSet.marker) bb->liveSet |= bn->liveSet; + else + bb->liveSet = bn->liveSet; } if (!n && !bb->liveSet.marker) bb->liveSet.fill(0); @@ -360,6 +503,14 @@ RegAlloc::buildLiveSets(BasicBlock *bb) // if (!bb->getEntry()) // return true; + if (bb == BasicBlock::get(f->cfgExit)) { + for (std::deque::iterator it = f->outs.begin(); + it != f->outs.end(); ++it) { + assert(it->get()->asLValue()); + bb->liveSet.set(it->get()->id); + } + } + for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) { for (d = 0; i->defExists(d); ++d) bb->liveSet.clr(i->getDef(d)->id); @@ -383,8 +534,6 @@ RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb) { BasicBlock *bbA = NULL, *bbB = NULL; - assert(bb->cfg.incidentCount() || bb->liveSet.popCount() == 0); - if (bb->cfg.outgoingCount()) { // trickery to save a loop of OR'ing liveSets // aliasing works fine with BitSet::setOr @@ -421,8 +570,8 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) { bb->liveSet.clr(i->getDef(0)->id); - for (int s = 0; s < NV50_IR_MAX_SRCS && i->src[s].exists(); ++s) { - assert(i->src[s].getInsn()); + for (int s = 0; i->srcExists(s); ++s) { + assert(i->src(s).getInsn()); if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ? 
bb->liveSet.set(i->getSrc(s)->id); else @@ -455,46 +604,385 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) } } + if (bb == BasicBlock::get(func->cfg.getRoot())) { + for (std::deque::iterator it = func->ins.begin(); + it != func->ins.end(); ++it) { + if (it->get()->reg.data.id >= 0) // add hazard for fixed regs + it->get()->livei.extend(0, 1); + } + } + + return true; +} + + +#define JOIN_MASK_PHI (1 << 0) +#define JOIN_MASK_UNION (1 << 1) +#define JOIN_MASK_MOV (1 << 2) +#define JOIN_MASK_TEX (1 << 3) + +class GCRA +{ +public: + GCRA(Function *, SpillCodeInserter&); + ~GCRA(); + + bool allocateRegisters(ArrayList& insns); + + void printNodeInfo() const; + +private: + class RIG_Node : public Graph::Node + { + public: + RIG_Node(); + + void init(const RegisterSet&, LValue *); + + void addInterference(RIG_Node *); + void addRegPreference(RIG_Node *); + + inline LValue *getValue() const + { + return reinterpret_cast(data); + } + inline void setValue(LValue *lval) { data = lval; } + + inline uint8_t getCompMask() const + { + return ((1 << colors) - 1) << (reg & 7); + } + + static inline RIG_Node *get(const Graph::EdgeIterator& ei) + { + return static_cast(ei.getNode()); + } + + public: + uint32_t degree; + uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable + uint16_t colors; + + DataFile f; + int32_t reg; + + float weight; + + // list pointers for simplify() phase + RIG_Node *next; + RIG_Node *prev; + + // union of the live intervals of all coalesced values (we want to retain + // the separate intervals for testing interference of compound values) + Interval livei; + + std::list prefRegs; + }; + +private: + inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; } + + void buildRIG(ArrayList&); + bool coalesce(ArrayList&); + bool doCoalesce(ArrayList&, unsigned int mask); + void calculateSpillWeights(); + void simplify(); + bool selectRegisters(); + void cleanup(const bool success); + + void simplifyEdge(RIG_Node *, RIG_Node *); + void simplifyNode(RIG_Node *); + + bool coalesceValues(Value *, Value *, bool force); + void resolveSplitsAndMerges(); + void makeCompound(Instruction *, bool isSplit); + + inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&); + + inline void insertOrderedTail(std::list&, RIG_Node *); + void checkList(std::list&); + +private: + std::stack stack; + + // list headers for simplify() phase + RIG_Node lo[2]; + RIG_Node hi; + + Graph RIG; + RIG_Node *nodes; + unsigned int nodeCount; + + Function *func; + Program *prog; + + static uint8_t relDegree[17][17]; + + RegisterSet regs; + + // need to fixup register id for participants of OP_MERGE/SPLIT + std::list merges; + std::list splits; + + SpillCodeInserter& spill; + std::list mustSpill; +}; + +uint8_t GCRA::relDegree[17][17]; + +GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this) +{ + colors = 0; +} + +void +GCRA::printNodeInfo() const +{ + for (unsigned int i = 0; i < nodeCount; ++i) { + if (!nodes[i].colors) + continue; + INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X", + i, + nodes[i].f,nodes[i].reg,nodes[i].colors, + nodes[i].weight, + nodes[i].degree, nodes[i].degreeLimit); + + for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next()) + INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); + for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next()) + INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); + INFO("\n"); + } +} + +void +GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval) +{ + 
setValue(lval); + if (lval->reg.data.id >= 0) + lval->noSpill = lval->fixedReg = 1; + + colors = regs.units(lval->reg.file, lval->reg.size); + f = lval->reg.file; + reg = -1; + if (lval->reg.data.id >= 0) + reg = regs.idToUnits(lval); + + weight = std::numeric_limits::infinity(); + degree = 0; + degreeLimit = regs.getFileSize(f, lval->reg.size); + + livei.insert(lval->livei); +} + +bool +GCRA::coalesceValues(Value *dst, Value *src, bool force) +{ + LValue *rep = dst->join->asLValue(); + LValue *val = src->join->asLValue(); + + if (!force && val->reg.data.id >= 0) { + rep = src->join->asLValue(); + val = dst->join->asLValue(); + } + RIG_Node *nRep = &nodes[rep->id]; + RIG_Node *nVal = &nodes[val->id]; + + if (src->reg.file != dst->reg.file) { + if (!force) + return false; + WARN("forced coalescing of values in different files !\n"); + } + if (!force && dst->reg.size != src->reg.size) + return false; + + if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) { + if (force) { + if (val->reg.data.id >= 0) + WARN("forced coalescing of values in different fixed regs !\n"); + } else { + if (val->reg.data.id >= 0) + return false; + // make sure that there is no overlap with the fixed register of rep + for (ArrayList::Iterator it = func->allLValues.iterator(); + !it.end(); it.next()) { + Value *reg = reinterpret_cast(it.get())->asLValue(); + assert(reg); + if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei)) + return false; + } + } + } + + if (!force && nRep->livei.overlaps(nVal->livei)) + return false; + + INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n", + rep->id, rep->reg.data.id, val->id); + + // set join pointer of all values joined with val + for (Value::DefIterator def = val->defs.begin(); def != val->defs.end(); + ++def) + (*def)->get()->join = rep; + assert(rep->join == rep && val->join == rep); + + // add val's definitions to rep and extend the live interval of its RIG node + rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end()); + nRep->livei.unify(nVal->livei); return true; } bool -RegAlloc::coalesceValues(unsigned int mask) +GCRA::coalesce(ArrayList& insns) +{ + bool ret = doCoalesce(insns, JOIN_MASK_PHI); + if (!ret) + return false; + switch (func->getProgram()->getTarget()->getChipset() & ~0xf) { + case 0x50: + case 0x80: + case 0x90: + case 0xa0: + ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX); + break; + case 0xc0: + case 0xd0: + case 0xe0: + ret = doCoalesce(insns, JOIN_MASK_UNION); + break; + default: + break; + } + if (!ret) + return false; + return doCoalesce(insns, JOIN_MASK_MOV); +} + +static inline uint8_t makeCompMask(int compSize, int base, int size) +{ + uint8_t m = ((1 << size) - 1) << base; + + switch (compSize) { + case 1: + return 0xff; + case 2: + m |= (m << 2); + return (m << 4) | m; + case 3: + case 4: + return (m << 4) | m; + default: + assert(compSize <= 8); + return m; + } +} + +// Used when coalescing moves. The non-compound value will become one, e.g.: +// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 } +// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d +static inline void copyCompound(Value *dst, Value *src) +{ + LValue *ldst = dst->asLValue(); + LValue *lsrc = src->asLValue(); + + if (ldst->compound && !lsrc->compound) { + LValue *swap = lsrc; + lsrc = ldst; + ldst = swap; + } + + ldst->compound = lsrc->compound; + ldst->compMask = lsrc->compMask; +} + +void +GCRA::makeCompound(Instruction *insn, bool split) +{ + LValue *rep = (split ? 
insn->getSrc(0) : insn->getDef(0))->asLValue(); + + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { + INFO("makeCompound(split = %i): ", split); + insn->print(); + } + + const unsigned int size = getNode(rep)->colors; + unsigned int base = 0; + + if (!rep->compound) + rep->compMask = 0xff; + rep->compound = 1; + + for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) { + LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue(); + + val->compound = 1; + if (!val->compMask) + val->compMask = 0xff; + val->compMask &= makeCompMask(size, base, getNode(val)->colors); + assert(val->compMask); + + INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n", + rep->id, rep->compMask, val->id, val->compMask); + + base += getNode(val)->colors; + } + assert(base == size); +} + +bool +GCRA::doCoalesce(ArrayList& insns, unsigned int mask) { int c, n; for (n = 0; n < insns.getSize(); ++n) { Instruction *i; - Instruction *insn = insnBySerial(n); + Instruction *insn = reinterpret_cast(insns.get(n)); switch (insn->op) { case OP_PHI: if (!(mask & JOIN_MASK_PHI)) break; for (c = 0; insn->srcExists(c); ++c) - if (!insn->getDef(0)->coalesce(insn->getSrc(c), false)) { + if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) { + // this is bad ERROR("failed to coalesce phi operands\n"); return false; } break; case OP_UNION: + case OP_MERGE: if (!(mask & JOIN_MASK_UNION)) break; for (c = 0; insn->srcExists(c); ++c) - insn->getDef(0)->coalesce(insn->getSrc(c), true); + coalesceValues(insn->getDef(0), insn->getSrc(c), true); + if (insn->op == OP_MERGE) { + merges.push_back(insn); + if (insn->srcExists(1)) + makeCompound(insn, false); + } break; - case OP_CONSTRAINT: - if (!(mask & JOIN_MASK_CONSTRAINT)) + case OP_SPLIT: + if (!(mask & JOIN_MASK_UNION)) break; - for (c = 0; c < 4 && insn->srcExists(c); ++c) - insn->getDef(c)->coalesce(insn->getSrc(c), true); + splits.push_back(insn); + for (c = 0; insn->defExists(c); ++c) + coalesceValues(insn->getSrc(0), insn->getDef(c), true); + makeCompound(insn, true); break; case OP_MOV: if (!(mask & JOIN_MASK_MOV)) break; + i = NULL; + if (!insn->getDef(0)->uses.empty()) + i = insn->getDef(0)->uses.front()->getInsn(); + // if this is a contraint-move there will only be a single use + if (i && i->op == OP_MERGE) // do we really still need this ? 
+ break; i = insn->getSrc(0)->getUniqueInsn(); - if (i && !i->constrainedDefs()) - insn->getDef(0)->coalesce(insn->getSrc(0), false); + if (i && !i->constrainedDefs()) { + if (coalesceValues(insn->getDef(0), insn->getSrc(0), false)) + copyCompound(insn->getSrc(0), insn->getDef(0)); + } break; case OP_TEX: case OP_TXB: @@ -506,8 +994,8 @@ RegAlloc::coalesceValues(unsigned int mask) case OP_TEXCSAA: if (!(mask & JOIN_MASK_TEX)) break; - for (c = 0; c < 4 && insn->srcExists(c); ++c) - insn->getDef(c)->coalesce(insn->getSrc(c), true); + for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c) + coalesceValues(insn->getDef(c), insn->getSrc(c), true); break; default: break; @@ -517,191 +1005,569 @@ RegAlloc::coalesceValues(unsigned int mask) } void -RegAlloc::insertOrderedTail(DLList &list, Value *val) +GCRA::RIG_Node::addInterference(RIG_Node *node) +{ + this->degree += relDegree[node->colors][colors]; + node->degree += relDegree[colors][node->colors]; + + this->attach(node, Graph::Edge::CROSS); +} + +void +GCRA::RIG_Node::addRegPreference(RIG_Node *node) +{ + prefRegs.push_back(node); +} + +GCRA::GCRA(Function *fn, SpillCodeInserter& spill) : + func(fn), + regs(fn->getProgram()->getTarget()), + spill(spill) +{ + prog = func->getProgram(); + + // initialize relative degrees array - i takes away from j + for (int i = 1; i <= 16; ++i) + for (int j = 1; j <= 16; ++j) + relDegree[i][j] = j * ((i + j - 1) / j); +} + +GCRA::~GCRA() +{ + if (nodes) + delete[] nodes; +} + +void +GCRA::checkList(std::list& lst) +{ + GCRA::RIG_Node *prev = NULL; + + for (std::list::iterator it = lst.begin(); + it != lst.end(); + ++it) { + assert((*it)->getValue()->join == (*it)->getValue()); + if (prev) + assert(prev->livei.begin() <= (*it)->livei.begin()); + prev = *it; + } +} + +void +GCRA::insertOrderedTail(std::list& list, RIG_Node *node) { - // we insert the live intervals in order, so this should be short - DLList::Iterator iter = list.revIterator(); - const int begin = val->livei.begin(); - for (; !iter.end(); iter.next()) { - if (reinterpret_cast(iter.get())->livei.begin() <= begin) + if (node->livei.isEmpty()) + return; + // only the intervals of joined values don't necessarily arrive in order + std::list::iterator prev, it; + for (it = list.end(); it != list.begin(); it = prev) { + prev = it; + --prev; + if ((*prev)->livei.begin() <= node->livei.begin()) break; } - iter.insert(val); + list.insert(it, node); } -static void -checkList(DLList &list) +void +GCRA::buildRIG(ArrayList& insns) { - Value *prev = NULL; - Value *next = NULL; + std::list values, active; + + for (std::deque::iterator it = func->ins.begin(); + it != func->ins.end(); ++it) + insertOrderedTail(values, getNode(it->get()->asLValue())); + + for (int i = 0; i < insns.getSize(); ++i) { + Instruction *insn = reinterpret_cast(insns.get(i)); + for (int d = 0; insn->defExists(d); ++d) + if (insn->getDef(d)->rep() == insn->getDef(d)) + insertOrderedTail(values, getNode(insn->getDef(d)->asLValue())); + } + checkList(values); - for (DLList::Iterator iter = list.iterator(); !iter.end(); iter.next()) { - next = Value::get(iter); - assert(next); - if (prev) { - assert(prev->livei.begin() <= next->livei.begin()); + while (!values.empty()) { + RIG_Node *cur = values.front(); + + for (std::list::iterator it = active.begin(); + it != active.end();) { + RIG_Node *node = *it; + + if (node->livei.end() <= cur->livei.begin()) { + it = active.erase(it); + } else { + if (node->f == cur->f && node->livei.overlaps(cur->livei)) + cur->addInterference(node); + ++it; + } 
} - assert(next->join == next); - prev = next; + values.pop_front(); + active.push_back(cur); } } void -RegAlloc::collectLValues(DLList &list, bool assignedOnly) +GCRA::calculateSpillWeights() { - for (int n = 0; n < insns.getSize(); ++n) { - Instruction *i = insnBySerial(n); + for (unsigned int i = 0; i < nodeCount; ++i) { + RIG_Node *const n = &nodes[i]; + if (!nodes[i].colors || nodes[i].livei.isEmpty()) + continue; + if (nodes[i].reg >= 0) { + // update max reg + regs.occupy(n->f, n->reg, n->colors); + continue; + } + LValue *val = nodes[i].getValue(); - for (int d = 0; i->defExists(d); ++d) - if (!i->getDef(d)->livei.isEmpty()) - if (!assignedOnly || i->getDef(d)->reg.data.id >= 0) - insertOrderedTail(list, i->getDef(d)); + if (!val->noSpill) { + int rc = 0; + for (Value::DefIterator it = val->defs.begin(); + it != val->defs.end(); + ++it) + rc += (*it)->get()->refCount(); + + nodes[i].weight = + (float)rc * (float)rc / (float)nodes[i].livei.extent(); + } + + if (nodes[i].degree < nodes[i].degreeLimit) { + int l = 0; + if (val->reg.size > 4) + l = 1; + DLLIST_ADDHEAD(&lo[l], &nodes[i]); + } else { + DLLIST_ADDHEAD(&hi, &nodes[i]); + } } - checkList(list); + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) + printNodeInfo(); } -bool -RegAlloc::allocateConstrainedValues() +void +GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b) { - Value *defs[4]; - RegisterSet regSet[4]; - DLList regVals; + bool move = b->degree >= b->degreeLimit; - INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: allocating constrained values\n"); + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n", + a->getValue()->id, a->degree, a->degreeLimit, + b->getValue()->id, b->degree, b->degreeLimit); - collectLValues(regVals, true); + b->degree -= relDegree[a->colors][b->colors]; - for (int c = 0; c < 4; ++c) - regSet[c].init(prog->getTarget()); + move = move && b->degree < b->degreeLimit; + if (move && !DLLIST_EMPTY(b)) { + int l = (b->getValue()->reg.size > 4) ? 1 : 0; + DLLIST_DEL(b); + DLLIST_ADDTAIL(&lo[l], b); + } +} - for (int n = 0; n < insns.getSize(); ++n) { - Instruction *i = insnBySerial(n); +void +GCRA::simplifyNode(RIG_Node *node) +{ + for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) + simplifyEdge(node, RIG_Node::get(ei)); - const int vecSize = i->defCount(0xf); - if (vecSize < 2) - continue; - assert(vecSize <= 4); + for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next()) + simplifyEdge(node, RIG_Node::get(ei)); - for (int c = 0; c < vecSize; ++c) - defs[c] = i->def[c].rep(); + DLLIST_DEL(node); + stack.push(node->getValue()->id); - if (defs[0]->reg.data.id >= 0) { - for (int c = 1; c < vecSize; ++c) { - assert(defs[c]->reg.data.id >= 0); + INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n", + node->getValue()->id, + (node->degree < node->degreeLimit) ? 
"" : "(spill)"); +} + +void +GCRA::simplify() +{ + for (;;) { + if (!DLLIST_EMPTY(&lo[0])) { + do { + simplifyNode(lo[0].next); + } while (!DLLIST_EMPTY(&lo[0])); + } else + if (!DLLIST_EMPTY(&lo[1])) { + simplifyNode(lo[1].next); + } else + if (!DLLIST_EMPTY(&hi)) { + RIG_Node *best = hi.next; + float bestScore = best->weight / (float)best->degree; + // spill candidate + for (RIG_Node *it = best->next; it != &hi; it = it->next) { + float score = it->weight / (float)it->degree; + if (score < bestScore) { + best = it; + bestScore = score; + } } - continue; + if (isinf(bestScore)) { + ERROR("no viable spill candidates left\n"); + break; + } + simplifyNode(best); + } else { + break; } + } +} + +void +GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei) +{ + const RIG_Node *intf = RIG_Node::get(ei); + + if (intf->reg < 0) + return; + const LValue *vA = node->getValue(); + const LValue *vB = intf->getValue(); + + const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7); - for (int c = 0; c < vecSize; ++c) { - uint32_t mask; - regSet[c].reset(); + if (vA->compound | vB->compound) { + // NOTE: this only works for >aligned< register tuples ! + for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) { + for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) { + const LValue *vD = (*D)->get()->asLValue(); + const LValue *vd = (*d)->get()->asLValue(); - for (DLList::Iterator it = regVals.iterator(); !it.end(); it.next()) { - Value *rVal = Value::get(it); - if (rVal->reg.data.id >= 0 && rVal->livei.overlaps(defs[c]->livei)) - regSet[c].occupy(rVal); + if (!vD->livei.overlaps(vd->livei)) { + INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n", + vD->id, vd->id); + continue; + } + + uint8_t mask = vD->compound ? vD->compMask : ~0; + if (vd->compound) { + assert(vB->compound); + mask &= vd->compMask & vB->compMask; + } else { + mask &= intfMask; } - mask = 0x11111111; - if (vecSize == 2) // granularity is 2 instead of 4 - mask |= 0x11111111 << 2; - regSet[c].periodicMask(defs[0]->reg.file, 0, ~(mask << c)); - if (!defs[c]->livei.isEmpty()) - insertOrderedTail(regVals, defs[c]); + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n", + vD->id, + vD->compound ? vD->compMask : 0xff, + vd->id, + vd->compound ? 
vd->compMask : intfMask, + vB->compMask, intf->reg & ~7, mask); + if (mask) + regs.occupyMask(node->f, intf->reg & ~7, mask); } - for (int c = 1; c < vecSize; ++c) - regSet[0].intersect(defs[0]->reg.file, ®Set[c]); + } + } else { + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "(%%%i) X (%%%i): $r%i + %u\n", + vA->id, vB->id, intf->reg, intf->colors); + regs.occupy(node->f, intf->reg, intf->colors); + } +} - if (!regSet[0].assign(&defs[0], vecSize)) // TODO: spilling - return false; +bool +GCRA::selectRegisters() +{ + INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n"); + + while (!stack.empty()) { + RIG_Node *node = &nodes[stack.top()]; + stack.pop(); + + regs.reset(node->f); + + INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n", + node->getValue()->id, node->colors); + + for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) + checkInterference(node, ei); + for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next()) + checkInterference(node, ei); + + if (!node->prefRegs.empty()) { + for (std::list::const_iterator it = node->prefRegs.begin(); + it != node->prefRegs.end(); + ++it) { + if ((*it)->reg >= 0 && + regs.testOccupy(node->f, (*it)->reg, node->colors)) { + node->reg = (*it)->reg; + break; + } + } + } + if (node->reg >= 0) + continue; + LValue *lval = node->getValue(); + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) + regs.print(); + bool ret = regs.assign(node->reg, node->f, node->colors); + if (ret) { + INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg); + lval->compMask = node->getCompMask(); + } else { + INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n", + lval->id, lval->reg.size); + Symbol *slot = NULL; + if (lval->reg.file == FILE_GPR) + slot = spill.assignSlot(node->livei, lval->reg.size); + mustSpill.push_back(ValuePair(lval, slot)); + } + } + if (!mustSpill.empty()) + return false; + for (unsigned int i = 0; i < nodeCount; ++i) { + LValue *lval = nodes[i].getValue(); + if (nodes[i].reg >= 0 && nodes[i].colors > 0) + lval->reg.data.id = + regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size); } - for (int c = 0; c < 4; c += 2) - if (regSet[c].getMaxAssigned(FILE_GPR) > prog->maxGPR) - prog->maxGPR = regSet[c].getMaxAssigned(FILE_GPR); return true; } bool -RegAlloc::linearScan() +GCRA::allocateRegisters(ArrayList& insns) { - Value *cur, *val; - DLList unhandled, active, inactive; - RegisterSet f(prog->getTarget()), free(prog->getTarget()); + bool ret; - INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: linear scan\n"); + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "allocateRegisters to %u instructions\n", insns.getSize()); - collectLValues(unhandled, false); + nodeCount = func->allLValues.getSize(); + nodes = new RIG_Node[nodeCount]; + if (!nodes) + return false; + for (unsigned int i = 0; i < nodeCount; ++i) { + LValue *lval = reinterpret_cast(func->allLValues.get(i)); + if (lval) { + nodes[i].init(regs, lval); + RIG.insert(&nodes[i]); + } + } - for (DLList::Iterator cI = unhandled.iterator(); !cI.end();) { - cur = Value::get(cI); - cI.erase(); + // coalesce first, we use only 1 RIG node for a group of joined values + ret = coalesce(insns); + if (!ret) + goto out; - for (DLList::Iterator aI = active.iterator(); !aI.end();) { - val = Value::get(aI); - if (val->livei.end() <= cur->livei.begin()) { - free.release(val); - aI.erase(); - } else - if (!val->livei.contains(cur->livei.begin())) { - free.release(val); - aI.moveToList(inactive); - } else { - aI.next(); - } + if (func->getProgram()->dbgFlags & 
NV50_IR_DEBUG_REG_ALLOC) + func->printLiveIntervals(); + + buildRIG(insns); + calculateSpillWeights(); + simplify(); + + ret = selectRegisters(); + if (!ret) { + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "selectRegisters failed, inserting spill code ...\n"); + regs.reset(FILE_GPR, true); + spill.run(mustSpill); + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) + func->print(); + } else { + prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR)); + } + +out: + cleanup(ret); + return ret; +} + +void +GCRA::cleanup(const bool success) +{ + mustSpill.clear(); + + for (ArrayList::Iterator it = func->allLValues.iterator(); + !it.end(); it.next()) { + LValue *lval = reinterpret_cast(it.get()); + + lval->livei.clear(); + + lval->compound = 0; + lval->compMask = 0; + + if (lval->join == lval) + continue; + + if (success) { + lval->reg.data.id = lval->join->reg.data.id; + } else { + for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); + ++d) + lval->join->defs.remove(*d); + lval->join = lval; } + } - for (DLList::Iterator iI = inactive.iterator(); !iI.end();) { - val = Value::get(iI); - if (val->livei.end() <= cur->livei.begin()) { - iI.erase(); - } else - if (val->livei.contains(cur->livei.begin())) { - free.occupy(val); - iI.moveToList(active); - } else { - iI.next(); + if (success) + resolveSplitsAndMerges(); + splits.clear(); // avoid duplicate entries on next coalesce pass + merges.clear(); + + delete[] nodes; + nodes = NULL; +} + +Symbol * +SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size) +{ + SpillSlot slot; + int32_t offsetBase = stackSize; + int32_t offset; + std::list::iterator pos = slots.end(), it = slots.begin(); + + if (offsetBase % size) + offsetBase += size - (offsetBase % size); + + slot.sym = NULL; + + for (offset = offsetBase; offset < stackSize; offset += size) { + const int32_t entryEnd = offset + size; + while (it != slots.end() && it->offset < offset) + ++it; + if (it == slots.end()) // no slots left + break; + std::list::iterator bgn = it; + + while (it != slots.end() && it->offset < entryEnd) { + it->occup.print(); + if (it->occup.overlaps(livei)) + break; + ++it; + } + if (it == slots.end() || it->offset >= entryEnd) { + // fits + for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) { + bgn->occup.insert(livei); + if (bgn->size() == size) + slot.sym = bgn->sym; } + break; } - f = free; + } + if (!slot.sym) { + stackSize = offset + size; + slot.offset = offset; + slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL); + if (!func->stackPtr) + offset += func->tlsBase; + slot.sym->setAddress(NULL, offset); + slot.sym->reg.size = size; + slots.insert(pos, slot)->occup.insert(livei); + } + return slot.sym; +} - for (DLList::Iterator iI = inactive.iterator(); !iI.end(); iI.next()) { - val = Value::get(iI); - if (val->livei.overlaps(cur->livei)) - f.occupy(val); - } +void +SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval) +{ + const DataType ty = typeOfSize(slot->reg.size); + + Instruction *st; + if (slot->reg.file == FILE_MEMORY_LOCAL) { + st = new_Instruction(func, OP_STORE, ty); + st->setSrc(0, slot); + st->setSrc(1, lval); + lval->noSpill = 1; + } else { + st = new_Instruction(func, OP_CVT, ty); + st->setDef(0, slot); + st->setSrc(0, lval); + } + defi->bb->insertAfter(defi, st); +} - for (DLList::Iterator uI = unhandled.iterator(); !uI.end(); uI.next()) { - val = Value::get(uI); - if (val->reg.data.id >= 0 && val->livei.overlaps(cur->livei)) - f.occupy(val); - } +LValue * 
+SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot) +{ + const DataType ty = typeOfSize(slot->reg.size); + + lval = cloneShallow(func, lval); + + Instruction *ld; + if (slot->reg.file == FILE_MEMORY_LOCAL) { + lval->noSpill = 1; + ld = new_Instruction(func, OP_LOAD, ty); + } else { + ld = new_Instruction(func, OP_CVT, ty); + } + ld->setDef(0, lval); + ld->setSrc(0, slot); + + usei->bb->insertBefore(usei, ld); + return lval; +} - if (cur->reg.data.id < 0) { - bool spill = !f.assign(&cur, 1); - if (spill) { - ERROR("out of registers of file %u\n", cur->reg.file); - abort(); +bool +SpillCodeInserter::run(const std::list& lst) +{ + for (std::list::const_iterator it = lst.begin(); it != lst.end(); + ++it) { + LValue *lval = it->first->asLValue(); + Symbol *mem = it->second ? it->second->asSym() : NULL; + + for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); + ++d) { + Value *slot = mem ? + static_cast(mem) : new_LValue(func, FILE_GPR); + Value *tmp = NULL; + Instruction *last = NULL; + + LValue *dval = (*d)->get()->asLValue(); + Instruction *defi = (*d)->getInsn(); + + // handle uses first or they'll contain the spill stores + while (!dval->uses.empty()) { + ValueRef *u = dval->uses.front(); + Instruction *usei = u->getInsn(); + assert(usei); + if (usei->op == OP_PHI) { + tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot; + last = NULL; + } else + if (!last || usei != last->next) { // TODO: sort uses + tmp = unspill(usei, dval, slot); + last = usei; + } + u->set(tmp); + } + + assert(defi); + if (defi->op == OP_PHI) { + d = lval->defs.erase(d); + --d; + if (slot->reg.file == FILE_MEMORY_LOCAL) + delete_Instruction(func->getProgram(), defi); + else + defi->setDef(0, slot); + } else { + spill(defi, slot, dval); } } - free.occupy(cur); - active.insert(cur); + } - if (f.getMaxAssigned(FILE_GPR) > prog->maxGPR) - prog->maxGPR = f.getMaxAssigned(FILE_GPR); - if (free.getMaxAssigned(FILE_GPR) > prog->maxGPR) - prog->maxGPR = free.getMaxAssigned(FILE_GPR); + // TODO: We're not trying to reuse old slots in a potential next iteration. + // We have to update the slots' livei intervals to be able to do that. + stackBase = stackSize; + slots.clear(); return true; } bool RegAlloc::exec() { - for (ArrayList::Iterator fi = prog->allFuncs.iterator(); - !fi.end(); fi.next()) { - func = reinterpret_cast(fi.get()); + for (IteratorRef it = prog->calls.iteratorDFS(false); + !it->end(); it->next()) { + func = Function::get(reinterpret_cast(it->get())); + + func->tlsBase = prog->tlsSize; if (!execFunc()) return false; + prog->tlsSize += func->tlsSize; } return true; } @@ -710,70 +1576,99 @@ bool RegAlloc::execFunc() { InsertConstraintsPass insertConstr; - PhiMovesPass insertMoves; + PhiMovesPass insertPhiMoves; + ArgumentMovesPass insertArgMoves; BuildIntervalsPass buildIntervals; + SpillCodeInserter insertSpills(func); + + GCRA gcra(func, insertSpills); - unsigned int i; + unsigned int i, retries; bool ret; - ret = insertConstr.exec(func); - if (!ret) - goto out; + if (!func->ins.empty()) { + // Insert a nop at the entry so inputs only used by the first instruction + // don't count as having an empty live range. 
+ Instruction *nop = new_Instruction(func, OP_NOP, TYPE_NONE); + BasicBlock::get(func->cfg.getRoot())->insertHead(nop); + } - ret = insertMoves.run(func); + ret = insertConstr.exec(func); if (!ret) goto out; - for (sequence = func->cfg.nextSequence(), i = 0; - ret && i <= func->loopNestingBound; - sequence = func->cfg.nextSequence(), ++i) - ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot())); + ret = insertPhiMoves.run(func); if (!ret) goto out; - func->orderInstructions(this->insns); - - ret = buildIntervals.run(func); + ret = insertArgMoves.run(func); if (!ret) goto out; - ret = coalesceValues(JOIN_MASK_PHI); - if (!ret) - goto out; - switch (prog->getTarget()->getChipset() & 0xf0) { - case 0x50: - ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_TEX); - break; - case 0xc0: - ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_CONSTRAINT); - break; - default: - break; - } - if (!ret) - goto out; - ret = coalesceValues(JOIN_MASK_MOV); - if (!ret) - goto out; + // TODO: need to fix up spill slot usage ranges to support > 1 retry + for (retries = 0; retries < 3; ++retries) { + if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)) + INFO("Retry: %i\n", retries); + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) + func->print(); + + // spilling to registers may add live ranges, need to rebuild everything + ret = true; + for (sequence = func->cfg.nextSequence(), i = 0; + ret && i <= func->loopNestingBound; + sequence = func->cfg.nextSequence(), ++i) + ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot())); + if (!ret) + break; + func->orderInstructions(this->insns); - if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { - func->print(); - func->printLiveIntervals(); + ret = buildIntervals.run(func); + if (!ret) + break; + ret = gcra.allocateRegisters(insns); + if (ret) + break; // success } + INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret); - ret = allocateConstrainedValues() && linearScan(); - if (!ret) - goto out; - + func->tlsSize = insertSpills.getStackSize(); out: - // TODO: should probably call destructor on LValues later instead - for (ArrayList::Iterator it = func->allLValues.iterator(); - !it.end(); it.next()) - reinterpret_cast(it.get())->livei.clear(); - return ret; } +// TODO: check if modifying Instruction::join here breaks anything +void +GCRA::resolveSplitsAndMerges() +{ + for (std::list::iterator it = splits.begin(); + it != splits.end(); + ++it) { + Instruction *split = *it; + unsigned int reg = regs.idToBytes(split->getSrc(0)); + for (int d = 0; split->defExists(d); ++d) { + Value *v = split->getDef(d); + v->reg.data.id = regs.bytesToId(v, reg); + v->join = v; + reg += v->reg.size; + } + } + splits.clear(); + + for (std::list::iterator it = merges.begin(); + it != merges.end(); + ++it) { + Instruction *merge = *it; + unsigned int reg = regs.idToBytes(merge->getDef(0)); + for (int s = 0; merge->srcExists(s); ++s) { + Value *v = merge->getSrc(s); + v->reg.data.id = regs.bytesToId(v, reg); + v->join = v; + reg += v->reg.size; + } + } + merges.clear(); +} + bool Program::registerAllocation() { RegAlloc ra(this); @@ -810,36 +1705,30 @@ RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex) } tex->tex.mask = mask; -#if 0 // reorder or set the unused ones NULL ? 
- for (c = 0; c < 4; ++c) - if (!(tex->tex.mask & (1 << c))) - def[d++] = tex->getDef(c); -#endif for (c = 0; c < d; ++c) tex->setDef(c, def[c]); -#if 1 for (; c < 4; ++c) tex->setDef(c, NULL); -#endif } bool RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s) { + Value *v = cst->getSrc(s); + // current register allocation can't handle it if a value participates in // multiple constraints - for (ValueRef::Iterator it = cst->src[s].iterator(); !it.end(); it.next()) { - Instruction *insn = it.get()->getInsn(); - if (insn != cst) + for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) { + if (cst != (*it)->getInsn()) return true; } // can start at s + 1 because detectConflict is called on all sources for (int c = s + 1; cst->srcExists(c); ++c) - if (cst->getSrc(c) == cst->getSrc(s)) + if (v == cst->getSrc(c)) return true; - Instruction *defi = cst->getSrc(s)->getInsn(); + Instruction *defi = v->getInsn(); return (!defi || defi->constrainedDefs()); } @@ -851,8 +1740,10 @@ RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n) int d; // first, look for an existing identical constraint op - for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) { - cst = reinterpret_cast(it.get()); + for (std::list::iterator it = constrList.begin(); + it != constrList.end(); + ++it) { + cst = (*it); if (!i->bb->dominatedBy(cst->bb)) break; for (d = 0; d < n; ++d) @@ -873,7 +1764,7 @@ RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n) } i->bb->insertBefore(i, cst); - constrList.insert(cst); + constrList.push_back(cst); } // Add a dummy use of the pointer source of >= 8 byte loads after the load @@ -888,6 +1779,143 @@ RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src) } +// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q +void +RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn) +{ + uint8_t size = 0; + int n; + for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n) + size += insn->getDef(n)->reg.size; + if (n < 2) + return; + LValue *lval = new_LValue(func, FILE_GPR); + lval->reg.size = size; + + Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size)); + split->setSrc(0, lval); + for (int d = 0; d < n; ++d) { + split->setDef(d, insn->getDef(d)); + insn->setDef(d, NULL); + } + insn->setDef(0, lval); + + for (int k = 1, d = n; insn->defExists(d); ++d, ++k) { + insn->setDef(k, insn->getDef(d)); + insn->setDef(d, NULL); + } + // carry over predicate if any (mainly for OP_UNION uses) + split->setPredicate(insn->cc, insn->getPredicate()); + + insn->bb->insertAfter(insn, split); + constrList.push_back(split); +} +void +RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, + const int a, const int b) +{ + uint8_t size = 0; + if (a >= b) + return; + for (int s = a; s <= b; ++s) + size += insn->getSrc(s)->reg.size; + if (!size) + return; + LValue *lval = new_LValue(func, FILE_GPR); + lval->reg.size = size; + + Value *save[3]; + insn->takeExtraSources(0, save); + + Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size)); + merge->setDef(0, lval); + for (int s = a, i = 0; s <= b; ++s, ++i) { + merge->setSrc(i, insn->getSrc(s)); + insn->setSrc(s, NULL); + } + insn->setSrc(a, lval); + + for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) { + insn->setSrc(k, insn->getSrc(s)); + insn->setSrc(s, NULL); + } + insn->bb->insertBefore(insn, merge); + + insn->putExtraSources(0, save); + + constrList.push_back(merge); +} + +void 
+RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) +{ + textureMask(tex); + condenseDefs(tex); + + int n = tex->srcCount(0xff, true); + if (n > 4) { + condenseSrcs(tex, 0, 3); + if (n > 5) // NOTE: first call modified positions already + condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); + } else + if (n > 1) { + condenseSrcs(tex, 0, n - 1); + } +} + +void +RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex) +{ + int n, s; + + textureMask(tex); + + if (tex->op == OP_TXQ) { + s = tex->srcCount(0xff); + n = 0; + } else { + s = tex->tex.target.getArgCount(); + if (!tex->tex.target.isArray() && + (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0)) + ++s; + if (tex->op == OP_TXD && tex->tex.useOffsets) + ++s; + n = tex->srcCount(0xff) - s; + assert(n <= 4); + } + + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 1) // NOTE: first call modified positions already + condenseSrcs(tex, 1, n); + + condenseDefs(tex); +} + +void +RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex) +{ + Value *pred = tex->getPredicate(); + if (pred) + tex->setPredicate(tex->cc, NULL); + + textureMask(tex); + + assert(tex->defExists(0) && tex->srcExists(0)); + // make src and def count match + int c; + for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) { + if (!tex->srcExists(c)) + tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue())); + if (!tex->defExists(c)) + tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue())); + } + if (pred) + tex->setPredicate(tex->cc, pred); + condenseDefs(tex); + condenseSrcs(tex, 0, c - 1); +} + // Insert constraint markers for instructions whose multiple sources must be // located in consecutive registers. bool @@ -895,43 +1923,49 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) { TexInstruction *tex; Instruction *next; - int s, n, size; + int s, size; + + targ = bb->getProgram()->getTarget(); for (Instruction *i = bb->getEntry(); i; i = next) { next = i->next; if ((tex = i->asTex())) { - textureMask(tex); - - // FIXME: this is target specific - if (tex->op == OP_TXQ) { - s = tex->srcCount(0xff); - n = 0; - } else { - s = tex->tex.target.getArgCount(); - if (!tex->tex.target.isArray() && - (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0)) - ++s; - n = tex->srcCount(0xff) - s; - assert(n <= 4); + switch (targ->getChipset() & ~0xf) { + case 0x50: + case 0x80: + case 0x90: + case 0xa0: + texConstraintNV50(tex); + break; + case 0xc0: + case 0xd0: + texConstraintNVC0(tex); + break; + case 0xe0: + case NVISA_GK110_CHIPSET: + texConstraintNVE0(tex); + break; + default: + break; } - - if (s > 1) - addConstraint(i, 0, s); - if (n > 1) - addConstraint(i, s, n); } else if (i->op == OP_EXPORT || i->op == OP_STORE) { for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) { assert(i->srcExists(s)); size -= i->getSrc(s)->reg.size; } - if ((s - 1) > 1) - addConstraint(i, 1, s - 1); + condenseSrcs(i, 1, s - 1); + } else + if (i->op == OP_LOAD || i->op == OP_VFETCH) { + condenseDefs(i); + if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8) + addHazard(i, i->src(0).getIndirect(0)); } else - if (i->op == OP_LOAD) { - if (i->src[0].isIndirect(0) && typeSizeof(i->dType) >= 8) - addHazard(i, i->src[0].getIndirect(0)); + if (i->op == OP_UNION || + i->op == OP_MERGE || + i->op == OP_SPLIT) { + constrList.push_back(i); } } return true; @@ -942,21 +1976,65 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) bool RegAlloc::InsertConstraintsPass::insertConstraintMoves() { - for (DLList::Iterator it 
= constrList.iterator(); !it.end(); it.next()) { - Instruction *cst = reinterpret_cast(it.get()); + for (std::list::iterator it = constrList.begin(); + it != constrList.end(); + ++it) { + Instruction *cst = *it; + Instruction *mov; + + if (cst->op == OP_SPLIT && 0) { + // spilling splits is annoying, just make sure they're separate + for (int d = 0; cst->defExists(d); ++d) { + if (!cst->getDef(d)->refCount()) + continue; + LValue *lval = new_LValue(func, cst->def(d).getFile()); + const uint8_t size = cst->def(d).getSize(); + lval->reg.size = size; + + mov = new_Instruction(func, OP_MOV, typeOfSize(size)); + mov->setSrc(0, lval); + mov->setDef(0, cst->getDef(d)); + cst->setDef(d, mov->getSrc(0)); + cst->bb->insertAfter(cst, mov); + + cst->getSrc(0)->asLValue()->noSpill = 1; + mov->getSrc(0)->asLValue()->noSpill = 1; + } + } else + if (cst->op == OP_MERGE || cst->op == OP_UNION) { + for (int s = 0; cst->srcExists(s); ++s) { + const uint8_t size = cst->src(s).getSize(); + + if (!cst->getSrc(s)->defs.size()) { + mov = new_Instruction(func, OP_NOP, typeOfSize(size)); + mov->setDef(0, cst->getSrc(s)); + cst->bb->insertBefore(cst, mov); + continue; + } + assert(cst->getSrc(s)->defs.size() == 1); // still SSA + + Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + // catch some cases where don't really need MOVs + if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) + continue; + + LValue *lval = new_LValue(func, cst->src(s).getFile()); + lval->reg.size = size; - for (int s = 0; cst->srcExists(s); ++s) { - if (!detectConflict(cst, s)) - continue; - Instruction *mov = new_Instruction(func, OP_MOV, - typeOfSize(cst->src[s].getSize())); - mov->setSrc(0, cst->getSrc(s)); - mov->setDef(0, new_LValue(func, FILE_GPR)); - cst->setSrc(s, mov->getDef(0)); + mov = new_Instruction(func, OP_MOV, typeOfSize(size)); + mov->setDef(0, lval); + mov->setSrc(0, cst->getSrc(s)); + cst->setSrc(s, mov->getDef(0)); + cst->bb->insertBefore(cst, mov); - cst->bb->insertBefore(cst, mov); + cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help + + if (cst->op == OP_UNION) + mov->setPredicate(defi->cc, defi->getPredicate()); + } } } + return true; }
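
A standalone sketch of the relative-degree table the patch initializes in GCRA::GCRA() ("i takes away from j"): a neighbour occupying i allocation units blocks j * ceil(i / j) candidate positions of a node that must be placed in aligned groups of j units. The formula and the 17x17 table size are taken from the patch; the demo main() and its example sizes are illustrative assumptions only.

#include <cstdio>
#include <cstdint>

// relDegree[i][j]: interference contributed by an i-unit neighbour to a
// node needing j aligned units, as in GCRA::relDegree in the patch.
static uint8_t relDegree[17][17];

static void initRelDegree()
{
   for (int i = 1; i <= 16; ++i)
      for (int j = 1; j <= 16; ++j)
         relDegree[i][j] = j * ((i + j - 1) / j);
}

int main()
{
   initRelDegree();
   // A 2-unit (64-bit) neighbour costs a 4-unit (128-bit) node one whole
   // aligned quad; a 4-unit neighbour costs a 2-unit node two pairs.
   printf("relDegree[2][4] = %u\n", (unsigned)relDegree[2][4]); // 4
   printf("relDegree[4][2] = %u\n", (unsigned)relDegree[4][2]); // 4
   printf("relDegree[1][1] = %u\n", (unsigned)relDegree[1][1]); // 1
   return 0;
}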
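
Similarly, a minimal illustration of the spill heuristic used by GCRA::calculateSpillWeights() and GCRA::simplify(): a node's weight is (total use count)^2 divided by the extent of its live interval, noSpill values keep an infinite weight, and once no trivially colourable node remains the node with the smallest weight/degree ratio is chosen as the spill candidate. The NodeInfo struct and the sample numbers below are assumptions for the demo; only the formulas mirror the patch.

#include <cstdio>
#include <limits>

struct NodeInfo {
   int uses;     // sum of refCount() over the value's defs
   int extent;   // livei.extent()
   int degree;   // current degree in the interference graph
   bool noSpill; // e.g. fixed registers, constraint temporaries
};

// weight as computed in calculateSpillWeights()
static float spillWeight(const NodeInfo &n)
{
   if (n.noSpill)
      return std::numeric_limits<float>::infinity();
   return (float)n.uses * (float)n.uses / (float)n.extent;
}

// simplify() picks the node with the lowest weight/degree as spill candidate
static float spillScore(const NodeInfo &n)
{
   return spillWeight(n) / (float)n.degree;
}

int main()
{
   NodeInfo cold = { 2, 40, 8, false }; // few uses over a long interval
   NodeInfo hot  = { 9, 12, 8, false }; // many uses packed into a short one
   printf("cold score %f, hot score %f -> spill the cold value\n",
          spillScore(cold), spillScore(hot));
   return 0;
}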