bool assign(int32_t& reg, DataFile f, unsigned int size);
void release(DataFile f, int32_t reg, unsigned int size);
- bool occupy(DataFile f, int32_t reg, unsigned int size);
- bool occupy(const Value *);
+ void occupy(DataFile f, int32_t reg, unsigned int size);
+ void occupy(const Value *);
void occupyMask(DataFile f, int32_t reg, uint8_t mask);
+ bool isOccupied(DataFile f, int32_t reg, unsigned int size) const;
+ bool testOccupy(const Value *);
+ bool testOccupy(DataFile f, int32_t reg, unsigned int size);
inline int getMaxAssigned(DataFile f) const { return fill[f]; }
return size >> unit[f];
}
// for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
// For smaller regs the multiplier is the register size itself, since the
// factor applied to the id is MIN2(size, 4).
// This change only const-qualifies the parameter; the computation is the same.
- inline unsigned int idToBytes(Value *v) const
+ inline unsigned int idToBytes(const Value *v) const
{
return v->reg.data.id * MIN2(v->reg.size, 4);
}
// Converts v's register id into the unit count used for v's register file by
// passing the byte offset from idToBytes(v) through units().
// This change only const-qualifies the parameter so it can be called with the
// const Value * that occupy()/testOccupy() receive.
- inline unsigned int idToUnits(Value *v) const
+ inline unsigned int idToUnits(const Value *v) const
{
return units(v->reg.file, idToBytes(v));
}
}
// Read-only query (const member): returns the result of testRange() on file
// f's bit set for the `size` units starting at `reg`.
// NOTE(review): presumably true when at least one unit in the range is already
// taken -- confirm against the bit set's testRange() implementation.
bool
+RegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const
+{
+ return bits[f].testRange(reg, size);
+}
+
// Marks the register range described by v as occupied by forwarding to the
// (file, reg, size) overload. No longer returns a result.
// The start position is now idToUnits(v) rather than the raw v->reg.data.id;
// the length is v's size shifted right by the unit of v's register file.
+void
RegisterSet::occupy(const Value *v)
{
- return occupy(v->reg.file, v->reg.data.id, v->reg.size >> unit[v->reg.file]);
+ occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]);
}
void
bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32));
}
// Unconditionally marks `size` units starting at `reg` in file f as occupied.
// The "already taken?" check that used to make this return false is removed
// here; callers that need the check-then-claim behaviour use testOccupy().
-bool
+void
RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size)
{
- if (bits[f].testRange(reg, size))
- return false;
-
bits[f].setRange(reg, size);
INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size);
// Remember the highest unit index occupied so far in this file; this is the
// value getMaxAssigned(f) reports.
fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
+}
+
+
// Value-based convenience overload: derives the file, the start position
// (idToUnits(v)) and the length (size shifted right by the file's unit) from v
// and forwards to testOccupy(DataFile, int32_t, unsigned int), returning its
// result unchanged.
+bool
+RegisterSet::testOccupy(const Value *v)
+{
+ return testOccupy(v->reg.file,
+ idToUnits(v), v->reg.size >> unit[v->reg.file]);
+}
// Checks the range and claims it only if it is free.
// Returns false, leaving the set untouched, when isOccupied() reports the
// range as taken; otherwise occupies the range and returns true.
// This is the behaviour the bool-returning occupy() had before it was made
// unconditional.
+bool
+RegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size)
+{
+ if (isOccupied(f, reg, size))
+ return false;
+ occupy(f, reg, size);
return true;
}
bool run(const std::list<ValuePair>&);
- Symbol *assignSlot(const Interval&, unsigned int size);
+ Symbol *assignSlot(const Interval&, const unsigned int size);
inline int32_t getStackSize() const { return stackSize; }
private:
for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin();
it != cal->target.fn->clobbers.end();
++it) {
- if (clobberSet.occupy(*it)) {
+ if (clobberSet.testOccupy(*it)) {
Value *tmp = new_LValue(func, (*it)->asLValue());
tmp->reg.data.id = (*it)->reg.data.id;
cal->setDef(cal->defCount(), tmp);
if (bn->cfg.visit(sequence))
if (!buildLiveSets(bn))
return false;
- if (n++ == 0)
- bb->liveSet = bn->liveSet;
- else
+ if (n++ || bb->liveSet.marker)
bb->liveSet |= bn->liveSet;
+ else
+ bb->liveSet = bn->liveSet;
}
if (!n && !bb->liveSet.marker)
bb->liveSet.fill(0);
}
}
// Copies the `compound` and `compMask` fields from one value's LValue to the
// other's. The default direction is src -> dst.
// NOTE(review): neither asLValue() result is checked before being dereferenced
// -- presumably both arguments are always LValues here; confirm at call sites.
+// Used when coalescing moves. The non-compound value will become one, e.g.:
+// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
+// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
static inline void copyCompound(Value *dst, Value *src)
{
LValue *ldst = dst->asLValue();
LValue *lsrc = src->asLValue();
// If dst is already compound and src is not, reverse the direction so the
// compound information is written to the non-compound value instead of being
// overwritten.
+ if (ldst->compound && !lsrc->compound) {
+ LValue *swap = lsrc;
+ lsrc = ldst;
+ ldst = swap;
+ }
+
ldst->compound = lsrc->compound;
ldst->compMask = lsrc->compMask;
}
RIG_Node *cur = values.front();
for (std::list<RIG_Node *>::iterator it = active.begin();
- it != active.end();
- ++it) {
+ it != active.end();) {
RIG_Node *node = *it;
if (node->livei.end() <= cur->livei.begin()) {
it = active.erase(it);
- --it;
- } else
- if (node->f == cur->f && node->livei.overlaps(cur->livei)) {
- cur->addInterference(node);
+ } else {
+ if (node->f == cur->f && node->livei.overlaps(cur->livei))
+ cur->addInterference(node);
+ ++it;
}
}
values.pop_front();
it != node->prefRegs.end();
++it) {
if ((*it)->reg >= 0 &&
- regs.occupy(node->f, (*it)->reg, node->colors)) {
+ regs.testOccupy(node->f, (*it)->reg, node->colors)) {
node->reg = (*it)->reg;
break;
}
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
func->print();
} else {
- prog->maxGPR = regs.getMaxAssigned(FILE_GPR);
+ prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR));
}
out:
}
Symbol *
-SpillCodeInserter::assignSlot(const Interval &livei, unsigned int size)
+SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size)
{
SpillSlot slot;
int32_t offsetBase = stackSize;
slot.sym = NULL;
for (offset = offsetBase; offset < stackSize; offset += size) {
+ const int32_t entryEnd = offset + size;
while (it != slots.end() && it->offset < offset)
++it;
if (it == slots.end()) // no slots left
break;
std::list<SpillSlot>::iterator bgn = it;
- while (it != slots.end() && it->offset < (offset + size)) {
+ while (it != slots.end() && it->offset < entryEnd) {
it->occup.print();
if (it->occup.overlaps(livei))
break;
++it;
}
- if (it == slots.end() || it->offset >= (offset + size)) {
+ if (it == slots.end() || it->offset >= entryEnd) {
// fits
- for (; bgn != slots.end() && bgn->offset < (offset + size); ++bgn) {
+ for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) {
bgn->occup.insert(livei);
if (bgn->size() == size)
slot.sym = bgn->sym;
unsigned int i, retries;
bool ret;
+ if (!func->ins.empty()) {
+ // Insert a nop at the entry so inputs only used by the first instruction
+ // don't count as having an empty live range.
+ Instruction *nop = new_Instruction(func, OP_NOP, TYPE_NONE);
+ BasicBlock::get(func->cfg.getRoot())->insertHead(nop);
+ }
+
ret = insertConstr.exec(func);
if (!ret)
goto out;
v->join = v;
reg += v->reg.size;
}
- delete_Instruction(prog, split);
}
splits.clear();
v->join = v;
reg += v->reg.size;
}
- delete_Instruction(prog, merge);
}
merges.clear();
}
int n = tex->srcCount(0xff, true);
if (n > 4) {
condenseSrcs(tex, 0, 3);
- if (n > 5)
- condenseSrcs(tex, 4, n - 1);
+ if (n > 5) // NOTE: first call modified positions already
+ condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
} else
if (n > 1) {
condenseSrcs(tex, 0, n - 1);
if (s > 1)
condenseSrcs(tex, 0, s - 1);
- if (n > 1)
- condenseSrcs(tex, s, s + (n - 1));
+ if (n > 1) // NOTE: first call modified positions already
+ condenseSrcs(tex, 1, n);
condenseDefs(tex);
}
texConstraintNVC0(tex);
break;
case 0xe0:
+ case NVISA_GK110_CHIPSET:
texConstraintNVE0(tex);
break;
default:
if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
addHazard(i, i->src(0).getIndirect(0));
} else
- if (i->op == OP_UNION) {
+ if (i->op == OP_UNION ||
+ i->op == OP_MERGE ||
+ i->op == OP_SPLIT) {
constrList.push_back(i);
}
}