From 8cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cd Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 29 Mar 2012 21:18:24 +0200 Subject: [PATCH] nv50/ir: Add support for unlimited instruction arguments. --- src/gallium/drivers/nv50/codegen/nv50_ir.cpp | 150 ++++++++---------- src/gallium/drivers/nv50/codegen/nv50_ir.h | 89 ++++------- .../drivers/nv50/codegen/nv50_ir_inlines.h | 61 +++---- .../drivers/nv50/codegen/nv50_ir_peephole.cpp | 16 +- .../drivers/nv50/codegen/nv50_ir_print.cpp | 2 +- .../drivers/nv50/codegen/nv50_ir_ra.cpp | 17 +- .../drivers/nv50/codegen/nv50_ir_ssa.cpp | 17 +- 7 files changed, 141 insertions(+), 211 deletions(-) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp index 855d4241a2e..ebcdff4fb79 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp @@ -58,13 +58,19 @@ Modifier Modifier::operator*(const Modifier m) const return Modifier(a | c); } -ValueRef::ValueRef() : value(0), insn(0), next(this), prev(this) +ValueRef::ValueRef() : value(NULL), insn(NULL) { indirect[0] = -1; indirect[1] = -1; usedAsPtr = false; } +ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn) +{ + set(ref); + usedAsPtr = ref.usedAsPtr; +} + ValueRef::~ValueRef() { this->set(NULL); @@ -85,11 +91,16 @@ ImmediateValue *ValueRef::getImmediate() const return NULL; } -ValueDef::ValueDef() : value(0), insn(0), next(this), prev(this) +ValueDef::ValueDef() : value(NULL), insn(NULL) { // nothing to do } +ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL) +{ + set(def.get()); +} + ValueDef::~ValueDef() { this->set(NULL); @@ -109,83 +120,43 @@ ValueRef::set(Value *refVal) { if (value == refVal) return; - if (value) { - if (value->uses == this) - value->uses = (next == this) ? NULL : next; - value->unref(); - DLLIST_DEL(this); - } + if (value) + value->uses.remove(this); + if (refVal) + refVal->uses.push_back(this); - if (refVal) { - if (refVal->uses) - DLLIST_ADDTAIL(refVal->uses, this); - else - refVal->uses = this; - refVal->ref(); - } value = refVal; } void ValueDef::set(Value *defVal) { - assert(next != this || prev == this); // check that SSA hack isn't active - if (value == defVal) return; - if (value) { - if (value->defs == this) - value->defs = (next == this) ? NULL : next; - DLLIST_DEL(this); - } + if (value) + value->defs.remove(this); + if (defVal) + defVal->defs.push_back(this); - if (defVal) { - if (defVal->defs) - DLLIST_ADDTAIL(defVal->defs, this); - else - defVal->defs = this; - } value = defVal; } -// TODO: make me faster by using a safe iterator void ValueDef::replace(Value *repVal, bool doSet) { - ValueRef **refs = new ValueRef * [value->refCount()]; - int n = 0; + if (value == repVal) + return; - if (!refs && value->refCount()) - FATAL("memory allocation failed"); - - for (ValueRef::Iterator iter = value->uses->iterator(); !iter.end(); - iter.next()) { - assert(n < value->refCount()); - refs[n++] = iter.get(); - } - while (n) - refs[--n]->set(repVal); + while (value->refCount()) + value->uses.front()->set(repVal); if (doSet) - this->set(repVal); - - if (refs) - delete[] refs; -} - -void -ValueDef::mergeDefs(ValueDef *join) -{ - DLLIST_MERGE(this, join, ValueDef *); + set(repVal); } Value::Value() { - refCnt = 0; - uses = NULL; - defs = NULL; join = this; - memset(®, 0, sizeof(reg)); reg.size = 4; } @@ -213,7 +184,7 @@ Value::coalesce(Value *jval, bool force) } // need to check all fixed register values of the program for overlap - Function *func = defs->getInsn()->bb->getFunction(); + Function *func = defs.front()->getInsn()->bb->getFunction(); // TODO: put values in by register-id bins per function ArrayList::Iterator iter = func->allLValues.iterator(); @@ -232,11 +203,11 @@ Value::coalesce(Value *jval, bool force) INFO("NOTE: forced coalescing with live range overlap\n"); } - ValueDef::Iterator iter = jrep->defs->iterator(); - for (; !iter.end(); iter.next()) - iter.get()->get()->join = repr; + for (DefIterator it = jrep->defs.begin(); it != jrep->defs.end(); ++it) + (*it)->get()->join = repr; - repr->defs->mergeDefs(jrep->defs); + repr->defs.insert(repr->defs.end(), + jrep->defs.begin(), jrep->defs.end()); repr->livei.unify(jrep->livei); assert(repr->join == repr && jval->join == repr); @@ -540,11 +511,6 @@ void Instruction::init() postFactor = 0; - for (int p = 0; p < NV50_IR_MAX_DEFS; ++p) - def[p].setInsn(this); - for (int p = 0; p < NV50_IR_MAX_SRCS; ++p) - src[p].setInsn(this); - predSrc = -1; flagsDef = -1; flagsSrc = -1; @@ -587,7 +553,31 @@ Instruction::~Instruction() } void -Instruction::setSrc(int s, ValueRef& ref) +Instruction::setDef(int i, Value *val) +{ + int size = def.size(); + if (i >= size) { + def.resize(i + 1); + while (size <= i) + def[size++].setInsn(this); + } + def[i].set(val); +} + +void +Instruction::setSrc(int s, Value *val) +{ + int size = src.size(); + if (s >= size) { + src.resize(s + 1); + while (size <= s) + src[size++].setInsn(this); + } + src[s].set(val); +} + +void +Instruction::setSrc(int s, const ValueRef& ref) { setSrc(s, ref.get()); src[s].mod = ref.mod; @@ -673,7 +663,7 @@ Instruction::cloneBase(Instruction *insn, bool deep) const } for (int s = 0; this->srcExists(s); ++s) - insn->src[s].set(this->src[s]); + insn->setSrc(s, this->src[s]); insn->predSrc = this->predSrc; insn->flagsDef = this->flagsDef; @@ -703,17 +693,15 @@ Instruction::srcCount(unsigned int mask) const bool Instruction::setIndirect(int s, int dim, Value *value) { - int p = src[s].indirect[dim]; - assert(this->srcExists(s)); + + int p = src[s].indirect[dim]; if (p < 0) { if (!value) return true; - for (p = s + 1; this->srcExists(p); ++p); + p = src.size(); } - assert(p < NV50_IR_MAX_SRCS); - - src[p] = value; + setSrc(p, value); src[p].usedAsPtr = (value != 0); src[s].indirect[dim] = value ? p : -1; return true; @@ -732,22 +720,18 @@ Instruction::setPredicate(CondCode ccode, Value *value) return true; } - if (predSrc < 0) { - int s; - for (s = 0; this->srcExists(s); ++s) - assert(s < NV50_IR_MAX_SRCS); - predSrc = s; - } - src[predSrc] = value; + if (predSrc < 0) + predSrc = src.size(); + + setSrc(predSrc, value); return true; } bool Instruction::writesPredicate() const { - for (int d = 0; d < 2 && def[d].exists(); ++d) - if (def[d].exists() && - (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))) + for (int d = 0; defExists(d); ++d) + if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS)) return true; return false; } diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h index eb9f0ff7e3b..ddd066df7f8 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h @@ -26,6 +26,9 @@ #include #include #include +#include +#include +#include #include "nv50_ir_util.h" #include "nv50_ir_graph.h" @@ -379,6 +382,7 @@ class ValueRef { public: ValueRef(); + ValueRef(const ValueRef&); ~ValueRef(); inline ValueRef& operator=(Value *val) { this->set(val); return *this; } @@ -402,21 +406,6 @@ public: // SSA: return eventual (traverse MOVs) literal value, if it exists ImmediateValue *getImmediate() const; - class Iterator - { - public: - Iterator(ValueRef *ref) : pos(ref), ini(ref) { } - - inline ValueRef *get() const { return pos; } - inline bool end() const { return pos == NULL; } - inline void next() { pos = (pos->next != ini) ? pos->next : 0; } - - private: - ValueRef *pos, *ini; - }; - - inline Iterator iterator() { return Iterator(this); } - public: Modifier mod; int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src[indirect[i]] @@ -427,14 +416,13 @@ public: private: Value *value; Instruction *insn; - ValueRef *next; // to link uses of the value - ValueRef *prev; }; class ValueDef { public: ValueDef(); + ValueDef(const ValueDef&); ~ValueDef(); inline ValueDef& operator=(Value *val) { this->set(val); return *this; } @@ -452,33 +440,13 @@ public: inline DataFile getFile() const; inline unsigned getSize() const; - // HACK: save the pre-SSA value in 'prev', in SSA we don't need the def list - // but we'll use it again for coalescing in register allocation inline void setSSA(LValue *); inline const LValue *preSSA() const; - inline void restoreDefList(); // after having been abused for SSA hack - void mergeDefs(ValueDef *); - - class Iterator - { - public: - Iterator(ValueDef *def) : pos(def), ini(def) { } - - inline ValueDef *get() const { return pos; } - inline bool end() const { return pos == NULL; } - inline void next() { pos = (pos->next != ini) ? pos->next : NULL; } - - private: - ValueDef *pos, *ini; - }; - - inline Iterator iterator() { return Iterator(this); } private: Value *value; // should make this LValue * ... + LValue *origin; // pre SSA value Instruction *insn; - ValueDef *next; // circular list of all definitions of the same value - ValueDef *prev; }; class Value @@ -496,9 +464,7 @@ public: inline Instruction *getUniqueInsn() const; inline Instruction *getInsn() const; // use when uniqueness is certain - inline int refCount() { return refCnt; } - inline int ref() { return ++refCnt; } - inline int unref() { --refCnt; assert(refCnt >= 0); return refCnt; } + inline int refCount() { return uses.size(); } inline LValue *asLValue(); inline Symbol *asSym(); @@ -512,16 +478,14 @@ public: static inline Value *get(Iterator&); -protected: - int refCnt; - - friend class ValueDef; - friend class ValueRef; + std::list uses; + std::list defs; + typedef std::list::iterator UseIterator; + typedef std::list::const_iterator UseCIterator; + typedef std::list::iterator DefIterator; + typedef std::list::const_iterator DefCIterator; -public: int id; - ValueRef *uses; - ValueDef *defs; Storage reg; // TODO: these should be in LValue: @@ -605,10 +569,6 @@ public: virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; }; - -#define NV50_IR_MAX_DEFS 4 -#define NV50_IR_MAX_SRCS 8 - class Instruction { public: @@ -618,9 +578,9 @@ public: virtual Instruction *clone(bool deep) const; - inline void setDef(int i, Value *val) { def[i].set(val); } - inline void setSrc(int s, Value *val) { src[s].set(val); } - void setSrc(int s, ValueRef&); + void setDef(int i, Value *); + void setSrc(int s, Value *); + void setSrc(int s, const ValueRef&); void swapSources(int a, int b); bool setIndirect(int s, int dim, Value *); @@ -628,10 +588,16 @@ public: inline Value *getSrc(int s) const { return src[s].get(); } inline Value *getIndirect(int s, int dim) const; - inline bool defExists(int d) const { return d < 4 && def[d].exists(); } - inline bool srcExists(int s) const { return s < 8 && src[s].exists(); } + inline bool defExists(unsigned d) const + { + return d < def.size() && def[d].exists(); + } + inline bool srcExists(unsigned s) const + { + return s < src.size() && src[s].exists(); + } - inline bool constrainedDefs() const { return def[1].exists(); } + inline bool constrainedDefs() const { return defExists(1); } bool setPredicate(CondCode ccode, Value *); inline Value *getPredicate() const; @@ -705,9 +671,8 @@ public: int8_t flagsDef; int8_t flagsSrc; - // NOTE: should make these pointers, saves space and work on shuffling - ValueDef def[NV50_IR_MAX_DEFS]; // no gaps ! - ValueRef src[NV50_IR_MAX_SRCS]; // no gaps ! + std::deque def; // no gaps ! + std::deque src; // no gaps ! BasicBlock *bb; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h index 6d0848e6e07..d511c93232a 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h @@ -152,57 +152,44 @@ unsigned int ValueDef::getSize() const void ValueDef::setSSA(LValue *lval) { - Value *save = value; - - this->set(NULL); - prev = reinterpret_cast(save); - value = lval; - lval->defs = this; -} - -void ValueDef::restoreDefList() -{ - if (next == this) - prev = this; + origin = value->asLValue(); + set(lval); } const LValue *ValueDef::preSSA() const { - return reinterpret_cast(prev); + return origin; } Instruction *Value::getInsn() const { - assert(!defs || getUniqueInsn()); - return defs ? defs->getInsn() : NULL; + return defs.empty() ? NULL : defs.front()->getInsn(); } Instruction *Value::getUniqueInsn() const { - if (defs) { - if (join != this) { - ValueDef::Iterator it = defs->iterator(); - while (!it.end() && it.get()->get() != this) - it.next(); - assert(it.get()->get() == this); - return it.get()->getInsn(); - } + if (defs.empty()) + return NULL; - // after regalloc, the definitions of coalesced values are linked - if (reg.data.id < 0) { - ValueDef::Iterator it = defs->iterator(); - int nDef; - for (nDef = 0; !it.end() && nDef < 2; it.next()) - if (it.get()->get() == this) // don't count joined values - ++nDef; - if (nDef > 1) - WARN("value %%%i not uniquely defined\n", id); // return NULL ? - } - - assert(defs->get() == this); - return defs->getInsn(); + // after regalloc, the definitions of coalesced values are linked + if (join != this) { + for (DefCIterator it = defs.begin(); it != defs.end(); ++it) + if ((*it)->get() == this) + return (*it)->getInsn(); + // should be unreachable and trigger assertion at the end } - return NULL; +#ifdef DEBUG + if (reg.data.id < 0) { + int n = 0; + for (DefCIterator it = defs.begin(); n < 2 && it != defs.end(); ++it) + if ((*it)->get() == this) // don't count joined values + ++n; + if (n > 1) + WARN("value %%%i not uniquely defined\n", id); // return NULL ? + } +#endif + assert(defs.front()->get() == this); + return defs.front()->getInsn(); } Value *Instruction::getIndirect(int s, int dim) const diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp index bf648dddb84..046f04bbf35 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp @@ -40,7 +40,7 @@ Instruction::isNop() const if (!fixed && op == OP_NOP) return true; - if (def[0].exists() && def[0].rep()->reg.data.id < 0) { + if (defExists(0) && def[0].rep()->reg.data.id < 0) { for (int d = 1; defExists(d); ++d) if (def[d].rep()->reg.data.id >= 0) WARN("part of vector result is unused !\n"); @@ -249,8 +249,8 @@ ConstantFolding::visit(BasicBlock *bb) if (i->op == OP_MOV) // continue early, MOV appears frequently continue; - ImmediateValue *src0 = i->src[0].getImmediate(); - ImmediateValue *src1 = i->src[1].getImmediate(); + ImmediateValue *src0 = i->srcExists(0) ? i->src[0].getImmediate() : NULL; + ImmediateValue *src1 = i->srcExists(1) ? i->src[1].getImmediate() : NULL; if (src0 && src1) expr(i, src0, src1); @@ -577,7 +577,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, // b = mul a, imm // d = mul b, c -> d = mul_x_imm a, c int s2, t2; - insn = mul2->getDef(0)->uses->getInsn(); + insn = mul2->getDef(0)->uses.front()->getInsn(); if (!insn) return; mul1 = mul2; @@ -2090,10 +2090,10 @@ LocalCSE::visit(BasicBlock *bb) src = ir->getSrc(s); if (src) { - for (ValueRef::Iterator refs = src->uses->iterator(); !refs.end(); - refs.next()) { - Instruction *ik = refs.get()->getInsn(); - if (ik->serial < ir->serial && ik->bb == ir->bb) + for (Value::UseIterator it = src->uses.begin(); + it != src->uses.end(); ++it) { + Instruction *ik = (*it)->getInsn(); + if (ik && ik->serial < ir->serial && ik->bb == ir->bb) if (tryReplace(&ir, ik)) break; } diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp index 4040a4d9c0f..904b9c0ca69 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp @@ -451,7 +451,7 @@ void Instruction::print() const if (rnd != ROUND_N) PRINT(" %s", RoundModeStr[rnd]); - if (def[1].exists()) + if (defExists(1)) PRINT(" {"); for (d = 0; defExists(d); ++d) { SPACE(); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp index f08026cf844..60ec4a3c089 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp @@ -442,7 +442,7 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) { bb->liveSet.clr(i->getDef(0)->id); - for (int s = 0; s < NV50_IR_MAX_SRCS && i->src[s].exists(); ++s) { + for (int s = 0; i->srcExists(s); ++s) { assert(i->src[s].getInsn()); if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ? bb->liveSet.set(i->getSrc(s)->id); @@ -513,7 +513,9 @@ RegAlloc::coalesceValues(unsigned int mask) case OP_MOV: if (!(mask & JOIN_MASK_MOV)) break; - i = insn->getDef(0)->uses ? insn->getDef(0)->uses->getInsn() : NULL; + i = NULL; + if (!insn->getDef(0)->uses.empty()) + i = insn->getDef(0)->uses.front()->getInsn(); // if this is a contraint-move there will only be a single use if (i && i->op == OP_CONSTRAINT) break; @@ -851,20 +853,21 @@ RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex) bool RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s) { + Value *v = cst->getSrc(s); + // current register allocation can't handle it if a value participates in // multiple constraints - for (ValueRef::Iterator it = cst->src[s].iterator(); !it.end(); it.next()) { - Instruction *insn = it.get()->getInsn(); - if (insn != cst) + for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) { + if (cst != (*it)->getInsn()) return true; } // can start at s + 1 because detectConflict is called on all sources for (int c = s + 1; cst->srcExists(c); ++c) - if (cst->getSrc(c) == cst->getSrc(s)) + if (v == cst->getSrc(c)) return true; - Instruction *defi = cst->getSrc(s)->getInsn(); + Instruction *defi = v->getInsn(); return (!defi || defi->constrainedDefs()); } diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ssa.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ssa.cpp index 60b12b308c1..52902591d2a 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_ssa.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ssa.cpp @@ -322,7 +322,7 @@ Function::convertToSSA() if (!allLValues.get(var)) continue; lval = reinterpret_cast(allLValues.get(var))->asLValue(); - if (!lval || !lval->defs) + if (!lval || lval->defs.empty()) continue; ++iterCount; @@ -330,8 +330,9 @@ Function::convertToSSA() // the BB they're defined in // gather blocks with assignments to lval in workList - for (ValueDef::Iterator d = lval->defs->iterator(); !d.end(); d.next()) { - bb = d.get()->getInsn()->bb; + for (Value::DefIterator d = lval->defs.begin(); + d != lval->defs.end(); ++d) { + bb = (*d)->getInsn()->bb; if (!bb) continue; // instruction likely been removed but not XXX deleted @@ -359,9 +360,6 @@ Function::convertToSSA() if (!dfBB->liveSet.test(lval->id)) continue; - // TODO: use dedicated PhiInstruction to lift this limit - assert(dfBB->cfg.incidentCount() <= NV50_IR_MAX_SRCS); - phi = new_Instruction(this, OP_PHI, typeOfSize(lval->reg.size)); dfBB->insertTail(phi); @@ -413,13 +411,6 @@ bool RenamePass::run() return false; search(BasicBlock::get(func->domTree->getRoot())); - ArrayList::Iterator iter = func->allInsns.iterator(); - for (; !iter.end(); iter.next()) { - Instruction *insn = reinterpret_cast(iter.get()); - for (int d = 0; insn->defExists(d); ++d) - insn->def[d].restoreDefList(); - } - return true; } -- 2.30.2