OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)
OP_MERGE, // opposite of split, e.g. combine 2 32 bit into a 64 bit value
OP_CONSTRAINT, // copy values into consecutive registers
- OP_MOV,
+ OP_MOV, // simple copy, no modifiers allowed
OP_LOAD,
OP_STORE,
OP_ADD,
OP_SHR,
OP_MAX,
OP_MIN,
- OP_SAT, // CLAMP(f32, 0.0, 1.0)
+ OP_SAT, // CLAMP(f32, 0.0, 1.0)
OP_CEIL,
OP_FLOOR,
OP_TRUNC,
OP_JOIN, // converge
OP_DISCARD,
OP_EXIT,
- OP_MEMBAR,
+ OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
OP_EXPORT,
OP_TXQ, // texture size query
OP_TXD, // texture derivatives
OP_TXG, // texture gather
- OP_TEXCSAA,
- OP_SULD, // surface load
- OP_SUST, // surface store
+ OP_TEXCSAA, // texture op for coverage sampling
+ OP_TEXPREP, // turn cube map array into 2d array coordinates
+ OP_SULDB, // surface load (raw)
+ OP_SULDP, // surface load (formatted)
+ OP_SUSTB, // surface store (raw)
+ OP_SUSTP, // surface store (formatted)
+ OP_SUREDB,
+ OP_SUREDP, // surface reduction (atomic op)
+ OP_SULEA, // surface load effective address
+ OP_SUBFM, // surface bitfield manipulation
+ OP_SUCLAMP, // clamp surface coordinates
+ OP_SUEAU, // surface effective address
+ OP_MADSP, // special integer multiply-add
+ OP_TEXBAR, // texture dependency barrier
OP_DFDX,
OP_DFDY,
OP_RDSV, // read system value
OP_WRSV, // write system value
- OP_PIXLD,
OP_QUADOP,
OP_QUADON,
OP_QUADPOP,
OP_POPCNT, // bitcount(src0 & src1)
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
- OP_EXTBF,
+ OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
+ OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
+ OP_ATOM,
+ OP_BAR, // execution barrier, sources = { id, thread count, predicate }
+ OP_VADD, // byte/word vector operations
+ OP_VAVG,
+ OP_VMIN,
+ OP_VMAX,
+ OP_VSAD,
+ OP_VSET,
+ OP_VSHR,
+ OP_VSHL,
+ OP_VSEL,
OP_LAST
};
+// various instruction-specific modifier definitions for Instruction::subOp
+// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs)
#define NV50_IR_SUBOP_MUL_HIGH 1
#define NV50_IR_SUBOP_EMIT_RESTART 1
#define NV50_IR_SUBOP_LDC_IL 1
#define NV50_IR_SUBOP_LDC_IS 2
#define NV50_IR_SUBOP_LDC_ISL 3
#define NV50_IR_SUBOP_SHIFT_WRAP 1
+#define NV50_IR_SUBOP_EMU_PRERET 1
+#define NV50_IR_SUBOP_TEXBAR(n) (n) // expands to its argument; parenthesized so expression arguments stay intact
+#define NV50_IR_SUBOP_MOV_FINAL 1
+#define NV50_IR_SUBOP_EXTBF_REV 1
+#define NV50_IR_SUBOP_PERMT_F4E 1
+#define NV50_IR_SUBOP_PERMT_B4E 2
+#define NV50_IR_SUBOP_PERMT_RC8 3
+#define NV50_IR_SUBOP_PERMT_ECL 4
+#define NV50_IR_SUBOP_PERMT_ECR 5
+#define NV50_IR_SUBOP_PERMT_RC16 6
+#define NV50_IR_SUBOP_BAR_SYNC 0
+#define NV50_IR_SUBOP_BAR_ARRIVE 1
+#define NV50_IR_SUBOP_BAR_RED_AND 2
+#define NV50_IR_SUBOP_BAR_RED_OR 3
+#define NV50_IR_SUBOP_BAR_RED_POPC 4
+#define NV50_IR_SUBOP_MEMBAR_L 1
+#define NV50_IR_SUBOP_MEMBAR_S 2
+#define NV50_IR_SUBOP_MEMBAR_M 3
+#define NV50_IR_SUBOP_MEMBAR_CTA (0 << 2)
+#define NV50_IR_SUBOP_MEMBAR_GL (1 << 2)
+#define NV50_IR_SUBOP_MEMBAR_SYS (2 << 2)
+#define NV50_IR_SUBOP_MEMBAR_DIR(m) ((m) & 0x3)
+#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3)
+#define NV50_IR_SUBOP_MEMBAR(d,s) \
+ (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)
+#define NV50_IR_SUBOP_ATOM_ADD 0
+#define NV50_IR_SUBOP_ATOM_MIN 1
+#define NV50_IR_SUBOP_ATOM_MAX 2
+#define NV50_IR_SUBOP_ATOM_INC 3
+#define NV50_IR_SUBOP_ATOM_DEC 4
+#define NV50_IR_SUBOP_ATOM_AND 5
+#define NV50_IR_SUBOP_ATOM_OR 6
+#define NV50_IR_SUBOP_ATOM_XOR 7
+#define NV50_IR_SUBOP_ATOM_CAS 8
+#define NV50_IR_SUBOP_ATOM_EXCH 9
+#define NV50_IR_SUBOP_SUST_IGN 0
+#define NV50_IR_SUBOP_SUST_TRAP 1
+#define NV50_IR_SUBOP_SUST_SDCL 3
+#define NV50_IR_SUBOP_SULD_ZERO 0
+#define NV50_IR_SUBOP_SULD_TRAP 1
+#define NV50_IR_SUBOP_SULD_SDCL 3
+#define NV50_IR_SUBOP_SUBFM_3D 1
+// SUCLAMP subOp encoding: the value is (base + r), with base 0 / 5 / 10 for
+// the SD / PL / BL variants, OR'ed with NV50_IR_SUBOP_SUCLAMP_2D when d == 2.
+// Both macro parameters are parenthesized so expression arguments are safe.
+#define NV50_IR_SUBOP_SUCLAMP_2D 0x10
+#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | (((d) == 2) ? NV50_IR_SUBOP_SUCLAMP_2D : 0))
+#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | (((d) == 2) ? NV50_IR_SUBOP_SUCLAMP_2D : 0))
+#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | (((d) == 2) ? NV50_IR_SUBOP_SUCLAMP_2D : 0))
+#define NV50_IR_SUBOP_MADSP_SD 0xffff
+// Yes, we could represent those with DataType.
+// Or put the type into operation and have a couple 1000 values in that enum.
+// This will have to do for now.
+// The bitfields are supposed to correspond to nve4 ISA.
+#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
+#define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000)
+#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000)
+#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000)
+#define NV50_IR_SUBOP_Vn(n) ((n) >> 14)
enum DataType
{
FILE_PREDICATE, // boolean predicate
FILE_FLAGS, // zero/sign/carry/overflow bits
FILE_ADDRESS,
+ LAST_REGISTER_FILE = FILE_ADDRESS,
FILE_IMMEDIATE,
FILE_MEMORY_CONST,
FILE_SHADER_INPUT,
float f32;
double f64;
int32_t offset; // offset from 0 (base of address space)
- int32_t id; // register id (< 0 if virtual/unassigned)
+ int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4)
struct {
SVSemantic sv;
int index;
// @return new Modifier applying a after b (asserts if unrepresentable)
Modifier operator*(const Modifier) const;
+ // compound form of operator*: stores (*this * m) in this object and
+ // returns a reference to it, so chained use acts on the object itself
+ Modifier& operator*=(const Modifier m) { *this = *this * m; return *this; }
Modifier operator==(const Modifier m) const { return m.bits == bits; }
Modifier operator!=(const Modifier m) const { return m.bits != bits; }
inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; }
inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; }
- inline operator bool() { return bits ? true : false; }
+ inline operator bool() const { return bits ? true : false; }
void applyTo(ImmediateValue &imm) const;
class ValueRef
{
public:
- ValueRef();
+ ValueRef(Value * = NULL);
ValueRef(const ValueRef&);
~ValueRef();
- inline ValueRef& operator=(Value *val) { this->set(val); return *this; }
-
inline bool exists() const { return value != NULL; }
void set(Value *);
inline unsigned getSize() const;
// SSA: return eventual (traverse MOVs) literal value, if it exists
- ImmediateValue *getImmediate() const;
+ bool getImmediate(ImmediateValue&) const;
public:
Modifier mod;
- int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src[indirect[i]]
+ int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i])
uint8_t swizzle;
bool usedAsPtr; // for printing
class ValueDef
{
public:
- ValueDef();
+ ValueDef(Value * = NULL);
ValueDef(const ValueDef&);
~ValueDef();
- inline ValueDef& operator=(Value *val) { this->set(val); return *this; }
-
inline bool exists() const { return value != NULL; }
inline Value *get() const { return value; }
inline Value *rep() const;
void set(Value *);
- void replace(Value *, bool doSet); // replace all uses of the old value
+ bool mayReplace(const ValueRef &);
+ void replace(const ValueRef &, bool doSet); // replace all uses of the old value
inline Instruction *getInsn() const { return insn; }
inline void setInsn(Instruction *inst) { insn = inst; }
{
public:
Value();
+ virtual ~Value() { }
- virtual Value *clone(Function *) const { return NULL; }
+ virtual Value *clone(ClonePolicy<Function>&) const = 0;
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0;
virtual bool equals(const Value *, bool strict = false) const;
virtual bool interfers(const Value *) const;
+ virtual bool isUniform() const { return true; }
+
+ inline Value *rep() const { return join; }
inline Instruction *getUniqueInsn() const;
inline Instruction *getInsn() const; // use when uniqueness is certain
inline const Symbol *asSym() const;
inline const ImmediateValue *asImm() const;
- bool coalesce(Value *, bool force = false);
-
inline bool inFile(DataFile f) { return reg.file == f; }
static inline Value *get(Iterator&);
public:
LValue(Function *, DataFile file);
LValue(Function *, LValue *);
+ ~LValue() { }
+
+ virtual bool isUniform() const;
- virtual Value *clone(Function *) const;
+ virtual LValue *clone(ClonePolicy<Function>&) const;
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
public:
- unsigned ssa : 1;
-
- int affinity;
+ unsigned compMask : 8; // compound/component mask
+ unsigned compound : 1; // used by RA, value involved in split/merge
+ unsigned ssa : 1;
+ unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0)
+ unsigned noSpill : 1; // do not spill (e.g. if spill temporary already)
};
class Symbol : public Value
{
public:
Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0);
+ ~Symbol() { }
- virtual Value *clone(Function *) const;
+ virtual Symbol *clone(ClonePolicy<Function>&) const;
virtual bool equals(const Value *that, bool strict) const;
+ virtual bool isUniform() const;
+
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
// print with indirect values
class ImmediateValue : public Value
{
public:
+ ImmediateValue() { }
ImmediateValue(Program *, uint32_t);
ImmediateValue(Program *, float);
ImmediateValue(Program *, double);
-
// NOTE: not added to program with
ImmediateValue(const ImmediateValue *, DataType ty);
+ ~ImmediateValue() { } // empty destructor body (stray ';' dropped)
+
+ virtual ImmediateValue *clone(ClonePolicy<Function>&) const;
virtual bool equals(const Value *that, bool strict) const;
ImmediateValue operator*(const ImmediateValue&) const;
ImmediateValue operator/(const ImmediateValue&) const;
+ ImmediateValue& operator=(const ImmediateValue&); // only sets value !
+
bool compare(CondCode cc, float fval) const;
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
Instruction(Function *, operation, DataType);
virtual ~Instruction();
- virtual Instruction *clone(bool deep) const;
+ virtual Instruction *clone(ClonePolicy<Function>&,
+ Instruction * = NULL) const;
void setDef(int i, Value *);
void setSrc(int s, Value *);
void setSrc(int s, const ValueRef&);
void swapSources(int a, int b);
+ void moveSources(int s, int delta); // NOTE: only delta > 0 implemented
bool setIndirect(int s, int dim, Value *);
- inline Value *getDef(int d) const { return def[d].get(); }
- inline Value *getSrc(int s) const { return src[s].get(); }
+ inline ValueRef& src(int s) { return srcs[s]; }
+ inline ValueDef& def(int s) { return defs[s]; }
+ inline const ValueRef& src(int s) const { return srcs[s]; }
+ inline const ValueDef& def(int s) const { return defs[s]; }
+
+ inline Value *getDef(int d) const { return defs[d].get(); }
+ inline Value *getSrc(int s) const { return srcs[s].get(); }
inline Value *getIndirect(int s, int dim) const;
inline bool defExists(unsigned d) const
{
- return d < def.size() && def[d].exists();
+ return d < defs.size() && defs[d].exists();
}
inline bool srcExists(unsigned s) const
{
- return s < src.size() && src[s].exists();
+ return s < srcs.size() && srcs[s].exists();
}
- inline bool constrainedDefs() const { return defExists(1); }
+ inline bool constrainedDefs() const;
bool setPredicate(CondCode ccode, Value *);
inline Value *getPredicate() const;
bool writesPredicate() const;
+ inline bool isPredicated() const { return predSrc >= 0; }
inline void setFlagsSrc(int s, Value *);
inline void setFlagsDef(int d, Value *);
- unsigned int defCount(unsigned int mask) const;
- unsigned int srcCount(unsigned int mask) const;
+ unsigned int defCount() const { return defs.size(); } // size of the defs container
+ unsigned int defCount(unsigned int mask, bool singleFile = false) const;
+ unsigned int srcCount() const { return srcs.size(); } // size of the srcs container
+ unsigned int srcCount(unsigned int mask, bool singleFile = false) const;
// save & remove / set indirect[0,1] and predicate source
void takeExtraSources(int s, Value *[3]);
RoundMode rnd;
CacheMode cache;
- uint8_t subOp; // quadop, 1 for mul-high, etc.
+ uint16_t subOp; // quadop, 1 for mul-high, etc.
unsigned encSize : 4; // encoding size in bytes
unsigned saturate : 1; // to [0.0f, 1.0f]
unsigned join : 1; // converge control flow (use OP_JOIN until end)
unsigned fixed : 1; // prevent dead code elimination
unsigned terminator : 1; // end of basic block
- unsigned atomic : 1;
unsigned ftz : 1; // flush denormal to zero
unsigned dnz : 1; // denormals, NaN are zero
unsigned ipa : 4; // interpolation mode
unsigned lanes : 4;
unsigned perPatch : 1;
unsigned exit : 1; // terminate program after insn
+ unsigned mask : 4; // for vector ops
int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor
int8_t flagsDef;
int8_t flagsSrc;
- std::deque<ValueDef> def; // no gaps !
- std::deque<ValueRef> src; // no gaps !
+ uint8_t sched; // scheduling data (NOTE: maybe move to separate storage)
BasicBlock *bb;
+protected:
+ std::deque<ValueDef> defs; // no gaps !
+ std::deque<ValueRef> srcs; // no gaps !
+
// instruction specific methods:
// (don't want to subclass, would need more constructors and memory pools)
public:
private:
void init();
-protected:
- void cloneBase(Instruction *clone, bool deep) const;
};
enum TexQuery
Target& operator=(TexTarget targ)
{
assert(targ < TEX_TARGET_COUNT);
+ target = targ;
return *this;
}
TexInstruction(Function *, operation);
virtual ~TexInstruction();
- virtual Instruction *clone(bool deep) const;
+ virtual TexInstruction *clone(ClonePolicy<Function>&,
+ Instruction * = NULL) const;
inline void setTexture(Target targ, uint8_t r, uint8_t s)
{
public:
CmpInstruction(Function *, operation);
- virtual Instruction *clone(bool deep) const;
+ virtual CmpInstruction *clone(ClonePolicy<Function>&,
+ Instruction * = NULL) const;
void setCondition(CondCode cond) { setCond = cond; }
CondCode getCondition() const { return setCond; }
class FlowInstruction : public Instruction
{
public:
- FlowInstruction(Function *, operation, BasicBlock *target);
+ FlowInstruction(Function *, operation, void *target);
+
+ virtual FlowInstruction *clone(ClonePolicy<Function>&,
+ Instruction * = NULL) const;
public:
unsigned allWarp : 1;
unsigned absolute : 1;
unsigned limit : 1;
unsigned builtin : 1; // true for calls to emulation code
+ unsigned indirect : 1; // target in src(0)
union {
BasicBlock *bb;
BasicBlock(Function *);
~BasicBlock();
+ BasicBlock *clone(ClonePolicy<Function>&) const;
+
inline int getId() const { return id; }
inline unsigned int getInsnCount() const { return numInsns; }
inline bool isTerminated() const { return exit && exit->terminator; }
bool dominatedBy(BasicBlock *bb);
- inline bool reachableBy(BasicBlock *by, BasicBlock *term);
+ inline bool reachableBy(const BasicBlock *by, const BasicBlock *term);
// returns mask of conditional out blocks
// e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF
Graph::Node dom;
BitSet liveSet;
+ BitSet defSet;
uint32_t binPos;
uint32_t binSize;
class Function
{
public:
- Function(Program *, const char *name);
+ Function(Program *, const char *name, uint32_t label);
~Function();
+ static inline Function *get(Graph::Node *node);
+
inline Program *getProgram() const { return prog; }
inline const char *getName() const { return name; }
inline int getId() const { return id; }
+ inline uint32_t getLabel() const { return label; }
void print();
void printLiveIntervals() const;
inline LValue *getLValue(int id);
+ void buildLiveSets();
+ void buildDefSets();
bool convertToSSA();
public:
+ std::deque<ValueDef> ins;
+ std::deque<ValueRef> outs;
+ std::deque<Value *> clobbers;
+
Graph cfg;
Graph::Node *cfgExit;
Graph *domTree;
uint32_t binPos;
uint32_t binSize;
+ Value *stackPtr;
+
+ uint32_t tlsBase; // base address for l[] space (if no stack pointer is used)
+ uint32_t tlsSize;
+
ArrayList allBBlocks;
ArrayList allInsns;
ArrayList allLValues;
private:
void buildLiveSetsPreSSA(BasicBlock *, const int sequence);
+ void buildDefSetsPreSSA(BasicBlock *bb, const int seq);
private:
+ uint32_t label;
int id;
const char *const name;
Program *prog;
Type getType() const { return progType; }
inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); }
+ inline void del(Function *fn, int& id) { allFuncs.remove(id); }
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
bool makeFromTGSI(struct nv50_ir_prog_info *);
const Target *getTarget() const { return target; }
private:
+ void emitSymbolTable(struct nv50_ir_prog_info *);
+
Type progType;
Target *target;
uint32_t *code;
uint32_t binSize;
+ uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
int maxGPR;
MemoryPool mem_ImmediateValue;
uint32_t dbgFlags;
+ uint8_t optLevel;
+
+ void *targetPriv; // e.g. to carry information between passes
void releaseInstruction(Instruction *);
void releaseValue(Value *);