#include <list>
#include <vector>
+#include "codegen/unordered_set.h"
#include "codegen/nv50_ir_util.h"
#include "codegen/nv50_ir_graph.h"
OP_MAD,
OP_FMA,
OP_SAD, // abs(src0 - src1) + src2
+ OP_SHLADD,
+ // extended multiply-add (GM107+), does a lot of things.
+ // see envytools for detailed documentation
+ OP_XMAD,
OP_ABS,
OP_NEG,
OP_NOT,
OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
+ OP_AFETCH, // fetch base address of shader input (a[%r1+0x10])
OP_EXPORT,
OP_LINTERP,
OP_PINTERP,
OP_SUBFM, // surface bitfield manipulation
OP_SUCLAMP, // clamp surface coordinates
OP_SUEAU, // surface effective address
+ OP_SUQ, // surface query
OP_MADSP, // special integer multiply-add
OP_TEXBAR, // texture dependency barrier
OP_DFDX,
OP_VSHL,
OP_VSEL,
OP_CCTL, // cache control
+ OP_SHFL, // warp shuffle
+ OP_VOTE,
+ OP_BUFQ, // buffer query
OP_LAST
};
#define NV50_IR_SUBOP_LDC_IS 2
#define NV50_IR_SUBOP_LDC_ISL 3
#define NV50_IR_SUBOP_SHIFT_WRAP 1
+#define NV50_IR_SUBOP_SHIFT_HIGH 2
#define NV50_IR_SUBOP_EMU_PRERET 1
#define NV50_IR_SUBOP_TEXBAR(n) n
#define NV50_IR_SUBOP_MOV_FINAL 1
#define NV50_IR_SUBOP_EXTBF_REV 1
#define NV50_IR_SUBOP_BFIND_SAMT 1
+#define NV50_IR_SUBOP_RCPRSQ_64H 1
#define NV50_IR_SUBOP_PERMT_F4E 1
#define NV50_IR_SUBOP_PERMT_B4E 2
#define NV50_IR_SUBOP_PERMT_RC8 3
#define NV50_IR_SUBOP_PIXLD_OFFSET 3
#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4
#define NV50_IR_SUBOP_PIXLD_SAMPLEID 5
+#define NV50_IR_SUBOP_SHFL_IDX 0
+#define NV50_IR_SUBOP_SHFL_UP 1
+#define NV50_IR_SUBOP_SHFL_DOWN 2
+#define NV50_IR_SUBOP_SHFL_BFLY 3
+#define NV50_IR_SUBOP_LOAD_LOCKED 1
+#define NV50_IR_SUBOP_STORE_UNLOCKED 2
#define NV50_IR_SUBOP_MADSP_SD 0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000)
#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000)
#define NV50_IR_SUBOP_Vn(n) ((n) >> 14)
+#define NV50_IR_SUBOP_VOTE_ALL 0
+#define NV50_IR_SUBOP_VOTE_ANY 1
+#define NV50_IR_SUBOP_VOTE_UNI 2
+
+#define NV50_IR_SUBOP_MINMAX_LOW 1
+#define NV50_IR_SUBOP_MINMAX_MED 2
+#define NV50_IR_SUBOP_MINMAX_HIGH 3
+
+// xmad(src0, src1, 0) << 16 + src2
+#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
+// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
+#define NV50_IR_SUBOP_XMAD_MRG (1 << 1)
+// xmad(src0, src1, src2.lo)
+#define NV50_IR_SUBOP_XMAD_CLO (1 << 2)
+// xmad(src0, src1, src2.hi)
+#define NV50_IR_SUBOP_XMAD_CHI (2 << 2)
+// if both operands to the multiplication are non-zero, subtract 65536 for each
+// negative operand
+#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2)
+// xmad(src0, src1, src2) + src1 << 16
+#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2)
+#define NV50_IR_SUBOP_XMAD_CMODE_SHIFT 2
+#define NV50_IR_SUBOP_XMAD_CMODE_MASK (0x7 << NV50_IR_SUBOP_XMAD_CMODE_SHIFT)
+
+// use the high 16 bits instead of the low 16 bits for the multiplication.
+// if the instruction's sType is signed, sign extend the operand from 16 bits
+// to 32 before multiplication.
+#define NV50_IR_SUBOP_XMAD_H1_SHIFT 5
+#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (NV50_IR_SUBOP_XMAD_H1_SHIFT + (i)))
+#define NV50_IR_SUBOP_XMAD_H1_MASK (0x3 << NV50_IR_SUBOP_XMAD_H1_SHIFT)
enum DataType
{
FILE_MEMORY_CONST,
FILE_SHADER_INPUT,
FILE_SHADER_OUTPUT,
+ FILE_MEMORY_BUFFER,
FILE_MEMORY_GLOBAL,
FILE_MEMORY_SHARED,
FILE_MEMORY_LOCAL,
TEX_TARGET_COUNT
};
+enum ImgFormat
+{
+ FMT_NONE,
+
+ FMT_RGBA32F,
+ FMT_RGBA16F,
+ FMT_RG32F,
+ FMT_RG16F,
+ FMT_R11G11B10F,
+ FMT_R32F,
+ FMT_R16F,
+
+ FMT_RGBA32UI,
+ FMT_RGBA16UI,
+ FMT_RGB10A2UI,
+ FMT_RGBA8UI,
+ FMT_RG32UI,
+ FMT_RG16UI,
+ FMT_RG8UI,
+ FMT_R32UI,
+ FMT_R16UI,
+ FMT_R8UI,
+
+ FMT_RGBA32I,
+ FMT_RGBA16I,
+ FMT_RGBA8I,
+ FMT_RG32I,
+ FMT_RG16I,
+ FMT_RG8I,
+ FMT_R32I,
+ FMT_R16I,
+ FMT_R8I,
+
+ FMT_RGBA16,
+ FMT_RGB10A2,
+ FMT_RGBA8,
+ FMT_RG16,
+ FMT_RG8,
+ FMT_R16,
+ FMT_R8,
+
+ FMT_RGBA16_SNORM,
+ FMT_RGBA8_SNORM,
+ FMT_RG16_SNORM,
+ FMT_RG8_SNORM,
+ FMT_R16_SNORM,
+ FMT_R8_SNORM,
+
+ FMT_BGRA8,
+
+ IMG_FORMAT_COUNT,
+};
+
+enum ImgType {
+ UINT,
+ SINT,
+ UNORM,
+ SNORM,
+ FLOAT,
+};
+
enum SVSemantic
{
SV_POSITION, // WPOS
SV_SAMPLE_INDEX,
SV_SAMPLE_POS,
SV_SAMPLE_MASK,
- SV_TESS_FACTOR,
+ SV_TESS_OUTER,
+ SV_TESS_INNER,
SV_TESS_COORD,
SV_TID,
+ SV_COMBINED_TID,
SV_CTAID,
SV_NTID,
SV_GRIDID,
SV_LBASE,
SV_SBASE,
SV_VERTEX_STRIDE,
+ SV_INVOCATION_INFO,
+ SV_THREAD_KILL,
+ SV_BASEVERTEX,
+ SV_BASEINSTANCE,
+ SV_DRAWID,
+ SV_WORK_DIM,
+ SV_LANEMASK_EQ,
+ SV_LANEMASK_LT,
+ SV_LANEMASK_LE,
+ SV_LANEMASK_GT,
+ SV_LANEMASK_GE,
SV_UNDEFINED,
SV_LAST
};
public:
Modifier mod;
int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i])
- uint8_t swizzle;
bool usedAsPtr; // for printing
inline const Symbol *asSym() const;
inline const ImmediateValue *asImm() const;
- inline bool inFile(DataFile f) { return reg.file == f; }
+ inline bool inFile(DataFile f) const { return reg.file == f; }
static inline Value *get(Iterator&);
- std::list<ValueRef *> uses;
+ unordered_set<ValueRef *> uses;
std::list<ValueDef *> defs;
- typedef std::list<ValueRef *>::iterator UseIterator;
- typedef std::list<ValueRef *>::const_iterator UseCIterator;
+ typedef unordered_set<ValueRef *>::iterator UseIterator;
+ typedef unordered_set<ValueRef *>::const_iterator UseCIterator;
typedef std::list<ValueDef *>::iterator DefIterator;
typedef std::list<ValueDef *>::const_iterator DefCIterator;
bool isActionEqual(const Instruction *) const;
bool isResultEqual(const Instruction *) const;
+ // check whether the defs interfere with srcs and defs of another instruction
+ bool canCommuteDefDef(const Instruction *) const;
+ bool canCommuteDefSrc(const Instruction *) const;
+
void print() const;
inline CmpInstruction *asCmp();
unsigned perPatch : 1;
unsigned exit : 1; // terminate program after insn
unsigned mask : 4; // for vector ops
+ // prevent algebraic optimisations that aren't bit-for-bit identical
+ unsigned precise : 1;
int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor
enum TexQuery
{
- TXQ_DIMS,
- TXQ_TYPE,
+ TXQ_DIMS, /* x, y, z, levels */
+ TXQ_TYPE, /* ?, ?, samples, ? */
TXQ_SAMPLE_POSITION,
TXQ_FILTER,
TXQ_LOD,
enum TexTarget target;
};
+public:
+ struct ImgFormatDesc
+ {
+ char name[19];
+ uint8_t components;
+ uint8_t bits[4];
+ ImgType type;
+ bool bgra;
+ };
+
+ static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT];
+
public:
TexInstruction(Function *, operation);
virtual ~TexInstruction();
bool liveOnly; // only execute on live pixels of a quad (optimization)
bool levelZero;
bool derivAll;
+ bool bindless;
int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets
int8_t offset[3]; // only used on nv50
enum TexQuery query;
+ const struct ImgFormatDesc *format;
+
+ bool scalar; // for GM107s TEXS, TLDS, TLD4S
} tex;
ValueRef dPdx[3];
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
bool makeFromTGSI(struct nv50_ir_prog_info *);
- bool makeFromSM4(struct nv50_ir_prog_info *);
bool convertToSSA();
bool optimizeSSA(int level);
bool optimizePostRA(int level);
uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
int maxGPR;
+ bool fp64;
MemoryPool mem_Instruction;
MemoryPool mem_CmpInstruction;