#include <list>
#include <vector>
+#include "codegen/unordered_set.h"
#include "codegen/nv50_ir_util.h"
#include "codegen/nv50_ir_graph.h"
OP_MAD,
OP_FMA,
OP_SAD, // abs(src0 - src1) + src2
+ OP_SHLADD,
OP_ABS,
OP_NEG,
OP_NOT,
OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
+ OP_AFETCH, // fetch base address of shader input (a[%r1+0x10])
OP_EXPORT,
OP_LINTERP,
OP_PINTERP,
OP_TXQ, // texture size query
OP_TXD, // texture derivatives
OP_TXG, // texture gather
+ OP_TXLQ, // texture query lod
OP_TEXCSAA, // texture op for coverage sampling
OP_TEXPREP, // turn cube map array into 2d array coordinates
OP_SULDB, // surface load (raw)
OP_SUBFM, // surface bitfield manipulation
OP_SUCLAMP, // clamp surface coordinates
OP_SUEAU, // surface effective address
+ OP_SUQ, // surface query
OP_MADSP, // special integer multiply-add
OP_TEXBAR, // texture dependency barrier
OP_DFDX,
OP_DFDY,
OP_RDSV, // read system value
OP_WRSV, // write system value
+ OP_PIXLD, // get info about raster object or surfaces
OP_QUADOP,
OP_QUADON,
OP_QUADPOP,
OP_POPCNT, // bitcount(src0 & src1)
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
+ OP_BFIND, // find highest/lowest set bit
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
OP_ATOM,
OP_BAR, // execution barrier, sources = { id, thread count, predicate }
OP_VSHL,
OP_VSEL,
OP_CCTL, // cache control
+ OP_SHFL, // warp shuffle
+ OP_VOTE,
+ OP_BUFQ, // buffer query
OP_LAST
};
#define NV50_IR_SUBOP_LDC_IS 2
#define NV50_IR_SUBOP_LDC_ISL 3
#define NV50_IR_SUBOP_SHIFT_WRAP 1
+#define NV50_IR_SUBOP_SHIFT_HIGH 2
#define NV50_IR_SUBOP_EMU_PRERET 1
// Passes its argument through unchanged as the sub-op value. The expansion is
// parenthesized so a compound argument (e.g. `a | b`) stays a single operand
// when the macro is used inside a larger expression.
#define NV50_IR_SUBOP_TEXBAR(n) (n)
#define NV50_IR_SUBOP_MOV_FINAL 1
#define NV50_IR_SUBOP_EXTBF_REV 1
+#define NV50_IR_SUBOP_BFIND_SAMT 1
+#define NV50_IR_SUBOP_RCPRSQ_64H 1
#define NV50_IR_SUBOP_PERMT_F4E 1
#define NV50_IR_SUBOP_PERMT_B4E 2
#define NV50_IR_SUBOP_PERMT_RC8 3
// SUCLAMP sub-op encodings: a per-variant base (SD = 0, PL = 5, BL = 10) plus
// `r`, OR'ed with 0x10 when `d` equals 2. Both parameters are parenthesized so
// that compound argument expressions are compared/added as a whole instead of
// being split by operator precedence (e.g. `d` = `x | y`).
#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | (((d) == 2) ? 0x10 : 0))
+#define NV50_IR_SUBOP_PIXLD_COUNT 0
+#define NV50_IR_SUBOP_PIXLD_COVMASK 1
+#define NV50_IR_SUBOP_PIXLD_COVERED 2
+#define NV50_IR_SUBOP_PIXLD_OFFSET 3
+#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4
+#define NV50_IR_SUBOP_PIXLD_SAMPLEID 5
+#define NV50_IR_SUBOP_SHFL_IDX 0
+#define NV50_IR_SUBOP_SHFL_UP 1
+#define NV50_IR_SUBOP_SHFL_DOWN 2
+#define NV50_IR_SUBOP_SHFL_BFLY 3
+#define NV50_IR_SUBOP_LOAD_LOCKED 1
+#define NV50_IR_SUBOP_STORE_UNLOCKED 2
#define NV50_IR_SUBOP_MADSP_SD 0xffff
// Yes, we could represent those with DataType.
// Or put the type into the operation and have a couple of thousand values in
// that enum.
//
// Packed sub-op layout produced by V2/V4 (as implied by the shifts below):
//   `a` is OR'ed in at bit 0, `b` is shifted to bit 5, `d` is shifted to bit 10,
//   and a marker bit is set: 0x4000 (bit 14) for V2, 0x8000 (bit 15) for V4.
// The macros do not mask their arguments, so a value that does not fit its
// field (5 bits for `a`/`b`, 4 bits for `d` before reaching bit 14) spills
// into the neighbouring field or the marker bits.
#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000)
#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000)
// Drops the low 14 bits, leaving the marker bits: 1 for a V2 value, 2 for a V4
// value (given in-range fields), 0 when neither marker is set.
#define NV50_IR_SUBOP_Vn(n) ((n) >> 14)
+#define NV50_IR_SUBOP_VOTE_ALL 0
+#define NV50_IR_SUBOP_VOTE_ANY 1
+#define NV50_IR_SUBOP_VOTE_UNI 2
+
+#define NV50_IR_SUBOP_MINMAX_LOW 1
+#define NV50_IR_SUBOP_MINMAX_MED 2
+#define NV50_IR_SUBOP_MINMAX_HIGH 3
enum DataType
{
FILE_MEMORY_CONST,
FILE_SHADER_INPUT,
FILE_SHADER_OUTPUT,
+ FILE_MEMORY_BUFFER,
FILE_MEMORY_GLOBAL,
FILE_MEMORY_SHARED,
FILE_MEMORY_LOCAL,
TEX_TARGET_COUNT
};
+enum ImgFormat // image format identifiers; IMG_FORMAT_COUNT below sizes the formatTable array of ImgFormatDesc
+{
+ FMT_NONE, // first enumerator, value 0
+
+ FMT_RGBA32F, // NOTE(review): "F"-suffixed group presumably corresponds to ImgType FLOAT — confirm against formatTable
+ FMT_RGBA16F,
+ FMT_RG32F,
+ FMT_RG16F,
+ FMT_R11G11B10F,
+ FMT_R32F,
+ FMT_R16F,
+
+ FMT_RGBA32UI, // NOTE(review): "UI"-suffixed group presumably corresponds to ImgType UINT — confirm
+ FMT_RGBA16UI,
+ FMT_RGB10A2UI,
+ FMT_RGBA8UI,
+ FMT_RG32UI,
+ FMT_RG16UI,
+ FMT_RG8UI,
+ FMT_R32UI,
+ FMT_R16UI,
+ FMT_R8UI,
+
+ FMT_RGBA32I, // NOTE(review): "I"-suffixed group presumably corresponds to ImgType SINT — confirm
+ FMT_RGBA16I,
+ FMT_RGBA8I,
+ FMT_RG32I,
+ FMT_RG16I,
+ FMT_RG8I,
+ FMT_R32I,
+ FMT_R16I,
+ FMT_R8I,
+
+ FMT_RGBA16, // NOTE(review): unsuffixed group presumably corresponds to ImgType UNORM — confirm
+ FMT_RGB10A2,
+ FMT_RGBA8,
+ FMT_RG16,
+ FMT_RG8,
+ FMT_R16,
+ FMT_R8,
+
+ FMT_RGBA16_SNORM, // NOTE(review): "_SNORM" group presumably corresponds to ImgType SNORM — confirm
+ FMT_RGBA8_SNORM,
+ FMT_RG16_SNORM,
+ FMT_RG8_SNORM,
+ FMT_R16_SNORM,
+ FMT_R8_SNORM,
+
+ FMT_BGRA8, // NOTE(review): presumably the entry with ImgFormatDesc::bgra set — confirm
+
+ IMG_FORMAT_COUNT, // must stay last: equals the number of formats above and is used as the formatTable array length
+};
+
+enum ImgType { // type tag stored in ImgFormatDesc::type
+ UINT, // NOTE(review): presumably unsigned integer data — confirm
+ SINT, // NOTE(review): presumably signed integer data — confirm
+ UNORM, // NOTE(review): presumably unsigned normalized data — confirm
+ SNORM, // NOTE(review): presumably signed normalized data — confirm
+ FLOAT, // NOTE(review): presumably floating-point data — confirm
+};
+
enum SVSemantic
{
SV_POSITION, // WPOS
SV_POINT_COORD,
SV_CLIP_DISTANCE,
SV_SAMPLE_INDEX,
- SV_TESS_FACTOR,
+ SV_SAMPLE_POS,
+ SV_SAMPLE_MASK,
+ SV_TESS_OUTER,
+ SV_TESS_INNER,
SV_TESS_COORD,
SV_TID,
SV_CTAID,
SV_LBASE,
SV_SBASE,
SV_VERTEX_STRIDE,
+ SV_INVOCATION_INFO,
+ SV_THREAD_KILL,
+ SV_BASEVERTEX,
+ SV_BASEINSTANCE,
+ SV_DRAWID,
+ SV_WORK_DIM,
SV_UNDEFINED,
SV_LAST
};
inline const Symbol *asSym() const;
inline const ImmediateValue *asImm() const;
- inline bool inFile(DataFile f) { return reg.file == f; }
+ inline bool inFile(DataFile f) const { return reg.file == f; }
static inline Value *get(Iterator&);
- std::list<ValueRef *> uses;
+ unordered_set<ValueRef *> uses;
std::list<ValueDef *> defs;
- typedef std::list<ValueRef *>::iterator UseIterator;
- typedef std::list<ValueRef *>::const_iterator UseCIterator;
+ typedef unordered_set<ValueRef *>::iterator UseIterator;
+ typedef unordered_set<ValueRef *>::const_iterator UseCIterator;
typedef std::list<ValueDef *>::iterator DefIterator;
typedef std::list<ValueDef *>::const_iterator DefCIterator;
bool isActionEqual(const Instruction *) const;
bool isResultEqual(const Instruction *) const;
+ // check whether the defs interfere with srcs and defs of another instruction
+ bool canCommuteDefDef(const Instruction *) const;
+ bool canCommuteDefSrc(const Instruction *) const;
+
void print() const;
inline CmpInstruction *asCmp();
int8_t flagsDef;
int8_t flagsSrc;
- uint8_t sched; // scheduling data (NOTE: maybe move to separate storage)
+ uint32_t sched; // scheduling data (NOTE: maybe move to separate storage)
BasicBlock *bb;
enum TexQuery
{
- TXQ_DIMS,
- TXQ_TYPE,
+ TXQ_DIMS, /* x, y, z, levels */
+ TXQ_TYPE, /* ?, ?, samples, ? */
TXQ_SAMPLE_POSITION,
TXQ_FILTER,
TXQ_LOD,
enum TexTarget target;
};
+public:
+ struct ImgFormatDesc // element type of the static formatTable array declared right after this struct
+ {
+ char name[19]; // fixed 19-byte character buffer; NOTE(review): presumably a NUL-terminated format name — confirm
+ uint8_t components; // NOTE(review): presumably the number of channels in the format — confirm
+ uint8_t bits[4]; // four 8-bit entries; NOTE(review): presumably per-component bit widths — confirm
+ ImgType type; // one of the ImgType enumerators (UINT, SINT, UNORM, SNORM, FLOAT)
+ bool bgra; // NOTE(review): presumably marks BGRA component ordering (cf. FMT_BGRA8) — confirm
+ };
+
+ static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT];
+
public:
TexInstruction(Function *, operation);
virtual ~TexInstruction();
bool derivAll;
int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets
- int8_t offset[4][3];
+ int8_t offset[3]; // only used on nv50
enum TexQuery query;
+ const struct ImgFormatDesc *format;
} tex;
ValueRef dPdx[3];
ValueRef dPdy[3];
+ ValueRef offset[4][3];
};
class CmpInstruction : public Instruction