static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
+static nv50_ir::CacheMode translateCacheMode(uint qualifier);
+static nv50_ir::ImgFormat translateImgFormat(uint format);
class Instruction
{
return tgsi_util_get_src_register_swizzle(®, chan);
}
+   // Array ID taken from this source's indirect-addressing info, or 0 when
+   // dimension 0 of the register is not indirectly addressed.
+   int getArrayId() const
+   {
+      return isIndirect(0) ? fsr->Indirect.ArrayID : 0;
+   }
+
nv50_ir::Modifier getMod(int chan) const;
SrcRegister getIndirect(int dim) const
return SrcRegister(fdr->Indirect);
}
+   // Array ID taken from this destination's indirect-addressing info, or 0
+   // when dimension 0 of the register is not indirectly addressed.
+   int getArrayId() const
+   {
+      return isIndirect(0) ? fdr->Indirect.ArrayID : 0;
+   }
+
private:
const struct tgsi_dst_register reg;
const struct tgsi_full_dst_register *fdr;
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
+   // Cache mode for this instruction: CACHE_CA when the instruction's Memory
+   // flag is clear, otherwise the translation of its memory qualifier bits.
+   nv50_ir::CacheMode getCacheMode() const {
+      return insn->Instruction.Memory
+         ? translateCacheMode(insn->Memory.Qualifier)
+         : nv50_ir::CACHE_CA;
+   }
+
inline uint getLabel() { return insn->Label.Label; }
unsigned getSaturate() const { return insn->Instruction.Saturate; }
x |= 2;
return x;
}
+ case TGSI_OPCODE_PK2H:
+ return 0x3;
+ case TGSI_OPCODE_UP2H:
+ return 0x1;
default:
break;
}
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
- case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
+ case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER;
+ case TGSI_FILE_IMAGE: return nv50_ir::FILE_MEMORY_GLOBAL;
+ case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
+ case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
+ case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
+ case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
+ case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
+ case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM;
default:
assert(0);
return nv50_ir::SV_CLOCK;
}
}
+// Map a mask of TGSI_MEMORY_* qualifier bits to an nv50_ir cache mode.
+// VOLATILE is tested first and therefore wins over COHERENT; with neither
+// bit set the result is CACHE_CA.
+static nv50_ir::CacheMode translateCacheMode(uint qualifier)
+{
+   const bool isVolatile = (qualifier & TGSI_MEMORY_VOLATILE) != 0;
+   const bool isCoherent = (qualifier & TGSI_MEMORY_COHERENT) != 0;
+
+   if (isVolatile)
+      return nv50_ir::CACHE_CV;
+   return isCoherent ? nv50_ir::CACHE_CG : nv50_ir::CACHE_CA;
+}
+
+// Translate a PIPE_FORMAT_* value to the matching nv50_ir::ImgFormat.
+//
+// Only the formats listed below are recognised. Any other value trips the
+// assert; when asserts are compiled out the function returns FMT_NONE.
+static nv50_ir::ImgFormat translateImgFormat(uint format)
+{
+
+// Expands to one switch case mapping PIPE_FORMAT_<a> to nv50_ir::FMT_<b>.
+#define FMT_CASE(a, b) \
+  case PIPE_FORMAT_ ## a: return nv50_ir::FMT_ ## b
+
+   switch (format) {
+   FMT_CASE(NONE, NONE);
+
+   FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
+   FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
+   FMT_CASE(R32G32_FLOAT, RG32F);
+   FMT_CASE(R16G16_FLOAT, RG16F);
+   FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
+   FMT_CASE(R32_FLOAT, R32F);
+   FMT_CASE(R16_FLOAT, R16F);
+
+   FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
+   FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
+   FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
+   FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
+   FMT_CASE(R32G32_UINT, RG32UI);
+   FMT_CASE(R16G16_UINT, RG16UI);
+   FMT_CASE(R8G8_UINT, RG8UI);
+   FMT_CASE(R32_UINT, R32UI);
+   FMT_CASE(R16_UINT, R16UI);
+   FMT_CASE(R8_UINT, R8UI);
+
+   FMT_CASE(R32G32B32A32_SINT, RGBA32I);
+   FMT_CASE(R16G16B16A16_SINT, RGBA16I);
+   FMT_CASE(R8G8B8A8_SINT, RGBA8I);
+   FMT_CASE(R32G32_SINT, RG32I);
+   FMT_CASE(R16G16_SINT, RG16I);
+   FMT_CASE(R8G8_SINT, RG8I);
+   FMT_CASE(R32_SINT, R32I);
+   FMT_CASE(R16_SINT, R16I);
+   FMT_CASE(R8_SINT, R8I);
+
+   FMT_CASE(R16G16B16A16_UNORM, RGBA16);
+   FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
+   FMT_CASE(R8G8B8A8_UNORM, RGBA8);
+   FMT_CASE(R16G16_UNORM, RG16);
+   FMT_CASE(R8G8_UNORM, RG8);
+   FMT_CASE(R16_UNORM, R16);
+   FMT_CASE(R8_UNORM, R8);
+
+   FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
+   FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
+   FMT_CASE(R16G16_SNORM, RG16_SNORM);
+   FMT_CASE(R8G8_SNORM, RG8_SNORM);
+   FMT_CASE(R16_SNORM, R16_SNORM);
+   FMT_CASE(R8_SNORM, R8_SNORM);
+   }
+
+// The helper is only meaningful inside the switch above; do not let it leak
+// into the rest of the translation unit.
+#undef FMT_CASE
+
+   assert(!"Unexpected format");
+   return nv50_ir::FMT_NONE;
+}
+
nv50_ir::DataType Instruction::inferSrcType() const
{
switch (getOpcode()) {
case TGSI_OPCODE_ATOMUMAX:
case TGSI_OPCODE_UBFE:
case TGSI_OPCODE_UMSB:
+ case TGSI_OPCODE_UP2H:
+ case TGSI_OPCODE_VOTE_ALL:
+ case TGSI_OPCODE_VOTE_ANY:
+ case TGSI_OPCODE_VOTE_EQ:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_DRCP:
case TGSI_OPCODE_DSQRT:
case TGSI_OPCODE_DMAD:
+ case TGSI_OPCODE_DFMA:
case TGSI_OPCODE_DFRAC:
case TGSI_OPCODE_DRSQ:
case TGSI_OPCODE_DTRUNC:
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_PK2H:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_UP2H:
return nv50_ir::TYPE_F32;
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
NV50_IR_OPCODE_CASE(RCP, RCP);
NV50_IR_OPCODE_CASE(RSQ, RSQ);
+ NV50_IR_OPCODE_CASE(SQRT, SQRT);
NV50_IR_OPCODE_CASE(MUL, MUL);
NV50_IR_OPCODE_CASE(ADD, ADD);
NV50_IR_OPCODE_CASE(SLT, SET);
NV50_IR_OPCODE_CASE(SGE, SET);
NV50_IR_OPCODE_CASE(MAD, MAD);
+ NV50_IR_OPCODE_CASE(FMA, FMA);
NV50_IR_OPCODE_CASE(SUB, SUB);
NV50_IR_OPCODE_CASE(FLR, FLOOR);
NV50_IR_OPCODE_CASE(SAD, SAD);
NV50_IR_OPCODE_CASE(TXF, TXF);
NV50_IR_OPCODE_CASE(TXQ, TXQ);
+ NV50_IR_OPCODE_CASE(TXQS, TXQ);
NV50_IR_OPCODE_CASE(TG4, TXG);
NV50_IR_OPCODE_CASE(LODQ, TXLQ);
NV50_IR_OPCODE_CASE(DRCP, RCP);
NV50_IR_OPCODE_CASE(DSQRT, SQRT);
NV50_IR_OPCODE_CASE(DMAD, MAD);
+ NV50_IR_OPCODE_CASE(DFMA, FMA);
NV50_IR_OPCODE_CASE(D2I, CVT);
NV50_IR_OPCODE_CASE(D2U, CVT);
NV50_IR_OPCODE_CASE(I2D, CVT);
NV50_IR_OPCODE_CASE(IMSB, BFIND);
NV50_IR_OPCODE_CASE(UMSB, BFIND);
+ NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE);
+ NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE);
+ NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE);
+
NV50_IR_OPCODE_CASE(END, EXIT);
default:
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_UMUL_HI:
return NV50_IR_SUBOP_MUL_HIGH;
+ case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL;
+ case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY;
+ case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI;
default:
return 0;
}
// these registers are per-subroutine, cannot be used for parameter passing
std::set<Location> locals;
- bool mainTempsInLMem;
+ std::set<int> indirectTempArrays;
+ std::map<int, int> indirectTempOffsets;
+ std::map<int, std::pair<int, int> > tempArrayInfo;
+ std::vector<int> tempArrayId;
int clipVertexOutput;
};
std::vector<TextureView> textureViews;
+ /*
struct Resource {
uint8_t target; // TGSI_TEXTURE_*
bool raw;
uint8_t slot; // $surface index
};
std::vector<Resource> resources;
+ */
+
+ struct Image { // per-image info recorded from TGSI_FILE_IMAGE declarations
+ uint8_t target; // TGSI_TEXTURE_* (from decl->Image.Resource)
+ bool raw; // copied from decl->Image.Raw
+ uint8_t slot; // set to the image's register index when it is declared
+ uint16_t format; // PIPE_FORMAT_* (from decl->Image.Format)
+ };
+ std::vector<Image> images;
+
+ struct MemoryFile { // one entry per declared TGSI_FILE_MEMORY register
+ uint8_t mem_type; // TGSI_MEMORY_TYPE_* (from decl->Declaration.MemType)
+ };
+ std::vector<MemoryFile> memoryFiles;
private:
int inferSysValDirection(unsigned sn) const;
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
tgsi_dump(tokens, 0);
-
- mainTempsInLMem = false;
}
Source::~Source()
clipVertexOutput = -1;
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
- resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
+ //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
+ images.resize(scan.file_max[TGSI_FILE_IMAGE] + 1);
+ tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
+ memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
info->immd.bufSize = 0;
}
info->io.viewportId = -1;
+ info->prop.cp.numThreads = 1;
info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
}
tgsi_parse_free(&parse);
- if (mainTempsInLMem)
- info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
+ if (indirectTempArrays.size()) {
+ int tempBase = 0;
+ for (std::set<int>::const_iterator it = indirectTempArrays.begin();
+ it != indirectTempArrays.end(); ++it) {
+ std::pair<int, int>& info = tempArrayInfo[*it];
+ indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first));
+ tempBase += info.second;
+ }
+ info->bin.tlsSpace += tempBase * 16;
+ }
if (info->io.genUserClip > 0) {
- info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
+ info->io.clipDistances = info->io.genUserClip;
const unsigned int nOut = (info->io.genUserClip + 3) / 4;
info->out[i].id = i;
info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
info->out[i].si = n;
- info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
+ info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
}
}
else
info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
break;
+ case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
+ case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
+ case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
+ info->prop.cp.numThreads *= prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+ info->io.clipDistances = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
+ info->io.cullDistances = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_NEXT_SHADER:
+ /* Do not need to know the next shader stage. */
+ break;
+ case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
+ info->prop.fp.earlyFragTests = prop->u[0].Data;
+ break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
unsigned sn = TGSI_SEMANTIC_GENERIC;
unsigned si = 0;
const unsigned first = decl->Range.First, last = decl->Range.Last;
+ const int arrayId = decl->Array.ArrayID;
if (decl->Declaration.Semantic) {
sn = decl->Semantic.Name;
si = decl->Semantic.Index;
}
- if (decl->Declaration.Local) {
+ if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) {
for (i = first; i <= last; ++i) {
for (c = 0; c < 4; ++c) {
locals.insert(
default:
break;
}
- if (decl->Interp.Location || info->io.sampleInterp)
+ if (decl->Interp.Location)
info->in[i].centroid = 1;
}
clipVertexOutput = i;
break;
case TGSI_SEMANTIC_CLIPDIST:
- info->io.clipDistanceMask |=
- decl->Declaration.UsageMask << (si * 4);
info->io.genUserClip = -1;
break;
case TGSI_SEMANTIC_SAMPLEMASK:
case TGSI_SEMANTIC_VERTEXID:
info->io.vertexId = first;
break;
+ case TGSI_SEMANTIC_BASEVERTEX:
+ case TGSI_SEMANTIC_BASEINSTANCE:
+ case TGSI_SEMANTIC_DRAWID:
+ info->prop.vp.usesDrawParameters = true;
+ break;
+ case TGSI_SEMANTIC_SAMPLEID:
+ case TGSI_SEMANTIC_SAMPLEPOS:
+ info->prop.fp.persampleInvocation = true;
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->prop.fp.usesSampleMaskIn = true;
+ break;
default:
break;
}
}
}
break;
+/*
case TGSI_FILE_RESOURCE:
for (i = first; i <= last; ++i) {
resources[i].target = decl->Resource.Resource;
resources[i].slot = i;
}
break;
+*/
+ case TGSI_FILE_IMAGE:
+ for (i = first; i <= last; ++i) {
+ images[i].target = decl->Image.Resource;
+ images[i].raw = decl->Image.Raw;
+ images[i].format = decl->Image.Format;
+ images[i].slot = i;
+ }
+ break;
case TGSI_FILE_SAMPLER_VIEW:
for (i = first; i <= last; ++i)
textureViews[i].target = decl->SamplerView.Resource;
break;
+ case TGSI_FILE_MEMORY:
+ for (i = first; i <= last; ++i)
+ memoryFiles[i].mem_type = decl->Declaration.MemType;
+ break;
case TGSI_FILE_NULL:
case TGSI_FILE_TEMPORARY:
+ for (i = first; i <= last; ++i)
+ tempArrayId[i] = arrayId;
+ if (arrayId)
+ tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
+ first, last - first + 1)));
+ break;
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_BUFFER:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
info->numBarriers = 1;
if (insn.dstCount()) {
- if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
- Instruction::DstRegister dst = insn.getDst(0);
+ Instruction::DstRegister dst = insn.getDst(0);
+ if (dst.getFile() == TGSI_FILE_OUTPUT) {
if (dst.isIndirect(0))
for (unsigned i = 0; i < info->numOutputs; ++i)
info->out[i].mask = 0xf;
if (isEdgeFlagPassthrough(insn))
info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
} else
- if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
- if (insn.getDst(0).isIndirect(0))
- mainTempsInLMem = true;
+ if (dst.getFile() == TGSI_FILE_TEMPORARY) {
+ if (dst.isIndirect(0))
+ indirectTempArrays.insert(dst.getArrayId());
+ } else
+ if (dst.getFile() == TGSI_FILE_BUFFER ||
+ dst.getFile() == TGSI_FILE_IMAGE ||
+ (dst.getFile() == TGSI_FILE_MEMORY &&
+ memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
+ info->io.globalAccess |= 0x2;
}
}
Instruction::SrcRegister src = insn.getSrc(s);
if (src.getFile() == TGSI_FILE_TEMPORARY) {
if (src.isIndirect(0))
- mainTempsInLMem = true;
+ indirectTempArrays.insert(src.getArrayId());
} else
- if (src.getFile() == TGSI_FILE_RESOURCE) {
- if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
- info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+ if (src.getFile() == TGSI_FILE_BUFFER ||
+ src.getFile() == TGSI_FILE_IMAGE ||
+ (src.getFile() == TGSI_FILE_MEMORY &&
+ memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
+ info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
} else
if (src.getFile() == TGSI_FILE_OUTPUT) {
unsigned int r;
switch (getSrc(s).getFile()) {
+/*
case TGSI_FILE_RESOURCE:
r = getSrc(s).getIndex(0);
return translateTexture(code->resources.at(r).target);
+*/
case TGSI_FILE_SAMPLER_VIEW:
r = getSrc(s).getIndex(0);
return translateTexture(code->textureViews.at(r).target);
Value *shiftAddress(Value *);
Value *getVertexBase(int s);
+ Value *getOutputBase(int s);
DataArray *getArrayForFile(unsigned file, int idx);
Value *fetchSrc(int s, int c);
Value *acquireDst(int d, int c);
void storeDst(const tgsi::Instruction::DstRegister dst, int c,
Value *val, Value *ptr);
+ void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
Value *applySrcMod(Value *, int s, int c);
Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
void handleTXF(Value *dst0[4], int R, int L_M);
- void handleTXQ(Value *dst0[4], enum TexQuery);
+ void handleTXQ(Value *dst0[4], enum TexQuery, int R);
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();
- Symbol *getResourceBase(int r);
- void getResourceCoords(std::vector<Value *>&, int r, int s);
+ // Symbol *getResourceBase(int r);
+ void getImageCoords(std::vector<Value *>&, int r, int s);
void handleLOAD(Value *dst0[4]);
void handleSTORE();
void handleINTERP(Value *dst0[4]);
+ uint8_t translateInterpMode(const struct nv50_ir_varying *var,
+ operation& op);
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
DataType srcTy;
DataArray tData; // TGSI_FILE_TEMPORARY
+ DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
DataArray aData; // TGSI_FILE_ADDRESS
DataArray pData; // TGSI_FILE_PREDICATE
DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
uint8_t vtxBaseValid;
+ Value *outBase; // base address of vertex out patch (for TCP)
+
Stack condBBs; // fork BB, then else clause BB
Stack joinBBs; // fork BB, for inserting join ops on ENDIF
Stack loopBBs; // loop headers
sym->reg.fileIndex = fileIdx;
+ if (tgsiFile == TGSI_FILE_MEMORY) {
+ switch (code->memoryFiles[fileIdx].mem_type) {
+ case TGSI_MEMORY_TYPE_GLOBAL:
+ /* No-op this is the default for TGSI_FILE_MEMORY */
+ sym->setFile(FILE_MEMORY_GLOBAL);
+ break;
+ case TGSI_MEMORY_TYPE_SHARED:
+ sym->setFile(FILE_MEMORY_SHARED);
+ break;
+ case TGSI_MEMORY_TYPE_INPUT:
+ assert(prog->getType() == Program::TYPE_COMPUTE);
+ assert(idx == -1);
+ sym->setFile(FILE_SHADER_INPUT);
+ address += info->prop.cp.inputOffset;
+ break;
+ default:
+ assert(0); /* TODO: Add support for global and private memory */
+ }
+ }
+
if (idx >= 0) {
if (sym->reg.file == FILE_SHADER_INPUT)
sym->setOffset(info->in[idx].slot[c] * 4);
return sym;
}
-static inline uint8_t
-translateInterpMode(const struct nv50_ir_varying *var, operation& op)
+uint8_t
+Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
{
uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
return vtxBase[s];
}
+// Return the base address value used when source @s addresses the outputs
+// with a second dimension: outBase plus the dimension-1 index, plus the
+// indirect part of that dimension if there is one. The value is built once
+// per source slot and then served from vtxBase[s], with vtxBaseValid
+// recording which slots are already populated.
+Value *
+Converter::getOutputBase(int s)
+{
+   assert(s < 5);
+
+   const unsigned validBit = 1 << s;
+   if (vtxBaseValid & validBit)
+      return vtxBase[s];
+
+   // Constant part of the dimension-1 index, optionally plus its indirect.
+   Value *dimOff = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
+   if (tgsi.getSrc(s).isIndirect(1)) {
+      dimOff = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
+                      fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
+                      dimOff);
+   }
+
+   vtxBaseValid |= validBit;
+   vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, dimOff);
+   return vtxBase[s];
+}
+
Value *
Converter::fetchSrc(int s, int c)
{
if (src.is2D()) {
switch (src.getFile()) {
case TGSI_FILE_OUTPUT:
+ dimRel = getOutputBase(s);
+ break;
case TGSI_FILE_INPUT:
dimRel = getVertexBase(s);
break;
{
switch (file) {
case TGSI_FILE_TEMPORARY:
- return &tData;
+ return idx == 0 ? &tData : &lData;
case TGSI_FILE_PREDICATE:
return &pData;
case TGSI_FILE_ADDRESS:
return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
}
+// If @arrayId has an entry in code->indirectTempOffsets, rebase @idx by the
+// recorded offset and force @idx2d to 1. Both are left untouched when the
+// array has no entry.
+void
+Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
+{
+   const std::map<int, int> &offsets = code->indirectTempOffsets;
+   const std::map<int, int>::const_iterator entry = offsets.find(arrayId);
+
+   if (entry != offsets.end()) {
+      idx += entry->second;
+      idx2d = 1;
+   }
+}
+
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
- const int idx2d = src.is2D() ? src.getIndex(1) : 0;
- const int idx = src.getIndex(0);
+ int idx2d = src.is2D() ? src.getIndex(1) : 0;
+ int idx = src.getIndex(0);
const int swz = src.getSwizzle(c);
Instruction *ld;
// don't load masked inputs, won't be assigned a slot
if (!ptr && !(info->in[idx].mask & (1 << swz)))
return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
- if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
- return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
return interpolate(src, c, shiftAddress(ptr));
} else
if (prog->getType() == Program::TYPE_GEOMETRY) {
ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
ld->perPatch = info->sv[idx].patch;
return ld->getDef(0);
+ case TGSI_FILE_TEMPORARY: {
+ int arrayid = src.getArrayId();
+ if (!arrayid)
+ arrayid = code->tempArrayId[idx];
+ adjustTempIndex(arrayid, idx, idx2d);
+ }
+ /* fallthrough */
default:
return getArrayForFile(src.getFile(), idx2d)->load(
sub.cur->values, idx, swz, shiftAddress(ptr));
{
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
const unsigned f = dst.getFile();
- const int idx = dst.getIndex(0);
- const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
+ int idx = dst.getIndex(0);
+ int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
- if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE)
+ if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY ||
+ f == TGSI_FILE_IMAGE)
return NULL;
if (dst.isIndirect(0) ||
(f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
return getScratch();
+ if (f == TGSI_FILE_TEMPORARY) {
+ int arrayid = dst.getArrayId();
+ if (!arrayid)
+ arrayid = code->tempArrayId[idx];
+ adjustTempIndex(arrayid, idx, idx2d);
+ }
+
return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
}
Value *val, Value *ptr)
{
const unsigned f = dst.getFile();
- const int idx = dst.getIndex(0);
- const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
+ int idx = dst.getIndex(0);
+ int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (f == TGSI_FILE_SYSTEM_VALUE) {
assert(!ptr);
f == TGSI_FILE_PREDICATE ||
f == TGSI_FILE_ADDRESS ||
f == TGSI_FILE_OUTPUT) {
+ if (f == TGSI_FILE_TEMPORARY) {
+ int arrayid = dst.getArrayId();
+ if (!arrayid)
+ arrayid = code->tempArrayId[idx];
+ adjustTempIndex(arrayid, idx, idx2d);
+ }
+
getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
} else {
assert(!"invalid dst file");
}
void
-Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
+Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
{
TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
tex->tex.query = query;
tex->tex.mask |= 1 << c;
tex->setDef(d++, dst0[c]);
}
- tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+ if (query == TXQ_DIMS)
+ tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+ else
+ tex->setSrc((c = 0), zero);
- setTexRS(tex, ++c, 1, -1);
+ setTexRS(tex, ++c, R, -1);
bb->insertTail(tex);
}
void
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
{
- Value *val;
Value *arg[4], *src[8];
Value *lod = NULL, *shd = NULL;
unsigned int s, c, d;
shd = fetchSrc(C >> 4, C & 3);
if (texi->op == OP_TXD) {
- for (c = 0; c < tgt.getDim(); ++c) {
+ for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
}
shd = src[n - 1];
}
- if (tgt.isCube()) {
- for (c = 0; c < 3; ++c)
- src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
- val = getScratch();
- mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
- mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
- mkOp1(OP_RCP, TYPE_F32, val, val);
- for (c = 0; c < 3; ++c)
- src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
- }
-
for (c = 0, d = 0; c < 4; ++c) {
if (dst[c]) {
texi->setDef(d++, dst[c]);
}
}
+/* Keep this around for now as reference when adding img support
static inline bool
isResourceSpecial(const int r)
{
switch (r) {
case TGSI_RESOURCE_GLOBAL:
- sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);
+ sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
+ info->io.auxCBSlot);
break;
case TGSI_RESOURCE_LOCAL:
assert(prog->getType() == Program::TYPE_COMPUTE);
coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
coords[0]);
}
-
+*/
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
return n + 1;
}
+// Look up the TGSI_TEXTURE_* target recorded for image @r and convert it to
+// the nv50_ir texture target. The lookup uses at(), so @r is bounds-checked.
+static inline nv50_ir::TexTarget
+getImageTarget(const tgsi::Source *code, int r)
+{
+   const unsigned tgsiTarget = code->images.at(r).target;
+   return tgsi::translateTexture(tgsiTarget);
+}
+
+// Return the formatTable entry describing the PIPE_FORMAT_* recorded for
+// image @r. The image lookup uses at(), so @r is bounds-checked.
+static inline const nv50_ir::TexInstruction::ImgFormatDesc *
+getImageFormat(const tgsi::Source *code, int r)
+{
+   const nv50_ir::ImgFormat fmt =
+      tgsi::translateImgFormat(code->images.at(r).format);
+   return &nv50_ir::TexInstruction::formatTable[fmt];
+}
+
+// Append to @coords the coordinate components of source @s needed to address
+// image @r: one per dimension of the image target, one more if the target
+// is an array or a cube, and finally component 3 of the source for
+// multisampled targets.
+void
+Converter::getImageCoords(std::vector<Value *> &coords, int r, int s)
+{
+   TexInstruction::Target target(getImageTarget(code, r));
+
+   int numCoords = target.getDim();
+   if (target.isArray() || target.isCube())
+      ++numCoords;
+
+   int comp = 0;
+   while (comp < numCoords) {
+      coords.push_back(fetchSrc(s, comp));
+      ++comp;
+   }
+
+   if (target.isMS())
+      coords.push_back(fetchSrc(s, 3));
+}
+
// For raw loads, granularity is 4 byte.
// Usage of the texture read mask on OP_SULDP is not allowed.
void
int c;
std::vector<Value *> off, src, ldv, def;
+ switch (tgsi.getSrc(0).getFile()) {
+ case TGSI_FILE_BUFFER:
+ case TGSI_FILE_MEMORY:
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Value *off;
+ Symbol *sym;
+ uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4;
+
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
+ tgsi.getSrc(1).getValueU32(0, info) +
+ src0_component_offset);
+ } else {
+ // yzw are ignored for buffers
+ off = fetchSrc(1, 0);
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
+ src0_component_offset);
+ }
+
+ Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
+ ld->cache = tgsi.getCacheMode();
+ if (tgsi.getSrc(0).isIndirect(0))
+ ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ }
+ break;
+ case TGSI_FILE_IMAGE: {
+ assert(!code->images[r].raw);
+
+ getImageCoords(off, r, 1);
+ def.resize(4);
+
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
+ def[c] = getScratch();
+ else
+ def[c] = dst0[c];
+ }
+
+ TexInstruction *ld =
+ mkTex(OP_SULDP, getImageTarget(code, r), code->images[r].slot, 0,
+ def, off);
+ ld->tex.mask = tgsi.getDst(0).getMask();
+ ld->tex.format = getImageFormat(code, r);
+ ld->cache = tgsi.getCacheMode();
+ if (tgsi.getSrc(0).isIndirect(0))
+ ld->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ if (dst0[c] != def[c])
+ mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
+ }
+ break;
+ default:
+ assert(!"Unsupported srcFile for LOAD");
+ }
+
+/* Keep this around for now as reference when adding img support
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
if (dst0[c] != def[c])
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
+*/
}
// For formatted stores, the write mask on OP_SUSTP can be used.
int c;
std::vector<Value *> off, src, dummy;
+ switch (tgsi.getDst(0).getFile()) {
+ case TGSI_FILE_BUFFER:
+ case TGSI_FILE_MEMORY:
+ for (c = 0; c < 4; ++c) {
+ if (!(tgsi.getDst(0).getMask() & (1 << c)))
+ continue;
+
+ Symbol *sym;
+ Value *off;
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
+ tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
+ } else {
+ // yzw are ignored for buffers
+ off = fetchSrc(0, 0);
+ sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
+ }
+
+ Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
+ st->cache = tgsi.getCacheMode();
+ if (tgsi.getDst(0).isIndirect(0))
+ st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
+ }
+ break;
+ case TGSI_FILE_IMAGE: {
+ assert(!code->images[r].raw);
+
+ getImageCoords(off, r, 0);
+ src = off;
+
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ src.push_back(fetchSrc(1, c));
+
+ TexInstruction *st =
+ mkTex(OP_SUSTP, getImageTarget(code, r), code->images[r].slot,
+ 0, dummy, src);
+ st->tex.mask = tgsi.getDst(0).getMask();
+ st->tex.format = getImageFormat(code, r);
+ st->cache = tgsi.getCacheMode();
+ if (tgsi.getDst(0).isIndirect(0))
+ st->setIndirectR(fetchSrc(tgsi.getDst(0).getIndirect(0), 0, NULL));
+ }
+ break;
+ default:
+ assert(!"Unsupported dstFile for STORE");
+ }
+
+/* Keep this around for now as reference when adding img support
getResourceCoords(off, r, 0);
src = off;
const int s = src.size();
mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
dummy, src)->tex.mask = tgsi.getDst(0).getMask();
}
+*/
}
// XXX: These only work on resources with the single-component u32/s32 formats.
std::vector<Value *> defv;
LValue *dst = getScratch();
+ switch (tgsi.getSrc(0).getFile()) {
+ case TGSI_FILE_BUFFER:
+ case TGSI_FILE_MEMORY:
+ for (int c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Instruction *insn;
+ Value *off = fetchSrc(1, c), *off2 = NULL;
+ Value *sym;
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
+ tgsi.getSrc(1).getValueU32(c, info));
+ else
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
+ if (tgsi.getSrc(0).isIndirect(0))
+ off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
+ if (subOp == NV50_IR_SUBOP_ATOM_CAS)
+ insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
+ else
+ insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
+ if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
+ insn->setIndirect(0, 0, off);
+ if (off2)
+ insn->setIndirect(0, 1, off2);
+ insn->subOp = subOp;
+ }
+ for (int c = 0; c < 4; ++c)
+ if (dst0[c])
+ dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
+ break;
+ case TGSI_FILE_IMAGE: {
+ assert(!code->images[r].raw);
+
+ getImageCoords(srcv, r, 1);
+ defv.push_back(dst);
+ srcv.push_back(fetchSrc(2, 0));
+
+ if (subOp == NV50_IR_SUBOP_ATOM_CAS)
+ srcv.push_back(fetchSrc(3, 0));
+
+ TexInstruction *tex = mkTex(OP_SUREDP, getImageTarget(code, r),
+ code->images[r].slot, 0, defv, srcv);
+ tex->subOp = subOp;
+ tex->tex.mask = 1;
+ tex->tex.format = getImageFormat(code, r);
+ tex->setType(ty);
+ if (tgsi.getSrc(0).isIndirect(0))
+ tex->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+
+ for (int c = 0; c < 4; ++c)
+ if (dst0[c])
+ dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
+ }
+ break;
+ default:
+ assert(!"Unsupported srcFile for ATOM");
+ }
+
+/* Keep this around for now as reference when adding img support
getResourceCoords(srcv, r, 1);
if (isResourceSpecial(r)) {
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
+*/
}
void
// Check whether the input is linear. All other attributes ignored.
Instruction *insn;
Value *offset = NULL, *ptr = NULL, *w = NULL;
+ Symbol *sym[4] = { NULL };
bool linear;
operation op;
int c, mode;
tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
- assert(src.getFile() == TGSI_FILE_INPUT);
- if (src.isIndirect(0))
- ptr = fetchSrc(src.getIndirect(0), 0, NULL);
-
- // XXX: no way to know interp mode if we don't know the index
- linear = info->in[ptr ? 0 : src.getIndex(0)].linear;
- if (linear) {
- op = OP_LINTERP;
- mode = NV50_IR_INTERP_LINEAR;
+ // In some odd cases, in large part due to varying packing, the source
+ // might not actually be an input. This is illegal TGSI, but it's easier to
+ // account for it here than it is to fix it where the TGSI is being
+ // generated. In that case, it's going to be a straight up mov (or sequence
+ // of mov's) from the input in question. We follow the mov chain to see
+ // which input we need to use.
+ if (src.getFile() != TGSI_FILE_INPUT) {
+ if (src.isIndirect(0)) {
+ ERROR("Ignoring indirect input interpolation\n");
+ return;
+ }
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ Value *val = fetchSrc(0, c);
+ assert(val->defs.size() == 1);
+ insn = val->getInsn();
+ while (insn->op == OP_MOV) {
+ assert(insn->getSrc(0)->defs.size() == 1);
+ insn = insn->getSrc(0)->getInsn();
+ if (!insn) {
+ ERROR("Miscompiling shader due to unhandled INTERP\n");
+ return;
+ }
+ }
+ if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) {
+ ERROR("Trying to interpolate non-input, this is not allowed.\n");
+ return;
+ }
+ sym[c] = insn->getSrc(0)->asSym();
+ assert(sym[c]);
+ op = insn->op;
+ mode = insn->ipa;
+ }
} else {
- op = OP_PINTERP;
- mode = NV50_IR_INTERP_PERSPECTIVE;
+ if (src.isIndirect(0))
+ ptr = fetchSrc(src.getIndirect(0), 0, NULL);
+
+ // We can assume that the fixed index will point to an input of the same
+ // interpolation type in case of an indirect.
+ // TODO: Make use of ArrayID.
+ linear = info->in[src.getIndex(0)].linear;
+ if (linear) {
+ op = OP_LINTERP;
+ mode = NV50_IR_INTERP_LINEAR;
+ } else {
+ op = OP_PINTERP;
+ mode = NV50_IR_INTERP_PERSPECTIVE;
+ }
}
switch (tgsi.getOpcode()) {
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
- insn = mkOp1(op, TYPE_F32, dst[c], srcToSym(src, c));
+ insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c));
if (op == OP_PINTERP)
insn->setSrc(1, w);
if (ptr)
case TGSI_OPCODE_MAD:
case TGSI_OPCODE_UMAD:
case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_FMA:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
case TGSI_OPCODE_FLR:
case TGSI_OPCODE_TRUNC:
case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_SQRT:
case TGSI_OPCODE_IABS:
case TGSI_OPCODE_INEG:
case TGSI_OPCODE_NOT:
mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
}
break;
+ case TGSI_OPCODE_VOTE_ALL:
+ case TGSI_OPCODE_VOTE_ANY:
+ case TGSI_OPCODE_VOTE_EQ:
+ val0 = new_LValue(func, FILE_PREDICATE);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero);
+ mkOp1(op, dstTy, val0, val0)
+ ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
+ mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0);
+ }
+ break;
case TGSI_OPCODE_KILL_IF:
val0 = new_LValue(func, FILE_PREDICATE);
mask = 0;
break;
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_SVIEWINFO:
- handleTXQ(dst0, TXQ_DIMS);
+ handleTXQ(dst0, TXQ_DIMS, 1);
+ break;
+ case TGSI_OPCODE_TXQS:
+ // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
+ // be in .x
+ dst0[1] = dst0[2] = dst0[3] = NULL;
+ std::swap(dst0[0], dst0[2]);
+ handleTXQ(dst0, TXQ_TYPE, 0);
+ std::swap(dst0[0], dst0[2]);
break;
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
break;
+ case TGSI_OPCODE_PK2H:
+ val0 = getScratch();
+ val1 = getScratch();
+ mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));
+ mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);
+ break;
+ case TGSI_OPCODE_UP2H:
+ src0 = fetchSrc(0, 0);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);
+ geni->subOp = c & 1;
+ }
+ break;
case TGSI_OPCODE_EMIT:
/* export the saved viewport index */
if (viewport != NULL) {
unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
if (stream && op == OP_RESTART)
break;
+ if (info->prop.gp.maxVertices == 0)
+ break;
src0 = mkImm(stream);
mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
break;
bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
}
setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
+
+ // If the loop never breaks (e.g. it only contains RETs), there is no
+ // way to reach the break BB. However, BGNLOOP has already emitted a
+ // PREBREAK targeting it, so it must still be attached to the CFG.
+ if (getBB()->cfg.incidentCount() == 0)
+ loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
}
break;
case TGSI_OPCODE_BRK:
geni->fixed = 1;
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
break;
+ case TGSI_OPCODE_MEMBAR:
+ geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
+ geni->fixed = 1;
+ if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP)
+ geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
+ else
+ geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
+ break;
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
case TGSI_OPCODE_ATOMIMAX:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
+ case TGSI_OPCODE_RESQ:
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0],
+ makeSym(tgsi.getSrc(0).getFile(),
+ tgsi.getSrc(0).getIndex(0), -1, 0, 0));
+ if (tgsi.getSrc(0).isIndirect(0))
+ geni->setIndirect(0, 1,
+ fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ } else {
+ assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE);
+
+ TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
+ for (int c = 0, d = 0; c < 4; ++c) {
+ if (dst0[c]) {
+ texi->setDef(d++, dst0[c]);
+ texi->tex.mask |= 1 << c;
+ }
+ }
+ texi->tex.r = tgsi.getSrc(0).getIndex(0);
+ texi->tex.target = getImageTarget(code, texi->tex.r);
+ bb->insertTail(texi);
+
+ if (tgsi.getSrc(0).isIndirect(0))
+ texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+ }
+ break;
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_UBFE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
- src1 = fetchSrc(1, c);
- src2 = fetchSrc(2, c);
- mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
+ tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
+ src1 = loadImm(NULL, tgsi.getSrc(2).getValueU32(c, info) << 8 |
+ tgsi.getSrc(1).getValueU32(c, info));
+ } else {
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
+ }
mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1);
}
break;
src0 = fetchSrc(0, pos);
src1 = fetchSrc(0, pos + 1);
mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
- mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
+ Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
+ if (!isFloatType(dstTy))
+ cvt->rnd = ROUND_Z;
pos += 2;
}
break;
}
break;
case TGSI_OPCODE_DMAD:
+ case TGSI_OPCODE_DFMA:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = getSSA(8);
src1 = getSSA(8);
for (c = 0; c < 4; ++c) {
for (i = 0; i < info->io.genUserClip; ++i) {
- Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
+ Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
if (c == 0)
Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
info->out[i].slot[c] * 4);
Value *val = oData.load(sub.cur->values, i, c, NULL);
- if (val)
+ if (val) {
+ if (info->out[i].sn == TGSI_SEMANTIC_POSITION)
+ mkOp1(OP_SAT, TYPE_F32, val, val);
mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
+ }
}
}
}
Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
code(code),
tgsi(NULL),
- tData(this), aData(this), pData(this), oData(this)
+ tData(this), lData(this), aData(this), pData(this), oData(this)
{
info = code->info;
- const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
-
const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
- tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
+ tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
+ lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
clipVtx[c] = getScratch();
}
- if (prog->getType() == Program::TYPE_FRAGMENT) {
+ switch (prog->getType()) {
+ case Program::TYPE_TESSELLATION_CONTROL:
+ outBase = mkOp2v(
+ OP_SUB, TYPE_U32, getSSA(),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+ break;
+ case Program::TYPE_FRAGMENT: {
Symbol *sv = mkSysVal(SV_POSITION, 3);
fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+ break;
+ }
+ default:
+ break;
}
if (info->io.viewportId >= 0)