* OTHER DEALINGS IN THE SOFTWARE.
*/
-extern "C" {
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_util.h"
-}
#include <set>
case TGSI_OPCODE_TXD:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_LODQ:
{
const struct tgsi_instruction_texture *tex = &insn->Texture;
if (mask & 4) x |= 0x3;
return x;
}
+ case TGSI_OPCODE_D2I:
+ case TGSI_OPCODE_D2U:
+ case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_DSNE:
+ switch (util_bitcount(mask)) {
+ case 1: return 0x3;
+ case 2: return 0xf;
+ default:
+ assert(!"unexpected mask");
+ return 0xf;
+ }
+ case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_U2D:
+ case TGSI_OPCODE_F2D: {
+ unsigned int x = 0;
+ if ((mask & 0x3) == 0x3)
+ x |= 1;
+ if ((mask & 0xc) == 0xc)
+ x |= 2;
+ return x;
+ }
default:
break;
}
case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID;
case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;
case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID;
+ case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX;
+ case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;
+ case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
+ case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
+ case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;
+ case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
+ case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
+ case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
+ case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
default:
assert(0);
return nv50_ir::SV_CLOCK;
case TGSI_OPCODE_OR:
case TGSI_OPCODE_XOR:
case TGSI_OPCODE_NOT:
+ case TGSI_OPCODE_SHL:
case TGSI_OPCODE_U2F:
+ case TGSI_OPCODE_U2D:
case TGSI_OPCODE_UADD:
case TGSI_OPCODE_UDIV:
case TGSI_OPCODE_UMOD:
case TGSI_OPCODE_UMAD:
case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMUL_HI:
case TGSI_OPCODE_UMAX:
case TGSI_OPCODE_UMIN:
case TGSI_OPCODE_USEQ:
case TGSI_OPCODE_USLT:
case TGSI_OPCODE_USNE:
case TGSI_OPCODE_USHR:
- case TGSI_OPCODE_UCMP:
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
case TGSI_OPCODE_ATOMXOR:
case TGSI_OPCODE_ATOMUMIN:
case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_UBFE:
+ case TGSI_OPCODE_UMSB:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_I2D:
case TGSI_OPCODE_IDIV:
+ case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_IMAX:
case TGSI_OPCODE_IMIN:
case TGSI_OPCODE_IABS:
case TGSI_OPCODE_UARL:
case TGSI_OPCODE_ATOMIMIN:
case TGSI_OPCODE_ATOMIMAX:
+ case TGSI_OPCODE_IBFE:
+ case TGSI_OPCODE_IMSB:
return nv50_ir::TYPE_S32;
+ case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_D2I:
+ case TGSI_OPCODE_D2U:
+ case TGSI_OPCODE_DABS:
+ case TGSI_OPCODE_DNEG:
+ case TGSI_OPCODE_DADD:
+ case TGSI_OPCODE_DMUL:
+ case TGSI_OPCODE_DMAX:
+ case TGSI_OPCODE_DMIN:
+ case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_DRCP:
+ case TGSI_OPCODE_DSQRT:
+ case TGSI_OPCODE_DMAD:
+ case TGSI_OPCODE_DFRAC:
+ case TGSI_OPCODE_DRSQ:
+ case TGSI_OPCODE_DTRUNC:
+ case TGSI_OPCODE_DCEIL:
+ case TGSI_OPCODE_DFLR:
+ case TGSI_OPCODE_DROUND:
+ return nv50_ir::TYPE_F64;
default:
return nv50_ir::TYPE_F32;
}
nv50_ir::DataType Instruction::inferDstType() const
{
switch (getOpcode()) {
+ case TGSI_OPCODE_D2U:
case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
+ case TGSI_OPCODE_D2I:
case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
case TGSI_OPCODE_FSEQ:
case TGSI_OPCODE_FSGE:
case TGSI_OPCODE_FSLT:
case TGSI_OPCODE_FSNE:
+ case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_DSNE:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_U2F:
+ case TGSI_OPCODE_D2F:
return nv50_ir::TYPE_F32;
+ case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_U2D:
+ case TGSI_OPCODE_F2D:
+ return nv50_ir::TYPE_F64;
default:
return inferSrcType();
}
case TGSI_OPCODE_ISLT:
case TGSI_OPCODE_USLT:
case TGSI_OPCODE_FSLT:
+ case TGSI_OPCODE_DSLT:
return CC_LT;
case TGSI_OPCODE_SLE:
return CC_LE;
case TGSI_OPCODE_ISGE:
case TGSI_OPCODE_USGE:
case TGSI_OPCODE_FSGE:
+ case TGSI_OPCODE_DSGE:
return CC_GE;
case TGSI_OPCODE_SGT:
return CC_GT;
case TGSI_OPCODE_SEQ:
case TGSI_OPCODE_USEQ:
case TGSI_OPCODE_FSEQ:
+ case TGSI_OPCODE_DSEQ:
return CC_EQ;
case TGSI_OPCODE_SNE:
case TGSI_OPCODE_FSNE:
+ case TGSI_OPCODE_DSNE:
return CC_NEU;
case TGSI_OPCODE_USNE:
return CC_NE;
- case TGSI_OPCODE_SFL:
- return CC_NEVER;
- case TGSI_OPCODE_STR:
default:
return CC_ALWAYS;
}
NV50_IR_OPCODE_CASE(COS, COS);
NV50_IR_OPCODE_CASE(DDX, DFDX);
+ NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
NV50_IR_OPCODE_CASE(DDY, DFDY);
+ NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);
NV50_IR_OPCODE_CASE(KILL, DISCARD);
NV50_IR_OPCODE_CASE(SEQ, SET);
- NV50_IR_OPCODE_CASE(SFL, SET);
NV50_IR_OPCODE_CASE(SGT, SET);
NV50_IR_OPCODE_CASE(SIN, SIN);
NV50_IR_OPCODE_CASE(SLE, SET);
NV50_IR_OPCODE_CASE(SNE, SET);
- NV50_IR_OPCODE_CASE(STR, SET);
NV50_IR_OPCODE_CASE(TEX, TEX);
NV50_IR_OPCODE_CASE(TXD, TXD);
NV50_IR_OPCODE_CASE(TXP, TEX);
- NV50_IR_OPCODE_CASE(BRA, BRA);
NV50_IR_OPCODE_CASE(CAL, CALL);
NV50_IR_OPCODE_CASE(RET, RET);
NV50_IR_OPCODE_CASE(CMP, SLCT);
NV50_IR_OPCODE_CASE(SAD, SAD);
NV50_IR_OPCODE_CASE(TXF, TXF);
NV50_IR_OPCODE_CASE(TXQ, TXQ);
+ NV50_IR_OPCODE_CASE(TXQS, TXQ);
+ NV50_IR_OPCODE_CASE(TG4, TXG);
+ NV50_IR_OPCODE_CASE(LODQ, TXLQ);
NV50_IR_OPCODE_CASE(EMIT, EMIT);
NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
NV50_IR_OPCODE_CASE(USLT, SET);
NV50_IR_OPCODE_CASE(USNE, SET);
+ NV50_IR_OPCODE_CASE(DABS, ABS);
+ NV50_IR_OPCODE_CASE(DNEG, NEG);
+ NV50_IR_OPCODE_CASE(DADD, ADD);
+ NV50_IR_OPCODE_CASE(DMUL, MUL);
+ NV50_IR_OPCODE_CASE(DMAX, MAX);
+ NV50_IR_OPCODE_CASE(DMIN, MIN);
+ NV50_IR_OPCODE_CASE(DSLT, SET);
+ NV50_IR_OPCODE_CASE(DSGE, SET);
+ NV50_IR_OPCODE_CASE(DSEQ, SET);
+ NV50_IR_OPCODE_CASE(DSNE, SET);
+ NV50_IR_OPCODE_CASE(DRCP, RCP);
+ NV50_IR_OPCODE_CASE(DSQRT, SQRT);
+ NV50_IR_OPCODE_CASE(DMAD, MAD);
+ NV50_IR_OPCODE_CASE(D2I, CVT);
+ NV50_IR_OPCODE_CASE(D2U, CVT);
+ NV50_IR_OPCODE_CASE(I2D, CVT);
+ NV50_IR_OPCODE_CASE(U2D, CVT);
+ NV50_IR_OPCODE_CASE(DRSQ, RSQ);
+ NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
+ NV50_IR_OPCODE_CASE(DCEIL, CEIL);
+ NV50_IR_OPCODE_CASE(DFLR, FLOOR);
+ NV50_IR_OPCODE_CASE(DROUND, CVT);
+
+ NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
+ NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
+
NV50_IR_OPCODE_CASE(SAMPLE, TEX);
NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
NV50_IR_OPCODE_CASE(TXB2, TXB);
NV50_IR_OPCODE_CASE(TXL2, TXL);
+ NV50_IR_OPCODE_CASE(IBFE, EXTBF);
+ NV50_IR_OPCODE_CASE(UBFE, EXTBF);
+ NV50_IR_OPCODE_CASE(BFI, INSBF);
+ NV50_IR_OPCODE_CASE(BREV, EXTBF);
+ NV50_IR_OPCODE_CASE(POPC, POPCNT);
+ NV50_IR_OPCODE_CASE(LSB, BFIND);
+ NV50_IR_OPCODE_CASE(IMSB, BFIND);
+ NV50_IR_OPCODE_CASE(UMSB, BFIND);
+
NV50_IR_OPCODE_CASE(END, EXIT);
default:
case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN;
case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX;
case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX;
+ case TGSI_OPCODE_IMUL_HI:
+ case TGSI_OPCODE_UMUL_HI:
+ return NV50_IR_SUBOP_MUL_HIGH;
default:
return 0;
}
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
tgsi_dump(tokens, 0);
- mainTempsInLMem = FALSE;
+ mainTempsInLMem = false;
}
Source::~Source()
info->prop.gp.instanceCount = 1; // default value
}
+ info->io.viewportId = -1;
+
info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
if (info->io.genUserClip > 0) {
- info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
+ info->io.clipDistances = info->io.genUserClip;
const unsigned int nOut = (info->io.genUserClip + 3) / 4;
info->out[i].id = i;
info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
info->out[i].si = n;
- info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
+ info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
}
}
case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
info->prop.gp.maxVertices = prop->u[0].Data;
break;
-#if 0
- case TGSI_PROPERTY_GS_INSTANCE_COUNT:
+ case TGSI_PROPERTY_GS_INVOCATIONS:
info->prop.gp.instanceCount = prop->u[0].Data;
break;
-#endif
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
- info->prop.fp.separateFragData = TRUE;
+ info->prop.fp.separateFragData = true;
break;
case TGSI_PROPERTY_FS_COORD_ORIGIN:
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
info->io.genUserClip = -1;
break;
+ case TGSI_PROPERTY_TCS_VERTICES_OUT:
+ info->prop.tp.outputPatchSize = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_PRIM_MODE:
+ info->prop.tp.domain = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_SPACING:
+ info->prop.tp.partitioning = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
+ info->prop.tp.winding = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_POINT_MODE:
+ if (prop->u[0].Data)
+ info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
+ else
+ info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
+ break;
+ case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+ info->io.clipDistances = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
+ info->io.cullDistances = prop->u[0].Data;
+ break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
case TGSI_SEMANTIC_INSTANCEID:
case TGSI_SEMANTIC_VERTEXID:
return 1;
-#if 0
case TGSI_SEMANTIC_LAYER:
+#if 0
case TGSI_SEMANTIC_VIEWPORTINDEX:
return 0;
#endif
default:
break;
}
- if (decl->Interp.Centroid)
+ if (decl->Interp.Location)
info->in[i].centroid = 1;
}
+
+ if (sn == TGSI_SEMANTIC_PATCH)
+ info->in[i].patch = 1;
+ if (sn == TGSI_SEMANTIC_PATCH)
+ info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
}
}
break;
clipVertexOutput = i;
break;
case TGSI_SEMANTIC_CLIPDIST:
- info->io.clipDistanceMask |=
- decl->Declaration.UsageMask << (si * 4);
info->io.genUserClip = -1;
break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->io.sampleMask = i;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ info->io.viewportId = i;
+ break;
+ case TGSI_SEMANTIC_PATCH:
+ info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
+ /* fallthrough */
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_TESSINNER:
+ info->out[i].patch = 1;
+ break;
default:
break;
}
case TGSI_SEMANTIC_VERTEXID:
info->io.vertexId = first;
break;
+ case TGSI_SEMANTIC_SAMPLEID:
+ case TGSI_SEMANTIC_SAMPLEPOS:
+ info->prop.fp.sampleInterp = 1;
+ break;
default:
break;
}
info->sv[i].sn = sn;
info->sv[i].si = si;
info->sv[i].input = inferSysValDirection(sn);
+
+ switch (sn) {
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_TESSINNER:
+ info->sv[i].patch = 1;
+ break;
+ }
}
break;
case TGSI_FILE_RESOURCE:
if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
+ info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||
+ info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
info->out[dst.getIndex(0)].mask &= 1;
} else
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
- mainTempsInLMem = TRUE;
+ mainTempsInLMem = true;
}
}
Instruction::SrcRegister src = insn.getSrc(s);
if (src.getFile() == TGSI_FILE_TEMPORARY) {
if (src.isIndirect(0))
- mainTempsInLMem = TRUE;
+ mainTempsInLMem = true;
} else
if (src.getFile() == TGSI_FILE_RESOURCE) {
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
+ } else
+ if (src.getFile() == TGSI_FILE_OUTPUT) {
+ if (src.isIndirect(0)) {
+ // We don't know which one is accessed, just mark everything for
+ // reading. This is an extremely unlikely occurrence.
+ for (unsigned i = 0; i < info->numOutputs; ++i)
+ info->out[i].oread = 1;
+ } else {
+ info->out[src.getIndex(0)].oread = 1;
+ }
}
if (src.getFile() != TGSI_FILE_INPUT)
continue;
ValueMap values;
};
+ Value *shiftAddress(Value *);
Value *getVertexBase(int s);
+ Value *getOutputBase(int s);
DataArray *getArrayForFile(unsigned file, int idx);
Value *fetchSrc(int s, int c);
Value *acquireDst(int d, int c);
void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
void handleTXF(Value *dst0[4], int R, int L_M);
- void handleTXQ(Value *dst0[4], enum TexQuery);
+ void handleTXQ(Value *dst0[4], enum TexQuery, int R);
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();
void handleSTORE();
void handleATOM(Value *dst0[4], DataType, uint16_t subOp);
+ void handleINTERP(Value *dst0[4]);
+
+ uint8_t translateInterpMode(const struct nv50_ir_varying *var,
+ operation& op);
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
};
private:
- const struct tgsi::Source *code;
+ const tgsi::Source *code;
const struct nv50_ir_prog_info *info;
struct {
Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
uint8_t vtxBaseValid;
+ Value *outBase; // base address of vertex out patch (for TCP)
+
Stack condBBs; // fork BB, then else clause BB
Stack joinBBs; // fork BB, for inserting join ops on ENDIF
Stack loopBBs; // loop headers
Stack breakBBs; // end of / after loop
+
+ Value *viewport;
};
Symbol *
{
const int swz = src.getSwizzle(c);
+ /* TODO: Use Array ID when it's available for the index */
return makeSym(src.getFile(),
src.is2D() ? src.getIndex(1) : 0,
- src.isIndirect(0) ? -1 : src.getIndex(0), swz,
+ src.getIndex(0), swz,
src.getIndex(0) * 16 + swz * 4);
}
Symbol *
Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
{
+ /* TODO: Use Array ID when it's available for the index */
return makeSym(dst.getFile(),
dst.is2D() ? dst.getIndex(1) : 0,
- dst.isIndirect(0) ? -1 : dst.getIndex(0), c,
+ dst.getIndex(0), c,
dst.getIndex(0) * 16 + c * 4);
}
return sym;
}
-static inline uint8_t
-translateInterpMode(const struct nv50_ir_varying *var, operation& op)
+uint8_t
+Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
{
uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
? OP_PINTERP : OP_LINTERP;
- if (var->centroid)
+ if (var->centroid || info->prop.fp.sampleInterp)
mode |= NV50_IR_INTERP_CENTROID;
return mode;
if (tgsi.getSrc(s).isIndirect(1))
rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
vtxBaseValid |= 1 << s;
- vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel);
+ vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
+ mkImm(index), rel);
+ }
+ return vtxBase[s];
+}
+
+// Return the base address used for the second (2D) dimension of source
+// operand 's'. fetchSrc's 2D path calls this for TGSI_FILE_OUTPUT sources.
+// The result is outBase plus the operand's dimension index, where the index
+// is the immediate getIndex(1) plus, if that dimension is indirect, the
+// fetched indirect value.
+//
+// NOTE(review): the result is cached in vtxBase[s] / vtxBaseValid, the same
+// per-source cache getVertexBase() fills, so this relies on a given source
+// slot being either an input or an output within one instruction -- confirm.
+Value *
+Converter::getOutputBase(int s)
+{
+ // vtxBase has 5 entries, one per source slot.
+ assert(s < 5);
+ // Only compute the base once per source slot; later calls reuse the cache.
+ if (!(vtxBaseValid & (1 << s))) {
+ // Constant part of the dimension index.
+ Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
+ // Add the run-time part when the dimension is addressed indirectly.
+ if (tgsi.getSrc(s).isIndirect(1))
+ offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
+ fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
+ offset);
+ vtxBaseValid |= 1 << s;
+ vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);
+ }
return vtxBase[s];
}
if (src.is2D()) {
switch (src.getFile()) {
+ case TGSI_FILE_OUTPUT:
+ dimRel = getOutputBase(s);
+ break;
case TGSI_FILE_INPUT:
dimRel = getVertexBase(s);
break;
}
}
+// Scale a register index into an address by shifting it left by 4 (x16,
+// presumably the size of one vec4 register in bytes). The result lives in a
+// fresh 4-byte FILE_ADDRESS value. A NULL index (no indirect addressing) is
+// passed through unchanged so callers can wrap their pointer unconditionally.
+Value *
+Converter::shiftAddress(Value *index)
+{
+   if (index == NULL)
+      return NULL;
+
+   Value *const shifted = getSSA(4, FILE_ADDRESS);
+   return mkOp2v(OP_SHL, TYPE_U32, shifted, index, mkImm(4));
+}
+
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
const int idx2d = src.is2D() ? src.getIndex(1) : 0;
const int idx = src.getIndex(0);
const int swz = src.getSwizzle(c);
+ Instruction *ld;
switch (src.getFile()) {
case TGSI_FILE_IMMEDIATE:
assert(!ptr);
return loadImm(NULL, info->immd.data[idx * 4 + swz]);
case TGSI_FILE_CONSTANT:
- return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
+ return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
case TGSI_FILE_INPUT:
if (prog->getType() == Program::TYPE_FRAGMENT) {
// don't load masked inputs, won't be assigned a slot
if (!ptr && !(info->in[idx].mask & (1 << swz)))
return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
- if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
+ if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
- return interpolate(src, c, ptr);
+ return interpolate(src, c, shiftAddress(ptr));
+ } else
+ if (prog->getType() == Program::TYPE_GEOMETRY) {
+ if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
+ return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
+ // XXX: This is going to be a problem with scalar arrays, i.e. when
+ // we cannot assume that the address is given in units of vec4.
+ //
+ // nv50 and nvc0 need different things here, so let the lowering
+ // passes decide what to do with the address
+ if (ptr)
+ return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
}
- return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
+ ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
+ ld->perPatch = info->in[idx].patch;
+ return ld->getDef(0);
case TGSI_FILE_OUTPUT:
- assert(!"load from output file");
- return NULL;
+ assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
+ ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
+ ld->perPatch = info->out[idx].patch;
+ return ld->getDef(0);
case TGSI_FILE_SYSTEM_VALUE:
assert(!ptr);
- return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
+ ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
+ ld->perPatch = info->sv[idx].patch;
+ return ld->getDef(0);
default:
return getArrayForFile(src.getFile(), idx2d)->load(
- sub.cur->values, idx, swz, ptr);
+ sub.cur->values, idx, swz, shiftAddress(ptr));
}
}
{
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
- switch (tgsi.getSaturate()) {
- case TGSI_SAT_NONE:
- break;
- case TGSI_SAT_ZERO_ONE:
+ if (tgsi.getSaturate()) {
mkOp1(OP_SAT, dstTy, val, val);
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f));
- mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f));
- break;
- default:
- assert(!"invalid saturation mode");
- break;
}
- Value *ptr = dst.isIndirect(0) ?
- fetchSrc(dst.getIndirect(0), 0, NULL) : NULL;
+ Value *ptr = NULL;
+ if (dst.isIndirect(0))
+ ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));
if (info->io.genUserClip > 0 &&
dst.getFile() == TGSI_FILE_OUTPUT &&
mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
} else
if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
- if (ptr || (info->out[idx].mask & (1 << c)))
- mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
+
+ if (ptr || (info->out[idx].mask & (1 << c))) {
+ /* Save the viewport index into a scratch register so that it can be
+ exported at EMIT time */
+ if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
+ viewport != NULL)
+ mkOp1(OP_MOV, TYPE_U32, viewport, val);
+ else
+ mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
+ info->out[idx].patch;
+ }
} else
if (f == TGSI_FILE_TEMPORARY ||
f == TGSI_FILE_PREDICATE ||
join->fixed = 1;
conv->insertHead(join);
+ assert(!fork->joinAt);
fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
fork->insertBefore(fork->getExit(), fork->joinAt);
}
}
void
-Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
+Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
{
TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
tex->tex.query = query;
tex->tex.mask |= 1 << c;
tex->setDef(d++, dst0[c]);
}
- tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+ if (query == TXQ_DIMS)
+ tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+ else
+ tex->setSrc((c = 0), zero);
- setTexRS(tex, c, 1, -1);
+ setTexRS(tex, ++c, R, -1);
bb->insertTail(tex);
}
if (C == 0x0f)
C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
- if (tgt.isShadow())
+ if (tgsi.getOpcode() == TGSI_OPCODE_TG4 &&
+ tgt == TEX_TARGET_CUBE_ARRAY_SHADOW)
+ shd = fetchSrc(1, 0);
+ else if (tgt.isShadow())
shd = fetchSrc(C >> 4, C & 3);
if (texi->op == OP_TXD) {
if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
texi->tex.levelZero = true;
+ if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
+ texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);
+
+ texi->tex.useOffsets = tgsi.getNumTexOffsets();
+ for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
+ for (c = 0; c < 3; ++c) {
+ texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
+ texi->offset[s][c].setInsn(texi);
+ }
+ }
bb->insertTail(texi);
}
setTexRS(texi, c, R, -1);
+ texi->tex.useOffsets = tgsi.getNumTexOffsets();
for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
for (c = 0; c < 3; ++c) {
- texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info);
- if (texi->tex.offset[s][c])
- texi->tex.useOffsets = s + 1;
+ texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
+ texi->offset[s][c].setInsn(texi);
}
}
mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
- mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0);
+ mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0);
}
}
}
static inline bool
-isResourceRaw(const struct tgsi::Source *code, const int r)
+isResourceRaw(const tgsi::Source *code, const int r)
{
return isResourceSpecial(r) || code->resources[r].raw;
}
static inline nv50_ir::TexTarget
-getResourceTarget(const struct tgsi::Source *code, int r)
+getResourceTarget(const tgsi::Source *code, int r)
{
if (isResourceSpecial(r))
return nv50_ir::TEX_TARGET_BUFFER;
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
}
+// Translate the TGSI INTERP_CENTROID / INTERP_SAMPLE / INTERP_OFFSET opcodes:
+// interpolate the fragment input named by source 0 into every enabled channel
+// of destination 0, at the location selected by the opcode.
+//
+// dst: destination values, one per channel; only the channels enabled in the
+//      instruction's write mask are written.
+void
+Converter::handleINTERP(Value *dst[4])
+{
+   // Check whether the input is linear. All other attributes ignored.
+   Instruction *insn;
+   Value *offset = NULL, *ptr = NULL, *w = NULL;
+   bool linear;
+   operation op;
+   int c, mode;
+
+   tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
+   assert(src.getFile() == TGSI_FILE_INPUT);
+
+   // Address registers hold plain register indices (ARL/UARL do not pre-shift
+   // them), so scale the index the same way fetchSrc does before handing it
+   // to an interpolation instruction.
+   if (src.isIndirect(0))
+      ptr = shiftAddress(fetchSrc(src.getIndirect(0), 0, NULL));
+
+   // XXX: no way to know interp mode if we don't know the index
+   linear = info->in[ptr ? 0 : src.getIndex(0)].linear;
+   if (linear) {
+      op = OP_LINTERP;
+      mode = NV50_IR_INTERP_LINEAR;
+   } else {
+      op = OP_PINTERP;
+      mode = NV50_IR_INTERP_PERSPECTIVE;
+   }
+
+   switch (tgsi.getOpcode()) {
+   case TGSI_OPCODE_INTERP_CENTROID:
+      mode |= NV50_IR_INTERP_CENTROID;
+      break;
+   case TGSI_OPCODE_INTERP_SAMPLE:
+      // Look up the offset of the sample whose index is in src1.x.
+      insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0));
+      insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
+      mode |= NV50_IR_INTERP_OFFSET;
+      break;
+   case TGSI_OPCODE_INTERP_OFFSET: {
+      // The input in src1.xy is float, but we need a single 32-bit value
+      // where the upper and lower 16 bits are encoded in S0.12 format. We need
+      // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096,
+      // and then convert to s32.
+      Value *offs[2];
+      for (c = 0; c < 2; c++) {
+         // Compute into a scratch value: writing the clamped/scaled result
+         // into the Value returned by fetchSrc would overwrite the source
+         // operand itself.
+         offs[c] = getScratch();
+         mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f));
+         mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
+         mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
+         mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
+      }
+      // Pack y into the upper and x into the lower 16 bits.
+      offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(),
+                      offs[1], mkImm(0x1010), offs[0]);
+      mode |= NV50_IR_INTERP_OFFSET;
+      break;
+   }
+   }
+
+   // Perspective interpolation needs 1/w; with an explicit offset it has to
+   // be evaluated at that offset instead of reusing the cached fragCoord.
+   if (op == OP_PINTERP) {
+      if (offset) {
+         w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset);
+         mkOp1(OP_RCP, TYPE_F32, w, w);
+      } else {
+         w = fragCoord[3];
+      }
+   }
+
+
+   FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+      insn = mkOp1(op, TYPE_F32, dst[c], srcToSym(src, c));
+      if (op == OP_PINTERP)
+         insn->setSrc(1, w);
+      if (ptr)
+         insn->setIndirect(0, 0, ptr);
+      // The offset goes after the (optional) 1/w operand.
+      if (offset)
+         insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);
+
+      insn->setInterpolate(mode);
+   }
+}
+
Converter::Subroutine *
Converter::getSubroutine(unsigned ip)
{
Instruction *geni;
Value *dst0[4], *rDst0[4];
- Value *src0, *src1, *src2;
+ Value *src0, *src1, *src2, *src3;
Value *val0, *val1;
int c;
case TGSI_OPCODE_UMOD:
case TGSI_OPCODE_MUL:
case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_IMUL_HI:
+ case TGSI_OPCODE_UMUL_HI:
case TGSI_OPCODE_OR:
- case TGSI_OPCODE_POW:
case TGSI_OPCODE_SHL:
case TGSI_OPCODE_ISHR:
case TGSI_OPCODE_USHR:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
- mkOp2(op, dstTy, dst0[c], src0, src1);
+ geni = mkOp2(op, dstTy, dst0[c], src0, src1);
+ geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
}
break;
case TGSI_OPCODE_MAD:
case TGSI_OPCODE_NOT:
case TGSI_OPCODE_DDX:
case TGSI_OPCODE_DDY:
+ case TGSI_OPCODE_DDX_FINE:
+ case TGSI_OPCODE_DDY_FINE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
break;
mkMov(dst0[c], val0);
break;
case TGSI_OPCODE_ARL:
+ case TGSI_OPCODE_ARR:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ const RoundMode rnd =
+ tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M;
src0 = fetchSrc(0, c);
- mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M;
- mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4));
+ mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd;
}
break;
case TGSI_OPCODE_UARL:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
- mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4));
+ mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));
+ break;
+ case TGSI_OPCODE_POW:
+ val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0));
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
break;
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_LG2:
src0 = fetchSrc(0, c);
val0 = getScratch();
val1 = getScratch();
- mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero);
- mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero);
+ mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
+ mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
if (srcTy == TYPE_F32)
mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
else
}
break;
case TGSI_OPCODE_UCMP:
+ srcTy = TYPE_U32;
+ /* fallthrough */
case TGSI_OPCODE_CMP:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
mkMov(dst0[c], src1);
else
mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
- srcTy, dst0[c], src1, src2, src0);
+ srcTy, dst0[c], srcTy, src1, src2, src0);
}
break;
case TGSI_OPCODE_FRC:
case TGSI_OPCODE_SLT:
case TGSI_OPCODE_SGE:
case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_SFL:
case TGSI_OPCODE_SGT:
case TGSI_OPCODE_SLE:
case TGSI_OPCODE_SNE:
- case TGSI_OPCODE_STR:
case TGSI_OPCODE_FSEQ:
case TGSI_OPCODE_FSGE:
case TGSI_OPCODE_FSLT:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
- mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1);
+ mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
}
break;
case TGSI_OPCODE_KILL_IF:
val0 = new_LValue(func, FILE_PREDICATE);
+ mask = 0;
for (c = 0; c < 4; ++c) {
- mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero);
+ const int s = tgsi.getSrc(0).getSwizzle(c);
+ if (mask & (1 << s))
+ continue;
+ mask |= 1 << s;
+ mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);
mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
}
break;
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_LODQ:
// R S L C Dx Dy
handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
break;
case TGSI_OPCODE_TXD:
handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
break;
+ case TGSI_OPCODE_TG4:
+ handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00);
+ break;
case TGSI_OPCODE_TEX2:
handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
break;
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXL2:
- handleTEX(dst0, 2, 2, 0x10, 0x11, 0x00, 0x00);
+ handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00);
break;
case TGSI_OPCODE_SAMPLE:
case TGSI_OPCODE_SAMPLE_B:
break;
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_SVIEWINFO:
- handleTXQ(dst0, TXQ_DIMS);
+ handleTXQ(dst0, TXQ_DIMS, 1);
+ break;
+ case TGSI_OPCODE_TXQS:
+ // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
+ // be in .x
+ dst0[1] = dst0[2] = dst0[3] = NULL;
+ std::swap(dst0[0], dst0[2]);
+ handleTXQ(dst0, TXQ_TYPE, 0);
+ std::swap(dst0[0], dst0[2]);
break;
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
break;
case TGSI_OPCODE_EMIT:
+ /* export the saved viewport index */
+ if (viewport != NULL) {
+ Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,
+ info->out[info->io.viewportId].slot[0] * 4);
+ mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport);
+ }
+ /* fallthrough */
case TGSI_OPCODE_ENDPRIM:
- // get vertex stream if specified (must be immediate)
- src0 = tgsi.srcCount() ?
- mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero;
+ {
+ // get vertex stream (must be immediate)
+ unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
+ if (stream && op == OP_RESTART)
+ break;
+ src0 = mkImm(stream);
mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
break;
+ }
case TGSI_OPCODE_IF:
case TGSI_OPCODE_UIF:
{
bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
}
setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
+
+ // If the loop never breaks (e.g. only has RET's inside), then there
+ // will be no way to get to the break bb. However BGNLOOP will have
+ // already made a PREBREAK to it, so it must be in the CFG.
+ if (getBB()->cfg.incidentCount() == 0)
+ loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
}
break;
case TGSI_OPCODE_BRK:
case TGSI_OPCODE_ATOMIMAX:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
+ case TGSI_OPCODE_IBFE:
+ case TGSI_OPCODE_UBFE:
+ // Bitfield extract: source 0 is the value; sources 1 and 2 are combined
+ // into the single descriptor operand passed to OP_EXTBF.
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
+ tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
+ // Both operands are compile-time constants: fold the descriptor
+ // (source 2 shifted up by 8, source 1 in the low bits).
+ src1 = loadImm(NULL, tgsi.getSrc(2).getValueU32(c, info) << 8 |
+ tgsi.getSrc(1).getValueU32(c, info));
+ } else {
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ // Build the descriptor in a scratch value. Writing it into src1
+ // would overwrite the instruction's own source operand, which is
+ // presumably the shader register itself or an immediate (fetchSrc
+ // is defined elsewhere) and must stay intact for later users.
+ val0 = getScratch();
+ mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1);
+ src1 = val0;
+ }
+ mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_BFI:
+ // Bitfield insert: sources 2 and 3 are first combined into one
+ // descriptor operand, which then drives the actual insert of source 1
+ // into source 0.
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ src3 = fetchSrc(3, c);
+ // Combine into a scratch value instead of src2: using a fetched
+ // source as the destination would clobber the instruction's operand
+ // (presumably the shader register itself or an immediate; fetchSrc
+ // is defined elsewhere).
+ val0 = getScratch();
+ mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2);
+ mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0);
+ }
+ break;
+ case TGSI_OPCODE_LSB:
+ // Least significant bit: run the source through OP_EXTBF with the REV
+ // sub-op, then OP_BFIND with the SAMT sub-op on that intermediate.
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ // Keep the intermediate in a scratch value; writing it back into
+ // src0 would destroy the source operand for any later reader
+ // (presumably src0 is the shader register itself; fetchSrc is
+ // defined elsewhere).
+ val0 = getScratch();
+ geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
+ geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
+ geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+ }
+ break;
+ case TGSI_OPCODE_IMSB:
+ case TGSI_OPCODE_UMSB:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ mkOp1(OP_BFIND, srcTy, dst0[c], src0);
+ }
+ break;
+ case TGSI_OPCODE_BREV:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
+ geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ }
+ break;
+ case TGSI_OPCODE_POPC:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
+ }
+ break;
+ case TGSI_OPCODE_INTERP_CENTROID:
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ case TGSI_OPCODE_INTERP_OFFSET:
+ handleINTERP(dst0);
+ break;
+ case TGSI_OPCODE_D2I:
+ case TGSI_OPCODE_D2U:
+ case TGSI_OPCODE_D2F: {
+ int pos = 0;
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ Value *dreg = getSSA(8);
+ src0 = fetchSrc(0, pos);
+ src1 = fetchSrc(0, pos + 1);
+ mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
+ mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
+ pos += 2;
+ }
+ break;
+ }
+ case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_U2D:
+ case TGSI_OPCODE_F2D:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ Value *dreg = getSSA(8);
+ mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
+ mkSplit(&dst0[c], 4, dreg);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_DABS:
+ case TGSI_OPCODE_DNEG:
+ case TGSI_OPCODE_DRCP:
+ case TGSI_OPCODE_DSQRT:
+ case TGSI_OPCODE_DRSQ:
+ case TGSI_OPCODE_DTRUNC:
+ case TGSI_OPCODE_DCEIL:
+ case TGSI_OPCODE_DFLR:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ mkOp1(op, dstTy, dst, src0);
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_DFRAC:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ mkOp1(OP_FLOOR, TYPE_F64, dst, src0);
+ mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_DSNE: {
+ int pos = 0;
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ Value *tmp[2];
+
+ src0 = getSSA(8);
+ src1 = getSSA(8);
+ tmp[0] = fetchSrc(0, pos);
+ tmp[1] = fetchSrc(0, pos + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ tmp[0] = fetchSrc(1, pos);
+ tmp[1] = fetchSrc(1, pos + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
+ mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
+ pos += 2;
+ }
+ break;
+ }
+ case TGSI_OPCODE_DADD:
+ case TGSI_OPCODE_DMUL:
+ case TGSI_OPCODE_DMAX:
+ case TGSI_OPCODE_DMIN:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ src1 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ tmp[0] = fetchSrc(1, c);
+ tmp[1] = fetchSrc(1, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
+ mkOp2(op, dstTy, dst, src0, src1);
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_DMAD:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ src1 = getSSA(8);
+ src2 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ tmp[0] = fetchSrc(1, c);
+ tmp[1] = fetchSrc(1, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
+ tmp[0] = fetchSrc(2, c);
+ tmp[1] = fetchSrc(2, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);
+ mkOp3(op, dstTy, dst, src0, src1, src2);
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_DROUND:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)
+ ->rnd = ROUND_NI;
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_DSSG:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+
+ val0 = getScratch();
+ val1 = getScratch();
+ // The zero is wrong here since it's only 32-bit, but it works out in
+ // the end since it gets replaced with $r63.
+ mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);
+ mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);
+ mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);
+ mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
default:
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
assert(0);
tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
- aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0);
+ aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
zero = mkImm((uint32_t)0);
clipVtx[c] = getScratch();
}
- if (prog->getType() == Program::TYPE_FRAGMENT) {
+ switch (prog->getType()) {
+ case Program::TYPE_TESSELLATION_CONTROL:
+ outBase = mkOp2v(
+ OP_SUB, TYPE_U32, getSSA(),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+ break;
+ case Program::TYPE_FRAGMENT: {
Symbol *sv = mkSysVal(SV_POSITION, 3);
fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+ break;
}
+ default:
+ break;
+ }
+
+ if (info->io.viewportId >= 0)
+ viewport = getScratch();
+ else
+ viewport = NULL;
for (ip = 0; ip < code->scan.num_instructions; ++ip) {
if (!handleInstruction(&code->insns[ip]))