X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fcodegen%2Fnv50_ir_from_tgsi.cpp;h=1d2caaba72fc8016f27e1cd54ddd846976189adb;hb=99581ca393037e10d17aab1f4c90ff2bdb1ec557;hp=3193ea668a3251abe28db47e1e3c4a12179f939a;hpb=5eb7ff1175a644ffe3b0f1a75cb235400355f9fb;p=mesa.git diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 3193ea668a3..1d2caaba72f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -20,11 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -extern "C" { #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_util.h" -} #include @@ -255,6 +253,7 @@ unsigned int Instruction::srcMask(unsigned int s) const case TGSI_OPCODE_TXD: case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: + case TGSI_OPCODE_LODQ: { const struct tgsi_instruction_texture *tex = &insn->Texture; @@ -296,6 +295,30 @@ unsigned int Instruction::srcMask(unsigned int s) const if (mask & 4) x |= 0x3; return x; } + case TGSI_OPCODE_D2I: + case TGSI_OPCODE_D2U: + case TGSI_OPCODE_D2F: + case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_DSNE: + switch (util_bitcount(mask)) { + case 1: return 0x3; + case 2: return 0xf; + default: + assert(!"unexpected mask"); + return 0xf; + } + case TGSI_OPCODE_I2D: + case TGSI_OPCODE_U2D: + case TGSI_OPCODE_F2D: { + unsigned int x = 0; + if ((mask & 0x3) == 0x3) + x |= 1; + if ((mask & 0xc) == 0xc) + x |= 2; + return x; + } default: break; } @@ -345,6 +368,15 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval) case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID; case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID; case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID; + case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX; + case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS; + case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK; + case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID; + case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD; + case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER; + case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER; + case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT; + case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL; default: assert(0); return nv50_ir::SV_CLOCK; @@ -391,12 +423,15 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_OR: case TGSI_OPCODE_XOR: case TGSI_OPCODE_NOT: + case TGSI_OPCODE_SHL: case TGSI_OPCODE_U2F: + case TGSI_OPCODE_U2D: case TGSI_OPCODE_UADD: case TGSI_OPCODE_UDIV: case TGSI_OPCODE_UMOD: case TGSI_OPCODE_UMAD: case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMUL_HI: case TGSI_OPCODE_UMAX: case TGSI_OPCODE_UMIN: case TGSI_OPCODE_USEQ: @@ -404,7 +439,6 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_USLT: case TGSI_OPCODE_USNE: case TGSI_OPCODE_USHR: - case TGSI_OPCODE_UCMP: case TGSI_OPCODE_ATOMUADD: case TGSI_OPCODE_ATOMXCHG: case TGSI_OPCODE_ATOMCAS: @@ -413,9 +447,13 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_ATOMXOR: case TGSI_OPCODE_ATOMUMIN: case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_UBFE: + case TGSI_OPCODE_UMSB: return nv50_ir::TYPE_U32; case TGSI_OPCODE_I2F: + case TGSI_OPCODE_I2D: case TGSI_OPCODE_IDIV: + case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_IMAX: case TGSI_OPCODE_IMIN: case TGSI_OPCODE_IABS: @@ -429,7 +467,32 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_UARL: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_IBFE: + case TGSI_OPCODE_IMSB: return nv50_ir::TYPE_S32; + case TGSI_OPCODE_D2F: + case TGSI_OPCODE_D2I: + case TGSI_OPCODE_D2U: + case TGSI_OPCODE_DABS: + case TGSI_OPCODE_DNEG: + case TGSI_OPCODE_DADD: + case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DMAX: + case TGSI_OPCODE_DMIN: + case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_DRCP: + case TGSI_OPCODE_DSQRT: + case TGSI_OPCODE_DMAD: + case TGSI_OPCODE_DFRAC: + case TGSI_OPCODE_DRSQ: + case TGSI_OPCODE_DTRUNC: + case TGSI_OPCODE_DCEIL: + case TGSI_OPCODE_DFLR: + case TGSI_OPCODE_DROUND: + return nv50_ir::TYPE_F64; default: return nv50_ir::TYPE_F32; } @@ -438,16 +501,27 @@ nv50_ir::DataType Instruction::inferSrcType() const nv50_ir::DataType Instruction::inferDstType() const { switch (getOpcode()) { + case TGSI_OPCODE_D2U: case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32; + case TGSI_OPCODE_D2I: case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32; case TGSI_OPCODE_FSEQ: case TGSI_OPCODE_FSGE: case TGSI_OPCODE_FSLT: case TGSI_OPCODE_FSNE: + case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_DSNE: return nv50_ir::TYPE_U32; case TGSI_OPCODE_I2F: case TGSI_OPCODE_U2F: + case TGSI_OPCODE_D2F: return nv50_ir::TYPE_F32; + case TGSI_OPCODE_I2D: + case TGSI_OPCODE_U2D: + case TGSI_OPCODE_F2D: + return nv50_ir::TYPE_F64; default: return inferSrcType(); } @@ -462,6 +536,7 @@ nv50_ir::CondCode Instruction::getSetCond() const case TGSI_OPCODE_ISLT: case TGSI_OPCODE_USLT: case TGSI_OPCODE_FSLT: + case TGSI_OPCODE_DSLT: return CC_LT; case TGSI_OPCODE_SLE: return CC_LE; @@ -469,21 +544,21 @@ nv50_ir::CondCode Instruction::getSetCond() const case TGSI_OPCODE_ISGE: case TGSI_OPCODE_USGE: case TGSI_OPCODE_FSGE: + case TGSI_OPCODE_DSGE: return CC_GE; case TGSI_OPCODE_SGT: return CC_GT; case TGSI_OPCODE_SEQ: case TGSI_OPCODE_USEQ: case TGSI_OPCODE_FSEQ: + case TGSI_OPCODE_DSEQ: return CC_EQ; case TGSI_OPCODE_SNE: case TGSI_OPCODE_FSNE: + case TGSI_OPCODE_DSNE: return CC_NEU; case TGSI_OPCODE_USNE: return CC_NE; - case TGSI_OPCODE_SFL: - return CC_NEVER; - case TGSI_OPCODE_STR: default: return CC_ALWAYS; } @@ -520,21 +595,20 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(COS, COS); NV50_IR_OPCODE_CASE(DDX, DFDX); + NV50_IR_OPCODE_CASE(DDX_FINE, DFDX); NV50_IR_OPCODE_CASE(DDY, DFDY); + NV50_IR_OPCODE_CASE(DDY_FINE, DFDY); NV50_IR_OPCODE_CASE(KILL, DISCARD); NV50_IR_OPCODE_CASE(SEQ, SET); - NV50_IR_OPCODE_CASE(SFL, SET); NV50_IR_OPCODE_CASE(SGT, SET); NV50_IR_OPCODE_CASE(SIN, SIN); NV50_IR_OPCODE_CASE(SLE, SET); NV50_IR_OPCODE_CASE(SNE, SET); - NV50_IR_OPCODE_CASE(STR, SET); NV50_IR_OPCODE_CASE(TEX, TEX); NV50_IR_OPCODE_CASE(TXD, TXD); NV50_IR_OPCODE_CASE(TXP, TEX); - NV50_IR_OPCODE_CASE(BRA, BRA); NV50_IR_OPCODE_CASE(CAL, CALL); NV50_IR_OPCODE_CASE(RET, RET); NV50_IR_OPCODE_CASE(CMP, SLCT); @@ -558,6 +632,9 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(SAD, SAD); NV50_IR_OPCODE_CASE(TXF, TXF); NV50_IR_OPCODE_CASE(TXQ, TXQ); + NV50_IR_OPCODE_CASE(TXQS, TXQ); + NV50_IR_OPCODE_CASE(TG4, TXG); + NV50_IR_OPCODE_CASE(LODQ, TXLQ); NV50_IR_OPCODE_CASE(EMIT, EMIT); NV50_IR_OPCODE_CASE(ENDPRIM, RESTART); @@ -592,6 +669,32 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(USLT, SET); NV50_IR_OPCODE_CASE(USNE, SET); + NV50_IR_OPCODE_CASE(DABS, ABS); + NV50_IR_OPCODE_CASE(DNEG, NEG); + NV50_IR_OPCODE_CASE(DADD, ADD); + NV50_IR_OPCODE_CASE(DMUL, MUL); + NV50_IR_OPCODE_CASE(DMAX, MAX); + NV50_IR_OPCODE_CASE(DMIN, MIN); + NV50_IR_OPCODE_CASE(DSLT, SET); + NV50_IR_OPCODE_CASE(DSGE, SET); + NV50_IR_OPCODE_CASE(DSEQ, SET); + NV50_IR_OPCODE_CASE(DSNE, SET); + NV50_IR_OPCODE_CASE(DRCP, RCP); + NV50_IR_OPCODE_CASE(DSQRT, SQRT); + NV50_IR_OPCODE_CASE(DMAD, MAD); + NV50_IR_OPCODE_CASE(D2I, CVT); + NV50_IR_OPCODE_CASE(D2U, CVT); + NV50_IR_OPCODE_CASE(I2D, CVT); + NV50_IR_OPCODE_CASE(U2D, CVT); + NV50_IR_OPCODE_CASE(DRSQ, RSQ); + NV50_IR_OPCODE_CASE(DTRUNC, TRUNC); + NV50_IR_OPCODE_CASE(DCEIL, CEIL); + NV50_IR_OPCODE_CASE(DFLR, FLOOR); + NV50_IR_OPCODE_CASE(DROUND, CVT); + + NV50_IR_OPCODE_CASE(IMUL_HI, MUL); + NV50_IR_OPCODE_CASE(UMUL_HI, MUL); + NV50_IR_OPCODE_CASE(SAMPLE, TEX); NV50_IR_OPCODE_CASE(SAMPLE_B, TXB); NV50_IR_OPCODE_CASE(SAMPLE_C, TEX); @@ -618,6 +721,15 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(TXB2, TXB); NV50_IR_OPCODE_CASE(TXL2, TXL); + NV50_IR_OPCODE_CASE(IBFE, EXTBF); + NV50_IR_OPCODE_CASE(UBFE, EXTBF); + NV50_IR_OPCODE_CASE(BFI, INSBF); + NV50_IR_OPCODE_CASE(BREV, EXTBF); + NV50_IR_OPCODE_CASE(POPC, POPCNT); + NV50_IR_OPCODE_CASE(LSB, BFIND); + NV50_IR_OPCODE_CASE(IMSB, BFIND); + NV50_IR_OPCODE_CASE(UMSB, BFIND); + NV50_IR_OPCODE_CASE(END, EXIT); default: @@ -641,6 +753,9 @@ static uint16_t opcodeToSubOp(uint opcode) case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; + case TGSI_OPCODE_IMUL_HI: + case TGSI_OPCODE_UMUL_HI: + return NV50_IR_SUBOP_MUL_HIGH; default: return 0; } @@ -717,7 +832,7 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog) if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) tgsi_dump(tokens, 0); - mainTempsInLMem = FALSE; + mainTempsInLMem = false; } Source::~Source() @@ -762,6 +877,8 @@ bool Source::scanSource() info->prop.gp.instanceCount = 1; // default value } + info->io.viewportId = -1; + info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); @@ -794,7 +911,7 @@ bool Source::scanSource() info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16; if (info->io.genUserClip > 0) { - info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1; + info->io.clipDistances = info->io.genUserClip; const unsigned int nOut = (info->io.genUserClip + 3) / 4; @@ -803,7 +920,7 @@ bool Source::scanSource() info->out[i].id = i; info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; info->out[i].si = n; - info->out[i].mask = info->io.clipDistanceMask >> (n * 4); + info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4); } } @@ -822,13 +939,11 @@ void Source::scanProperty(const struct tgsi_full_property *prop) case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: info->prop.gp.maxVertices = prop->u[0].Data; break; -#if 0 - case TGSI_PROPERTY_GS_INSTANCE_COUNT: + case TGSI_PROPERTY_GS_INVOCATIONS: info->prop.gp.instanceCount = prop->u[0].Data; break; -#endif case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: - info->prop.fp.separateFragData = TRUE; + info->prop.fp.separateFragData = true; break; case TGSI_PROPERTY_FS_COORD_ORIGIN: case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: @@ -837,6 +952,30 @@ void Source::scanProperty(const struct tgsi_full_property *prop) case TGSI_PROPERTY_VS_PROHIBIT_UCPS: info->io.genUserClip = -1; break; + case TGSI_PROPERTY_TCS_VERTICES_OUT: + info->prop.tp.outputPatchSize = prop->u[0].Data; + break; + case TGSI_PROPERTY_TES_PRIM_MODE: + info->prop.tp.domain = prop->u[0].Data; + break; + case TGSI_PROPERTY_TES_SPACING: + info->prop.tp.partitioning = prop->u[0].Data; + break; + case TGSI_PROPERTY_TES_VERTEX_ORDER_CW: + info->prop.tp.winding = prop->u[0].Data; + break; + case TGSI_PROPERTY_TES_POINT_MODE: + if (prop->u[0].Data) + info->prop.tp.outputPrim = PIPE_PRIM_POINTS; + else + info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */ + break; + case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: + info->io.clipDistances = prop->u[0].Data; + break; + case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: + info->io.cullDistances = prop->u[0].Data; + break; default: INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); break; @@ -861,8 +1000,8 @@ int Source::inferSysValDirection(unsigned sn) const case TGSI_SEMANTIC_INSTANCEID: case TGSI_SEMANTIC_VERTEXID: return 1; -#if 0 case TGSI_SEMANTIC_LAYER: +#if 0 case TGSI_SEMANTIC_VIEWPORTINDEX: return 0; #endif @@ -922,9 +1061,14 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) default: break; } - if (decl->Interp.Centroid) + if (decl->Interp.Location) info->in[i].centroid = 1; } + + if (sn == TGSI_SEMANTIC_PATCH) + info->in[i].patch = 1; + if (sn == TGSI_SEMANTIC_PATCH) + info->numPatchConstants = MAX2(info->numPatchConstants, si + 1); } } break; @@ -949,10 +1093,21 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) clipVertexOutput = i; break; case TGSI_SEMANTIC_CLIPDIST: - info->io.clipDistanceMask |= - decl->Declaration.UsageMask << (si * 4); info->io.genUserClip = -1; break; + case TGSI_SEMANTIC_SAMPLEMASK: + info->io.sampleMask = i; + break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + info->io.viewportId = i; + break; + case TGSI_SEMANTIC_PATCH: + info->numPatchConstants = MAX2(info->numPatchConstants, si + 1); + /* fallthrough */ + case TGSI_SEMANTIC_TESSOUTER: + case TGSI_SEMANTIC_TESSINNER: + info->out[i].patch = 1; + break; default: break; } @@ -969,6 +1124,10 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) case TGSI_SEMANTIC_VERTEXID: info->io.vertexId = first; break; + case TGSI_SEMANTIC_SAMPLEID: + case TGSI_SEMANTIC_SAMPLEPOS: + info->prop.fp.sampleInterp = 1; + break; default: break; } @@ -976,6 +1135,13 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) info->sv[i].sn = sn; info->sv[i].si = si; info->sv[i].input = inferSysValDirection(sn); + + switch (sn) { + case TGSI_SEMANTIC_TESSOUTER: + case TGSI_SEMANTIC_TESSINNER: + info->sv[i].patch = 1; + break; + } } break; case TGSI_FILE_RESOURCE: @@ -1030,6 +1196,8 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE || info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID || + info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER || + info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX || info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG) info->out[dst.getIndex(0)].mask &= 1; @@ -1038,7 +1206,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) } else if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { if (insn.getDst(0).isIndirect(0)) - mainTempsInLMem = TRUE; + mainTempsInLMem = true; } } @@ -1046,12 +1214,22 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) Instruction::SrcRegister src = insn.getSrc(s); if (src.getFile() == TGSI_FILE_TEMPORARY) { if (src.isIndirect(0)) - mainTempsInLMem = TRUE; + mainTempsInLMem = true; } else if (src.getFile() == TGSI_FILE_RESOURCE) { if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL) info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 0x1 : 0x2; + } else + if (src.getFile() == TGSI_FILE_OUTPUT) { + if (src.isIndirect(0)) { + // We don't know which one is accessed, just mark everything for + // reading. This is an extremely unlikely occurrence. + for (unsigned i = 0; i < info->numOutputs; ++i) + info->out[i].oread = 1; + } else { + info->out[src.getIndex(0)].oread = 1; + } } if (src.getFile() != TGSI_FILE_INPUT) continue; @@ -1126,7 +1304,9 @@ private: ValueMap values; }; + Value *shiftAddress(Value *); Value *getVertexBase(int s); + Value *getOutputBase(int s); DataArray *getArrayForFile(unsigned file, int idx); Value *fetchSrc(int s, int c); Value *acquireDst(int d, int c); @@ -1154,7 +1334,7 @@ private: void setTexRS(TexInstruction *, unsigned int& s, int R, int S); void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy); void handleTXF(Value *dst0[4], int R, int L_M); - void handleTXQ(Value *dst0[4], enum TexQuery); + void handleTXQ(Value *dst0[4], enum TexQuery, int R); void handleLIT(Value *dst0[4]); void handleUserClipPlanes(); @@ -1165,6 +1345,10 @@ private: void handleSTORE(); void handleATOM(Value *dst0[4], DataType, uint16_t subOp); + void handleINTERP(Value *dst0[4]); + + uint8_t translateInterpMode(const struct nv50_ir_varying *var, + operation& op); Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); @@ -1195,7 +1379,7 @@ private: }; private: - const struct tgsi::Source *code; + const tgsi::Source *code; const struct nv50_ir_prog_info *info; struct { @@ -1222,10 +1406,14 @@ private: Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP) uint8_t vtxBaseValid; + Value *outBase; // base address of vertex out patch (for TCP) + Stack condBBs; // fork BB, then else clause BB Stack joinBBs; // fork BB, for inserting join ops on ENDIF Stack loopBBs; // loop headers Stack breakBBs; // end of / after loop + + Value *viewport; }; Symbol * @@ -1233,18 +1421,20 @@ Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c) { const int swz = src.getSwizzle(c); + /* TODO: Use Array ID when it's available for the index */ return makeSym(src.getFile(), src.is2D() ? src.getIndex(1) : 0, - src.isIndirect(0) ? -1 : src.getIndex(0), swz, + src.getIndex(0), swz, src.getIndex(0) * 16 + swz * 4); } Symbol * Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c) { + /* TODO: Use Array ID when it's available for the index */ return makeSym(dst.getFile(), dst.is2D() ? dst.getIndex(1) : 0, - dst.isIndirect(0) ? -1 : dst.getIndex(0), c, + dst.getIndex(0), c, dst.getIndex(0) * 16 + c * 4); } @@ -1272,8 +1462,8 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) return sym; } -static inline uint8_t -translateInterpMode(const struct nv50_ir_varying *var, operation& op) +uint8_t +Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op) { uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; @@ -1289,7 +1479,7 @@ translateInterpMode(const struct nv50_ir_varying *var, operation& op) op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) ? OP_PINTERP : OP_LINTERP; - if (var->centroid) + if (var->centroid || info->prop.fp.sampleInterp) mode |= NV50_IR_INTERP_CENTROID; return mode; @@ -1344,7 +1534,24 @@ Converter::getVertexBase(int s) if (tgsi.getSrc(s).isIndirect(1)) rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL); vtxBaseValid |= 1 << s; - vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel); + vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS), + mkImm(index), rel); + } + return vtxBase[s]; +} + +Value * +Converter::getOutputBase(int s) +{ + assert(s < 5); + if (!(vtxBaseValid & (1 << s))) { + Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1)); + if (tgsi.getSrc(s).isIndirect(1)) + offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(), + fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL), + offset); + vtxBaseValid |= 1 << s; + vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset); } return vtxBase[s]; } @@ -1362,6 +1569,9 @@ Converter::fetchSrc(int s, int c) if (src.is2D()) { switch (src.getFile()) { + case TGSI_FILE_OUTPUT: + dimRel = getOutputBase(s); + break; case TGSI_FILE_INPUT: dimRel = getVertexBase(s); break; @@ -1402,38 +1612,64 @@ Converter::getArrayForFile(unsigned file, int idx) } } +Value * +Converter::shiftAddress(Value *index) +{ + if (!index) + return NULL; + return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4)); +} + Value * Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) { const int idx2d = src.is2D() ? src.getIndex(1) : 0; const int idx = src.getIndex(0); const int swz = src.getSwizzle(c); + Instruction *ld; switch (src.getFile()) { case TGSI_FILE_IMMEDIATE: assert(!ptr); return loadImm(NULL, info->immd.data[idx * 4 + swz]); case TGSI_FILE_CONSTANT: - return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); + return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr)); case TGSI_FILE_INPUT: if (prog->getType() == Program::TYPE_FRAGMENT) { // don't load masked inputs, won't be assigned a slot if (!ptr && !(info->in[idx].mask & (1 << swz))) return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f); - if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) + if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0)); - return interpolate(src, c, ptr); + return interpolate(src, c, shiftAddress(ptr)); + } else + if (prog->getType() == Program::TYPE_GEOMETRY) { + if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID) + return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0)); + // XXX: This is going to be a problem with scalar arrays, i.e. when + // we cannot assume that the address is given in units of vec4. + // + // nv50 and nvc0 need different things here, so let the lowering + // passes decide what to do with the address + if (ptr) + return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); } - return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); + ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr)); + ld->perPatch = info->in[idx].patch; + return ld->getDef(0); case TGSI_FILE_OUTPUT: - assert(!"load from output file"); - return NULL; + assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); + ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr)); + ld->perPatch = info->out[idx].patch; + return ld->getDef(0); case TGSI_FILE_SYSTEM_VALUE: assert(!ptr); - return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); + ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); + ld->perPatch = info->sv[idx].patch; + return ld->getDef(0); default: return getArrayForFile(src.getFile(), idx2d)->load( - sub.cur->values, idx, swz, ptr); + sub.cur->values, idx, swz, shiftAddress(ptr)); } } @@ -1461,23 +1697,13 @@ Converter::storeDst(int d, int c, Value *val) { const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); - switch (tgsi.getSaturate()) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: + if (tgsi.getSaturate()) { mkOp1(OP_SAT, dstTy, val, val); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f)); - mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f)); - break; - default: - assert(!"invalid saturation mode"); - break; } - Value *ptr = dst.isIndirect(0) ? - fetchSrc(dst.getIndirect(0), 0, NULL) : NULL; + Value *ptr = NULL; + if (dst.isIndirect(0)) + ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL)); if (info->io.genUserClip > 0 && dst.getFile() == TGSI_FILE_OUTPUT && @@ -1502,8 +1728,17 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); } else if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { - if (ptr || (info->out[idx].mask & (1 << c))) - mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); + + if (ptr || (info->out[idx].mask & (1 << c))) { + /* Save the viewport index into a scratch register so that it can be + exported at EMIT time */ + if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX && + viewport != NULL) + mkOp1(OP_MOV, TYPE_U32, viewport, val); + else + mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch = + info->out[idx].patch; + } } else if (f == TGSI_FILE_TEMPORARY || f == TGSI_FILE_PREDICATE || @@ -1544,6 +1779,7 @@ Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork) join->fixed = 1; conv->insertHead(join); + assert(!fork->joinAt); fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv); fork->insertBefore(fork->getExit(), fork->joinAt); } @@ -1571,7 +1807,7 @@ Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S) } void -Converter::handleTXQ(Value *dst0[4], enum TexQuery query) +Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R) { TexInstruction *tex = new_TexInstruction(func, OP_TXQ); tex->tex.query = query; @@ -1583,9 +1819,12 @@ Converter::handleTXQ(Value *dst0[4], enum TexQuery query) tex->tex.mask |= 1 << c; tex->setDef(d++, dst0[c]); } - tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level + if (query == TXQ_DIMS) + tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level + else + tex->setSrc((c = 0), zero); - setTexRS(tex, c, 1, -1); + setTexRS(tex, ++c, R, -1); bb->insertTail(tex); } @@ -1651,7 +1890,10 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) if (C == 0x0f) C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src - if (tgt.isShadow()) + if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && + tgt == TEX_TARGET_CUBE_ARRAY_SHADOW) + shd = fetchSrc(1, 0); + else if (tgt.isShadow()) shd = fetchSrc(C >> 4, C & 3); if (texi->op == OP_TXD) { @@ -1704,6 +1946,16 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ) texi->tex.levelZero = true; + if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow()) + texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info); + + texi->tex.useOffsets = tgsi.getNumTexOffsets(); + for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { + for (c = 0; c < 3; ++c) { + texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL)); + texi->offset[s][c].setInsn(texi); + } + } bb->insertTail(texi); } @@ -1734,11 +1986,11 @@ Converter::handleTXF(Value *dst[4], int R, int L_M) setTexRS(texi, c, R, -1); + texi->tex.useOffsets = tgsi.getNumTexOffsets(); for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { for (c = 0; c < 3; ++c) { - texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info); - if (texi->tex.offset[s][c]) - texi->tex.useOffsets = s + 1; + texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL)); + texi->offset[s][c].setInsn(texi); } } @@ -1776,7 +2028,7 @@ Converter::handleLIT(Value *dst0[4]) mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128); mkOp2(OP_POW, TYPE_F32, val3, val1, val3); - mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0); + mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0); } } @@ -1790,13 +2042,13 @@ isResourceSpecial(const int r) } static inline bool -isResourceRaw(const struct tgsi::Source *code, const int r) +isResourceRaw(const tgsi::Source *code, const int r) { return isResourceSpecial(r) || code->resources[r].raw; } static inline nv50_ir::TexTarget -getResourceTarget(const struct tgsi::Source *code, int r) +getResourceTarget(const tgsi::Source *code, int r) { if (isResourceSpecial(r)) return nv50_ir::TEX_TARGET_BUFFER; @@ -2051,6 +2303,84 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov } +void +Converter::handleINTERP(Value *dst[4]) +{ + // Check whether the input is linear. All other attributes ignored. + Instruction *insn; + Value *offset = NULL, *ptr = NULL, *w = NULL; + bool linear; + operation op; + int c, mode; + + tgsi::Instruction::SrcRegister src = tgsi.getSrc(0); + assert(src.getFile() == TGSI_FILE_INPUT); + + if (src.isIndirect(0)) + ptr = fetchSrc(src.getIndirect(0), 0, NULL); + + // XXX: no way to know interp mode if we don't know the index + linear = info->in[ptr ? 0 : src.getIndex(0)].linear; + if (linear) { + op = OP_LINTERP; + mode = NV50_IR_INTERP_LINEAR; + } else { + op = OP_PINTERP; + mode = NV50_IR_INTERP_PERSPECTIVE; + } + + switch (tgsi.getOpcode()) { + case TGSI_OPCODE_INTERP_CENTROID: + mode |= NV50_IR_INTERP_CENTROID; + break; + case TGSI_OPCODE_INTERP_SAMPLE: + insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0)); + insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET; + mode |= NV50_IR_INTERP_OFFSET; + break; + case TGSI_OPCODE_INTERP_OFFSET: { + // The input in src1.xy is float, but we need a single 32-bit value + // where the upper and lower 16 bits are encoded in S0.12 format. We need + // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096, + // and then convert to s32. + Value *offs[2]; + for (c = 0; c < 2; c++) { + offs[c] = fetchSrc(1, c); + mkOp2(OP_MIN, TYPE_F32, offs[c], offs[c], loadImm(NULL, 0.4375f)); + mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f)); + mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f)); + mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]); + } + offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(), + offs[1], mkImm(0x1010), offs[0]); + mode |= NV50_IR_INTERP_OFFSET; + break; + } + } + + if (op == OP_PINTERP) { + if (offset) { + w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset); + mkOp1(OP_RCP, TYPE_F32, w, w); + } else { + w = fragCoord[3]; + } + } + + + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + insn = mkOp1(op, TYPE_F32, dst[c], srcToSym(src, c)); + if (op == OP_PINTERP) + insn->setSrc(1, w); + if (ptr) + insn->setIndirect(0, 0, ptr); + if (offset) + insn->setSrc(op == OP_PINTERP ? 2 : 1, offset); + + insn->setInterpolate(mode); + } +} + Converter::Subroutine * Converter::getSubroutine(unsigned ip) { @@ -2092,7 +2422,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) Instruction *geni; Value *dst0[4], *rDst0[4]; - Value *src0, *src1, *src2; + Value *src0, *src1, *src2, *src3; Value *val0, *val1; int c; @@ -2130,8 +2460,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_UMOD: case TGSI_OPCODE_MUL: case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_IMUL_HI: + case TGSI_OPCODE_UMUL_HI: case TGSI_OPCODE_OR: - case TGSI_OPCODE_POW: case TGSI_OPCODE_SHL: case TGSI_OPCODE_ISHR: case TGSI_OPCODE_USHR: @@ -2140,7 +2471,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); - mkOp2(op, dstTy, dst0[c], src0, src1); + geni = mkOp2(op, dstTy, dst0[c], src0, src1); + geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); } break; case TGSI_OPCODE_MAD: @@ -2164,6 +2496,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_NOT: case TGSI_OPCODE_DDX: case TGSI_OPCODE_DDY: + case TGSI_OPCODE_DDX_FINE: + case TGSI_OPCODE_DDY_FINE: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) mkOp1(op, dstTy, dst0[c], fetchSrc(0, c)); break; @@ -2176,15 +2510,22 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkMov(dst0[c], val0); break; case TGSI_OPCODE_ARL: + case TGSI_OPCODE_ARR: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + const RoundMode rnd = + tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M; src0 = fetchSrc(0, c); - mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M; - mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4)); + mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd; } break; case TGSI_OPCODE_UARL: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) - mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4)); + mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); + break; + case TGSI_OPCODE_POW: + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); break; case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: @@ -2315,8 +2656,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src0 = fetchSrc(0, c); val0 = getScratch(); val1 = getScratch(); - mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero); - mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero); + mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero); + mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero); if (srcTy == TYPE_F32) mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1); else @@ -2324,6 +2665,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) } break; case TGSI_OPCODE_UCMP: + srcTy = TYPE_U32; + /* fallthrough */ case TGSI_OPCODE_CMP: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); @@ -2333,7 +2676,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkMov(dst0[c], src1); else mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE, - srcTy, dst0[c], src1, src2, src0); + srcTy, dst0[c], srcTy, src1, src2, src0); } break; case TGSI_OPCODE_FRC: @@ -2362,11 +2705,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SFL: case TGSI_OPCODE_SGT: case TGSI_OPCODE_SLE: case TGSI_OPCODE_SNE: - case TGSI_OPCODE_STR: case TGSI_OPCODE_FSEQ: case TGSI_OPCODE_FSGE: case TGSI_OPCODE_FSLT: @@ -2380,13 +2721,18 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); - mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1); + mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1); } break; case TGSI_OPCODE_KILL_IF: val0 = new_LValue(func, FILE_PREDICATE); + mask = 0; for (c = 0; c < 4; ++c) { - mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero); + const int s = tgsi.getSrc(0).getSwizzle(c); + if (mask & (1 << s)) + continue; + mask |= 1 << s; + mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero); mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0); } break; @@ -2397,18 +2743,22 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: + case TGSI_OPCODE_LODQ: // R S L C Dx Dy handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00); break; case TGSI_OPCODE_TXD: handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20); break; + case TGSI_OPCODE_TG4: + handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00); + break; case TGSI_OPCODE_TEX2: handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00); break; case TGSI_OPCODE_TXB2: case TGSI_OPCODE_TXL2: - handleTEX(dst0, 2, 2, 0x10, 0x11, 0x00, 0x00); + handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00); break; case TGSI_OPCODE_SAMPLE: case TGSI_OPCODE_SAMPLE_B: @@ -2429,7 +2779,15 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) break; case TGSI_OPCODE_TXQ: case TGSI_OPCODE_SVIEWINFO: - handleTXQ(dst0, TXQ_DIMS); + handleTXQ(dst0, TXQ_DIMS, 1); + break; + case TGSI_OPCODE_TXQS: + // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to + // be in .x + dst0[1] = dst0[2] = dst0[3] = NULL; + std::swap(dst0[0], dst0[2]); + handleTXQ(dst0, TXQ_TYPE, 0); + std::swap(dst0[0], dst0[2]); break; case TGSI_OPCODE_F2I: case TGSI_OPCODE_F2U: @@ -2442,12 +2800,23 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); break; case TGSI_OPCODE_EMIT: + /* export the saved viewport index */ + if (viewport != NULL) { + Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32, + info->out[info->io.viewportId].slot[0] * 4); + mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport); + } + /* fallthrough */ case TGSI_OPCODE_ENDPRIM: - // get vertex stream if specified (must be immediate) - src0 = tgsi.srcCount() ? - mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero; + { + // get vertex stream (must be immediate) + unsigned int stream = tgsi.getSrc(0).getValueU32(0, info); + if (stream && op == OP_RESTART) + break; + src0 = mkImm(stream); mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1; break; + } case TGSI_OPCODE_IF: case TGSI_OPCODE_UIF: { @@ -2524,6 +2893,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); } setPosition(reinterpret_cast(breakBBs.pop().u.p), true); + + // If the loop never breaks (e.g. only has RET's inside), then there + // will be no way to get to the break bb. However BGNLOOP will have + // already made a PREBREAK to it, so it must be in the CFG. + if (getBB()->cfg.incidentCount() == 0) + loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE); } break; case TGSI_OPCODE_BRK: @@ -2640,6 +3015,215 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_ATOMIMAX: handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); break; + case TGSI_OPCODE_IBFE: + case TGSI_OPCODE_UBFE: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE && + tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) { + src1 = loadImm(NULL, tgsi.getSrc(2).getValueU32(c, info) << 8 | + tgsi.getSrc(1).getValueU32(c, info)); + } else { + src1 = fetchSrc(1, c); + src2 = fetchSrc(2, c); + mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1); + } + mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1); + } + break; + case TGSI_OPCODE_BFI: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + src2 = fetchSrc(2, c); + src3 = fetchSrc(3, c); + mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2); + mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0); + } + break; + case TGSI_OPCODE_LSB: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000)); + geni->subOp = NV50_IR_SUBOP_EXTBF_REV; + geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0); + geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; + } + break; + case TGSI_OPCODE_IMSB: + case TGSI_OPCODE_UMSB: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + mkOp1(OP_BFIND, srcTy, dst0[c], src0); + } + break; + case TGSI_OPCODE_BREV: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000)); + geni->subOp = NV50_IR_SUBOP_EXTBF_REV; + } + break; + case TGSI_OPCODE_POPC: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0); + } + break; + case TGSI_OPCODE_INTERP_CENTROID: + case TGSI_OPCODE_INTERP_SAMPLE: + case TGSI_OPCODE_INTERP_OFFSET: + handleINTERP(dst0); + break; + case TGSI_OPCODE_D2I: + case TGSI_OPCODE_D2U: + case TGSI_OPCODE_D2F: { + int pos = 0; + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + Value *dreg = getSSA(8); + src0 = fetchSrc(0, pos); + src1 = fetchSrc(0, pos + 1); + mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1); + mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg); + pos += 2; + } + break; + } + case TGSI_OPCODE_I2D: + case TGSI_OPCODE_U2D: + case TGSI_OPCODE_F2D: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + Value *dreg = getSSA(8); + mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2)); + mkSplit(&dst0[c], 4, dreg); + c++; + } + break; + case TGSI_OPCODE_DABS: + case TGSI_OPCODE_DNEG: + case TGSI_OPCODE_DRCP: + case TGSI_OPCODE_DSQRT: + case TGSI_OPCODE_DRSQ: + case TGSI_OPCODE_DTRUNC: + case TGSI_OPCODE_DCEIL: + case TGSI_OPCODE_DFLR: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + mkOp1(op, dstTy, dst, src0); + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_DFRAC: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + mkOp1(OP_FLOOR, TYPE_F64, dst, src0); + mkOp2(OP_SUB, TYPE_F64, dst, src0, dst); + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_DSNE: { + int pos = 0; + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + Value *tmp[2]; + + src0 = getSSA(8); + src1 = getSSA(8); + tmp[0] = fetchSrc(0, pos); + tmp[1] = fetchSrc(0, pos + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + tmp[0] = fetchSrc(1, pos); + tmp[1] = fetchSrc(1, pos + 1); + mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); + mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1); + pos += 2; + } + break; + } + case TGSI_OPCODE_DADD: + case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DMAX: + case TGSI_OPCODE_DMIN: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + src1 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + tmp[0] = fetchSrc(1, c); + tmp[1] = fetchSrc(1, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); + mkOp2(op, dstTy, dst, src0, src1); + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_DMAD: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + src1 = getSSA(8); + src2 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + tmp[0] = fetchSrc(1, c); + tmp[1] = fetchSrc(1, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); + tmp[0] = fetchSrc(2, c); + tmp[1] = fetchSrc(2, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]); + mkOp3(op, dstTy, dst, src0, src1, src2); + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_DROUND: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0) + ->rnd = ROUND_NI; + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_DSSG: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + + val0 = getScratch(); + val1 = getScratch(); + // The zero is wrong here since it's only 32-bit, but it works out in + // the end since it gets replaced with $r63. + mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero); + mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero); + mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1); + mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32); + mkSplit(&dst0[c], 4, dst); + c++; + } + break; default: ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); assert(0); @@ -2721,7 +3305,7 @@ Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0); pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); - aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0); + aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0); oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0); zero = mkImm((uint32_t)0); @@ -2816,11 +3400,27 @@ Converter::run() clipVtx[c] = getScratch(); } - if (prog->getType() == Program::TYPE_FRAGMENT) { + switch (prog->getType()) { + case Program::TYPE_TESSELLATION_CONTROL: + outBase = mkOp2v( + OP_SUB, TYPE_U32, getSSA(), + mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)), + mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0))); + break; + case Program::TYPE_FRAGMENT: { Symbol *sv = mkSysVal(SV_POSITION, 3); fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); + break; } + default: + break; + } + + if (info->io.viewportId >= 0) + viewport = getScratch(); + else + viewport = NULL; for (ip = 0; ip < code->scan.num_instructions; ++ip) { if (!handleInstruction(&code->insns[ip]))