X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fcodegen%2Fnv50_ir_from_tgsi.cpp;h=3375c599e7593bf8a4829b1f7fb251ea2bfd85e4;hb=bdf20d324bfec6a6cbabf7492cb4b19f7d9de5ad;hp=735e2891cf25fa575b63ced0055d0e9ba282a406;hpb=7b9a77b905bda3003dc57efb99879499ebc4ba41;p=mesa.git diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 735e2891cf2..3375c599e75 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_util.h" @@ -27,8 +28,8 @@ #include #include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_from_common.h" #include "codegen/nv50_ir_util.h" -#include "codegen/nv50_ir_build_util.h" namespace tgsi { @@ -162,6 +163,12 @@ public: return SrcRegister(fdr->Indirect); } + struct tgsi_full_src_register asSrc() + { + assert(fdr); + return tgsi_full_src_register_from_dst(fdr); + } + int getArrayId() const { if (isIndirect(0)) @@ -181,6 +188,7 @@ public: // mask of used components of source s unsigned int srcMask(unsigned int s) const; + unsigned int texOffsetMask() const; SrcRegister getSrc(unsigned int s) const { @@ -214,6 +222,14 @@ public: nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; + const nv50_ir::TexInstruction::ImgFormatDesc *getImageFormat() const { + return nv50_ir::TexInstruction::translateImgFormat((enum pipe_format)insn->Memory.Format); + } + + nv50_ir::TexTarget getImageTarget() const { + return translateTexture(insn->Memory.Texture); + } + nv50_ir::CacheMode getCacheMode() const { if (!insn->Instruction.Memory) return nv50_ir::CACHE_CA; @@ -233,6 +249,35 @@ private: const struct tgsi_full_instruction *insn; }; +unsigned int Instruction::texOffsetMask() const +{ + const struct tgsi_instruction_texture *tex = &insn->Texture; + assert(insn->Instruction.Texture); + + switch (tex->Texture) { + case TGSI_TEXTURE_BUFFER: + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + return 0x1; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_2D_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_2D_MSAA: + case TGSI_TEXTURE_2D_ARRAY_MSAA: + return 0x3; + case TGSI_TEXTURE_3D: + return 0x7; + default: + assert(!"Unexpected texture target"); + return 0xf; + } +} + unsigned int Instruction::srcMask(unsigned int s) const { unsigned int mask = insn->Dst[0].Register.WriteMask; @@ -246,7 +291,6 @@ unsigned int Instruction::srcMask(unsigned int s) const case TGSI_OPCODE_DP3: return 0x7; case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */ return 0xf; case TGSI_OPCODE_DST: @@ -258,7 +302,6 @@ unsigned int Instruction::srcMask(unsigned int s) const case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SCS: return 0x1; case TGSI_OPCODE_IF: case TGSI_OPCODE_UIF: @@ -274,6 +317,10 @@ unsigned int Instruction::srcMask(unsigned int s) const case TGSI_OPCODE_TXD: case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TG4: + case TGSI_OPCODE_TEX_LZ: + case TGSI_OPCODE_TXF_LZ: case TGSI_OPCODE_LODQ: { const struct tgsi_instruction_texture *tex = &insn->Texture; @@ -282,6 +329,8 @@ unsigned int Instruction::srcMask(unsigned int s) const mask = 0x7; if (insn->Instruction.Opcode != TGSI_OPCODE_TEX && + insn->Instruction.Opcode != TGSI_OPCODE_TEX_LZ && + insn->Instruction.Opcode != TGSI_OPCODE_TXF_LZ && insn->Instruction.Opcode != TGSI_OPCODE_TXD) mask |= 0x8; /* bias, lod or proj */ @@ -308,14 +357,8 @@ unsigned int Instruction::srcMask(unsigned int s) const } } return mask; - case TGSI_OPCODE_XPD: - { - unsigned int x = 0; - if (mask & 1) x |= 0x6; - if (mask & 2) x |= 0x5; - if (mask & 4) x |= 0x3; - return x; - } + case TGSI_OPCODE_TXQ: + return 1; case TGSI_OPCODE_D2I: case TGSI_OPCODE_D2U: case TGSI_OPCODE_D2F: @@ -323,6 +366,14 @@ unsigned int Instruction::srcMask(unsigned int s) const case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSEQ: case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_U64SGE: + case TGSI_OPCODE_I642F: + case TGSI_OPCODE_U642F: switch (util_bitcount(mask)) { case 1: return 0x3; case 2: return 0xf; @@ -370,10 +421,11 @@ static nv50_ir::DataFile translateFile(uint file) case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT; case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR; case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS; - case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; - case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; + case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; + case TGSI_FILE_IMAGE: return nv50_ir::FILE_MEMORY_GLOBAL; + case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; case TGSI_FILE_SAMPLER: case TGSI_FILE_NULL: default: @@ -405,6 +457,13 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval) case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX; case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE; case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID; + case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM; + case TGSI_SEMANTIC_SUBGROUP_INVOCATION: return nv50_ir::SV_LANEID; + case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: return nv50_ir::SV_LANEMASK_EQ; + case TGSI_SEMANTIC_SUBGROUP_LT_MASK: return nv50_ir::SV_LANEMASK_LT; + case TGSI_SEMANTIC_SUBGROUP_LE_MASK: return nv50_ir::SV_LANEMASK_LE; + case TGSI_SEMANTIC_SUBGROUP_GT_MASK: return nv50_ir::SV_LANEMASK_GT; + case TGSI_SEMANTIC_SUBGROUP_GE_MASK: return nv50_ir::SV_LANEMASK_GE; default: assert(0); return nv50_ir::SV_CLOCK; @@ -463,6 +522,7 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_SHL: case TGSI_OPCODE_U2F: case TGSI_OPCODE_U2D: + case TGSI_OPCODE_U2I64: case TGSI_OPCODE_UADD: case TGSI_OPCODE_UDIV: case TGSI_OPCODE_UMOD: @@ -484,12 +544,18 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_ATOMXOR: case TGSI_OPCODE_ATOMUMIN: case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMDEC_WRAP: + case TGSI_OPCODE_ATOMINC_WRAP: case TGSI_OPCODE_UBFE: case TGSI_OPCODE_UMSB: case TGSI_OPCODE_UP2H: + case TGSI_OPCODE_VOTE_ALL: + case TGSI_OPCODE_VOTE_ANY: + case TGSI_OPCODE_VOTE_EQ: return nv50_ir::TYPE_U32; case TGSI_OPCODE_I2F: case TGSI_OPCODE_I2D: + case TGSI_OPCODE_I2I64: case TGSI_OPCODE_IDIV: case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_IMAX: @@ -500,7 +566,6 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_ISHR: case TGSI_OPCODE_ISLT: case TGSI_OPCODE_ISSG: - case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version case TGSI_OPCODE_MOD: case TGSI_OPCODE_UARL: case TGSI_OPCODE_ATOMIMIN: @@ -511,10 +576,13 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_D2F: case TGSI_OPCODE_D2I: case TGSI_OPCODE_D2U: + case TGSI_OPCODE_D2I64: + case TGSI_OPCODE_D2U64: case TGSI_OPCODE_DABS: case TGSI_OPCODE_DNEG: case TGSI_OPCODE_DADD: case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DDIV: case TGSI_OPCODE_DMAX: case TGSI_OPCODE_DMIN: case TGSI_OPCODE_DSLT: @@ -524,6 +592,7 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_DRCP: case TGSI_OPCODE_DSQRT: case TGSI_OPCODE_DMAD: + case TGSI_OPCODE_DFMA: case TGSI_OPCODE_DFRAC: case TGSI_OPCODE_DRSQ: case TGSI_OPCODE_DTRUNC: @@ -531,6 +600,34 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_DFLR: case TGSI_OPCODE_DROUND: return nv50_ir::TYPE_F64; + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_U64SGE: + case TGSI_OPCODE_U64MIN: + case TGSI_OPCODE_U64MAX: + case TGSI_OPCODE_U64ADD: + case TGSI_OPCODE_U64MUL: + case TGSI_OPCODE_U64SHL: + case TGSI_OPCODE_U64SHR: + case TGSI_OPCODE_U64DIV: + case TGSI_OPCODE_U64MOD: + case TGSI_OPCODE_U642F: + case TGSI_OPCODE_U642D: + return nv50_ir::TYPE_U64; + case TGSI_OPCODE_I64ABS: + case TGSI_OPCODE_I64SSG: + case TGSI_OPCODE_I64NEG: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_I64MIN: + case TGSI_OPCODE_I64MAX: + case TGSI_OPCODE_I64SHR: + case TGSI_OPCODE_I64DIV: + case TGSI_OPCODE_I64MOD: + case TGSI_OPCODE_I642F: + case TGSI_OPCODE_I642D: + return nv50_ir::TYPE_S64; default: return nv50_ir::TYPE_F32; } @@ -551,17 +648,35 @@ nv50_ir::DataType Instruction::inferDstType() const case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSLT: case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_U64SGE: case TGSI_OPCODE_PK2H: return nv50_ir::TYPE_U32; case TGSI_OPCODE_I2F: case TGSI_OPCODE_U2F: case TGSI_OPCODE_D2F: + case TGSI_OPCODE_I642F: + case TGSI_OPCODE_U642F: case TGSI_OPCODE_UP2H: return nv50_ir::TYPE_F32; case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: case TGSI_OPCODE_F2D: + case TGSI_OPCODE_I642D: + case TGSI_OPCODE_U642D: return nv50_ir::TYPE_F64; + case TGSI_OPCODE_I2I64: + case TGSI_OPCODE_U2I64: + case TGSI_OPCODE_F2I64: + case TGSI_OPCODE_D2I64: + return nv50_ir::TYPE_S64; + case TGSI_OPCODE_F2U64: + case TGSI_OPCODE_D2U64: + return nv50_ir::TYPE_U64; default: return inferSrcType(); } @@ -577,6 +692,8 @@ nv50_ir::CondCode Instruction::getSetCond() const case TGSI_OPCODE_USLT: case TGSI_OPCODE_FSLT: case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_U64SLT: return CC_LT; case TGSI_OPCODE_SLE: return CC_LE; @@ -585,6 +702,8 @@ nv50_ir::CondCode Instruction::getSetCond() const case TGSI_OPCODE_USGE: case TGSI_OPCODE_FSGE: case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_U64SGE: return CC_GE; case TGSI_OPCODE_SGT: return CC_GT; @@ -592,10 +711,12 @@ nv50_ir::CondCode Instruction::getSetCond() const case TGSI_OPCODE_USEQ: case TGSI_OPCODE_FSEQ: case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_U64SEQ: return CC_EQ; case TGSI_OPCODE_SNE: case TGSI_OPCODE_FSNE: case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_U64SNE: return CC_NEU; case TGSI_OPCODE_USNE: return CC_NE; @@ -614,6 +735,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(RCP, RCP); NV50_IR_OPCODE_CASE(RSQ, RSQ); + NV50_IR_OPCODE_CASE(SQRT, SQRT); NV50_IR_OPCODE_CASE(MUL, MUL); NV50_IR_OPCODE_CASE(ADD, ADD); @@ -623,7 +745,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(SLT, SET); NV50_IR_OPCODE_CASE(SGE, SET); NV50_IR_OPCODE_CASE(MAD, MAD); - NV50_IR_OPCODE_CASE(SUB, SUB); + NV50_IR_OPCODE_CASE(FMA, FMA); NV50_IR_OPCODE_CASE(FLR, FLOOR); NV50_IR_OPCODE_CASE(ROUND, CVT); @@ -631,14 +753,13 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(LG2, LG2); NV50_IR_OPCODE_CASE(POW, POW); - NV50_IR_OPCODE_CASE(ABS, ABS); - NV50_IR_OPCODE_CASE(COS, COS); NV50_IR_OPCODE_CASE(DDX, DFDX); NV50_IR_OPCODE_CASE(DDX_FINE, DFDX); NV50_IR_OPCODE_CASE(DDY, DFDY); NV50_IR_OPCODE_CASE(DDY_FINE, DFDY); NV50_IR_OPCODE_CASE(KILL, DISCARD); + NV50_IR_OPCODE_CASE(DEMOTE, DISCARD); NV50_IR_OPCODE_CASE(SEQ, SET); NV50_IR_OPCODE_CASE(SGT, SET); @@ -658,6 +779,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(DIV, DIV); NV50_IR_OPCODE_CASE(TXL, TXL); + NV50_IR_OPCODE_CASE(TEX_LZ, TXL); NV50_IR_OPCODE_CASE(CEIL, CEIL); NV50_IR_OPCODE_CASE(I2F, CVT); @@ -669,8 +791,8 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(OR, OR); NV50_IR_OPCODE_CASE(MOD, MOD); NV50_IR_OPCODE_CASE(XOR, XOR); - NV50_IR_OPCODE_CASE(SAD, SAD); NV50_IR_OPCODE_CASE(TXF, TXF); + NV50_IR_OPCODE_CASE(TXF_LZ, TXF); NV50_IR_OPCODE_CASE(TXQ, TXQ); NV50_IR_OPCODE_CASE(TXQS, TXQ); NV50_IR_OPCODE_CASE(TG4, TXG); @@ -713,6 +835,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(DNEG, NEG); NV50_IR_OPCODE_CASE(DADD, ADD); NV50_IR_OPCODE_CASE(DMUL, MUL); + NV50_IR_OPCODE_CASE(DDIV, DIV); NV50_IR_OPCODE_CASE(DMAX, MAX); NV50_IR_OPCODE_CASE(DMIN, MIN); NV50_IR_OPCODE_CASE(DSLT, SET); @@ -722,6 +845,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(DRCP, RCP); NV50_IR_OPCODE_CASE(DSQRT, SQRT); NV50_IR_OPCODE_CASE(DMAD, MAD); + NV50_IR_OPCODE_CASE(DFMA, FMA); NV50_IR_OPCODE_CASE(D2I, CVT); NV50_IR_OPCODE_CASE(D2U, CVT); NV50_IR_OPCODE_CASE(I2D, CVT); @@ -732,6 +856,35 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(DFLR, FLOOR); NV50_IR_OPCODE_CASE(DROUND, CVT); + NV50_IR_OPCODE_CASE(U64SEQ, SET); + NV50_IR_OPCODE_CASE(U64SNE, SET); + NV50_IR_OPCODE_CASE(U64SLT, SET); + NV50_IR_OPCODE_CASE(U64SGE, SET); + NV50_IR_OPCODE_CASE(I64SLT, SET); + NV50_IR_OPCODE_CASE(I64SGE, SET); + NV50_IR_OPCODE_CASE(I2I64, CVT); + NV50_IR_OPCODE_CASE(U2I64, CVT); + NV50_IR_OPCODE_CASE(F2I64, CVT); + NV50_IR_OPCODE_CASE(F2U64, CVT); + NV50_IR_OPCODE_CASE(D2I64, CVT); + NV50_IR_OPCODE_CASE(D2U64, CVT); + NV50_IR_OPCODE_CASE(I642F, CVT); + NV50_IR_OPCODE_CASE(U642F, CVT); + NV50_IR_OPCODE_CASE(I642D, CVT); + NV50_IR_OPCODE_CASE(U642D, CVT); + + NV50_IR_OPCODE_CASE(I64MIN, MIN); + NV50_IR_OPCODE_CASE(U64MIN, MIN); + NV50_IR_OPCODE_CASE(I64MAX, MAX); + NV50_IR_OPCODE_CASE(U64MAX, MAX); + NV50_IR_OPCODE_CASE(I64ABS, ABS); + NV50_IR_OPCODE_CASE(I64NEG, NEG); + NV50_IR_OPCODE_CASE(U64ADD, ADD); + NV50_IR_OPCODE_CASE(U64MUL, MUL); + NV50_IR_OPCODE_CASE(U64SHL, SHL); + NV50_IR_OPCODE_CASE(I64SHR, SHR); + NV50_IR_OPCODE_CASE(U64SHR, SHR); + NV50_IR_OPCODE_CASE(IMUL_HI, MUL); NV50_IR_OPCODE_CASE(UMUL_HI, MUL); @@ -756,6 +909,9 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM); NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM); NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM); + NV50_IR_OPCODE_CASE(ATOMFADD, ATOM); + NV50_IR_OPCODE_CASE(ATOMDEC_WRAP, ATOM); + NV50_IR_OPCODE_CASE(ATOMINC_WRAP, ATOM); NV50_IR_OPCODE_CASE(TEX2, TEX); NV50_IR_OPCODE_CASE(TXB2, TXB); @@ -770,6 +926,14 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(IMSB, BFIND); NV50_IR_OPCODE_CASE(UMSB, BFIND); + NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE); + NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE); + NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE); + + NV50_IR_OPCODE_CASE(BALLOT, VOTE); + NV50_IR_OPCODE_CASE(READ_INVOC, SHFL); + NV50_IR_OPCODE_CASE(READ_FIRST, SHFL); + NV50_IR_OPCODE_CASE(END, EXIT); default: @@ -780,9 +944,6 @@ static nv50_ir::operation translateOpcode(uint opcode) static uint16_t opcodeToSubOp(uint opcode) { switch (opcode) { - case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL); - case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL); - case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL); case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD; case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH; case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS; @@ -793,9 +954,15 @@ static uint16_t opcodeToSubOp(uint opcode) case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; + case TGSI_OPCODE_ATOMFADD: return NV50_IR_SUBOP_ATOM_ADD; + case TGSI_OPCODE_ATOMDEC_WRAP: return NV50_IR_SUBOP_ATOM_DEC; + case TGSI_OPCODE_ATOMINC_WRAP: return NV50_IR_SUBOP_ATOM_INC; case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_UMUL_HI: return NV50_IR_SUBOP_MUL_HIGH; + case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL; + case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY; + case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI; default: return 0; } @@ -851,17 +1018,29 @@ public: }; std::vector textureViews; + /* struct Resource { uint8_t target; // TGSI_TEXTURE_* bool raw; uint8_t slot; // $surface index }; std::vector resources; + */ + + struct MemoryFile { + uint8_t mem_type; // TGSI_MEMORY_TYPE_* + }; + std::vector memoryFiles; + + std::vector bufferAtomics; private: int inferSysValDirection(unsigned sn) const; bool scanDeclaration(const struct tgsi_full_declaration *); bool scanInstruction(const struct tgsi_full_instruction *); + void scanInstructionSrc(const Instruction& insn, + const Instruction::SrcRegister& src, + unsigned mask); void scanProperty(const struct tgsi_full_property *); void scanImmediate(const struct tgsi_full_immediate *); @@ -904,6 +1083,8 @@ bool Source::scanSource() textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1); + memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); + bufferAtomics.resize(scan.file_max[TGSI_FILE_BUFFER] + 1); info->immd.bufSize = 0; @@ -913,7 +1094,7 @@ bool Source::scanSource() if (info->type == PIPE_SHADER_FRAGMENT) { info->prop.fp.writesDepth = scan.writes_z; - info->prop.fp.usesDiscard = scan.uses_kill; + info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase; } else if (info->type == PIPE_SHADER_GEOMETRY) { info->prop.gp.instanceCount = 1; // default value @@ -997,6 +1178,7 @@ void Source::scanProperty(const struct tgsi_full_property *prop) break; case TGSI_PROPERTY_FS_COORD_ORIGIN: case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + case TGSI_PROPERTY_FS_DEPTH_LAYOUT: // we don't care break; case TGSI_PROPERTY_VS_PROHIBIT_UCPS: @@ -1020,12 +1202,33 @@ void Source::scanProperty(const struct tgsi_full_property *prop) else info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */ break; + case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH: + info->prop.cp.numThreads[0] = prop->u[0].Data; + break; + case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT: + info->prop.cp.numThreads[1] = prop->u[0].Data; + break; + case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH: + info->prop.cp.numThreads[2] = prop->u[0].Data; + break; case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: info->io.clipDistances = prop->u[0].Data; break; case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: info->io.cullDistances = prop->u[0].Data; break; + case TGSI_PROPERTY_NEXT_SHADER: + /* Do not need to know the next shader stage. */ + break; + case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL: + info->prop.fp.earlyFragTests = prop->u[0].Data; + break; + case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE: + info->prop.fp.postDepthCoverage = prop->u[0].Data; + break; + case TGSI_PROPERTY_MUL_ZERO_WINS: + info->io.mul_zero_wins = prop->u[0].Data; + break; default: INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); break; @@ -1075,7 +1278,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) si = decl->Semantic.Index; } - if (decl->Declaration.Local) { + if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) { for (i = first; i <= last; ++i) { for (c = 0; c < 4; ++c) { locals.insert( @@ -1175,15 +1378,18 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) case TGSI_SEMANTIC_VERTEXID: info->io.vertexId = first; break; - case TGSI_SEMANTIC_SAMPLEID: - case TGSI_SEMANTIC_SAMPLEPOS: - info->prop.fp.sampleInterp = 1; - break; case TGSI_SEMANTIC_BASEVERTEX: case TGSI_SEMANTIC_BASEINSTANCE: case TGSI_SEMANTIC_DRAWID: info->prop.vp.usesDrawParameters = true; break; + case TGSI_SEMANTIC_SAMPLEID: + case TGSI_SEMANTIC_SAMPLEPOS: + info->prop.fp.persampleInvocation = true; + break; + case TGSI_SEMANTIC_SAMPLEMASK: + info->prop.fp.usesSampleMaskIn = true; + break; default: break; } @@ -1213,6 +1419,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) for (i = first; i <= last; ++i) textureViews[i].target = decl->SamplerView.Resource; break; + case TGSI_FILE_MEMORY: + for (i = first; i <= last; ++i) + memoryFiles[i].mem_type = decl->Declaration.MemType; + break; + case TGSI_FILE_NULL: case TGSI_FILE_TEMPORARY: for (i = first; i <= last; ++i) tempArrayId[i] = arrayId; @@ -1220,13 +1431,15 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair( first, last - first + 1))); break; - case TGSI_FILE_NULL: + case TGSI_FILE_BUFFER: + for (i = first; i <= last; ++i) + bufferAtomics[i] = decl->Declaration.Atomic; + break; case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - case TGSI_FILE_PREDICATE: case TGSI_FILE_SAMPLER: - case TGSI_FILE_BUFFER: + case TGSI_FILE_IMAGE: break; default: ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File); @@ -1242,6 +1455,58 @@ inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const insn.getSrc(0).getFile() == TGSI_FILE_INPUT; } +void Source::scanInstructionSrc(const Instruction& insn, + const Instruction::SrcRegister& src, + unsigned mask) +{ + if (src.getFile() == TGSI_FILE_TEMPORARY) { + if (src.isIndirect(0)) + indirectTempArrays.insert(src.getArrayId()); + } else + if (src.getFile() == TGSI_FILE_OUTPUT) { + if (src.isIndirect(0)) { + // We don't know which one is accessed, just mark everything for + // reading. This is an extremely unlikely occurrence. + for (unsigned i = 0; i < info->numOutputs; ++i) + info->out[i].oread = 1; + } else { + info->out[src.getIndex(0)].oread = 1; + } + } + if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) { + if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS) + info->prop.fp.readsSampleLocations = true; + } + if (src.getFile() != TGSI_FILE_INPUT) + return; + + if (src.isIndirect(0)) { + for (unsigned i = 0; i < info->numInputs; ++i) + info->in[i].mask = 0xf; + } else { + const int i = src.getIndex(0); + for (unsigned c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; + int k = src.getSwizzle(c); + if (k <= TGSI_SWIZZLE_W) + info->in[i].mask |= 1 << k; + } + switch (info->in[i].sn) { + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_PRIMID: + case TGSI_SEMANTIC_FOG: + info->in[i].mask &= 0x1; + break; + case TGSI_SEMANTIC_PCOORD: + info->in[i].mask &= 0x3; + break; + default: + break; + } + } +} + bool Source::scanInstruction(const struct tgsi_full_instruction *inst) { Instruction insn(inst); @@ -1249,10 +1514,30 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (insn.getOpcode() == TGSI_OPCODE_BARRIER) info->numBarriers = 1; + if (insn.getOpcode() == TGSI_OPCODE_FBFETCH) + info->prop.fp.readsFramebuffer = true; + + if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE) + info->prop.fp.readsSampleLocations = true; + + if (insn.getOpcode() == TGSI_OPCODE_DEMOTE) + info->prop.fp.usesDiscard = true; + if (insn.dstCount()) { - if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) { - Instruction::DstRegister dst = insn.getDst(0); + Instruction::DstRegister dst = insn.getDst(0); + + if (insn.getOpcode() == TGSI_OPCODE_STORE && + dst.getFile() != TGSI_FILE_MEMORY) { + info->io.globalAccess |= 0x2; + if (dst.getFile() == TGSI_FILE_INPUT) { + // TODO: Handle indirect somehow? + const int i = dst.getIndex(0); + info->in[i].mask |= 1; + } + } + + if (dst.getFile() == TGSI_FILE_OUTPUT) { if (dst.isIndirect(0)) for (unsigned i = 0; i < info->numOutputs; ++i) info->out[i].mask = 0xf; @@ -1269,65 +1554,50 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (isEdgeFlagPassthrough(insn)) info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); } else - if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { - if (insn.getDst(0).isIndirect(0)) - indirectTempArrays.insert(insn.getDst(0).getArrayId()); + if (dst.getFile() == TGSI_FILE_TEMPORARY) { + if (dst.isIndirect(0)) + indirectTempArrays.insert(dst.getArrayId()); } else - if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) { + if (dst.getFile() == TGSI_FILE_BUFFER || + dst.getFile() == TGSI_FILE_IMAGE || + (dst.getFile() == TGSI_FILE_MEMORY && + memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { info->io.globalAccess |= 0x2; } } - for (unsigned s = 0; s < insn.srcCount(); ++s) { - Instruction::SrcRegister src = insn.getSrc(s); - if (src.getFile() == TGSI_FILE_TEMPORARY) { - if (src.isIndirect(0)) - indirectTempArrays.insert(src.getArrayId()); - } else - if (src.getFile() == TGSI_FILE_BUFFER) { + if (insn.srcCount() && ( + insn.getSrc(0).getFile() != TGSI_FILE_MEMORY || + memoryFiles[insn.getSrc(0).getIndex(0)].mem_type == + TGSI_MEMORY_TYPE_GLOBAL)) { + switch (insn.getOpcode()) { + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: + case TGSI_OPCODE_ATOMDEC_WRAP: + case TGSI_OPCODE_ATOMINC_WRAP: + case TGSI_OPCODE_LOAD: info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? - 0x1 : 0x2; - } else - if (src.getFile() == TGSI_FILE_OUTPUT) { - if (src.isIndirect(0)) { - // We don't know which one is accessed, just mark everything for - // reading. This is an extremely unlikely occurrence. - for (unsigned i = 0; i < info->numOutputs; ++i) - info->out[i].oread = 1; - } else { - info->out[src.getIndex(0)].oread = 1; - } - } - if (src.getFile() != TGSI_FILE_INPUT) - continue; - unsigned mask = insn.srcMask(s); - - if (src.isIndirect(0)) { - for (unsigned i = 0; i < info->numInputs; ++i) - info->in[i].mask = 0xf; - } else { - const int i = src.getIndex(0); - for (unsigned c = 0; c < 4; ++c) { - if (!(mask & (1 << c))) - continue; - int k = src.getSwizzle(c); - if (k <= TGSI_SWIZZLE_W) - info->in[i].mask |= 1 << k; - } - switch (info->in[i].sn) { - case TGSI_SEMANTIC_PSIZE: - case TGSI_SEMANTIC_PRIMID: - case TGSI_SEMANTIC_FOG: - info->in[i].mask &= 0x1; - break; - case TGSI_SEMANTIC_PCOORD: - info->in[i].mask &= 0x3; - break; - default: - break; - } + 0x1 : 0x2; + break; } } + + + for (unsigned s = 0; s < insn.srcCount(); ++s) + scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s)); + + for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s) + scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask()); + return true; } @@ -1357,7 +1627,7 @@ namespace { using namespace nv50_ir; -class Converter : public BuildUtil +class Converter : public ConverterCommon { public: Converter(Program *, const tgsi::Source *); @@ -1366,18 +1636,12 @@ public: bool run(); private: - struct Subroutine - { - Subroutine(Function *f) : f(f) { } - Function *f; - ValueMap values; - }; - Value *shiftAddress(Value *); Value *getVertexBase(int s); Value *getOutputBase(int s); DataArray *getArrayForFile(unsigned file, int idx); Value *fetchSrc(int s, int c); + Value *fetchDst(int d, int c); Value *acquireDst(int d, int c); void storeDst(int d, int c, Value *); @@ -1392,10 +1656,10 @@ private: Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c); Symbol *dstToSym(tgsi::Instruction::DstRegister, int c); + bool isSubGroupMask(uint8_t semantic); + bool handleInstruction(const struct tgsi_full_instruction *); void exportOutputs(); - inline Subroutine *getSubroutine(unsigned ip); - inline Subroutine *getSubroutine(Function *); inline bool isEndOfSubroutine(uint ip); void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask); @@ -1405,11 +1669,11 @@ private: void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy); void handleTXF(Value *dst0[4], int R, int L_M); void handleTXQ(Value *dst0[4], enum TexQuery, int R); + void handleFBFETCH(Value *dst0[4]); void handleLIT(Value *dst0[4]); - void handleUserClipPlanes(); - Symbol *getResourceBase(int r); - void getResourceCoords(std::vector&, int r, int s); + // Symbol *getResourceBase(int r); + void getImageCoords(std::vector&, int s); void handleLOAD(Value *dst0[4]); void handleSTORE(); @@ -1417,8 +1681,6 @@ private: void handleINTERP(Value *dst0[4]); - uint8_t translateInterpMode(const struct nv50_ir_varying *var, - operation& op); Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); @@ -1450,12 +1712,6 @@ private: private: const tgsi::Source *code; - const struct nv50_ir_prog_info *info; - - struct { - std::map map; - Subroutine *cur; - } sub; uint ip; // instruction pointer @@ -1467,18 +1723,13 @@ private: DataArray tData; // TGSI_FILE_TEMPORARY DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays DataArray aData; // TGSI_FILE_ADDRESS - DataArray pData; // TGSI_FILE_PREDICATE DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers) Value *zero; - Value *fragCoord[4]; - Value *clipVtx[4]; Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP) uint8_t vtxBaseValid; - Value *outBase; // base address of vertex out patch (for TCP) - Stack condBBs; // fork BB, then else clause BB Stack joinBBs; // fork BB, for inserting join ops on ENDIF Stack loopBBs; // loop headers @@ -1516,6 +1767,26 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) sym->reg.fileIndex = fileIdx; + if (tgsiFile == TGSI_FILE_MEMORY) { + switch (code->memoryFiles[fileIdx].mem_type) { + case TGSI_MEMORY_TYPE_GLOBAL: + /* No-op this is the default for TGSI_FILE_MEMORY */ + sym->setFile(FILE_MEMORY_GLOBAL); + break; + case TGSI_MEMORY_TYPE_SHARED: + sym->setFile(FILE_MEMORY_SHARED); + break; + case TGSI_MEMORY_TYPE_INPUT: + assert(prog->getType() == Program::TYPE_COMPUTE); + assert(idx == -1); + sym->setFile(FILE_SHADER_INPUT); + address += info->prop.cp.inputOffset; + break; + default: + assert(0); /* TODO: Add support for global and private memory */ + } + } + if (idx >= 0) { if (sym->reg.file == FILE_SHADER_INPUT) sym->setOffset(info->in[idx].slot[c] * 4); @@ -1533,29 +1804,6 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) return sym; } -uint8_t -Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op) -{ - uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; - - if (var->flat) - mode = NV50_IR_INTERP_FLAT; - else - if (var->linear) - mode = NV50_IR_INTERP_LINEAR; - else - if (var->sc) - mode = NV50_IR_INTERP_SC; - - op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) - ? OP_PINTERP : OP_LINTERP; - - if (var->centroid || info->prop.fp.sampleInterp) - mode |= NV50_IR_INTERP_CENTROID; - - return mode; -} - Value * Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr) { @@ -1664,14 +1912,53 @@ Converter::fetchSrc(int s, int c) return applySrcMod(res, s, c); } +Value * +Converter::fetchDst(int d, int c) +{ + Value *res; + Value *ptr = NULL, *dimRel = NULL; + + tgsi::Instruction::DstRegister dst = tgsi.getDst(d); + + if (dst.isIndirect(0)) + ptr = fetchSrc(dst.getIndirect(0), 0, NULL); + + if (dst.is2D()) { + switch (dst.getFile()) { + case TGSI_FILE_OUTPUT: + assert(0); // TODO + dimRel = NULL; + break; + case TGSI_FILE_INPUT: + assert(0); // TODO + dimRel = NULL; + break; + case TGSI_FILE_CONSTANT: + // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k] + if (dst.isIndirect(1)) + dimRel = fetchSrc(dst.getIndirect(1), 0, 0); + break; + default: + break; + } + } + + struct tgsi_full_src_register fsr = dst.asSrc(); + tgsi::Instruction::SrcRegister src(&fsr); + res = fetchSrc(src, c, ptr); + + if (dimRel) + res->getInsn()->setIndirect(0, 1, dimRel); + + return res; +} + Converter::DataArray * Converter::getArrayForFile(unsigned file, int idx) { switch (file) { case TGSI_FILE_TEMPORARY: return idx == 0 ? &tData : &lData; - case TGSI_FILE_PREDICATE: - return &pData; case TGSI_FILE_ADDRESS: return &aData; case TGSI_FILE_OUTPUT: @@ -1703,6 +1990,21 @@ Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const idx += it->second; } +bool +Converter::isSubGroupMask(uint8_t semantic) +{ + switch (semantic) { + case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: + case TGSI_SEMANTIC_SUBGROUP_LT_MASK: + case TGSI_SEMANTIC_SUBGROUP_LE_MASK: + case TGSI_SEMANTIC_SUBGROUP_GT_MASK: + case TGSI_SEMANTIC_SUBGROUP_GE_MASK: + return true; + default: + return false; + } +} + Value * Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) { @@ -1745,6 +2047,13 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) return ld->getDef(0); case TGSI_FILE_SYSTEM_VALUE: assert(!ptr); + if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID && + info->prop.cp.numThreads[swz] == 1) + return loadImm(NULL, 0u); + if (isSubGroupMask(info->sv[idx].sn) && swz > 0) + return loadImm(NULL, 0u); + if (info->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE) + return loadImm(NULL, 32u); ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); ld->perPatch = info->sv[idx].patch; return ld->getDef(0); @@ -1769,7 +2078,8 @@ Converter::acquireDst(int d, int c) int idx = dst.getIndex(0); int idx2d = dst.is2D() ? dst.getIndex(1) : 0; - if (dst.isMasked(c) || f == TGSI_FILE_BUFFER) + if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY || + f == TGSI_FILE_IMAGE) return NULL; if (dst.isIndirect(0) || @@ -1828,6 +2138,7 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, /* Save the viewport index into a scratch register so that it can be exported at EMIT time */ if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX && + prog->getType() == Program::TYPE_GEOMETRY && viewport != NULL) mkOp1(OP_MOV, TYPE_U32, viewport, val); else @@ -1836,7 +2147,6 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, } } else if (f == TGSI_FILE_TEMPORARY || - f == TGSI_FILE_PREDICATE || f == TGSI_FILE_ADDRESS || f == TGSI_FILE_OUTPUT) { if (f == TGSI_FILE_TEMPORARY) { @@ -1864,12 +2174,14 @@ Converter::buildDot(int dim) Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0); Value *dotp = getScratch(); - mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1); + mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1) + ->dnz = info->io.mul_zero_wins; for (int c = 1; c < dim; ++c) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); - mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp); + mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp) + ->dnz = info->io.mul_zero_wins; } return dotp; } @@ -1891,6 +2203,16 @@ Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S) { unsigned rIdx = 0, sIdx = 0; + if (R >= 0 && tgsi.getSrc(R).getFile() != TGSI_FILE_SAMPLER) { + // This is the bindless case. We have to get the actual value and pass + // it in. This will be the complete handle. + tex->tex.rIndirectSrc = s; + tex->setSrc(s++, fetchSrc(R, 0)); + tex->setTexture(tgsi.getTexture(code, R), 0xff, 0x1f); + tex->tex.bindless = true; + return; + } + if (R >= 0) rIdx = tgsi.getSrc(R).getIndex(0); if (S >= 0) @@ -1975,7 +2297,6 @@ Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask) void Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) { - Value *val; Value *arg[4], *src[8]; Value *lod = NULL, *shd = NULL; unsigned int s, c, d; @@ -1986,15 +2307,23 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) for (s = 0; s < tgt.getArgCount(); ++s) arg[s] = src[s] = fetchSrc(0, s); - if (texi->op == OP_TXL || texi->op == OP_TXB) + if (tgsi.getOpcode() == TGSI_OPCODE_TEX_LZ) + lod = loadImm(NULL, 0); + else if (texi->op == OP_TXL || texi->op == OP_TXB) lod = fetchSrc(L >> 4, L & 3); if (C == 0x0f) C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src - if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && - tgt == TEX_TARGET_CUBE_ARRAY_SHADOW) - shd = fetchSrc(1, 0); + if (tgt == TEX_TARGET_CUBE_ARRAY_SHADOW) { + switch (tgsi.getOpcode()) { + case TGSI_OPCODE_TG4: shd = fetchSrc(1, 0); break; + case TGSI_OPCODE_TEX2: shd = fetchSrc(1, 0); break; + case TGSI_OPCODE_TXB2: shd = fetchSrc(1, 1); break; + case TGSI_OPCODE_TXL2: shd = fetchSrc(1, 1); break; + default: assert(!"unexpected opcode with cube array shadow"); break; + } + } else if (tgt.isShadow()) shd = fetchSrc(C >> 4, C & 3); @@ -2018,17 +2347,6 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) shd = src[n - 1]; } - if (tgt.isCube()) { - for (c = 0; c < 3; ++c) - src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]); - val = getScratch(); - mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); - mkOp2(OP_MAX, TYPE_F32, val, src[2], val); - mkOp1(OP_RCP, TYPE_F32, val, val); - for (c = 0; c < 3; ++c) - src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val); - } - for (c = 0, d = 0; c < 4; ++c) { if (dst[c]) { texi->setDef(d++, dst[c]); @@ -2048,6 +2366,11 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ) texi->tex.levelZero = true; + if (prog->getType() != Program::TYPE_FRAGMENT && + (tgsi.getOpcode() == TGSI_OPCODE_TEX || + tgsi.getOpcode() == TGSI_OPCODE_TEX2 || + tgsi.getOpcode() == TGSI_OPCODE_TXP)) + texi->tex.levelZero = true; if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow()) texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info); @@ -2084,7 +2407,10 @@ Converter::handleTXF(Value *dst[4], int R, int L_M) } for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c) texi->setSrc(c, fetchSrc(0, c)); - texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms + if (!ms && tgsi.getOpcode() == TGSI_OPCODE_TXF_LZ) + texi->setSrc(c++, loadImm(NULL, 0)); + else + texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms setTexRS(texi, c, R, -1); @@ -2099,6 +2425,40 @@ Converter::handleTXF(Value *dst[4], int R, int L_M) bb->insertTail(texi); } +void +Converter::handleFBFETCH(Value *dst[4]) +{ + TexInstruction *texi = new_TexInstruction(func, OP_TXF); + unsigned int c, d; + + texi->tex.target = TEX_TARGET_2D_MS_ARRAY; + texi->tex.levelZero = 1; + texi->tex.useOffsets = 0; + + for (c = 0, d = 0; c < 4; ++c) { + if (dst[c]) { + texi->setDef(d++, dst[c]); + texi->tex.mask |= 1 << c; + } + } + + Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0)); + Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1)); + Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0)); + Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0)); + + mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z; + mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z; + texi->setSrc(0, x); + texi->setSrc(1, y); + texi->setSrc(2, z); + texi->setSrc(3, ms); + + texi->tex.r = texi->tex.s = -1; + + bb->insertTail(texi); +} + void Converter::handleLIT(Value *dst0[4]) { @@ -2134,6 +2494,7 @@ Converter::handleLIT(Value *dst0[4]) } } +/* Keep this around for now as reference when adding img support static inline bool isResourceSpecial(const int r) { @@ -2164,7 +2525,8 @@ Converter::getResourceBase(const int r) switch (r) { case TGSI_RESOURCE_GLOBAL: - sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15); + sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, + info->io.auxCBSlot); break; case TGSI_RESOURCE_LOCAL: assert(prog->getType() == Program::TYPE_COMPUTE); @@ -2229,6 +2591,20 @@ partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask) } return n + 1; } +*/ +void +Converter::getImageCoords(std::vector &coords, int s) +{ + TexInstruction::Target t = + TexInstruction::Target(tgsi.getImageTarget()); + const int arg = t.getDim() + (t.isArray() || t.isCube()); + + for (int c = 0; c < arg; ++c) + coords.push_back(fetchSrc(s, c)); + + if (t.isMS()) + coords.push_back(fetchSrc(s, 3)); +} // For raw loads, granularity is 4 byte. // Usage of the texture read mask on OP_SULDP is not allowed. @@ -2238,29 +2614,79 @@ Converter::handleLOAD(Value *dst0[4]) const int r = tgsi.getSrc(0).getIndex(0); int c; std::vector off, src, ldv, def; + Value *ind = NULL; + + if (tgsi.getSrc(0).isIndirect(0)) + ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); - if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + switch (tgsi.getSrc(0).getFile()) { + case TGSI_FILE_BUFFER: + case TGSI_FILE_MEMORY: for (c = 0; c < 4; ++c) { if (!dst0[c]) continue; - Value *off = fetchSrc(1, c); + Value *off; Symbol *sym; + uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4; + if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, + tgsi.getSrc(1).getValueU32(0, info) + + src0_component_offset); } else { - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + // yzw are ignored for buffers + off = fetchSrc(1, 0); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, + src0_component_offset); } Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off); - ld->cache = tgsi.getCacheMode(); - if (tgsi.getSrc(0).isIndirect(0)) - ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER && + code->bufferAtomics[r]) + ld->cache = nv50_ir::CACHE_CG; + else + ld->cache = tgsi.getCacheMode(); + if (ind) + ld->setIndirect(0, 1, ind); } - return; + break; + default: { + getImageCoords(off, 1); + def.resize(4); + + for (c = 0; c < 4; ++c) { + if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c)) + def[c] = getScratch(); + else + def[c] = dst0[c]; + } + + bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchSrc(0, 0); + + TexInstruction *ld = + mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off); + ld->tex.mask = tgsi.getDst(0).getMask(); + ld->tex.format = tgsi.getImageFormat(); + ld->cache = tgsi.getCacheMode(); + ld->tex.bindless = bindless; + if (!bindless) + ld->tex.r = r; + if (ind) + ld->setIndirectR(ind); + + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + if (dst0[c] != def[c]) + mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); + break; + } } + +/* Keep this around for now as reference when adding img support getResourceCoords(off, r, 1); if (isResourceRaw(code, r)) { @@ -2326,6 +2752,7 @@ Converter::handleLOAD(Value *dst0[4]) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) if (dst0[c] != def[c]) mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); +*/ } // For formatted stores, the write mask on OP_SUSTP can be used. @@ -2336,8 +2763,14 @@ Converter::handleSTORE() const int r = tgsi.getDst(0).getIndex(0); int c; std::vector off, src, dummy; + Value *ind = NULL; - if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) { + if (tgsi.getDst(0).isIndirect(0)) + ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0); + + switch (tgsi.getDst(0).getFile()) { + case TGSI_FILE_BUFFER: + case TGSI_FILE_MEMORY: for (c = 0; c < 4; ++c) { if (!(tgsi.getDst(0).getMask() & (1 << c))) continue; @@ -2346,21 +2779,47 @@ Converter::handleSTORE() Value *off; if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, + sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, tgsi.getSrc(0).getValueU32(0, info) + 4 * c); } else { + // yzw are ignored for buffers off = fetchSrc(0, 0); - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c); } Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c)); st->cache = tgsi.getCacheMode(); - if (tgsi.getDst(0).isIndirect(0)) - st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0)); + if (ind) + st->setIndirect(0, 1, ind); } - return; + break; + default: { + getImageCoords(off, 0); + src = off; + + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + src.push_back(fetchSrc(1, c)); + + bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchDst(0, 0); + + TexInstruction *st = + mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src); + st->tex.mask = tgsi.getDst(0).getMask(); + st->tex.format = tgsi.getImageFormat(); + st->cache = tgsi.getCacheMode(); + st->tex.bindless = bindless; + if (!bindless) + st->tex.r = r; + if (ind) + st->setIndirectR(ind); + + break; + } } +/* Keep this around for now as reference when adding img support getResourceCoords(off, r, 0); src = off; const int s = src.size(); @@ -2408,6 +2867,7 @@ Converter::handleSTORE() mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0, dummy, src)->tex.mask = tgsi.getDst(0).getMask(); } +*/ } // XXX: These only work on resources with the single-component u32/s32 formats. @@ -2421,8 +2881,14 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) std::vector srcv; std::vector defv; LValue *dst = getScratch(); + Value *ind = NULL; + + if (tgsi.getSrc(0).isIndirect(0)) + ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); - if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + switch (tgsi.getSrc(0).getFile()) { + case TGSI_FILE_BUFFER: + case TGSI_FILE_MEMORY: for (int c = 0; c < 4; ++c) { if (!dst0[c]) continue; @@ -2431,25 +2897,56 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) Value *off = fetchSrc(1, c); Value *sym; if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info)); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, + tgsi.getSrc(1).getValueU32(c, info)); else - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0); - insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c)); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0); + if (subOp == NV50_IR_SUBOP_ATOM_CAS) + insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c)); + else + insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c)); if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE) insn->setIndirect(0, 0, off); - if (tgsi.getSrc(0).isIndirect(0)) - insn->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); + if (ind) + insn->setIndirect(0, 1, ind); insn->subOp = subOp; - if (subOp == NV50_IR_SUBOP_ATOM_CAS) - insn->setSrc(2, fetchSrc(3, 0)); } for (int c = 0; c < 4; ++c) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov - return; - } + break; + default: { + getImageCoords(srcv, 1); + defv.push_back(dst); + srcv.push_back(fetchSrc(2, 0)); + + if (subOp == NV50_IR_SUBOP_ATOM_CAS) + srcv.push_back(fetchSrc(3, 0)); + + bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchSrc(0, 0); + + TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(), + 0, 0, defv, srcv); + tex->subOp = subOp; + tex->tex.mask = 1; + tex->tex.format = tgsi.getImageFormat(); + tex->setType(ty); + tex->tex.bindless = bindless; + if (!bindless) + tex->tex.r = r; + if (ind) + tex->setIndirectR(ind); + for (int c = 0; c < 4; ++c) + if (dst0[c]) + dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov + break; + } + } +/* Keep this around for now as reference when adding img support getResourceCoords(srcv, r, 1); if (isResourceSpecial(r)) { @@ -2477,6 +2974,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) for (int c = 0; c < 4; ++c) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov +*/ } void @@ -2485,24 +2983,61 @@ Converter::handleINTERP(Value *dst[4]) // Check whether the input is linear. All other attributes ignored. Instruction *insn; Value *offset = NULL, *ptr = NULL, *w = NULL; + Symbol *sym[4] = { NULL }; bool linear; - operation op; - int c, mode; + operation op = OP_NOP; + int c, mode = 0; tgsi::Instruction::SrcRegister src = tgsi.getSrc(0); - assert(src.getFile() == TGSI_FILE_INPUT); - - if (src.isIndirect(0)) - ptr = fetchSrc(src.getIndirect(0), 0, NULL); - // XXX: no way to know interp mode if we don't know the index - linear = info->in[ptr ? 0 : src.getIndex(0)].linear; - if (linear) { - op = OP_LINTERP; - mode = NV50_IR_INTERP_LINEAR; + // In some odd cases, in large part due to varying packing, the source + // might not actually be an input. This is illegal TGSI, but it's easier to + // account for it here than it is to fix it where the TGSI is being + // generated. In that case, it's going to be a straight up mov (or sequence + // of mov's) from the input in question. We follow the mov chain to see + // which input we need to use. + if (src.getFile() != TGSI_FILE_INPUT) { + if (src.isIndirect(0)) { + ERROR("Ignoring indirect input interpolation\n"); + return; + } + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + Value *val = fetchSrc(0, c); + assert(val->defs.size() == 1); + insn = val->getInsn(); + while (insn->op == OP_MOV) { + assert(insn->getSrc(0)->defs.size() == 1); + insn = insn->getSrc(0)->getInsn(); + if (!insn) { + ERROR("Miscompiling shader due to unhandled INTERP\n"); + return; + } + } + if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) { + ERROR("Trying to interpolate non-input, this is not allowed.\n"); + return; + } + sym[c] = insn->getSrc(0)->asSym(); + assert(sym[c]); + op = insn->op; + mode = insn->ipa; + ptr = insn->getIndirect(0, 0); + } } else { - op = OP_PINTERP; - mode = NV50_IR_INTERP_PERSPECTIVE; + if (src.isIndirect(0)) + ptr = shiftAddress(fetchSrc(src.getIndirect(0), 0, NULL)); + + // We can assume that the fixed index will point to an input of the same + // interpolation type in case of an indirect. + // TODO: Make use of ArrayID. + linear = info->in[src.getIndex(0)].linear; + if (linear) { + op = OP_LINTERP; + mode = NV50_IR_INTERP_LINEAR; + } else { + op = OP_PINTERP; + mode = NV50_IR_INTERP_PERSPECTIVE; + } } switch (tgsi.getOpcode()) { @@ -2521,8 +3056,8 @@ Converter::handleINTERP(Value *dst[4]) // and then convert to s32. Value *offs[2]; for (c = 0; c < 2; c++) { - offs[c] = fetchSrc(1, c); - mkOp2(OP_MIN, TYPE_F32, offs[c], offs[c], loadImm(NULL, 0.4375f)); + offs[c] = getScratch(); + mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f)); mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f)); mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f)); mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]); @@ -2545,42 +3080,18 @@ Converter::handleINTERP(Value *dst[4]) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { - insn = mkOp1(op, TYPE_F32, dst[c], srcToSym(src, c)); + insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c)); if (op == OP_PINTERP) insn->setSrc(1, w); - if (ptr) - insn->setIndirect(0, 0, ptr); if (offset) insn->setSrc(op == OP_PINTERP ? 2 : 1, offset); + if (ptr) + insn->setIndirect(0, 0, ptr); insn->setInterpolate(mode); } } -Converter::Subroutine * -Converter::getSubroutine(unsigned ip) -{ - std::map::iterator it = sub.map.find(ip); - - if (it == sub.map.end()) - it = sub.map.insert(std::make_pair( - ip, Subroutine(new Function(prog, "SUB", ip)))).first; - - return &it->second; -} - -Converter::Subroutine * -Converter::getSubroutine(Function *f) -{ - unsigned ip = f->getLabel(); - std::map::iterator it = sub.map.find(ip); - - if (it == sub.map.end()) - it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first; - - return &it->second; -} - bool Converter::isEndOfSubroutine(uint ip) { @@ -2612,7 +3123,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; - if (tgsi.dstCount()) { + if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) { for (c = 0; c < 4; ++c) { rDst0[c] = acquireDst(0, c); dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c]; @@ -2642,31 +3153,36 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_SHL: case TGSI_OPCODE_ISHR: case TGSI_OPCODE_USHR: - case TGSI_OPCODE_SUB: case TGSI_OPCODE_XOR: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); geni = mkOp2(op, dstTy, dst0[c], src0, src1); geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); + if (op == OP_MUL && dstTy == TYPE_F32) + geni->dnz = info->io.mul_zero_wins; + geni->precise = insn->Instruction.Precise; } break; case TGSI_OPCODE_MAD: case TGSI_OPCODE_UMAD: - case TGSI_OPCODE_SAD: + case TGSI_OPCODE_FMA: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); - mkOp3(op, dstTy, dst0[c], src0, src1, src2); + geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2); + if (dstTy == TYPE_F32) + geni->dnz = info->io.mul_zero_wins; + geni->precise = insn->Instruction.Precise; } break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_ABS: case TGSI_OPCODE_CEIL: case TGSI_OPCODE_FLR: case TGSI_OPCODE_TRUNC: case TGSI_OPCODE_RCP: + case TGSI_OPCODE_SQRT: case TGSI_OPCODE_IABS: case TGSI_OPCODE_INEG: case TGSI_OPCODE_NOT: @@ -2724,19 +3240,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkOp1(op, TYPE_F32, dst0[3], val0); } break; - case TGSI_OPCODE_SCS: - if (mask & 3) { - val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0)); - if (dst0[0]) - mkOp1(OP_COS, TYPE_F32, dst0[0], val0); - if (dst0[1]) - mkOp1(OP_SIN, TYPE_F32, dst0[1], val0); - } - if (dst0[2]) - loadImm(dst0[2], 0.0f); - if (dst0[3]) - loadImm(dst0[3], 1.0f); - break; case TGSI_OPCODE_EXP: src0 = fetchSrc(0, 0); val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0); @@ -2757,7 +3260,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) if (dst0[1]) { mkOp1(OP_EX2, TYPE_F32, dst0[1], val1); mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]); - mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0); + mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0) + ->dnz = info->io.mul_zero_wins; } if (dst0[3]) loadImm(dst0[3], 1.0f); @@ -2777,20 +3281,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) mkMov(dst0[c], val0); break; - case TGSI_OPCODE_DPH: - val0 = buildDot(3); - src1 = fetchSrc(1, 3); - mkOp2(OP_ADD, TYPE_F32, val0, val0, src1); - FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) - mkMov(dst0[c], val0); - break; case TGSI_OPCODE_DST: if (dst0[0]) loadImm(dst0[0], 1.0f); if (dst0[1]) { src0 = fetchSrc(0, 1); src1 = fetchSrc(1, 1); - mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1); + mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1) + ->dnz = info->io.mul_zero_wins; } if (dst0[2]) mkMov(dst0[2], fetchSrc(0, 2)); @@ -2803,29 +3301,13 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); mkOp3(OP_MAD, TYPE_F32, dst0[c], - mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2); + mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2) + ->dnz = info->io.mul_zero_wins; } break; case TGSI_OPCODE_LIT: handleLIT(dst0); break; - case TGSI_OPCODE_XPD: - FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { - if (c < 3) { - val0 = getSSA(); - src0 = fetchSrc(1, (c + 1) % 3); - src1 = fetchSrc(0, (c + 2) % 3); - mkOp2(OP_MUL, TYPE_F32, val0, src0, src1); - mkOp1(OP_NEG, TYPE_F32, val0, val0); - - src0 = fetchSrc(0, (c + 1) % 3); - src1 = fetchSrc(1, (c + 2) % 3); - mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0); - } else { - loadImm(dst0[c], 1.0f); - } - } - break; case TGSI_OPCODE_ISSG: case TGSI_OPCODE_SSG: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { @@ -2868,16 +3350,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c)) ->rnd = ROUND_NI; break; - case TGSI_OPCODE_CLAMP: - FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { - src0 = fetchSrc(0, c); - src1 = fetchSrc(1, c); - src2 = fetchSrc(2, c); - val0 = getScratch(); - mkOp2(OP_MIN, TYPE_F32, val0, src0, src1); - mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2); - } - break; case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: case TGSI_OPCODE_SEQ: @@ -2900,6 +3372,56 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1); } break; + case TGSI_OPCODE_VOTE_ALL: + case TGSI_OPCODE_VOTE_ANY: + case TGSI_OPCODE_VOTE_EQ: + val0 = new_LValue(func, FILE_PREDICATE); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero); + mkOp1(op, dstTy, val0, val0) + ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); + mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0); + } + break; + case TGSI_OPCODE_BALLOT: + if (!tgsi.getDst(0).isMasked(0)) { + val0 = new_LValue(func, FILE_PREDICATE); + mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, 0), zero); + mkOp1(op, TYPE_U32, dst0[0], val0)->subOp = NV50_IR_SUBOP_VOTE_ANY; + } + if (!tgsi.getDst(0).isMasked(1)) + mkMov(dst0[1], zero, TYPE_U32); + break; + case TGSI_OPCODE_READ_FIRST: + // ReadFirstInvocationARB(src) is implemented as + // ReadInvocationARB(src, findLSB(ballot(true))) + val0 = getScratch(); + mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; + mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000)) + ->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT; + src1 = val0; + /* fallthrough */ + case TGSI_OPCODE_READ_INVOC: + if (tgsi.getOpcode() == TGSI_OPCODE_READ_INVOC) + src1 = fetchSrc(1, 0); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + geni = mkOp3(op, dstTy, dst0[c], fetchSrc(0, c), src1, mkImm(0x1f)); + geni->subOp = NV50_IR_SUBOP_SHFL_IDX; + } + break; + case TGSI_OPCODE_CLOCK: + // Stick the 32-bit clock into the high dword of the logical result. + if (!tgsi.getDst(0).isMasked(0)) + mkOp1(OP_MOV, TYPE_U32, dst0[0], zero); + if (!tgsi.getDst(0).isMasked(1)) + mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1; + break; + case TGSI_OPCODE_READ_HELPER: + if (!tgsi.getDst(0).isMasked(0)) + mkOp1(OP_RDSV, TYPE_U32, dst0[0], mkSysVal(SV_THREAD_KILL, 0)) + ->fixed = 1; + break; case TGSI_OPCODE_KILL_IF: val0 = new_LValue(func, FILE_PREDICATE); mask = 0; @@ -2913,9 +3435,13 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) } break; case TGSI_OPCODE_KILL: + case TGSI_OPCODE_DEMOTE: + // TODO: Should we make KILL exit that invocation? Some old shaders + // don't like that. mkOp(OP_DISCARD, TYPE_NONE, NULL); break; case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TEX_LZ: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: @@ -2944,6 +3470,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_SAMPLE_C_LZ: handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40); break; + case TGSI_OPCODE_TXF_LZ: case TGSI_OPCODE_TXF: handleTXF(dst0, 1, 0x03); break; @@ -2965,6 +3492,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) handleTXQ(dst0, TXQ_TYPE, 0); std::swap(dst0[0], dst0[2]); break; + case TGSI_OPCODE_FBFETCH: + handleFBFETCH(dst0); + break; case TGSI_OPCODE_F2I: case TGSI_OPCODE_F2U: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) @@ -2997,6 +3527,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) info->out[info->io.viewportId].slot[0] * 4); mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport); } + /* handle user clip planes for each emitted vertex */ + if (info->io.genUserClip > 0) + handleUserClipPlanes(); /* fallthrough */ case TGSI_OPCODE_ENDPRIM: { @@ -3004,6 +3537,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) unsigned int stream = tgsi.getSrc(0).getValueU32(0, info); if (stream && op == OP_RESTART) break; + if (info->prop.gp.maxVertices == 0) + break; src0 = mkImm(stream); mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1; break; @@ -3149,10 +3684,13 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) if (!isEndOfSubroutine(ip + 1)) { // insert a PRERET at the entry if this is an early return // (only needed for sharing code in the epilogue) - BasicBlock *pos = getBB(); - setPosition(BasicBlock::get(func->cfg.getRoot()), false); - mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1; - setPosition(pos, true); + BasicBlock *root = BasicBlock::get(func->cfg.getRoot()); + if (root->getEntry() == NULL || root->getEntry()->op != OP_PRERET) { + BasicBlock *pos = getBB(); + setPosition(root, false); + mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1; + setPosition(pos, true); + } } mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1; bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS); @@ -3166,7 +3704,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) setPosition(epilogue, true); if (prog->getType() == Program::TYPE_FRAGMENT) exportOutputs(); - if (info->io.genUserClip > 0) + if ((prog->getType() == Program::TYPE_VERTEX || + prog->getType() == Program::TYPE_TESSELLATION_EVAL + ) && info->io.genUserClip > 0) handleUserClipPlanes(); mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; } @@ -3187,12 +3727,16 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) geni->fixed = 1; geni->subOp = NV50_IR_SUBOP_BAR_SYNC; break; - case TGSI_OPCODE_MFENCE: - case TGSI_OPCODE_LFENCE: - case TGSI_OPCODE_SFENCE: + case TGSI_OPCODE_MEMBAR: + { + uint32_t level = tgsi.getSrc(0).getValueU32(0, info); geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); geni->fixed = 1; - geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); + if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED))) + geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA); + else + geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL); + } break; case TGSI_OPCODE_ATOMUADD: case TGSI_OPCODE_ATOMXCHG: @@ -3204,28 +3748,57 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: + case TGSI_OPCODE_ATOMDEC_WRAP: + case TGSI_OPCODE_ATOMINC_WRAP: handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); break; case TGSI_OPCODE_RESQ: - geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0], - makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0)); - if (tgsi.getSrc(0).isIndirect(0)) - geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + Value *ind = NULL; + if (tgsi.getSrc(0).isIndirect(0)) + ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); + geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0], + makeSym(tgsi.getSrc(0).getFile(), + tgsi.getSrc(0).getIndex(0), -1, 0, 0)); + if (ind) + geni->setIndirect(0, 1, ind); + } else { + TexInstruction *texi = new_TexInstruction(func, OP_SUQ); + for (int c = 0, d = 0; c < 4; ++c) { + if (dst0[c]) { + texi->setDef(d++, dst0[c]); + texi->tex.mask |= 1 << c; + } + } + if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) { + texi->tex.r = tgsi.getSrc(0).getIndex(0); + if (tgsi.getSrc(0).isIndirect(0)) + texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); + } else { + texi->tex.bindless = true; + texi->setIndirectR(fetchSrc(0, 0)); + } + texi->tex.target = tgsi.getImageTarget(); + + bb->insertTail(texi); + } break; case TGSI_OPCODE_IBFE: case TGSI_OPCODE_UBFE: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); + val0 = getScratch(); if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE && tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) { - src1 = loadImm(NULL, tgsi.getSrc(2).getValueU32(c, info) << 8 | - tgsi.getSrc(1).getValueU32(c, info)); + loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) | + tgsi.getSrc(1).getValueU32(c, info)); } else { src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); - mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1); + mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1); } - mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1); + mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0); } break; case TGSI_OPCODE_BFI: @@ -3234,16 +3807,18 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); src3 = fetchSrc(3, c); - mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2); - mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0); + val0 = getScratch(); + mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2); + mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0); } break; case TGSI_OPCODE_LSB: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); - geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000)); + val0 = getScratch(); + geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000)); geni->subOp = NV50_IR_SUBOP_EXTBF_REV; - geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0); + geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0); geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; } break; @@ -3272,6 +3847,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_INTERP_OFFSET: handleINTERP(dst0); break; + case TGSI_OPCODE_I642F: + case TGSI_OPCODE_U642F: case TGSI_OPCODE_D2I: case TGSI_OPCODE_D2U: case TGSI_OPCODE_D2F: { @@ -3281,21 +3858,86 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src0 = fetchSrc(0, pos); src1 = fetchSrc(0, pos + 1); mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1); - mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg); + Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg); + if (!isFloatType(dstTy)) + cvt->rnd = ROUND_Z; pos += 2; } break; } + case TGSI_OPCODE_I2I64: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + dst0[c] = fetchSrc(0, c / 2); + mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31)); + c++; + } + break; + case TGSI_OPCODE_U2I64: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + dst0[c] = fetchSrc(0, c / 2); + dst0[c + 1] = zero; + c++; + } + break; + case TGSI_OPCODE_F2I64: + case TGSI_OPCODE_F2U64: case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: case TGSI_OPCODE_F2D: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { Value *dreg = getSSA(8); - mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2)); + Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2)); + if (!isFloatType(dstTy)) + cvt->rnd = ROUND_Z; mkSplit(&dst0[c], 4, dreg); c++; } break; + case TGSI_OPCODE_D2I64: + case TGSI_OPCODE_D2U64: + case TGSI_OPCODE_I642D: + case TGSI_OPCODE_U642D: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0); + if (!isFloatType(dstTy)) + cvt->rnd = ROUND_Z; + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_I64NEG: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + mkOp2(OP_SUB, dstTy, dst, zero, src0); + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_I64ABS: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *neg = getSSA(8), *srcComp[2], *negComp[2]; + srcComp[0] = fetchSrc(0, c); + srcComp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]); + mkOp2(OP_SUB, dstTy, neg, zero, src0); + mkSplit(negComp, 4, neg); + mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32, + negComp[0], srcComp[0], srcComp[1]); + mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32, + negComp[1], srcComp[1], srcComp[1]); + c++; + } + break; case TGSI_OPCODE_DABS: case TGSI_OPCODE_DNEG: case TGSI_OPCODE_DRCP: @@ -3328,6 +3970,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) c++; } break; + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_U64SGE: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_I64SGE: case TGSI_OPCODE_DSLT: case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSEQ: @@ -3349,8 +3997,51 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) } break; } + case TGSI_OPCODE_U64MIN: + case TGSI_OPCODE_U64MAX: + case TGSI_OPCODE_I64MIN: + case TGSI_OPCODE_I64MAX: { + dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32; + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + Value *flag = getSSA(1, FILE_FLAGS); + src0 = fetchSrc(0, c + 1); + src1 = fetchSrc(1, c + 1); + geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1); + geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH; + geni->setFlagsDef(1, flag); + + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1); + geni->subOp = NV50_IR_SUBOP_MINMAX_LOW; + geni->setFlagsSrc(2, flag); + + c++; + } + break; + } + case TGSI_OPCODE_U64SHL: + case TGSI_OPCODE_I64SHR: + case TGSI_OPCODE_U64SHR: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + // Theoretically src1 is a 64-bit value but in practice only the low + // bits matter. The IR expects this to be a 32-bit value. + src1 = fetchSrc(1, c); + mkOp2(op, dstTy, dst, src0, src1); + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_U64ADD: + case TGSI_OPCODE_U64MUL: case TGSI_OPCODE_DADD: case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DDIV: case TGSI_OPCODE_DMAX: case TGSI_OPCODE_DMIN: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { @@ -3369,6 +4060,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) } break; case TGSI_OPCODE_DMAD: + case TGSI_OPCODE_DFMA: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = getSSA(8); src1 = getSSA(8); @@ -3421,13 +4113,30 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) c++; } break; + case TGSI_OPCODE_I64SSG: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + + val0 = getScratch(); + val1 = getScratch(); + mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero); + mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero); + mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0); + mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31)); + c++; + } + break; default: ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); assert(0); break; } - if (tgsi.dstCount()) { + if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) { for (c = 0; c < 4; ++c) { if (!dst0[c]) continue; @@ -3442,37 +4151,30 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) } void -Converter::handleUserClipPlanes() +Converter::exportOutputs() { - Value *res[8]; - int n, i, c; + if (info->io.alphaRefBase) { + for (unsigned int i = 0; i < info->numOutputs; ++i) { + if (info->out[i].sn != TGSI_SEMANTIC_COLOR || + info->out[i].si != 0) + continue; + const unsigned int c = 3; + if (!oData.exists(sub.cur->values, i, c)) + continue; + Value *val = oData.load(sub.cur->values, i, c, NULL); + if (!val) + continue; - for (c = 0; c < 4; ++c) { - for (i = 0; i < info->io.genUserClip; ++i) { - Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot, - TYPE_F32, info->io.ucpBase + i * 16 + c * 4); - Value *ucp = mkLoadv(TYPE_F32, sym, NULL); - if (c == 0) - res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); - else - mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]); + Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot, + TYPE_U32, info->io.alphaRefBase); + Value *pred = new_LValue(func, FILE_PREDICATE); + mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val, + mkLoadv(TYPE_U32, ref, NULL)) + ->subOp = 1; + mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred); } } - const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; - - for (i = 0; i < info->io.genUserClip; ++i) { - n = i / 4 + first; - c = i % 4; - Symbol *sym = - mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4); - mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]); - } -} - -void -Converter::exportOutputs() -{ for (unsigned int i = 0; i < info->numOutputs; ++i) { for (unsigned int c = 0; c < 4; ++c) { if (!oData.exists(sub.cur->values, i, c)) @@ -3480,27 +4182,26 @@ Converter::exportOutputs() Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[i].slot[c] * 4); Value *val = oData.load(sub.cur->values, i, c, NULL); - if (val) + if (val) { + if (info->out[i].sn == TGSI_SEMANTIC_POSITION) + mkOp1(OP_SAT, TYPE_F32, val, val); mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); + } } } } -Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), +Converter::Converter(Program *ir, const tgsi::Source *code) : ConverterCommon(ir, code->info), code(code), tgsi(NULL), - tData(this), lData(this), aData(this), pData(this), oData(this) + tData(this), lData(this), aData(this), oData(this) { - info = code->info; - const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY); - const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE); const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS); const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT); tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0); lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0); - pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0); oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0); @@ -3569,7 +4270,7 @@ Converter::BindArgumentsPass::visit(Function *f) } } - if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE) + if (func == prog->main /* && prog->getType() != Program::TYPE_COMPUTE */) return true; updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet, &Function::buildLiveSets, &Function::ins);