X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fcodegen%2Fnv50_ir_from_tgsi.cpp;h=9d0ab336c751bbdb3dba76e27c8baca36e46b9ca;hb=e8d9be40cbaf4922adec34a2e8106c04e2c7660d;hp=3e4f26c6bb1ad13bac9451366ad870be01a8fac6;hpb=cd45d758ff87305ceecca899fe7325779bb6755b;p=mesa.git diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 3e4f26c6bb1..9d0ab336c75 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_util.h" @@ -27,8 +28,8 @@ #include #include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_from_common.h" #include "codegen/nv50_ir_util.h" -#include "codegen/nv50_ir_build_util.h" namespace tgsi { @@ -163,6 +164,12 @@ public: return SrcRegister(fdr->Indirect); } + struct tgsi_full_src_register asSrc() + { + assert(fdr); + return tgsi_full_src_register_from_dst(fdr); + } + int getArrayId() const { if (isIndirect(0)) @@ -216,6 +223,15 @@ public: nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; + const nv50_ir::TexInstruction::ImgFormatDesc *getImageFormat() const { + return &nv50_ir::TexInstruction::formatTable[ + translateImgFormat(insn->Memory.Format)]; + } + + nv50_ir::TexTarget getImageTarget() const { + return translateTexture(insn->Memory.Texture); + } + nv50_ir::CacheMode getCacheMode() const { if (!insn->Instruction.Memory) return nv50_ir::CACHE_CA; @@ -277,7 +293,6 @@ unsigned int Instruction::srcMask(unsigned int s) const case TGSI_OPCODE_DP3: return 0x7; case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */ return 0xf; case TGSI_OPCODE_DST: @@ -289,7 +304,6 @@ unsigned int Instruction::srcMask(unsigned int s) const case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SCS: return 0x1; case TGSI_OPCODE_IF: case TGSI_OPCODE_UIF: @@ -305,6 +319,10 @@ unsigned int Instruction::srcMask(unsigned int s) const case TGSI_OPCODE_TXD: case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TG4: + case TGSI_OPCODE_TEX_LZ: + case TGSI_OPCODE_TXF_LZ: case TGSI_OPCODE_LODQ: { const struct tgsi_instruction_texture *tex = &insn->Texture; @@ -313,6 +331,8 @@ unsigned int Instruction::srcMask(unsigned int s) const mask = 0x7; if (insn->Instruction.Opcode != TGSI_OPCODE_TEX && + insn->Instruction.Opcode != TGSI_OPCODE_TEX_LZ && + insn->Instruction.Opcode != TGSI_OPCODE_TXF_LZ && insn->Instruction.Opcode != TGSI_OPCODE_TXD) mask |= 0x8; /* bias, lod or proj */ @@ -339,14 +359,8 @@ unsigned int Instruction::srcMask(unsigned int s) const } } return mask; - case TGSI_OPCODE_XPD: - { - unsigned int x = 0; - if (mask & 1) x |= 0x6; - if (mask & 2) x |= 0x5; - if (mask & 4) x |= 0x3; - return x; - } + case TGSI_OPCODE_TXQ: + return 1; case TGSI_OPCODE_D2I: case TGSI_OPCODE_D2U: case TGSI_OPCODE_D2F: @@ -354,6 +368,14 @@ unsigned int Instruction::srcMask(unsigned int s) const case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSEQ: case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_U64SGE: + case TGSI_OPCODE_I642F: + case TGSI_OPCODE_U642F: switch (util_bitcount(mask)) { case 1: return 0x3; case 2: return 0xf; @@ -401,7 +423,6 @@ static nv50_ir::DataFile translateFile(uint file) case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT; case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR; case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS; - case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; @@ -439,6 +460,12 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval) case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE; case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID; case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM; + case TGSI_SEMANTIC_SUBGROUP_INVOCATION: return nv50_ir::SV_LANEID; + case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: return nv50_ir::SV_LANEMASK_EQ; + case TGSI_SEMANTIC_SUBGROUP_LT_MASK: return nv50_ir::SV_LANEMASK_LT; + case TGSI_SEMANTIC_SUBGROUP_LE_MASK: return nv50_ir::SV_LANEMASK_LE; + case TGSI_SEMANTIC_SUBGROUP_GT_MASK: return nv50_ir::SV_LANEMASK_GT; + case TGSI_SEMANTIC_SUBGROUP_GE_MASK: return nv50_ir::SV_LANEMASK_GE; default: assert(0); return nv50_ir::SV_CLOCK; @@ -557,6 +584,7 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_SHL: case TGSI_OPCODE_U2F: case TGSI_OPCODE_U2D: + case TGSI_OPCODE_U2I64: case TGSI_OPCODE_UADD: case TGSI_OPCODE_UDIV: case TGSI_OPCODE_UMOD: @@ -587,6 +615,7 @@ nv50_ir::DataType Instruction::inferSrcType() const return nv50_ir::TYPE_U32; case TGSI_OPCODE_I2F: case TGSI_OPCODE_I2D: + case TGSI_OPCODE_I2I64: case TGSI_OPCODE_IDIV: case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_IMAX: @@ -597,7 +626,6 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_ISHR: case TGSI_OPCODE_ISLT: case TGSI_OPCODE_ISSG: - case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version case TGSI_OPCODE_MOD: case TGSI_OPCODE_UARL: case TGSI_OPCODE_ATOMIMIN: @@ -608,10 +636,13 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_D2F: case TGSI_OPCODE_D2I: case TGSI_OPCODE_D2U: + case TGSI_OPCODE_D2I64: + case TGSI_OPCODE_D2U64: case TGSI_OPCODE_DABS: case TGSI_OPCODE_DNEG: case TGSI_OPCODE_DADD: case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DDIV: case TGSI_OPCODE_DMAX: case TGSI_OPCODE_DMIN: case TGSI_OPCODE_DSLT: @@ -629,6 +660,34 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_DFLR: case TGSI_OPCODE_DROUND: return nv50_ir::TYPE_F64; + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_U64SGE: + case TGSI_OPCODE_U64MIN: + case TGSI_OPCODE_U64MAX: + case TGSI_OPCODE_U64ADD: + case TGSI_OPCODE_U64MUL: + case TGSI_OPCODE_U64SHL: + case TGSI_OPCODE_U64SHR: + case TGSI_OPCODE_U64DIV: + case TGSI_OPCODE_U64MOD: + case TGSI_OPCODE_U642F: + case TGSI_OPCODE_U642D: + return nv50_ir::TYPE_U64; + case TGSI_OPCODE_I64ABS: + case TGSI_OPCODE_I64SSG: + case TGSI_OPCODE_I64NEG: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_I64MIN: + case TGSI_OPCODE_I64MAX: + case TGSI_OPCODE_I64SHR: + case TGSI_OPCODE_I64DIV: + case TGSI_OPCODE_I64MOD: + case TGSI_OPCODE_I642F: + case TGSI_OPCODE_I642D: + return nv50_ir::TYPE_S64; default: return nv50_ir::TYPE_F32; } @@ -649,17 +708,35 @@ nv50_ir::DataType Instruction::inferDstType() const case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSLT: case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_U64SGE: case TGSI_OPCODE_PK2H: return nv50_ir::TYPE_U32; case TGSI_OPCODE_I2F: case TGSI_OPCODE_U2F: case TGSI_OPCODE_D2F: + case TGSI_OPCODE_I642F: + case TGSI_OPCODE_U642F: case TGSI_OPCODE_UP2H: return nv50_ir::TYPE_F32; case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: case TGSI_OPCODE_F2D: + case TGSI_OPCODE_I642D: + case TGSI_OPCODE_U642D: return nv50_ir::TYPE_F64; + case TGSI_OPCODE_I2I64: + case TGSI_OPCODE_U2I64: + case TGSI_OPCODE_F2I64: + case TGSI_OPCODE_D2I64: + return nv50_ir::TYPE_S64; + case TGSI_OPCODE_F2U64: + case TGSI_OPCODE_D2U64: + return nv50_ir::TYPE_U64; default: return inferSrcType(); } @@ -675,6 +752,8 @@ nv50_ir::CondCode Instruction::getSetCond() const case TGSI_OPCODE_USLT: case TGSI_OPCODE_FSLT: case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_U64SLT: return CC_LT; case TGSI_OPCODE_SLE: return CC_LE; @@ -683,6 +762,8 @@ nv50_ir::CondCode Instruction::getSetCond() const case TGSI_OPCODE_USGE: case TGSI_OPCODE_FSGE: case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_I64SGE: + case TGSI_OPCODE_U64SGE: return CC_GE; case TGSI_OPCODE_SGT: return CC_GT; @@ -690,10 +771,12 @@ nv50_ir::CondCode Instruction::getSetCond() const case TGSI_OPCODE_USEQ: case TGSI_OPCODE_FSEQ: case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_U64SEQ: return CC_EQ; case TGSI_OPCODE_SNE: case TGSI_OPCODE_FSNE: case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_U64SNE: return CC_NEU; case TGSI_OPCODE_USNE: return CC_NE; @@ -723,7 +806,6 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(SGE, SET); NV50_IR_OPCODE_CASE(MAD, MAD); NV50_IR_OPCODE_CASE(FMA, FMA); - NV50_IR_OPCODE_CASE(SUB, SUB); NV50_IR_OPCODE_CASE(FLR, FLOOR); NV50_IR_OPCODE_CASE(ROUND, CVT); @@ -731,8 +813,6 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(LG2, LG2); NV50_IR_OPCODE_CASE(POW, POW); - NV50_IR_OPCODE_CASE(ABS, ABS); - NV50_IR_OPCODE_CASE(COS, COS); NV50_IR_OPCODE_CASE(DDX, DFDX); NV50_IR_OPCODE_CASE(DDX_FINE, DFDX); @@ -758,6 +838,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(DIV, DIV); NV50_IR_OPCODE_CASE(TXL, TXL); + NV50_IR_OPCODE_CASE(TEX_LZ, TXL); NV50_IR_OPCODE_CASE(CEIL, CEIL); NV50_IR_OPCODE_CASE(I2F, CVT); @@ -769,8 +850,8 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(OR, OR); NV50_IR_OPCODE_CASE(MOD, MOD); NV50_IR_OPCODE_CASE(XOR, XOR); - NV50_IR_OPCODE_CASE(SAD, SAD); NV50_IR_OPCODE_CASE(TXF, TXF); + NV50_IR_OPCODE_CASE(TXF_LZ, TXF); NV50_IR_OPCODE_CASE(TXQ, TXQ); NV50_IR_OPCODE_CASE(TXQS, TXQ); NV50_IR_OPCODE_CASE(TG4, TXG); @@ -813,6 +894,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(DNEG, NEG); NV50_IR_OPCODE_CASE(DADD, ADD); NV50_IR_OPCODE_CASE(DMUL, MUL); + NV50_IR_OPCODE_CASE(DDIV, DIV); NV50_IR_OPCODE_CASE(DMAX, MAX); NV50_IR_OPCODE_CASE(DMIN, MIN); NV50_IR_OPCODE_CASE(DSLT, SET); @@ -833,6 +915,35 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(DFLR, FLOOR); NV50_IR_OPCODE_CASE(DROUND, CVT); + NV50_IR_OPCODE_CASE(U64SEQ, SET); + NV50_IR_OPCODE_CASE(U64SNE, SET); + NV50_IR_OPCODE_CASE(U64SLT, SET); + NV50_IR_OPCODE_CASE(U64SGE, SET); + NV50_IR_OPCODE_CASE(I64SLT, SET); + NV50_IR_OPCODE_CASE(I64SGE, SET); + NV50_IR_OPCODE_CASE(I2I64, CVT); + NV50_IR_OPCODE_CASE(U2I64, CVT); + NV50_IR_OPCODE_CASE(F2I64, CVT); + NV50_IR_OPCODE_CASE(F2U64, CVT); + NV50_IR_OPCODE_CASE(D2I64, CVT); + NV50_IR_OPCODE_CASE(D2U64, CVT); + NV50_IR_OPCODE_CASE(I642F, CVT); + NV50_IR_OPCODE_CASE(U642F, CVT); + NV50_IR_OPCODE_CASE(I642D, CVT); + NV50_IR_OPCODE_CASE(U642D, CVT); + + NV50_IR_OPCODE_CASE(I64MIN, MIN); + NV50_IR_OPCODE_CASE(U64MIN, MIN); + NV50_IR_OPCODE_CASE(I64MAX, MAX); + NV50_IR_OPCODE_CASE(U64MAX, MAX); + NV50_IR_OPCODE_CASE(I64ABS, ABS); + NV50_IR_OPCODE_CASE(I64NEG, NEG); + NV50_IR_OPCODE_CASE(U64ADD, ADD); + NV50_IR_OPCODE_CASE(U64MUL, MUL); + NV50_IR_OPCODE_CASE(U64SHL, SHL); + NV50_IR_OPCODE_CASE(I64SHR, SHR); + NV50_IR_OPCODE_CASE(U64SHR, SHR); + NV50_IR_OPCODE_CASE(IMUL_HI, MUL); NV50_IR_OPCODE_CASE(UMUL_HI, MUL); @@ -857,6 +968,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM); NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM); NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM); + NV50_IR_OPCODE_CASE(ATOMFADD, ATOM); NV50_IR_OPCODE_CASE(TEX2, TEX); NV50_IR_OPCODE_CASE(TXB2, TXB); @@ -875,6 +987,10 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE); NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE); + NV50_IR_OPCODE_CASE(BALLOT, VOTE); + NV50_IR_OPCODE_CASE(READ_INVOC, SHFL); + NV50_IR_OPCODE_CASE(READ_FIRST, SHFL); + NV50_IR_OPCODE_CASE(END, EXIT); default: @@ -885,9 +1001,6 @@ static nv50_ir::operation translateOpcode(uint opcode) static uint16_t opcodeToSubOp(uint opcode) { switch (opcode) { - case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL); - case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL); - case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL); case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD; case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH; case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS; @@ -898,6 +1011,7 @@ static uint16_t opcodeToSubOp(uint opcode) case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; + case TGSI_OPCODE_ATOMFADD: return NV50_IR_SUBOP_ATOM_ADD; case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_UMUL_HI: return NV50_IR_SUBOP_MUL_HIGH; @@ -968,19 +1082,13 @@ public: std::vector resources; */ - struct Image { - uint8_t target; // TGSI_TEXTURE_* - bool raw; - uint8_t slot; - uint16_t format; // PIPE_FORMAT_* - }; - std::vector images; - struct MemoryFile { uint8_t mem_type; // TGSI_MEMORY_TYPE_* }; std::vector memoryFiles; + std::vector bufferAtomics; + private: int inferSysValDirection(unsigned sn) const; bool scanDeclaration(const struct tgsi_full_declaration *); @@ -1029,9 +1137,9 @@ bool Source::scanSource() textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); - images.resize(scan.file_max[TGSI_FILE_IMAGE] + 1); tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1); memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); + bufferAtomics.resize(scan.file_max[TGSI_FILE_BUFFER] + 1); info->immd.bufSize = 0; @@ -1048,7 +1156,6 @@ bool Source::scanSource() } info->io.viewportId = -1; - info->prop.cp.numThreads = 1; info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); @@ -1151,9 +1258,13 @@ void Source::scanProperty(const struct tgsi_full_property *prop) info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */ break; case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH: + info->prop.cp.numThreads[0] = prop->u[0].Data; + break; case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT: + info->prop.cp.numThreads[1] = prop->u[0].Data; + break; case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH: - info->prop.cp.numThreads *= prop->u[0].Data; + info->prop.cp.numThreads[2] = prop->u[0].Data; break; case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: info->io.clipDistances = prop->u[0].Data; @@ -1167,6 +1278,12 @@ void Source::scanProperty(const struct tgsi_full_property *prop) case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL: info->prop.fp.earlyFragTests = prop->u[0].Data; break; + case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE: + info->prop.fp.postDepthCoverage = prop->u[0].Data; + break; + case TGSI_PROPERTY_MUL_ZERO_WINS: + info->io.mul_zero_wins = prop->u[0].Data; + break; default: INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); break; @@ -1353,14 +1470,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) } break; */ - case TGSI_FILE_IMAGE: - for (i = first; i <= last; ++i) { - images[i].target = decl->Image.Resource; - images[i].raw = decl->Image.Raw; - images[i].format = decl->Image.Format; - images[i].slot = i; - } - break; case TGSI_FILE_SAMPLER_VIEW: for (i = first; i <= last; ++i) textureViews[i].target = decl->SamplerView.Resource; @@ -1377,12 +1486,15 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair( first, last - first + 1))); break; + case TGSI_FILE_BUFFER: + for (i = first; i <= last; ++i) + bufferAtomics[i] = decl->Declaration.Atomic; + break; case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - case TGSI_FILE_PREDICATE: case TGSI_FILE_SAMPLER: - case TGSI_FILE_BUFFER: + case TGSI_FILE_IMAGE: break; default: ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File); @@ -1406,13 +1518,6 @@ void Source::scanInstructionSrc(const Instruction& insn, if (src.isIndirect(0)) indirectTempArrays.insert(src.getArrayId()); } else - if (src.getFile() == TGSI_FILE_BUFFER || - src.getFile() == TGSI_FILE_IMAGE || - (src.getFile() == TGSI_FILE_MEMORY && - memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { - info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? - 0x1 : 0x2; - } else if (src.getFile() == TGSI_FILE_OUTPUT) { if (src.isIndirect(0)) { // We don't know which one is accessed, just mark everything for @@ -1423,6 +1528,10 @@ void Source::scanInstructionSrc(const Instruction& insn, info->out[src.getIndex(0)].oread = 1; } } + if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) { + if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS) + info->prop.fp.readsSampleLocations = true; + } if (src.getFile() != TGSI_FILE_INPUT) return; @@ -1460,9 +1569,20 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (insn.getOpcode() == TGSI_OPCODE_BARRIER) info->numBarriers = 1; + if (insn.getOpcode() == TGSI_OPCODE_FBFETCH) + info->prop.fp.readsFramebuffer = true; + + if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE) + info->prop.fp.readsSampleLocations = true; + if (insn.dstCount()) { Instruction::DstRegister dst = insn.getDst(0); + if (insn.getOpcode() == TGSI_OPCODE_STORE && + dst.getFile() != TGSI_FILE_MEMORY) { + info->io.globalAccess |= 0x2; + } + if (dst.getFile() == TGSI_FILE_OUTPUT) { if (dst.isIndirect(0)) for (unsigned i = 0; i < info->numOutputs; ++i) @@ -1492,6 +1612,30 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) } } + if (insn.srcCount() && ( + insn.getSrc(0).getFile() != TGSI_FILE_MEMORY || + memoryFiles[insn.getSrc(0).getIndex(0)].mem_type == + TGSI_MEMORY_TYPE_GLOBAL)) { + switch (insn.getOpcode()) { + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: + case TGSI_OPCODE_LOAD: + info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? + 0x1 : 0x2; + break; + } + } + + for (unsigned s = 0; s < insn.srcCount(); ++s) scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s)); @@ -1527,7 +1671,7 @@ namespace { using namespace nv50_ir; -class Converter : public BuildUtil +class Converter : public ConverterCommon { public: Converter(Program *, const tgsi::Source *); @@ -1536,18 +1680,12 @@ public: bool run(); private: - struct Subroutine - { - Subroutine(Function *f) : f(f) { } - Function *f; - ValueMap values; - }; - Value *shiftAddress(Value *); Value *getVertexBase(int s); Value *getOutputBase(int s); DataArray *getArrayForFile(unsigned file, int idx); Value *fetchSrc(int s, int c); + Value *fetchDst(int d, int c); Value *acquireDst(int d, int c); void storeDst(int d, int c, Value *); @@ -1562,10 +1700,10 @@ private: Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c); Symbol *dstToSym(tgsi::Instruction::DstRegister, int c); + bool isSubGroupMask(uint8_t semantic); + bool handleInstruction(const struct tgsi_full_instruction *); void exportOutputs(); - inline Subroutine *getSubroutine(unsigned ip); - inline Subroutine *getSubroutine(Function *); inline bool isEndOfSubroutine(uint ip); void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask); @@ -1575,11 +1713,11 @@ private: void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy); void handleTXF(Value *dst0[4], int R, int L_M); void handleTXQ(Value *dst0[4], enum TexQuery, int R); + void handleFBFETCH(Value *dst0[4]); void handleLIT(Value *dst0[4]); - void handleUserClipPlanes(); // Symbol *getResourceBase(int r); - void getImageCoords(std::vector&, int r, int s); + void getImageCoords(std::vector&, int s); void handleLOAD(Value *dst0[4]); void handleSTORE(); @@ -1587,8 +1725,6 @@ private: void handleINTERP(Value *dst0[4]); - uint8_t translateInterpMode(const struct nv50_ir_varying *var, - operation& op); Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); @@ -1620,12 +1756,6 @@ private: private: const tgsi::Source *code; - const struct nv50_ir_prog_info *info; - - struct { - std::map map; - Subroutine *cur; - } sub; uint ip; // instruction pointer @@ -1637,18 +1767,13 @@ private: DataArray tData; // TGSI_FILE_TEMPORARY DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays DataArray aData; // TGSI_FILE_ADDRESS - DataArray pData; // TGSI_FILE_PREDICATE DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers) Value *zero; - Value *fragCoord[4]; - Value *clipVtx[4]; Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP) uint8_t vtxBaseValid; - Value *outBase; // base address of vertex out patch (for TCP) - Stack condBBs; // fork BB, then else clause BB Stack joinBBs; // fork BB, for inserting join ops on ENDIF Stack loopBBs; // loop headers @@ -1723,29 +1848,6 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) return sym; } -uint8_t -Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op) -{ - uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; - - if (var->flat) - mode = NV50_IR_INTERP_FLAT; - else - if (var->linear) - mode = NV50_IR_INTERP_LINEAR; - else - if (var->sc) - mode = NV50_IR_INTERP_SC; - - op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) - ? OP_PINTERP : OP_LINTERP; - - if (var->centroid) - mode |= NV50_IR_INTERP_CENTROID; - - return mode; -} - Value * Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr) { @@ -1854,14 +1956,53 @@ Converter::fetchSrc(int s, int c) return applySrcMod(res, s, c); } +Value * +Converter::fetchDst(int d, int c) +{ + Value *res; + Value *ptr = NULL, *dimRel = NULL; + + tgsi::Instruction::DstRegister dst = tgsi.getDst(d); + + if (dst.isIndirect(0)) + ptr = fetchSrc(dst.getIndirect(0), 0, NULL); + + if (dst.is2D()) { + switch (dst.getFile()) { + case TGSI_FILE_OUTPUT: + assert(0); // TODO + dimRel = NULL; + break; + case TGSI_FILE_INPUT: + assert(0); // TODO + dimRel = NULL; + break; + case TGSI_FILE_CONSTANT: + // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k] + if (dst.isIndirect(1)) + dimRel = fetchSrc(dst.getIndirect(1), 0, 0); + break; + default: + break; + } + } + + struct tgsi_full_src_register fsr = dst.asSrc(); + tgsi::Instruction::SrcRegister src(&fsr); + res = fetchSrc(src, c, ptr); + + if (dimRel) + res->getInsn()->setIndirect(0, 1, dimRel); + + return res; +} + Converter::DataArray * Converter::getArrayForFile(unsigned file, int idx) { switch (file) { case TGSI_FILE_TEMPORARY: return idx == 0 ? &tData : &lData; - case TGSI_FILE_PREDICATE: - return &pData; case TGSI_FILE_ADDRESS: return &aData; case TGSI_FILE_OUTPUT: @@ -1893,6 +2034,21 @@ Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const idx += it->second; } +bool +Converter::isSubGroupMask(uint8_t semantic) +{ + switch (semantic) { + case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: + case TGSI_SEMANTIC_SUBGROUP_LT_MASK: + case TGSI_SEMANTIC_SUBGROUP_LE_MASK: + case TGSI_SEMANTIC_SUBGROUP_GT_MASK: + case TGSI_SEMANTIC_SUBGROUP_GE_MASK: + return true; + default: + return false; + } +} + Value * Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) { @@ -1935,6 +2091,13 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) return ld->getDef(0); case TGSI_FILE_SYSTEM_VALUE: assert(!ptr); + if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID && + info->prop.cp.numThreads[swz] == 1) + return loadImm(NULL, 0u); + if (isSubGroupMask(info->sv[idx].sn) && swz > 0) + return loadImm(NULL, 0u); + if (info->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE) + return loadImm(NULL, 32u); ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); ld->perPatch = info->sv[idx].patch; return ld->getDef(0); @@ -2019,6 +2182,7 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, /* Save the viewport index into a scratch register so that it can be exported at EMIT time */ if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX && + prog->getType() == Program::TYPE_GEOMETRY && viewport != NULL) mkOp1(OP_MOV, TYPE_U32, viewport, val); else @@ -2027,7 +2191,6 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, } } else if (f == TGSI_FILE_TEMPORARY || - f == TGSI_FILE_PREDICATE || f == TGSI_FILE_ADDRESS || f == TGSI_FILE_OUTPUT) { if (f == TGSI_FILE_TEMPORARY) { @@ -2055,12 +2218,14 @@ Converter::buildDot(int dim) Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0); Value *dotp = getScratch(); - mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1); + mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1) + ->dnz = info->io.mul_zero_wins; for (int c = 1; c < dim; ++c) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); - mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp); + mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp) + ->dnz = info->io.mul_zero_wins; } return dotp; } @@ -2082,6 +2247,16 @@ Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S) { unsigned rIdx = 0, sIdx = 0; + if (R >= 0 && tgsi.getSrc(R).getFile() != TGSI_FILE_SAMPLER) { + // This is the bindless case. We have to get the actual value and pass + // it in. This will be the complete handle. + tex->tex.rIndirectSrc = s; + tex->setSrc(s++, fetchSrc(R, 0)); + tex->setTexture(tgsi.getTexture(code, R), 0xff, 0x1f); + tex->tex.bindless = true; + return; + } + if (R >= 0) rIdx = tgsi.getSrc(R).getIndex(0); if (S >= 0) @@ -2176,7 +2351,9 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) for (s = 0; s < tgt.getArgCount(); ++s) arg[s] = src[s] = fetchSrc(0, s); - if (texi->op == OP_TXL || texi->op == OP_TXB) + if (tgsi.getOpcode() == TGSI_OPCODE_TEX_LZ) + lod = loadImm(NULL, 0); + else if (texi->op == OP_TXL || texi->op == OP_TXB) lod = fetchSrc(L >> 4, L & 3); if (C == 0x0f) @@ -2227,6 +2404,11 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ) texi->tex.levelZero = true; + if (prog->getType() != Program::TYPE_FRAGMENT && + (tgsi.getOpcode() == TGSI_OPCODE_TEX || + tgsi.getOpcode() == TGSI_OPCODE_TEX2 || + tgsi.getOpcode() == TGSI_OPCODE_TXP)) + texi->tex.levelZero = true; if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow()) texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info); @@ -2263,7 +2445,10 @@ Converter::handleTXF(Value *dst[4], int R, int L_M) } for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c) texi->setSrc(c, fetchSrc(0, c)); - texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms + if (!ms && tgsi.getOpcode() == TGSI_OPCODE_TXF_LZ) + texi->setSrc(c++, loadImm(NULL, 0)); + else + texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms setTexRS(texi, c, R, -1); @@ -2278,6 +2463,40 @@ Converter::handleTXF(Value *dst[4], int R, int L_M) bb->insertTail(texi); } +void +Converter::handleFBFETCH(Value *dst[4]) +{ + TexInstruction *texi = new_TexInstruction(func, OP_TXF); + unsigned int c, d; + + texi->tex.target = TEX_TARGET_2D_MS_ARRAY; + texi->tex.levelZero = 1; + texi->tex.useOffsets = 0; + + for (c = 0, d = 0; c < 4; ++c) { + if (dst[c]) { + texi->setDef(d++, dst[c]); + texi->tex.mask |= 1 << c; + } + } + + Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0)); + Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1)); + Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0)); + Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0)); + + mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z; + mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z; + texi->setSrc(0, x); + texi->setSrc(1, y); + texi->setSrc(2, z); + texi->setSrc(3, ms); + + texi->tex.r = texi->tex.s = -1; + + bb->insertTail(texi); +} + void Converter::handleLIT(Value *dst0[4]) { @@ -2385,7 +2604,7 @@ Converter::getResourceCoords(std::vector &coords, int r, int s) coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS), coords[0]); } -*/ + static inline int partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask) { @@ -2410,25 +2629,12 @@ partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask) } return n + 1; } - -static inline nv50_ir::TexTarget -getImageTarget(const tgsi::Source *code, int r) -{ - return tgsi::translateTexture(code->images.at(r).target); -} - -static inline const nv50_ir::TexInstruction::ImgFormatDesc * -getImageFormat(const tgsi::Source *code, int r) -{ - return &nv50_ir::TexInstruction::formatTable[ - tgsi::translateImgFormat(code->images.at(r).format)]; -} - +*/ void -Converter::getImageCoords(std::vector &coords, int r, int s) +Converter::getImageCoords(std::vector &coords, int s) { TexInstruction::Target t = - TexInstruction::Target(getImageTarget(code, r)); + TexInstruction::Target(tgsi.getImageTarget()); const int arg = t.getDim() + (t.isArray() || t.isCube()); for (int c = 0; c < arg; ++c) @@ -2446,6 +2652,10 @@ Converter::handleLOAD(Value *dst0[4]) const int r = tgsi.getSrc(0).getIndex(0); int c; std::vector off, src, ldv, def; + Value *ind = NULL; + + if (tgsi.getSrc(0).isIndirect(0)) + ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); switch (tgsi.getSrc(0).getFile()) { case TGSI_FILE_BUFFER: @@ -2471,15 +2681,17 @@ Converter::handleLOAD(Value *dst0[4]) } Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off); - ld->cache = tgsi.getCacheMode(); - if (tgsi.getSrc(0).isIndirect(0)) - ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER && + code->bufferAtomics[r]) + ld->cache = nv50_ir::CACHE_CG; + else + ld->cache = tgsi.getCacheMode(); + if (ind) + ld->setIndirect(0, 1, ind); } break; - case TGSI_FILE_IMAGE: { - assert(!code->images[r].raw); - - getImageCoords(off, r, 1); + default: { + getImageCoords(off, 1); def.resize(4); for (c = 0; c < 4; ++c) { @@ -2489,23 +2701,28 @@ Converter::handleLOAD(Value *dst0[4]) def[c] = dst0[c]; } + bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchSrc(0, 0); + TexInstruction *ld = - mkTex(OP_SULDP, getImageTarget(code, r), code->images[r].slot, 0, - def, off); + mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off); ld->tex.mask = tgsi.getDst(0).getMask(); - ld->tex.format = getImageFormat(code, r); + ld->tex.format = tgsi.getImageFormat(); ld->cache = tgsi.getCacheMode(); - if (tgsi.getSrc(0).isIndirect(0)) - ld->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); + ld->tex.bindless = bindless; + if (!bindless) + ld->tex.r = r; + if (ind) + ld->setIndirectR(ind); FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) if (dst0[c] != def[c]) mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); - } break; - default: - assert(!"Unsupported srcFile for LOAD"); } + } + /* Keep this around for now as reference when adding img support getResourceCoords(off, r, 1); @@ -2584,6 +2801,10 @@ Converter::handleSTORE() const int r = tgsi.getDst(0).getIndex(0); int c; std::vector off, src, dummy; + Value *ind = NULL; + + if (tgsi.getDst(0).isIndirect(0)) + ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0); switch (tgsi.getDst(0).getFile()) { case TGSI_FILE_BUFFER: @@ -2606,31 +2827,34 @@ Converter::handleSTORE() Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c)); st->cache = tgsi.getCacheMode(); - if (tgsi.getDst(0).isIndirect(0)) - st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0)); + if (ind) + st->setIndirect(0, 1, ind); } break; - case TGSI_FILE_IMAGE: { - assert(!code->images[r].raw); - - getImageCoords(off, r, 0); + default: { + getImageCoords(off, 0); src = off; FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) src.push_back(fetchSrc(1, c)); + bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchDst(0, 0); + TexInstruction *st = - mkTex(OP_SUSTP, getImageTarget(code, r), code->images[r].slot, - 0, dummy, src); + mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src); st->tex.mask = tgsi.getDst(0).getMask(); - st->tex.format = getImageFormat(code, r); + st->tex.format = tgsi.getImageFormat(); st->cache = tgsi.getCacheMode(); - if (tgsi.getDst(0).isIndirect(0)) - st->setIndirectR(fetchSrc(tgsi.getDst(0).getIndirect(0), 0, NULL)); - } + st->tex.bindless = bindless; + if (!bindless) + st->tex.r = r; + if (ind) + st->setIndirectR(ind); + break; - default: - assert(!"Unsupported dstFile for STORE"); + } } /* Keep this around for now as reference when adding img support @@ -2695,6 +2919,10 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) std::vector srcv; std::vector defv; LValue *dst = getScratch(); + Value *ind = NULL; + + if (tgsi.getSrc(0).isIndirect(0)) + ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); switch (tgsi.getSrc(0).getFile()) { case TGSI_FILE_BUFFER: @@ -2704,55 +2932,56 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) continue; Instruction *insn; - Value *off = fetchSrc(1, c), *off2 = NULL; + Value *off = fetchSrc(1, c); Value *sym; if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, tgsi.getSrc(1).getValueU32(c, info)); else sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0); - if (tgsi.getSrc(0).isIndirect(0)) - off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); if (subOp == NV50_IR_SUBOP_ATOM_CAS) insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c)); else insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c)); if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE) insn->setIndirect(0, 0, off); - if (off2) - insn->setIndirect(0, 1, off2); + if (ind) + insn->setIndirect(0, 1, ind); insn->subOp = subOp; } for (int c = 0; c < 4; ++c) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov break; - case TGSI_FILE_IMAGE: { - assert(!code->images[r].raw); - - getImageCoords(srcv, r, 1); + default: { + getImageCoords(srcv, 1); defv.push_back(dst); srcv.push_back(fetchSrc(2, 0)); if (subOp == NV50_IR_SUBOP_ATOM_CAS) srcv.push_back(fetchSrc(3, 0)); - TexInstruction *tex = mkTex(OP_SUREDP, getImageTarget(code, r), - code->images[r].slot, 0, defv, srcv); + bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchSrc(0, 0); + + TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(), + 0, 0, defv, srcv); tex->subOp = subOp; tex->tex.mask = 1; - tex->tex.format = getImageFormat(code, r); + tex->tex.format = tgsi.getImageFormat(); tex->setType(ty); - if (tgsi.getSrc(0).isIndirect(0)) - tex->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); + tex->tex.bindless = bindless; + if (!bindless) + tex->tex.r = r; + if (ind) + tex->setIndirectR(ind); for (int c = 0; c < 4; ++c) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov - } break; - default: - assert(!"Unsupported srcFile for ATOM"); + } } /* Keep this around for now as reference when adding img support @@ -2830,10 +3059,11 @@ Converter::handleINTERP(Value *dst[4]) assert(sym[c]); op = insn->op; mode = insn->ipa; + ptr = insn->getIndirect(0, 0); } } else { if (src.isIndirect(0)) - ptr = fetchSrc(src.getIndirect(0), 0, NULL); + ptr = shiftAddress(fetchSrc(src.getIndirect(0), 0, NULL)); // We can assume that the fixed index will point to an input of the same // interpolation type in case of an indirect. @@ -2864,8 +3094,8 @@ Converter::handleINTERP(Value *dst[4]) // and then convert to s32. Value *offs[2]; for (c = 0; c < 2; c++) { - offs[c] = fetchSrc(1, c); - mkOp2(OP_MIN, TYPE_F32, offs[c], offs[c], loadImm(NULL, 0.4375f)); + offs[c] = getScratch(); + mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f)); mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f)); mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f)); mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]); @@ -2891,39 +3121,15 @@ Converter::handleINTERP(Value *dst[4]) insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c)); if (op == OP_PINTERP) insn->setSrc(1, w); - if (ptr) - insn->setIndirect(0, 0, ptr); if (offset) insn->setSrc(op == OP_PINTERP ? 2 : 1, offset); + if (ptr) + insn->setIndirect(0, 0, ptr); insn->setInterpolate(mode); } } -Converter::Subroutine * -Converter::getSubroutine(unsigned ip) -{ - std::map::iterator it = sub.map.find(ip); - - if (it == sub.map.end()) - it = sub.map.insert(std::make_pair( - ip, Subroutine(new Function(prog, "SUB", ip)))).first; - - return &it->second; -} - -Converter::Subroutine * -Converter::getSubroutine(Function *f) -{ - unsigned ip = f->getLabel(); - std::map::iterator it = sub.map.find(ip); - - if (it == sub.map.end()) - it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first; - - return &it->second; -} - bool Converter::isEndOfSubroutine(uint ip) { @@ -2955,7 +3161,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; - if (tgsi.dstCount()) { + if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) { for (c = 0; c < 4; ++c) { rDst0[c] = acquireDst(0, c); dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c]; @@ -2985,28 +3191,31 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_SHL: case TGSI_OPCODE_ISHR: case TGSI_OPCODE_USHR: - case TGSI_OPCODE_SUB: case TGSI_OPCODE_XOR: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); geni = mkOp2(op, dstTy, dst0[c], src0, src1); geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); + if (op == OP_MUL && dstTy == TYPE_F32) + geni->dnz = info->io.mul_zero_wins; + geni->precise = insn->Instruction.Precise; } break; case TGSI_OPCODE_MAD: case TGSI_OPCODE_UMAD: - case TGSI_OPCODE_SAD: case TGSI_OPCODE_FMA: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); - mkOp3(op, dstTy, dst0[c], src0, src1, src2); + geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2); + if (dstTy == TYPE_F32) + geni->dnz = info->io.mul_zero_wins; + geni->precise = insn->Instruction.Precise; } break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_ABS: case TGSI_OPCODE_CEIL: case TGSI_OPCODE_FLR: case TGSI_OPCODE_TRUNC: @@ -3069,19 +3278,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkOp1(op, TYPE_F32, dst0[3], val0); } break; - case TGSI_OPCODE_SCS: - if (mask & 3) { - val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0)); - if (dst0[0]) - mkOp1(OP_COS, TYPE_F32, dst0[0], val0); - if (dst0[1]) - mkOp1(OP_SIN, TYPE_F32, dst0[1], val0); - } - if (dst0[2]) - loadImm(dst0[2], 0.0f); - if (dst0[3]) - loadImm(dst0[3], 1.0f); - break; case TGSI_OPCODE_EXP: src0 = fetchSrc(0, 0); val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0); @@ -3102,7 +3298,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) if (dst0[1]) { mkOp1(OP_EX2, TYPE_F32, dst0[1], val1); mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]); - mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0); + mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0) + ->dnz = info->io.mul_zero_wins; } if (dst0[3]) loadImm(dst0[3], 1.0f); @@ -3122,20 +3319,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) mkMov(dst0[c], val0); break; - case TGSI_OPCODE_DPH: - val0 = buildDot(3); - src1 = fetchSrc(1, 3); - mkOp2(OP_ADD, TYPE_F32, val0, val0, src1); - FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) - mkMov(dst0[c], val0); - break; case TGSI_OPCODE_DST: if (dst0[0]) loadImm(dst0[0], 1.0f); if (dst0[1]) { src0 = fetchSrc(0, 1); src1 = fetchSrc(1, 1); - mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1); + mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1) + ->dnz = info->io.mul_zero_wins; } if (dst0[2]) mkMov(dst0[2], fetchSrc(0, 2)); @@ -3148,29 +3339,13 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); mkOp3(OP_MAD, TYPE_F32, dst0[c], - mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2); + mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2) + ->dnz = info->io.mul_zero_wins; } break; case TGSI_OPCODE_LIT: handleLIT(dst0); break; - case TGSI_OPCODE_XPD: - FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { - if (c < 3) { - val0 = getSSA(); - src0 = fetchSrc(1, (c + 1) % 3); - src1 = fetchSrc(0, (c + 2) % 3); - mkOp2(OP_MUL, TYPE_F32, val0, src0, src1); - mkOp1(OP_NEG, TYPE_F32, val0, val0); - - src0 = fetchSrc(0, (c + 1) % 3); - src1 = fetchSrc(1, (c + 2) % 3); - mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0); - } else { - loadImm(dst0[c], 1.0f); - } - } - break; case TGSI_OPCODE_ISSG: case TGSI_OPCODE_SSG: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { @@ -3213,16 +3388,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c)) ->rnd = ROUND_NI; break; - case TGSI_OPCODE_CLAMP: - FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { - src0 = fetchSrc(0, c); - src1 = fetchSrc(1, c); - src2 = fetchSrc(2, c); - val0 = getScratch(); - mkOp2(OP_MIN, TYPE_F32, val0, src0, src1); - mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2); - } - break; case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: case TGSI_OPCODE_SEQ: @@ -3256,6 +3421,40 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0); } break; + case TGSI_OPCODE_BALLOT: + if (!tgsi.getDst(0).isMasked(0)) { + val0 = new_LValue(func, FILE_PREDICATE); + mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, 0), zero); + mkOp1(op, TYPE_U32, dst0[0], val0)->subOp = NV50_IR_SUBOP_VOTE_ANY; + } + if (!tgsi.getDst(0).isMasked(1)) + mkMov(dst0[1], zero, TYPE_U32); + break; + case TGSI_OPCODE_READ_FIRST: + // ReadFirstInvocationARB(src) is implemented as + // ReadInvocationARB(src, findLSB(ballot(true))) + val0 = getScratch(); + mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; + mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000)) + ->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT; + src1 = val0; + /* fallthrough */ + case TGSI_OPCODE_READ_INVOC: + if (tgsi.getOpcode() == TGSI_OPCODE_READ_INVOC) + src1 = fetchSrc(1, 0); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + geni = mkOp3(op, dstTy, dst0[c], fetchSrc(0, c), src1, mkImm(0x1f)); + geni->subOp = NV50_IR_SUBOP_SHFL_IDX; + } + break; + case TGSI_OPCODE_CLOCK: + // Stick the 32-bit clock into the high dword of the logical result. + if (!tgsi.getDst(0).isMasked(0)) + mkOp1(OP_MOV, TYPE_U32, dst0[0], zero); + if (!tgsi.getDst(0).isMasked(1)) + mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1; + break; case TGSI_OPCODE_KILL_IF: val0 = new_LValue(func, FILE_PREDICATE); mask = 0; @@ -3272,6 +3471,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkOp(OP_DISCARD, TYPE_NONE, NULL); break; case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TEX_LZ: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: @@ -3300,6 +3500,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_SAMPLE_C_LZ: handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40); break; + case TGSI_OPCODE_TXF_LZ: case TGSI_OPCODE_TXF: handleTXF(dst0, 1, 0x03); break; @@ -3321,6 +3522,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) handleTXQ(dst0, TXQ_TYPE, 0); std::swap(dst0[0], dst0[2]); break; + case TGSI_OPCODE_FBFETCH: + handleFBFETCH(dst0); + break; case TGSI_OPCODE_F2I: case TGSI_OPCODE_F2U: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) @@ -3353,6 +3557,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) info->out[info->io.viewportId].slot[0] * 4); mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport); } + /* handle user clip planes for each emitted vertex */ + if (info->io.genUserClip > 0) + handleUserClipPlanes(); /* fallthrough */ case TGSI_OPCODE_ENDPRIM: { @@ -3527,7 +3734,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) setPosition(epilogue, true); if (prog->getType() == Program::TYPE_FRAGMENT) exportOutputs(); - if (info->io.genUserClip > 0) + if ((prog->getType() == Program::TYPE_VERTEX || + prog->getType() == Program::TYPE_TESSELLATION_EVAL + ) && info->io.genUserClip > 0) handleUserClipPlanes(); mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; } @@ -3548,20 +3757,16 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) geni->fixed = 1; geni->subOp = NV50_IR_SUBOP_BAR_SYNC; break; - case TGSI_OPCODE_MFENCE: - case TGSI_OPCODE_LFENCE: - case TGSI_OPCODE_SFENCE: - geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); - geni->fixed = 1; - geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); - break; case TGSI_OPCODE_MEMBAR: + { + uint32_t level = tgsi.getSrc(0).getValueU32(0, info); geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); geni->fixed = 1; - if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP) + if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED))) geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA); else geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL); + } break; case TGSI_OPCODE_ATOMUADD: case TGSI_OPCODE_ATOMXCHG: @@ -3573,19 +3778,20 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); break; case TGSI_OPCODE_RESQ: if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + Value *ind = NULL; + if (tgsi.getSrc(0).isIndirect(0)) + ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0], makeSym(tgsi.getSrc(0).getFile(), tgsi.getSrc(0).getIndex(0), -1, 0, 0)); - if (tgsi.getSrc(0).isIndirect(0)) - geni->setIndirect(0, 1, - fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); + if (ind) + geni->setIndirect(0, 1, ind); } else { - assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE); - TexInstruction *texi = new_TexInstruction(func, OP_SUQ); for (int c = 0, d = 0; c < 4; ++c) { if (dst0[c]) { @@ -3593,28 +3799,34 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) texi->tex.mask |= 1 << c; } } - texi->tex.r = tgsi.getSrc(0).getIndex(0); - texi->tex.target = getImageTarget(code, texi->tex.r); - bb->insertTail(texi); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) { + texi->tex.r = tgsi.getSrc(0).getIndex(0); + if (tgsi.getSrc(0).isIndirect(0)) + texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); + } else { + texi->tex.bindless = true; + texi->setIndirectR(fetchSrc(0, 0)); + } + texi->tex.target = tgsi.getImageTarget(); - if (tgsi.getSrc(0).isIndirect(0)) - texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); + bb->insertTail(texi); } break; case TGSI_OPCODE_IBFE: case TGSI_OPCODE_UBFE: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); + val0 = getScratch(); if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE && tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) { - src1 = loadImm(NULL, tgsi.getSrc(2).getValueU32(c, info) << 8 | - tgsi.getSrc(1).getValueU32(c, info)); + loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) | + tgsi.getSrc(1).getValueU32(c, info)); } else { src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); - mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1); + mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1); } - mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1); + mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0); } break; case TGSI_OPCODE_BFI: @@ -3623,16 +3835,18 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); src3 = fetchSrc(3, c); - mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2); - mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0); + val0 = getScratch(); + mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2); + mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0); } break; case TGSI_OPCODE_LSB: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); - geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000)); + val0 = getScratch(); + geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000)); geni->subOp = NV50_IR_SUBOP_EXTBF_REV; - geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0); + geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0); geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; } break; @@ -3661,6 +3875,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_INTERP_OFFSET: handleINTERP(dst0); break; + case TGSI_OPCODE_I642F: + case TGSI_OPCODE_U642F: case TGSI_OPCODE_D2I: case TGSI_OPCODE_D2U: case TGSI_OPCODE_D2F: { @@ -3677,16 +3893,79 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) } break; } + case TGSI_OPCODE_I2I64: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + dst0[c] = fetchSrc(0, c / 2); + mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31)); + c++; + } + break; + case TGSI_OPCODE_U2I64: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + dst0[c] = fetchSrc(0, c / 2); + dst0[c + 1] = zero; + c++; + } + break; + case TGSI_OPCODE_F2I64: + case TGSI_OPCODE_F2U64: case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: case TGSI_OPCODE_F2D: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { Value *dreg = getSSA(8); - mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2)); + Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2)); + if (!isFloatType(dstTy)) + cvt->rnd = ROUND_Z; mkSplit(&dst0[c], 4, dreg); c++; } break; + case TGSI_OPCODE_D2I64: + case TGSI_OPCODE_D2U64: + case TGSI_OPCODE_I642D: + case TGSI_OPCODE_U642D: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0); + if (!isFloatType(dstTy)) + cvt->rnd = ROUND_Z; + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_I64NEG: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + mkOp2(OP_SUB, dstTy, dst, zero, src0); + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_I64ABS: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *neg = getSSA(8), *srcComp[2], *negComp[2]; + srcComp[0] = fetchSrc(0, c); + srcComp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]); + mkOp2(OP_SUB, dstTy, neg, zero, src0); + mkSplit(negComp, 4, neg); + mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32, + negComp[0], srcComp[0], srcComp[1]); + mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32, + negComp[1], srcComp[1], srcComp[1]); + c++; + } + break; case TGSI_OPCODE_DABS: case TGSI_OPCODE_DNEG: case TGSI_OPCODE_DRCP: @@ -3719,6 +3998,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) c++; } break; + case TGSI_OPCODE_U64SEQ: + case TGSI_OPCODE_U64SNE: + case TGSI_OPCODE_U64SLT: + case TGSI_OPCODE_U64SGE: + case TGSI_OPCODE_I64SLT: + case TGSI_OPCODE_I64SGE: case TGSI_OPCODE_DSLT: case TGSI_OPCODE_DSGE: case TGSI_OPCODE_DSEQ: @@ -3740,8 +4025,51 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) } break; } + case TGSI_OPCODE_U64MIN: + case TGSI_OPCODE_U64MAX: + case TGSI_OPCODE_I64MIN: + case TGSI_OPCODE_I64MAX: { + dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32; + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + Value *flag = getSSA(1, FILE_FLAGS); + src0 = fetchSrc(0, c + 1); + src1 = fetchSrc(1, c + 1); + geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1); + geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH; + geni->setFlagsDef(1, flag); + + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1); + geni->subOp = NV50_IR_SUBOP_MINMAX_LOW; + geni->setFlagsSrc(2, flag); + + c++; + } + break; + } + case TGSI_OPCODE_U64SHL: + case TGSI_OPCODE_I64SHR: + case TGSI_OPCODE_U64SHR: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *dst = getSSA(8), *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + // Theoretically src1 is a 64-bit value but in practice only the low + // bits matter. The IR expects this to be a 32-bit value. + src1 = fetchSrc(1, c); + mkOp2(op, dstTy, dst, src0, src1); + mkSplit(&dst0[c], 4, dst); + c++; + } + break; + case TGSI_OPCODE_U64ADD: + case TGSI_OPCODE_U64MUL: case TGSI_OPCODE_DADD: case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DDIV: case TGSI_OPCODE_DMAX: case TGSI_OPCODE_DMIN: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { @@ -3813,13 +4141,30 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) c++; } break; + case TGSI_OPCODE_I64SSG: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = getSSA(8); + Value *tmp[2]; + tmp[0] = fetchSrc(0, c); + tmp[1] = fetchSrc(0, c + 1); + mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); + + val0 = getScratch(); + val1 = getScratch(); + mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero); + mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero); + mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0); + mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31)); + c++; + } + break; default: ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); assert(0); break; } - if (tgsi.dstCount()) { + if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) { for (c = 0; c < 4; ++c) { if (!dst0[c]) continue; @@ -3833,35 +4178,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) return true; } -void -Converter::handleUserClipPlanes() -{ - Value *res[8]; - int n, i, c; - - for (c = 0; c < 4; ++c) { - for (i = 0; i < info->io.genUserClip; ++i) { - Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot, - TYPE_F32, info->io.ucpBase + i * 16 + c * 4); - Value *ucp = mkLoadv(TYPE_F32, sym, NULL); - if (c == 0) - res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); - else - mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]); - } - } - - const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; - - for (i = 0; i < info->io.genUserClip; ++i) { - n = i / 4 + first; - c = i % 4; - Symbol *sym = - mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4); - mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]); - } -} - void Converter::exportOutputs() { @@ -3903,21 +4219,17 @@ Converter::exportOutputs() } } -Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), +Converter::Converter(Program *ir, const tgsi::Source *code) : ConverterCommon(ir, code->info), code(code), tgsi(NULL), - tData(this), lData(this), aData(this), pData(this), oData(this) + tData(this), lData(this), aData(this), oData(this) { - info = code->info; - const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY); - const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE); const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS); const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT); tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0); lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0); - pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0); oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);