* OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_util.h"
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
static nv50_ir::CacheMode translateCacheMode(uint qualifier);
+static nv50_ir::ImgFormat translateImgFormat(uint format);
class Instruction
{
return SrcRegister(fdr->Indirect);
}
+ // Reinterpret this destination register as a TGSI source register so it
+ // can be read back through the generic source-fetch path (e.g. when a
+ // dst operand such as a bindless image handle must also be read).
+ struct tgsi_full_src_register asSrc()
+ {
+ assert(fdr);
+ return tgsi_full_src_register_from_dst(fdr);
+ }
+
int getArrayId() const
{
if (isIndirect(0))
// mask of used components of source s
unsigned int srcMask(unsigned int s) const;
+ unsigned int texOffsetMask() const;
SrcRegister getSrc(unsigned int s) const
{
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
+ // Look up the IR image-format descriptor for this instruction's declared
+ // Memory.Format (valid for TGSI memory/image instructions).
+ const nv50_ir::TexInstruction::ImgFormatDesc *getImageFormat() const {
+ return &nv50_ir::TexInstruction::formatTable[
+ translateImgFormat(insn->Memory.Format)];
+ }
+
+ // Translate this instruction's Memory.Texture target to the IR enum.
+ nv50_ir::TexTarget getImageTarget() const {
+ return translateTexture(insn->Memory.Texture);
+ }
+
nv50_ir::CacheMode getCacheMode() const {
if (!insn->Instruction.Memory)
return nv50_ir::CACHE_CA;
const struct tgsi_full_instruction *insn;
};
+// Return the mask of texel-offset components (x/y/z) that are meaningful
+// for this instruction's texture target, i.e. one bit per dimension of the
+// underlying (non-array, non-shadow) coordinate space.
+unsigned int Instruction::texOffsetMask() const
+{
+ const struct tgsi_instruction_texture *tex = &insn->Texture;
+ assert(insn->Instruction.Texture);
+
+ switch (tex->Texture) {
+ // 1D targets and buffers: only the x offset applies.
+ case TGSI_TEXTURE_BUFFER:
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ return 0x1;
+ // 2D-style targets (incl. rect and MSAA): x and y offsets apply.
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_2D_MSAA:
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
+ return 0x3;
+ case TGSI_TEXTURE_3D:
+ return 0x7;
+ default:
+ // Not reachable for valid TGSI; be conservative and use all four.
+ assert(!"Unexpected texture target");
+ return 0xf;
+ }
+}
+
unsigned int Instruction::srcMask(unsigned int s) const
{
unsigned int mask = insn->Dst[0].Register.WriteMask;
case TGSI_OPCODE_DP3:
return 0x7;
case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
return 0xf;
case TGSI_OPCODE_DST:
case TGSI_OPCODE_POW:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SCS:
return 0x1;
case TGSI_OPCODE_IF:
case TGSI_OPCODE_UIF:
case TGSI_OPCODE_TXD:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_TXF:
+ case TGSI_OPCODE_TG4:
+ case TGSI_OPCODE_TEX_LZ:
+ case TGSI_OPCODE_TXF_LZ:
case TGSI_OPCODE_LODQ:
{
const struct tgsi_instruction_texture *tex = &insn->Texture;
mask = 0x7;
if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
+ insn->Instruction.Opcode != TGSI_OPCODE_TEX_LZ &&
+ insn->Instruction.Opcode != TGSI_OPCODE_TXF_LZ &&
insn->Instruction.Opcode != TGSI_OPCODE_TXD)
mask |= 0x8; /* bias, lod or proj */
}
}
return mask;
- case TGSI_OPCODE_XPD:
- {
- unsigned int x = 0;
- if (mask & 1) x |= 0x6;
- if (mask & 2) x |= 0x5;
- if (mask & 4) x |= 0x3;
- return x;
- }
+ case TGSI_OPCODE_TXQ:
+ return 1;
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
case TGSI_OPCODE_D2F:
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSEQ:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_U642F:
switch (util_bitcount(mask)) {
case 1: return 0x3;
case 2: return 0xf;
case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT;
case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR;
case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS;
- case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER;
case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
+ case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM;
+ case TGSI_SEMANTIC_SUBGROUP_INVOCATION: return nv50_ir::SV_LANEID;
+ case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: return nv50_ir::SV_LANEMASK_EQ;
+ case TGSI_SEMANTIC_SUBGROUP_LT_MASK: return nv50_ir::SV_LANEMASK_LT;
+ case TGSI_SEMANTIC_SUBGROUP_LE_MASK: return nv50_ir::SV_LANEMASK_LE;
+ case TGSI_SEMANTIC_SUBGROUP_GT_MASK: return nv50_ir::SV_LANEMASK_GT;
+ case TGSI_SEMANTIC_SUBGROUP_GE_MASK: return nv50_ir::SV_LANEMASK_GE;
default:
assert(0);
return nv50_ir::SV_CLOCK;
return nv50_ir::CACHE_CA;
}
+// Map a Gallium PIPE_FORMAT_* image format onto the corresponding nv50 IR
+// FMT_* enum (index into TexInstruction::formatTable). Formats not listed
+// here are not supported for shader image access and trip the assert.
+static nv50_ir::ImgFormat translateImgFormat(uint format)
+{
+
+// Expands to one case mapping PIPE_FORMAT_<a> to nv50_ir::FMT_<b>.
+#define FMT_CASE(a, b) \
+ case PIPE_FORMAT_ ## a: return nv50_ir::FMT_ ## b
+
+ switch (format) {
+ FMT_CASE(NONE, NONE);
+
+ FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
+ FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
+ FMT_CASE(R32G32_FLOAT, RG32F);
+ FMT_CASE(R16G16_FLOAT, RG16F);
+ FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
+ FMT_CASE(R32_FLOAT, R32F);
+ FMT_CASE(R16_FLOAT, R16F);
+
+ FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
+ FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
+ FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
+ FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
+ FMT_CASE(R32G32_UINT, RG32UI);
+ FMT_CASE(R16G16_UINT, RG16UI);
+ FMT_CASE(R8G8_UINT, RG8UI);
+ FMT_CASE(R32_UINT, R32UI);
+ FMT_CASE(R16_UINT, R16UI);
+ FMT_CASE(R8_UINT, R8UI);
+
+ FMT_CASE(R32G32B32A32_SINT, RGBA32I);
+ FMT_CASE(R16G16B16A16_SINT, RGBA16I);
+ FMT_CASE(R8G8B8A8_SINT, RGBA8I);
+ FMT_CASE(R32G32_SINT, RG32I);
+ FMT_CASE(R16G16_SINT, RG16I);
+ FMT_CASE(R8G8_SINT, RG8I);
+ FMT_CASE(R32_SINT, R32I);
+ FMT_CASE(R16_SINT, R16I);
+ FMT_CASE(R8_SINT, R8I);
+
+ FMT_CASE(R16G16B16A16_UNORM, RGBA16);
+ FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
+ FMT_CASE(R8G8B8A8_UNORM, RGBA8);
+ FMT_CASE(R16G16_UNORM, RG16);
+ FMT_CASE(R8G8_UNORM, RG8);
+ FMT_CASE(R16_UNORM, R16);
+ FMT_CASE(R8_UNORM, R8);
+
+ FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
+ FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
+ FMT_CASE(R16G16_SNORM, RG16_SNORM);
+ FMT_CASE(R8G8_SNORM, RG8_SNORM);
+ FMT_CASE(R16_SNORM, R16_SNORM);
+ FMT_CASE(R8_SNORM, R8_SNORM);
+
+ FMT_CASE(B8G8R8A8_UNORM, BGRA8);
+ }
+
+ assert(!"Unexpected format");
+ return nv50_ir::FMT_NONE;
+}
+
nv50_ir::DataType Instruction::inferSrcType() const
{
switch (getOpcode()) {
case TGSI_OPCODE_SHL:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_U2D:
+ case TGSI_OPCODE_U2I64:
case TGSI_OPCODE_UADD:
case TGSI_OPCODE_UDIV:
case TGSI_OPCODE_UMOD:
case TGSI_OPCODE_UBFE:
case TGSI_OPCODE_UMSB:
case TGSI_OPCODE_UP2H:
+ case TGSI_OPCODE_VOTE_ALL:
+ case TGSI_OPCODE_VOTE_ANY:
+ case TGSI_OPCODE_VOTE_EQ:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_I2I64:
case TGSI_OPCODE_IDIV:
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_IMAX:
case TGSI_OPCODE_ISHR:
case TGSI_OPCODE_ISLT:
case TGSI_OPCODE_ISSG:
- case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
case TGSI_OPCODE_MOD:
case TGSI_OPCODE_UARL:
case TGSI_OPCODE_ATOMIMIN:
case TGSI_OPCODE_D2F:
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
+ case TGSI_OPCODE_D2I64:
+ case TGSI_OPCODE_D2U64:
case TGSI_OPCODE_DABS:
case TGSI_OPCODE_DNEG:
case TGSI_OPCODE_DADD:
case TGSI_OPCODE_DMUL:
+ case TGSI_OPCODE_DDIV:
case TGSI_OPCODE_DMAX:
case TGSI_OPCODE_DMIN:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DFLR:
case TGSI_OPCODE_DROUND:
return nv50_ir::TYPE_F64;
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_U64MIN:
+ case TGSI_OPCODE_U64MAX:
+ case TGSI_OPCODE_U64ADD:
+ case TGSI_OPCODE_U64MUL:
+ case TGSI_OPCODE_U64SHL:
+ case TGSI_OPCODE_U64SHR:
+ case TGSI_OPCODE_U64DIV:
+ case TGSI_OPCODE_U64MOD:
+ case TGSI_OPCODE_U642F:
+ case TGSI_OPCODE_U642D:
+ return nv50_ir::TYPE_U64;
+ case TGSI_OPCODE_I64ABS:
+ case TGSI_OPCODE_I64SSG:
+ case TGSI_OPCODE_I64NEG:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_I64MIN:
+ case TGSI_OPCODE_I64MAX:
+ case TGSI_OPCODE_I64SHR:
+ case TGSI_OPCODE_I64DIV:
+ case TGSI_OPCODE_I64MOD:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_I642D:
+ return nv50_ir::TYPE_S64;
default:
return nv50_ir::TYPE_F32;
}
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
case TGSI_OPCODE_PK2H:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_U642F:
case TGSI_OPCODE_UP2H:
return nv50_ir::TYPE_F32;
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
case TGSI_OPCODE_F2D:
+ case TGSI_OPCODE_I642D:
+ case TGSI_OPCODE_U642D:
return nv50_ir::TYPE_F64;
+ case TGSI_OPCODE_I2I64:
+ case TGSI_OPCODE_U2I64:
+ case TGSI_OPCODE_F2I64:
+ case TGSI_OPCODE_D2I64:
+ return nv50_ir::TYPE_S64;
+ case TGSI_OPCODE_F2U64:
+ case TGSI_OPCODE_D2U64:
+ return nv50_ir::TYPE_U64;
default:
return inferSrcType();
}
case TGSI_OPCODE_USLT:
case TGSI_OPCODE_FSLT:
case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_U64SLT:
return CC_LT;
case TGSI_OPCODE_SLE:
return CC_LE;
case TGSI_OPCODE_USGE:
case TGSI_OPCODE_FSGE:
case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_U64SGE:
return CC_GE;
case TGSI_OPCODE_SGT:
return CC_GT;
case TGSI_OPCODE_USEQ:
case TGSI_OPCODE_FSEQ:
case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_U64SEQ:
return CC_EQ;
case TGSI_OPCODE_SNE:
case TGSI_OPCODE_FSNE:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_U64SNE:
return CC_NEU;
case TGSI_OPCODE_USNE:
return CC_NE;
NV50_IR_OPCODE_CASE(SGE, SET);
NV50_IR_OPCODE_CASE(MAD, MAD);
NV50_IR_OPCODE_CASE(FMA, FMA);
- NV50_IR_OPCODE_CASE(SUB, SUB);
NV50_IR_OPCODE_CASE(FLR, FLOOR);
NV50_IR_OPCODE_CASE(ROUND, CVT);
NV50_IR_OPCODE_CASE(LG2, LG2);
NV50_IR_OPCODE_CASE(POW, POW);
- NV50_IR_OPCODE_CASE(ABS, ABS);
-
NV50_IR_OPCODE_CASE(COS, COS);
NV50_IR_OPCODE_CASE(DDX, DFDX);
NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
NV50_IR_OPCODE_CASE(DIV, DIV);
NV50_IR_OPCODE_CASE(TXL, TXL);
+ NV50_IR_OPCODE_CASE(TEX_LZ, TXL);
NV50_IR_OPCODE_CASE(CEIL, CEIL);
NV50_IR_OPCODE_CASE(I2F, CVT);
NV50_IR_OPCODE_CASE(OR, OR);
NV50_IR_OPCODE_CASE(MOD, MOD);
NV50_IR_OPCODE_CASE(XOR, XOR);
- NV50_IR_OPCODE_CASE(SAD, SAD);
NV50_IR_OPCODE_CASE(TXF, TXF);
+ NV50_IR_OPCODE_CASE(TXF_LZ, TXF);
NV50_IR_OPCODE_CASE(TXQ, TXQ);
NV50_IR_OPCODE_CASE(TXQS, TXQ);
NV50_IR_OPCODE_CASE(TG4, TXG);
NV50_IR_OPCODE_CASE(DNEG, NEG);
NV50_IR_OPCODE_CASE(DADD, ADD);
NV50_IR_OPCODE_CASE(DMUL, MUL);
+ NV50_IR_OPCODE_CASE(DDIV, DIV);
NV50_IR_OPCODE_CASE(DMAX, MAX);
NV50_IR_OPCODE_CASE(DMIN, MIN);
NV50_IR_OPCODE_CASE(DSLT, SET);
NV50_IR_OPCODE_CASE(DFLR, FLOOR);
NV50_IR_OPCODE_CASE(DROUND, CVT);
+ NV50_IR_OPCODE_CASE(U64SEQ, SET);
+ NV50_IR_OPCODE_CASE(U64SNE, SET);
+ NV50_IR_OPCODE_CASE(U64SLT, SET);
+ NV50_IR_OPCODE_CASE(U64SGE, SET);
+ NV50_IR_OPCODE_CASE(I64SLT, SET);
+ NV50_IR_OPCODE_CASE(I64SGE, SET);
+ NV50_IR_OPCODE_CASE(I2I64, CVT);
+ NV50_IR_OPCODE_CASE(U2I64, CVT);
+ NV50_IR_OPCODE_CASE(F2I64, CVT);
+ NV50_IR_OPCODE_CASE(F2U64, CVT);
+ NV50_IR_OPCODE_CASE(D2I64, CVT);
+ NV50_IR_OPCODE_CASE(D2U64, CVT);
+ NV50_IR_OPCODE_CASE(I642F, CVT);
+ NV50_IR_OPCODE_CASE(U642F, CVT);
+ NV50_IR_OPCODE_CASE(I642D, CVT);
+ NV50_IR_OPCODE_CASE(U642D, CVT);
+
+ NV50_IR_OPCODE_CASE(I64MIN, MIN);
+ NV50_IR_OPCODE_CASE(U64MIN, MIN);
+ NV50_IR_OPCODE_CASE(I64MAX, MAX);
+ NV50_IR_OPCODE_CASE(U64MAX, MAX);
+ NV50_IR_OPCODE_CASE(I64ABS, ABS);
+ NV50_IR_OPCODE_CASE(I64NEG, NEG);
+ NV50_IR_OPCODE_CASE(U64ADD, ADD);
+ NV50_IR_OPCODE_CASE(U64MUL, MUL);
+ NV50_IR_OPCODE_CASE(U64SHL, SHL);
+ NV50_IR_OPCODE_CASE(I64SHR, SHR);
+ NV50_IR_OPCODE_CASE(U64SHR, SHR);
+
NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
NV50_IR_OPCODE_CASE(IMSB, BFIND);
NV50_IR_OPCODE_CASE(UMSB, BFIND);
+ NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE);
+ NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE);
+ NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE);
+
+ NV50_IR_OPCODE_CASE(BALLOT, VOTE);
+ NV50_IR_OPCODE_CASE(READ_INVOC, SHFL);
+ NV50_IR_OPCODE_CASE(READ_FIRST, SHFL);
+
NV50_IR_OPCODE_CASE(END, EXIT);
default:
static uint16_t opcodeToSubOp(uint opcode)
{
switch (opcode) {
- case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL);
- case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL);
- case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL);
case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD;
case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH;
case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS;
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_UMUL_HI:
return NV50_IR_SUBOP_MUL_HIGH;
+ case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL;
+ case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY;
+ case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI;
default:
return 0;
}
std::vector<Resource> resources;
*/
- struct Image {
- uint8_t target; // TGSI_TEXTURE_*
- bool raw;
- uint8_t slot;
- uint16_t format; // PIPE_FORMAT_*
- };
- std::vector<Image> images;
-
struct MemoryFile {
uint8_t mem_type; // TGSI_MEMORY_TYPE_*
};
int inferSysValDirection(unsigned sn) const;
bool scanDeclaration(const struct tgsi_full_declaration *);
bool scanInstruction(const struct tgsi_full_instruction *);
+ void scanInstructionSrc(const Instruction& insn,
+ const Instruction::SrcRegister& src,
+ unsigned mask);
void scanProperty(const struct tgsi_full_property *);
void scanImmediate(const struct tgsi_full_immediate *);
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
- images.resize(scan.file_max[TGSI_FILE_IMAGE] + 1);
tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
if (info->type == PIPE_SHADER_FRAGMENT) {
info->prop.fp.writesDepth = scan.writes_z;
- info->prop.fp.usesDiscard = scan.uses_kill;
+ info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase;
} else
if (info->type == PIPE_SHADER_GEOMETRY) {
info->prop.gp.instanceCount = 1; // default value
break;
case TGSI_PROPERTY_FS_COORD_ORIGIN:
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
+ case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
// we don't care
break;
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
else
info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
break;
+ case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
+ info->prop.cp.numThreads[0] = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
+ info->prop.cp.numThreads[1] = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
+ info->prop.cp.numThreads[2] = prop->u[0].Data;
+ break;
case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
info->io.clipDistances = prop->u[0].Data;
break;
case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
info->prop.fp.earlyFragTests = prop->u[0].Data;
break;
+ case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE:
+ info->prop.fp.postDepthCoverage = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_MUL_ZERO_WINS:
+ info->io.mul_zero_wins = prop->u[0].Data;
+ break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
si = decl->Semantic.Index;
}
- if (decl->Declaration.Local) {
+ if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) {
for (i = first; i <= last; ++i) {
for (c = 0; c < 4; ++c) {
locals.insert(
case TGSI_SEMANTIC_DRAWID:
info->prop.vp.usesDrawParameters = true;
break;
+ case TGSI_SEMANTIC_SAMPLEID:
+ case TGSI_SEMANTIC_SAMPLEPOS:
+ info->prop.fp.persampleInvocation = true;
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->prop.fp.usesSampleMaskIn = true;
+ break;
default:
break;
}
}
break;
*/
- case TGSI_FILE_IMAGE:
- for (i = first; i <= last; ++i) {
- images[i].target = decl->Image.Resource;
- images[i].raw = decl->Image.Raw;
- images[i].format = decl->Image.Format;
- images[i].slot = i;
- }
- break;
case TGSI_FILE_SAMPLER_VIEW:
for (i = first; i <= last; ++i)
textureViews[i].target = decl->SamplerView.Resource;
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
- case TGSI_FILE_PREDICATE:
case TGSI_FILE_SAMPLER:
case TGSI_FILE_BUFFER:
+ case TGSI_FILE_IMAGE:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
}
+// Record the side effects of reading a single source operand during the
+// scan pass: note indirectly-addressed temp arrays, outputs that are read
+// back, sample-position reads, and which input components are used.
+// 'mask' is the set of components the instruction actually reads.
+void Source::scanInstructionSrc(const Instruction& insn,
+ const Instruction::SrcRegister& src,
+ unsigned mask)
+{
+ if (src.getFile() == TGSI_FILE_TEMPORARY) {
+ if (src.isIndirect(0))
+ indirectTempArrays.insert(src.getArrayId());
+ } else
+ if (src.getFile() == TGSI_FILE_OUTPUT) {
+ if (src.isIndirect(0)) {
+ // We don't know which one is accessed, just mark everything for
+ // reading. This is an extremely unlikely occurrence.
+ for (unsigned i = 0; i < info->numOutputs; ++i)
+ info->out[i].oread = 1;
+ } else {
+ info->out[src.getIndex(0)].oread = 1;
+ }
+ }
+ if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) {
+ if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS)
+ info->prop.fp.readsSampleLocations = true;
+ }
+ // Everything below tracks input usage masks only.
+ if (src.getFile() != TGSI_FILE_INPUT)
+ return;
+
+ if (src.isIndirect(0)) {
+ // Indirect access: unknown input index, so mark all inputs fully used.
+ for (unsigned i = 0; i < info->numInputs; ++i)
+ info->in[i].mask = 0xf;
+ } else {
+ const int i = src.getIndex(0);
+ for (unsigned c = 0; c < 4; ++c) {
+ if (!(mask & (1 << c)))
+ continue;
+ int k = src.getSwizzle(c);
+ if (k <= TGSI_SWIZZLE_W)
+ info->in[i].mask |= 1 << k;
+ }
+ // Clamp the mask for semantics that only carry 1 or 2 components.
+ switch (info->in[i].sn) {
+ case TGSI_SEMANTIC_PSIZE:
+ case TGSI_SEMANTIC_PRIMID:
+ case TGSI_SEMANTIC_FOG:
+ info->in[i].mask &= 0x1;
+ break;
+ case TGSI_SEMANTIC_PCOORD:
+ info->in[i].mask &= 0x3;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
{
Instruction insn(inst);
if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
info->numBarriers = 1;
+ if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
+ info->prop.fp.readsFramebuffer = true;
+
+ if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE)
+ info->prop.fp.readsSampleLocations = true;
+
if (insn.dstCount()) {
Instruction::DstRegister dst = insn.getDst(0);
+ if (insn.getOpcode() == TGSI_OPCODE_STORE &&
+ dst.getFile() != TGSI_FILE_MEMORY) {
+ info->io.globalAccess |= 0x2;
+ }
+
if (dst.getFile() == TGSI_FILE_OUTPUT) {
if (dst.isIndirect(0))
for (unsigned i = 0; i < info->numOutputs; ++i)
indirectTempArrays.insert(dst.getArrayId());
} else
if (dst.getFile() == TGSI_FILE_BUFFER ||
- dst.getFile() == TGSI_FILE_IMAGE ||
+ dst.getFile() == TGSI_FILE_IMAGE ||
(dst.getFile() == TGSI_FILE_MEMORY &&
memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
info->io.globalAccess |= 0x2;
}
}
- for (unsigned s = 0; s < insn.srcCount(); ++s) {
- Instruction::SrcRegister src = insn.getSrc(s);
- if (src.getFile() == TGSI_FILE_TEMPORARY) {
- if (src.isIndirect(0))
- indirectTempArrays.insert(src.getArrayId());
- } else
- if (src.getFile() == TGSI_FILE_BUFFER ||
- src.getFile() == TGSI_FILE_IMAGE ||
- (src.getFile() == TGSI_FILE_MEMORY &&
- memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
+ if (insn.srcCount() && (
+ insn.getSrc(0).getFile() != TGSI_FILE_MEMORY ||
+ memoryFiles[insn.getSrc(0).getIndex(0)].mem_type ==
+ TGSI_MEMORY_TYPE_GLOBAL)) {
+ switch (insn.getOpcode()) {
+ case TGSI_OPCODE_ATOMUADD:
+ case TGSI_OPCODE_ATOMXCHG:
+ case TGSI_OPCODE_ATOMCAS:
+ case TGSI_OPCODE_ATOMAND:
+ case TGSI_OPCODE_ATOMOR:
+ case TGSI_OPCODE_ATOMXOR:
+ case TGSI_OPCODE_ATOMUMIN:
+ case TGSI_OPCODE_ATOMIMIN:
+ case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_ATOMIMAX:
+ case TGSI_OPCODE_LOAD:
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
- 0x1 : 0x2;
- } else
- if (src.getFile() == TGSI_FILE_OUTPUT) {
- if (src.isIndirect(0)) {
- // We don't know which one is accessed, just mark everything for
- // reading. This is an extremely unlikely occurrence.
- for (unsigned i = 0; i < info->numOutputs; ++i)
- info->out[i].oread = 1;
- } else {
- info->out[src.getIndex(0)].oread = 1;
- }
- }
- if (src.getFile() != TGSI_FILE_INPUT)
- continue;
- unsigned mask = insn.srcMask(s);
-
- if (src.isIndirect(0)) {
- for (unsigned i = 0; i < info->numInputs; ++i)
- info->in[i].mask = 0xf;
- } else {
- const int i = src.getIndex(0);
- for (unsigned c = 0; c < 4; ++c) {
- if (!(mask & (1 << c)))
- continue;
- int k = src.getSwizzle(c);
- if (k <= TGSI_SWIZZLE_W)
- info->in[i].mask |= 1 << k;
- }
- switch (info->in[i].sn) {
- case TGSI_SEMANTIC_PSIZE:
- case TGSI_SEMANTIC_PRIMID:
- case TGSI_SEMANTIC_FOG:
- info->in[i].mask &= 0x1;
- break;
- case TGSI_SEMANTIC_PCOORD:
- info->in[i].mask &= 0x3;
- break;
- default:
- break;
- }
+ 0x1 : 0x2;
+ break;
}
}
+
+
+ for (unsigned s = 0; s < insn.srcCount(); ++s)
+ scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
+
+ for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
+ scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());
+
return true;
}
Value *getOutputBase(int s);
DataArray *getArrayForFile(unsigned file, int idx);
Value *fetchSrc(int s, int c);
+ Value *fetchDst(int d, int c);
Value *acquireDst(int d, int c);
void storeDst(int d, int c, Value *);
Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);
+ bool isSubGroupMask(uint8_t semantic);
+
bool handleInstruction(const struct tgsi_full_instruction *);
void exportOutputs();
inline Subroutine *getSubroutine(unsigned ip);
void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
void handleTXF(Value *dst0[4], int R, int L_M);
void handleTXQ(Value *dst0[4], enum TexQuery, int R);
+ void handleFBFETCH(Value *dst0[4]);
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();
// Symbol *getResourceBase(int r);
- void getImageCoords(std::vector<Value *>&, int r, int s);
+ void getImageCoords(std::vector<Value *>&, int s);
void handleLOAD(Value *dst0[4]);
void handleSTORE();
DataArray tData; // TGSI_FILE_TEMPORARY
DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
DataArray aData; // TGSI_FILE_ADDRESS
- DataArray pData; // TGSI_FILE_PREDICATE
DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
Value *zero;
return applySrcMod(res, s, c);
}
+// Fetch the current value of destination operand d, component c, by
+// reinterpreting the dst register as a source and going through the normal
+// source-fetch path (used e.g. to read bindless image handles in STORE).
+Value *
+Converter::fetchDst(int d, int c)
+{
+ Value *res;
+ Value *ptr = NULL, *dimRel = NULL;
+
+ tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
+
+ // First-dimension indirect index, if any.
+ if (dst.isIndirect(0))
+ ptr = fetchSrc(dst.getIndirect(0), 0, NULL);
+
+ // Resolve the second (dimension) index for 2D-indexed register files.
+ if (dst.is2D()) {
+ switch (dst.getFile()) {
+ case TGSI_FILE_OUTPUT:
+ assert(0); // TODO
+ dimRel = NULL;
+ break;
+ case TGSI_FILE_INPUT:
+ assert(0); // TODO
+ dimRel = NULL;
+ break;
+ case TGSI_FILE_CONSTANT:
+ // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
+ if (dst.isIndirect(1))
+ dimRel = fetchSrc(dst.getIndirect(1), 0, 0);
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Read the register through the regular source path.
+ struct tgsi_full_src_register fsr = dst.asSrc();
+ tgsi::Instruction::SrcRegister src(&fsr);
+ res = fetchSrc(src, c, ptr);
+
+ if (dimRel)
+ res->getInsn()->setIndirect(0, 1, dimRel);
+
+ return res;
+}
+
Converter::DataArray *
Converter::getArrayForFile(unsigned file, int idx)
{
switch (file) {
case TGSI_FILE_TEMPORARY:
return idx == 0 ? &tData : &lData;
- case TGSI_FILE_PREDICATE:
- return &pData;
case TGSI_FILE_ADDRESS:
return &aData;
case TGSI_FILE_OUTPUT:
idx += it->second;
}
+// Check whether 'semantic' is one of the TGSI subgroup ballot-mask system
+// values (eq/lt/le/gt/ge). The caller uses this to return 0 for components
+// above x, since only the first 32 lanes are meaningful here.
+bool
+Converter::isSubGroupMask(uint8_t semantic)
+{
+ switch (semantic) {
+ case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
+ case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
+ case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
+ case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
+ case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
+ return true;
+ default:
+ return false;
+ }
+}
+
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
return ld->getDef(0);
case TGSI_FILE_SYSTEM_VALUE:
assert(!ptr);
+ if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
+ info->prop.cp.numThreads[swz] == 1)
+ return loadImm(NULL, 0u);
+ if (isSubGroupMask(info->sv[idx].sn) && swz > 0)
+ return loadImm(NULL, 0u);
+ if (info->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE)
+ return loadImm(NULL, 32u);
ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
ld->perPatch = info->sv[idx].patch;
return ld->getDef(0);
/* Save the viewport index into a scratch register so that it can be
exported at EMIT time */
if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
+ prog->getType() == Program::TYPE_GEOMETRY &&
viewport != NULL)
mkOp1(OP_MOV, TYPE_U32, viewport, val);
else
}
} else
if (f == TGSI_FILE_TEMPORARY ||
- f == TGSI_FILE_PREDICATE ||
f == TGSI_FILE_ADDRESS ||
f == TGSI_FILE_OUTPUT) {
if (f == TGSI_FILE_TEMPORARY) {
Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
Value *dotp = getScratch();
- mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
+ mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1)
+ ->dnz = info->io.mul_zero_wins;
for (int c = 1; c < dim; ++c) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
- mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp);
+ mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp)
+ ->dnz = info->io.mul_zero_wins;
}
return dotp;
}
{
unsigned rIdx = 0, sIdx = 0;
+ if (R >= 0 && tgsi.getSrc(R).getFile() != TGSI_FILE_SAMPLER) {
+ // This is the bindless case. We have to get the actual value and pass
+ // it in. This will be the complete handle.
+ tex->tex.rIndirectSrc = s;
+ tex->setSrc(s++, fetchSrc(R, 0));
+ tex->setTexture(tgsi.getTexture(code, R), 0xff, 0x1f);
+ tex->tex.bindless = true;
+ return;
+ }
+
if (R >= 0)
rIdx = tgsi.getSrc(R).getIndex(0);
if (S >= 0)
for (s = 0; s < tgt.getArgCount(); ++s)
arg[s] = src[s] = fetchSrc(0, s);
- if (texi->op == OP_TXL || texi->op == OP_TXB)
+ if (tgsi.getOpcode() == TGSI_OPCODE_TEX_LZ)
+ lod = loadImm(NULL, 0);
+ else if (texi->op == OP_TXL || texi->op == OP_TXB)
lod = fetchSrc(L >> 4, L & 3);
if (C == 0x0f)
if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
texi->tex.levelZero = true;
+ if (prog->getType() != Program::TYPE_FRAGMENT &&
+ (tgsi.getOpcode() == TGSI_OPCODE_TEX ||
+ tgsi.getOpcode() == TGSI_OPCODE_TEX2 ||
+ tgsi.getOpcode() == TGSI_OPCODE_TXP))
+ texi->tex.levelZero = true;
if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);
}
for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c)
texi->setSrc(c, fetchSrc(0, c));
- texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms
+ if (!ms && tgsi.getOpcode() == TGSI_OPCODE_TXF_LZ)
+ texi->setSrc(c++, loadImm(NULL, 0));
+ else
+ texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms
setTexRS(texi, c, R, -1);
bb->insertTail(texi);
}
+// Implement TGSI FBFETCH: read the current framebuffer texel by emitting a
+// level-zero TXF on a 2D MS array target, addressed with the fragment's own
+// position, layer and sample index.
+void
+Converter::handleFBFETCH(Value *dst[4])
+{
+ TexInstruction *texi = new_TexInstruction(func, OP_TXF);
+ unsigned int c, d;
+
+ texi->tex.target = TEX_TARGET_2D_MS_ARRAY;
+ texi->tex.levelZero = 1;
+ texi->tex.useOffsets = 0;
+
+ // Hook up only the channels the caller wants; build the fetch mask.
+ for (c = 0, d = 0; c < 4; ++c) {
+ if (dst[c]) {
+ texi->setDef(d++, dst[c]);
+ texi->tex.mask |= 1 << c;
+ }
+ }
+
+ // Build the texel coordinate from position / layer / sample sysvals.
+ Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
+ Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
+ Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
+ Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));
+
+ // Position is a float sysval; truncate towards zero to integer coords.
+ mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z;
+ mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;
+ texi->setSrc(0, x);
+ texi->setSrc(1, y);
+ texi->setSrc(2, z);
+ texi->setSrc(3, ms);
+
+ // No explicit texture/sampler slot is referenced here.
+ texi->tex.r = texi->tex.s = -1;
+
+ bb->insertTail(texi);
+}
+
void
Converter::handleLIT(Value *dst0[4])
{
return n + 1;
}
-static inline nv50_ir::TexTarget
-getImageTarget(const tgsi::Source *code, int r)
-{
- return tgsi::translateTexture(code->images.at(r).target);
-}
-
-static inline const nv50_ir::TexInstruction::ImgFormatDesc *
-getImageFormat(const tgsi::Source *code, int r)
-{
- return &nv50_ir::TexInstruction::formatTable[
- tgsi::translateImgFormat(code->images.at(r).format)];
-}
-
+// Collect the coordinate operands for an image access from source slot 's':
+// one value per dimension, plus the array/cube index where applicable.
void
-Converter::getImageCoords(std::vector<Value *> &coords, int r, int s)
+Converter::getImageCoords(std::vector<Value *> &coords, int s)
{
TexInstruction::Target t =
- TexInstruction::Target(getImageTarget(code, r));
+ TexInstruction::Target(tgsi.getImageTarget());
const int arg = t.getDim() + (t.isArray() || t.isCube());
for (int c = 0; c < arg; ++c)
coords.push_back(fetchSrc(s, c));
+
+ // Multisampled images additionally take the sample index from the w
+ // component of the coordinate source.
+ if (t.isMS())
+ coords.push_back(fetchSrc(s, 3));
}
// For raw loads, granularity is 4 byte.
const int r = tgsi.getSrc(0).getIndex(0);
int c;
std::vector<Value *> off, src, ldv, def;
+ Value *ind = NULL;
+
+ if (tgsi.getSrc(0).isIndirect(0))
+ ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
switch (tgsi.getSrc(0).getFile()) {
case TGSI_FILE_BUFFER:
if (!dst0[c])
continue;
- Value *off = fetchSrc(1, c);
+ Value *off;
Symbol *sym;
+ uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4;
+
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
- tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
+ tgsi.getSrc(1).getValueU32(0, info) +
+ src0_component_offset);
} else {
- sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 4 * c);
+ // yzw are ignored for buffers
+ off = fetchSrc(1, 0);
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
+ src0_component_offset);
}
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
ld->cache = tgsi.getCacheMode();
- if (tgsi.getSrc(0).isIndirect(0))
- ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ if (ind)
+ ld->setIndirect(0, 1, ind);
}
break;
- case TGSI_FILE_IMAGE: {
- assert(!code->images[r].raw);
-
- getImageCoords(off, r, 1);
+ default: {
+ getImageCoords(off, 1);
def.resize(4);
for (c = 0; c < 4; ++c) {
def[c] = dst0[c];
}
+ bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
+ if (bindless)
+ ind = fetchSrc(0, 0);
+
TexInstruction *ld =
- mkTex(OP_SULDP, getImageTarget(code, r), code->images[r].slot, 0,
- def, off);
+ mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off);
ld->tex.mask = tgsi.getDst(0).getMask();
- ld->tex.format = getImageFormat(code, r);
+ ld->tex.format = tgsi.getImageFormat();
ld->cache = tgsi.getCacheMode();
- if (tgsi.getSrc(0).isIndirect(0))
- ld->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+ ld->tex.bindless = bindless;
+ if (!bindless)
+ ld->tex.r = r;
+ if (ind)
+ ld->setIndirectR(ind);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
if (dst0[c] != def[c])
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
- }
break;
- default:
- assert(!"Unsupported srcFile for LOAD");
}
+ }
+
/* Keep this around for now as reference when adding img support
getResourceCoords(off, r, 1);
const int r = tgsi.getDst(0).getIndex(0);
int c;
std::vector<Value *> off, src, dummy;
+ Value *ind = NULL;
+
+ if (tgsi.getDst(0).isIndirect(0))
+ ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0);
switch (tgsi.getDst(0).getFile()) {
case TGSI_FILE_BUFFER:
sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
} else {
+ // yzw are ignored for buffers
off = fetchSrc(0, 0);
sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
}
Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
st->cache = tgsi.getCacheMode();
- if (tgsi.getDst(0).isIndirect(0))
- st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
+ if (ind)
+ st->setIndirect(0, 1, ind);
}
break;
- case TGSI_FILE_IMAGE: {
- assert(!code->images[r].raw);
-
- getImageCoords(off, r, 0);
+ default: {
+ getImageCoords(off, 0);
src = off;
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
src.push_back(fetchSrc(1, c));
+ bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE;
+ if (bindless)
+ ind = fetchDst(0, 0);
+
TexInstruction *st =
- mkTex(OP_SUSTP, getImageTarget(code, r), code->images[r].slot,
- 0, dummy, src);
+ mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src);
st->tex.mask = tgsi.getDst(0).getMask();
+ st->tex.format = tgsi.getImageFormat();
st->cache = tgsi.getCacheMode();
- if (tgsi.getDst(0).isIndirect(0))
- st->setIndirectR(fetchSrc(tgsi.getDst(0).getIndirect(0), 0, NULL));
- }
+ st->tex.bindless = bindless;
+ if (!bindless)
+ st->tex.r = r;
+ if (ind)
+ st->setIndirectR(ind);
+
break;
- default:
- assert(!"Unsupported dstFile for STORE");
+ }
}
/* Keep this around for now as reference when adding img support
std::vector<Value *> srcv;
std::vector<Value *> defv;
LValue *dst = getScratch();
+ Value *ind = NULL;
+
+ if (tgsi.getSrc(0).isIndirect(0))
+ ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
switch (tgsi.getSrc(0).getFile()) {
case TGSI_FILE_BUFFER:
continue;
Instruction *insn;
- Value *off = fetchSrc(1, c), *off2 = NULL;
+ Value *off = fetchSrc(1, c);
Value *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
tgsi.getSrc(1).getValueU32(c, info));
else
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
- if (tgsi.getSrc(0).isIndirect(0))
- off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
else
insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
insn->setIndirect(0, 0, off);
- if (off2)
- insn->setIndirect(0, 1, off2);
+ if (ind)
+ insn->setIndirect(0, 1, ind);
insn->subOp = subOp;
}
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
break;
- case TGSI_FILE_IMAGE: {
- assert(!code->images[r].raw);
-
- getImageCoords(srcv, r, 1);
+ default: {
+ getImageCoords(srcv, 1);
defv.push_back(dst);
srcv.push_back(fetchSrc(2, 0));
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
srcv.push_back(fetchSrc(3, 0));
- TexInstruction *tex = mkTex(OP_SUREDP, getImageTarget(code, r),
- code->images[r].slot, 0, defv, srcv);
+ bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
+ if (bindless)
+ ind = fetchSrc(0, 0);
+
+ TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(),
+ 0, 0, defv, srcv);
tex->subOp = subOp;
tex->tex.mask = 1;
+ tex->tex.format = tgsi.getImageFormat();
tex->setType(ty);
- if (tgsi.getSrc(0).isIndirect(0))
- tex->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+ tex->tex.bindless = bindless;
+ if (!bindless)
+ tex->tex.r = r;
+ if (ind)
+ tex->setIndirectR(ind);
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
- }
break;
- default:
- assert(!"Unsupported srcFile for ATOM");
+ }
}
/* Keep this around for now as reference when adding img support
// Check whether the input is linear. All other attributes ignored.
Instruction *insn;
Value *offset = NULL, *ptr = NULL, *w = NULL;
+ Symbol *sym[4] = { NULL };
bool linear;
- operation op;
- int c, mode;
+ operation op = OP_NOP;
+ int c, mode = 0;
tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
- assert(src.getFile() == TGSI_FILE_INPUT);
-
- if (src.isIndirect(0))
- ptr = fetchSrc(src.getIndirect(0), 0, NULL);
- // XXX: no way to know interp mode if we don't know the index
- linear = info->in[ptr ? 0 : src.getIndex(0)].linear;
- if (linear) {
- op = OP_LINTERP;
- mode = NV50_IR_INTERP_LINEAR;
+ // In some odd cases, in large part due to varying packing, the source
+ // might not actually be an input. This is illegal TGSI, but it's easier to
+ // account for it here than it is to fix it where the TGSI is being
+ // generated. In that case, it's going to be a straight up mov (or sequence
+ // of mov's) from the input in question. We follow the mov chain to see
+ // which input we need to use.
+ if (src.getFile() != TGSI_FILE_INPUT) {
+ if (src.isIndirect(0)) {
+ ERROR("Ignoring indirect input interpolation\n");
+ return;
+ }
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ Value *val = fetchSrc(0, c);
+ assert(val->defs.size() == 1);
+ insn = val->getInsn();
+ while (insn->op == OP_MOV) {
+ assert(insn->getSrc(0)->defs.size() == 1);
+ insn = insn->getSrc(0)->getInsn();
+ if (!insn) {
+ ERROR("Miscompiling shader due to unhandled INTERP\n");
+ return;
+ }
+ }
+ if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) {
+ ERROR("Trying to interpolate non-input, this is not allowed.\n");
+ return;
+ }
+ sym[c] = insn->getSrc(0)->asSym();
+ assert(sym[c]);
+ op = insn->op;
+ mode = insn->ipa;
+ ptr = insn->getIndirect(0, 0);
+ }
} else {
- op = OP_PINTERP;
- mode = NV50_IR_INTERP_PERSPECTIVE;
+ if (src.isIndirect(0))
+ ptr = shiftAddress(fetchSrc(src.getIndirect(0), 0, NULL));
+
+ // We can assume that the fixed index will point to an input of the same
+ // interpolation type in case of an indirect.
+ // TODO: Make use of ArrayID.
+ linear = info->in[src.getIndex(0)].linear;
+ if (linear) {
+ op = OP_LINTERP;
+ mode = NV50_IR_INTERP_LINEAR;
+ } else {
+ op = OP_PINTERP;
+ mode = NV50_IR_INTERP_PERSPECTIVE;
+ }
}
switch (tgsi.getOpcode()) {
// and then convert to s32.
Value *offs[2];
for (c = 0; c < 2; c++) {
- offs[c] = fetchSrc(1, c);
- mkOp2(OP_MIN, TYPE_F32, offs[c], offs[c], loadImm(NULL, 0.4375f));
+ offs[c] = getScratch();
+ mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f));
mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
- insn = mkOp1(op, TYPE_F32, dst[c], srcToSym(src, c));
+ insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c));
if (op == OP_PINTERP)
insn->setSrc(1, w);
- if (ptr)
- insn->setIndirect(0, 0, ptr);
if (offset)
insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);
+ if (ptr)
+ insn->setIndirect(0, 0, ptr);
insn->setInterpolate(mode);
}
unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
- if (tgsi.dstCount()) {
+ if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
for (c = 0; c < 4; ++c) {
rDst0[c] = acquireDst(0, c);
dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
case TGSI_OPCODE_SHL:
case TGSI_OPCODE_ISHR:
case TGSI_OPCODE_USHR:
- case TGSI_OPCODE_SUB:
case TGSI_OPCODE_XOR:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
geni = mkOp2(op, dstTy, dst0[c], src0, src1);
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
+ if (op == OP_MUL && dstTy == TYPE_F32)
+ geni->dnz = info->io.mul_zero_wins;
+ geni->precise = insn->Instruction.Precise;
}
break;
case TGSI_OPCODE_MAD:
case TGSI_OPCODE_UMAD:
- case TGSI_OPCODE_SAD:
case TGSI_OPCODE_FMA:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
- mkOp3(op, dstTy, dst0[c], src0, src1, src2);
+ geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2);
+ if (dstTy == TYPE_F32)
+ geni->dnz = info->io.mul_zero_wins;
+ geni->precise = insn->Instruction.Precise;
}
break;
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_ABS:
case TGSI_OPCODE_CEIL:
case TGSI_OPCODE_FLR:
case TGSI_OPCODE_TRUNC:
mkOp1(op, TYPE_F32, dst0[3], val0);
}
break;
- case TGSI_OPCODE_SCS:
- if (mask & 3) {
- val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
- if (dst0[0])
- mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
- if (dst0[1])
- mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
- }
- if (dst0[2])
- loadImm(dst0[2], 0.0f);
- if (dst0[3])
- loadImm(dst0[3], 1.0f);
- break;
case TGSI_OPCODE_EXP:
src0 = fetchSrc(0, 0);
val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
if (dst0[1]) {
mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
- mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0);
+ mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0)
+ ->dnz = info->io.mul_zero_wins;
}
if (dst0[3])
loadImm(dst0[3], 1.0f);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkMov(dst0[c], val0);
break;
- case TGSI_OPCODE_DPH:
- val0 = buildDot(3);
- src1 = fetchSrc(1, 3);
- mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
- FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
- mkMov(dst0[c], val0);
- break;
case TGSI_OPCODE_DST:
if (dst0[0])
loadImm(dst0[0], 1.0f);
if (dst0[1]) {
src0 = fetchSrc(0, 1);
src1 = fetchSrc(1, 1);
- mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1);
+ mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1)
+ ->dnz = info->io.mul_zero_wins;
}
if (dst0[2])
mkMov(dst0[2], fetchSrc(0, 2));
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
mkOp3(OP_MAD, TYPE_F32, dst0[c],
- mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2);
+ mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2)
+ ->dnz = info->io.mul_zero_wins;
}
break;
case TGSI_OPCODE_LIT:
handleLIT(dst0);
break;
- case TGSI_OPCODE_XPD:
- FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
- if (c < 3) {
- val0 = getSSA();
- src0 = fetchSrc(1, (c + 1) % 3);
- src1 = fetchSrc(0, (c + 2) % 3);
- mkOp2(OP_MUL, TYPE_F32, val0, src0, src1);
- mkOp1(OP_NEG, TYPE_F32, val0, val0);
-
- src0 = fetchSrc(0, (c + 1) % 3);
- src1 = fetchSrc(1, (c + 2) % 3);
- mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0);
- } else {
- loadImm(dst0[c], 1.0f);
- }
- }
- break;
case TGSI_OPCODE_ISSG:
case TGSI_OPCODE_SSG:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
->rnd = ROUND_NI;
break;
- case TGSI_OPCODE_CLAMP:
- FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
- src0 = fetchSrc(0, c);
- src1 = fetchSrc(1, c);
- src2 = fetchSrc(2, c);
- val0 = getScratch();
- mkOp2(OP_MIN, TYPE_F32, val0, src0, src1);
- mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2);
- }
- break;
case TGSI_OPCODE_SLT:
case TGSI_OPCODE_SGE:
case TGSI_OPCODE_SEQ:
mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
}
break;
+ case TGSI_OPCODE_VOTE_ALL:
+ case TGSI_OPCODE_VOTE_ANY:
+ case TGSI_OPCODE_VOTE_EQ:
+ val0 = new_LValue(func, FILE_PREDICATE);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero);
+ mkOp1(op, dstTy, val0, val0)
+ ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
+ mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0);
+ }
+ break;
+ case TGSI_OPCODE_BALLOT:
+ if (!tgsi.getDst(0).isMasked(0)) {
+ val0 = new_LValue(func, FILE_PREDICATE);
+ mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, 0), zero);
+ mkOp1(op, TYPE_U32, dst0[0], val0)->subOp = NV50_IR_SUBOP_VOTE_ANY;
+ }
+ if (!tgsi.getDst(0).isMasked(1))
+ mkMov(dst0[1], zero, TYPE_U32);
+ break;
+ case TGSI_OPCODE_READ_FIRST:
+ // ReadFirstInvocationARB(src) is implemented as
+ // ReadInvocationARB(src, findLSB(ballot(true)))
+ val0 = getScratch();
+ mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
+ mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
+ ->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+ src1 = val0;
+ /* fallthrough */
+ case TGSI_OPCODE_READ_INVOC:
+ if (tgsi.getOpcode() == TGSI_OPCODE_READ_INVOC)
+ src1 = fetchSrc(1, 0);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ geni = mkOp3(op, dstTy, dst0[c], fetchSrc(0, c), src1, mkImm(0x1f));
+ geni->subOp = NV50_IR_SUBOP_SHFL_IDX;
+ }
+ break;
+ case TGSI_OPCODE_CLOCK:
+ // Stick the 32-bit clock into the high dword of the logical result.
+ if (!tgsi.getDst(0).isMasked(0))
+ mkOp1(OP_MOV, TYPE_U32, dst0[0], zero);
+ if (!tgsi.getDst(0).isMasked(1))
+ mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
+ break;
case TGSI_OPCODE_KILL_IF:
val0 = new_LValue(func, FILE_PREDICATE);
mask = 0;
mkOp(OP_DISCARD, TYPE_NONE, NULL);
break;
case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TEX_LZ:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
case TGSI_OPCODE_SAMPLE_C_LZ:
handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
break;
+ case TGSI_OPCODE_TXF_LZ:
case TGSI_OPCODE_TXF:
handleTXF(dst0, 1, 0x03);
break;
handleTXQ(dst0, TXQ_TYPE, 0);
std::swap(dst0[0], dst0[2]);
break;
+ case TGSI_OPCODE_FBFETCH:
+ handleFBFETCH(dst0);
+ break;
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
if (stream && op == OP_RESTART)
break;
+ if (info->prop.gp.maxVertices == 0)
+ break;
src0 = mkImm(stream);
mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
break;
if (!isEndOfSubroutine(ip + 1)) {
// insert a PRERET at the entry if this is an early return
// (only needed for sharing code in the epilogue)
- BasicBlock *pos = getBB();
- setPosition(BasicBlock::get(func->cfg.getRoot()), false);
- mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
- setPosition(pos, true);
+ BasicBlock *root = BasicBlock::get(func->cfg.getRoot());
+ if (root->getEntry() == NULL || root->getEntry()->op != OP_PRERET) {
+ BasicBlock *pos = getBB();
+ setPosition(root, false);
+ mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
+ setPosition(pos, true);
+ }
}
mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
geni->fixed = 1;
geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
break;
- case TGSI_OPCODE_MFENCE:
- case TGSI_OPCODE_LFENCE:
- case TGSI_OPCODE_SFENCE:
- geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
- geni->fixed = 1;
- geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
- break;
case TGSI_OPCODE_MEMBAR:
+ {
+ uint32_t level = tgsi.getSrc(0).getValueU32(0, info);
geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
geni->fixed = 1;
- if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP)
+ if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED)))
geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
else
geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
+ }
break;
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
case TGSI_OPCODE_RESQ:
- geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0],
- makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0));
- if (tgsi.getSrc(0).isIndirect(0))
- geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ Value *ind = NULL;
+ if (tgsi.getSrc(0).isIndirect(0))
+ ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
+ geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0],
+ makeSym(tgsi.getSrc(0).getFile(),
+ tgsi.getSrc(0).getIndex(0), -1, 0, 0));
+ if (ind)
+ geni->setIndirect(0, 1, ind);
+ } else {
+ TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
+ for (int c = 0, d = 0; c < 4; ++c) {
+ if (dst0[c]) {
+ texi->setDef(d++, dst0[c]);
+ texi->tex.mask |= 1 << c;
+ }
+ }
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) {
+ texi->tex.r = tgsi.getSrc(0).getIndex(0);
+ if (tgsi.getSrc(0).isIndirect(0))
+ texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+ } else {
+ texi->tex.bindless = true;
+ texi->setIndirectR(fetchSrc(0, 0));
+ }
+ texi->tex.target = tgsi.getImageTarget();
+
+ bb->insertTail(texi);
+ }
break;
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_UBFE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
+ val0 = getScratch();
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
- src1 = loadImm(NULL, tgsi.getSrc(2).getValueU32(c, info) << 8 |
- tgsi.getSrc(1).getValueU32(c, info));
+ loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) |
+ tgsi.getSrc(1).getValueU32(c, info));
} else {
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
- mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
+ mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1);
}
- mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1);
+ mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0);
}
break;
case TGSI_OPCODE_BFI:
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
src3 = fetchSrc(3, c);
- mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2);
- mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0);
+ val0 = getScratch();
+ mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2);
+ mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0);
}
break;
case TGSI_OPCODE_LSB:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
- geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000));
+ val0 = getScratch();
+ geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
- geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0);
+ geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
}
break;
case TGSI_OPCODE_INTERP_OFFSET:
handleINTERP(dst0);
break;
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_U642F:
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
case TGSI_OPCODE_D2F: {
src0 = fetchSrc(0, pos);
src1 = fetchSrc(0, pos + 1);
mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
- mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
+ Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
+ if (!isFloatType(dstTy))
+ cvt->rnd = ROUND_Z;
pos += 2;
}
break;
}
+ case TGSI_OPCODE_I2I64:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ dst0[c] = fetchSrc(0, c / 2);
+ mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_U2I64:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ dst0[c] = fetchSrc(0, c / 2);
+ dst0[c + 1] = zero;
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_F2I64:
+ case TGSI_OPCODE_F2U64:
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
case TGSI_OPCODE_F2D:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
Value *dreg = getSSA(8);
- mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
+ Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
+ if (!isFloatType(dstTy))
+ cvt->rnd = ROUND_Z;
mkSplit(&dst0[c], 4, dreg);
c++;
}
break;
+ case TGSI_OPCODE_D2I64:
+ case TGSI_OPCODE_D2U64:
+ case TGSI_OPCODE_I642D:
+ case TGSI_OPCODE_U642D:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);
+ if (!isFloatType(dstTy))
+ cvt->rnd = ROUND_Z;
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_I64NEG:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ mkOp2(OP_SUB, dstTy, dst, zero, src0);
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_I64ABS:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *neg = getSSA(8), *srcComp[2], *negComp[2];
+ srcComp[0] = fetchSrc(0, c);
+ srcComp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]);
+ mkOp2(OP_SUB, dstTy, neg, zero, src0);
+ mkSplit(negComp, 4, neg);
+ mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32,
+ negComp[0], srcComp[0], srcComp[1]);
+ mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32,
+ negComp[1], srcComp[1], srcComp[1]);
+ c++;
+ }
+ break;
case TGSI_OPCODE_DABS:
case TGSI_OPCODE_DNEG:
case TGSI_OPCODE_DRCP:
c++;
}
break;
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSEQ:
}
break;
}
+ case TGSI_OPCODE_U64MIN:
+ case TGSI_OPCODE_U64MAX:
+ case TGSI_OPCODE_I64MIN:
+ case TGSI_OPCODE_I64MAX: {
+ dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32;
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ Value *flag = getSSA(1, FILE_FLAGS);
+ src0 = fetchSrc(0, c + 1);
+ src1 = fetchSrc(1, c + 1);
+ geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);
+ geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;
+ geni->setFlagsDef(1, flag);
+
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1);
+ geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;
+ geni->setFlagsSrc(2, flag);
+
+ c++;
+ }
+ break;
+ }
+ case TGSI_OPCODE_U64SHL:
+ case TGSI_OPCODE_I64SHR:
+ case TGSI_OPCODE_U64SHR:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ // Theoretically src1 is a 64-bit value but in practice only the low
+ // bits matter. The IR expects this to be a 32-bit value.
+ src1 = fetchSrc(1, c);
+ mkOp2(op, dstTy, dst, src0, src1);
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_U64ADD:
+ case TGSI_OPCODE_U64MUL:
case TGSI_OPCODE_DADD:
case TGSI_OPCODE_DMUL:
+ case TGSI_OPCODE_DDIV:
case TGSI_OPCODE_DMAX:
case TGSI_OPCODE_DMIN:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
c++;
}
break;
+ case TGSI_OPCODE_I64SSG:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+
+ val0 = getScratch();
+ val1 = getScratch();
+ mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero);
+ mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero);
+ mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
+ mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31));
+ c++;
+ }
+ break;
default:
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
assert(0);
break;
}
- if (tgsi.dstCount()) {
+ if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
void
Converter::exportOutputs()
{
+ if (info->io.alphaRefBase) {
+ for (unsigned int i = 0; i < info->numOutputs; ++i) {
+ if (info->out[i].sn != TGSI_SEMANTIC_COLOR ||
+ info->out[i].si != 0)
+ continue;
+ const unsigned int c = 3;
+ if (!oData.exists(sub.cur->values, i, c))
+ continue;
+ Value *val = oData.load(sub.cur->values, i, c, NULL);
+ if (!val)
+ continue;
+
+ Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
+ TYPE_U32, info->io.alphaRefBase);
+ Value *pred = new_LValue(func, FILE_PREDICATE);
+ mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val,
+ mkLoadv(TYPE_U32, ref, NULL))
+ ->subOp = 1;
+ mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred);
+ }
+ }
+
for (unsigned int i = 0; i < info->numOutputs; ++i) {
for (unsigned int c = 0; c < 4; ++c) {
if (!oData.exists(sub.cur->values, i, c))
Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
code(code),
tgsi(NULL),
- tData(this), lData(this), aData(this), pData(this), oData(this)
+ tData(this), lData(this), aData(this), oData(this)
{
info = code->info;
const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
- const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
- pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);