X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fcodegen%2Fnv50_ir_target_nv50.cpp;h=ec94590a3f8e60ef05e2452c2010a8876d4e6d93;hb=bab4f6c724d384cfee7e7f98ff3b52648850641d;hp=f3ddcaa51999f7f11baed423d172b568a7d5a7df;hpb=9d60793a03e40e1d139b78fce0144cad57438741;p=mesa.git diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index f3ddcaa5199..ec94590a3f8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -99,6 +99,7 @@ static const struct opProperties _initProps[] = { OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + { OP_EX2, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x0 }, { OP_LG2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, { OP_RCP, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, { OP_RSQ, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, @@ -110,15 +111,15 @@ void TargetNV50::initOpInfo() { unsigned int i, j; - static const uint32_t commutative[(OP_LAST + 31) / 32] = + static const operation commutativeList[] = { - // ADD,MAD,MUL,AND,OR,XOR,MAX,MIN - 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 + OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_AND, OP_OR, OP_XOR, OP_MAX, OP_MIN, + OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT }; - static const uint32_t shortForm[(OP_LAST + 31) / 32] = + static const operation shortFormList[] = { - // MOV,ADD,SUB,MUL,MAD,SAD,L/PINTERP,RCP,TEX,TXF - 0x00014e40, 0x00000040, 0x00000930, 0x00000000 + OP_MOV, OP_ADD, OP_SUB, OP_MUL, OP_MAD, OP_SAD, OP_RCP, OP_LINTERP, + OP_PINTERP, OP_TEX, OP_TXF }; static const operation noDestList[] = { @@ -155,18 +156,22 @@ void TargetNV50::initOpInfo() opInfo[i].hasDest = 1; opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); - opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1; + opInfo[i].commutative = false; /* set below */ opInfo[i].pseudo = (i < OP_MOV); opInfo[i].predicate = !opInfo[i].pseudo; opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN); - opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8; + opInfo[i].minEncSize = 8; /* set below */ } - for (i = 0; i < sizeof(noDestList) / sizeof(noDestList[0]); ++i) + for (i = 0; i < ARRAY_SIZE(commutativeList); ++i) + opInfo[commutativeList[i]].commutative = true; + for (i = 0; i < ARRAY_SIZE(shortFormList); ++i) + opInfo[shortFormList[i]].minEncSize = 4; + for (i = 0; i < ARRAY_SIZE(noDestList); ++i) opInfo[noDestList[i]].hasDest = 0; - for (i = 0; i < sizeof(noPredList) / sizeof(noPredList[0]); ++i) + for (i = 0; i < ARRAY_SIZE(noPredList); ++i) opInfo[noPredList[i]].predicate = 0; - for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) { + for (i = 0; i < ARRAY_SIZE(_initProps); ++i) { const struct opProperties *prop = &_initProps[i]; for (int s = 0; s < 3; ++s) { @@ -198,7 +203,7 @@ TargetNV50::getFileSize(DataFile file) const { switch (file) { case FILE_NULL: return 0; - case FILE_GPR: return 256; // in 16-bit units ** + case FILE_GPR: return 254; // in 16-bit units ** case FILE_PREDICATE: return 0; case FILE_FLAGS: return 4; case FILE_ADDRESS: return 4; @@ -206,6 +211,7 @@ TargetNV50::getFileSize(DataFile file) const case FILE_MEMORY_CONST: return 65536; case FILE_SHADER_INPUT: return 0x200; case FILE_SHADER_OUTPUT: return 0x200; + case FILE_MEMORY_BUFFER: return 0xffffffff; case FILE_MEMORY_GLOBAL: return 0xffffffff; case FILE_MEMORY_SHARED: return 16 << 10; case FILE_MEMORY_LOCAL: return 48 << 10; @@ -251,6 +257,7 @@ TargetNV50::getSVAddress(DataFile shaderFile, const Symbol *sym) const case SV_NTID: return 0x2 + 2 * sym->reg.data.sv.index; case SV_TID: + case SV_COMBINED_TID: return 0; case SV_SAMPLE_POS: return 0; /* sample position is handled differently */ @@ -268,6 +275,12 @@ TargetNV50::insnCanLoad(const Instruction *i, int s, { DataFile sf = ld->src(0).getFile(); + // immediate 0 can be represented by GPR $r63/$r127 + if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0) + return (!i->isPseudo() && + !i->asTex() && + i->op != OP_EXPORT && i->op != OP_STORE); + if (sf == FILE_IMMEDIATE && (i->predSrc >= 0 || i->flagsDef >= 0)) return false; if (s >= opInfo[i->op].srcNr) @@ -343,7 +356,7 @@ TargetNV50::insnCanLoad(const Instruction *i, int s, } if (sf == FILE_IMMEDIATE) - return true; + return ldSize <= 4; // Check if memory access is encodable: @@ -378,13 +391,29 @@ TargetNV50::insnCanLoad(const Instruction *i, int s, return true; } +bool +TargetNV50::insnCanLoadOffset(const Instruction *i, int s, int offset) const +{ + if (!i->src(s).isIndirect(0)) + return true; + offset += i->src(s).get()->reg.data.offset; + if (i->op == OP_LOAD || i->op == OP_STORE) { + // There are some restrictions in theory, but in practice they're never + // going to be hit. When we enable shared/global memory, this will + // become more important. + return true; + } + return offset >= 0 && offset <= (int32_t)(127 * i->src(s).get()->reg.size); +} + bool TargetNV50::isAccessSupported(DataFile file, DataType ty) const { if (ty == TYPE_B96 || ty == TYPE_NONE) return false; if (typeSizeof(ty) > 4) - return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL); + return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL) || + (file == FILE_MEMORY_BUFFER); return true; } @@ -413,6 +442,8 @@ TargetNV50::isOpSupported(operation op, DataType ty) const case OP_EXTBF: case OP_EXIT: // want exit modifier instead (on NOP if required) case OP_MEMBAR: + case OP_SHLADD: + case OP_XMAD: return false; case OP_SAD: return ty == TYPE_S32; @@ -454,7 +485,7 @@ TargetNV50::isModSupported(const Instruction *insn, int s, Modifier mod) const return false; } } - if (s >= 3) + if (s >= opInfo[insn->op].srcNr || s >= 3) return false; return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod; } @@ -487,6 +518,7 @@ int TargetNV50::getLatency(const Instruction *i) const switch (i->src(0).getFile()) { case FILE_MEMORY_LOCAL: case FILE_MEMORY_GLOBAL: + case FILE_MEMORY_BUFFER: return 100; // really 400 to 800 default: return 22; @@ -567,6 +599,8 @@ TargetNV50::parseDriverInfo(const struct nv50_ir_prog_info *info) wposMask = 0x8; sysvalLocation[SV_POSITION] = 0; } + + Target::parseDriverInfo(info); } } // namespace nv50_ir