return new TargetNV50(chipset);
}
-TargetNV50::TargetNV50(unsigned int card) : Target(true, false)
+TargetNV50::TargetNV50(unsigned int card) : Target(true, true, false)
{
chipset = card;
{
// neg abs not sat c[] s[], a[], imm
{ OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
- { OP_SUB, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
+ { OP_SUB, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
{ OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
{ OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
- { OP_MAD, 0x7, 0x0, 0x0, 0x0, 0x6, 0x1, 0x1, 0x0 }, // special constraint
+ { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x1, 0x1, 0x0 }, // special constraint
{ OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 },
{ OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 },
{ OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x0, 0x1, 0x1, 0x0 },
{ OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_EX2, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x0 },
{ OP_LG2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_RCP, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_RSQ, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{
unsigned int i, j;
- static const uint32_t commutative[(OP_LAST + 31) / 32] =
+ static const operation commutativeList[] =
{
- // ADD,MAD,MUL,AND,OR,XOR,MAX,MIN
- 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000
+ OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_AND, OP_OR, OP_XOR, OP_MAX, OP_MIN,
+ OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT
};
- static const uint32_t shortForm[(OP_LAST + 31) / 32] =
+ static const operation shortFormList[] =
{
- // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF
- 0x00010e40, 0x00000040, 0x00000498, 0x00000000
+ OP_MOV, OP_ADD, OP_SUB, OP_MUL, OP_MAD, OP_SAD, OP_RCP, OP_LINTERP,
+ OP_PINTERP, OP_TEX, OP_TXF
};
static const operation noDestList[] =
{
opInfo[i].hasDest = 1;
opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
- opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
+ opInfo[i].commutative = false; /* set below */
opInfo[i].pseudo = (i < OP_MOV);
opInfo[i].predicate = !opInfo[i].pseudo;
opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
- opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
+ opInfo[i].minEncSize = 8; /* set below */
}
- for (i = 0; i < sizeof(noDestList) / sizeof(noDestList[0]); ++i)
+ for (i = 0; i < ARRAY_SIZE(commutativeList); ++i)
+ opInfo[commutativeList[i]].commutative = true;
+ for (i = 0; i < ARRAY_SIZE(shortFormList); ++i)
+ opInfo[shortFormList[i]].minEncSize = 4;
+ for (i = 0; i < ARRAY_SIZE(noDestList); ++i)
opInfo[noDestList[i]].hasDest = 0;
- for (i = 0; i < sizeof(noPredList) / sizeof(noPredList[0]); ++i)
+ for (i = 0; i < ARRAY_SIZE(noPredList); ++i)
opInfo[noPredList[i]].predicate = 0;
- for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
+ for (i = 0; i < ARRAY_SIZE(_initProps); ++i) {
const struct opProperties *prop = &_initProps[i];
for (int s = 0; s < 3; ++s) {
if (prop->mSat & 8)
opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
}
+
+ if (chipset >= 0xa0)
+ opInfo[OP_MUL].dstMods = NV50_IR_MOD_SAT;
}
unsigned int
{
switch (file) {
case FILE_NULL: return 0;
- case FILE_GPR: return 256; // in 16-bit units **
+ case FILE_GPR: return 254; // in 16-bit units **
case FILE_PREDICATE: return 0;
case FILE_FLAGS: return 4;
case FILE_ADDRESS: return 4;
case FILE_MEMORY_CONST: return 65536;
case FILE_SHADER_INPUT: return 0x200;
case FILE_SHADER_OUTPUT: return 0x200;
+ case FILE_MEMORY_BUFFER: return 0xffffffff;
case FILE_MEMORY_GLOBAL: return 0xffffffff;
case FILE_MEMORY_SHARED: return 16 << 10;
case FILE_MEMORY_LOCAL: return 48 << 10;
addr += 4;
return addr;
}
+ case SV_PRIMITIVE_ID:
+ return shaderFile == FILE_SHADER_INPUT ? 0x18 :
+ sysvalLocation[sym->reg.data.sv.sv];
case SV_NCTAID:
return 0x8 + 2 * sym->reg.data.sv.index;
case SV_CTAID:
case SV_NTID:
return 0x2 + 2 * sym->reg.data.sv.index;
case SV_TID:
+ case SV_COMBINED_TID:
return 0;
+ case SV_SAMPLE_POS:
+ return 0; /* sample position is handled differently */
default:
return sysvalLocation[sym->reg.data.sv.sv];
}
{
DataFile sf = ld->src(0).getFile();
+ // immediate 0 can be represented by GPR $r63/$r127
+ if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
+ return (!i->isPseudo() &&
+ !i->asTex() &&
+ i->op != OP_EXPORT && i->op != OP_STORE);
+
if (sf == FILE_IMMEDIATE && (i->predSrc >= 0 || i->flagsDef >= 0))
return false;
if (s >= opInfo[i->op].srcNr)
return false;
// NOTE: don't rely on flagsDef
- for (int d = 0; i->defExists(d); ++d)
- if (i->def(d).getFile() == FILE_FLAGS)
- return false;
+ if (sf == FILE_IMMEDIATE)
+ for (int d = 0; i->defExists(d); ++d)
+ if (i->def(d).getFile() == FILE_FLAGS)
+ return false;
unsigned mode = 0;
return false;
if (sf == FILE_IMMEDIATE)
return false;
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH && sf == FILE_MEMORY_CONST)
+ return false;
ldSize = 2;
} else {
ldSize = typeSizeof(ld->dType);
}
if (sf == FILE_IMMEDIATE)
- return true;
+ return ldSize <= 4;
// Check if memory access is encodable:
return true;
}
+bool
+TargetNV50::insnCanLoadOffset(const Instruction *i, int s, int offset) const // can `offset` be added to source s of instruction i?
+{
+ if (!i->src(s).isIndirect(0)) // source s is not indirectly addressed: any offset is accepted
+ return true;
+ offset += i->src(s).get()->reg.data.offset; // validate the total: requested offset plus the offset already on the source
+ if (i->op == OP_LOAD || i->op == OP_STORE) {
+ // There are some restrictions in theory, but in practice they're never
+ // going to be hit. When we enable shared/global memory, this will
+ // become more important.
+ return true;
+ }
+ return offset >= 0 && offset <= (int32_t)(127 * i->src(s).get()->reg.size); // total must lie in [0, 127 * reg.size]
+}
+
bool
TargetNV50::isAccessSupported(DataFile file, DataType ty) const
{
if (ty == TYPE_B96 || ty == TYPE_NONE)
return false;
if (typeSizeof(ty) > 4)
- return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL);
+ return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL) ||
+ (file == FILE_MEMORY_BUFFER); // buffer memory joins local/global in accepting types with typeSizeof(ty) > 4
return true;
}
case OP_EXTBF:
case OP_EXIT: // want exit modifier instead (on NOP if required)
case OP_MEMBAR:
+ case OP_SHLADD:
+ case OP_XMAD:
return false;
case OP_SAD:
return ty == TYPE_S32;
+ case OP_SET:
+ return !isFloatType(ty);
default:
return true;
}
return false;
}
}
- if (s > 3)
+ if (s >= opInfo[insn->op].srcNr || s >= 3)
return false;
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
}
switch (i->src(0).getFile()) {
case FILE_MEMORY_LOCAL:
case FILE_MEMORY_GLOBAL:
+ case FILE_MEMORY_BUFFER:
return 100; // really 400 to 800
default:
return 22;
case TGSI_SEMANTIC_INSTANCEID: locs[SV_INSTANCE_ID] = addr; break;
case TGSI_SEMANTIC_VERTEXID: locs[SV_VERTEX_ID] = addr; break;
case TGSI_SEMANTIC_PRIMID: locs[SV_PRIMITIVE_ID] = addr; break;
- case NV50_SEMANTIC_LAYER: locs[SV_LAYER] = addr; break;
- case NV50_SEMANTIC_VIEWPORTINDEX: locs[SV_VIEWPORT_INDEX] = addr; break;
+ case TGSI_SEMANTIC_LAYER: locs[SV_LAYER] = addr; break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX: locs[SV_VIEWPORT_INDEX] = addr; break;
default:
break;
}
wposMask = 0x8;
sysvalLocation[SV_POSITION] = 0;
}
+
+ Target::parseDriverInfo(info);
}
} // namespace nv50_ir