return new TargetNVC0(chipset);
}
-TargetNVC0::TargetNVC0(unsigned int card) : Target(false, card >= 0xe4)
+TargetNVC0::TargetNVC0(unsigned int card) :
+ Target(card < 0x110, false, card >= 0xe4)
{
chipset = card;
initOpInfo();
// lazyness -> will just hardcode everything for the time being
-#include "target_lib_nvc0.asm.h"
-#include "target_lib_nve4.asm.h"
-#include "target_lib_nvf0.asm.h"
+#include "lib/gf100.asm.h"
+#include "lib/gk104.asm.h"
+#include "lib/gk110.asm.h"
void
TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
{
switch (chipset & ~0xf) {
case 0xe0:
- *code = (const uint32_t *)&nve4_builtin_code[0];
- *size = sizeof(nve4_builtin_code);
- break;
+ if (chipset < NVISA_GK20A_CHIPSET) { // chipsets in the 0xe0 range below GK20A use the gk104 library
+ *code = (const uint32_t *)&gk104_builtin_code[0];
+ *size = sizeof(gk104_builtin_code);
+ break;
+ }
+ /* fall-through for GK20A: 0xe0-range chipsets at or above NVISA_GK20A_CHIPSET share the gk110 library selected below */
case 0xf0:
case 0x100:
- *code = (const uint32_t *)&nvf0_builtin_code[0];
- *size = sizeof(nvf0_builtin_code);
+ *code = (const uint32_t *)&gk110_builtin_code[0]; // gk110 library for the 0xf0 and 0x100 ranges (and the GK20A fall-through)
+ *size = sizeof(gk110_builtin_code);
break;
default:
- *code = (const uint32_t *)&nvc0_builtin_code[0];
- *size = sizeof(nvc0_builtin_code);
+ *code = (const uint32_t *)&gf100_builtin_code[0]; // every other chipset range gets the gf100 library
+ *size = sizeof(gf100_builtin_code);
break;
}
}
switch (chipset & ~0xf) {
case 0xe0:
- return nve4_builtin_offsets[builtin];
+ if (chipset < NVISA_GK20A_CHIPSET)
+ return gk104_builtin_offsets[builtin];
+ /* fall-through for GK20A */
case 0xf0:
case 0x100:
- return nvf0_builtin_offsets[builtin];
+ return gk110_builtin_offsets[builtin];
default:
- return nvc0_builtin_offsets[builtin];
+ return gf100_builtin_offsets[builtin];
}
}
{ OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
{ OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
- { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
+ { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 }, // special c[] constraint
+ { OP_FMA, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 }, // keep the same as OP_MAD
+ { OP_SHLADD, 0x5, 0x0, 0x0, 0x0, 0x4, 0x6 },
{ OP_MADSP, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
{ OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
- { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
+ { OP_POPCNT, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 },
+ { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
+ { OP_EXTBF, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
+ { OP_BFIND, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1 },
{ OP_PERMT, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
static const uint32_t commutative[(OP_LAST + 31) / 32] =
{
- // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
- 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000
+ // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, SET_XOR,
+ // SET, SELP, SLCT (array is sized as OP_LAST bits rounded up to whole 32-bit words)
+ 0x0ce0ca00, 0x0000007e, 0x00000000, 0x00000000 // NOTE(review): bit positions presumably follow the operation enum values - confirm against the enum
};
static const uint32_t shortForm[(OP_LAST + 31) / 32] =
{
- // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
- 0x0670ca00, 0x00000000, 0x00000000, 0x00000000
+ // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN (array is sized as OP_LAST bits rounded up to whole 32-bit words)
+ 0x0ce0ca00, 0x00000000, 0x00000000, 0x00000000 // only the first word has bits set; NOTE(review): bit positions presumably follow the operation enum values - confirm
};
static const operation noDest[] =
unsigned int
TargetNVC0::getFileSize(DataFile file) const
{
+ const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
+ const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768;
switch (file) {
case FILE_NULL: return 0;
- case FILE_GPR: return (chipset >= NVISA_GK110_CHIPSET) ? 255 : 63;
+ case FILE_GPR: return MIN2(gprs, smregs / threads);
case FILE_PREDICATE: return 7;
case FILE_FLAGS: return 1;
case FILE_ADDRESS: return 0;
case FILE_MEMORY_CONST: return 65536;
case FILE_SHADER_INPUT: return 0x400;
case FILE_SHADER_OUTPUT: return 0x400;
+ case FILE_MEMORY_BUFFER: return 0xffffffff;
case FILE_MEMORY_GLOBAL: return 0xffffffff;
case FILE_MEMORY_SHARED: return 16 << 10;
case FILE_MEMORY_LOCAL: return 48 << 10;
case SV_CLIP_DISTANCE: return 0x2c0 + idx * 4;
case SV_POINT_COORD: return 0x2e0 + idx * 4;
case SV_FACE: return 0x3fc;
- case SV_TESS_FACTOR: return 0x000 + idx * 4;
+ case SV_TESS_OUTER: return 0x000 + idx * 4;
+ case SV_TESS_INNER: return 0x010 + idx * 4;
case SV_TESS_COORD: return 0x2f0 + idx * 4;
case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0;
case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0;
case SV_GRIDID: return kepler ? 0x18 : ~0;
+ case SV_WORK_DIM: return 0x1c;
case SV_SAMPLE_INDEX: return 0;
case SV_SAMPLE_POS: return 0;
+ case SV_SAMPLE_MASK: return 0;
+ case SV_BASEVERTEX: return 0;
+ case SV_BASEINSTANCE: return 0;
+ case SV_DRAWID: return 0;
default:
return 0xffffffff;
}
// indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
if (ld->src(0).isIndirect(0))
return false;
+ // these are implemented using shf.r and shf.l which can't load consts
+ if ((i->op == OP_SHL || i->op == OP_SHR) && typeSizeof(i->sType) == 8 &&
+ sf == FILE_MEMORY_CONST)
+ return false;
for (int k = 0; i->srcExists(k); ++k) {
if (i->src(k).getFile() == FILE_IMMEDIATE) {
if (k == 2 && i->op == OP_SUCLAMP) // special case
continue;
+ if (k == 1 && i->op == OP_SHLADD) // special case
+ continue;
if (i->getSrc(k)->reg.data.u64 != 0)
return false;
} else
if (i->src(k).getFile() != FILE_GPR &&
- i->src(k).getFile() != FILE_PREDICATE) {
+ i->src(k).getFile() != FILE_PREDICATE &&
+ i->src(k).getFile() != FILE_FLAGS) {
return false;
}
}
if (sf == FILE_IMMEDIATE) {
Storage ® = ld->getSrc(0)->asImm()->reg;
- if (opInfo[i->op].immdBits != 0xffffffff) {
- if (i->sType == TYPE_F32) {
+ if (opInfo[i->op].immdBits != 0xffffffff || typeSizeof(i->sType) > 4) {
+ switch (i->sType) {
+ case TYPE_F64:
+ if (reg.data.u64 & 0x00000fffffffffffULL)
+ return false;
+ break;
+ case TYPE_F32:
if (reg.data.u32 & 0xfff)
return false;
- } else
- if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
+ break;
+ case TYPE_S32:
+ case TYPE_U32:
// with u32, 0xfffff counts as 0xffffffff as well
if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
return false;
- }
- } else
- if (i->op == OP_MAD || i->op == OP_FMA) {
- // requires src == dst, cannot decide before RA
- // (except if we implement more constraints)
- if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff)
+ break;
+ case TYPE_U8:
+ case TYPE_S8:
+ case TYPE_U16:
+ case TYPE_S16:
+ case TYPE_F16:
+ break;
+ default:
return false;
+ }
} else
if (i->op == OP_ADD && i->sType == TYPE_F32) {
// add f32 LIMM cannot saturate
return true;
}
+bool
+TargetNVC0::insnCanLoadOffset(const Instruction *insn, int s, int offset) const
+{ // Reports whether `offset` is acceptable for source operand `s` of `insn`.
+ const ValueRef& ref = insn->src(s);
+ if (ref.getFile() == FILE_MEMORY_CONST &&
+ (insn->op != OP_LOAD || insn->subOp != NV50_IR_SUBOP_LDC_IS)) // const-file sources, except on an OP_LOAD with the LDC_IS sub-op
+ return offset >= -0x8000 && offset < 0x8000; // offset must lie in [-0x8000, 0x7fff]
+ return true; // no offset restriction is applied in any other case
+}
+
bool
TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
{
if (ty == TYPE_NONE)
return false;
- if (file == FILE_MEMORY_CONST && getChipset() >= 0xe0) // wrong encoding ?
- return typeSizeof(ty) <= 8;
+ if (file == FILE_MEMORY_CONST) {
+ if (getChipset() >= NVISA_GM107_CHIPSET)
+ return typeSizeof(ty) <= 4;
+ else
+ if (getChipset() >= NVISA_GK104_CHIPSET) // wrong encoding ?
+ return typeSizeof(ty) <= 8;
+ }
if (ty == TYPE_B96)
return false;
return true;
bool
TargetNVC0::isOpSupported(operation op, DataType ty) const
{
- if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
- return false;
if (op == OP_SAD && ty != TYPE_S32 && ty != TYPE_U32)
return false;
if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
case OP_AND:
case OP_OR:
case OP_XOR:
+ case OP_POPCNT:
+ case OP_BFIND:
break;
case OP_SET:
if (insn->sType != TYPE_F32)
if (s == 0)
return insn->src(1).mod.neg() ? false : true;
break;
+ case OP_SHLADD:
+ if (s == 1)
+ return false;
+ if (insn->src(s ? 0 : 2).mod.neg())
+ return false;
+ break;
default:
return false;
}
}
- if (s > 3)
+ if (s >= opInfo[insn->op].srcNr || s >= 3)
return false;
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
}
// not if the 2nd instruction isn't necessarily executed
if (clA == OPCLASS_TEXTURE || clA == OPCLASS_FLOW)
return false;
+
+ // Check that a and b don't write to the same destinations, and that b
+ // doesn't read anything that a writes.
+ if (!a->canCommuteDefDef(b) || !a->canCommuteDefSrc(b))
+ return false;
+
// anything with MOV
if (a->op == OP_MOV || b->op == OP_MOV)
return true;
if (clA == clB) {
- // only F32 arith or integer additions
- if (clA != OPCLASS_ARITH)
+ switch (clA) {
+ // there might be more
+ case OPCLASS_COMPARE:
+ if ((a->op == OP_MIN || a->op == OP_MAX) &&
+ (b->op == OP_MIN || b->op == OP_MAX))
+ break;
return false;
+ case OPCLASS_ARITH:
+ break;
+ default:
+ return false;
+ }
+ // only F32 arith or integer additions
return (a->dType == TYPE_F32 || a->op == OP_ADD ||
b->dType == TYPE_F32 || b->op == OP_ADD);
}
// nothing with TEXBAR
if (a->op == OP_TEXBAR || b->op == OP_TEXBAR)
return false;
- // no loads and stores accessing the the same space
+ // no loads and stores accessing the same space
if ((clA == OPCLASS_LOAD && clB == OPCLASS_STORE) ||
(clB == OPCLASS_LOAD && clA == OPCLASS_STORE))
if (a->src(0).getFile() == b->src(0).getFile())