mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}
-/* On nvc0, surface info is obtained via the surface binding points passed
- * to the SULD/SUST instructions.
- * On nve4, surface info is stored in c[] and is used by various special
- * instructions, e.g. for clamping coordinates or generating an address.
- * They couldn't just have added an equivalent to TIC now, couldn't they ?
- */
-#define NVC0_SU_INFO_ADDR 0x00
-#define NVC0_SU_INFO_FMT 0x04
-#define NVC0_SU_INFO_DIM_X 0x08
-#define NVC0_SU_INFO_PITCH 0x0c
-#define NVC0_SU_INFO_DIM_Y 0x10
-#define NVC0_SU_INFO_ARRAY 0x14
-#define NVC0_SU_INFO_DIM_Z 0x18
-#define NVC0_SU_INFO_UNK1C 0x1c
-#define NVC0_SU_INFO_WIDTH 0x20
-#define NVC0_SU_INFO_HEIGHT 0x24
-#define NVC0_SU_INFO_DEPTH 0x28
-#define NVC0_SU_INFO_TARGET 0x2c
-#define NVC0_SU_INFO_BSIZE 0x30
-#define NVC0_SU_INFO_RAW_X 0x34
-#define NVC0_SU_INFO_MS_X 0x38
-#define NVC0_SU_INFO_MS_Y 0x3c
-
-#define NVC0_SU_INFO__STRIDE 0x40
-
-#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8)
-#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
-#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4)
-
inline Value *
NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless)
{
prog->driver->io.suInfoBase);
}
+Value *
+NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless)
+{ /* Produce multisample-adjustment entry `index` (0 or 1) for a surface.
+   * The visible caller left-shifts the x coordinate by entry 0 and the y
+   * coordinate by entry 1.
+   *
+   * Non-bindless surfaces, and every chipset older than GM107, read the
+   * entry through loadSuInfo32() at offset NVC0_SU_INFO_MS(index).
+   * Otherwise a TXQ (TXQ_TYPE) on `ind` is inserted and the entry is
+   * computed arithmetically from its result, `samples`.
+   *
+   * target:   texture target copied onto the emitted TXQ
+   * index:    0 or 1; any other value asserts and returns NULL
+   * slot:     forwarded to loadSuInfo32() (unused on the TXQ path)
+   * ind:      forwarded to loadSuInfo32(), or used as source 0 of the TXQ
+   * bindless: selects between the two paths as described above
+   */
+ if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET)
+ return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless);
+
+ assert(bindless); // always holds here: the !bindless case returned above
+
+ Value *samples = bld.getSSA();
+ // this shouldn't be lowered because it's being inserted before the current instruction
+ TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
+ tex->tex.target = target;
+ tex->tex.query = TXQ_TYPE;
+ tex->tex.mask = 0x4; // NOTE(review): presumably selects the component carrying the sample count - confirm
+ tex->tex.r = 0xff; // NOTE(review): 0xff / 0x1f below look like "no bound slot" sentinels for bindless - confirm
+ tex->tex.s = 0x1f;
+ tex->tex.rIndirectSrc = 0; // source 0 (`ind`, set below) is marked as the indirect resource source
+ tex->setDef(0, samples);
+ tex->setSrc(0, ind);
+ tex->setSrc(1, bld.loadImm(NULL, 0));
+ bld.insert(tex);
+
+ // doesn't work with sample counts other than 1/2/4/8 but they aren't supported
+ switch (index) {
+ case 0: { // (samples + 2) >> 2 : 1 -> 0, 2 -> 1, 4 -> 1, 8 -> 2
+ Value *tmp = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), samples, bld.mkImm(2));
+ return bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(2));
+ }
+ case 1: { // (samples > 2) masked to its low bit : 1 -> 0, 2 -> 0, 4 -> 1, 8 -> 1
+ Value *tmp = bld.mkCmp(OP_SET, CC_GT, TYPE_U32, bld.getSSA(), TYPE_U32, samples, bld.mkImm(2))->getDef(0);
+ return bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(1));
+ }
+ default: { // only entries 0 and 1 exist
+ assert(false);
+ return NULL;
+ }
+ }
+}
+
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
{
switch (su->tex.target.getEnum()) {
Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
Value *ind = tex->getIndirectR();
- Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
- Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
+ Value *ms_x = loadMsAdjInfo32(tex->tex.target, 0, slot, ind, tex->tex.bindless);
+ Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, tex->tex.bindless);
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
}
}
+void
+NVC0LoweringPass::insertOOBSurfaceOpResult(TexInstruction *su)
+{ /* Give every result of a predicated surface op a zero fallback.
+   * For each def of `su`, a MOV of immediate 0 is emitted under the
+   * opposite predicate sense (CC_P, while `su` is asserted to use
+   * CC_NOT_P), and an OP_UNION of the original def and that zero is
+   * substituted for the original def.
+   * Presumably the predicate is the out-of-bounds check (going by the
+   * function name) - confirm against the code that sets it.
+   * Does nothing when `su` carries no predicate.
+   */
+ if (!su->getPredicate())
+ return;
+
+ bld.setPosition(su, true); // NOTE(review): `true` presumably means "insert after su" - confirm
+
+ for (unsigned i = 0; su->defExists(i); ++i) {
+ ValueDef &def = su->def(i);
+
+ Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+ assert(su->cc == CC_NOT_P); // the MOV below relies on su using the negated sense of the predicate
+ mov->setPredicate(CC_P, su->getPredicate());
+ Instruction *uni = bld.mkOp2(OP_UNION, TYPE_U32, bld.getSSA(), NULL, mov->getDef(0)); // source 0 left NULL for now
+
+ def.replace(uni->getDef(0), false); // hand the union's result to the original def's users
+ uni->setSrc(0, def.get()); // set only after replace(); presumably so the union's own use is not rewritten - confirm
+ }
+}
+
void
NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
{
processSurfaceCoordsNVE4(su);
- if (su->op == OP_SULDP)
+ if (su->op == OP_SULDP) {
convertSurfaceFormat(su);
+ insertOOBSurfaceOpResult(su);
+ }
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
assert(su->getPredicate());
processSurfaceCoordsNVC0(su);
- if (su->op == OP_SULDP)
+ if (su->op == OP_SULDP) {
convertSurfaceFormat(su);
+ insertOOBSurfaceOpResult(su);
+ }
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
const int dim = su->tex.target.getDim();
bld.setPosition(su, false);
+ adjustCoordinatesMS(su);
+
// add texture handle
switch (su->op) {
case OP_SUSTP:
{
processSurfaceCoordsGM107(su);
- if (su->op == OP_SULDP)
+ if (su->op == OP_SULDP) {
convertSurfaceFormat(su);
+ insertOOBSurfaceOpResult(su);
+ }
if (su->op == OP_SUREDP) {
Value *def = su->getDef(0);
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
}
} else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
+ int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1;
+ Value *ind = i->getIndirect(0, 1);
+
if (targ->getChipset() >= NVISA_GK104_CHIPSET &&
- prog->getType() == Program::TYPE_COMPUTE) {
+ prog->getType() == Program::TYPE_COMPUTE &&
+ (fileIndex >= 6 || ind)) {
// The launch descriptor only allows to set up 8 CBs, but OpenGL
- // requires at least 12 UBOs. To bypass this limitation, we store the
- // addrs into the driver constbuf and we directly load from the global
- // memory.
- int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1;
- Value *ind = i->getIndirect(0, 1);
-
- if (!ind && fileIndex == -1)
- return;
-
+ // requires at least 12 UBOs. To bypass this limitation, for constant
+ // buffers 7+, we store the addrs into the driver constbuf and we
+ // directly load from the global memory.
if (ind) {
// Clamp the UBO index when an indirect access is used to avoid
// loading information from the wrong place in the driver cb.
// TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID
i->op = OP_MOV;
i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0));
+ } else
+ if (sv == SV_TID) {
+ // Help CSE combine TID fetches
+ Value *tid = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(),
+ bld.mkSysVal(SV_COMBINED_TID, 0));
+ i->op = OP_EXTBF;
+ i->setSrc(0, tid);
+ switch (sym->reg.data.sv.index) {
+ case 0: i->setSrc(1, bld.mkImm(0x1000)); break;
+ case 1: i->setSrc(1, bld.mkImm(0x0a10)); break;
+ case 2: i->setSrc(1, bld.mkImm(0x061a)); break;
+ }
}
if (sv == SV_VERTEX_COUNT) {
bld.setPosition(i, true);
bool
NVC0LoweringPass::handleSQRT(Instruction *i)
{
+ if (targ->isOpSupported(OP_SQRT, i->dType))
+ return true;
+
if (i->dType == TYPE_F64) {
Value *pred = bld.getSSA(1, FILE_PREDICATE);
Value *zero = bld.loadImm(NULL, 0.0);