prog->driver->io.suInfoBase);
}
+Value * // result is used by the caller in this patch as the OP_SHL amount for the x (index 0) or y (index 1) coordinate
+NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless)
+{ // target: copied onto the emitted TXQ; slot: forwarded to loadSuInfo32 on the fallback path; ind: used by both paths
+ if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET) // non-bindless access, or chipset older than GM107
+ return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless); // fetch the value through loadSuInfo32 instead
+ // Bindless on GM107+ from here on: the value is computed from a TXQ result rather than loaded.
+ assert(bindless);
+ // `samples` receives the TXQ result; presumably the sample count (see the note before the switch) - confirm.
+ Value *samples = bld.getSSA();
+ // this shouldn't be lowered because it's being inserted before the current instruction
+ TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
+ tex->tex.target = target;
+ tex->tex.query = TXQ_TYPE;
+ tex->tex.mask = 0x4; // only bit 2 is set; presumably selects the single component that lands in def 0 - confirm
+ tex->tex.r = 0xff; // NOTE(review): 0xff / 0x1f look like "no fixed binding" placeholders for the bindless case - confirm
+ tex->tex.s = 0x1f;
+ tex->tex.rIndirectSrc = 0; // presumably marks source 0 (ind, set below) as the indirect handle - confirm
+ tex->setDef(0, samples);
+ tex->setSrc(0, ind);
+ tex->setSrc(1, bld.loadImm(NULL, 0)); // second source is a constant 0
+ bld.insert(tex);
+ // Expected (index 0, index 1) results for samples = 1, 2, 4, 8: (0,0), (1,0), (1,1), (2,1); index 1 assumes SET sets bit 0 when true.
+ // doesn't work with sample counts other than 1/2/4/8 but they aren't supported
+ switch (index) {
+ case 0: { // (samples + 2) >> 2
+ Value *tmp = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), samples, bld.mkImm(2));
+ return bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(2));
+ }
+ case 1: { // unsigned (samples > 2) comparison result, masked down to its bit 0
+ Value *tmp = bld.mkCmp(OP_SET, CC_GT, TYPE_U32, bld.getSSA(), TYPE_U32, samples, bld.mkImm(2))->getDef(0);
+ return bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(1));
+ }
+ default: { // only indices 0 and 1 are handled
+ assert(false);
+ return NULL; // reached only when assertions are compiled out
+ }
+ }
+}
+
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
{
switch (su->tex.target.getEnum()) {
Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
Value *ind = tex->getIndirectR();
- Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
- Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
+ Value *ms_x = loadMsAdjInfo32(tex->tex.target, 0, slot, ind, tex->tex.bindless);
+ Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, tex->tex.bindless);
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
}
}
+void // adds a zero-valued alternative for every def of a predicated surface op; "OOB" presumably means out-of-bounds - confirm
+NVC0LoweringPass::insertOOBSurfaceOpResult(TexInstruction *su)
+{ // su: the surface instruction; in this patch it is passed in for OP_SULDP right after convertSurfaceFormat(su)
+ if (!su->getPredicate()) // unpredicated op: nothing to add
+ return;
+ // New instructions are positioned relative to su; NOTE(review): `true` presumably means "after su" - confirm.
+ bld.setPosition(su, true);
+ // One mov + union pair is emitted for each existing def of su.
+ for (unsigned i = 0; su->defExists(i); ++i) {
+ ValueDef &def = su->def(i);
+ // The mov writes immediate 0 under CC_P on su's predicate, the opposite sense of su's asserted CC_NOT_P.
+ Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+ assert(su->cc == CC_NOT_P);
+ mov->setPredicate(CC_P, su->getPredicate());
+ Instruction *uni = bld.mkOp2(OP_UNION, TYPE_U32, bld.getSSA(), NULL, mov->getDef(0)); // source 0 is left NULL for now
+ // Order matters: replace() presumably repoints def's existing users at the union result before the union itself reads def - confirm.
+ def.replace(uni->getDef(0), false);
+ uni->setSrc(0, def.get());
+ }
+}
+
void
NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
{
processSurfaceCoordsNVE4(su);
- if (su->op == OP_SULDP)
+ if (su->op == OP_SULDP) {
convertSurfaceFormat(su);
+ insertOOBSurfaceOpResult(su);
+ }
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
assert(su->getPredicate());
processSurfaceCoordsNVC0(su);
- if (su->op == OP_SULDP)
+ if (su->op == OP_SULDP) {
convertSurfaceFormat(su);
+ insertOOBSurfaceOpResult(su);
+ }
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
const int dim = su->tex.target.getDim();
{
processSurfaceCoordsGM107(su);
- if (su->op == OP_SULDP)
+ if (su->op == OP_SULDP) {
convertSurfaceFormat(su);
+ insertOOBSurfaceOpResult(su);
+ }
if (su->op == OP_SUREDP) {
Value *def = su->getDef(0);
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
}
} else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
+ int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1;
+ Value *ind = i->getIndirect(0, 1);
+
if (targ->getChipset() >= NVISA_GK104_CHIPSET &&
- prog->getType() == Program::TYPE_COMPUTE) {
+ prog->getType() == Program::TYPE_COMPUTE &&
+ (fileIndex >= 6 || ind)) {
// The launch descriptor only allows to set up 8 CBs, but OpenGL
- // requires at least 12 UBOs. To bypass this limitation, we store the
- // addrs into the driver constbuf and we directly load from the global
- // memory.
- int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1;
- Value *ind = i->getIndirect(0, 1);
-
- if (!ind && fileIndex == -1)
- return;
-
+ // requires at least 12 UBOs. To bypass this limitation, for constant
+ // buffers 7+, we store the addrs into the driver constbuf and we
+ // directly load from the global memory.
if (ind) {
// Clamp the UBO index when an indirect access is used to avoid
// loading information from the wrong place in the driver cb.