+/* Prepare the sources of surface instruction su for the NVC0 path and guard
+ * the instruction with a predicate derived from the surface info entries.
+ *
+ * In order, this:
+ *  - rewrites an indirect surface index to (ind + su->tex.r) & 7,
+ *  - multiplies source 0 by the NVC0_SU_INFO_BSIZE entry for OP_SULDP and
+ *    OP_SUREDP,
+ *  - multiplies source 2 by the NVC0_SU_INFO_ARRAY entry for array and cube
+ *    targets,
+ *  - predicates the instruction with CC_NOT_P on a flag that is set when the
+ *    NVC0_SU_INFO_ADDR entry is zero or, for ops other than OP_SUSTP with a
+ *    known format, when the format's byte size differs from the
+ *    NVC0_SU_INFO_BSIZE entry.
+ */
+void
+NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
+{
+   const int slot = su->tex.r;
+   const int dim = su->tex.target.getDim();
+   // NOTE(review): the source count is taken before adjustCoordinatesMS()
+   // runs; keep that order unless it is confirmed that the call leaves
+   // su->tex.target untouched.
+   const int numCoords =
+      dim + (su->tex.target.isArray() || su->tex.target.isCube());
+   Value *const zero = bld.mkImm(0);
+   Value *const ind = su->getIndirectR();
+
+   bld.setPosition(su, false);
+
+   adjustCoordinatesMS(su);
+
+   if (ind) {
+      // replace the indirect index with (ind + tex.r) & 7
+      Value *sum =
+         bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ind, bld.mkImm(su->tex.r));
+      Value *wrapped =
+         bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), sum, bld.mkImm(7));
+      su->setIndirectR(wrapped);
+   }
+
+   // get surface coordinates, padding the missing ones with zero
+   Value *coord[3];
+   for (int c = 0; c < 3; ++c)
+      coord[c] = (c < numCoords) ? su->getSrc(c) : zero;
+
+   // calculate pixel offset
+   switch (su->op) {
+   case OP_SULDP:
+   case OP_SUREDP: {
+      Value *bsize =
+         loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless);
+      su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), coord[0], bsize));
+      break;
+   }
+   default:
+      break;
+   }
+
+   // add array layer offset
+   if (su->tex.target.isArray() || su->tex.target.isCube()) {
+      Value *arrayInfo =
+         loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
+      assert(dim > 1);
+      su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), coord[2], arrayInfo));
+   }
+
+   // prevent read fault when the image is not actually bound: the flag is
+   // set when the NVC0_SU_INFO_ADDR entry compares equal to zero
+   CmpInstruction *unbound =
+      bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+                TYPE_U32, bld.mkImm(0),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
+   Value *const skip = unbound->getDef(0);
+
+   const TexInstruction::ImgFormatDesc *format = su->tex.format;
+   if (su->op != OP_SUSTP && format) {
+      // total number of bits over the four per-component entries
+      int totalBits = 0;
+      for (int i = 0; i < 4; ++i)
+         totalBits += format->bits[i];
+
+      assert(format->components != 0);
+      // make sure that the format doesn't mismatch when it's not FMT_NONE;
+      // the result is OR-ed into the same flag
+      bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, skip,
+                TYPE_U32, bld.loadImm(NULL, totalBits / 8),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
+                skip);
+   }
+
+   // the surface op only executes while the flag is clear
+   su->setPredicate(CC_NOT_P, skip);
+}
+
+/* Lower surface instruction su on the NVC0 path.
+ *
+ * TEX_TARGET_1D_ARRAY is first rewritten as TEX_TARGET_2D_ARRAY with a zero
+ * second coordinate, then processSurfaceCoordsNVC0() is applied. OP_SULDP
+ * additionally goes through convertSurfaceFormat() and
+ * insertOOBSurfaceOpResult(). OP_SUREDB / OP_SUREDP are turned into an
+ * OP_SULEA that yields an address (and the predicate as second def),
+ * followed by a predicated OP_ATOM on FILE_MEMORY_GLOBAL through that
+ * address; the original destination becomes the OP_UNION of the atomic
+ * result and a zero written under the opposite predicate.
+ */
+void
+NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
+{
+   if (su->tex.target == TEX_TARGET_1D_ARRAY) {
+      /* 1d arrays need 3 coordinates as well, so treat them as
+       * TEX_TARGET_2D_ARRAY: this simplifies the lowering pass and the
+       * texture constraints. */
+      su->moveSources(1, 1);
+      su->setSrc(1, bld.loadImm(NULL, 0));
+      su->tex.target = TEX_TARGET_2D_ARRAY;
+   }
+
+   processSurfaceCoordsNVC0(su);
+
+   if (su->op == OP_SULDP) {
+      convertSurfaceFormat(su);
+      insertOOBSurfaceOpResult(su);
+   }
+
+   // everything below only concerns reductions
+   if (su->op != OP_SUREDB && su->op != OP_SUREDP)
+      return;
+
+   const int numCoords = su->tex.target.getDim() +
+      (su->tex.target.isArray() || su->tex.target.isCube());
+   LValue *address = bld.getSSA(8);
+   Value *result = su->getDef(0);
+   Value *guard = su->getPredicate();
+
+   su->op = OP_SULEA;
+
+   // Set the destination to the address
+   su->dType = TYPE_U64;
+   su->setDef(0, address);
+   su->setDef(1, guard);
+
+   // the following instructions are emitted after su
+   bld.setPosition(su, true);
+
+   // Perform the atomic op; its operands follow the coordinates in su
+   Instruction *atom = bld.mkOp(OP_ATOM, su->sType, bld.getSSA());
+   atom->subOp = su->subOp;
+   atom->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, su->sType, 0));
+   atom->setSrc(1, su->getSrc(numCoords));
+   if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS)
+      atom->setSrc(2, su->getSrc(numCoords + 1));
+   atom->setIndirect(0, 0, address);
+
+   // make sure to initialize dst value when the atomic operation is not
+   // performed
+   Instruction *fallback = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+
+   assert(su->cc == CC_NOT_P);
+   atom->setPredicate(su->cc, guard);
+   fallback->setPredicate(CC_P, guard);
+
+   bld.mkOp2(OP_UNION, TYPE_U32, result, atom->getDef(0), fallback->getDef(0));
+
+   handleCasExch(atom, false);
+}
+
+/* Attach the surface handle to instruction su on the GM107 path and, for
+ * the non-bindless case, guard it with a predicate built from the surface
+ * info entries.
+ *
+ * The handle is stored as a source placed after the coordinates, at an
+ * extra offset of 4 for OP_SUSTP, 2 for an OP_SUREDP compare-and-swap, 1 for
+ * any other OP_SUREDP and 0 otherwise. For bindless surfaces the indirect
+ * value itself is the handle and no predicate is added.
+ */
+void
+NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
+{
+   const int slot = su->tex.r;
+   // NOTE(review): the source count is taken before adjustCoordinatesMS()
+   // runs; keep that order unless it is confirmed that the call leaves
+   // su->tex.target untouched.
+   const int numCoords = su->tex.target.getDim() +
+      (su->tex.target.isArray() || su->tex.target.isCube());
+   Value *const ind = su->getIndirectR();
+
+   bld.setPosition(su, false);
+
+   adjustCoordinatesMS(su);
+
+   // add texture handle: work out how far past the coordinates it goes
+   int handleOffset;
+   if (su->op == OP_SUSTP)
+      handleOffset = 4;
+   else if (su->op == OP_SUREDP)
+      handleOffset = (su->subOp == NV50_IR_SUBOP_ATOM_CAS) ? 2 : 1;
+   else
+      handleOffset = 0;
+
+   if (su->tex.bindless) {
+      su->setSrc(numCoords + handleOffset, ind);
+      // The address check doesn't make sense here. The format check could
+      // make sense but it's a bit of a pain.
+      return;
+   }
+   su->setSrc(numCoords + handleOffset, loadTexHandle(ind, slot + 32));
+
+   // prevent read fault when the image is not actually bound: the flag is
+   // set when the NVC0_SU_INFO_ADDR entry compares equal to zero
+   CmpInstruction *unbound =
+      bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+                TYPE_U32, bld.mkImm(0),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
+   Value *const skip = unbound->getDef(0);
+
+   const TexInstruction::ImgFormatDesc *format = su->tex.format;
+   if (su->op != OP_SUSTP && format) {
+      // total number of bits over the four per-component entries
+      int totalBits = 0;
+      for (int i = 0; i < 4; ++i)
+         totalBits += format->bits[i];
+
+      assert(format->components != 0);
+      // make sure that the format doesn't mismatch when it's not FMT_NONE;
+      // the result is OR-ed into the same flag
+      bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, skip,
+                TYPE_U32, bld.loadImm(NULL, totalBits / 8),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
+                skip);
+   }
+
+   // the surface op only executes while the flag is clear
+   su->setPredicate(CC_NOT_P, skip);
+}
+
+/* Lower surface instruction su on the GM107 path.
+ *
+ * processSurfaceCoordsGM107() is applied first. OP_SULDP additionally goes
+ * through convertSurfaceFormat() and insertOOBSurfaceOpResult(). OP_SUREDP
+ * becomes OP_SUREDB; when the instruction carries a predicate, its result is
+ * redirected to a fresh value and the original destination becomes the
+ * OP_UNION of that value and a zero written under the opposite predicate.
+ */
+void
+NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
+{
+   processSurfaceCoordsGM107(su);
+
+   if (su->op == OP_SULDP) {
+      convertSurfaceFormat(su);
+      insertOOBSurfaceOpResult(su);
+   }
+
+   if (su->op != OP_SUREDP)
+      return;
+
+   Value *result = su->getDef(0);
+
+   su->op = OP_SUREDB;
+
+   // There may not be a predicate in the bindless case.
+   if (!su->getPredicate())
+      return;
+
+   LValue *guarded = bld.getSSA();
+   su->setDef(0, guarded);
+
+   // the following instructions are emitted after su
+   bld.setPosition(su, true);
+
+   // make sure to initialize dst value when the atomic operation is not
+   // performed
+   Instruction *fallback = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+
+   assert(su->cc == CC_NOT_P);
+   fallback->setPredicate(CC_P, su->getPredicate());
+
+   bld.mkOp2(OP_UNION, TYPE_U32, result, su->getDef(0), fallback->getDef(0));
+}
+