nv50/ir/nir: implement load_(interpolated_)input/output
author Karol Herbst <kherbst@redhat.com>
Mon, 11 Dec 2017 15:21:06 +0000 (16:21 +0100)
committer Karol Herbst <kherbst@redhat.com>
Sun, 17 Mar 2019 09:33:28 +0000 (10:33 +0100)
v3: also implement load_output
v4: use smarter getIndirect helper
    use new getSlotAddress helper
v5: don't use const_offset directly
    fix for indirects
v6: add support for interpolateAt
v7: fix compiler warnings
    add load_barycentric_sample
    handle load_output for fragment shaders
v8: set info->prop.fp.readsSampleLocations for at_sample interpolation
    don't require C++11 features
v9: convert to C++ style comments

Signed-off-by: Karol Herbst <kherbst@redhat.com>
src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

index 6e26e00d91f9d12c4753aeec537316c9d04871fd..70c4aecd69972d738ffd8431d12bbeb943b1b55f 100644 (file)
@@ -1597,6 +1597,141 @@ Converter::visit(nir_intrinsic_instr *insn)
       }
       break;
    }
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_output: {
+      LValues &newDefs = convert(&insn->dest);
+
+      // FBFetch
+      if (prog->getType() == Program::TYPE_FRAGMENT &&
+          op == nir_intrinsic_load_output) {
+         std::vector<Value*> defs, srcs;
+         uint8_t mask = 0;
+
+         srcs.push_back(getSSA());
+         srcs.push_back(getSSA());
+         Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
+         Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
+         mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
+         mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
+
+         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
+         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
+
+         for (uint8_t i = 0u; i < insn->num_components; ++i) {
+            defs.push_back(newDefs[i]);
+            mask |= 1 << i;
+         }
+
+         TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
+         texi->tex.levelZero = 1;
+         texi->tex.mask = mask;
+         texi->tex.useOffsets = 0;
+         texi->tex.r = 0xffff;
+         texi->tex.s = 0xffff;
+
+         info->prop.fp.readsFramebuffer = true;
+         break;
+      }
+
+      const DataType dType = getDType(insn);
+      Value *indirect;
+      bool input = op != nir_intrinsic_load_output;
+      operation nvirOp;
+      uint32_t mode = 0;
+
+      uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
+      nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
+
+      // see load_barycentric_* handling
+      if (prog->getType() == Program::TYPE_FRAGMENT) {
+         mode = translateInterpMode(&vary, nvirOp);
+         if (op == nir_intrinsic_load_interpolated_input) {
+            ImmediateValue immMode;
+            if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
+               mode |= immMode.reg.data.u32;
+         }
+      }
+
+      for (uint8_t i = 0u; i < insn->num_components; ++i) {
+         uint32_t address = getSlotAddress(insn, idx, i);
+         Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
+         if (prog->getType() == Program::TYPE_FRAGMENT) {
+            int s = 1;
+            if (typeSizeof(dType) == 8) {
+               Value *lo = getSSA();
+               Value *hi = getSSA();
+               Instruction *interp;
+
+               interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
+               if (nvirOp == OP_PINTERP)
+                  interp->setSrc(s++, fp.position);
+               if (mode & NV50_IR_INTERP_OFFSET)
+                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+               interp->setInterpolate(mode);
+               interp->setIndirect(0, 0, indirect);
+
+               Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
+               interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
+               if (nvirOp == OP_PINTERP)
+                  interp->setSrc(s++, fp.position);
+               if (mode & NV50_IR_INTERP_OFFSET)
+                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+               interp->setInterpolate(mode);
+               interp->setIndirect(0, 0, indirect);
+
+               mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
+            } else {
+               Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
+               if (nvirOp == OP_PINTERP)
+                  interp->setSrc(s++, fp.position);
+               if (mode & NV50_IR_INTERP_OFFSET)
+                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+               interp->setInterpolate(mode);
+               interp->setIndirect(0, 0, indirect);
+            }
+         } else {
+            mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
+         }
+      }
+      break;
+   }
+   case nir_intrinsic_load_barycentric_at_offset:
+   case nir_intrinsic_load_barycentric_at_sample:
+   case nir_intrinsic_load_barycentric_centroid:
+   case nir_intrinsic_load_barycentric_pixel:
+   case nir_intrinsic_load_barycentric_sample: {
+      LValues &newDefs = convert(&insn->dest);
+      uint32_t mode;
+
+      if (op == nir_intrinsic_load_barycentric_centroid ||
+          op == nir_intrinsic_load_barycentric_sample) {
+         mode = NV50_IR_INTERP_CENTROID;
+      } else if (op == nir_intrinsic_load_barycentric_at_offset) {
+         Value *offs[2];
+         for (uint8_t c = 0; c < 2; c++) {
+            offs[c] = getScratch();
+            mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
+            mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
+            mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
+            mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
+         }
+         mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
+
+         mode = NV50_IR_INTERP_OFFSET;
+      } else if (op == nir_intrinsic_load_barycentric_pixel) {
+         mode = NV50_IR_INTERP_DEFAULT;
+      } else if (op == nir_intrinsic_load_barycentric_at_sample) {
+         info->prop.fp.readsSampleLocations = true;
+         mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
+         mode = NV50_IR_INTERP_OFFSET;
+      } else {
+         unreachable("all intrinsics already handled above");
+      }
+
+      loadImm(newDefs[1], mode);
+      break;
+   }
    default:
       ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
       return false;