+// Map a NIR system-value load intrinsic onto the nv50 IR SVSemantic it
+// reads.  Only the intrinsics handled by the system-value path in
+// Converter::visit() are expected here; anything else is a programming
+// error (ERROR + assert, SV_LAST returned as a poison value).
+SVSemantic
+Converter::convert(nir_intrinsic_op intr)
+{
+   switch (intr) {
+   case nir_intrinsic_load_base_vertex:
+      return SV_BASEVERTEX;
+   case nir_intrinsic_load_base_instance:
+      return SV_BASEINSTANCE;
+   case nir_intrinsic_load_draw_id:
+      return SV_DRAWID;
+   case nir_intrinsic_load_front_face:
+      return SV_FACE;
+   // both helper-invocation queries read the same thread-kill state
+   case nir_intrinsic_is_helper_invocation:
+   case nir_intrinsic_load_helper_invocation:
+      return SV_THREAD_KILL;
+   case nir_intrinsic_load_instance_id:
+      return SV_INSTANCE_ID;
+   case nir_intrinsic_load_invocation_id:
+      return SV_INVOCATION_ID;
+   case nir_intrinsic_load_local_group_size:
+      return SV_NTID;
+   case nir_intrinsic_load_local_invocation_id:
+      return SV_TID;
+   case nir_intrinsic_load_num_work_groups:
+      return SV_NCTAID;
+   case nir_intrinsic_load_patch_vertices_in:
+      return SV_VERTEX_COUNT;
+   case nir_intrinsic_load_primitive_id:
+      return SV_PRIMITIVE_ID;
+   case nir_intrinsic_load_sample_id:
+      return SV_SAMPLE_INDEX;
+   case nir_intrinsic_load_sample_mask_in:
+      return SV_SAMPLE_MASK;
+   case nir_intrinsic_load_sample_pos:
+      return SV_SAMPLE_POS;
+   case nir_intrinsic_load_subgroup_eq_mask:
+      return SV_LANEMASK_EQ;
+   case nir_intrinsic_load_subgroup_ge_mask:
+      return SV_LANEMASK_GE;
+   case nir_intrinsic_load_subgroup_gt_mask:
+      return SV_LANEMASK_GT;
+   case nir_intrinsic_load_subgroup_le_mask:
+      return SV_LANEMASK_LE;
+   case nir_intrinsic_load_subgroup_lt_mask:
+      return SV_LANEMASK_LT;
+   case nir_intrinsic_load_subgroup_invocation:
+      return SV_LANEID;
+   case nir_intrinsic_load_tess_coord:
+      return SV_TESS_COORD;
+   case nir_intrinsic_load_tess_level_inner:
+      return SV_TESS_INNER;
+   case nir_intrinsic_load_tess_level_outer:
+      return SV_TESS_OUTER;
+   case nir_intrinsic_load_vertex_id:
+      return SV_VERTEX_ID;
+   case nir_intrinsic_load_work_group_id:
+      return SV_CTAID;
+   case nir_intrinsic_load_work_dim:
+      return SV_WORK_DIM;
+   default:
+      ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
+            nir_intrinsic_infos[intr].name);
+      assert(false);
+      return SV_LAST;
+   }
+}
+
+// Lower a single NIR intrinsic instruction to nv50 IR.  Returns false for
+// intrinsics that are not handled (after printing an error), true otherwise.
+bool
+Converter::visit(nir_intrinsic_instr *insn)
+{
+   nir_intrinsic_op op = insn->intrinsic;
+   const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
+   unsigned dest_components = nir_intrinsic_dest_components(insn);
+
+   switch (op) {
+   case nir_intrinsic_load_uniform: {
+      LValues &newDefs = convert(&insn->dest);
+      const DataType dType = getDType(insn);
+      Value *indirect;
+      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
+      for (uint8_t i = 0; i < dest_components; ++i) {
+         // uniforms live in c0[]; the offset is scaled by 16, i.e. one
+         // vec4 slot per uniform index
+         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
+      }
+      break;
+   }
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output: {
+      Value *indirect;
+      DataType dType = getSType(insn->src[0], false, false);
+      // the offset source follows the value (and, for per-vertex, the vertex)
+      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
+
+      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
+         // only store components selected by the write mask
+         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+            continue;
+
+         uint8_t offset = 0;
+         Value *src = getSrc(&insn->src[0], i);
+         switch (prog->getType()) {
+         case Program::TYPE_FRAGMENT: {
+            if (info_out->out[idx].sn == TGSI_SEMANTIC_POSITION) {
+               // TGSI uses a different interface than NIR, TGSI stores that
+               // value in the z component, NIR in X
+               offset += 2;
+               src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
+            }
+            break;
+         }
+         case Program::TYPE_GEOMETRY:
+         case Program::TYPE_TESSELLATION_EVAL:
+         case Program::TYPE_VERTEX: {
+            // keep a copy of the clip-vertex value around for the
+            // user-clip-plane handling (clipVtx is consumed there)
+            if (info_out->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
+               mkMov(clipVtx[i], src);
+               src = clipVtx[i];
+            }
+            break;
+         }
+         default:
+            break;
+         }
+
+         storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
+      }
+      break;
+   }
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_output: {
+      LValues &newDefs = convert(&insn->dest);
+
+      // FBFetch
+      // reading an output in a fragment shader means fetching the current
+      // framebuffer value, implemented as a TXF at the pixel's position
+      if (prog->getType() == Program::TYPE_FRAGMENT &&
+          op == nir_intrinsic_load_output) {
+         std::vector<Value*> defs, srcs;
+         uint8_t mask = 0;
+
+         srcs.push_back(getSSA());
+         srcs.push_back(getSSA());
+         Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
+         Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
+         // truncate the float position to integer texel coordinates
+         mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
+         mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
+
+         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
+         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
+
+         for (uint8_t i = 0u; i < dest_components; ++i) {
+            defs.push_back(newDefs[i]);
+            mask |= 1 << i;
+         }
+
+         TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
+         texi->tex.levelZero = 1;
+         texi->tex.mask = mask;
+         texi->tex.useOffsets = 0;
+         texi->tex.r = 0xffff;
+         texi->tex.s = 0xffff;
+
+         info_out->prop.fp.readsFramebuffer = true;
+         break;
+      }
+
+      const DataType dType = getDType(insn);
+      Value *indirect;
+      bool input = op != nir_intrinsic_load_output;
+      operation nvirOp;
+      uint32_t mode = 0;
+
+      uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
+      nv50_ir_varying& vary = input ? info_out->in[idx] : info_out->out[idx];
+
+      // see load_barycentric_* handling
+      if (prog->getType() == Program::TYPE_FRAGMENT) {
+         if (op == nir_intrinsic_load_interpolated_input) {
+            ImmediateValue immMode;
+            if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
+               mode = immMode.reg.data.u32;
+         }
+         if (mode == NV50_IR_INTERP_DEFAULT)
+            mode |= translateInterpMode(&vary, nvirOp);
+         else {
+            if (vary.linear) {
+               nvirOp = OP_LINTERP;
+               mode |= NV50_IR_INTERP_LINEAR;
+            } else {
+               nvirOp = OP_PINTERP;
+               mode |= NV50_IR_INTERP_PERSPECTIVE;
+            }
+         }
+      }
+
+      for (uint8_t i = 0u; i < dest_components; ++i) {
+         uint32_t address = getSlotAddress(insn, idx, i);
+         Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
+         if (prog->getType() == Program::TYPE_FRAGMENT) {
+            int s = 1;
+            if (typeSizeof(dType) == 8) {
+               // 64-bit varyings are interpolated as two 32-bit halves and
+               // merged afterwards
+               Value *lo = getSSA();
+               Value *hi = getSSA();
+               Instruction *interp;
+
+               interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
+               if (nvirOp == OP_PINTERP)
+                  interp->setSrc(s++, fp.position);
+               if (mode & NV50_IR_INTERP_OFFSET)
+                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+               interp->setInterpolate(mode);
+               interp->setIndirect(0, 0, indirect);
+
+               Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
+               interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
+               if (nvirOp == OP_PINTERP)
+                  interp->setSrc(s++, fp.position);
+               if (mode & NV50_IR_INTERP_OFFSET)
+                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+               interp->setInterpolate(mode);
+               interp->setIndirect(0, 0, indirect);
+
+               mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
+            } else {
+               Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
+               if (nvirOp == OP_PINTERP)
+                  interp->setSrc(s++, fp.position);
+               if (mode & NV50_IR_INTERP_OFFSET)
+                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+               interp->setInterpolate(mode);
+               interp->setIndirect(0, 0, indirect);
+            }
+         } else {
+            // non-fragment stages read attributes with a plain load
+            mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
+         }
+      }
+      break;
+   }
+   case nir_intrinsic_load_barycentric_at_offset:
+   case nir_intrinsic_load_barycentric_at_sample:
+   case nir_intrinsic_load_barycentric_centroid:
+   case nir_intrinsic_load_barycentric_pixel:
+   case nir_intrinsic_load_barycentric_sample: {
+      // the result here is only the interpolation mode (and, for the
+      // offset/sample variants, a packed offset in newDefs[0]); the actual
+      // interpolation happens in load_interpolated_input above
+      LValues &newDefs = convert(&insn->dest);
+      uint32_t mode;
+
+      if (op == nir_intrinsic_load_barycentric_centroid ||
+          op == nir_intrinsic_load_barycentric_sample) {
+         mode = NV50_IR_INTERP_CENTROID;
+      } else if (op == nir_intrinsic_load_barycentric_at_offset) {
+         Value *offs[2];
+         for (uint8_t c = 0; c < 2; c++) {
+            // clamp the offset to [-0.5, 0.4375] and convert to fixed point
+            // (scale by 4096); presumably the hardware's interp-offset
+            // format — TODO confirm exact format
+            offs[c] = getScratch();
+            mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
+            mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
+            mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
+            mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
+         }
+         // pack both fixed-point offsets into one register
+         mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
+
+         mode = NV50_IR_INTERP_OFFSET;
+      } else if (op == nir_intrinsic_load_barycentric_pixel) {
+         mode = NV50_IR_INTERP_DEFAULT;
+      } else if (op == nir_intrinsic_load_barycentric_at_sample) {
+         info_out->prop.fp.readsSampleLocations = true;
+         // query the sample position and reuse the offset interpolation path
+         mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
+         mode = NV50_IR_INTERP_OFFSET;
+      } else {
+         unreachable("all intrinsics already handled above");
+      }
+
+      loadImm(newDefs[1], mode);
+      break;
+   }
+   case nir_intrinsic_demote:
+   case nir_intrinsic_discard:
+      mkOp(OP_DISCARD, TYPE_NONE, NULL);
+      break;
+   case nir_intrinsic_demote_if:
+   case nir_intrinsic_discard_if: {
+      Value *pred = getSSA(1, FILE_PREDICATE);
+      if (insn->num_components > 1) {
+         ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
+         assert(false);
+         return false;
+      }
+      // discard only where the condition is non-zero
+      mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
+      mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
+      break;
+   }
+   // system values: all lowered to OP_RDSV via convert(op) above
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id:
+   case nir_intrinsic_load_front_face:
+   case nir_intrinsic_is_helper_invocation:
+   case nir_intrinsic_load_helper_invocation:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_invocation_id:
+   case nir_intrinsic_load_local_group_size:
+   case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_num_work_groups:
+   case nir_intrinsic_load_patch_vertices_in:
+   case nir_intrinsic_load_primitive_id:
+   case nir_intrinsic_load_sample_id:
+   case nir_intrinsic_load_sample_mask_in:
+   case nir_intrinsic_load_sample_pos:
+   case nir_intrinsic_load_subgroup_eq_mask:
+   case nir_intrinsic_load_subgroup_ge_mask:
+   case nir_intrinsic_load_subgroup_gt_mask:
+   case nir_intrinsic_load_subgroup_le_mask:
+   case nir_intrinsic_load_subgroup_lt_mask:
+   case nir_intrinsic_load_subgroup_invocation:
+   case nir_intrinsic_load_tess_coord:
+   case nir_intrinsic_load_tess_level_inner:
+   case nir_intrinsic_load_tess_level_outer:
+   case nir_intrinsic_load_vertex_id:
+   case nir_intrinsic_load_work_group_id:
+   case nir_intrinsic_load_work_dim: {
+      const DataType dType = getDType(insn);
+      SVSemantic sv = convert(op);
+      LValues &newDefs = convert(&insn->dest);
+
+      for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
+         Value *def;
+         if (typeSizeof(dType) == 8)
+            def = getSSA();
+         else
+            def = newDefs[i];
+
+         if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
+            // a dimension with a single thread always has id 0; skip the RDSV
+            loadImm(def, 0u);
+         } else {
+            Symbol *sym = mkSysVal(sv, i);
+            Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
+            if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
+               rdsv->perPatch = 1;
+         }
+
+         // zero-extend 32-bit system values into 64-bit destinations
+         if (typeSizeof(dType) == 8)
+            mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
+      }
+      break;
+   }
+   // constants
+   case nir_intrinsic_load_subgroup_size: {
+      // subgroup (warp) size is a constant 32 on this hardware
+      LValues &newDefs = convert(&insn->dest);
+      loadImm(newDefs[0], 32u);
+      break;
+   }
+   case nir_intrinsic_vote_all:
+   case nir_intrinsic_vote_any:
+   case nir_intrinsic_vote_ieq: {
+      LValues &newDefs = convert(&insn->dest);
+      Value *pred = getScratch(1, FILE_PREDICATE);
+      // convert the 32-bit condition into a predicate, vote across the
+      // subgroup, then widen the resulting predicate back to 32 bit
+      mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
+      mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
+      mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
+      break;
+   }
+   case nir_intrinsic_ballot: {
+      LValues &newDefs = convert(&insn->dest);
+      Value *pred = getSSA(1, FILE_PREDICATE);
+      // VOTE_ANY with a per-lane predicate yields the ballot mask
+      mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
+      mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
+      break;
+   }
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_read_invocation: {
+      LValues &newDefs = convert(&insn->dest);
+      const DataType dType = getDType(insn);
+      Value *tmp = getScratch();
+
+      if (op == nir_intrinsic_read_first_invocation) {
+         // compute the index of the first active lane: VOTE_ANY(1) gives
+         // the active-lane mask, BREV+BFIND then locate its lowest set bit
+         mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
+         mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
+         mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+      } else
+         tmp = getSrc(&insn->src[1], 0);
+
+      // broadcast the chosen lane's value via indexed shuffle
+      for (uint8_t i = 0; i < dest_components; ++i) {
+         mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
+            ->subOp = NV50_IR_SUBOP_SHFL_IDX;
+      }
+      break;
+   }
+   case nir_intrinsic_load_per_vertex_input: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectVertex;
+      Value *indirectOffset;
+      uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
+      uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
+
+      // PFETCH resolves the per-vertex base address for the given vertex
+      Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
+                              mkImm(baseVertex), indirectVertex);
+      for (uint8_t i = 0u; i < dest_components; ++i) {
+         uint32_t address = getSlotAddress(insn, idx, i);
+         loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
+                  indirectOffset, vtxBase, info_out->in[idx].patch);
+      }
+      break;
+   }
+   case nir_intrinsic_load_per_vertex_output: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectVertex;
+      Value *indirectOffset;
+      uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
+      uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
+      Value *vtxBase = NULL;
+
+      if (indirectVertex)
+         vtxBase = indirectVertex;
+      else
+         vtxBase = loadImm(NULL, baseVertex);
+
+      vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
+
+      for (uint8_t i = 0u; i < dest_components; ++i) {
+         uint32_t address = getSlotAddress(insn, idx, i);
+         // NOTE(review): this reads in[idx].patch although the load targets
+         // FILE_SHADER_OUTPUT — confirm this shouldn't be out[idx].patch
+         loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
+                  indirectOffset, vtxBase, info_out->in[idx].patch);
+      }
+      break;
+   }
+   case nir_intrinsic_emit_vertex: {
+      // user clip planes have to be written before each emitted vertex
+      if (info_out->io.genUserClip > 0)
+         handleUserClipPlanes();
+      uint32_t idx = nir_intrinsic_stream_id(insn);
+      mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
+      break;
+   }
+   case nir_intrinsic_end_primitive: {
+      uint32_t idx = nir_intrinsic_stream_id(insn);
+      // only stream 0 primitives are restarted; other streams are a no-op
+      if (idx)
+         break;
+      mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
+      break;
+   }
+   case nir_intrinsic_load_ubo: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectIndex;
+      Value *indirectOffset;
+      // + 1: c0[] holds the user uniforms (see load_uniform), so UBO
+      // bindings start at constant buffer 1
+      uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
+      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+      for (uint8_t i = 0u; i < dest_components; ++i) {
+         loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
+                  indirectOffset, indirectIndex);
+      }
+      break;
+   }
+   case nir_intrinsic_get_buffer_size: {
+      LValues &newDefs = convert(&insn->dest);
+      const DataType dType = getDType(insn);
+      Value *indirectBuffer;
+      uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+
+      Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
+      mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
+      break;
+   }
+   case nir_intrinsic_store_ssbo: {
+      DataType sType = getSType(insn->src[0], false, false);
+      Value *indirectBuffer;
+      Value *indirectOffset;
+      uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
+      uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
+
+      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
+         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+            continue;
+         Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
+                                offset + i * typeSizeof(sType));
+         mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
+            ->setIndirect(0, 1, indirectBuffer);
+      }
+      // 0x2 marks global writes (0x1 would be reads)
+      info_out->io.globalAccess |= 0x2;
+      break;
+   }
+   case nir_intrinsic_load_ssbo: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectBuffer;
+      Value *indirectOffset;
+      uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+      for (uint8_t i = 0u; i < dest_components; ++i)
+         loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
+                  indirectOffset, indirectBuffer);
+
+      info_out->io.globalAccess |= 0x1;
+      break;
+   }
+   case nir_intrinsic_shared_atomic_add:
+   case nir_intrinsic_shared_atomic_and:
+   case nir_intrinsic_shared_atomic_comp_swap:
+   case nir_intrinsic_shared_atomic_exchange:
+   case nir_intrinsic_shared_atomic_or:
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_shared_atomic_umax:
+   case nir_intrinsic_shared_atomic_umin:
+   case nir_intrinsic_shared_atomic_xor: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectOffset;
+      uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
+      Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
+      Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
+      // compare-and-swap carries the comparison value as an extra source
+      if (op == nir_intrinsic_shared_atomic_comp_swap)
+         atom->setSrc(2, getSrc(&insn->src[2], 0));
+      atom->setIndirect(0, 0, indirectOffset);
+      atom->subOp = getSubOp(op);
+      break;
+   }
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_xor: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectBuffer;
+      Value *indirectOffset;
+      uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+      Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
+      Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
+                                getSrc(&insn->src[2], 0));
+      if (op == nir_intrinsic_ssbo_atomic_comp_swap)
+         atom->setSrc(2, getSrc(&insn->src[3], 0));
+      atom->setIndirect(0, 0, indirectOffset);
+      atom->setIndirect(0, 1, indirectBuffer);
+      atom->subOp = getSubOp(op);
+
+      info_out->io.globalAccess |= 0x2;
+      break;
+   }
+   case nir_intrinsic_global_atomic_add:
+   case nir_intrinsic_global_atomic_and:
+   case nir_intrinsic_global_atomic_comp_swap:
+   case nir_intrinsic_global_atomic_exchange:
+   case nir_intrinsic_global_atomic_or:
+   case nir_intrinsic_global_atomic_imax:
+   case nir_intrinsic_global_atomic_imin:
+   case nir_intrinsic_global_atomic_umax:
+   case nir_intrinsic_global_atomic_umin:
+   case nir_intrinsic_global_atomic_xor: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *address;
+      uint32_t offset = getIndirect(&insn->src[0], 0, address);
+
+      Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
+      Instruction *atom =
+         mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
+      if (op == nir_intrinsic_global_atomic_comp_swap)
+         atom->setSrc(2, getSrc(&insn->src[2], 0));
+      atom->setIndirect(0, 0, address);
+      atom->subOp = getSubOp(op);
+
+      info_out->io.globalAccess |= 0x2;
+      break;
+   }
+   case nir_intrinsic_bindless_image_atomic_add:
+   case nir_intrinsic_bindless_image_atomic_and:
+   case nir_intrinsic_bindless_image_atomic_comp_swap:
+   case nir_intrinsic_bindless_image_atomic_exchange:
+   case nir_intrinsic_bindless_image_atomic_imax:
+   case nir_intrinsic_bindless_image_atomic_umax:
+   case nir_intrinsic_bindless_image_atomic_imin:
+   case nir_intrinsic_bindless_image_atomic_umin:
+   case nir_intrinsic_bindless_image_atomic_or:
+   case nir_intrinsic_bindless_image_atomic_xor:
+   case nir_intrinsic_bindless_image_atomic_inc_wrap:
+   case nir_intrinsic_bindless_image_atomic_dec_wrap:
+   case nir_intrinsic_bindless_image_load:
+   case nir_intrinsic_bindless_image_samples:
+   case nir_intrinsic_bindless_image_size:
+   case nir_intrinsic_bindless_image_store:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_imax:
+   case nir_intrinsic_image_atomic_umax:
+   case nir_intrinsic_image_atomic_imin:
+   case nir_intrinsic_image_atomic_umin:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_inc_wrap:
+   case nir_intrinsic_image_atomic_dec_wrap:
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_samples:
+   case nir_intrinsic_image_size:
+   case nir_intrinsic_image_store: {
+      std::vector<Value*> srcs, defs;
+      Value *indirect;
+      DataType ty;
+
+      uint32_t mask = 0;
+      TexInstruction::Target target =
+         convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
+      unsigned int argCount = getNIRArgCount(target);
+      uint16_t location = 0;
+
+      if (opInfo.has_dest) {
+         LValues &newDefs = convert(&insn->dest);
+         for (uint8_t i = 0u; i < newDefs.size(); ++i) {
+            defs.push_back(newDefs[i]);
+            mask |= 1 << i;
+         }
+      }
+
+      // lod_src marks the source slot holding the lod/sample argument so
+      // the generic source-gathering below can skip it
+      int lod_src = -1;
+      bool bindless = false;
+      switch (op) {
+      case nir_intrinsic_bindless_image_atomic_add:
+      case nir_intrinsic_bindless_image_atomic_and:
+      case nir_intrinsic_bindless_image_atomic_comp_swap:
+      case nir_intrinsic_bindless_image_atomic_exchange:
+      case nir_intrinsic_bindless_image_atomic_imax:
+      case nir_intrinsic_bindless_image_atomic_umax:
+      case nir_intrinsic_bindless_image_atomic_imin:
+      case nir_intrinsic_bindless_image_atomic_umin:
+      case nir_intrinsic_bindless_image_atomic_or:
+      case nir_intrinsic_bindless_image_atomic_xor:
+      case nir_intrinsic_bindless_image_atomic_inc_wrap:
+      case nir_intrinsic_bindless_image_atomic_dec_wrap:
+         ty = getDType(insn);
+         bindless = true;
+         info_out->io.globalAccess |= 0x2;
+         mask = 0x1;
+         break;
+      case nir_intrinsic_image_atomic_add:
+      case nir_intrinsic_image_atomic_and:
+      case nir_intrinsic_image_atomic_comp_swap:
+      case nir_intrinsic_image_atomic_exchange:
+      case nir_intrinsic_image_atomic_imax:
+      case nir_intrinsic_image_atomic_umax:
+      case nir_intrinsic_image_atomic_imin:
+      case nir_intrinsic_image_atomic_umin:
+      case nir_intrinsic_image_atomic_or:
+      case nir_intrinsic_image_atomic_xor:
+      case nir_intrinsic_image_atomic_inc_wrap:
+      case nir_intrinsic_image_atomic_dec_wrap:
+         ty = getDType(insn);
+         bindless = false;
+         info_out->io.globalAccess |= 0x2;
+         mask = 0x1;
+         break;
+      case nir_intrinsic_bindless_image_load:
+      case nir_intrinsic_image_load:
+         ty = TYPE_U32;
+         bindless = op == nir_intrinsic_bindless_image_load;
+         info_out->io.globalAccess |= 0x1;
+         lod_src = 4;
+         break;
+      case nir_intrinsic_bindless_image_store:
+      case nir_intrinsic_image_store:
+         ty = TYPE_U32;
+         mask = 0xf;
+         bindless = op == nir_intrinsic_bindless_image_store;
+         info_out->io.globalAccess |= 0x2;
+         lod_src = 5;
+         // NOTE(review): mask is assigned 0xf twice in this case
+         mask = 0xf;
+         break;
+      case nir_intrinsic_bindless_image_samples:
+         mask = 0x8;
+         // FALLTHROUGH -- the assignment above is redundant, mask is set
+         // again below
+      case nir_intrinsic_image_samples:
+         ty = TYPE_U32;
+         bindless = op == nir_intrinsic_bindless_image_samples;
+         mask = 0x8;
+         break;
+      case nir_intrinsic_bindless_image_size:
+      case nir_intrinsic_image_size:
+         assert(nir_src_as_uint(insn->src[1]) == 0);
+         ty = TYPE_U32;
+         bindless = op == nir_intrinsic_bindless_image_size;
+         break;
+      default:
+         unreachable("unhandled image opcode");
+         break;
+      }
+
+      // bindless images carry their handle as src[0]; bound images use a
+      // (possibly indirect) binding slot
+      if (bindless)
+         indirect = getSrc(&insn->src[0], 0);
+      else
+         location = getIndirect(&insn->src[0], 0, indirect);
+
+      // coords
+      if (opInfo.num_srcs >= 2)
+         for (unsigned int i = 0u; i < argCount; ++i)
+            srcs.push_back(getSrc(&insn->src[1], i));
+
+      // the sampler is just another src added after coords
+      if (opInfo.num_srcs >= 3 && target.isMS())
+         srcs.push_back(getSrc(&insn->src[2], 0));
+
+      if (opInfo.num_srcs >= 4 && lod_src != 4) {
+         unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
+         for (uint8_t i = 0u; i < components; ++i)
+            srcs.push_back(getSrc(&insn->src[3], i));
+      }
+
+      if (opInfo.num_srcs >= 5 && lod_src != 5)
+         // +1 source for atomic compare-and-swap
+         for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
+            srcs.push_back(getSrc(&insn->src[4], i));
+
+      TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
+      texi->tex.bindless = bindless;
+      texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
+      texi->tex.mask = mask;
+      texi->cache = convert(nir_intrinsic_access(insn));
+      texi->setType(ty);
+      texi->subOp = getSubOp(op);
+
+      if (indirect)
+         texi->setIndirectR(indirect);
+
+      break;
+   }
+   case nir_intrinsic_store_scratch:
+   case nir_intrinsic_store_shared: {
+      DataType sType = getSType(insn->src[0], false, false);
+      Value *indirectOffset;
+      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
+         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+            continue;
+         // getFile(op) picks scratch vs. shared memory
+         Symbol *sym = mkSymbol(getFile(op), 0, sType, offset + i * typeSizeof(sType));
+         mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
+      }
+      break;
+   }
+   case nir_intrinsic_load_kernel_input:
+   case nir_intrinsic_load_scratch:
+   case nir_intrinsic_load_shared: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectOffset;
+      uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
+
+      for (uint8_t i = 0u; i < dest_components; ++i)
+         loadFrom(getFile(op), 0, dType, newDefs[i], offset, i, indirectOffset);
+
+      break;
+   }
+   case nir_intrinsic_control_barrier: {
+      // TODO: add flag to shader_info
+      info_out->numBarriers = 1;
+      Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
+      bar->fixed = 1;
+      bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
+      break;
+   }
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_shared: {
+      Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
+      bar->fixed = 1;
+      bar->subOp = getSubOp(op);
+      break;
+   }
+   case nir_intrinsic_memory_barrier_tcs_patch:
+      // nothing to emit for patch-scope barriers here
+      break;
+   case nir_intrinsic_shader_clock: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+
+      // result is a uvec2: high half forced to 0, low half from SV_CLOCK
+      loadImm(newDefs[0], 0u);
+      mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
+      break;
+   }
+   case nir_intrinsic_load_global: {
+      const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectOffset;
+      uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
+
+      for (auto i = 0u; i < dest_components; ++i)
+         loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
+
+      info_out->io.globalAccess |= 0x1;
+      break;
+   }
+   case nir_intrinsic_store_global: {
+      DataType sType = getSType(insn->src[0], false, false);
+
+      for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
+         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+            continue;
+         if (typeSizeof(sType) == 8) {
+            // 64-bit components are split into two 32-bit stores
+            Value *split[2];
+            mkSplit(split, 4, getSrc(&insn->src[0], i));
+
+            Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
+            mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
+
+            sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
+            mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
+         } else {
+            Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
+            mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
+         }
+      }
+
+      info_out->io.globalAccess |= 0x2;
+      break;
+   }
+   default:
+      ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
+      return false;
+   }
+
+   return true;
+}
+