+/**
+ * Handles nir_intrinsic_store_tlb_sample_color_v3d by stashing the stored
+ * color in c->sample_colors.
+ *
+ * src[1] gives the render target and the intrinsic base gives the sample
+ * index; each (render target, sample) pair owns four consecutive slots, and
+ * component i of src[0] is copied (via a MOV) into slot i of that group.
+ */
+static void
+ntq_emit_per_sample_color_write(struct v3d_compile *c,
+                                nir_intrinsic_instr *instr)
+{
+        assert(instr->intrinsic == nir_intrinsic_store_tlb_sample_color_v3d);
+
+        const unsigned render_target = nir_src_as_uint(instr->src[1]);
+        assert(render_target < V3D_MAX_DRAW_BUFFERS);
+
+        const unsigned sample = nir_intrinsic_base(instr);
+        assert(sample < V3D_MAX_SAMPLES);
+
+        /* Four slots per sample, V3D_MAX_SAMPLES samples per render target. */
+        const unsigned first_slot =
+                4 * (sample + V3D_MAX_SAMPLES * render_target);
+
+        int comp = 0;
+        while (comp < instr->num_components) {
+                struct qreg value = ntq_get_src(c, instr->src[0], comp);
+
+                c->sample_colors[first_slot + comp] = vir_MOV(c, value);
+                comp++;
+        }
+}
+
+/**
+ * Stores the components of src[0] into c->outputs.
+ *
+ * The first slot written is (intrinsic base + src[1]) * 4 plus the
+ * intrinsic's component index; one MOV is emitted per stored component.
+ */
+static void
+ntq_emit_color_write(struct v3d_compile *c,
+                     nir_intrinsic_instr *instr)
+{
+        const unsigned vec4_index =
+                nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[1]);
+        const unsigned first_slot =
+                vec4_index * 4 + nir_intrinsic_component(instr);
+
+        int comp = 0;
+        while (comp < instr->num_components) {
+                struct qreg value = ntq_get_src(c, instr->src[0], comp);
+
+                c->outputs[first_slot + comp] = vir_MOV(c, value);
+                comp++;
+        }
+}
+
+static void
+emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{ /* Emits the VPM write for a single-component geometry shader output
+ * store: component 0 of src[0] is written at the offset formed by
+ * adding the intrinsic's base to component 0 of src[1].
+ */
+ assert(instr->num_components == 1);
+
+ uint32_t base_offset = nir_intrinsic_base(instr);
+ struct qreg src_offset = ntq_get_src(c, instr->src[1], 0);
+ struct qreg offset =
+ vir_ADD(c, vir_uniform_ui(c, base_offset), src_offset);
+
+ /* Usually, for VS or FS, we only emit outputs once at program end so
+ * our VPM writes are never in non-uniform control flow, but this
+ * is not true for GS, where we are emitting multiple vertices.
+ *
+ * The MOV to the nop register below is emitted only for its PUSHZ
+ * flag update from c->execute; the write emitted afterwards is then
+ * made conditional on that flag (see the IFA condition at the end).
+ */
+ if (vir_in_nonuniform_control_flow(c)) {
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+ V3D_QPU_PF_PUSHZ);
+ }
+
+ struct qreg val = ntq_get_src(c, instr->src[0], 0);
+
+ /* The offset isn't necessarily dynamically uniform for a geometry
+ * shader. This can happen if the shader sometimes doesn't emit one of
+ * the vertices. In that case subsequent vertices will be written to
+ * different offsets in the VPM and we need to use the scatter write
+ * instruction to have a different offset for each lane.
+ */
+ if (nir_src_is_dynamically_uniform(instr->src[1]))
+ vir_VPM_WRITE_indirect(c, val, offset);
+ else
+ vir_STVPMD(c, offset, val);
+
+ if (vir_in_nonuniform_control_flow(c)) {
+ struct qinst *last_inst = /* assumed to be the write emitted just above; nothing may be emitted in between */
+ (struct qinst *)c->cur_block->instructions.prev;
+ vir_set_cond(last_inst, V3D_QPU_COND_IFA);
+ }
+}
+