+static void
+emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{
+ assert(instr->num_components == 1);
+
+ struct qreg offset = ntq_get_src(c, instr->src[1], 0);
+
+ uint32_t base_offset = nir_intrinsic_base(instr);
+
+ if (base_offset)
+ offset = vir_ADD(c, vir_uniform_ui(c, base_offset), offset);
+
+ /* Usually, for VS or FS, we only emit outputs once at program end so
+ * our VPM writes are never in non-uniform control flow, but this
+ * is not true for GS, where we are emitting multiple vertices.
+ */
+ if (vir_in_nonuniform_control_flow(c)) {
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+ V3D_QPU_PF_PUSHZ);
+ }
+
+ struct qreg val = ntq_get_src(c, instr->src[0], 0);
+
+ /* The offset isn’t necessarily dynamically uniform for a geometry
+ * shader. This can happen if the shader sometimes doesn’t emit one of
+ * the vertices. In that case subsequent vertices will be written to
+ * different offsets in the VPM and we need to use the scatter write
+ * instruction to have a different offset for each lane.
+ */
+ if (nir_src_is_dynamically_uniform(instr->src[1]))
+ vir_VPM_WRITE_indirect(c, val, offset);
+ else
+ vir_STVPMD(c, offset, val);
+
+ if (vir_in_nonuniform_control_flow(c)) {
+ struct qinst *last_inst =
+ (struct qinst *)c->cur_block->instructions.prev;
+ vir_set_cond(last_inst, V3D_QPU_COND_IFA);
+ }
+}
+
+static void
+ntq_emit_store_output(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{
+ /* XXX perf: Use stvpmv with uniform non-constant offsets and
+ * stvpmd with non-uniform offsets and enable
+ * PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR.
+ */
+ if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ ntq_emit_color_write(c, instr);
+ } else if (c->s->info.stage == MESA_SHADER_GEOMETRY) {
+ emit_store_output_gs(c, instr);
+ } else {
+ assert(c->s->info.stage == MESA_SHADER_VERTEX);
+ assert(instr->num_components == 1);
+
+ vir_VPM_WRITE(c,
+ ntq_get_src(c, instr->src[0], 0),
+ nir_intrinsic_base(instr));
+ }
+}
+