+/**
+ * When demoting a shader down to single-threaded, removes the THRSW
+ * instructions (one will still be inserted at v3d_vir_to_qpu() for the
+ * program end).
+ */
+static void
+vir_remove_thrsw(struct v3d_compile *c)
+{
+ vir_for_each_block(block, c) {
+ vir_for_each_inst_safe(inst, block) {
+ if (inst->qpu.sig.thrsw)
+ vir_remove_instruction(c, inst);
+ }
+ }
+
+ c->last_thrsw = NULL;
+}
+
+static void
+vir_emit_last_thrsw(struct v3d_compile *c)
+{
+ /* On V3D before 4.1, we need a TMU op to be outstanding when thread
+ * switching, so disable threads if we didn't do any TMU ops (each of
+ * which would have emitted a THRSW).
+ */
+ if (!c->last_thrsw_at_top_level && c->devinfo->ver < 41) {
+ c->threads = 1;
+ if (c->last_thrsw)
+ vir_remove_thrsw(c);
+ return;
+ }
+
+ /* If we're threaded and the last THRSW was in conditional code, then
+ * we need to emit another one so that we can flag it as the last
+ * thrsw.
+ */
+ if (c->last_thrsw && !c->last_thrsw_at_top_level) {
+ assert(c->devinfo->ver >= 41);
+ vir_emit_thrsw(c);
+ }
+
+ /* If we're threaded, then we need to mark the last THRSW instruction
+ * so we can emit a pair of them at QPU emit time.
+ *
+ * For V3D 4.x, we can spawn the non-fragment shaders already in the
+ * post-last-THRSW state, so we can skip this.
+ */
+ if (!c->last_thrsw && c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ assert(c->devinfo->ver >= 41);
+ vir_emit_thrsw(c);
+ }
+
+ if (c->last_thrsw)
+ c->last_thrsw->is_last_thrsw = true;
+}
+