if (c->execute.file != QFILE_NULL) {
                         last_inst->dst.index = qregs[chan].index;
 
-                        /* Set the flags to the current exec mask.  To insert
-                         * the flags push, we temporarily remove our SSA
-                         * instruction.
+                        /* Set the flags to the current exec mask.
                          */
-                        list_del(&last_inst->link);
+                        c->cursor = vir_before_inst(last_inst);
                         vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
-                        list_addtail(&last_inst->link,
-                                     &c->cur_block->instructions);
+                        c->cursor = vir_after_inst(last_inst);
 
                         vir_set_cond(last_inst, V3D_QPU_COND_IFA);
                         last_inst->cond_is_exec_mask = true;
 
         /** @} */
 };
 
+/** Which util/list.h add mode we should use when inserting an instruction. */
+enum vir_cursor_mode {
+        vir_cursor_add,
+        vir_cursor_addtail,
+};
+
+/**
+ * Tracking structure for where new instructions should be inserted.  Create
+ * with one of the vir_after_inst()-style helper functions.
+ *
+ * This does not protect against removal of the block or instruction, so we
+ * have an assert in instruction removal to try to catch it.
+ */
+struct vir_cursor {
+        enum vir_cursor_mode mode;
+        struct list_head *link;
+};
+
+static inline struct vir_cursor
+vir_before_inst(struct qinst *inst)
+{
+        return (struct vir_cursor){ vir_cursor_addtail, &inst->link };
+}
+
+static inline struct vir_cursor
+vir_after_inst(struct qinst *inst)
+{
+        return (struct vir_cursor){ vir_cursor_add, &inst->link };
+}
+
+static inline struct vir_cursor
+vir_before_block(struct qblock *block)
+{
+        return (struct vir_cursor){ vir_cursor_add, &block->instructions };
+}
+
+static inline struct vir_cursor
+vir_after_block(struct qblock *block)
+{
+        return (struct vir_cursor){ vir_cursor_addtail, &block->instructions };
+}
+
 /**
  * Compiler state saved across compiler invocations, for any expensive global
  * setup.
         struct qreg undef;
         uint32_t num_temps;
 
+        struct vir_cursor cursor;
         struct list_head blocks;
         int next_block_index;
         struct qblock *cur_block;
 
 static void
 vir_emit(struct v3d_compile *c, struct qinst *inst)
 {
-        list_addtail(&inst->link, &c->cur_block->instructions);
+        switch (c->cursor.mode) {
+        case vir_cursor_add:
+                list_add(&inst->link, c->cursor.link);
+                break;
+        case vir_cursor_addtail:
+                list_addtail(&inst->link, c->cursor.link);
+                break;
+        }
+
+        c->cursor = vir_after_inst(inst);
 }
 
 /* Updates inst to write to a new temporary, emits it, and notes the def. */
 vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
 {
         c->cur_block = block;
+        c->cursor = vir_after_block(block);
         list_addtail(&block->link, &c->blocks);
 }
 
         if (qinst->dst.file == QFILE_TEMP)
                 c->defs[qinst->dst.index] = NULL;
 
+        assert(&qinst->link != c->cursor.link);
+
         list_del(&qinst->link);
         free(qinst);
 }
 void
 vir_compile_destroy(struct v3d_compile *c)
 {
+        /* Defuse the assert that we aren't removing the cursor's instruction.
+         */
+        c->cursor.link = NULL;
+
         vir_for_each_block(block, c) {
                 while (!list_empty(&block->instructions)) {
                         struct qinst *qinst =
 {
         struct qinst *last_inst = NULL;
 
-        if (!list_empty(&c->cur_block->instructions))
+        if (!list_empty(&c->cur_block->instructions)) {
                 last_inst = (struct qinst *)c->cur_block->instructions.prev;
 
+                /* Can't stuff the PF into the last last inst if our cursor
+                 * isn't pointing after it.
+                 */
+                struct vir_cursor after_inst = vir_after_inst(last_inst);
+                if (c->cursor.mode != after_inst.mode ||
+                    c->cursor.link != after_inst.link)
+                        last_inst = NULL;
+        }
+
         if (src.file != QFILE_TEMP ||
             !c->defs[src.index] ||
             last_inst != c->defs[src.index]) {