v3d: fix glDrawTransformFeedback{Instanced}()
[mesa.git] / src / gallium / drivers / vc4 / vc4_opt_coalesce_ff_writes.c
index 4e5b1193d3ad406ed5e2bde24f20641b1f16b4e2..e4f8e57fcd756424b0170b00e2ebc4cc0bf6fbbd 100644 (file)
@@ -24,8 +24,8 @@
 /**
  * @file vc4_opt_coalesce_ff_writes.c
  *
- * This modifies instructions that generate the value consumed by a VPM write
- * to write directly into the VPM.
+ * This modifies instructions that generate the value consumed by a VPM, TLB
+ * color, or TMU coordinate write to write directly into that destination.
  */
 
 #include "vc4_qir.h"
 bool
 qir_opt_coalesce_ff_writes(struct vc4_compile *c)
 {
-        if (c->stage == QSTAGE_FRAG)
-                return false;
-
         /* For now, only do this pass when we don't have control flow. */
         struct qblock *block = qir_entry_block(c);
         if (block != qir_exit_block(c))
                 return false;
 
         bool progress = false;
-        struct qinst *vpm_writes[64] = { 0 };
         uint32_t use_count[c->num_temps];
-        uint32_t vpm_write_count = 0;
         memset(&use_count, 0, sizeof(use_count));
 
         qir_for_each_inst_inorder(inst, c) {
-                switch (inst->dst.file) {
-                case QFILE_VPM:
-                        vpm_writes[vpm_write_count++] = inst;
-                        break;
-                default:
-                        break;
-                }
-
                 for (int i = 0; i < qir_get_nsrc(inst); i++) {
                         if (inst->src[i].file == QFILE_TEMP) {
                                 uint32_t temp = inst->src[i].index;
@@ -64,13 +51,19 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c)
                 }
         }
 
-        for (int i = 0; i < vpm_write_count; i++) {
-                if (!qir_is_raw_mov(vpm_writes[i]) ||
-                    vpm_writes[i]->src[0].file != QFILE_TEMP) {
+        qir_for_each_inst_inorder(mov_inst, c) {
+                if (!qir_is_raw_mov(mov_inst) || mov_inst->sf)
+                        continue;
+                if (mov_inst->src[0].file != QFILE_TEMP)
+                        continue;
+
+                if (!(mov_inst->dst.file == QFILE_VPM ||
+                      mov_inst->dst.file == QFILE_TLB_COLOR_WRITE ||
+                      mov_inst->dst.file == QFILE_TLB_COLOR_WRITE_MS ||
+                      qir_is_tex(mov_inst)))
                         continue;
-                }
 
-                uint32_t temp = vpm_writes[i]->src[0].index;
+                uint32_t temp = mov_inst->src[0].index;
                 if (use_count[temp] != 1)
                         continue;
 
@@ -78,25 +71,38 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c)
                 if (!inst)
                         continue;
 
+                /* Don't bother trying to fold in an ALU op using a uniform to
+                 * a texture op, as we'll just have to lower the uniform back
+                 * out.
+                 */
+                if (qir_is_tex(mov_inst) && qir_has_uniform_read(inst))
+                        continue;
+
                 if (qir_depends_on_flags(inst) || inst->sf)
                         continue;
 
                 if (qir_has_side_effects(c, inst) ||
-                    qir_has_side_effect_reads(c, inst)) {
+                    qir_has_side_effect_reads(c, inst) ||
+                    inst->op == QOP_TLB_COLOR_READ ||
+                    inst->op == QOP_VARY_ADD_C) {
                         continue;
                 }
 
-                /* Move the generating instruction to the end of the program
-                 * to maintain the order of the VPM writes.
+                /* Move the generating instruction into the position of the FF
+                 * write.
                  */
-                assert(!vpm_writes[i]->sf);
+                c->defs[inst->dst.index] = NULL;
+                inst->dst.file = mov_inst->dst.file;
+                inst->dst.index = mov_inst->dst.index;
+                if (qir_has_implicit_tex_uniform(mov_inst)) {
+                        inst->src[qir_get_tex_uniform_src(inst)] =
+                                mov_inst->src[qir_get_tex_uniform_src(mov_inst)];
+                }
+
                 list_del(&inst->link);
-                list_addtail(&inst->link, &vpm_writes[i]->link);
-                qir_remove_instruction(c, vpm_writes[i]);
+                list_addtail(&inst->link, &mov_inst->link);
 
-                c->defs[inst->dst.index] = NULL;
-                inst->dst.file = QFILE_VPM;
-                inst->dst.index = 0;
+                qir_remove_instruction(c, mov_inst);
 
                 progress = true;
         }