vc4: Make SF be a flag on the QIR instructions.
author Eric Anholt <eric@anholt.net>
Thu, 12 Feb 2015 22:17:21 +0000 (14:17 -0800)
committer Eric Anholt <eric@anholt.net>
Fri, 13 Feb 2015 00:33:16 +0000 (16:33 -0800)
Right now the places that used to emit a mov.sf just put the SF on the
previous instruction when it generated the source of the SF value.  Even
without optimization to push the sf up further (and thus potentially
kill more MOVs), this gets us:

total uniforms in shared programs: 13455 -> 13457 (0.01%)
uniforms in affected programs:     3 -> 5 (66.67%)
total instructions in shared programs: 40296 -> 40198 (-0.24%)
instructions in affected programs:     12595 -> 12497 (-0.78%)

src/gallium/drivers/vc4/vc4_opt_algebraic.c
src/gallium/drivers/vc4/vc4_opt_cse.c
src/gallium/drivers/vc4/vc4_opt_dead_code.c
src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c

index 994fa907f7745648b8ebf6da00d3d26a63f94d90..1e0b8c9c0974874fc9b3f8a347fd8013425ca047 100644 (file)
@@ -149,22 +149,6 @@ qir_opt_algebraic(struct vc4_compile *c)
                         defs[inst->dst.index] = inst;
 
                 switch (inst->op) {
-                case QOP_SF:
-                        /* SF just looks at the sign bit, or whether all the
-                         * bits are 0.  This is preserved across an itof
-                         * transformation.
-                         */
-                        if (inst->src[0].file == QFILE_TEMP &&
-                            defs[inst->src[0].index]->op == QOP_ITOF) {
-                                dump_from(c, inst);
-                                inst->src[0] =
-                                        defs[inst->src[0].index]->src[0];
-                                progress =  true;
-                                dump_to(c, inst);
-                                break;
-                        }
-                        break;
-
                 case QOP_SEL_X_Y_ZS:
                 case QOP_SEL_X_Y_ZC:
                 case QOP_SEL_X_Y_NS:
index c11c90efcdc7fb6ad7ec02f342c907b828f778c1..71794f7d1cf0068690e32e5c55ac80741f79b8a0 100644 (file)
@@ -45,7 +45,7 @@ struct inst_key {
         enum qop op;
         struct qreg src[4];
         /**
-         * If the instruction depends on the flags, how many QOP_SFs have been
+         * If the instruction depends on the flags, how many SFs have been
          * seen before this instruction, or if it depends on r4, how many r4
          * writes have been seen.
          */
@@ -122,7 +122,6 @@ qir_opt_cse(struct vc4_compile *c)
 {
         bool progress = false;
         struct simple_node *node, *t;
-        struct qinst *last_sf = NULL;
         uint32_t sf_count = 0, r4_count = 0;
 
         struct hash_table *ht = _mesa_hash_table_create(NULL, NULL,
@@ -135,27 +134,11 @@ qir_opt_cse(struct vc4_compile *c)
 
                 if (qir_has_side_effects(c, inst) ||
                     qir_has_side_effect_reads(c, inst)) {
-                        if (inst->op == QOP_TLB_DISCARD_SETUP)
-                                last_sf = NULL;
                         continue;
                 }
 
-                if (inst->op == QOP_SF) {
-                        if (last_sf &&
-                            qir_reg_equals(last_sf->src[0], inst->src[0])) {
-                                if (debug) {
-                                        fprintf(stderr,
-                                                "Removing redundant SF: ");
-                                        qir_dump_inst(c, inst);
-                                        fprintf(stderr, "\n");
-                                }
-                                qir_remove_instruction(inst);
-                                progress = true;
-                                continue;
-                        } else {
-                                last_sf = inst;
-                                sf_count++;
-                        }
+                if (inst->sf) {
+                        sf_count++;
                 } else {
                         struct qinst *cse = vc4_find_cse(c, ht, inst,
                                                          sf_count, r4_count);
index 94ab382500daea82774f401edc3641bce20d5a36..dd1561d68d4713fb59dbd59696b7dd9d37c90c52 100644 (file)
@@ -43,6 +43,7 @@ dce(struct vc4_compile *c, struct qinst *inst)
                 qir_dump_inst(c, inst);
                 fprintf(stderr, "\n");
         }
+        assert(!inst->sf);
         qir_remove_instruction(inst);
 }
 
@@ -93,6 +94,7 @@ qir_opt_dead_code(struct vc4_compile *c)
 
                 if (inst->dst.file == QFILE_TEMP &&
                     !used[inst->dst.index] &&
+                    !inst->sf &&
                     (!qir_has_side_effects(c, inst) ||
                      inst->op == QOP_TEX_RESULT) &&
                     !has_nonremovable_reads(c, inst)) {
@@ -120,11 +122,16 @@ qir_opt_dead_code(struct vc4_compile *c)
 
                 if (qir_depends_on_flags(inst))
                         sf_used = true;
-                if (inst->op == QOP_SF) {
+                if (inst->sf) {
                         if (!sf_used) {
-                                dce(c, inst);
+                                if (debug) {
+                                        fprintf(stderr, "Removing SF on: ");
+                                        qir_dump_inst(c, inst);
+                                        fprintf(stderr, "\n");
+                                }
+
+                                inst->sf = false;
                                 progress = true;
-                                continue;
                         }
                         sf_used = false;
                 }
index 0269e32494a961b44066a684017e95276ae4e73e..ba322b6421c196b3a43d2f6ab15831f281b326ed 100644 (file)
@@ -79,7 +79,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
                 if (qir_is_multi_instruction(inst))
                         continue;
 
-                if (qir_depends_on_flags(inst))
+                if (qir_depends_on_flags(inst) || inst->sf)
                         continue;
 
                 if (qir_has_side_effects(c, inst) ||
@@ -106,6 +106,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
                 /* Move the generating instruction to the end of the program
                  * to maintain the order of the VPM writes.
                  */
+                assert(!vpm_writes[i]->sf);
                 move_to_tail(&vpm_writes[i]->link, &inst->link);
                 qir_remove_instruction(vpm_writes[i]);
 
index 3f0de2caee1dc0233402cf068595d2ea9577f8b3..9d3d868ac9041f741287d9fca3fa6cc65e8ac458 100644 (file)
@@ -2163,6 +2163,12 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage,
         }
 
         tgsi_parse_free(&c->parser);
+        if (vc4_debug & VC4_DEBUG_QIR) {
+                fprintf(stderr, "%s prog %d/%d pre-opt QIR:\n",
+                        qir_get_stage_name(c->stage),
+                        c->program_id, c->variant_id);
+                qir_dump(c);
+        }
 
         qir_optimize(c);
 
index feb585d69aee3b8b1a24c4aba043860565fa56c2..9e0ee1f0ae5965eb791b5e53852be7b435487952 100644 (file)
@@ -59,7 +59,6 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_XOR] = { "xor", 1, 2 },
         [QOP_NOT] = { "not", 1, 1 },
 
-        [QOP_SF] = { "sf", 0, 1 },
         [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1, false, true },
         [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
         [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
@@ -282,7 +281,9 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
 void
 qir_dump_inst(struct vc4_compile *c, struct qinst *inst)
 {
-        fprintf(stderr, "%s ", qir_get_op_name(inst->op));
+        fprintf(stderr, "%s%s ",
+                qir_get_op_name(inst->op),
+                inst->sf ? ".sf" : "");
 
         qir_print_reg(c, inst->dst, true);
         for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
@@ -416,6 +417,20 @@ qir_get_stage_name(enum qstage stage)
         return names[stage];
 }
 
+void
+qir_SF(struct vc4_compile *c, struct qreg src)
+{
+        assert(!is_empty_list(&c->instructions));
+        struct qinst *last_inst = (struct qinst *)c->instructions.prev;
+        if (last_inst->dst.file != src.file ||
+            last_inst->dst.index != src.index ||
+            qir_is_multi_instruction(last_inst)) {
+                src = qir_MOV(c, src);
+                last_inst = (struct qinst *)c->instructions.prev;
+        }
+        last_inst->sf = true;
+}
+
 #define OPTPASS(func)                                                   \
         do {                                                            \
                 bool stage_progress = func(c);                          \
index ee869940954f89b05381dc88a96df70e3701bedc..6da6ff6542e84b4407f7bd81fd31b564e8871e1e 100644 (file)
@@ -24,6 +24,7 @@
 #ifndef VC4_QIR_H
 #define VC4_QIR_H
 
+#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
@@ -76,9 +77,6 @@ enum qop {
         QOP_XOR,
         QOP_NOT,
 
-        /* Sets the flag register according to src. */
-        QOP_SF,
-
         /* Note: Orderings of these compares must be the same as in
          * qpu_defines.h.  Selects the src[0] if the ns flag bit is set,
          * otherwise 0. */
@@ -173,6 +171,7 @@ struct qinst {
         enum qop op;
         struct qreg dst;
         struct qreg *src;
+        bool sf;
 };
 
 enum qstage {
@@ -397,6 +396,8 @@ bool qir_opt_vpm_writes(struct vc4_compile *c);
 
 void qpu_schedule_instructions(struct vc4_compile *c);
 
+void qir_SF(struct vc4_compile *c, struct qreg src);
+
 #define QIR_ALU0(name)                                                   \
 static inline struct qreg                                                \
 qir_##name(struct vc4_compile *c)                                        \
@@ -443,7 +444,6 @@ QIR_ALU2(FADD)
 QIR_ALU2(FSUB)
 QIR_ALU2(FMUL)
 QIR_ALU2(MUL24)
-QIR_NODST_1(SF)
 QIR_ALU1(SEL_X_0_ZS)
 QIR_ALU1(SEL_X_0_ZC)
 QIR_ALU1(SEL_X_0_NS)
index 7531be5cf89840db61a8ac861315ac79984321de..eeb8d3a21ff6633811f923fbd62abb6628ae79fe 100644 (file)
@@ -270,11 +270,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         }
                         break;
 
-                case QOP_SF:
-                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_NOP), src[0]));
-                        *last_inst(c) |= QPU_SF;
-                        break;
-
                 case QOP_SEL_X_0_ZS:
                 case QOP_SEL_X_0_ZC:
                 case QOP_SEL_X_0_NS:
@@ -548,6 +543,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 
                         break;
                 }
+
+                if (qinst->sf) {
+                        assert(!qir_is_multi_instruction(qinst));
+                        *last_inst(c) |= QPU_SF;
+                }
         }
 
         qpu_schedule_instructions(c);