vc4: Add a CSE optimization pass.
author: Eric Anholt <eric@anholt.net>
Mon, 25 Aug 2014 07:12:21 +0000 (00:12 -0700)
committer: Eric Anholt <eric@anholt.net>
Thu, 4 Sep 2014 18:39:51 +0000 (11:39 -0700)
The shader dumps I was reading while debugging a regression in discard
support were too full of duplicate instructions, so I decided to remove the
duplicates instead of re-analyzing each of them as I dumped their outputs in
simulation.

src/gallium/drivers/vc4/Makefile.sources
src/gallium/drivers/vc4/vc4_opt_cse.c [new file with mode: 0644]
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h

index f8e04e400094936ee17a8d6dcae92d49b7698679..aa4ee6b6866c74dc250dfa9e411b822b1da16f5b 100644 (file)
@@ -7,6 +7,7 @@ C_SOURCES := \
        vc4_formats.c \
        vc4_opt_algebraic.c \
        vc4_opt_copy_propagation.c \
+       vc4_opt_cse.c \
        vc4_opt_dead_code.c \
        vc4_program.c \
        vc4_qir.c \
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
new file mode 100644 (file)
index 0000000..511e3b9
--- /dev/null
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_opt_cse.c
+ *
+ * Implements CSE for QIR without control flow.
+ *
+ * For each operation that writes a destination (and isn't just a MOV), put it
+ * in the hash table of all instructions that do so.  When faced with another
+ * one, look it up in the hash table by its opcode and operands.  If there's
+ * an entry in the table, then just reuse the entry's destination as the
+ * source of a MOV instead of reproducing the computation.  That MOV will then
+ * get cleaned up by copy propagation.
+ */
+
+#include "vc4_qir.h"
+
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+
+/* When true, every CSE decision is traced to stderr.  Nothing in this file
+ * assigns it, so tracing stays off unless the initializer is edited.
+ */
+static bool debug;
+
+/**
+ * Hash table key describing one computation: its opcode, its explicit
+ * sources, and a counter value standing in for any implicit input.
+ *
+ * Keys are hashed and compared as raw bytes over sizeof(struct inst_key), so
+ * a key must be zero-filled before its fields are set; otherwise unused
+ * src[] slots or padding would make equal computations look different.
+ */
+struct inst_key {
+        enum qop op;
+        /* Explicit sources; slots beyond the op's source count stay zeroed.
+         * Ops with more than 4 sources are never given a key.
+         */
+        struct qreg src[4];
+        /**
+         * If the instruction depends on the flags, how many QOP_SFs have been
+         * seen before this instruction, or if it depends on r4, how many r4
+         * writes have been seen.
+         */
+        uint32_t implicit_arg_update_count;
+};
+
+/**
+ * Hash table equality callback: two keys match when all of their bytes match.
+ *
+ * @param a  pointer to a struct inst_key.
+ * @param b  pointer to a struct inst_key.
+ * @return true if the two keys are bytewise identical.
+ */
+static bool
+inst_key_equals(const void *a, const void *b)
+{
+        const struct inst_key *lhs = a;
+        const struct inst_key *rhs = b;
+        int diff = memcmp(lhs, rhs, sizeof(*lhs));
+
+        return diff == 0;
+}
+
+/**
+ * Looks @inst up in the table of previously seen computations.
+ *
+ * Only instructions that write a QFILE_TEMP destination, are not QOP_MOV and
+ * have at most 4 sources are considered.  On a miss the instruction is
+ * recorded in the table (best effort: an allocation failure just skips the
+ * insert) so that later instructions can match against it.
+ *
+ * @param ht        table mapping struct inst_key to the first instruction
+ *                  seen with that key; also the ralloc parent of the keys.
+ * @param inst      instruction to look up.
+ * @param sf_count  caller's counter, used in the key when @inst depends on
+ *                  the flags.
+ * @param r4_count  caller's counter, used in the key when @inst reads r4.
+ * @return the earlier matching instruction, or NULL if @inst is not a
+ *         candidate or no match exists yet.
+ */
+static struct qinst *
+vc4_find_cse(struct hash_table *ht, struct qinst *inst, uint32_t sf_count,
+             uint32_t r4_count)
+{
+        if (inst->dst.file != QFILE_TEMP)
+                return NULL;
+        if (inst->op == QOP_MOV)
+                return NULL;
+        if (qir_get_op_nsrc(inst->op) > 4)
+                return NULL;
+
+        /* Zero the whole key first: it is hashed and compared bytewise. */
+        struct inst_key lookup;
+        memset(&lookup, 0, sizeof(lookup));
+
+        lookup.op = inst->op;
+        memcpy(lookup.src, inst->src,
+               qir_get_op_nsrc(inst->op) * sizeof(lookup.src[0]));
+
+        /* The r4 counter is applied last, so it wins if both tests pass. */
+        if (qir_depends_on_flags(inst))
+                lookup.implicit_arg_update_count = sf_count;
+        if (qir_reads_r4(inst))
+                lookup.implicit_arg_update_count = r4_count;
+
+        uint32_t key_hash = _mesa_hash_data(&lookup, sizeof(lookup));
+        struct hash_entry *found =
+                _mesa_hash_table_search(ht, key_hash, &lookup);
+
+        if (!found) {
+                /* First time we see this computation: remember it. */
+                struct inst_key *stored = ralloc(ht, struct inst_key);
+                if (!stored)
+                        return NULL;
+                memcpy(stored, &lookup, sizeof(*stored));
+                _mesa_hash_table_insert(ht, key_hash, stored, inst);
+
+                if (debug) {
+                        fprintf(stderr, "Added to CSE HT: ");
+                        qir_dump_inst(inst);
+                        fprintf(stderr, "\n");
+                }
+
+                return NULL;
+        }
+
+        if (debug) {
+                fprintf(stderr, "CSE found match:\n");
+
+                fprintf(stderr, "  Original inst: ");
+                qir_dump_inst(found->data);
+                fprintf(stderr, "\n");
+
+                fprintf(stderr, "  Our inst:      ");
+                qir_dump_inst(inst);
+                fprintf(stderr, "\n");
+        }
+
+        return found->data;
+}
+
+/**
+ * Common subexpression elimination over the instruction list of @c.
+ *
+ * Walks c->instructions once, in order:
+ *  - instructions with side effects are left untouched;
+ *  - a QOP_SF whose source equals that of the last kept QOP_SF is unlinked
+ *    from the list;
+ *  - any other instruction that vc4_find_cse() matches to an earlier one is
+ *    rewritten in place into a QOP_MOV from the earlier instruction's
+ *    destination, with its remaining sources set to c->undef.
+ *
+ * @param c  compile whose instruction list is optimized in place.
+ * @return true if at least one instruction was removed or rewritten; false
+ *         otherwise, including when the hash table cannot be created.
+ */
+bool
+qir_opt_cse(struct qcompile *c)
+{
+        bool progress = false;
+        struct simple_node *node, *t;
+        struct qinst *last_sf = NULL;
+        uint32_t sf_count = 0, r4_count = 0;
+
+        struct hash_table *ht = _mesa_hash_table_create(NULL, inst_key_equals);
+        if (!ht)
+                return false;
+
+        foreach_s(node, t, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+
+                /* Track r4 generations by its writers.  This is done before
+                 * the side-effect skip below so that a writer is counted
+                 * whether or not it is flagged as having side effects; r4
+                 * readers separated by a write then get distinct keys, while
+                 * readers of the same write can be merged.
+                 */
+                if (qir_writes_r4(inst))
+                        r4_count++;
+
+                if (qir_has_side_effects(inst)) {
+                        /* Forget the last SF so the next one is always kept
+                         * (presumably discard setup changes the flags --
+                         * confirm against the QPU emit code).
+                         */
+                        if (inst->op == QOP_TLB_DISCARD_SETUP)
+                                last_sf = NULL;
+                        continue;
+                }
+
+                if (inst->op == QOP_SF) {
+                        if (last_sf &&
+                            qir_reg_equals(last_sf->src[0], inst->src[0])) {
+                                if (debug) {
+                                        fprintf(stderr,
+                                                "Removing redundant SF: ");
+                                        qir_dump_inst(inst);
+                                        fprintf(stderr, "\n");
+                                }
+                                remove_from_list(&inst->link);
+                                progress = true;
+                                continue;
+                        } else {
+                                /* New flags generation for flag-dependent
+                                 * instructions that follow.
+                                 */
+                                last_sf = inst;
+                                sf_count++;
+                        }
+                } else {
+                        struct qinst *cse = vc4_find_cse(ht, inst,
+                                                         sf_count, r4_count);
+                        if (cse) {
+                                /* Clear the extra sources while the op still
+                                 * reports its original source count.
+                                 */
+                                inst->src[0] = cse->dst;
+                                for (int i = 1; i < qir_get_op_nsrc(inst->op);
+                                     i++)
+                                        inst->src[i] = c->undef;
+                                inst->op = QOP_MOV;
+                                progress = true;
+
+                                if (debug) {
+                                        fprintf(stderr, "  Turned into:   ");
+                                        qir_dump_inst(inst);
+                                        fprintf(stderr, "\n");
+                                }
+                        }
+                }
+        }
+
+        /* The keys were allocated with ht as their ralloc parent. */
+        ralloc_free(ht);
+
+        return progress;
+}
index 93f97c219f7b42dac97071ed220d1648b2f9e172..a017a72c14be540f9464b9477baa8bf291dab9db 100644 (file)
@@ -127,6 +127,54 @@ qir_has_side_effects(struct qinst *inst)
         return qir_op_info[inst->op].has_side_effects;
 }
 
+/**
+ * Returns true if @inst is one of the QOP_SEL_X_0_* or QOP_SEL_X_Y_*
+ * opcodes (NS, NC, ZS, ZC variants), false for every other opcode.
+ */
+bool
+qir_depends_on_flags(struct qinst *inst)
+{
+        const enum qop op = inst->op;
+
+        return op == QOP_SEL_X_0_NS ||
+               op == QOP_SEL_X_0_NC ||
+               op == QOP_SEL_X_0_ZS ||
+               op == QOP_SEL_X_0_ZC ||
+               op == QOP_SEL_X_Y_NS ||
+               op == QOP_SEL_X_Y_NC ||
+               op == QOP_SEL_X_Y_ZS ||
+               op == QOP_SEL_X_Y_ZC;
+}
+
+/**
+ * Returns true if @inst is QOP_TEX_RESULT, QOP_TLB_COLOR_READ, QOP_RCP,
+ * QOP_RSQ, QOP_EXP2 or QOP_LOG2, false for every other opcode.
+ */
+bool
+qir_writes_r4(struct qinst *inst)
+{
+        const enum qop op = inst->op;
+
+        return op == QOP_TEX_RESULT ||
+               op == QOP_TLB_COLOR_READ ||
+               op == QOP_RCP ||
+               op == QOP_RSQ ||
+               op == QOP_EXP2 ||
+               op == QOP_LOG2;
+}
+
+/**
+ * Returns true if @inst is one of QOP_R4_UNPACK_A through QOP_R4_UNPACK_D,
+ * false for every other opcode.
+ */
+bool
+qir_reads_r4(struct qinst *inst)
+{
+        const enum qop op = inst->op;
+
+        return op == QOP_R4_UNPACK_A ||
+               op == QOP_R4_UNPACK_B ||
+               op == QOP_R4_UNPACK_C ||
+               op == QOP_R4_UNPACK_D;
+}
+
 static void
 qir_print_reg(struct qreg reg)
 {
@@ -274,6 +322,7 @@ qir_optimize(struct qcompile *c)
                 bool progress = false;
 
                 OPTPASS(qir_opt_algebraic);
+                OPTPASS(qir_opt_cse);
                 OPTPASS(qir_opt_copy_propagation);
                 OPTPASS(qir_opt_dead_code);
 
index 2e210c3bd60ae24f4bfe36fee6c404ff4d064078..7336a3733de69d5e867cd04b38c698783ba8622a 100644 (file)
@@ -28,6 +28,7 @@
 #include <stdlib.h>
 #include <stdbool.h>
 #include <stdint.h>
+#include <string.h>
 
 #include "util/u_simple_list.h"
 
@@ -227,6 +228,9 @@ struct qreg qir_get_temp(struct qcompile *c);
 int qir_get_op_nsrc(enum qop qop);
 bool qir_reg_equals(struct qreg a, struct qreg b);
 bool qir_has_side_effects(struct qinst *inst);
+bool qir_depends_on_flags(struct qinst *inst);
+bool qir_writes_r4(struct qinst *inst);
+bool qir_reads_r4(struct qinst *inst);
 
 void qir_dump(struct qcompile *c);
 void qir_dump_inst(struct qinst *inst);
@@ -235,6 +239,7 @@ const char *qir_get_stage_name(enum qstage stage);
 void qir_optimize(struct qcompile *c);
 bool qir_opt_algebraic(struct qcompile *c);
 bool qir_opt_copy_propagation(struct qcompile *c);
+bool qir_opt_cse(struct qcompile *c);
 bool qir_opt_dead_code(struct qcompile *c);
 
 #define QIR_ALU0(name)                                                   \