vc4: Allow dead code elimination of VPM reads.
author Eric Anholt <eric@anholt.net>
Sat, 10 Jan 2015 02:01:48 +0000 (15:01 +1300)
committer Eric Anholt <eric@anholt.net>
Sat, 10 Jan 2015 07:55:37 +0000 (20:55 +1300)
This removes a bunch of dead reads from the coordinate shaders (CSes), which
generally don't read most attributes.

total instructions in shared programs: 39753 -> 39487 (-0.67%)
instructions in affected programs:     4721 -> 4455 (-5.63%)

src/gallium/drivers/vc4/vc4_opt_dead_code.c
src/gallium/drivers/vc4/vc4_qir.h

index f555fcb600e8b8ec6571b32289f5307b115e6463..94ab382500daea82774f401edc3641bce20d5a36 100644 (file)
@@ -46,6 +46,36 @@ dce(struct vc4_compile *c, struct qinst *inst)
         qir_remove_instruction(inst);
 }
 
+static bool /* true if any source operand of inst is a read that must be kept, so inst can't be dead-code eliminated */
+has_nonremovable_reads(struct vc4_compile *c, struct qinst *inst)
+{
+        for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { /* examine every source operand of the instruction */
+                if (inst->src[i].file == QFILE_VPM) {
+                        uint32_t attr = inst->src[i].index / 4; /* slot in c->vattr_sizes[]; four VPM indices per attribute */
+                        uint32_t offset = (inst->src[i].index % 4) * 4; /* presumably a byte offset, 4 bytes per component -- TODO confirm */

+                        if (c->vattr_sizes[attr] != offset + 4) /* read does not end exactly at the attribute's recorded size: keep it */
+                                return true;

+                        /* Can't get rid of the last VPM read (the sizes of all
+                         * attributes sum to exactly 4), or the simulator (at least) throws an error.
+                         */
+                        uint32_t total_size = 0;
+                        for (uint32_t i = 0; i < ARRAY_SIZE(c->vattr_sizes); i++) /* NOTE(review): this i shadows the outer operand index (-Wshadow) */
+                                total_size += c->vattr_sizes[i];
+                        if (total_size == 4)
+                                return true;
+                }

+                if (inst->src[i].file == QFILE_VARY &&
+                    c->input_semantics[inst->src[i].index].semantic == 0xff) { /* NOTE(review): meaning of semantic 0xff is not visible here -- confirm */
+                        return true;
+                }
+        }

+        return false; /* no source operand blocks removal */
+}
+
 bool
 qir_opt_dead_code(struct vc4_compile *c)
 {
@@ -65,12 +95,24 @@ qir_opt_dead_code(struct vc4_compile *c)
                     !used[inst->dst.index] &&
                     (!qir_has_side_effects(c, inst) ||
                      inst->op == QOP_TEX_RESULT) &&
-                    !(qir_has_side_effect_reads(c, inst))) {
+                    !has_nonremovable_reads(c, inst)) {
                         if (inst->op == QOP_TEX_RESULT) {
                                 dce_tex = true;
                                 c->num_texture_samples--;
                         }
 
+                        for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                                if (inst->src[i].file != QFILE_VPM)
+                                        continue;
+                                uint32_t attr = inst->src[i].index / 4;
+                                uint32_t offset = (inst->src[i].index % 4) * 4;
+
+                                if (c->vattr_sizes[attr] == offset + 4) {
+                                        c->num_inputs--;
+                                        c->vattr_sizes[attr] -= 4;
+                                }
+                        }
+
                         dce(c, inst);
                         progress = true;
                         continue;
index d2f89ae9e69cd8ad1083899f1085dc0daaec7aad..307a79f77c679d89a911c7a0022c5d419359f200 100644 (file)
@@ -30,6 +30,7 @@
 #include <stdint.h>
 #include <string.h>
 
+#include "util/macros.h"
 #include "util/u_simple_list.h"
 #include "tgsi/tgsi_parse.h"