From: Eric Anholt
Date: Sat, 10 Jan 2015 02:01:48 +0000 (+1300)
Subject: vc4: Allow dead code elimination of VPM reads.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a8e14c293b19a2d298f91f283d6b6839f36fb518;p=mesa.git

vc4: Allow dead code elimination of VPM reads.

This gets a bunch of dead reads out of the CSes, which don't read most
attributes generally.

total instructions in shared programs: 39753 -> 39487 (-0.67%)
instructions in affected programs: 4721 -> 4455 (-5.63%)
---

diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
index f555fcb600e..94ab382500d 100644
--- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c
+++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
@@ -46,6 +46,36 @@ dce(struct vc4_compile *c, struct qinst *inst)
         qir_remove_instruction(inst);
 }
 
+static bool
+has_nonremovable_reads(struct vc4_compile *c, struct qinst *inst)
+{
+        for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                if (inst->src[i].file == QFILE_VPM) {
+                        uint32_t attr = inst->src[i].index / 4;
+                        uint32_t offset = (inst->src[i].index % 4) * 4;
+
+                        if (c->vattr_sizes[attr] != offset + 4)
+                                return true;
+
+                        /* Can't drop the last VPM read (vattr_sizes sum == 4):
+                         * the simulator (at least) throws an error.
+                         */
+                        uint32_t total_size = 0;
+                        for (uint32_t i = 0; i < ARRAY_SIZE(c->vattr_sizes); i++)
+                                total_size += c->vattr_sizes[i];
+                        if (total_size == 4)
+                                return true;
+                }
+
+                if (inst->src[i].file == QFILE_VARY &&
+                    c->input_semantics[inst->src[i].index].semantic == 0xff) {
+                        return true;
+                }
+        }
+
+        return false;
+}
+
 bool
 qir_opt_dead_code(struct vc4_compile *c)
 {
@@ -65,12 +95,24 @@ qir_opt_dead_code(struct vc4_compile *c)
                     !used[inst->dst.index] &&
                     (!qir_has_side_effects(c, inst) ||
                      inst->op == QOP_TEX_RESULT) &&
-                    !(qir_has_side_effect_reads(c, inst))) {
+                    !has_nonremovable_reads(c, inst)) {
                         if (inst->op == QOP_TEX_RESULT) {
                                 dce_tex = true;
                                 c->num_texture_samples--;
                         }
 
+                        for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                                if (inst->src[i].file != QFILE_VPM)
+                                        continue;
+                                uint32_t attr = inst->src[i].index / 4;
+                                uint32_t offset = (inst->src[i].index % 4) * 4;
+
+                                if (c->vattr_sizes[attr] == offset + 4) {
+                                        c->num_inputs--;
+                                        c->vattr_sizes[attr] -= 4;
+                                }
+                        }
+
                         dce(c, inst);
                         progress = true;
                         continue;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index d2f89ae9e69..307a79f77c6 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -30,6 +30,7 @@
 #include
 #include
 
+#include "util/macros.h"
 #include "util/u_simple_list.h"
 #include "tgsi/tgsi_parse.h"
 