From: Eric Anholt Date: Sat, 4 Jun 2016 00:09:14 +0000 (-0700) Subject: vc4: Optimize out redundant SF updates. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8f2af4763a60c5ea5f64829321ae97bbfa51a4ce;p=mesa.git vc4: Optimize out redundant SF updates. Tiny change on shader-db currently, but it will be important when we start emitting a lot of SFs from the same variable as part of control flow support. total instructions in shared programs: 89463 -> 89430 (-0.04%) instructions in affected programs: 1522 -> 1489 (-2.17%) total estimated cycles in shared programs: 250060 -> 250015 (-0.02%) estimated cycles in affected programs: 8568 -> 8523 (-0.53%) --- diff --git a/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c b/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c index 0bc3e67acb2..5536f8dd204 100644 --- a/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c +++ b/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c @@ -24,7 +24,7 @@ /** * @file vc4_opt_peephole_sf.c * - * Quick optimization to eliminate unused SF updates. + * Quick optimization to eliminate unused or identical SF updates. */ #include "vc4_qir.h" @@ -33,12 +33,12 @@ static bool debug; static void -dump_from(struct vc4_compile *c, struct qinst *inst) +dump_from(struct vc4_compile *c, struct qinst *inst, const char *type) { if (!debug) return; - fprintf(stderr, "optimizing: "); + fprintf(stderr, "optimizing %s: ", type); qir_dump_inst(c, inst); fprintf(stderr, "\n"); } @@ -54,26 +54,98 @@ dump_to(struct vc4_compile *c, struct qinst *inst) fprintf(stderr, "\n"); } +static bool +inst_srcs_updated(struct qinst *inst, struct qinst *writer) +{ + /* If the sources get overwritten, stop tracking the + * last instruction writing SF. + */ + switch (writer->dst.file) { + case QFILE_TEMP: + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + if (inst->src[i].file == QFILE_TEMP && + inst->src[i].index == writer->dst.index) { + return true; + } + } + return false; + default: + return false; + } +} + +static bool +src_file_varies_on_reread(struct qreg reg) +{ + switch (reg.file) { + case QFILE_VARY: + case QFILE_VPM: + return true; + default: + return false; + } +} + +static bool +inst_result_equals(struct qinst *a, struct qinst *b) +{ + if (a->op != b->op || + qir_depends_on_flags(a) || + qir_depends_on_flags(b)) { + return false; + } + + for (int i = 0; i < qir_get_op_nsrc(a->op); i++) { + if (!qir_reg_equals(a->src[i], b->src[i]) || + src_file_varies_on_reread(a->src[i]) || + src_file_varies_on_reread(b->src[i])) { + return false; + } + } + + return true; +} + bool qir_opt_peephole_sf(struct vc4_compile *c) { bool progress = false; bool sf_live = false; + struct qinst *last_sf = NULL; /* Walk the block from bottom to top, tracking if the SF is used, and - * removing unused ones. + * removing unused or repeated ones. */ list_for_each_entry_rev(struct qinst, inst, &c->instructions, link) { if (inst->sf) { if (!sf_live) { - dump_from(c, inst); + /* Our instruction's SF isn't read, so drop it. + */ + dump_from(c, inst, "dead SF"); inst->sf = false; dump_to(c, inst); progress = true; + } else if (last_sf && + inst_result_equals(last_sf, inst)) { + /* The last_sf sets up same value as inst, so + * just drop the later one. + */ + dump_from(c, last_sf, "repeated SF"); + last_sf->sf = false; + dump_to(c, last_sf); + progress = true; + last_sf = inst; + } else { + last_sf = inst; } sf_live = false; } + if (last_sf) { + if (inst_srcs_updated(last_sf, inst)) + last_sf = NULL; + } + if (qir_depends_on_flags(inst)) sf_live = true; } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index b36c0d934cc..526e3a179aa 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -388,7 +388,7 @@ qir_emit(struct vc4_compile *c, struct qinst *inst) bool qir_reg_equals(struct qreg a, struct qreg b) { - return a.file == b.file && a.index == b.index; + return a.file == b.file && a.index == b.index && a.pack == b.pack; } struct vc4_compile *