From e06b0778f59980429fececb1aa0de0f0a3f23427 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 17 Dec 2014 20:35:17 -0800
Subject: [PATCH] vc4: Coalesce MOVs into VPM with the instructions generating the values.

total instructions in shared programs: 41168 -> 40976 (-0.47%)
instructions in affected programs: 18156 -> 17964 (-1.06%)
---
Note: compared to commit e06b0778f599 this copy bounds the fill of the
vpm_writes[] array in qir_opt_vpm_writes(), so the post-image blob id on the
"index" line of vc4_opt_vpm_writes.c no longer matches the file contents.

 src/gallium/drivers/vc4/Makefile.sources     |   1 +
 src/gallium/drivers/vc4/vc4_opt_vpm_writes.c | 123 +++++++++++++++++++
 src/gallium/drivers/vc4/vc4_qir.c            |  38 +++---
 src/gallium/drivers/vc4/vc4_qir.h            |   2 +
 4 files changed, 149 insertions(+), 15 deletions(-)
 create mode 100644 src/gallium/drivers/vc4/vc4_opt_vpm_writes.c

diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 1f8e8c41bf4..95f1a340ab3 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -16,6 +16,7 @@ C_SOURCES := \
 	vc4_opt_cse.c \
 	vc4_opt_dead_code.c \
 	vc4_opt_small_immediates.c \
+	vc4_opt_vpm_writes.c \
 	vc4_packet.h \
 	vc4_program.c \
 	vc4_qir.c \
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
new file mode 100644
index 00000000000..477d32605a4
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_opt_vpm_writes.c
+ *
+ * This modifies instructions that generate the value consumed by a VPM write
+ * to write directly into the VPM.
+ */
+
+#include "vc4_qir.h"
+
+bool
+qir_opt_vpm_writes(struct vc4_compile *c)
+{
+        if (c->stage == QSTAGE_FRAG)
+                return false;
+
+        bool progress = false;
+        struct simple_node *node;
+        struct qinst *defs[c->num_temps];
+        struct qinst *vpm_writes[64] = { 0 };
+        uint32_t use_count[c->num_temps];
+        uint32_t vpm_write_count = 0;
+        memset(&defs, 0, sizeof(defs));
+        memset(&use_count, 0, sizeof(use_count));
+
+        foreach(node, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+
+                switch (inst->dst.file) {
+                case QFILE_TEMP:
+                        defs[inst->dst.index] = inst;
+                        break;
+                case QFILE_VPM:
+                        /* Don't overflow the fixed-size array: writes that
+                         * don't fit are simply left as MOVs.
+                         */
+                        if (vpm_write_count <
+                            sizeof(vpm_writes) / sizeof(vpm_writes[0])) {
+                                vpm_writes[vpm_write_count++] = inst;
+                        }
+                        break;
+                default:
+                        break;
+                }
+
+                for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                        if (inst->src[i].file == QFILE_TEMP)
+                                use_count[inst->src[i].index]++;
+                }
+        }
+
+        for (int i = 0; i < vpm_write_count; i++) {
+                if (vpm_writes[i]->op != QOP_MOV ||
+                    vpm_writes[i]->src[0].file != QFILE_TEMP) {
+                        continue;
+                }
+
+                uint32_t temp = vpm_writes[i]->src[0].index;
+                if (use_count[temp] != 1)
+                        continue;
+
+                struct qinst *inst = defs[temp];
+                if (qir_is_multi_instruction(inst))
+                        continue;
+
+                if (qir_depends_on_flags(inst))
+                        continue;
+
+                if (qir_has_side_effects(c, inst))
+                        continue;
+
+                /* A QOP_TEX_RESULT destination is r4, so we can't move
+                 * accesses to it past another QOP_TEX_RESULT which would
+                 * update it.
+                 */
+                int src;
+                for (src = 0; src < qir_get_op_nsrc(inst->op); src++) {
+                        if (inst->src[src].file == QFILE_TEMP) {
+                                if (defs[inst->src[src].index]->op ==
+                                    QOP_TEX_RESULT) {
+                                        break;
+                                }
+                        }
+                }
+                if (src != qir_get_op_nsrc(inst->op))
+                        continue;
+
+                /* Move the generating instruction to the end of the program
+                 * to maintain the order of the VPM writes.
+                 */
+                move_to_tail(&vpm_writes[i]->link, &inst->link);
+                qir_remove_instruction(vpm_writes[i]);
+
+                inst->dst.file = QFILE_VPM;
+                inst->dst.index = 0;
+
+                progress = true;
+        }
+
+        return progress;
+}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 91bdefe81e5..3fd39413222 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -32,6 +32,7 @@ struct qir_op_info {
         const char *name;
         uint8_t ndst, nsrc;
         bool has_side_effects;
+        bool multi_instruction;
 };
 
 static const struct qir_op_info qir_op_info[] = {
@@ -59,21 +60,21 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_NOT] = { "not", 1, 1 },
 
         [QOP_SF] = { "sf", 0, 1 },
-        [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 },
-        [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1 },
-        [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1 },
-        [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1 },
-        [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2 },
-        [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2 },
-        [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 },
-        [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 },
-
-        [QOP_RCP] = { "rcp", 1, 1 },
-        [QOP_RSQ] = { "rsq", 1, 1 },
-        [QOP_EXP2] = { "exp2", 1, 2 },
-        [QOP_LOG2] = { "log2", 1, 2 },
-        [QOP_PACK_COLORS] = { "pack_colors", 1, 4 },
-        [QOP_PACK_SCALED] = { "pack_scaled", 1, 2 },
+        [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1, false, true },
+        [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
+        [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
+        [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
+        [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
+        [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
+        [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
+        [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
+
+        [QOP_RCP] = { "rcp", 1, 1, false, true },
+        [QOP_RSQ] = { "rsq", 1, 1, false, true },
+        [QOP_EXP2] = { "exp2", 1, 2, false, true },
+        [QOP_LOG2] = { "log2", 1, 2, false, true },
+        [QOP_PACK_COLORS] = { "pack_colors", 1, 4, false, true },
+        [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
         [QOP_VPM_READ] = { "vpm_read", 0, 1, true },
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
@@ -155,6 +156,12 @@ qir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
         return qir_op_info[inst->op].has_side_effects;
 }
 
+bool
+qir_is_multi_instruction(struct qinst *inst)
+{
+        return qir_op_info[inst->op].multi_instruction;
+}
+
 bool
 qir_depends_on_flags(struct qinst *inst)
 {
@@ -397,6 +404,7 @@ qir_optimize(struct vc4_compile *c)
                 OPTPASS(qir_opt_copy_propagation);
                 OPTPASS(qir_opt_dead_code);
                 OPTPASS(qir_opt_small_immediates);
+                OPTPASS(qir_opt_vpm_writes);
 
                 if (!progress)
                         break;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index dd9866e126f..f7d59a80dac 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -374,6 +374,7 @@ struct qreg qir_get_temp(struct vc4_compile *c);
 int qir_get_op_nsrc(enum qop qop);
 bool qir_reg_equals(struct qreg a, struct qreg b);
 bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
+bool qir_is_multi_instruction(struct qinst *inst);
 bool qir_depends_on_flags(struct qinst *inst);
 bool qir_writes_r4(struct qinst *inst);
 bool qir_reads_r4(struct qinst *inst);
@@ -389,6 +390,7 @@ bool qir_opt_copy_propagation(struct vc4_compile *c);
 bool qir_opt_cse(struct vc4_compile *c);
 bool qir_opt_dead_code(struct vc4_compile *c);
 bool qir_opt_small_immediates(struct vc4_compile *c);
+bool qir_opt_vpm_writes(struct vc4_compile *c);
 
 void qpu_schedule_instructions(struct vc4_compile *c);
 
-- 
2.30.2