X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_opt_coalesce_ff_writes.c;h=e4f8e57fcd756424b0170b00e2ebc4cc0bf6fbbd;hb=09c1c13c4442148e45a4aeac3425382bbe90e8cd;hp=4e5b1193d3ad406ed5e2bde24f20641b1f16b4e2;hpb=27544ea8d330309a7f1604bece6d2fcb4e9a8ae3;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c b/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c index 4e5b1193d3a..e4f8e57fcd7 100644 --- a/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c +++ b/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c @@ -24,8 +24,8 @@ /** * @file vc4_opt_coalesce_ff_writes.c * - * This modifies instructions that generate the value consumed by a VPM write - * to write directly into the VPM. + * This modifies instructions that generate the value consumed by a VPM or TMU + * coordinate write to write directly into the VPM or TMU. */ #include "vc4_qir.h" @@ -33,29 +33,16 @@ bool qir_opt_coalesce_ff_writes(struct vc4_compile *c) { - if (c->stage == QSTAGE_FRAG) - return false; - /* For now, only do this pass when we don't have control flow. */ struct qblock *block = qir_entry_block(c); if (block != qir_exit_block(c)) return false; bool progress = false; - struct qinst *vpm_writes[64] = { 0 }; uint32_t use_count[c->num_temps]; - uint32_t vpm_write_count = 0; memset(&use_count, 0, sizeof(use_count)); qir_for_each_inst_inorder(inst, c) { - switch (inst->dst.file) { - case QFILE_VPM: - vpm_writes[vpm_write_count++] = inst; - break; - default: - break; - } - for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_TEMP) { uint32_t temp = inst->src[i].index; @@ -64,13 +51,19 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c) } } - for (int i = 0; i < vpm_write_count; i++) { - if (!qir_is_raw_mov(vpm_writes[i]) || - vpm_writes[i]->src[0].file != QFILE_TEMP) { + qir_for_each_inst_inorder(mov_inst, c) { + if (!qir_is_raw_mov(mov_inst) || mov_inst->sf) + continue; + if (mov_inst->src[0].file != QFILE_TEMP) + continue; + + if (!(mov_inst->dst.file == QFILE_VPM || + mov_inst->dst.file == QFILE_TLB_COLOR_WRITE || + mov_inst->dst.file == QFILE_TLB_COLOR_WRITE_MS || + qir_is_tex(mov_inst))) continue; - } - uint32_t temp = vpm_writes[i]->src[0].index; + uint32_t temp = mov_inst->src[0].index; if (use_count[temp] != 1) continue; @@ -78,25 +71,38 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c) if (!inst) continue; + /* Don't bother trying to fold in an ALU op using a uniform to + * a texture op, as we'll just have to lower the uniform back + * out. + */ + if (qir_is_tex(mov_inst) && qir_has_uniform_read(inst)) + continue; + if (qir_depends_on_flags(inst) || inst->sf) continue; if (qir_has_side_effects(c, inst) || - qir_has_side_effect_reads(c, inst)) { + qir_has_side_effect_reads(c, inst) || + inst->op == QOP_TLB_COLOR_READ || + inst->op == QOP_VARY_ADD_C) { continue; } - /* Move the generating instruction to the end of the program - * to maintain the order of the VPM writes. + /* Move the generating instruction into the position of the FF + * write. */ - assert(!vpm_writes[i]->sf); + c->defs[inst->dst.index] = NULL; + inst->dst.file = mov_inst->dst.file; + inst->dst.index = mov_inst->dst.index; + if (qir_has_implicit_tex_uniform(mov_inst)) { + inst->src[qir_get_tex_uniform_src(inst)] = + mov_inst->src[qir_get_tex_uniform_src(mov_inst)]; + } + list_del(&inst->link); - list_addtail(&inst->link, &vpm_writes[i]->link); - qir_remove_instruction(c, vpm_writes[i]); + list_addtail(&inst->link, &mov_inst->link); - c->defs[inst->dst.index] = NULL; - inst->dst.file = QFILE_VPM; - inst->dst.index = 0; + qir_remove_instruction(c, mov_inst); progress = true; }