X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_opt_dead_code.c;h=f04d0ff97ab7cbb2fd27e7de883d98602e79de8b;hb=8c9b9aac7d09e65195dca6681d59c10e4ef713d9;hp=0c273fff8f53d2fbbe239f5ea3ca0bc1124b630b;hpb=b73cab6826b54cdfa138aefb476f1f0d42b87b6a;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c index 0c273fff8f5..f04d0ff97ab 100644 --- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c +++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c @@ -24,11 +24,15 @@ /** * @file vc4_opt_dead_code.c * - * This is a simmple dead code eliminator for QIR with no control flow. + * This is a simple dead code eliminator for SSA values in QIR. * - * It walks from the bottom of the instruction list, removing instructions - * with a destination that is never used, and marking the sources of non-dead - * instructions as used. + * It walks all the instructions finding what temps are used, then walks again + * to remove instructions writing unused temps. + * + * This is an inefficient implementation if you have long chains of + * instructions where the entire chain is dead, but we expect those to have + * been eliminated at the NIR level, and here we're just cleaning up small + * problems produced by NIR->QIR. */ #include "vc4_qir.h" @@ -43,7 +47,38 @@ dce(struct vc4_compile *c, struct qinst *inst) qir_dump_inst(c, inst); fprintf(stderr, "\n"); } - qir_remove_instruction(inst); + assert(!inst->sf); + qir_remove_instruction(c, inst); +} + +static bool +has_nonremovable_reads(struct vc4_compile *c, struct qinst *inst) +{ + for (int i = 0; i < qir_get_nsrc(inst); i++) { + if (inst->src[i].file == QFILE_VPM) { + uint32_t attr = inst->src[i].index / 4; + uint32_t offset = (inst->src[i].index % 4) * 4; + + if (c->vattr_sizes[attr] != offset + 4) + return true; + + /* Can't get rid of the last VPM read, or the + * simulator (at least) throws an error. + */ + uint32_t total_size = 0; + for (uint32_t i = 0; i < ARRAY_SIZE(c->vattr_sizes); i++) + total_size += c->vattr_sizes[i]; + if (total_size == 4) + return true; + } + + if (inst->src[i].file == QFILE_VARY && + c->input_slots[inst->src[i].index].slot == 0xff) { + return true; + } + } + + return false; } bool @@ -51,36 +86,64 @@ qir_opt_dead_code(struct vc4_compile *c) { bool progress = false; bool *used = calloc(c->num_temps, sizeof(bool)); - bool sf_used = false; - - struct simple_node *node, *t; - for (node = c->instructions.prev, t = node->prev; - &c->instructions != node; - node = t, t = t->prev) { - struct qinst *inst = (struct qinst *)node; - if (inst->dst.file == QFILE_TEMP && - !used[inst->dst.index] && - !qir_has_side_effects(inst)) { - dce(c, inst); - progress = true; - continue; + qir_for_each_inst_inorder(inst, c) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { + if (inst->src[i].file == QFILE_TEMP) + used[inst->src[i].index] = true; } + } - if (qir_depends_on_flags(inst)) - sf_used = true; - if (inst->op == QOP_SF) { - if (!sf_used) { - dce(c, inst); - progress = true; + qir_for_each_block(block, c) { + qir_for_each_inst_safe(inst, block) { + if (inst->dst.file != QFILE_NULL && + !(inst->dst.file == QFILE_TEMP && + !used[inst->dst.index])) { continue; } - sf_used = false; - } - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { - if (inst->src[i].file == QFILE_TEMP) - used[inst->src[i].index] = true; + if (qir_has_side_effects(c, inst)) + continue; + + if (inst->sf || + has_nonremovable_reads(c, inst)) { + /* If we can't remove the instruction, but we + * don't need its destination value, just + * remove the destination. The register + * allocator would trivially color it and it + * wouldn't cause any register pressure, but + * it's nicer to read the QIR code without + * unused destination regs. + */ + if (inst->dst.file == QFILE_TEMP) { + if (debug) { + fprintf(stderr, + "Removing dst from: "); + qir_dump_inst(c, inst); + fprintf(stderr, "\n"); + } + c->defs[inst->dst.index] = NULL; + inst->dst.file = QFILE_NULL; + progress = true; + } + continue; + } + + for (int i = 0; i < qir_get_nsrc(inst); i++) { + if (inst->src[i].file != QFILE_VPM) + continue; + uint32_t attr = inst->src[i].index / 4; + uint32_t offset = (inst->src[i].index % 4) * 4; + + if (c->vattr_sizes[attr] == offset + 4) { + c->num_inputs--; + c->vattr_sizes[attr] -= 4; + } + } + + dce(c, inst); + progress = true; + continue; } }