From 85316d059c899ac096331251de6b233229aa0b4f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 19 Feb 2015 13:22:31 -0800 Subject: [PATCH] vc4: Keep an array of pointers to instructions defining the temps around. The optimization passes are always regenerating it and throwing it away, but it's not hard to keep track of. --- src/gallium/drivers/vc4/vc4_opt_algebraic.c | 73 +++++++++---------- .../drivers/vc4/vc4_opt_copy_propagation.c | 9 +-- src/gallium/drivers/vc4/vc4_opt_dead_code.c | 2 +- .../drivers/vc4/vc4_opt_small_immediates.c | 6 +- src/gallium/drivers/vc4/vc4_opt_vpm_writes.c | 12 +-- src/gallium/drivers/vc4/vc4_qir.c | 25 +++++-- src/gallium/drivers/vc4/vc4_qir.h | 7 +- .../drivers/vc4/vc4_qir_lower_uniforms.c | 1 + 8 files changed, 67 insertions(+), 68 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index 1e0b8c9c097..d17669abaff 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -60,7 +60,7 @@ dump_to(struct vc4_compile *c, struct qinst *inst) } static bool -is_constant_value(struct vc4_compile *c, struct qinst **defs, struct qreg reg, +is_constant_value(struct vc4_compile *c, struct qreg reg, uint32_t val) { if (reg.file == QFILE_UNIF && @@ -76,17 +76,17 @@ is_constant_value(struct vc4_compile *c, struct qinst **defs, struct qreg reg, } static bool -is_zero(struct vc4_compile *c, struct qinst **defs, struct qreg reg) +is_zero(struct vc4_compile *c, struct qreg reg) { - reg = qir_follow_movs(defs, reg); - return is_constant_value(c, defs, reg, 0); + reg = qir_follow_movs(c, reg); + return is_constant_value(c, reg, 0); } static bool -is_1f(struct vc4_compile *c, struct qinst **defs, struct qreg reg) +is_1f(struct vc4_compile *c, struct qreg reg) { - reg = qir_follow_movs(defs, reg); - return is_constant_value(c, defs, reg, fui(1.0)); + reg = qir_follow_movs(c, reg); + return is_constant_value(c, reg, fui(1.0)); } static void @@ -101,11 +101,10 @@ replace_with_mov(struct vc4_compile *c, struct qinst *inst, struct qreg arg) static bool replace_x_0_with_x(struct vc4_compile *c, - struct qinst **defs, struct qinst *inst, int arg) { - if (!is_zero(c, defs, inst->src[arg])) + if (!is_zero(c, inst->src[arg])) return false; replace_with_mov(c, inst, inst->src[1 - arg]); return true; @@ -113,11 +112,10 @@ replace_x_0_with_x(struct vc4_compile *c, static bool replace_x_0_with_0(struct vc4_compile *c, - struct qinst **defs, struct qinst *inst, int arg) { - if (!is_zero(c, defs, inst->src[arg])) + if (!is_zero(c, inst->src[arg])) return false; replace_with_mov(c, inst, inst->src[arg]); return true; @@ -125,11 +123,10 @@ replace_x_0_with_0(struct vc4_compile *c, static bool fmul_replace_one(struct vc4_compile *c, - struct qinst **defs, struct qinst *inst, int arg) { - if (!is_1f(c, defs, inst->src[arg])) + if (!is_1f(c, inst->src[arg])) return false; replace_with_mov(c, inst, inst->src[1 - arg]); return true; @@ -140,14 +137,10 @@ qir_opt_algebraic(struct vc4_compile *c) { bool progress = false; struct simple_node *node; - struct qinst *defs[c->num_temps]; foreach(node, &c->instructions) { struct qinst *inst = (struct qinst *)node; - if (inst->dst.file == QFILE_TEMP) - defs[inst->dst.index] = inst; - switch (inst->op) { case QOP_SEL_X_Y_ZS: case QOP_SEL_X_Y_ZC: @@ -162,7 +155,7 @@ qir_opt_algebraic(struct vc4_compile *c) break; } - if (is_zero(c, defs, inst->src[1])) { + if (is_zero(c, inst->src[1])) { /* Replace references to a 0 uniform value * with the SEL_X_0 equivalent. */ @@ -174,7 +167,7 @@ qir_opt_algebraic(struct vc4_compile *c) break; } - if (is_zero(c, defs, inst->src[0])) { + if (is_zero(c, inst->src[0])) { /* Replace references to a 0 uniform value * with the SEL_X_0 equivalent, flipping the * condition being evaluated since the operand @@ -195,31 +188,31 @@ qir_opt_algebraic(struct vc4_compile *c) case QOP_FSUB: case QOP_SUB: - if (is_zero(c, defs, inst->src[1])) { + if (is_zero(c, inst->src[1])) { replace_with_mov(c, inst, inst->src[0]); } break; case QOP_ADD: - if (replace_x_0_with_x(c, defs, inst, 0) || - replace_x_0_with_x(c, defs, inst, 1)) { + if (replace_x_0_with_x(c, inst, 0) || + replace_x_0_with_x(c, inst, 1)) { progress = true; break; } break; case QOP_FADD: - if (replace_x_0_with_x(c, defs, inst, 0) || - replace_x_0_with_x(c, defs, inst, 1)) { + if (replace_x_0_with_x(c, inst, 0) || + replace_x_0_with_x(c, inst, 1)) { progress = true; break; } /* FADD(a, FSUB(0, b)) -> FSUB(a, b) */ if (inst->src[1].file == QFILE_TEMP && - defs[inst->src[1].index]->op == QOP_FSUB) { - struct qinst *fsub = defs[inst->src[1].index]; - if (is_zero(c, defs, fsub->src[0])) { + c->defs[inst->src[1].index]->op == QOP_FSUB) { + struct qinst *fsub = c->defs[inst->src[1].index]; + if (is_zero(c, fsub->src[0])) { dump_from(c, inst); inst->op = QOP_FSUB; inst->src[1] = fsub->src[1]; @@ -231,9 +224,9 @@ qir_opt_algebraic(struct vc4_compile *c) /* FADD(FSUB(0, b), a) -> FSUB(a, b) */ if (inst->src[0].file == QFILE_TEMP && - defs[inst->src[0].index]->op == QOP_FSUB) { - struct qinst *fsub = defs[inst->src[0].index]; - if (is_zero(c, defs, fsub->src[0])) { + c->defs[inst->src[0].index]->op == QOP_FSUB) { + struct qinst *fsub = c->defs[inst->src[0].index]; + if (is_zero(c, fsub->src[0])) { dump_from(c, inst); inst->op = QOP_FSUB; inst->src[0] = inst->src[1]; @@ -246,28 +239,28 @@ qir_opt_algebraic(struct vc4_compile *c) break; case QOP_FMUL: - if (replace_x_0_with_0(c, defs, inst, 0) || - replace_x_0_with_0(c, defs, inst, 1) || - fmul_replace_one(c, defs, inst, 0) || - fmul_replace_one(c, defs, inst, 1)) { + if (replace_x_0_with_0(c, inst, 0) || + replace_x_0_with_0(c, inst, 1) || + fmul_replace_one(c, inst, 0) || + fmul_replace_one(c, inst, 1)) { progress = true; break; } break; case QOP_AND: - if (replace_x_0_with_0(c, defs, inst, 0) || - replace_x_0_with_0(c, defs, inst, 1)) { + if (replace_x_0_with_0(c, inst, 0) || + replace_x_0_with_0(c, inst, 1)) { progress = true; break; } - if (is_constant_value(c, defs, inst->src[0], ~0)) { + if (is_constant_value(c, inst->src[0], ~0)) { replace_with_mov(c, inst, inst->src[1]); progress = true; break; } - if (is_constant_value(c, defs, inst->src[1], ~0)) { + if (is_constant_value(c, inst->src[1], ~0)) { replace_with_mov(c, inst, inst->src[0]); progress = true; break; @@ -275,8 +268,8 @@ qir_opt_algebraic(struct vc4_compile *c) break; case QOP_OR: - if (replace_x_0_with_x(c, defs, inst, 0) || - replace_x_0_with_x(c, defs, inst, 1)) { + if (replace_x_0_with_x(c, inst, 0) || + replace_x_0_with_x(c, inst, 1)) { progress = true; break; } diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c index f8c49a44bd3..5189a401248 100644 --- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c +++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c @@ -41,14 +41,10 @@ qir_opt_copy_propagation(struct vc4_compile *c) struct simple_node *node; bool debug = false; struct qreg *movs = calloc(c->num_temps, sizeof(struct qreg)); - struct qinst **defs = calloc(c->num_temps, sizeof(struct qreg)); foreach(node, &c->instructions) { struct qinst *inst = (struct qinst *)node; - if (inst->dst.file == QFILE_TEMP) - defs[inst->dst.index] = inst; - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { int index = inst->src[i].index; if (inst->src[i].file == QFILE_TEMP && @@ -76,13 +72,12 @@ qir_opt_copy_propagation(struct vc4_compile *c) inst->dst.file == QFILE_TEMP && inst->src[0].file != QFILE_VPM && !(inst->src[0].file == QFILE_TEMP && - (defs[inst->src[0].index]->op == QOP_TEX_RESULT || - defs[inst->src[0].index]->op == QOP_TLB_COLOR_READ))) { + (c->defs[inst->src[0].index]->op == QOP_TEX_RESULT || + c->defs[inst->src[0].index]->op == QOP_TLB_COLOR_READ))) { movs[inst->dst.index] = inst->src[0]; } } free(movs); - free(defs); return progress; } diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c index dd1561d68d4..e4ead46c9c2 100644 --- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c +++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c @@ -44,7 +44,7 @@ dce(struct vc4_compile *c, struct qinst *inst) fprintf(stderr, "\n"); } assert(!inst->sf); - qir_remove_instruction(inst); + qir_remove_instruction(c, inst); } static bool diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c index 74304b99888..a329ac69d11 100644 --- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c +++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c @@ -38,14 +38,10 @@ qir_opt_small_immediates(struct vc4_compile *c) { bool progress = false; struct simple_node *node; - struct qinst *defs[c->num_temps]; foreach(node, &c->instructions) { struct qinst *inst = (struct qinst *)node; - if (inst->dst.file == QFILE_TEMP) - defs[inst->dst.index] = inst; - /* The small immediate value sits in the raddr B field, so we * can't have 2 small immediates in one instruction (unless * they're the same value, but that should be optimized away @@ -60,7 +56,7 @@ qir_opt_small_immediates(struct vc4_compile *c) continue; for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { - struct qreg src = qir_follow_movs(defs, inst->src[i]); + struct qreg src = qir_follow_movs(c, inst->src[i]); if (src.file != QFILE_UNIF || c->uniform_contents[src.index] != diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c index ba322b6421c..e9711f222cd 100644 --- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c +++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c @@ -38,20 +38,15 @@ qir_opt_vpm_writes(struct vc4_compile *c) bool progress = false; struct simple_node *node; - struct qinst *defs[c->num_temps]; struct qinst *vpm_writes[64] = { 0 }; uint32_t use_count[c->num_temps]; uint32_t vpm_write_count = 0; - memset(&defs, 0, sizeof(defs)); memset(&use_count, 0, sizeof(use_count)); foreach(node, &c->instructions) { struct qinst *inst = (struct qinst *)node; switch (inst->dst.file) { - case QFILE_TEMP: - defs[inst->dst.index] = inst; - break; case QFILE_VPM: vpm_writes[vpm_write_count++] = inst; break; @@ -75,7 +70,7 @@ qir_opt_vpm_writes(struct vc4_compile *c) if (use_count[temp] != 1) continue; - struct qinst *inst = defs[temp]; + struct qinst *inst = c->defs[temp]; if (qir_is_multi_instruction(inst)) continue; @@ -94,7 +89,7 @@ qir_opt_vpm_writes(struct vc4_compile *c) int src; for (src = 0; src < qir_get_op_nsrc(inst->op); src++) { if (inst->src[src].file == QFILE_TEMP) { - if (defs[inst->src[src].index]->op == + if (c->defs[inst->src[src].index]->op == QOP_TEX_RESULT) { break; } @@ -108,8 +103,9 @@ qir_opt_vpm_writes(struct vc4_compile *c) */ assert(!vpm_writes[i]->sf); move_to_tail(&vpm_writes[i]->link, &inst->link); - qir_remove_instruction(vpm_writes[i]); + qir_remove_instruction(c, vpm_writes[i]); + c->defs[inst->dst.index] = NULL; inst->dst.file = QFILE_VPM; inst->dst.index = 0; diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 9addf9cafc8..e453d848096 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -318,6 +318,15 @@ qir_get_temp(struct vc4_compile *c) reg.file = QFILE_TEMP; reg.index = c->num_temps++; + if (c->num_temps > c->defs_array_size) { + uint32_t old_size = c->defs_array_size; + c->defs_array_size = MAX2(old_size * 2, 16); + c->defs = reralloc(c, c->defs, struct qinst *, + c->defs_array_size); + memset(&c->defs[old_size], 0, + sizeof(c->defs[0]) * (c->defs_array_size - old_size)); + } + return reg; } @@ -358,6 +367,9 @@ qir_inst4(enum qop op, struct qreg dst, void qir_emit(struct vc4_compile *c, struct qinst *inst) { + if (inst->dst.file == QFILE_TEMP) + c->defs[inst->dst.index] = inst; + insert_at_tail(&c->instructions, &inst->link); } @@ -383,18 +395,21 @@ qir_compile_init(void) } void -qir_remove_instruction(struct qinst *qinst) +qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst) { + if (qinst->dst.file == QFILE_TEMP) + c->defs[qinst->dst.index] = NULL; + remove_from_list(&qinst->link); free(qinst->src); free(qinst); } struct qreg -qir_follow_movs(struct qinst **defs, struct qreg reg) +qir_follow_movs(struct vc4_compile *c, struct qreg reg) { - while (reg.file == QFILE_TEMP && defs[reg.index]->op == QOP_MOV) - reg = defs[reg.index]->src[0]; + while (reg.file == QFILE_TEMP && c->defs[reg.index]->op == QOP_MOV) + reg = c->defs[reg.index]->src[0]; return reg; } @@ -405,7 +420,7 @@ qir_compile_destroy(struct vc4_compile *c) while (!is_empty_list(&c->instructions)) { struct qinst *qinst = (struct qinst *)first_elem(&c->instructions); - qir_remove_instruction(qinst); + qir_remove_instruction(c, qinst); } ralloc_free(c); diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index af92c8c66a9..4f910e3c3df 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -284,6 +284,9 @@ struct vc4_compile { struct vc4_context *vc4; struct tgsi_parse_context parser; struct qreg *temps; + /* For each temp, the instruction generating its value. */ + struct qinst **defs; + uint32_t defs_array_size; /** * Inputs to the shader, arranged by TGSI declaration order. * @@ -368,7 +371,7 @@ struct qinst *qir_inst4(enum qop op, struct qreg dst, struct qreg b, struct qreg c, struct qreg d); -void qir_remove_instruction(struct qinst *qinst); +void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst); struct qreg qir_uniform(struct vc4_compile *c, enum quniform_contents contents, uint32_t data); @@ -385,7 +388,7 @@ bool qir_depends_on_flags(struct qinst *inst); bool qir_writes_r4(struct qinst *inst); bool qir_reads_r4(struct qinst *inst); bool qir_src_needs_a_file(struct qinst *inst); -struct qreg qir_follow_movs(struct qinst **defs, struct qreg reg); +struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg); void qir_dump(struct vc4_compile *c); void qir_dump_inst(struct vc4_compile *c, struct qinst *inst); diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c index d527889e76f..63f5eb22858 100644 --- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c @@ -138,6 +138,7 @@ qir_lower_uniforms(struct vc4_compile *c) struct qreg unif = { QFILE_UNIF, max_index }; struct qinst *mov = qir_inst(QOP_MOV, temp, unif, c->undef); insert_at_head(&c->instructions, &mov->link); + c->defs[temp.index] = mov; foreach(node, &c->instructions) { struct qinst *inst = (struct qinst *)node; uint32_t nsrc = qir_get_op_nsrc(inst->op); -- 2.30.2