From 314f0c57e4c00b0a5cb544fa43e356c1069acd8f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 15 Nov 2016 12:40:36 -0800 Subject: [PATCH] vc4: Refactor qir_get_op_nsrc(enum qop) to qir_get_nsrc(struct qinst *). Every caller was dereffing the qinst, and this will let us make the number of sources vary depending on the destination of the qinst so that we can have general ALU ops that store to tex_[strb] and get an implicit uniform. --- src/gallium/drivers/vc4/vc4_opt_constant_folding.c | 2 +- src/gallium/drivers/vc4/vc4_opt_copy_propagation.c | 4 ++-- src/gallium/drivers/vc4/vc4_opt_dead_code.c | 6 +++--- src/gallium/drivers/vc4/vc4_opt_peephole_sf.c | 4 ++-- src/gallium/drivers/vc4/vc4_opt_small_immediates.c | 4 ++-- src/gallium/drivers/vc4/vc4_opt_vpm.c | 6 +++--- src/gallium/drivers/vc4/vc4_program.c | 2 +- src/gallium/drivers/vc4/vc4_qir.c | 12 +++++------- src/gallium/drivers/vc4/vc4_qir.h | 2 +- .../drivers/vc4/vc4_qir_emit_uniform_stream_resets.c | 2 +- src/gallium/drivers/vc4/vc4_qir_live_variables.c | 2 +- src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c | 6 +++--- src/gallium/drivers/vc4/vc4_qir_schedule.c | 8 ++++---- src/gallium/drivers/vc4/vc4_qir_validate.c | 2 +- src/gallium/drivers/vc4/vc4_qpu_emit.c | 4 ++-- src/gallium/drivers/vc4/vc4_register_allocate.c | 2 +- src/gallium/drivers/vc4/vc4_reorder_uniforms.c | 2 +- 17 files changed, 34 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_opt_constant_folding.c b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c index 7ff91615545..de642d46582 100644 --- a/src/gallium/drivers/vc4/vc4_opt_constant_folding.c +++ b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c @@ -58,7 +58,7 @@ dump_to(struct vc4_compile *c, struct qinst *inst) static bool constant_fold(struct vc4_compile *c, struct qinst *inst) { - int nsrc = qir_get_op_nsrc(inst->op); + int nsrc = qir_get_nsrc(inst); uint32_t ui[nsrc]; for (int i = 0; i < nsrc; i++) { diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c index d20ee5e227d..9a6320a9a20 100644 --- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c +++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c @@ -67,7 +67,7 @@ try_copy_prop(struct vc4_compile *c, struct qinst *inst, struct qinst **movs) bool debug = false; bool progress = false; - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file != QFILE_TEMP) continue; @@ -113,7 +113,7 @@ try_copy_prop(struct vc4_compile *c, struct qinst *inst, struct qinst **movs) * this instruction doesn't already use it. */ bool already_has_unpack = false; - for (int j = 0; j < qir_get_op_nsrc(inst->op); j++) { + for (int j = 0; j < qir_get_nsrc(inst); j++) { if (inst->src[j].pack) already_has_unpack = true; } diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c index 1838c394ff9..f04d0ff97ab 100644 --- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c +++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c @@ -54,7 +54,7 @@ dce(struct vc4_compile *c, struct qinst *inst) static bool has_nonremovable_reads(struct vc4_compile *c, struct qinst *inst) { - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_VPM) { uint32_t attr = inst->src[i].index / 4; uint32_t offset = (inst->src[i].index % 4) * 4; @@ -88,7 +88,7 @@ qir_opt_dead_code(struct vc4_compile *c) bool *used = calloc(c->num_temps, sizeof(bool)); qir_for_each_inst_inorder(inst, c) { - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_TEMP) used[inst->src[i].index] = true; } @@ -129,7 +129,7 @@ qir_opt_dead_code(struct vc4_compile *c) continue; } - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file != QFILE_VPM) continue; uint32_t attr = inst->src[i].index / 4; diff --git a/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c b/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c index f4856673ba2..577290b1fc4 100644 --- a/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c +++ b/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c @@ -62,7 +62,7 @@ inst_srcs_updated(struct qinst *inst, struct qinst *writer) */ switch (writer->dst.file) { case QFILE_TEMP: - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_TEMP && inst->src[i].index == writer->dst.index) { return true; @@ -95,7 +95,7 @@ inst_result_equals(struct qinst *a, struct qinst *b) return false; } - for (int i = 0; i < qir_get_op_nsrc(a->op); i++) { + for (int i = 0; i < qir_get_nsrc(a); i++) { if (!qir_reg_equals(a->src[i], b->src[i]) || src_file_varies_on_reread(a->src[i]) || src_file_varies_on_reread(b->src[i])) { diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c index 4c105f37344..15cbd12773f 100644 --- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c +++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c @@ -45,14 +45,14 @@ qir_opt_small_immediates(struct vc4_compile *c) * elsewhere). */ bool uses_small_imm = false; - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_SMALL_IMM) uses_small_imm = true; } if (uses_small_imm) continue; - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { struct qreg src = qir_follow_movs(c, inst->src[i]); if (src.file != QFILE_UNIF || diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c index 83ba11b817f..b3bef272254 100644 --- a/src/gallium/drivers/vc4/vc4_opt_vpm.c +++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c @@ -58,7 +58,7 @@ qir_opt_vpm(struct vc4_compile *c) break; } - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_TEMP) { uint32_t temp = inst->src[i].index; use_count[temp]++; @@ -81,7 +81,7 @@ qir_opt_vpm(struct vc4_compile *c) qir_is_tex(inst)) continue; - for (int j = 0; j < qir_get_op_nsrc(inst->op); j++) { + for (int j = 0; j < qir_get_nsrc(inst); j++) { if (inst->src[j].file != QFILE_TEMP || inst->src[j].pack) continue; @@ -106,7 +106,7 @@ qir_opt_vpm(struct vc4_compile *c) } uint32_t temps = 0; - for (int k = 0; k < qir_get_op_nsrc(inst->op); k++) { + for (int k = 0; k < qir_get_nsrc(inst); k++) { if (inst->src[k].file == QFILE_TEMP) temps++; } diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index f8c7c458622..97cbabbd511 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2441,7 +2441,7 @@ vc4_setup_compiled_fs_inputs(struct vc4_context *vc4, struct vc4_compile *c, memset(input_live, 0, sizeof(input_live)); qir_for_each_inst_inorder(inst, c) { - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_VARY) input_live[inst->src[i].index] = true; } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 8bd016c5535..2c9119d9ccf 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -104,12 +104,10 @@ qir_get_op_name(enum qop qop) } int -qir_get_op_nsrc(enum qop qop) +qir_get_nsrc(struct qinst *inst) { - if (qop < ARRAY_SIZE(qir_op_info) && qir_op_info[qop].name) - return qir_op_info[qop].nsrc; - else - abort(); + assert(qir_op_info[inst->op].name); + return qir_op_info[inst->op].nsrc; } /** @@ -140,7 +138,7 @@ qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst) * point/line coordinates reads, because they're generated by * fixed-function hardware. */ - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_VARY && c->input_slots[inst->src[i].index].slot == 0xff) { return true; @@ -372,7 +370,7 @@ qir_dump_inst(struct vc4_compile *c, struct qinst *inst) } } - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { fprintf(stderr, ", "); qir_print_reg(c, inst->src[i], false); vc4_qpu_disasm_unpack(stderr, inst->src[i].pack); diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index eec50c3439e..a3b8762951d 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -577,7 +577,7 @@ struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst); struct qreg qir_get_temp(struct vc4_compile *c); void qir_calculate_live_intervals(struct vc4_compile *c); -int qir_get_op_nsrc(enum qop qop); +int qir_get_nsrc(struct qinst *inst); bool qir_reg_equals(struct qreg a, struct qreg b); bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); diff --git a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c index 3fd6358e3d3..23ae8ebfa6f 100644 --- a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c +++ b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c @@ -41,7 +41,7 @@ inst_reads_a_uniform(struct qinst *inst) if (qir_is_tex(inst)) return true; - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_UNIF) return true; } diff --git a/src/gallium/drivers/vc4/vc4_qir_live_variables.c b/src/gallium/drivers/vc4/vc4_qir_live_variables.c index dc058f5b4a7..330e1c8f7a9 100644 --- a/src/gallium/drivers/vc4/vc4_qir_live_variables.c +++ b/src/gallium/drivers/vc4/vc4_qir_live_variables.c @@ -205,7 +205,7 @@ qir_setup_def_use(struct vc4_compile *c) _mesa_hash_table_clear(partial_update_ht, NULL); qir_for_each_inst(inst, block) { - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) + for (int i = 0; i < qir_get_nsrc(inst); i++) qir_setup_use(c, block, ip, inst->src[i]); qir_setup_def(c, block, ip, partial_update_ht, inst); diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c index 8ec6c797396..1884cfa5b78 100644 --- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c @@ -89,7 +89,7 @@ qir_get_instruction_uniform_count(struct qinst *inst) { uint32_t count = 0; - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file != QFILE_UNIF) continue; @@ -119,7 +119,7 @@ qir_lower_uniforms(struct vc4_compile *c) * ht. */ qir_for_each_inst_inorder(inst, c) { - uint32_t nsrc = qir_get_op_nsrc(inst->op); + uint32_t nsrc = qir_get_nsrc(inst); if (qir_get_instruction_uniform_count(inst) <= 1) continue; @@ -155,7 +155,7 @@ qir_lower_uniforms(struct vc4_compile *c) struct qinst *mov = NULL; qir_for_each_inst(inst, block) { - uint32_t nsrc = qir_get_op_nsrc(inst->op); + uint32_t nsrc = qir_get_nsrc(inst); uint32_t count = qir_get_instruction_uniform_count(inst); diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c index 6eaa2e075ac..c1a2db5e3c5 100644 --- a/src/gallium/drivers/vc4/vc4_qir_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c @@ -187,7 +187,7 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n) * ignore uniforms accesses, because qir_reorder_uniforms() happens * after this. */ - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { switch (inst->src[i].file) { case QFILE_TEMP: add_dep(dir, @@ -305,7 +305,7 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx, calculate_deps(&state, n); - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { switch (inst->src[i].file) { case QFILE_UNIF: add_dep(state.dir, state.last_uniforms_reset, n); @@ -429,7 +429,7 @@ get_register_pressure_cost(struct schedule_state *state, struct qinst *inst) state->temp_writes[inst->dst.index] == 1) cost--; - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_TEMP && !BITSET_TEST(state->temp_live, inst->src[i].index)) { cost++; @@ -648,7 +648,7 @@ schedule_instructions(struct vc4_compile *c, } /* Update our tracking of register pressure. */ - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_TEMP) BITSET_SET(state->temp_live, inst->src[i].index); } diff --git a/src/gallium/drivers/vc4/vc4_qir_validate.c b/src/gallium/drivers/vc4/vc4_qir_validate.c index e7cfe5ad217..9579f7a15cb 100644 --- a/src/gallium/drivers/vc4/vc4_qir_validate.c +++ b/src/gallium/drivers/vc4/vc4_qir_validate.c @@ -86,7 +86,7 @@ void qir_validate(struct vc4_compile *c) break; } - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { struct qreg src = inst->src[i]; switch (src.file) { diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 2cc0f3013ef..9d9e5d84ecd 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -289,7 +289,7 @@ vc4_generate_code_block(struct vc4_compile *c, uint64_t unpack = 0; struct qpu_reg src[ARRAY_SIZE(qinst->src)]; - for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) { + for (int i = 0; i < qir_get_nsrc(qinst); i++) { int index = qinst->src[i].index; switch (qinst->src[i].file) { case QFILE_NULL: @@ -538,7 +538,7 @@ vc4_generate_code_block(struct vc4_compile *c, * argument slot as well so that we don't take up * another raddr just to get unused data. */ - if (qir_get_op_nsrc(qinst->op) == 1) + if (qir_get_nsrc(qinst) == 1) src[1] = src[0]; fixup_raddr_conflict(block, dst, &src[0], &src[1], diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index 2992a6be2f2..e48b8ee5992 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -306,7 +306,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) * can only be done from regfile A, while float unpacks can be * either A or R4. */ - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_TEMP && inst->src[i].pack) { if (qir_is_float_input(inst)) { diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c index 7d5076f429e..37acefdc0ba 100644 --- a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c @@ -46,7 +46,7 @@ qir_reorder_uniforms(struct vc4_compile *c) qir_for_each_inst_inorder(inst, c) { uint32_t new = ~0; - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file != QFILE_UNIF) continue; -- 2.30.2