From 2b9f0dffe00bdc556436da02c099b8a50ecc4f49 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 15 Mar 2016 18:00:22 -0700
Subject: [PATCH] vc4: Move discard handling to the condition flag.

Now that the field exists in the instruction, we can make discards less
special. As a bonus, that means that we should be able to merge some more
.sf instructions together when we get around to that.

This causes some scheduling changes, as it allows tlb_color_reads to be
delayed past the discard condition setup. Since the tlb_color_read ends
up later, this may mean performance improvements, but I haven't tested.

total instructions in shared programs: 78114 -> 78035 (-0.10%)
instructions in affected programs: 1922 -> 1843 (-4.11%)
total estimated cycles in shared programs: 234318 -> 234329 (0.00%)
estimated cycles in affected programs: 8200 -> 8211 (0.13%)
---
 src/gallium/drivers/vc4/vc4_program.c      | 19 +++++++++++++------
 src/gallium/drivers/vc4/vc4_qir.c          |  1 -
 src/gallium/drivers/vc4/vc4_qir.h          | 16 ++++++++++------
 src/gallium/drivers/vc4/vc4_qir_schedule.c |  5 -----
 src/gallium/drivers/vc4/vc4_qpu_emit.c     | 22 ++++++----------------
 5 files changed, 29 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 81e8e9150d6..f5826d85174 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1184,8 +1184,11 @@ emit_frag_end(struct vc4_compile *c)
                 color = qir_uniform_ui(c, 0);
         }
 
-        if (c->discard.file != QFILE_NULL)
-                qir_TLB_DISCARD_SETUP(c, c->discard);
+        uint32_t discard_cond = QPU_COND_ALWAYS;
+        if (c->discard.file != QFILE_NULL) {
+                qir_SF(c, c->discard);
+                discard_cond = QPU_COND_ZS;
+        }
 
         if (c->fs_key->stencil_enabled) {
                 qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
@@ -1209,14 +1212,18 @@ emit_frag_end(struct vc4_compile *c)
                 } else {
                         z = qir_FRAG_Z(c);
                 }
-                qir_TLB_Z_WRITE(c, z);
+                struct qinst *inst = qir_TLB_Z_WRITE(c, z);
+                inst->cond = discard_cond;
         }
 
         if (!c->msaa_per_sample_output) {
-                qir_TLB_COLOR_WRITE(c, color);
+                struct qinst *inst = qir_TLB_COLOR_WRITE(c, color);
+                inst->cond = discard_cond;
         } else {
-                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
-                        qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+                for (int i = 0; i < VC4_MAX_SAMPLES; i++) {
+                        struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+                        inst->cond = discard_cond;
+                }
         }
 }
 
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 125a9525e1b..e73e3899410 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -69,7 +69,6 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_RSQ] = { "rsq", 1, 1, false, true },
         [QOP_EXP2] = { "exp2", 1, 2, false, true },
         [QOP_LOG2] = { "log2", 1, 2, false, true },
-        [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
         [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
         [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 4f39d72f552..3fbf5d749e7 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -101,7 +101,6 @@ enum qop {
         QOP_LOG2,
         QOP_VW_SETUP,
         QOP_VR_SETUP,
-        QOP_TLB_DISCARD_SETUP,
         QOP_TLB_STENCIL_SETUP,
         QOP_TLB_Z_WRITE,
         QOP_TLB_COLOR_WRITE,
@@ -551,17 +550,23 @@ qir_##name##_dest(struct vc4_compile *c, struct qreg dest,              \
 }
 
 #define QIR_NODST_1(name)                                               \
-static inline void                                                      \
+static inline struct qinst *                                            \
 qir_##name(struct vc4_compile *c, struct qreg a)                        \
 {                                                                       \
-        qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef));       \
+        struct qinst *inst = qir_inst(QOP_##name, c->undef,             \
+                                      a, c->undef);                     \
+        qir_emit(c, inst);                                              \
+        return inst;                                                    \
 }
 
 #define QIR_NODST_2(name)                                               \
-static inline void                                                      \
+static inline struct qinst *                                            \
 qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)         \
 {                                                                       \
-        qir_emit(c, qir_inst(QOP_##name, c->undef, a, b));              \
+        struct qinst *inst = qir_inst(QOP_##name, c->undef,             \
+                                      a, b);                            \
+        qir_emit(c, inst);                                              \
+        return inst;                                                    \
 }
 
 #define QIR_PACK(name)                                                  \
@@ -623,7 +628,6 @@ QIR_ALU0(TLB_COLOR_READ)
 QIR_NODST_1(TLB_COLOR_WRITE)
 QIR_NODST_1(TLB_COLOR_WRITE_MS)
 QIR_NODST_1(TLB_Z_WRITE)
-QIR_NODST_1(TLB_DISCARD_SETUP)
 QIR_NODST_1(TLB_STENCIL_SETUP)
 QIR_NODST_1(MS_MASK)
 
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index ee1e9aafbb9..186e81be750 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -236,11 +236,6 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
                 add_write_dep(dir, &state->last_tlb, n);
                 break;
 
-        case QOP_TLB_DISCARD_SETUP:
-                add_write_dep(dir, &state->last_sf, n);
-                add_write_dep(dir, &state->last_tlb, n);
-                break;
-
         default:
                 break;
         }
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 450b97fc014..b507e370683 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -171,7 +171,6 @@ void
 vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 {
         struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c);
-        bool discard = false;
         uint32_t inputs_remaining = c->num_inputs;
         uint32_t vpm_read_fifo_count = 0;
         uint32_t vpm_read_offset = 0;
@@ -375,12 +374,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                          */
                         break;
 
-                case QOP_TLB_DISCARD_SETUP:
-                        discard = true;
-                        queue(c, qpu_a_MOV(src[0], src[0]) | unpack);
-                        *last_inst(c) |= QPU_SF;
-                        break;
-
                 case QOP_TLB_STENCIL_SETUP:
                         assert(!unpack);
                         queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
@@ -390,9 +383,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 
                 case QOP_TLB_Z_WRITE:
                         queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]) | unpack);
-                        if (discard) {
-                                set_last_cond_add(c, QPU_COND_ZS);
-                        }
+                        set_last_cond_add(c, qinst->cond);
+                        handled_qinst_cond = true;
                         break;
 
                 case QOP_TLB_COLOR_READ:
@@ -406,16 +398,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 
                 case QOP_TLB_COLOR_WRITE:
                         queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
-                        if (discard) {
-                                set_last_cond_add(c, QPU_COND_ZS);
-                        }
+                        set_last_cond_add(c, qinst->cond);
+                        handled_qinst_cond = true;
                         break;
 
                 case QOP_TLB_COLOR_WRITE_MS:
                         queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0]));
-                        if (discard) {
-                                set_last_cond_add(c, QPU_COND_ZS);
-                        }
+                        set_last_cond_add(c, qinst->cond);
+                        handled_qinst_cond = true;
                         break;
 
                 case QOP_VARY_ADD_C:
-- 
2.30.2