From f029932cac36859df5a6d04d1dd7343672ced83a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 21 Mar 2016 13:12:41 -0700 Subject: [PATCH] vc4: Allow TLB Z/color/stencil writes from any ALU operation in QIR. This lets us write the Z directly from the FTOI for computed Z, and may let us coalesce color writes in the future. No change in my shader-db, but clearly drops an instruction in piglit's early-z test. --- src/gallium/drivers/vc4/vc4_program.c | 29 +++++++------ src/gallium/drivers/vc4/vc4_qir.c | 42 +++++++++++++++---- src/gallium/drivers/vc4/vc4_qir.h | 12 ++---- src/gallium/drivers/vc4/vc4_qir_schedule.c | 35 +++++++++++----- src/gallium/drivers/vc4/vc4_qpu_emit.c | 47 ++++++++++------------ 5 files changed, 100 insertions(+), 65 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index d1e893a76a9..35bad7e9296 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1192,12 +1192,15 @@ emit_frag_end(struct vc4_compile *c) } if (c->fs_key->stencil_enabled) { - qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0)); + qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0), + qir_uniform(c, QUNIFORM_STENCIL, 0)); if (c->fs_key->stencil_twoside) { - qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 1)); + qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0), + qir_uniform(c, QUNIFORM_STENCIL, 1)); } if (c->fs_key->stencil_full_writemasks) { - qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 2)); + qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0), + qir_uniform(c, QUNIFORM_STENCIL, 2)); } } @@ -1206,24 +1209,24 @@ emit_frag_end(struct vc4_compile *c) } if (c->fs_key->depth_enabled) { - struct qreg z; if (c->output_position_index != -1) { - z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2], - qir_uniform_f(c, 0xffffff))); + qir_FTOI_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0), + qir_FMUL(c, + c->outputs[c->output_position_index + 2], + qir_uniform_f(c, 0xffffff)))->cond = discard_cond; } else { - z = qir_FRAG_Z(c); + qir_MOV_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0), + qir_FRAG_Z(c))->cond = discard_cond; } - struct qinst *inst = qir_TLB_Z_WRITE(c, z); - inst->cond = discard_cond; } if (!c->msaa_per_sample_output) { - struct qinst *inst = qir_TLB_COLOR_WRITE(c, color); - inst->cond = discard_cond; + qir_MOV_dest(c, qir_reg(QFILE_TLB_COLOR_WRITE, 0), + color)->cond = discard_cond; } else { for (int i = 0; i < VC4_MAX_SAMPLES; i++) { - struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]); - inst->cond = discard_cond; + qir_MOV_dest(c, qir_reg(QFILE_TLB_COLOR_WRITE_MS, 0), + c->sample_colors[i])->cond = discard_cond; } } } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index c6d5a79eae1..10b82ffc16c 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -68,10 +68,6 @@ static const struct qir_op_info qir_op_info[] = { [QOP_RSQ] = { "rsq", 1, 1 }, [QOP_EXP2] = { "exp2", 1, 2 }, [QOP_LOG2] = { "log2", 1, 2 }, - [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true }, - [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true }, - [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true }, - [QOP_TLB_COLOR_WRITE_MS] = { "tlb_color_ms", 0, 1, true }, [QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 }, [QOP_MS_MASK] = { "ms_mask", 0, 1, true }, [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 }, @@ -115,6 +111,16 @@ qir_get_op_nsrc(enum qop qop) bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst) { + switch (inst->dst.file) { + case QFILE_TLB_Z_WRITE: + case QFILE_TLB_COLOR_WRITE: + case QFILE_TLB_COLOR_WRITE_MS: + case QFILE_TLB_STENCIL_SETUP: + return true; + default: + break; + } + return qir_op_info[inst->op].has_side_effects; } @@ -226,24 +232,44 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write) [QFILE_TEMP] = "t", [QFILE_VARY] = "v", [QFILE_UNIF] = "u", + [QFILE_TLB_COLOR_WRITE] = "tlb_c", + [QFILE_TLB_COLOR_WRITE_MS] = "tlb_c_ms", + [QFILE_TLB_Z_WRITE] = "tlb_z", + [QFILE_TLB_STENCIL_SETUP] = "tlb_stencil", }; - if (reg.file == QFILE_NULL) { + switch (reg.file) { + + case QFILE_NULL: fprintf(stderr, "null"); - } else if (reg.file == QFILE_SMALL_IMM) { + break; + + case QFILE_SMALL_IMM: if ((int)reg.index >= -16 && (int)reg.index <= 15) fprintf(stderr, "%d", reg.index); else fprintf(stderr, "%f", uif(reg.index)); - } else if (reg.file == QFILE_VPM) { + break; + + case QFILE_VPM: if (write) { fprintf(stderr, "vpm"); } else { fprintf(stderr, "vpm%d.%d", reg.index / 4, reg.index % 4); } - } else { + break; + + case QFILE_TLB_COLOR_WRITE: + case QFILE_TLB_COLOR_WRITE_MS: + case QFILE_TLB_Z_WRITE: + case QFILE_TLB_STENCIL_SETUP: + fprintf(stderr, "%s", files[reg.file]); + break; + + default: fprintf(stderr, "%s%d", files[reg.file], reg.index); + break; } if (reg.file == QFILE_UNIF && diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 4aec313831f..970eafd542b 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -49,6 +49,10 @@ enum qfile { QFILE_VARY, QFILE_UNIF, QFILE_VPM, + QFILE_TLB_COLOR_WRITE, + QFILE_TLB_COLOR_WRITE_MS, + QFILE_TLB_Z_WRITE, + QFILE_TLB_STENCIL_SETUP, /** * Stores an immediate value in the index field that can be turned @@ -106,10 +110,6 @@ enum qop { QOP_LOG2, QOP_VW_SETUP, QOP_VR_SETUP, - QOP_TLB_STENCIL_SETUP, - QOP_TLB_Z_WRITE, - QOP_TLB_COLOR_WRITE, - QOP_TLB_COLOR_WRITE_MS, QOP_TLB_COLOR_READ, QOP_MS_MASK, QOP_VARY_ADD_C, @@ -629,10 +629,6 @@ QIR_ALU0(FRAG_W) QIR_ALU0(FRAG_REV_FLAG) QIR_ALU0(TEX_RESULT) QIR_ALU0(TLB_COLOR_READ) -QIR_NODST_1(TLB_COLOR_WRITE) -QIR_NODST_1(TLB_COLOR_WRITE_MS) -QIR_NODST_1(TLB_Z_WRITE) -QIR_NODST_1(TLB_STENCIL_SETUP) QIR_NODST_1(MS_MASK) static inline struct qreg diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c index 4585918bc7d..8b843a3a158 100644 --- a/src/gallium/drivers/vc4/vc4_qir_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c @@ -228,11 +228,7 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n) add_write_dep(dir, &state->last_tex_result, n); break; - case QOP_TLB_COLOR_WRITE: - case QOP_TLB_COLOR_WRITE_MS: case QOP_TLB_COLOR_READ: - case QOP_TLB_Z_WRITE: - case QOP_TLB_STENCIL_SETUP: case QOP_MS_MASK: add_write_dep(dir, &state->last_tlb, n); break; @@ -241,10 +237,25 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n) break; } - if (inst->dst.file == QFILE_VPM) + switch (inst->dst.file) { + case QFILE_VPM: add_write_dep(dir, &state->last_vpm_write, n); - else if (inst->dst.file == QFILE_TEMP) + break; + + case QFILE_TEMP: add_write_dep(dir, &state->last_temp_write[inst->dst.index], n); + break; + + case QFILE_TLB_COLOR_WRITE: + case QFILE_TLB_COLOR_WRITE_MS: + case QFILE_TLB_Z_WRITE: + case QFILE_TLB_STENCIL_SETUP: + add_write_dep(dir, &state->last_tlb, n); + break; + + default: + break; + } if (qir_depends_on_flags(inst)) add_dep(dir, state->last_sf, n); @@ -358,11 +369,13 @@ get_register_pressure_cost(struct schedule_state *state, struct qinst *inst) static bool locks_scoreboard(struct qinst *inst) { - switch (inst->op) { - case QOP_TLB_Z_WRITE: - case QOP_TLB_COLOR_WRITE: - case QOP_TLB_COLOR_WRITE_MS: - case QOP_TLB_COLOR_READ: + if (inst->op == QOP_TLB_COLOR_READ) + return true; + + switch (inst->dst.file) { + case QFILE_TLB_Z_WRITE: + case QFILE_TLB_COLOR_WRITE: + case QFILE_TLB_COLOR_WRITE_MS: return true; default: return false; diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 63e1ee54305..5c655495c2b 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -300,6 +300,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) last_vpm_read_index = qinst->src[i].index; src[i] = qpu_ra(QPU_R_VPM); break; + case QFILE_TLB_COLOR_WRITE: + case QFILE_TLB_COLOR_WRITE_MS: + case QFILE_TLB_Z_WRITE: + case QFILE_TLB_STENCIL_SETUP: + unreachable("bad qir src file"); } } @@ -314,6 +319,23 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QFILE_VPM: dst = qpu_ra(QPU_W_VPM); break; + + case QFILE_TLB_COLOR_WRITE: + dst = qpu_tlbc(); + break; + + case QFILE_TLB_COLOR_WRITE_MS: + dst = qpu_tlbc_ms(); + break; + + case QFILE_TLB_Z_WRITE: + dst = qpu_ra(QPU_W_TLB_Z); + break; + + case QFILE_TLB_STENCIL_SETUP: + dst = qpu_ra(QPU_W_TLB_STENCIL_SETUP); + break; + case QFILE_VARY: case QFILE_UNIF: case QFILE_SMALL_IMM: @@ -383,19 +405,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) */ break; - case QOP_TLB_STENCIL_SETUP: - assert(!unpack); - queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), - src[0]) | unpack); - break; - - case QOP_TLB_Z_WRITE: - queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), - src[0]) | unpack); - set_last_cond_add(c, qinst->cond); - handled_qinst_cond = true; - break; - case QOP_TLB_COLOR_READ: queue(c, qpu_NOP()); *last_inst(c) = qpu_set_sig(*last_inst(c), @@ -403,18 +412,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) handle_r4_qpu_write(c, qinst, dst); break; - case QOP_TLB_COLOR_WRITE: - queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack); - set_last_cond_add(c, qinst->cond); - handled_qinst_cond = true; - break; - - case QOP_TLB_COLOR_WRITE_MS: - queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0])); - set_last_cond_add(c, qinst->cond); - handled_qinst_cond = true; - break; - case QOP_VARY_ADD_C: queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack); break; -- 2.30.2