From: Eric Anholt Date: Fri, 6 May 2016 01:11:04 +0000 (-0700) Subject: vc4: Emit resets of the uniform stream at the starts of blocks. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9194473dd260fe72042807a97be0072c6f0537da;p=mesa.git vc4: Emit resets of the uniform stream at the starts of blocks. If a block might be entered from multiple locations, then the uniform stream will (probably) be at different points, and we need to make sure that it's pointing where we expect it to be. The kernel also enforces that any block reading a uniform resets uniforms, to prevent reading outside of the uniform stream by using looping. --- diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index 76e46f54089..76e52ce142d 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -31,6 +31,7 @@ C_SOURCES := \ vc4_opt_vpm.c \ vc4_program.c \ vc4_qir.c \ + vc4_qir_emit_uniform_stream_resets.c \ vc4_qir_live_variables.c \ vc4_qir_lower_uniforms.c \ vc4_qir_schedule.c \ diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 465e052053e..521f971272a 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2114,6 +2114,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, qir_lower_uniforms(c); qir_schedule_instructions(c); + qir_emit_uniform_stream_resets(c); if (vc4_debug & VC4_DEBUG_QIR) { fprintf(stderr, "%s prog %d/%d QIR:\n", diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index e1d663dd3a7..9ff15611ef9 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -85,6 +85,7 @@ static const struct qir_op_info qir_op_info[] = { [QOP_LOAD_IMM] = { "load_imm", 0, 1 }, [QOP_BRANCH] = { "branch", 0, 0, true }, + [QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true }, }; static const char * diff --git 
a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index e7ddfaa1fcb..88eda225d80 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -162,6 +162,12 @@ enum qop { * that block->successor[1] may be unset if the condition is ALWAYS. */ QOP_BRANCH, + + /* Emits an ADD from src[0] to src[1], where src[0] must be a + * QOP_LOAD_IMM result and src[1] is a QUNIFORM_UNIFORMS_ADDRESS, + * required by the kernel as part of its branch validation. + */ + QOP_UNIFORMS_RESET, }; struct queued_qpu_inst { @@ -260,6 +266,11 @@ enum quniform_contents { QUNIFORM_ALPHA_REF, QUNIFORM_SAMPLE_MASK, + + /* Placeholder uniform that will be updated by the kernel when used by + * an instruction writing to QPU_W_UNIFORMS_ADDRESS. + */ + QUNIFORM_UNIFORMS_ADDRESS, }; struct vc4_varying_slot { @@ -521,6 +532,7 @@ struct qreg qir_uniform(struct vc4_compile *c, uint32_t data); void qir_schedule_instructions(struct vc4_compile *c); void qir_reorder_uniforms(struct vc4_compile *c); +void qir_emit_uniform_stream_resets(struct vc4_compile *c); struct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst); struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst); diff --git a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c new file mode 100644 index 00000000000..3fd6358e3d3 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c @@ -0,0 +1,101 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc4_qir_emit_uniform_stream_resets.c + * + * Adds updates to the uniform stream address at the start of each basic block + * that uses uniforms. + * + * This will be done just before the translation to QPU instructions, once we + * have performed optimization and know how many uniforms are used in each block. + */ + +#include "vc4_qir.h" +#include "util/hash_table.h" +#include "util/u_math.h" + +static bool +inst_reads_a_uniform(struct qinst *inst) +{ + if (qir_is_tex(inst)) + return true; + + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + if (inst->src[i].file == QFILE_UNIF) + return true; + } + + return false; +} + +static bool +block_reads_any_uniform(struct qblock *block) +{ + qir_for_each_inst(inst, block) { + if (inst_reads_a_uniform(inst)) + return true; + } + + return false; +} + +void +qir_emit_uniform_stream_resets(struct vc4_compile *c) +{ + uint32_t uniform_count = 0; + + qir_for_each_block(block, c) { + if (block != qir_entry_block(c) && + (block_reads_any_uniform(block) || + block == qir_exit_block(c))) { + struct qreg t = qir_get_temp(c); + struct qreg uni_addr = + qir_uniform(c, QUNIFORM_UNIFORMS_ADDRESS, 0); + + /* Load the offset of the next uniform in the stream + * after the one we're generating here. 
+ */ + struct qinst *load_imm = + qir_inst(QOP_LOAD_IMM, + t, + qir_reg(QFILE_LOAD_IMM, + (uniform_count + 1) * 4), + c->undef); + struct qinst *add = + qir_inst(QOP_UNIFORMS_RESET, c->undef, + t, uni_addr); + + /* Pushes to the top of the block, so in reverse + * order. + */ + list_add(&add->link, &block->instructions); + list_add(&load_imm->link, &block->instructions); + } + + qir_for_each_inst(inst, block) { + if (inst_reads_a_uniform(inst)) + uniform_count++; + } + } +} diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c index 903c6108824..69bd0dd623e 100644 --- a/src/gallium/drivers/vc4/vc4_qir_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c @@ -138,6 +138,7 @@ struct schedule_setup_state { struct schedule_node *last_tex_coord; struct schedule_node *last_tex_result; struct schedule_node *last_tlb; + struct schedule_node *last_uniforms_reset; enum direction dir; /** @@ -280,6 +281,16 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx, calculate_deps(&state, n); + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + switch (inst->src[i].file) { + case QFILE_UNIF: + add_dep(state.dir, state.last_uniforms_reset, n); + break; + default: + break; + } + } + switch (inst->op) { case QOP_TEX_S: case QOP_TEX_T: @@ -324,6 +335,11 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx, memset(&state.tex_fifo[state.tex_fifo_pos], 0, sizeof(state.tex_fifo[0])); break; + + case QOP_UNIFORMS_RESET: + add_write_dep(state.dir, &state.last_uniforms_reset, n); + break; + default: assert(!qir_is_tex(inst)); break; diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 9001643507e..6a10e1b68de 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -427,6 +427,14 @@ vc4_generate_code_block(struct vc4_compile *c, handled_qinst_cond = true; break; + case QOP_UNIFORMS_RESET: + fixup_raddr_conflict(block, dst, 
&src[0], &src[1], + qinst, &unpack); + + queue(block, qpu_a_ADD(qpu_ra(QPU_W_UNIFORMS_ADDRESS), + src[0], src[1])); + break; + default: assert(qinst->op < ARRAY_SIZE(translate)); assert(translate[qinst->op].op != 0); /* NOPs */ diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index a55b0351402..1caee51a581 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -92,6 +92,7 @@ struct schedule_state { struct schedule_node *last_tmu_write; struct schedule_node *last_tlb; struct schedule_node *last_vpm; + struct schedule_node *last_uniforms_reset; enum direction dir; /* Estimated cycle when the current instruction would start. */ uint32_t time; @@ -184,6 +185,9 @@ process_raddr_deps(struct schedule_state *state, struct schedule_node *n, break; case QPU_R_UNIF: + add_read_dep(state, state->last_uniforms_reset, n); + break; + case QPU_R_NOP: case QPU_R_ELEM_QPU: case QPU_R_XY_PIXEL_COORD: @@ -259,6 +263,7 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n, } } else if (is_tmu_write(waddr)) { add_write_dep(state, &state->last_tmu_write, n); + add_read_dep(state, state->last_uniforms_reset, n); } else if (qpu_waddr_is_tlb(waddr) || waddr == QPU_W_MS_FLAGS) { add_write_dep(state, &state->last_tlb, n); @@ -305,6 +310,10 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n, add_write_dep(state, &state->last_tlb, n); break; + case QPU_W_UNIFORMS_ADDRESS: + add_write_dep(state, &state->last_uniforms_reset, n); + break; + case QPU_W_NOP: break; @@ -442,6 +451,7 @@ calculate_reverse_deps(struct vc4_compile *c, struct list_head *schedule_list) struct choose_scoreboard { int tick; int last_sfu_write_tick; + int last_uniforms_reset_tick; uint32_t last_waddr_a, last_waddr_b; }; @@ -476,6 +486,11 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, uint64_t inst) } } + if (reads_uniform(inst) && + scoreboard->tick - 
scoreboard->last_uniforms_reset_tick <= 2) { + return true; + } + return false; } @@ -614,6 +629,11 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard, (waddr_mul >= QPU_W_SFU_RECIP && waddr_mul <= QPU_W_SFU_LOG)) { scoreboard->last_sfu_write_tick = scoreboard->tick; } + + if (waddr_add == QPU_W_UNIFORMS_ADDRESS || + waddr_mul == QPU_W_UNIFORMS_ADDRESS) { + scoreboard->last_uniforms_reset_tick = scoreboard->tick; + } } static void @@ -971,6 +991,7 @@ qpu_schedule_instructions(struct vc4_compile *c) scoreboard.last_waddr_a = ~0; scoreboard.last_waddr_b = ~0; scoreboard.last_sfu_write_tick = -10; + scoreboard.last_uniforms_reset_tick = -10; if (debug) { fprintf(stderr, "Pre-schedule instructions\n"); diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c index 4715a7fffd5..ee21771dd89 100644 --- a/src/gallium/drivers/vc4/vc4_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_uniforms.c @@ -324,6 +324,11 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, case QUNIFORM_SAMPLE_MASK: cl_aligned_u32(&uniforms, vc4->sample_mask); break; + + case QUNIFORM_UNIFORMS_ADDRESS: + /* This will be filled in by the kernel. */ + cl_aligned_u32(&uniforms, 0xd0d0d0d0); + break; } #if 0 uint32_t written_val = *((uint32_t *)uniforms - 1); @@ -345,6 +350,7 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader) for (int i = 0; i < shader->uniforms.count; i++) { switch (shader->uniforms.contents[i]) { case QUNIFORM_CONSTANT: + case QUNIFORM_UNIFORMS_ADDRESS: break; case QUNIFORM_UNIFORM: case QUNIFORM_UBO_ADDR: