vc4_opt_vpm.c \
vc4_program.c \
vc4_qir.c \
+ vc4_qir_emit_uniform_stream_resets.c \
vc4_qir_live_variables.c \
vc4_qir_lower_uniforms.c \
vc4_qir_schedule.c \
qir_lower_uniforms(c);
qir_schedule_instructions(c);
+ qir_emit_uniform_stream_resets(c);
if (vc4_debug & VC4_DEBUG_QIR) {
fprintf(stderr, "%s prog %d/%d QIR:\n",
[QOP_LOAD_IMM] = { "load_imm", 0, 1 },
[QOP_BRANCH] = { "branch", 0, 0, true },
+ [QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true },
};
static const char *
* that block->successor[1] may be unset if the condition is ALWAYS.
*/
QOP_BRANCH,
+
+ /* Emits an ADD from src[0] to src[1], where src[0] must be a
+ * QOP_LOAD_IMM result and src[1] is a QUNIFORM_UNIFORMS_ADDRESS,
+ * required by the kernel as part of its branch validation.
+ */
+ QOP_UNIFORMS_RESET,
};
struct queued_qpu_inst {
QUNIFORM_ALPHA_REF,
QUNIFORM_SAMPLE_MASK,
+
+ /* Placeholder uniform that will be updated by the kernel when used by
+ * an instruction writing to QPU_W_UNIFORMS_ADDRESS.
+ */
+ QUNIFORM_UNIFORMS_ADDRESS,
};
struct vc4_varying_slot {
uint32_t data);
void qir_schedule_instructions(struct vc4_compile *c);
void qir_reorder_uniforms(struct vc4_compile *c);
+void qir_emit_uniform_stream_resets(struct vc4_compile *c);
struct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst);
struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst);
--- /dev/null
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_qir_emit_uniform_stream_resets.c
+ *
+ * Adds updates to the uniform stream address at the start of each basic block
+ * that uses uniforms.
+ *
+ * This will be done just before the translation to QPU instructions, once we
+ * have performed optimization know how many uniforms are used in each block.
+ */
+
+#include "vc4_qir.h"
+#include "util/hash_table.h"
+#include "util/u_math.h"
+
+static bool
+inst_reads_a_uniform(struct qinst *inst)
+{
+ if (qir_is_tex(inst))
+ return true;
+
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file == QFILE_UNIF)
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+block_reads_any_uniform(struct qblock *block)
+{
+ qir_for_each_inst(inst, block) {
+ if (inst_reads_a_uniform(inst))
+ return true;
+ }
+
+ return false;
+}
+
+void
+qir_emit_uniform_stream_resets(struct vc4_compile *c)
+{
+ uint32_t uniform_count = 0;
+
+ qir_for_each_block(block, c) {
+ if (block != qir_entry_block(c) &&
+ (block_reads_any_uniform(block) ||
+ block == qir_exit_block(c))) {
+ struct qreg t = qir_get_temp(c);
+ struct qreg uni_addr =
+ qir_uniform(c, QUNIFORM_UNIFORMS_ADDRESS, 0);
+
+ /* Load the offset of the next uniform in the stream
+ * after the one we're generating here.
+ */
+ struct qinst *load_imm =
+ qir_inst(QOP_LOAD_IMM,
+ t,
+ qir_reg(QFILE_LOAD_IMM,
+ (uniform_count + 1) * 4),
+ c->undef);
+ struct qinst *add =
+ qir_inst(QOP_UNIFORMS_RESET, c->undef,
+ t, uni_addr);
+
+ /* Pushes to the top of the block, so in reverse
+ * order.
+ */
+ list_add(&add->link, &block->instructions);
+ list_add(&load_imm->link, &block->instructions);
+ }
+
+ qir_for_each_inst(inst, block) {
+ if (inst_reads_a_uniform(inst))
+ uniform_count++;
+ }
+ }
+}
struct schedule_node *last_tex_coord;
struct schedule_node *last_tex_result;
struct schedule_node *last_tlb;
+ struct schedule_node *last_uniforms_reset;
enum direction dir;
/**
calculate_deps(&state, n);
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ switch (inst->src[i].file) {
+ case QFILE_UNIF:
+ add_dep(state.dir, state.last_uniforms_reset, n);
+ break;
+ default:
+ break;
+ }
+ }
+
switch (inst->op) {
case QOP_TEX_S:
case QOP_TEX_T:
memset(&state.tex_fifo[state.tex_fifo_pos], 0,
sizeof(state.tex_fifo[0]));
break;
+
+ case QOP_UNIFORMS_RESET:
+ add_write_dep(state.dir, &state.last_uniforms_reset, n);
+ break;
+
default:
assert(!qir_is_tex(inst));
break;
handled_qinst_cond = true;
break;
+ case QOP_UNIFORMS_RESET:
+ fixup_raddr_conflict(block, dst, &src[0], &src[1],
+ qinst, &unpack);
+
+ queue(block, qpu_a_ADD(qpu_ra(QPU_W_UNIFORMS_ADDRESS),
+ src[0], src[1]));
+ break;
+
default:
assert(qinst->op < ARRAY_SIZE(translate));
assert(translate[qinst->op].op != 0); /* NOPs */
struct schedule_node *last_tmu_write;
struct schedule_node *last_tlb;
struct schedule_node *last_vpm;
+ struct schedule_node *last_uniforms_reset;
enum direction dir;
/* Estimated cycle when the current instruction would start. */
uint32_t time;
break;
case QPU_R_UNIF:
+ add_read_dep(state, state->last_uniforms_reset, n);
+ break;
+
case QPU_R_NOP:
case QPU_R_ELEM_QPU:
case QPU_R_XY_PIXEL_COORD:
}
} else if (is_tmu_write(waddr)) {
add_write_dep(state, &state->last_tmu_write, n);
+ add_read_dep(state, state->last_uniforms_reset, n);
} else if (qpu_waddr_is_tlb(waddr) ||
waddr == QPU_W_MS_FLAGS) {
add_write_dep(state, &state->last_tlb, n);
add_write_dep(state, &state->last_tlb, n);
break;
+ case QPU_W_UNIFORMS_ADDRESS:
+ add_write_dep(state, &state->last_uniforms_reset, n);
+ break;
+
case QPU_W_NOP:
break;
struct choose_scoreboard {
int tick;
int last_sfu_write_tick;
+ int last_uniforms_reset_tick;
uint32_t last_waddr_a, last_waddr_b;
};
}
}
+ if (reads_uniform(inst) &&
+ scoreboard->tick - scoreboard->last_uniforms_reset_tick <= 2) {
+ return true;
+ }
+
return false;
}
(waddr_mul >= QPU_W_SFU_RECIP && waddr_mul <= QPU_W_SFU_LOG)) {
scoreboard->last_sfu_write_tick = scoreboard->tick;
}
+
+ if (waddr_add == QPU_W_UNIFORMS_ADDRESS ||
+ waddr_mul == QPU_W_UNIFORMS_ADDRESS) {
+ scoreboard->last_uniforms_reset_tick = scoreboard->tick;
+ }
}
static void
scoreboard.last_waddr_a = ~0;
scoreboard.last_waddr_b = ~0;
scoreboard.last_sfu_write_tick = -10;
+ scoreboard.last_uniforms_reset_tick = -10;
if (debug) {
fprintf(stderr, "Pre-schedule instructions\n");
case QUNIFORM_SAMPLE_MASK:
cl_aligned_u32(&uniforms, vc4->sample_mask);
break;
+
+ case QUNIFORM_UNIFORMS_ADDRESS:
+ /* This will be filled in by the kernel. */
+ cl_aligned_u32(&uniforms, 0xd0d0d0d0);
+ break;
}
#if 0
uint32_t written_val = *((uint32_t *)uniforms - 1);
for (int i = 0; i < shader->uniforms.count; i++) {
switch (shader->uniforms.contents[i]) {
case QUNIFORM_CONSTANT:
+ case QUNIFORM_UNIFORMS_ADDRESS:
break;
case QUNIFORM_UNIFORM:
case QUNIFORM_UBO_ADDR: