vc4_opt_copy_propagation.c \
vc4_opt_cse.c \
vc4_opt_dead_code.c \
+ vc4_opt_small_immediates.c \
vc4_packet.h \
vc4_program.c \
vc4_qir.c \
int tmu = waddr > QPU_W_TMU0_B;
bool submit = is_tmu_submit(waddr);
bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
if (is_direct) {
uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
uint32_t clamp_offset = ~0;
+ if (sig == QPU_SIG_SMALL_IMM) {
+ DRM_ERROR("direct TMU read used small immediate\n");
+ return false;
+ }
+
/* Make sure that this texture load is an add of the base
* address of the UBO to a clamped offset within the UBO.
*/
validation_state->tmu_setup[tmu].is_direct = true;
} else {
- if (raddr_a == QPU_R_UNIF || raddr_b == QPU_R_UNIF) {
+ if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
+ raddr_b == QPU_R_UNIF)) {
DRM_ERROR("uniform read in the same instruction as "
"texture setup.\n");
return false;
uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
bool is_b = inst & QPU_WS;
uint32_t live_reg_index;
return;
if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
- !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+ !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
+ sig != QPU_SIG_SMALL_IMM)) {
return;
}
{
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
if (raddr_a == QPU_R_UNIF ||
- raddr_b == QPU_R_UNIF) {
+ (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
/* This can't overflow the uint32_t, because we're reading 8
* bytes of instruction to increment by 4 here, so we'd
* already be OOM.
case QPU_SIG_LOAD_TMU0:
case QPU_SIG_LOAD_TMU1:
case QPU_SIG_PROG_END:
+ case QPU_SIG_SMALL_IMM:
if (!check_instruction_writes(inst, validated_shader,
&validation_state)) {
DRM_ERROR("Bad write at ip %d\n", ip);
fprintf(stderr, "\n");
}
+/**
+ * Reports whether @reg directly names the 32-bit constant @val.
+ *
+ * Two encodings are recognized: a QFILE_UNIF register whose uniform slot is
+ * a QUNIFORM_CONSTANT holding @val, and a QFILE_SMALL_IMM register whose
+ * index equals @val.  @defs is accepted but not consulted here.
+ */
+static bool
+is_constant_value(struct vc4_compile *c, struct qinst **defs, struct qreg reg,
+                  uint32_t val)
+{
+        switch (reg.file) {
+        case QFILE_UNIF:
+                return c->uniform_contents[reg.index] == QUNIFORM_CONSTANT &&
+                       c->uniform_data[reg.index] == val;
+        case QFILE_SMALL_IMM:
+                return reg.index == val;
+        default:
+                return false;
+        }
+}
+
static bool
is_zero(struct vc4_compile *c, struct qinst **defs, struct qreg reg)
{
reg = qir_follow_movs(defs, reg);
-
- return (reg.file == QFILE_UNIF &&
- c->uniform_contents[reg.index] == QUNIFORM_CONSTANT &&
- c->uniform_data[reg.index] == 0);
+ return is_constant_value(c, defs, reg, 0); /* reg has already been passed
+                                             * through qir_follow_movs();
+                                             * matches 0 held as a constant
+                                             * uniform or as a small
+                                             * immediate */
}
static bool
is_1f(struct vc4_compile *c, struct qinst **defs, struct qreg reg)
{
reg = qir_follow_movs(defs, reg);
-
- return (reg.file == QFILE_UNIF &&
- c->uniform_contents[reg.index] == QUNIFORM_CONSTANT &&
- c->uniform_data[reg.index] == fui(1.0));
+ return is_constant_value(c, defs, reg, fui(1.0)); /* reg has already been
+                                                    * passed through
+                                                    * qir_follow_movs();
+                                                    * fui(1.0) is presumably
+                                                    * the bit pattern of
+                                                    * float 1.0 -- confirm */
}
static void
--- /dev/null
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_opt_small_immediates.c
+ *
+ * Turns references to small constant uniform values into small immediate
+ * fields.
+ */
+
+#include "vc4_qir.h"
+#include "vc4_qpu.h"
+
+static bool debug;
+
+/**
+ * Makes one pass over c->instructions.  For every instruction that does not
+ * already have a QFILE_SMALL_IMM source, the first source that
+ * qir_follow_movs() resolves to a QUNIFORM_CONSTANT uniform whose value
+ * qpu_encode_small_immediate() accepts is rewritten in place to a
+ * QFILE_SMALL_IMM reference carrying that value.  At most one source per
+ * instruction is rewritten.
+ *
+ * Returns true if any instruction was modified.
+ */
+bool
+qir_opt_small_immediates(struct vc4_compile *c)
+{
+        struct qinst *defs[c->num_temps];
+        struct simple_node *node;
+        bool progress = false;
+
+        foreach(node, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+
+                /* Remember which instruction writes each temp; this table
+                 * is what qir_follow_movs() is handed below.
+                 */
+                if (inst->dst.file == QFILE_TEMP)
+                        defs[inst->dst.index] = inst;
+
+                /* The small immediate lives in the raddr B field, so a
+                 * single instruction cannot carry two of them (two equal
+                 * values would fit, but that case should be optimized away
+                 * elsewhere).  Leave instructions that already have one
+                 * alone.
+                 */
+                bool has_small_imm = false;
+                for (int s = 0; s < qir_get_op_nsrc(inst->op); s++)
+                        has_small_imm |= inst->src[s].file == QFILE_SMALL_IMM;
+                if (has_small_imm)
+                        continue;
+
+                const bool is_tex_op = inst->op == QOP_TEX_S ||
+                                       inst->op == QOP_TEX_T ||
+                                       inst->op == QOP_TEX_R ||
+                                       inst->op == QOP_TEX_B;
+
+                for (int s = 0; s < qir_get_op_nsrc(inst->op); s++) {
+                        const struct qreg resolved =
+                                qir_follow_movs(defs, inst->src[s]);
+
+                        const bool is_const_unif =
+                                resolved.file == QFILE_UNIF &&
+                                c->uniform_contents[resolved.index] ==
+                                QUNIFORM_CONSTANT;
+                        if (!is_const_unif)
+                                continue;
+
+                        /* Source 1 of the TEX ops is the implicit uniform
+                         * read and must not become an immediate.
+                         */
+                        if (is_tex_op && s == 1)
+                                continue;
+
+                        const uint32_t value = c->uniform_data[resolved.index];
+                        const uint32_t encoded =
+                                qpu_encode_small_immediate(value);
+                        if (encoded == ~0)
+                                continue;
+
+                        if (debug) {
+                                fprintf(stderr, "opt_small_immediate() from: ");
+                                qir_dump_inst(c, inst);
+                                fprintf(stderr, "\n");
+                        }
+
+                        /* The unencoded value is stored; encoding happens
+                         * again when the instruction is emitted.
+                         */
+                        inst->src[s].file = QFILE_SMALL_IMM;
+                        inst->src[s].index = value;
+
+                        if (debug) {
+                                fprintf(stderr, "to: ");
+                                qir_dump_inst(c, inst);
+                                fprintf(stderr, "\n");
+                        }
+
+                        progress = true;
+                        break;
+                }
+        }
+
+        return progress;
+}
static void
qir_print_reg(struct vc4_compile *c, struct qreg reg)
{
- const char *files[] = {
+ static const char *files[] = {
[QFILE_TEMP] = "t",
[QFILE_VARY] = "v",
[QFILE_UNIF] = "u",
};
- if (reg.file == QFILE_NULL)
+ if (reg.file == QFILE_NULL) {
fprintf(stderr, "null");
- else
+ } else if (reg.file == QFILE_SMALL_IMM) {
+ if ((int)reg.index >= -16 && (int)reg.index <= 15)
+ fprintf(stderr, "%d", reg.index);
+ else
+ fprintf(stderr, "%f", uif(reg.index));
+ } else {
fprintf(stderr, "%s%d", files[reg.file], reg.index);
+ }
if (reg.file == QFILE_UNIF &&
c->uniform_contents[reg.index] == QUNIFORM_CONSTANT) {
OPTPASS(qir_opt_cse);
OPTPASS(qir_opt_copy_propagation);
OPTPASS(qir_opt_dead_code);
+ OPTPASS(qir_opt_small_immediates);
if (!progress)
break;
QFILE_TEMP,
QFILE_VARY,
QFILE_UNIF,
+
+ /**
+ * Stores an immediate value in the index field that can be turned
+ * into a small immediate field by qpu_encode_small_immediate().
+ */
+ QFILE_SMALL_IMM,
};
struct qreg {
bool qir_opt_copy_propagation(struct vc4_compile *c);
bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
+bool qir_opt_small_immediates(struct vc4_compile *c);
void qpu_schedule_instructions(struct vc4_compile *c);
#include "vc4_qir.h"
#include "vc4_qpu.h"
+/* Encodes a source mux into the instruction field named by muxfield.  The
+ * non-hardware QPU_MUX_SMALL_IMM value is emitted as QPU_MUX_B, since the
+ * small immediate itself is carried in the raddr B field.
+ *
+ * mux is parenthesized so that compound arguments bind correctly against
+ * the comparison; it is still evaluated twice, so it must have no side
+ * effects.
+ */
+#define QPU_MUX(mux, muxfield)                                           \
+        QPU_SET_FIELD((mux) != QPU_MUX_SMALL_IMM ? (mux) : QPU_MUX_B, muxfield)
+
static uint64_t
set_src_raddr(uint64_t inst, struct qpu_reg src)
{
}
if (src.mux == QPU_MUX_B) {
- assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
- QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
+ assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
+ QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&
+ QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);
return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
}
+ if (src.mux == QPU_MUX_SMALL_IMM) {
+ if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
+ assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
+ } else {
+ inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);
+ assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);
+ }
+ return ((inst & ~QPU_RADDR_B_MASK) |
+ QPU_SET_FIELD(src.addr, QPU_RADDR_B));
+ }
+
return inst;
}
{
uint64_t inst = 0;
+ inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
inst |= qpu_a_dst(dst);
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
- inst |= QPU_SET_FIELD(src.mux, QPU_ADD_A);
- inst |= QPU_SET_FIELD(src.mux, QPU_ADD_B);
+ inst |= QPU_MUX(src.mux, QPU_ADD_A);
+ inst |= QPU_MUX(src.mux, QPU_ADD_B);
inst = set_src_raddr(inst, src);
- inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
return inst;
{
uint64_t inst = 0;
+ inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
inst |= qpu_m_dst(dst);
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
- inst |= QPU_SET_FIELD(src.mux, QPU_MUL_A);
- inst |= QPU_SET_FIELD(src.mux, QPU_MUL_B);
+ inst |= QPU_MUX(src.mux, QPU_MUL_A);
+ inst |= QPU_MUX(src.mux, QPU_MUL_B);
inst = set_src_raddr(inst, src);
- inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
return inst;
{
uint64_t inst = 0;
+ inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
inst |= qpu_a_dst(dst);
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
- inst |= QPU_SET_FIELD(src0.mux, QPU_ADD_A);
+ inst |= QPU_MUX(src0.mux, QPU_ADD_A);
inst = set_src_raddr(inst, src0);
- inst |= QPU_SET_FIELD(src1.mux, QPU_ADD_B);
+ inst |= QPU_MUX(src1.mux, QPU_ADD_B);
inst = set_src_raddr(inst, src1);
- inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
return inst;
{
uint64_t inst = 0;
+ inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
inst |= qpu_m_dst(dst);
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
- inst |= QPU_SET_FIELD(src0.mux, QPU_MUL_A);
+ inst |= QPU_MUX(src0.mux, QPU_MUL_A);
inst = set_src_raddr(inst, src0);
- inst |= QPU_SET_FIELD(src1.mux, QPU_MUL_B);
+ inst |= QPU_MUX(src1.mux, QPU_MUL_B);
inst = set_src_raddr(inst, src1);
- inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
return inst;
if (raddr_a == QPU_R_MUTEX_ACQUIRE)
accesses++;
- if (raddr_b == QPU_R_MUTEX_ACQUIRE)
+ if (raddr_b == QPU_R_MUTEX_ACQUIRE &&
+ QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)
accesses++;
/* XXX: semaphore, combined color read/write? */
{
uint64_t merge = a | b;
bool ok = true;
+ uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);
+ uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);
if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
return 0;
- if (QPU_GET_FIELD(a, QPU_SIG) == QPU_SIG_LOAD_IMM ||
- QPU_GET_FIELD(b, QPU_SIG) == QPU_SIG_LOAD_IMM) {
+ if (a_sig == QPU_SIG_LOAD_IMM ||
+ b_sig == QPU_SIG_LOAD_IMM ||
+ a_sig == QPU_SIG_SMALL_IMM ||
+ b_sig == QPU_SIG_SMALL_IMM) {
return 0;
}
sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
}
+/**
+ * Maps a 32-bit value to the small immediate code to be placed in the
+ * raddr b field, or returns ~0 if the value has no such encoding.
+ *
+ * Integers 0..15 map to codes 0..15 and integers -16..-1 to codes 16..31.
+ * The float bit patterns for 1.0, 2.0, ... 128.0 map to codes 32..39, and
+ * those for 1/256, 1/128, ... 1/2 map to codes 40..47.
+ */
+uint32_t
+qpu_encode_small_immediate(uint32_t i)
+{
+        /* Float bit patterns in code order; entry k encodes as 32 + k. */
+        static const uint32_t float_patterns[] = {
+                0x3f800000, 0x40000000, 0x40800000, 0x41000000,
+                0x41800000, 0x42000000, 0x42800000, 0x43000000,
+                0x3b800000, 0x3c000000, 0x3c800000, 0x3d000000,
+                0x3d800000, 0x3e000000, 0x3e800000, 0x3f000000,
+        };
+        const uint32_t num_patterns =
+                sizeof(float_patterns) / sizeof(float_patterns[0]);
+
+        if (i < 16)
+                return i;
+
+        /* -16..-1 seen as unsigned: the addition wraps around to 16..31. */
+        if (i >= (uint32_t)-16)
+                return i + 32;
+
+        for (uint32_t k = 0; k < num_patterns; k++) {
+                if (float_patterns[k] == i)
+                        return 32 + k;
+        }
+
+        return ~0;
+}
+
void
qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
{
uint64_t qpu_set_sig(uint64_t inst, uint32_t sig);
uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond);
uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond);
+uint32_t qpu_encode_small_immediate(uint32_t i);
bool qpu_waddr_is_tlb(uint32_t waddr);
bool qpu_inst_is_tlb(uint64_t inst);
QPU_MUX_A,
QPU_MUX_B,
- /* non-hardware mux values */
- QPU_MUX_IMM,
+ /**
+ * Non-hardware mux value; the small immediate field to be programmed
+ * into raddr_b is stored in the qpu_reg.addr.
+ */
+ QPU_MUX_SMALL_IMM,
};
enum qpu_cond {
else if (si <= 39)
fprintf(stderr, "%.1f", (float)(1 << (si - 32)));
else if (si <= 47)
- fprintf(stderr, "%f", 1.0f / (256 / (si - 39)));
+ fprintf(stderr, "%f", 1.0f / (1 << (48 - si)));
else
- fprintf(stderr, "???");
+ fprintf(stderr, "<bad imm %d>", si);
} else if (raddr <= 31)
fprintf(stderr, "r%s%d", file, raddr);
else {
switch (src->addr) {
case QPU_R_UNIF:
case QPU_R_VARY:
- if (src->mux == QPU_MUX_A)
- src->mux = QPU_MUX_B;
- else
- src->mux = QPU_MUX_A;
- return true;
+ if (src->mux == QPU_MUX_SMALL_IMM) {
+ return false;
+ } else {
+ if (src->mux == QPU_MUX_A)
+ src->mux = QPU_MUX_B;
+ else
+ src->mux = QPU_MUX_A;
+ return true;
+ }
default:
return false;
struct qpu_reg *src0, struct qpu_reg *src1,
bool r3_live)
{
- if ((src0->mux != QPU_MUX_A && src0->mux != QPU_MUX_B) ||
- src0->mux != src1->mux ||
- src0->addr == src1->addr) {
+ uint32_t mux0 = src0->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src0->mux;
+ uint32_t mux1 = src1->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src1->mux;
+
+ if (mux0 <= QPU_MUX_R5 ||
+ mux0 != mux1 ||
+ (src0->addr == src1->addr &&
+ src0->mux == src1->mux)) {
return false;
}
if (swap_file(src0) || swap_file(src1))
return false;
- if (src0->mux == QPU_MUX_A) {
+ if (mux0 == QPU_MUX_A) {
/* If we're conflicting over the A regfile, then we can just
* use the reserved rb31.
*/
case QFILE_VARY:
src[i] = qpu_vary();
break;
+ case QFILE_SMALL_IMM:
+ src[i].mux = QPU_MUX_SMALL_IMM;
+ src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
+ /* This should only have returned a valid
+ * small immediate field, not ~0 for failure.
+ */
+ assert(src[i].addr <= 47);
+ break;
}
}
break;
case QFILE_VARY:
case QFILE_UNIF:
+ case QFILE_SMALL_IMM:
assert(!"not reached");
break;
}
return false;
return (QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_UNIF ||
- QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF ||
+ (QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF &&
+ QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM) ||
is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_MUL)));
}
if (sig != QPU_SIG_LOAD_IMM) {
process_raddr_deps(state, n, raddr_a, true);
- process_raddr_deps(state, n, raddr_b, false);
+ if (sig != QPU_SIG_SMALL_IMM)
+ process_raddr_deps(state, n, raddr_b, false);
}
if (add_op != QPU_A_NOP) {
{
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
uint32_t src_muxes[] = {
QPU_GET_FIELD(inst, QPU_ADD_A),
QPU_GET_FIELD(inst, QPU_ADD_B),
raddr_a < 32 &&
scoreboard->last_waddr_a == raddr_a) ||
(src_muxes[i] == QPU_MUX_B &&
+ sig != QPU_SIG_SMALL_IMM &&
raddr_b < 32 &&
scoreboard->last_waddr_b == raddr_b)) {
return true;
return true;
if (!ignore_b &&
+ QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM &&
src_regs[i].mux == QPU_MUX_B &&
(QPU_GET_FIELD(inst, QPU_RADDR_B) == r))
return true;