From dfee62eed3cacbf77ca3168143be6577849c998d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 3 Jan 2018 21:42:33 -0800 Subject: [PATCH] broadcom/vc5: Add support for V3Dv4 signal bits. The WRTMUC replaces the implicit uniform loads in the first two texture instructions. LDVPM disappears in favor of an ALU op. LDVARY, LDTMU, LDTLB, and LDUNIF*RF now write to arbitrary registers, which required passing the devinfo through to a few more functions. --- src/broadcom/compiler/qpu_schedule.c | 41 ++++-- src/broadcom/compiler/qpu_validate.c | 6 +- src/broadcom/compiler/v3d_compiler.h | 4 +- src/broadcom/compiler/vir.c | 14 +- src/broadcom/compiler/vir_dump.c | 45 ++++++- src/broadcom/compiler/vir_register_allocate.c | 4 +- src/broadcom/compiler/vir_to_qpu.c | 9 +- src/broadcom/qpu/qpu_disasm.c | 53 +++++++- src/broadcom/qpu/qpu_instr.c | 42 +++++- src/broadcom/qpu/qpu_instr.h | 16 ++- src/broadcom/qpu/qpu_pack.c | 126 ++++++++++++++++-- src/broadcom/qpu/tests/qpu_disasm.c | 7 + 12 files changed, 322 insertions(+), 45 deletions(-) diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 799da805906..7fe46202636 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -78,6 +78,7 @@ struct schedule_node_child { enum direction { F, R }; struct schedule_state { + const struct v3d_device_info *devinfo; struct schedule_node *last_r[6]; struct schedule_node *last_rf[64]; struct schedule_node *last_sf; @@ -265,6 +266,7 @@ process_uf_deps(struct schedule_state *state, struct schedule_node *n, static void calculate_deps(struct schedule_state *state, struct schedule_node *n) { + const struct v3d_device_info *devinfo = state->devinfo; struct qinst *qinst = n->inst; struct v3d_qpu_instr *inst = &qinst->qpu; @@ -356,12 +358,16 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) process_waddr_deps(state, n, inst->alu.mul.waddr, inst->alu.mul.magic_write); } + if 
(v3d_qpu_sig_writes_address(devinfo, &inst->sig)) { + process_waddr_deps(state, n, inst->sig_addr, + inst->sig_magic); + } - if (v3d_qpu_writes_r3(inst)) + if (v3d_qpu_writes_r3(devinfo, inst)) add_write_dep(state, &state->last_r[3], n); - if (v3d_qpu_writes_r4(inst)) + if (v3d_qpu_writes_r4(devinfo, inst)) add_write_dep(state, &state->last_r[4], n); - if (v3d_qpu_writes_r5(inst)) + if (v3d_qpu_writes_r5(devinfo, inst)) add_write_dep(state, &state->last_r[5], n); if (inst->sig.thrsw) { @@ -410,6 +416,7 @@ calculate_forward_deps(struct v3d_compile *c, struct list_head *schedule_list) struct schedule_state state; memset(&state, 0, sizeof(state)); + state.devinfo = c->devinfo; state.dir = F; list_for_each_entry(struct schedule_node, node, schedule_list, link) @@ -423,6 +430,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list) struct schedule_state state; memset(&state, 0, sizeof(state)); + state.devinfo = c->devinfo; state.dir = R; for (node = schedule_list->prev; schedule_list != node; node = node->prev) { @@ -514,7 +522,8 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, } static bool -writes_too_soon_after_write(struct choose_scoreboard *scoreboard, +writes_too_soon_after_write(const struct v3d_device_info *devinfo, + struct choose_scoreboard *scoreboard, struct qinst *qinst) { const struct v3d_qpu_instr *inst = &qinst->qpu; @@ -524,7 +533,7 @@ writes_too_soon_after_write(struct choose_scoreboard *scoreboard, * occur if a dead SFU computation makes it to scheduling. 
*/ if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 && - v3d_qpu_writes_r4(inst)) + v3d_qpu_writes_r4(devinfo, inst)) return true; return false; @@ -605,7 +614,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst) return (inst->sig.ldvpm || inst->sig.ldtmu || inst->sig.ldtlb || - inst->sig.ldtlbu); + inst->sig.ldtlbu || + inst->sig.wrtmuc); } static bool @@ -619,7 +629,11 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, return false; } - /* Can't do more than one peripheral access in an instruction. */ + /* Can't do more than one peripheral access in an instruction. + * + * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and + * WRTMUC with a TMU magic register write (other than tmuc). + */ if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b)) return false; @@ -663,6 +677,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, merge.sig.thrsw |= b->sig.thrsw; merge.sig.ldunif |= b->sig.ldunif; + merge.sig.ldunifrf |= b->sig.ldunifrf; + merge.sig.ldunifa |= b->sig.ldunifa; + merge.sig.ldunifarf |= b->sig.ldunifarf; merge.sig.ldtmu |= b->sig.ldtmu; merge.sig.ldvary |= b->sig.ldvary; merge.sig.ldvpm |= b->sig.ldvpm; @@ -673,6 +690,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, merge.sig.rotate |= b->sig.rotate; merge.sig.wrtmuc |= b->sig.wrtmuc; + if (v3d_qpu_sig_writes_address(devinfo, &a->sig) && + v3d_qpu_sig_writes_address(devinfo, &b->sig)) + return false; + merge.sig_addr |= b->sig_addr; + merge.sig_magic |= b->sig_magic; + uint64_t packed; bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed); @@ -719,7 +742,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, if (reads_too_soon_after_write(scoreboard, n->inst)) continue; - if (writes_too_soon_after_write(scoreboard, n->inst)) + if (writes_too_soon_after_write(devinfo, scoreboard, n->inst)) continue; /* "A scoreboard wait must not occur in the first two @@ -735,7 +758,7 @@ choose_instruction_to_schedule(const struct 
v3d_device_info *devinfo, * otherwise get scheduled so ldunif and ldvary try to update * r5 in the same tick. */ - if (inst->sig.ldunif && + if ((inst->sig.ldunif || inst->sig.ldunifa) && scoreboard->tick == scoreboard->last_ldvary_tick + 1) { continue; } diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c index d99d76a8beb..3b2c10eabc6 100644 --- a/src/broadcom/compiler/qpu_validate.c +++ b/src/broadcom/compiler/qpu_validate.c @@ -85,6 +85,7 @@ qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst, static void qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) { + const struct v3d_device_info *devinfo = state->c->devinfo; const struct v3d_qpu_instr *inst = &qinst->qpu; if (inst->type != V3D_QPU_INSTR_TYPE_ALU) @@ -94,7 +95,8 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) * r5 one instruction later, which is illegal to have * together. */ - if (state->last && state->last->sig.ldvary && inst->sig.ldunif) { + if (state->last && state->last->sig.ldvary && + (inst->sig.ldunif || inst->sig.ldunifa)) { fail_instr(state, "LDUNIF after a LDVARY"); } @@ -143,7 +145,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4)) fail_instr(state, "R4 read too soon after SFU"); - if (v3d_qpu_writes_r4(inst)) + if (v3d_qpu_writes_r4(devinfo, inst)) fail_instr(state, "R4 write too soon after SFU"); if (sfu_writes) diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 85def2cb02c..4ced588fbbe 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -650,8 +650,8 @@ bool vir_is_add(struct qinst *inst); bool vir_is_mul(struct qinst *inst); bool vir_is_float_input(struct qinst *inst); bool vir_depends_on_flags(struct qinst *inst); -bool vir_writes_r3(struct qinst *inst); -bool vir_writes_r4(struct qinst *inst); +bool vir_writes_r3(const struct 
v3d_device_info *devinfo, struct qinst *inst); +bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst); struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); uint8_t vir_channels_written(struct qinst *inst); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 4e78a477bd7..c129bb047e6 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. */ +#include "broadcom/common/v3d_device_info.h" #include "v3d_compiler.h" int @@ -198,7 +199,7 @@ vir_depends_on_flags(struct qinst *inst) } bool -vir_writes_r3(struct qinst *inst) +vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) { for (int i = 0; i < vir_get_nsrc(inst); i++) { switch (inst->src[i].file) { @@ -210,11 +211,18 @@ vir_writes_r3(struct qinst *inst) } } + if (devinfo->ver < 41 && (inst->qpu.sig.ldvary || + inst->qpu.sig.ldtlb || + inst->qpu.sig.ldtlbu || + inst->qpu.sig.ldvpm)) { + return true; + } + return false; } bool -vir_writes_r4(struct qinst *inst) +vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst) { switch (inst->dst.file) { case QFILE_MAGIC: @@ -231,7 +239,7 @@ vir_writes_r4(struct qinst *inst) break; } - if (inst->qpu.sig.ldtmu) + if (devinfo->ver < 41 && inst->qpu.sig.ldtmu) return true; return false; diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c index ad5c061a138..cdb1928ed00 100644 --- a/src/broadcom/compiler/vir_dump.c +++ b/src/broadcom/compiler/vir_dump.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. 
*/ +#include "broadcom/common/v3d_device_info.h" #include "v3d_compiler.h" static void @@ -145,6 +146,24 @@ vir_print_reg(struct v3d_compile *c, struct qreg reg) } } +static void +vir_dump_sig_addr(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) +{ + if (devinfo->ver < 41) + return; + + if (!instr->sig_magic) + fprintf(stderr, ".rf%d", instr->sig_addr); + else { + const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr); + if (name) + fprintf(stderr, ".%s", name); + else + fprintf(stderr, ".UNKNOWN%d", instr->sig_addr); + } +} + static void vir_dump_sig(struct v3d_compile *c, struct qinst *inst) { @@ -152,14 +171,36 @@ vir_dump_sig(struct v3d_compile *c, struct qinst *inst) if (sig->thrsw) fprintf(stderr, "; thrsw"); - if (sig->ldvary) + if (sig->ldvary) { fprintf(stderr, "; ldvary"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } if (sig->ldvpm) fprintf(stderr, "; ldvpm"); - if (sig->ldtmu) + if (sig->ldtmu) { fprintf(stderr, "; ldtmu"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } + if (sig->ldtlb) { + fprintf(stderr, "; ldtlb"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } + if (sig->ldtlbu) { + fprintf(stderr, "; ldtlbu"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } if (sig->ldunif) fprintf(stderr, "; ldunif"); + if (sig->ldunifrf) { + fprintf(stderr, "; ldunifrf"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } + if (sig->ldunifa) + fprintf(stderr, "; ldunifa"); + if (sig->ldunifarf) { + fprintf(stderr, "; ldunifarf"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } if (sig->wrtmuc) fprintf(stderr, "; wrtmuc"); } diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index 9ebf2cd69b4..f39f0c2829b 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -139,7 +139,7 @@ v3d_register_allocate(struct v3d_compile *c) * result to a temp), nothing else can be stored in r3/r4 across * it. 
*/ - if (vir_writes_r3(inst)) { + if (vir_writes_r3(c->devinfo, inst)) { for (int i = 0; i < c->num_temps; i++) { if (c->temp_start[i] < ip && c->temp_end[i] > ip) { @@ -149,7 +149,7 @@ v3d_register_allocate(struct v3d_compile *c) } } } - if (vir_writes_r4(inst)) { + if (vir_writes_r4(c->devinfo, inst)) { for (int i = 0; i < c->num_temps; i++) { if (c->temp_start[i] < ip && c->temp_end[i] > ip) { diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c index eeb7b0bc291..525638df691 100644 --- a/src/broadcom/compiler/vir_to_qpu.c +++ b/src/broadcom/compiler/vir_to_qpu.c @@ -264,7 +264,14 @@ v3d_generate_code_block(struct v3d_compile *c, } if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { - if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) { + if (v3d_qpu_sig_writes_address(c->devinfo, + &qinst->qpu.sig)) { + assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP); + assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); + + qinst->qpu.sig_addr = dst.index; + qinst->qpu.sig_magic = dst.magic; + } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) { assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); if (nsrc >= 1) { set_src(&qinst->qpu, diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c index 5ee834852bd..73b43f8c3d6 100644 --- a/src/broadcom/qpu/qpu_disasm.c +++ b/src/broadcom/qpu/qpu_disasm.c @@ -91,7 +91,8 @@ v3d_qpu_disasm_add(struct disasm_state *disasm, int num_src = v3d_qpu_add_op_num_src(instr->alu.add.op); append(disasm, "%s", v3d_qpu_add_op_name(instr->alu.add.op)); - append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac)); + if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig)) + append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac)); append(disasm, "%s", v3d_qpu_pf_name(instr->flags.apf)); append(disasm, "%s", v3d_qpu_uf_name(instr->flags.auf)); @@ -130,7 +131,8 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm, append(disasm, "; "); append(disasm, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op)); - append(disasm, "%s", 
v3d_qpu_cond_name(instr->flags.mc)); + if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig)) + append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc)); append(disasm, "%s", v3d_qpu_pf_name(instr->flags.mpf)); append(disasm, "%s", v3d_qpu_uf_name(instr->flags.muf)); @@ -161,6 +163,24 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm, } } +static void +v3d_qpu_disasm_sig_addr(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + if (disasm->devinfo->ver < 41) + return; + + if (!instr->sig_magic) + append(disasm, ".rf%d", instr->sig_addr); + else { + const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr); + if (name) + append(disasm, ".%s", name); + else + append(disasm, ".UNKNOWN%d", instr->sig_addr); + } +} + static void v3d_qpu_disasm_sig(struct disasm_state *disasm, const struct v3d_qpu_instr *instr) @@ -172,6 +192,9 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm, !sig->ldvpm && !sig->ldtmu && !sig->ldunif && + !sig->ldunifrf && + !sig->ldunifa && + !sig->ldunifarf && !sig->wrtmuc) { return; } @@ -180,14 +203,36 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm, if (sig->thrsw) append(disasm, "; thrsw"); - if (sig->ldvary) + if (sig->ldvary) { append(disasm, "; ldvary"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } if (sig->ldvpm) append(disasm, "; ldvpm"); - if (sig->ldtmu) + if (sig->ldtmu) { append(disasm, "; ldtmu"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } + if (sig->ldtlb) { + append(disasm, "; ldtlb"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } + if (sig->ldtlbu) { + append(disasm, "; ldtlbu"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } if (sig->ldunif) append(disasm, "; ldunif"); + if (sig->ldunifrf) { + append(disasm, "; ldunifrf"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } + if (sig->ldunifa) + append(disasm, "; ldunifa"); + if (sig->ldunifarf) { + append(disasm, "; ldunifarf"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } if (sig->wrtmuc) append(disasm, "; wrtmuc"); } diff --git 
a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 7695e0b9358..c07f3802fd4 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -23,6 +23,7 @@ #include #include "util/macros.h" +#include "broadcom/common/v3d_device_info.h" #include "qpu_instr.h" #ifndef QPU_MASK @@ -600,7 +601,8 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) } bool -v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst) +v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *inst) { if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { if (inst->alu.add.magic_write && @@ -614,11 +616,17 @@ v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst) } } + if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && + inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) { + return true; + } + return inst->sig.ldvary || inst->sig.ldvpm; } bool -v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst) +v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *inst) { if (inst->sig.ldtmu) return true; @@ -637,11 +645,17 @@ v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst) } } + if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && + inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4) { + return true; + } + return false; } bool -v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst) +v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *inst) { if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { if (inst->alu.add.magic_write && @@ -655,7 +669,12 @@ v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst) } } - return inst->sig.ldvary || inst->sig.ldunif; + if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && + inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) { + return true; + } + + return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa; } bool @@ -669,3 +688,18 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux) (mul_nsrc > 0 && inst->alu.mul.a == mux) || 
(mul_nsrc > 1 && inst->alu.mul.b == mux)); } + +bool +v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, + const struct v3d_qpu_sig *sig) +{ + if (devinfo->ver < 41) + return false; + + return (sig->ldunifrf || + sig->ldunifarf || + sig->ldvary || + sig->ldtmu || + sig->ldtlb || + sig->ldtlbu); +} diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index a425fae8b25..cab1885acc4 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -42,6 +42,9 @@ struct v3d_device_info; struct v3d_qpu_sig { bool thrsw:1; bool ldunif:1; + bool ldunifa:1; + bool ldunifrf:1; + bool ldunifarf:1; bool ldtmu:1; bool ldvary:1; bool ldvpm:1; @@ -347,6 +350,8 @@ struct v3d_qpu_instr { enum v3d_qpu_instr_type type; struct v3d_qpu_sig sig; + uint8_t sig_addr; + bool sig_magic; /* If the signal writes to a magic address */ uint8_t raddr_a; uint8_t raddr_b; struct v3d_qpu_flags flags; @@ -403,9 +408,14 @@ bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; -bool v3d_qpu_writes_r3(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; -bool v3d_qpu_writes_r4(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; -bool v3d_qpu_writes_r5(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); +bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, + const struct 
v3d_qpu_sig *sig) ATTRIBUTE_CONST; #endif diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c index 02aa1b86aa4..f9fb016f610 100644 --- a/src/broadcom/qpu/qpu_pack.c +++ b/src/broadcom/qpu/qpu_pack.c @@ -55,11 +55,7 @@ #define VC5_QPU_COND_SHIFT 46 #define VC5_QPU_COND_MASK QPU_MASK(52, 46) - -#define VC5_QPU_COND_IFA 0 -#define VC5_QPU_COND_IFB 1 -#define VC5_QPU_COND_IFNA 2 -#define VC5_QPU_COND_IFNB 3 +#define VC5_QPU_COND_SIG_MAGIC_ADDR (1 << 6) #define VC5_QPU_MM QPU_MASK(45, 45) #define VC5_QPU_MA QPU_MASK(44, 44) @@ -113,6 +109,9 @@ #define THRSW .thrsw = true #define LDUNIF .ldunif = true +#define LDUNIFRF .ldunifrf = true +#define LDUNIFA .ldunifa = true +#define LDUNIFARF .ldunifarf = true #define LDTMU .ldtmu = true #define LDVARY .ldvary = true #define LDVPM .ldvpm = true @@ -156,6 +155,67 @@ static const struct v3d_qpu_sig v33_sig_map[] = { [31] = { SMIMM, }, }; +static const struct v3d_qpu_sig v40_sig_map[] = { + /* MISC R3 R4 R5 */ + [0] = { }, + [1] = { THRSW, }, + [2] = { LDUNIF }, + [3] = { THRSW, LDUNIF }, + [4] = { LDTMU, }, + [5] = { THRSW, LDTMU, }, + [6] = { LDTMU, LDUNIF }, + [7] = { THRSW, LDTMU, LDUNIF }, + [8] = { LDVARY, }, + [9] = { THRSW, LDVARY, }, + [10] = { LDVARY, LDUNIF }, + [11] = { THRSW, LDVARY, LDUNIF }, + /* 12-13 reserved */ + [14] = { SMIMM, LDVARY, }, + [15] = { SMIMM, }, + [16] = { LDTLB, }, + [17] = { LDTLBU, }, + [18] = { WRTMUC }, + [19] = { THRSW, WRTMUC }, + [20] = { LDVARY, WRTMUC }, + [21] = { THRSW, LDVARY, WRTMUC }, + [22] = { UCB, }, + [23] = { ROT, }, + /* 24-30 reserved */ + [31] = { SMIMM, LDTMU, }, +}; + +static const struct v3d_qpu_sig v41_sig_map[] = { + /* MISC phys R5 */ + [0] = { }, + [1] = { THRSW, }, + [2] = { LDUNIF }, + [3] = { THRSW, LDUNIF }, + [4] = { LDTMU, }, + [5] = { THRSW, LDTMU, }, + [6] = { LDTMU, LDUNIF }, + [7] = { THRSW, LDTMU, LDUNIF }, + [8] = { LDVARY, }, + [9] = { THRSW, LDVARY, }, + [10] = { LDVARY, LDUNIF }, + [11] = { THRSW, LDVARY, LDUNIF }, + [12] = { 
LDUNIFRF }, + [13] = { THRSW, LDUNIFRF }, + [14] = { SMIMM, LDVARY, }, + [15] = { SMIMM, }, + [16] = { LDTLB, }, + [17] = { LDTLBU, }, + [18] = { WRTMUC }, + [19] = { THRSW, WRTMUC }, + [20] = { LDVARY, WRTMUC }, + [21] = { THRSW, LDVARY, WRTMUC }, + [22] = { UCB, }, + [23] = { ROT, }, + [24] = { LDUNIFA}, + [25] = { LDUNIFARF }, + /* 26-30 reserved */ + [31] = { SMIMM, LDTMU, }, +}; + bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, uint32_t packed_sig, @@ -164,7 +224,12 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, if (packed_sig >= ARRAY_SIZE(v33_sig_map)) return false; - *sig = v33_sig_map[packed_sig]; + if (devinfo->ver >= 41) + *sig = v41_sig_map[packed_sig]; + else if (devinfo->ver == 40) + *sig = v40_sig_map[packed_sig]; + else + *sig = v33_sig_map[packed_sig]; /* Signals with zeroed unpacked contents after element 0 are reserved. */ return (packed_sig == 0 || @@ -178,7 +243,12 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, { static const struct v3d_qpu_sig *map; - map = v33_sig_map; + if (devinfo->ver >= 41) + map = v41_sig_map; + else if (devinfo->ver == 40) + map = v40_sig_map; + else + map = v33_sig_map; for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) { if (memcmp(&map[i], sig, sizeof(*sig)) == 0) { @@ -1063,10 +1133,21 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo, &instr->sig)) return false; - if (!v3d_qpu_flags_unpack(devinfo, - QPU_GET_FIELD(packed_instr, VC5_QPU_COND), - &instr->flags)) - return false; + uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND); + if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) { + instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR; + instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR; + + instr->flags.ac = V3D_QPU_COND_NONE; + instr->flags.mc = V3D_QPU_COND_NONE; + instr->flags.apf = V3D_QPU_PF_NONE; + instr->flags.mpf = V3D_QPU_PF_NONE; + instr->flags.auf = V3D_QPU_UF_NONE; + instr->flags.muf = V3D_QPU_UF_NONE; + }
else { + if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags)) + return false; + } instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A); instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B); @@ -1164,9 +1245,28 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo, return false; uint32_t flags; - if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags)) - return false; + if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) { + if (instr->flags.ac != V3D_QPU_COND_NONE || + instr->flags.mc != V3D_QPU_COND_NONE || + instr->flags.apf != V3D_QPU_PF_NONE || + instr->flags.mpf != V3D_QPU_PF_NONE || + instr->flags.auf != V3D_QPU_UF_NONE || + instr->flags.muf != V3D_QPU_UF_NONE) { + return false; + } + + flags = instr->sig_addr; + if (instr->sig_magic) + flags |= VC5_QPU_COND_SIG_MAGIC_ADDR; + } else { + if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags)) + return false; + } + *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND); + } else { + if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) + return false; } return true; diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c index 59668a86ecc..4f6ded73d48 100644 --- a/src/broadcom/qpu/tests/qpu_disasm.c +++ b/src/broadcom/qpu/tests/qpu_disasm.c @@ -63,6 +63,13 @@ static const struct { { 33, 0x041618d57c453000ull, "shl.andn exp, r3, r2; add.ifb rf35, r1, r2" }, { 33, 0x7048e5da49272800ull, "fsub.ifa rf26, r2.l, rf32; fmul.pushc sin, r1.h, r1.abs; ldunif" }, + /* v4.1 signals */ + { 41, 0x1f010520cf60a000ull, "fcmp.andz rf32, r2.h, r1.h; vfmul rf20, r0.hh, r3; ldunifa" }, + { 41, 0x932045e6c16ea000ull, "fcmp rf38, r2.abs, r5; fmul rf23.l, r3, r3.abs; ldunifarf.rf1" }, + { 41, 0xd72f0434e43ae5c0ull, "fcmp rf52.h, rf23, r5.abs; fmul rf16.h, rf23, r1; ldunifarf.rf60" }, + { 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" }, + { 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; 
ldunifarf.rsqrt2" }, + { 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" }, }; static void -- 2.30.2