broadcom/vc5: Add support for V3Dv4 signal bits.
author Eric Anholt <eric@anholt.net>
Thu, 4 Jan 2018 05:42:33 +0000 (21:42 -0800)
committer Eric Anholt <eric@anholt.net>
Sat, 13 Jan 2018 05:53:45 +0000 (21:53 -0800)
The WRTMUC replaces the implicit uniform loads in the first two texture
instructions.  LDVPM disappears in favor of an ALU op.  LDVARY, LDTMU,
LDTLB, and LDUNIF*RF now write to arbitrary registers, which required
passing the devinfo through to a few more functions.

12 files changed:
src/broadcom/compiler/qpu_schedule.c
src/broadcom/compiler/qpu_validate.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/broadcom/compiler/vir_dump.c
src/broadcom/compiler/vir_register_allocate.c
src/broadcom/compiler/vir_to_qpu.c
src/broadcom/qpu/qpu_disasm.c
src/broadcom/qpu/qpu_instr.c
src/broadcom/qpu/qpu_instr.h
src/broadcom/qpu/qpu_pack.c
src/broadcom/qpu/tests/qpu_disasm.c

index 799da805906daa9f2ed614ca958dfcf0f562b8f6..7fe46202636081de961ae5d0002fd690b565c78a 100644 (file)
@@ -78,6 +78,7 @@ struct schedule_node_child {
 enum direction { F, R };
 
 struct schedule_state {
+        const struct v3d_device_info *devinfo;
         struct schedule_node *last_r[6];
         struct schedule_node *last_rf[64];
         struct schedule_node *last_sf;
@@ -265,6 +266,7 @@ process_uf_deps(struct schedule_state *state, struct schedule_node *n,
 static void
 calculate_deps(struct schedule_state *state, struct schedule_node *n)
 {
+        const struct v3d_device_info *devinfo = state->devinfo;
         struct qinst *qinst = n->inst;
         struct v3d_qpu_instr *inst = &qinst->qpu;
 
@@ -356,12 +358,16 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
                 process_waddr_deps(state, n, inst->alu.mul.waddr,
                                    inst->alu.mul.magic_write);
         }
+        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
+                process_waddr_deps(state, n, inst->sig_addr,
+                                   inst->sig_magic);
+        }
 
-        if (v3d_qpu_writes_r3(inst))
+        if (v3d_qpu_writes_r3(devinfo, inst))
                 add_write_dep(state, &state->last_r[3], n);
-        if (v3d_qpu_writes_r4(inst))
+        if (v3d_qpu_writes_r4(devinfo, inst))
                 add_write_dep(state, &state->last_r[4], n);
-        if (v3d_qpu_writes_r5(inst))
+        if (v3d_qpu_writes_r5(devinfo, inst))
                 add_write_dep(state, &state->last_r[5], n);
 
         if (inst->sig.thrsw) {
@@ -410,6 +416,7 @@ calculate_forward_deps(struct v3d_compile *c, struct list_head *schedule_list)
         struct schedule_state state;
 
         memset(&state, 0, sizeof(state));
+        state.devinfo = c->devinfo;
         state.dir = F;
 
         list_for_each_entry(struct schedule_node, node, schedule_list, link)
@@ -423,6 +430,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list)
         struct schedule_state state;
 
         memset(&state, 0, sizeof(state));
+        state.devinfo = c->devinfo;
         state.dir = R;
 
         for (node = schedule_list->prev; schedule_list != node; node = node->prev) {
@@ -514,7 +522,8 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
 }
 
 static bool
-writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
+writes_too_soon_after_write(const struct v3d_device_info *devinfo,
+                            struct choose_scoreboard *scoreboard,
                             struct qinst *qinst)
 {
         const struct v3d_qpu_instr *inst = &qinst->qpu;
@@ -524,7 +533,7 @@ writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
          * occur if a dead SFU computation makes it to scheduling.
          */
         if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 &&
-            v3d_qpu_writes_r4(inst))
+            v3d_qpu_writes_r4(devinfo, inst))
                 return true;
 
         return false;
@@ -605,7 +614,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
         return (inst->sig.ldvpm ||
                 inst->sig.ldtmu ||
                 inst->sig.ldtlb ||
-                inst->sig.ldtlbu);
+                inst->sig.ldtlbu ||
+                inst->sig.wrtmuc);
 }
 
 static bool
@@ -619,7 +629,11 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
                 return false;
         }
 
-        /* Can't do more than one peripheral access in an instruction. */
+        /* Can't do more than one peripheral access in an instruction.
+         *
+         * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and
+         * WRTMUC with a TMU magic register write (other than tmuc).
+         */
         if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b))
                 return false;
 
@@ -663,6 +677,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
 
         merge.sig.thrsw |= b->sig.thrsw;
         merge.sig.ldunif |= b->sig.ldunif;
+        merge.sig.ldunifrf |= b->sig.ldunifrf;
+        merge.sig.ldunifa |= b->sig.ldunifa;
+        merge.sig.ldunifarf |= b->sig.ldunifarf;
         merge.sig.ldtmu |= b->sig.ldtmu;
         merge.sig.ldvary |= b->sig.ldvary;
         merge.sig.ldvpm |= b->sig.ldvpm;
@@ -673,6 +690,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
         merge.sig.rotate |= b->sig.rotate;
         merge.sig.wrtmuc |= b->sig.wrtmuc;
 
+        if (v3d_qpu_sig_writes_address(devinfo, &a->sig) &&
+            v3d_qpu_sig_writes_address(devinfo, &b->sig))
+                return false;
+        merge.sig_addr |= b->sig_addr;
+        merge.sig_magic |= b->sig_magic;
+
         uint64_t packed;
         bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed);
 
@@ -719,7 +742,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
                 if (reads_too_soon_after_write(scoreboard, n->inst))
                         continue;
 
-                if (writes_too_soon_after_write(scoreboard, n->inst))
+                if (writes_too_soon_after_write(devinfo, scoreboard, n->inst))
                         continue;
 
                 /* "A scoreboard wait must not occur in the first two
@@ -735,7 +758,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
                  * otherwise get scheduled so ldunif and ldvary try to update
                  * r5 in the same tick.
                  */
-                if (inst->sig.ldunif &&
+                if ((inst->sig.ldunif || inst->sig.ldunifa) &&
                     scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
                         continue;
                 }
index d99d76a8bebf42e2d118c6519d9ef8c1b68d59c6..3b2c10eabc6852dee4a2681a8fe0d01af5f0f356 100644 (file)
@@ -85,6 +85,7 @@ qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
 static void
 qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
 {
+        const struct v3d_device_info *devinfo = state->c->devinfo;
         const struct v3d_qpu_instr *inst = &qinst->qpu;
 
         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
@@ -94,7 +95,8 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
          * r5 one instruction later, which is illegal to have
          * together.
          */
-        if (state->last && state->last->sig.ldvary && inst->sig.ldunif) {
+        if (state->last && state->last->sig.ldvary &&
+            (inst->sig.ldunif || inst->sig.ldunifa)) {
                 fail_instr(state, "LDUNIF after a LDVARY");
         }
 
@@ -143,7 +145,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
                 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
                         fail_instr(state, "R4 read too soon after SFU");
 
-                if (v3d_qpu_writes_r4(inst))
+                if (v3d_qpu_writes_r4(devinfo, inst))
                         fail_instr(state, "R4 write too soon after SFU");
 
                 if (sfu_writes)
index 85def2cb02c54635de56b2b9ff04842f30a83738..4ced588fbbe80bfb046f7e6beaf5d6e71b8b23ab 100644 (file)
@@ -650,8 +650,8 @@ bool vir_is_add(struct qinst *inst);
 bool vir_is_mul(struct qinst *inst);
 bool vir_is_float_input(struct qinst *inst);
 bool vir_depends_on_flags(struct qinst *inst);
-bool vir_writes_r3(struct qinst *inst);
-bool vir_writes_r4(struct qinst *inst);
+bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
+bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
 struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
 uint8_t vir_channels_written(struct qinst *inst);
 
index 4e78a477bd7d430cc86b24304125056f094dc4b3..c129bb047e679ce354c21397645af1ea45775ca0 100644 (file)
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  */
 
+#include "broadcom/common/v3d_device_info.h"
 #include "v3d_compiler.h"
 
 int
@@ -198,7 +199,7 @@ vir_depends_on_flags(struct qinst *inst)
 }
 
 bool
-vir_writes_r3(struct qinst *inst)
+vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
 {
         for (int i = 0; i < vir_get_nsrc(inst); i++) {
                 switch (inst->src[i].file) {
@@ -210,11 +211,18 @@ vir_writes_r3(struct qinst *inst)
                 }
         }
 
+        if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
+                                  inst->qpu.sig.ldtlb ||
+                                  inst->qpu.sig.ldtlbu ||
+                                  inst->qpu.sig.ldvpm)) {
+                return true;
+        }
+
         return false;
 }
 
 bool
-vir_writes_r4(struct qinst *inst)
+vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
 {
         switch (inst->dst.file) {
         case QFILE_MAGIC:
@@ -231,7 +239,7 @@ vir_writes_r4(struct qinst *inst)
                 break;
         }
 
-        if (inst->qpu.sig.ldtmu)
+        if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
                 return true;
 
         return false;
index ad5c061a1383b9aa2b444feaa7042476e1de3f40..cdb1928ed006de58ab0150528d324d2d2c6be9cf 100644 (file)
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  */
 
+#include "broadcom/common/v3d_device_info.h"
 #include "v3d_compiler.h"
 
 static void
@@ -145,6 +146,24 @@ vir_print_reg(struct v3d_compile *c, struct qreg reg)
         }
 }
 
+static void
+vir_dump_sig_addr(const struct v3d_device_info *devinfo,
+                  const struct v3d_qpu_instr *instr)
+{ /* Print the signal's write address to stderr as a ".rfN" / ".<name>" suffix. */
+        if (devinfo->ver < 41) /* nothing is printed for versions below 4.1 */
+                return;
+
+        if (!instr->sig_magic) /* non-magic address: shown as a plain rf index */
+                fprintf(stderr, ".rf%d", instr->sig_addr);
+        else {
+                const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr);
+                if (name)
+                        fprintf(stderr, ".%s", name);
+                else /* lookup returned NULL: fall back to the raw number */
+                        fprintf(stderr, ".UNKNOWN%d", instr->sig_addr);
+        }
+}
+
 static void
 vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
 {
@@ -152,14 +171,36 @@ vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
 
         if (sig->thrsw)
                 fprintf(stderr, "; thrsw");
-        if (sig->ldvary)
+        if (sig->ldvary) {
                 fprintf(stderr, "; ldvary");
+                vir_dump_sig_addr(c->devinfo, &inst->qpu);
+        }
         if (sig->ldvpm)
                 fprintf(stderr, "; ldvpm");
-        if (sig->ldtmu)
+        if (sig->ldtmu) {
                 fprintf(stderr, "; ldtmu");
+                vir_dump_sig_addr(c->devinfo, &inst->qpu);
+        }
+        if (sig->ldtlb) {
+                fprintf(stderr, "; ldtlb");
+                vir_dump_sig_addr(c->devinfo, &inst->qpu);
+        }
+        if (sig->ldtlbu) {
+                fprintf(stderr, "; ldtlbu");
+                vir_dump_sig_addr(c->devinfo, &inst->qpu);
+        }
         if (sig->ldunif)
                 fprintf(stderr, "; ldunif");
+        if (sig->ldunifrf) {
+                fprintf(stderr, "; ldunifrf");
+                vir_dump_sig_addr(c->devinfo, &inst->qpu);
+        }
+        if (sig->ldunifa)
+                fprintf(stderr, "; ldunifa");
+        if (sig->ldunifarf) {
+                fprintf(stderr, "; ldunifarf");
+                vir_dump_sig_addr(c->devinfo, &inst->qpu);
+        }
         if (sig->wrtmuc)
                 fprintf(stderr, "; wrtmuc");
 }
index 9ebf2cd69b4ee9ac80076caad38cd214805bbb0a..f39f0c2829b9855f84836be857e41072ba4c2977 100644 (file)
@@ -139,7 +139,7 @@ v3d_register_allocate(struct v3d_compile *c)
                  * result to a temp), nothing else can be stored in r3/r4 across
                  * it.
                  */
-                if (vir_writes_r3(inst)) {
+                if (vir_writes_r3(c->devinfo, inst)) {
                         for (int i = 0; i < c->num_temps; i++) {
                                 if (c->temp_start[i] < ip &&
                                     c->temp_end[i] > ip) {
@@ -149,7 +149,7 @@ v3d_register_allocate(struct v3d_compile *c)
                                 }
                         }
                 }
-                if (vir_writes_r4(inst)) {
+                if (vir_writes_r4(c->devinfo, inst)) {
                         for (int i = 0; i < c->num_temps; i++) {
                                 if (c->temp_start[i] < ip &&
                                     c->temp_end[i] > ip) {
index eeb7b0bc291ec8d6173f6839b7d95e9f3eeb5d84..525638df691837bfaa66f93dfe875f32f3d0afeb 100644 (file)
@@ -264,7 +264,14 @@ v3d_generate_code_block(struct v3d_compile *c,
                 }
 
                 if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
-                        if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
+                        if (v3d_qpu_sig_writes_address(c->devinfo,
+                                                       &qinst->qpu.sig)) {
+                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
+                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
+
+                                qinst->qpu.sig_addr = dst.index;
+                                qinst->qpu.sig_magic = dst.magic;
+                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                 if (nsrc >= 1) {
                                         set_src(&qinst->qpu,
index 5ee834852bd77e90c35ae80f79b23d395e31443e..73b43f8c3d63b5d13a5b8c83b51f9bddb45645e4 100644 (file)
@@ -91,7 +91,8 @@ v3d_qpu_disasm_add(struct disasm_state *disasm,
         int num_src = v3d_qpu_add_op_num_src(instr->alu.add.op);
 
         append(disasm, "%s", v3d_qpu_add_op_name(instr->alu.add.op));
-        append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac));
+        if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig))
+                append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac));
         append(disasm, "%s", v3d_qpu_pf_name(instr->flags.apf));
         append(disasm, "%s", v3d_qpu_uf_name(instr->flags.auf));
 
@@ -130,7 +131,8 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
         append(disasm, "; ");
 
         append(disasm, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op));
-        append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc));
+        if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig))
+                append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc));
         append(disasm, "%s", v3d_qpu_pf_name(instr->flags.mpf));
         append(disasm, "%s", v3d_qpu_uf_name(instr->flags.muf));
 
@@ -161,6 +163,24 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
         }
 }
 
+static void
+v3d_qpu_disasm_sig_addr(struct disasm_state *disasm,
+                        const struct v3d_qpu_instr *instr)
+{ /* Append the signal's write address as a ".rfN" / ".<name>" suffix. */
+        if (disasm->devinfo->ver < 41) /* nothing is appended below 4.1 */
+                return;
+
+        if (!instr->sig_magic) /* non-magic address: shown as a plain rf index */
+                append(disasm, ".rf%d", instr->sig_addr);
+        else {
+                const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr);
+                if (name)
+                        append(disasm, ".%s", name);
+                else /* lookup returned NULL: fall back to the raw number */
+                        append(disasm, ".UNKNOWN%d", instr->sig_addr);
+        }
+}
+
 static void
 v3d_qpu_disasm_sig(struct disasm_state *disasm,
                    const struct v3d_qpu_instr *instr)
@@ -172,6 +192,9 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm,
             !sig->ldvpm &&
             !sig->ldtmu &&
             !sig->ldunif &&
+            !sig->ldunifrf &&
+            !sig->ldunifa &&
+            !sig->ldunifarf &&
             !sig->wrtmuc) {
                 return;
         }
@@ -180,14 +203,36 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm,
 
         if (sig->thrsw)
                 append(disasm, "; thrsw");
-        if (sig->ldvary)
+        if (sig->ldvary) {
                 append(disasm, "; ldvary");
+                v3d_qpu_disasm_sig_addr(disasm, instr);
+        }
         if (sig->ldvpm)
                 append(disasm, "; ldvpm");
-        if (sig->ldtmu)
+        if (sig->ldtmu) {
                 append(disasm, "; ldtmu");
+                v3d_qpu_disasm_sig_addr(disasm, instr);
+        }
+        if (sig->ldtlb) {
+                append(disasm, "; ldtlb");
+                v3d_qpu_disasm_sig_addr(disasm, instr);
+        }
+        if (sig->ldtlbu) {
+                append(disasm, "; ldtlbu");
+                v3d_qpu_disasm_sig_addr(disasm, instr);
+        }
         if (sig->ldunif)
                 append(disasm, "; ldunif");
+        if (sig->ldunifrf) {
+                append(disasm, "; ldunifrf");
+                v3d_qpu_disasm_sig_addr(disasm, instr);
+        }
+        if (sig->ldunifa)
+                append(disasm, "; ldunifa");
+        if (sig->ldunifarf) {
+                append(disasm, "; ldunifarf");
+                v3d_qpu_disasm_sig_addr(disasm, instr);
+        }
         if (sig->wrtmuc)
                 append(disasm, "; wrtmuc");
 }
index 7695e0b93581ee2258d1b876afb00c0281a87312..c07f3802fd484538a92c057315b27588d89a7768 100644 (file)
@@ -23,6 +23,7 @@
 
 #include <stdlib.h>
 #include "util/macros.h"
+#include "broadcom/common/v3d_device_info.h"
 #include "qpu_instr.h"
 
 #ifndef QPU_MASK
@@ -600,7 +601,8 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
 }
 
 bool
-v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
+v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
+                  const struct v3d_qpu_instr *inst)
 {
         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                 if (inst->alu.add.magic_write &&
@@ -614,11 +616,17 @@ v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
                 }
         }
 
+        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
+            inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) {
+                return true;
+        }
+
         return inst->sig.ldvary || inst->sig.ldvpm;
 }
 
 bool
-v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
+v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
+                  const struct v3d_qpu_instr *inst)
 {
         if (inst->sig.ldtmu)
                 return true;
@@ -637,11 +645,17 @@ v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
                 }
         }
 
+        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
+            inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4) {
+                return true;
+        }
+
         return false;
 }
 
 bool
-v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
+v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
+                  const struct v3d_qpu_instr *inst)
 {
         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                 if (inst->alu.add.magic_write &&
@@ -655,7 +669,12 @@ v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
                 }
         }
 
-        return inst->sig.ldvary || inst->sig.ldunif;
+        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
+            inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) {
+                return true;
+        }
+
+        return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
 }
 
 bool
@@ -669,3 +688,18 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
                 (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
                 (mul_nsrc > 1 && inst->alu.mul.b == mux));
 }
+
+bool
+v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
+                           const struct v3d_qpu_sig *sig)
+{ /* True if any signal in *sig writes to an explicit address (sig_addr/sig_magic). */
+        if (devinfo->ver < 41) /* always false for versions below 4.1 */
+                return false;
+
+        return (sig->ldunifrf || /* plain ldunif/ldunifa are not in this list */
+                sig->ldunifarf ||
+                sig->ldvary ||
+                sig->ldtmu ||
+                sig->ldtlb ||
+                sig->ldtlbu);
+}
index a425fae8b250afc1fd3026f43dfceebc5dc183ee..cab1885acc4e8e4e437302bf00a0973050c2d682 100644 (file)
@@ -42,6 +42,9 @@ struct v3d_device_info;
 struct v3d_qpu_sig {
         bool thrsw:1;
         bool ldunif:1;
+        bool ldunifa:1;
+        bool ldunifrf:1;
+        bool ldunifarf:1;
         bool ldtmu:1;
         bool ldvary:1;
         bool ldvpm:1;
@@ -347,6 +350,8 @@ struct v3d_qpu_instr {
         enum v3d_qpu_instr_type type;
 
         struct v3d_qpu_sig sig;
+        uint8_t sig_addr;
+        bool sig_magic; /* If the signal writes to a magic address */
         uint8_t raddr_a;
         uint8_t raddr_b;
         struct v3d_qpu_flags flags;
@@ -403,9 +408,14 @@ bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
 bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
 bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
 bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
-bool v3d_qpu_writes_r3(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
-bool v3d_qpu_writes_r4(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
-bool v3d_qpu_writes_r5(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
+bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
+                       const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
+bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
+                       const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
+bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
+                       const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
 bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
+bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
+                                const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
 
 #endif
index 02aa1b86aa4c935c2fcd4835cb6a4acc6765ec4f..f9fb016f61002cd09ff1676a4438b654b2e13853 100644 (file)
 
 #define VC5_QPU_COND_SHIFT                  46
 #define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
-
-#define VC5_QPU_COND_IFA                    0
-#define VC5_QPU_COND_IFB                    1
-#define VC5_QPU_COND_IFNA                   2
-#define VC5_QPU_COND_IFNB                   3
+#define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)
 
 #define VC5_QPU_MM                          QPU_MASK(45, 45)
 #define VC5_QPU_MA                          QPU_MASK(44, 44)
 
 #define THRSW .thrsw = true
 #define LDUNIF .ldunif = true
+#define LDUNIFRF .ldunifrf = true
+#define LDUNIFA .ldunifa = true
+#define LDUNIFARF .ldunifarf = true
 #define LDTMU .ldtmu = true
 #define LDVARY .ldvary = true
 #define LDVPM .ldvpm = true
@@ -156,6 +155,67 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
         [31] = { SMIMM,                        },
 };
 
+static const struct v3d_qpu_sig v40_sig_map[] = { /* ver == 40; index = packed sig value */
+        /*      MISC    R3      R4      R5 */
+        [0]  = {                               },
+        [1]  = { THRSW,                        },
+        [2]  = {                        LDUNIF },
+        [3]  = { THRSW,                 LDUNIF },
+        [4]  = {                LDTMU,         },
+        [5]  = { THRSW,         LDTMU,         },
+        [6]  = {                LDTMU,  LDUNIF },
+        [7]  = { THRSW,         LDTMU,  LDUNIF },
+        [8]  = {        LDVARY,                },
+        [9]  = { THRSW, LDVARY,                },
+        [10] = {        LDVARY,         LDUNIF },
+        [11] = { THRSW, LDVARY,         LDUNIF },
+        /* 12-13 reserved (left zero-initialized) */
+        [14] = { SMIMM, LDVARY,                },
+        [15] = { SMIMM,                        },
+        [16] = {        LDTLB,                 },
+        [17] = {        LDTLBU,                },
+        [18] = {                        WRTMUC },
+        [19] = { THRSW,                 WRTMUC },
+        [20] = {        LDVARY,         WRTMUC },
+        [21] = { THRSW, LDVARY,         WRTMUC },
+        [22] = { UCB,                          },
+        [23] = { ROT,                          },
+        /* 24-30 reserved (left zero-initialized) */
+        [31] = { SMIMM,         LDTMU,         },
+};
+
+static const struct v3d_qpu_sig v41_sig_map[] = { /* ver >= 41; index = packed sig value */
+        /*      MISC       phys    R5 */
+        [0]  = {                          },
+        [1]  = { THRSW,                   },
+        [2]  = {                   LDUNIF },
+        [3]  = { THRSW,            LDUNIF },
+        [4]  = {           LDTMU,         },
+        [5]  = { THRSW,    LDTMU,         },
+        [6]  = {           LDTMU,  LDUNIF },
+        [7]  = { THRSW,    LDTMU,  LDUNIF },
+        [8]  = {           LDVARY,        },
+        [9]  = { THRSW,    LDVARY,        },
+        [10] = {           LDVARY, LDUNIF },
+        [11] = { THRSW,    LDVARY, LDUNIF },
+        [12] = { LDUNIFRF                 },
+        [13] = { THRSW,    LDUNIFRF       },
+        [14] = { SMIMM,    LDVARY,        },
+        [15] = { SMIMM,                   },
+        [16] = {           LDTLB,         },
+        [17] = {           LDTLBU,        },
+        [18] = {                          WRTMUC },
+        [19] = { THRSW,                   WRTMUC },
+        [20] = {           LDVARY,        WRTMUC },
+        [21] = { THRSW,    LDVARY,        WRTMUC },
+        [22] = { UCB,                     },
+        [23] = { ROT,                     },
+        /* 24-25 are defined below; 26-30 reserved (left zero-initialized) */
+        [24] = {                   LDUNIFA},
+        [25] = { LDUNIFARF                },
+        [31] = { SMIMM,            LDTMU, },
+};
+
 bool
 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                    uint32_t packed_sig,
@@ -164,7 +224,12 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
         if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                 return false;
 
-        *sig = v33_sig_map[packed_sig];
+        if (devinfo->ver >= 41)
+                *sig = v41_sig_map[packed_sig];
+        else if (devinfo->ver == 40)
+                *sig = v40_sig_map[packed_sig];
+        else
+                *sig = v33_sig_map[packed_sig];
 
         /* Signals with zeroed unpacked contents after element 0 are reserved. */
         return (packed_sig == 0 ||
@@ -178,7 +243,12 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
 {
         static const struct v3d_qpu_sig *map;
 
-        map = v33_sig_map;
+        if (devinfo->ver >= 41)
+                map = v41_sig_map;
+        else if (devinfo->ver == 40)
+                map = v40_sig_map;
+        else
+                map = v33_sig_map;
 
         for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
@@ -1063,10 +1133,21 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                                 &instr->sig))
                 return false;
 
-        if (!v3d_qpu_flags_unpack(devinfo,
-                                  QPU_GET_FIELD(packed_instr, VC5_QPU_COND),
-                                  &instr->flags))
-                return false;
+        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
+        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
+                instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
+                instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
+
+                instr->flags.ac = V3D_QPU_COND_NONE;
+                instr->flags.mc = V3D_QPU_COND_NONE;
+                instr->flags.apf = V3D_QPU_PF_NONE;
+                instr->flags.mpf = V3D_QPU_PF_NONE;
+                instr->flags.auf = V3D_QPU_UF_NONE;
+                instr->flags.muf = V3D_QPU_UF_NONE;
+        } else {
+                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
+                        return false;
+        }
 
         instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
         instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
@@ -1164,9 +1245,28 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                         return false;
 
                 uint32_t flags;
-                if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
-                        return false;
+                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
+                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
+                            instr->flags.mc != V3D_QPU_COND_NONE ||
+                            instr->flags.apf != V3D_QPU_PF_NONE ||
+                            instr->flags.mpf != V3D_QPU_PF_NONE ||
+                            instr->flags.auf != V3D_QPU_UF_NONE ||
+                            instr->flags.muf != V3D_QPU_UF_NONE) {
+                                return false;
+                        }
+
+                        flags = instr->sig_addr;
+                        if (instr->sig_magic)
+                                flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
+                } else {
+                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
+                                return false;
+                }
+
                 *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
+        } else {
+                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
+                        return false;
         }
 
         return true;
index 59668a86ecc7cb51f439644b219f3d79e26a1d41..4f6ded73d4889942f3a4c6cad5bce6860ec3dc1d 100644 (file)
@@ -63,6 +63,13 @@ static const struct {
         { 33, 0x041618d57c453000ull, "shl.andn  exp, r3, r2; add.ifb  rf35, r1, r2" },
         { 33, 0x7048e5da49272800ull, "fsub.ifa  rf26, r2.l, rf32; fmul.pushc  sin, r1.h, r1.abs; ldunif" },
 
+        /* v4.1 signals */
+        { 41, 0x1f010520cf60a000ull, "fcmp.andz  rf32, r2.h, r1.h; vfmul  rf20, r0.hh, r3; ldunifa" },
+        { 41, 0x932045e6c16ea000ull, "fcmp  rf38, r2.abs, r5; fmul  rf23.l, r3, r3.abs; ldunifarf.rf1" },
+        { 41, 0xd72f0434e43ae5c0ull, "fcmp  rf52.h, rf23, r5.abs; fmul  rf16.h, rf23, r1; ldunifarf.rf60" },
+        { 41, 0xdb3048eb9d533780ull, "fmax  rf43.l, r3.h, rf30; fmul  rf35.h, r4, r2.l; ldunifarf.r1" },
+        { 41, 0x733620471e6ce700ull, "faddnf  rf7.l, rf28.h, r1.l; fmul  r1, r3.h, r3.abs; ldunifarf.rsqrt2" },
+        { 41, 0x9c094adef634b000ull, "ffloor.ifb  rf30.l, r3; fmul.pushz  rf43.l, r5, r1.h" },
 };
 
 static void