v3d: Add support for the TMUWT instruction.
author: Eric Anholt <eric@anholt.net>
Thu, 14 Dec 2017 17:28:42 +0000 (09:28 -0800)
committer: Eric Anholt <eric@anholt.net>
Tue, 31 Jul 2018 23:05:04 +0000 (16:05 -0700)
This instruction is used to ensure that TMU stores have been processed
before moving on.  In particular, you need any TMU ops to be done by the
time the shader ends.

src/broadcom/compiler/qpu_schedule.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/broadcom/qpu/qpu_instr.c
src/broadcom/qpu/qpu_instr.h

index 441b6327825da8bea372bf8bc5dd63072e03edda..fb5ecd6410c930d56da52e693381d3e9b44e16c9 100644 (file)
@@ -402,7 +402,7 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
                 add_write_dep(state, &state->last_tmu_config, n);
         }
 
-        if (inst->sig.ldtmu) {
+        if (v3d_qpu_waits_on_tmu(inst)) {
                 /* TMU loads are coming from a FIFO, so ordering is important.
                  */
                 add_write_dep(state, &state->last_tmu_write, n);
@@ -564,7 +564,7 @@ get_instruction_priority(const struct v3d_qpu_instr *inst)
         next_score++;
 
         /* Schedule texture read results collection late to hide latency. */
-        if (inst->sig.ldtmu)
+        if (v3d_qpu_waits_on_tmu(inst))
                 return next_score;
         next_score++;
 
@@ -605,6 +605,9 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
                         return true;
                 }
 
+                if (inst->alu.add.op == V3D_QPU_A_TMUWT)
+                        return true;
+
                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                     inst->alu.mul.magic_write &&
                     qpu_magic_waddr_is_periph(inst->alu.mul.waddr)) {
@@ -910,7 +913,7 @@ static uint32_t magic_waddr_latency(enum v3d_qpu_waddr waddr,
          *
          * because we associate the first load_tmu0 with the *second* tmu0_s.
          */
-        if (v3d_qpu_magic_waddr_is_tmu(waddr) && after->sig.ldtmu)
+        if (v3d_qpu_magic_waddr_is_tmu(waddr) && v3d_qpu_waits_on_tmu(after))
                 return 100;
 
         /* Assume that anything depending on us is consuming the SFU result. */
index a02b5a6404a74bc6486ecf3dfce83bfb4bde3525..33a9942734d0e80c4d459f08d7189e7dc0080b4e 100644 (file)
@@ -940,6 +940,7 @@ VIR_A_ALU0(TIDX)
 VIR_A_ALU0(EIDX)
 VIR_A_ALU1(LDVPMV_IN)
 VIR_A_ALU1(LDVPMV_OUT)
+VIR_A_ALU0(TMUWT)
 
 VIR_A_ALU0(FXCD)
 VIR_A_ALU0(XCD)
index b5539b6ef50b58d02de4dc7b33393e054aba0a7c..86379faa5bb76073a73af40bc58176a88bd28dd7 100644 (file)
@@ -98,6 +98,7 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
                 case V3D_QPU_A_STVPMD:
                 case V3D_QPU_A_STVPMP:
                 case V3D_QPU_A_VPMWT:
+                case V3D_QPU_A_TMUWT:
                         return true;
                 default:
                         break;
@@ -194,6 +195,11 @@ vir_is_tex(struct qinst *inst)
         if (inst->dst.file == QFILE_MAGIC)
                 return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
 
+        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
+            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
+                return true;
+        }
+
         return false;
 }
 
index a7fb4186e1a9997bef6252325951c34656dc7e70..0846cc861741634221e80157e9ce6e7931b66b54 100644 (file)
@@ -525,6 +525,14 @@ v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr)
                  waddr <= V3D_QPU_WADDR_TMUHSLOD));
 }
 
+bool
+v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
+{
+        return (inst->sig.ldtmu ||
+                (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
+                 inst->alu.add.op == V3D_QPU_A_TMUWT));
+}
+
 bool
 v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
 {
index c37abac3cf8bd8749714d2c13ff3e1a44fe4ee24..c2b4ebd199595bb0095cf7737f604b54e7bc9a54 100644 (file)
@@ -452,6 +452,7 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
 bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
+bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
 bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;