v3d: handle tlb read dependency tracking as if they were writes
[mesa.git] / src / broadcom / compiler / qpu_schedule.c
index d275940c136024859150232a37e39c95472bafc2..b8e04f6ea1303c07d8c7dd723c86ebc60bbc6590 100644 (file)
@@ -128,6 +128,9 @@ add_write_dep(struct schedule_state *state,
 static bool
 qpu_inst_is_tlb(const struct v3d_qpu_instr *inst)
 {
+        if (inst->sig.ldtlb || inst->sig.ldtlbu)
+                return true;
+
         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                 return false;
 
@@ -153,7 +156,10 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n,
                 add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
                 break;
         case V3D_QPU_MUX_B:
-                add_read_dep(state, state->last_rf[n->inst->qpu.raddr_b], n);
+                if (!n->inst->qpu.sig.small_imm) {
+                        add_read_dep(state,
+                                     state->last_rf[n->inst->qpu.raddr_b], n);
+                }
                 break;
         default:
                 add_read_dep(state, state->last_r[mux - V3D_QPU_MUX_R0], n);
@@ -376,7 +382,7 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
                 add_write_dep(state, &state->last_tmu_config, n);
 
         if (inst->sig.ldtlb | inst->sig.ldtlbu)
-                add_read_dep(state, state->last_tlb, n);
+                add_write_dep(state, &state->last_tlb, n);
 
         if (inst->sig.ldvpm) {
                 add_write_dep(state, &state->last_vpm_read, n);
@@ -601,6 +607,37 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
                 inst->sig.wrtmuc);
 }
 
+static bool
+qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
+                                 const struct v3d_qpu_instr *a,
+                                 const struct v3d_qpu_instr *b)
+{
+        const bool a_uses_peripheral = qpu_accesses_peripheral(a);
+        const bool b_uses_peripheral = qpu_accesses_peripheral(b);
+
+        /* A conflict is only possible if both a and b access a peripheral. */
+        if (!a_uses_peripheral || !b_uses_peripheral)
+                return true;
+
+        if (devinfo->ver < 41)
+                return false; /* Both access one; no pairing before ver 41. */
+
+        /* V3D 4.1 and later allow an ldtmu alongside a VPM read or write, and
+         * a WRTMUC alongside a TMU magic register write (other than tmuc).
+         */
+        if ((a->sig.ldtmu && v3d_qpu_uses_vpm(b)) ||
+            (b->sig.ldtmu && v3d_qpu_uses_vpm(a))) {
+                return true;
+        }
+
+        if ((a->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(b)) ||
+            (b->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(a))) {
+                return true;
+        }
+
+        return false; /* Any other pair of peripheral accesses can't merge. */
+}
+
 static bool
 qpu_merge_inst(const struct v3d_device_info *devinfo,
                struct v3d_qpu_instr *result,
@@ -612,12 +649,7 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
                 return false;
         }
 
-        /* Can't do more than one peripheral access in an instruction.
-         *
-         * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and
-         * WRTMUC with a TMU magic register write (other than tmuc).
-         */
-        if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b))
+        if (!qpu_compatible_peripheral_access(devinfo, a, b))
                 return false;
 
         struct v3d_qpu_instr merge = *a;