broadcom/vc5: Fix scheduling for a non-SFU R4 write after a dead R4 write.
author: Eric Anholt <eric@anholt.net>
Tue, 7 Nov 2017 17:51:56 +0000 (09:51 -0800)
committer: Eric Anholt <eric@anholt.net>
Tue, 7 Nov 2017 20:57:49 +0000 (12:57 -0800)
The v3d_qpu_writes_r*() helpers were only checking for fixed-function
accumulator writes, not normal ALU writes to those registers.

Fixes fs-discard-exit-2 on simulation (but not HW).

src/broadcom/compiler/qpu_schedule.c
src/broadcom/qpu/qpu_instr.c

index dd221e027ec203fea67219f22519f8c7fae165d5..799da805906daa9f2ed614ca958dfcf0f562b8f6 100644 (file)
@@ -201,13 +201,15 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
                 case V3D_QPU_WADDR_R0:
                 case V3D_QPU_WADDR_R1:
                 case V3D_QPU_WADDR_R2:
-                case V3D_QPU_WADDR_R3:
-                case V3D_QPU_WADDR_R4:
-                case V3D_QPU_WADDR_R5:
                         add_write_dep(state,
                                       &state->last_r[waddr - V3D_QPU_WADDR_R0],
                                       n);
                         break;
+                case V3D_QPU_WADDR_R3:
+                case V3D_QPU_WADDR_R4:
+                case V3D_QPU_WADDR_R5:
+                        /* Handled by v3d_qpu_writes_r*() checks below. */
+                        break;
 
                 case V3D_QPU_WADDR_VPM:
                 case V3D_QPU_WADDR_VPMU:
index 7499170de3d976245de1e2a16b844d77550bf4e1..7695e0b93581ee2258d1b876afb00c0281a87312 100644 (file)
@@ -602,6 +602,18 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
 bool
 v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
 {
+        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
+                if (inst->alu.add.magic_write &&
+                    inst->alu.add.waddr == V3D_QPU_WADDR_R3) {
+                        return true;
+                }
+
+                if (inst->alu.mul.magic_write &&
+                    inst->alu.mul.waddr == V3D_QPU_WADDR_R3) {
+                        return true;
+                }
+        }
+
         return inst->sig.ldvary || inst->sig.ldvpm;
 }
 
@@ -613,12 +625,14 @@ v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
 
         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                 if (inst->alu.add.magic_write &&
-                    v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
+                    (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
+                     v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
                         return true;
                 }
 
                 if (inst->alu.mul.magic_write &&
-                    v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
+                    (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
+                     v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
                         return true;
                 }
         }
@@ -629,6 +643,18 @@ v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
 bool
 v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
 {
+        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
+                if (inst->alu.add.magic_write &&
+                    inst->alu.add.waddr == V3D_QPU_WADDR_R5) {
+                        return true;
+                }
+
+                if (inst->alu.mul.magic_write &&
+                    inst->alu.mul.waddr == V3D_QPU_WADDR_R5) {
+                        return true;
+                }
+        }
+
         return inst->sig.ldvary || inst->sig.ldunif;
 }