broadcom/vc5: Use THRSW to enable multi-threaded shaders.
[mesa.git] / src / broadcom / compiler / qpu_validate.c
index 3b2c10eabc6852dee4a2681a8fe0d01af5f0f356..4ef587c1d52bdab30ce7afceeb09de1c01e36592 100644 (file)
@@ -39,6 +39,10 @@ struct v3d_qpu_validate_state {
         const struct v3d_qpu_instr *last;
         int ip;
         int last_sfu_write;
+        int last_branch_ip;
+        int last_thrsw_ip;
+        bool last_thrsw_found;
+        int thrsw_count;
 };
 
 static void
@@ -62,6 +66,18 @@ fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
         abort();
 }
 
+static bool
+in_branch_delay_slots(struct v3d_qpu_validate_state *state)
+{
+        return (state->ip - state->last_branch_ip) < 3;
+}
+
+static bool
+in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
+{
+        return (state->ip - state->last_thrsw_ip) < 3;
+}
+
 static bool
 qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
                         bool (*predicate)(enum v3d_qpu_waddr waddr))
@@ -136,6 +152,19 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
                 }
         }
 
+        if (in_thrsw_delay_slots(state)) {
+                /* There's no way you want to start SFU during the THRSW delay
+                 * slots, since the result would land in the other thread.
+                 */
+                if (sfu_writes) {
+                        fail_instr(state,
+                                   "SFU write started during THRSW delay slots ");
+                }
+
+                if (inst->sig.ldvary)
+                        fail_instr(state, "LDVARY during THRSW delay slots");
+        }
+
         (void)qpu_magic_waddr_matches; /* XXX */
 
         /* SFU r4 results come back two instructions later.  No doing
@@ -170,6 +199,35 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
 
         if (sfu_writes)
                 state->last_sfu_write = state->ip;
+
+        if (inst->sig.thrsw) {
+                if (in_branch_delay_slots(state))
+                        fail_instr(state, "THRSW in a branch delay slot.");
+
+                if (state->last_thrsw_ip == state->ip - 1) {
+                        /* If it's the second THRSW in a row, then it's just a
+                         * last-thrsw signal.
+                         */
+                        if (state->last_thrsw_found)
+                                fail_instr(state, "Two last-THRSW signals");
+                        state->last_thrsw_found = true;
+                } else {
+                        if (in_thrsw_delay_slots(state)) {
+                                fail_instr(state,
+                                           "THRSW too close to another THRSW.");
+                        }
+                        state->thrsw_count++;
+                        state->last_thrsw_ip = state->ip;
+                }
+        }
+
+        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
+                if (in_branch_delay_slots(state))
+                        fail_instr(state, "branch in a branch delay slot.");
+                if (in_thrsw_delay_slots(state))
+                        fail_instr(state, "branch in a THRSW delay slot.");
+                state->last_branch_ip = state->ip;
+        }
 }
 
 static void
@@ -201,10 +259,22 @@ qpu_validate(struct v3d_compile *c)
         struct v3d_qpu_validate_state state = {
                 .c = c,
                 .last_sfu_write = -10,
+                .last_thrsw_ip = -10,
+                .last_branch_ip = -10,
                 .ip = 0,
         };
 
         vir_for_each_block(block, c) {
                 qpu_validate_block(&state, block);
         }
+
+        if (state.thrsw_count > 1 && !state.last_thrsw_found) {
+                fail_instr(&state,
+                           "thread switch found without last-THRSW in program");
+        }
+
+        if (state.thrsw_count == 0 ||
+            (state.last_thrsw_found && state.thrsw_count == 1)) {
+                fail_instr(&state, "No program-end THRSW found");
+        }
 }