vc4: Add support for QPU scheduling of thread switch instructions.
author: Eric Anholt <eric@anholt.net>
Fri, 11 Nov 2016 02:02:37 +0000 (18:02 -0800)
committer: Eric Anholt <eric@anholt.net>
Sun, 13 Nov 2016 02:46:35 +0000 (18:46 -0800)
This is loosely based on Jonas Pfeil's thread switch support branch.

src/gallium/drivers/vc4/vc4_qpu_schedule.c

index 680191542b822e0b49f04041f2fb70b0f7d3dafa..45360f73410c01c81f47389912923dc9fa2954e6 100644 (file)
@@ -385,12 +385,27 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
         switch (sig) {
         case QPU_SIG_SW_BREAKPOINT:
         case QPU_SIG_NONE:
-        case QPU_SIG_THREAD_SWITCH:
-        case QPU_SIG_LAST_THREAD_SWITCH:
         case QPU_SIG_SMALL_IMM:
         case QPU_SIG_LOAD_IMM:
                 break;
 
+        case QPU_SIG_THREAD_SWITCH:
+        case QPU_SIG_LAST_THREAD_SWITCH:
+                /* All accumulator contents and flags are undefined after the
+                 * switch.
+                 */
+                for (int i = 0; i < ARRAY_SIZE(state->last_r); i++)
+                        add_write_dep(state, &state->last_r[i], n);
+                add_write_dep(state, &state->last_sf, n);
+
+                /* Scoreboard-locking operations have to stay after the last
+                 * thread switch.
+                 */
+                add_write_dep(state, &state->last_tlb, n);
+
+                add_write_dep(state, &state->last_tmu_write, n);
+                break;
+
         case QPU_SIG_LOAD_TMU0:
         case QPU_SIG_LOAD_TMU1:
                 /* TMU loads are coming from a FIFO, so ordering is important.
@@ -902,6 +917,16 @@ schedule_instructions(struct vc4_compile *c,
                         qpu_serialize_one_inst(c, inst);
                         qpu_serialize_one_inst(c, inst);
                         qpu_serialize_one_inst(c, inst);
+                } else if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_THREAD_SWITCH ||
+                           QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LAST_THREAD_SWITCH) {
+                        /* The thread switch occurs after two delay slots.  We
+                         * should fit things in these slots, but we don't
+                         * currently.
+                         */
+                        inst = qpu_NOP();
+                        update_scoreboard_for_chosen(scoreboard, inst);
+                        qpu_serialize_one_inst(c, inst);
+                        qpu_serialize_one_inst(c, inst);
                 }
         }