vc4: Add a thread switch QIR instruction.
author Eric Anholt <eric@anholt.net>
Thu, 10 Nov 2016 23:23:19 +0000 (15:23 -0800)
committer Eric Anholt <eric@anholt.net>
Sun, 13 Nov 2016 02:46:35 +0000 (18:46 -0800)
This will eventually be generated at the QIR level, so that
vc4_qir_schedule.c can arrange the separation of tex_strb from tex_result
correctly.  It will also be important so that register allocation sets the
register classes appropriately for values that are live across the switch.

src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c

index a34eccf5091d8ccf7963b09dffe8206d022b7fc6..f9c9703e3f9b41393706b5a2286264a1464e3880 100644 (file)
@@ -82,6 +82,8 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_TEX_DIRECT] = { "tex_direct", 0, 2, true },
         [QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
 
+        [QOP_THRSW] = { "thrsw", 0, 0, true },
+
         [QOP_LOAD_IMM] = { "load_imm", 0, 1 },
         [QOP_LOAD_IMM_U2] = { "load_imm_u2", 0, 1 },
         [QOP_LOAD_IMM_I2] = { "load_imm_i2", 0, 1 },
index c76aeb2bf4eb883c67426cc55b4c334d310fbc66..7f321c46868f45cb384d42bedcdf9c9eb7a50ee0 100644 (file)
@@ -156,6 +156,16 @@ enum qop {
          */
         QOP_TEX_RESULT,
 
+        /**
+         * Insert the signal for switching threads in a threaded fragment
+         * shader.  No value can be live in an accumulator across a thrsw.
+         *
+         * At the QPU level, this will have several delay slots before the
+         * switch happens.  Those slots are the responsibility of the
+         * scheduler.
+         */
+        QOP_THRSW,
+
         /* 32-bit immediate loaded to each SIMD channel */
         QOP_LOAD_IMM,
 
index eedee55a9f58e06c545c6a7a1410d0bcc2630bb6..58fcbc93d97c6bea0031c6942dd7570b9ab3020c 100644 (file)
@@ -500,6 +500,12 @@ vc4_generate_code_block(struct vc4_compile *c,
                         handle_r4_qpu_write(block, qinst, dst);
                         break;
 
+                case QOP_THRSW:
+                        queue(block, qpu_NOP());
+                        *last_inst(block) = qpu_set_sig(*last_inst(block),
+                                                        QPU_SIG_THREAD_SWITCH);
+                        break;
+
                 case QOP_BRANCH:
                         /* The branch target will be updated at QPU scheduling
                          * time.