v3d: Add support for CS barrier() intrinsics.
author: Eric Anholt <eric@anholt.net>
Mon, 10 Dec 2018 06:03:42 +0000 (22:03 -0800)
committer: Eric Anholt <eric@anholt.net>
Mon, 14 Jan 2019 23:40:55 +0000 (15:40 -0800)
src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/qpu_schedule.c
src/broadcom/compiler/v3d_compiler.h

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 3c24d5dfd9cdfba87caa287d84346cd2faf3aecf..a5e75f650e8927fae92e71fb10231efa65f123e6 100644 (file)
 #define GENERAL_TMU_WRITE_OP_ATOMIC_XOR              (10 << 3)
 #define GENERAL_TMU_WRITE_OP_WRITE                   (15 << 3)
 
+#define V3D_TSY_SET_QUORUM          0
+#define V3D_TSY_INC_WAITERS         1
+#define V3D_TSY_DEC_WAITERS         2
+#define V3D_TSY_INC_QUORUM          3
+#define V3D_TSY_DEC_QUORUM          4
+#define V3D_TSY_FREE_ALL            5
+#define V3D_TSY_RELEASE             6
+#define V3D_TSY_ACQUIRE             7
+#define V3D_TSY_WAIT                8
+#define V3D_TSY_WAIT_INC            9
+#define V3D_TSY_WAIT_CHECK          10
+#define V3D_TSY_WAIT_INC_CHECK      11
+#define V3D_TSY_WAIT_CV             12
+#define V3D_TSY_INC_SEMAPHORE       13
+#define V3D_TSY_DEC_SEMAPHORE       14
+#define V3D_TSY_SET_QUORUM_FREE_ALL 15
+
 static void
 ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
 
@@ -1937,6 +1954,33 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                  */
                 break;
 
+        case nir_intrinsic_barrier:
+                /* Emit a TSY op to get all invocations in the workgroup
+                 * (actually supergroup) to block until the last invocation
+                 * reaches the TSY op.
+                 */
+                if (c->devinfo->ver >= 42) {
+                        vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC,
+                                                      V3D_QPU_WADDR_SYNCB));
+                } else {
+                        struct qinst *sync =
+                                vir_BARRIERID_dest(c,
+                                                   vir_reg(QFILE_MAGIC,
+                                                           V3D_QPU_WADDR_SYNCU));
+                        sync->src[vir_get_implicit_uniform_src(sync)] =
+                                vir_uniform_ui(c,
+                                               0xffffff00 |
+                                               V3D_TSY_WAIT_INC_CHECK);
+
+                }
+
+                /* The blocking of a TSY op only happens at the next thread
+                 * switch.  No texturing may be outstanding at the time of a
+                 * TSY blocking operation.
+                 */
+                vir_emit_thrsw(c);
+                break;
+
         case nir_intrinsic_load_num_work_groups:
                 for (int i = 0; i < 3; i++) {
                         ntq_store_dest(c, &instr->dest, i,
@@ -2337,6 +2381,12 @@ nir_to_vir(struct v3d_compile *c)
                 }
                 break;
         case MESA_SHADER_COMPUTE:
+                /* Set up the TSO for barriers, assuming we do some. */
+                if (c->devinfo->ver < 42) {
+                        vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC,
+                                                      V3D_QPU_WADDR_SYNC));
+                }
+
                 if (c->s->info.system_values_read &
                     ((1ull << SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
                      (1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index be794a88c14b20f919ee52b22ac3a5b387fd8ac2..0f8001ff52d24889df2e74625a3bf62ebab4fc9d 100644 (file)
@@ -236,6 +236,16 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
                         add_write_dep(state, &state->last_tlb, n);
                         break;
 
+                case V3D_QPU_WADDR_SYNC:
+                case V3D_QPU_WADDR_SYNCB:
+                case V3D_QPU_WADDR_SYNCU:
+                        /* For CS barrier(): Sync against any other memory
+                         * accesses.  There doesn't appear to be any need for
+                         * barriers to affect ALU operations.
+                         */
+                        add_write_dep(state, &state->last_tmu_write, n);
+                        break;
+
                 case V3D_QPU_WADDR_NOP:
                         break;
 
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 8cf6c5605f264347f4cdc80f1fb0e031f2edb919..de56d7e4f107cac614ad63366a9c48237aed10c2 100644 (file)
@@ -1003,6 +1003,7 @@ VIR_A_ALU0(FYCD)
 VIR_A_ALU0(YCD)
 VIR_A_ALU0(MSF)
 VIR_A_ALU0(REVF)
+VIR_A_ALU0(BARRIERID)
 VIR_A_NODST_1(VPMSETUP)
 VIR_A_NODST_0(VPMWT)
 VIR_A_ALU2(FCMP)