v3d: Add support for CS workgroup/invocation id intrinsics.
author Eric Anholt <eric@anholt.net>
Fri, 7 Dec 2018 23:47:18 +0000 (15:47 -0800)
committer Eric Anholt <eric@anholt.net>
Mon, 14 Jan 2019 23:40:55 +0000 (15:40 -0800)
We get a payload containing the ivec3 workgroup ID and an int local
invocation index, and we use the core lowering to turn those into the
global invocation ID and local invocation ID ivec3s.

src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/broadcom/compiler/vir_dump.c
src/broadcom/compiler/vir_register_allocate.c

index f10ed5975c1683688271f3976cfe03588a051483..c1889a7d645bab34650c811ffc2fa4a0f11db3f2 100644 (file)
@@ -1899,6 +1899,32 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                  */
                 break;
 
+        case nir_intrinsic_load_num_work_groups:
+                for (int i = 0; i < 3; i++) {
+                        ntq_store_dest(c, &instr->dest, i,
+                                       vir_uniform(c, QUNIFORM_NUM_WORK_GROUPS,
+                                                   i));
+                }
+                break;
+
+        case nir_intrinsic_load_local_invocation_index:
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_SHR(c, c->cs_payload[1],
+                                       vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
+                break;
+
+        case nir_intrinsic_load_work_group_id:
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_AND(c, c->cs_payload[0],
+                                       vir_uniform_ui(c, 0xffff)));
+                ntq_store_dest(c, &instr->dest, 1,
+                               vir_SHR(c, c->cs_payload[0],
+                                       vir_uniform_ui(c, 16)));
+                ntq_store_dest(c, &instr->dest, 2,
+                               vir_AND(c, c->cs_payload[1],
+                                       vir_uniform_ui(c, 0xffff)));
+                break;
+
         default:
                 fprintf(stderr, "Unknown intrinsic: ");
                 nir_print_instr(&instr->instr, stderr);
@@ -2255,7 +2281,8 @@ ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl)
 static void
 nir_to_vir(struct v3d_compile *c)
 {
-        if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+        switch (c->s->info.stage) {
+        case MESA_SHADER_FRAGMENT:
                 c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
                 c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
                 c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
@@ -2270,6 +2297,30 @@ nir_to_vir(struct v3d_compile *c)
                 } else if (c->fs_key->is_lines) {
                         c->line_x = emit_fragment_varying(c, NULL, 0, 0);
                 }
+                break;
+        case MESA_SHADER_COMPUTE:
+                if (c->s->info.system_values_read &
+                    ((1ull << SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
+                     (1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
+                        c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
+                }
+                if (c->s->info.system_values_read &
+                    ((1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
+                        c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
+                }
+
+                /* Set up the division between gl_LocalInvocationIndex and
+                 * wg_in_mem in the payload reg.
+                 */
+                int wg_size = (c->s->info.cs.local_size[0] *
+                               c->s->info.cs.local_size[1] *
+                               c->s->info.cs.local_size[2]);
+                c->local_invocation_index_bits =
+                        ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1;
+                assert(c->local_invocation_index_bits <= 8);
+                break;
+        default:
+                break;
         }
 
         if (c->s->info.stage == MESA_SHADER_FRAGMENT)
@@ -2298,6 +2349,7 @@ const nir_shader_compiler_options v3d_nir_options = {
         .lower_bitfield_extract_to_shifts = true,
         .lower_bitfield_reverse = true,
         .lower_bit_count = true,
+        .lower_cs_local_id_from_index = true,
         .lower_pack_unorm_2x16 = true,
         .lower_pack_snorm_2x16 = true,
         .lower_pack_unorm_4x8 = true,
index a5f99d3dae8eb8899bdb3f50eeaf4f0584a6a596..c61e0c951569de6e8f736be76265d225a478841e 100644 (file)
@@ -259,6 +259,11 @@ enum quniform_contents {
 
         QUNIFORM_ALPHA_REF,
 
+        /* Number of workgroups passed to glDispatchCompute in the dimension
+         * selected by the data value.
+         */
+        QUNIFORM_NUM_WORK_GROUPS,
+
         /**
          * Returns the offset of the scratch buffer for register spilling.
          */
@@ -540,6 +545,9 @@ struct v3d_compile {
         /* Fragment shader payload regs. */
         struct qreg payload_w, payload_w_centroid, payload_z;
 
+        struct qreg cs_payload[2];
+        int local_invocation_index_bits;
+
         uint8_t vattr_sizes[V3D_MAX_VS_INPUTS];
         uint32_t num_vpm_writes;
 
index 55a0212332219dd0ed3b52aaf4fa6aeff21d382e..10105fbd8611e9ea7b2ff91ddd40097dc372ffcb 100644 (file)
@@ -639,6 +639,7 @@ v3d_lower_nir(struct v3d_compile *c)
         }
 
         NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
+        NIR_PASS_V(c->s, nir_lower_system_values);
 }
 
 static void
index 028e2b36c4b4c367d4dd850996d00173d8d1d329..0ec3070dedcfc4e03b0fb4512215ac544f951712 100644 (file)
@@ -108,6 +108,10 @@ vir_dump_uniform(enum quniform_contents contents,
                 fprintf(stderr, "ssbo_size[%d]", data);
                 break;
 
+        case QUNIFORM_NUM_WORK_GROUPS:
+                fprintf(stderr, "num_wg.%c", data < 3 ? "xyz"[data] : '?');
+                break;
+
         default:
                 if (quniform_contents_is_texture_p0(contents)) {
                         fprintf(stderr, "tex[%d].p0: 0x%08x",
index accc07a3a335ac2e4deed5ee143146fa4b98f3e3..79ab5acd7640a513664973a31db53d165c9ac242 100644 (file)
@@ -482,6 +482,7 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled)
                         case 0:
                         case 1:
                         case 2:
+                        case 3:
                                 /* Payload setup instructions: Force allocate
                                  * the dst to the given register (so the MOV
                                  * will disappear).