panfrost: XMLify invocations
author: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Wed, 26 Aug 2020 17:04:17 +0000 (13:04 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 28 Aug 2020 14:53:53 +0000 (14:53 +0000)
Not so bad :)

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6476>

src/gallium/drivers/panfrost/pan_compute.c
src/gallium/drivers/panfrost/pan_context.c
src/panfrost/bifrost/test/bi_submit.c
src/panfrost/include/panfrost-job.h
src/panfrost/lib/decode.c
src/panfrost/lib/pan_blit.c
src/panfrost/lib/pan_encoder.h
src/panfrost/lib/pan_invocation.c

index 53a23bcb6a0cdd5f352b2eb7192231d762c48261..cef9ec46aaad219df77bde77094978759f04b2af 100644 (file)
@@ -105,6 +105,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         /* TODO: Stub */
         struct midgard_payload_vertex_tiler payload = { 0 };
 
         /* TODO: Stub */
         struct midgard_payload_vertex_tiler payload = { 0 };
+        struct mali_invocation_packed invocation;
         struct mali_draw_packed postfix;
 
         /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
         struct mali_draw_packed postfix;
 
         /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
@@ -139,12 +140,13 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         /* Invoke according to the grid info */
 
 
         /* Invoke according to the grid info */
 
-        panfrost_pack_work_groups_compute(&payload.prefix,
+        panfrost_pack_work_groups_compute(&invocation,
                                           info->grid[0], info->grid[1],
                                           info->grid[2],
                                           info->block[0], info->block[1],
                                           info->block[2],
                                           false);
                                           info->grid[0], info->grid[1],
                                           info->grid[2],
                                           info->block[0], info->block[1],
                                           info->block[2],
                                           false);
+        payload.prefix.invocation = invocation;
 
         panfrost_new_job(&batch->pool, &batch->scoreboard,
                         MALI_JOB_TYPE_COMPUTE, true, 0, &payload,
 
         panfrost_new_job(&batch->pool, &batch->scoreboard,
                         MALI_JOB_TYPE_COMPUTE, true, 0, &payload,
index 4da307dcfcd6ca6d43451684205b831b42dd28d6..640fe9ee30918802f14bca0432ff44c5bfc81aa1 100644 (file)
@@ -327,6 +327,7 @@ panfrost_draw_vbo(
         struct mali_vertex_tiler_prefix vertex_prefix = { 0 }, tiler_prefix = { 0 };
         struct mali_draw_packed vertex_postfix, tiler_postfix;
         struct mali_primitive_packed primitive;
         struct mali_vertex_tiler_prefix vertex_prefix = { 0 }, tiler_prefix = { 0 };
         struct mali_draw_packed vertex_postfix, tiler_postfix;
         struct mali_primitive_packed primitive;
+        struct mali_invocation_packed invocation;
         union midgard_primitive_size primitive_size;
         unsigned vertex_count = ctx->vertex_count;
 
         union midgard_primitive_size primitive_size;
         unsigned vertex_count = ctx->vertex_count;
 
@@ -373,9 +374,12 @@ panfrost_draw_vbo(
 
         panfrost_statistics_record(ctx, info);
 
 
         panfrost_statistics_record(ctx, info);
 
-        panfrost_pack_work_groups_fused(&vertex_prefix, &tiler_prefix,
+        panfrost_pack_work_groups_compute(&invocation,
                                         1, vertex_count, info->instance_count,
                                         1, vertex_count, info->instance_count,
-                                        1, 1, 1);
+                                        1, 1, 1, true);
+
+        vertex_prefix.invocation = invocation;
+        tiler_prefix.invocation = invocation;
 
         /* Emit all sort of descriptors. */
         mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
 
         /* Emit all sort of descriptors. */
         mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
index 8b46569971fb3547a5134bbb57acf438268016ec..73c971df4dd281dc93960b9f0b5adcee811e3710 100644 (file)
@@ -199,11 +199,15 @@ bit_vertex(struct panfrost_device *dev, panfrost_program prog,
                 },
         };
 
                 },
         };
 
-        panfrost_pack_work_groups_compute(&payload.prefix,
+        struct mali_invocation_packed invocation;
+
+        panfrost_pack_work_groups_compute(&invocation,
                         1, 1, 1,
                         1, 1, 1,
                         true);
 
                         1, 1, 1,
                         1, 1, 1,
                         true);
 
+        payload.prefix.invocation = invocation;
+
         struct panfrost_bo *bos[] = {
                 scratchpad, shmem, shader, shader_desc, ubo, var, attr
         };
         struct panfrost_bo *bos[] = {
                 scratchpad, shmem, shader, shader_desc, ubo, var, attr
         };
index 8cb30ed0cbfb4538fdac0cec272fcc149c1230c2..879023eacb417f2af2040a44f09ca23667570406 100644 (file)
@@ -421,42 +421,7 @@ struct mali_payload_write_value {
  */
 
 struct mali_vertex_tiler_prefix {
  */
 
 struct mali_vertex_tiler_prefix {
-        /* This is a dynamic bitfield containing the following things in this order:
-         *
-         * - gl_WorkGroupSize.x
-         * - gl_WorkGroupSize.y
-         * - gl_WorkGroupSize.z
-         * - gl_NumWorkGroups.x
-         * - gl_NumWorkGroups.y
-         * - gl_NumWorkGroups.z
-         *
-         * The number of bits allocated for each number is based on the *_shift
-         * fields below. For example, workgroups_y_shift gives the bit that
-         * gl_NumWorkGroups.y starts at, and workgroups_z_shift gives the bit
-         * that gl_NumWorkGroups.z starts at (and therefore one after the bit
-         * that gl_NumWorkGroups.y ends at). The actual value for each gl_*
-         * value is one more than the stored value, since if any of the values
-         * are zero, then there would be no invocations (and hence no job). If
-         * there were 0 bits allocated to a given field, then it must be zero,
-         * and hence the real value is one.
-         *
-         * Vertex jobs reuse the same job dispatch mechanism as compute jobs,
-         * effectively doing glDispatchCompute(1, vertex_count, instance_count)
-         * where vertex count is the number of vertices.
-         */
-        u32 invocation_count;
-
-        /* Bitfield for shifts:
-         *
-         * size_y_shift : 5
-         * size_z_shift : 5
-         * workgroups_x_shift : 6
-         * workgroups_y_shift : 6
-         * workgroups_z_shift : 6
-         * workgroups_x_shift_2 : 4
-         */
-        u32 invocation_shifts;
-
+        struct mali_invocation_packed invocation;
         struct mali_primitive_packed primitive;
 } __attribute__((packed));
 
         struct mali_primitive_packed primitive;
 } __attribute__((packed));
 
index 78b0a542ae53c4fc91c946452e7372a36e2ad4f4..c6e23b1eda91caca6c17261f3798dba38e080ae2 100644 (file)
@@ -1248,21 +1248,17 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo
         /* Decode invocation_count. See the comment before the definition of
          * invocation_count for an explanation.
          */
         /* Decode invocation_count. See the comment before the definition of
          * invocation_count for an explanation.
          */
+        struct MALI_INVOCATION invocation;
+        struct mali_invocation_packed invocation_packed = p->invocation;
+        MALI_INVOCATION_unpack((const uint8_t *) &invocation_packed, &invocation);
 
 
-        unsigned size_y_shift = bits(p->invocation_shifts, 0, 5);
-        unsigned size_z_shift = bits(p->invocation_shifts, 5, 10);
-        unsigned workgroups_x_shift = bits(p->invocation_shifts, 10, 16);
-        unsigned workgroups_y_shift = bits(p->invocation_shifts, 16, 22);
-        unsigned workgroups_z_shift = bits(p->invocation_shifts, 22, 28);
-        unsigned workgroups_x_shift_2 = bits(p->invocation_shifts, 28, 32);
+        unsigned size_x = bits(invocation.invocations, 0, invocation.size_y_shift) + 1;
+        unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, invocation.size_z_shift) + 1;
+        unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, invocation.workgroups_x_shift) + 1;
 
 
-        unsigned size_x = bits(p->invocation_count, 0, size_y_shift) + 1;
-        unsigned size_y = bits(p->invocation_count, size_y_shift, size_z_shift) + 1;
-        unsigned size_z = bits(p->invocation_count, size_z_shift, workgroups_x_shift) + 1;
-
-        unsigned groups_x = bits(p->invocation_count, workgroups_x_shift, workgroups_y_shift) + 1;
-        unsigned groups_y = bits(p->invocation_count, workgroups_y_shift, workgroups_z_shift) + 1;
-        unsigned groups_z = bits(p->invocation_count, workgroups_z_shift, 32) + 1;
+        unsigned groups_x = bits(invocation.invocations, invocation.workgroups_x_shift, invocation.workgroups_y_shift) + 1;
+        unsigned groups_y = bits(invocation.invocations, invocation.workgroups_y_shift, invocation.workgroups_z_shift) + 1;
+        unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1;
 
         /* Even though we have this decoded, we want to ensure that the
          * representation is "unique" so we don't lose anything by printing only
 
         /* Even though we have this decoded, we want to ensure that the
          * representation is "unique" so we don't lose anything by printing only
@@ -1272,30 +1268,17 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo
          * decode and pack it ourselves! If it is bit exact with what we
          * decoded, we're good to go. */
 
          * decode and pack it ourselves! If it is bit exact with what we
          * decoded, we're good to go. */
 
-        struct mali_vertex_tiler_prefix ref;
+        struct mali_invocation_packed ref;
         panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, graphics);
 
         panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, graphics);
 
-        bool canonical =
-                (p->invocation_count == ref.invocation_count) &&
-                (p->invocation_shifts == ref.invocation_shifts);
-
-        if (!canonical) {
+        if (memcmp(&ref, &invocation_packed, sizeof(ref))) {
                 pandecode_msg("XXX: non-canonical workgroups packing\n");
                 pandecode_msg("XXX: non-canonical workgroups packing\n");
-                pandecode_msg("expected: %X, %X",
-                                ref.invocation_count,
-                                ref.invocation_shifts);
-
-                pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
-                pandecode_prop("size_y_shift = %d", size_y_shift);
-                pandecode_prop("size_z_shift = %d", size_z_shift);
-                pandecode_prop("workgroups_x_shift = %d", workgroups_x_shift);
-                pandecode_prop("workgroups_y_shift = %d", workgroups_y_shift);
-                pandecode_prop("workgroups_z_shift = %d", workgroups_z_shift);
-                pandecode_prop("workgroups_x_shift_2 = %d", workgroups_x_shift_2);
+                MALI_INVOCATION_print(pandecode_dump_stream, &invocation, 1 * 2);
         }
 
         /* Regardless, print the decode */
         }
 
         /* Regardless, print the decode */
-        pandecode_msg("size (%d, %d, %d), count (%d, %d, %d)\n",
+        fprintf(pandecode_dump_stream,
+                        "Invocation (%d, %d, %d) x (%d, %d, %d)\n",
                         size_x, size_y, size_z,
                         groups_x, groups_y, groups_z);
 
                         size_x, size_y, size_z,
                         groups_x, groups_y, groups_z);
 
index f9ca87ea58806b477f412e8c8c8dcbb091053c01..79a91200d8e60400bc62b12e41ec124d5486a5cf 100644 (file)
@@ -346,6 +346,7 @@ panfrost_load_midg(
         struct midgard_payload_vertex_tiler payload = {};
         struct mali_primitive_packed primitive;
         struct mali_draw_packed draw;
         struct midgard_payload_vertex_tiler payload = {};
         struct mali_primitive_packed primitive;
         struct mali_draw_packed draw;
+        struct mali_invocation_packed invocation;
 
         pan_pack(&draw, DRAW, cfg) {
                 cfg.unknown_1 = 0x7;
 
         pan_pack(&draw, DRAW, cfg) {
                 cfg.unknown_1 = 0x7;
@@ -365,10 +366,11 @@ panfrost_load_midg(
                 cfg.unknown_3 = 6;
         }
 
                 cfg.unknown_3 = 6;
         }
 
-        memcpy(&payload.prefix.primitive, &primitive, MALI_DRAW_LENGTH);
-        memcpy(&payload.postfix, &draw, MALI_DRAW_LENGTH);
+        panfrost_pack_work_groups_compute(&invocation, 1, vertex_count, 1, 1, 1, 1, true);
 
 
-        panfrost_pack_work_groups_compute(&payload.prefix, 1, vertex_count, 1, 1, 1, 1, true);
+        payload.prefix.primitive = primitive;
+        memcpy(&payload.postfix, &draw, MALI_DRAW_LENGTH);
+        payload.prefix.invocation = invocation;
 
         panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &payload, sizeof(payload), true);
 }
 
         panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &payload, sizeof(payload), true);
 }
index 0471701dbfb5524458478fd6e9a1a898d9c048a0..9433f02de611e9a18ead79c6956c0ec69c749429 100644 (file)
@@ -34,7 +34,7 @@
 
 void
 panfrost_pack_work_groups_compute(
 
 void
 panfrost_pack_work_groups_compute(
-        struct mali_vertex_tiler_prefix *out,
+        struct mali_invocation_packed *out,
         unsigned num_x,
         unsigned num_y,
         unsigned num_z,
         unsigned num_x,
         unsigned num_y,
         unsigned num_z,
@@ -43,17 +43,6 @@ panfrost_pack_work_groups_compute(
         unsigned size_z,
         bool quirk_graphics);
 
         unsigned size_z,
         bool quirk_graphics);
 
-void
-panfrost_pack_work_groups_fused(
-        struct mali_vertex_tiler_prefix *vertex,
-        struct mali_vertex_tiler_prefix *tiler,
-        unsigned num_x,
-        unsigned num_y,
-        unsigned num_z,
-        unsigned size_x,
-        unsigned size_y,
-        unsigned size_z);
-
 /* Tiler structure size computation */
 
 unsigned
 /* Tiler structure size computation */
 
 unsigned
index cfb5becbf614106f57981d1a6b6d64d069ac4142..4c0f64578511e8395984d885db5c54c2f6c892da 100644 (file)
@@ -41,7 +41,7 @@
 
 void
 panfrost_pack_work_groups_compute(
 
 void
 panfrost_pack_work_groups_compute(
-        struct mali_vertex_tiler_prefix *out,
+        struct mali_invocation_packed *out,
         unsigned num_x,
         unsigned num_y,
         unsigned num_z,
         unsigned num_x,
         unsigned num_y,
         unsigned num_z,
@@ -77,53 +77,24 @@ panfrost_pack_work_groups_compute(
                 shifts[i + 1] = shifts[i] + bit_count;
         }
 
                 shifts[i + 1] = shifts[i] + bit_count;
         }
 
-        /* Quirk: for non-instanced graphics, the blob sets workgroups_z_shift
-         * = 32. This doesn't appear to matter to the hardware, but it's good
-         * to be bit-identical. */
+        pan_pack(out, INVOCATION, cfg) {
+                cfg.invocations = packed;
+                cfg.size_y_shift = shifts[1];
+                cfg.size_z_shift = shifts[2];
+                cfg.workgroups_x_shift = shifts[3];
+                cfg.workgroups_y_shift = shifts[4];
+                cfg.workgroups_z_shift = shifts[5];
 
 
-        if (quirk_graphics && (num_z <= 1))
-                shifts[5] = 32;
+                /* Quirk: for non-instanced graphics, the blob sets
+                 * workgroups_z_shift = 32. This doesn't appear to matter to
+                 * the hardware, but it's good to be bit-identical. */
 
 
-        /* Quirk: for graphics, workgroups_x_shift_2 must be at least 2,
-         * whereas for OpenCL it is simply equal to workgroups_x_shift. For GL
-         * compute, it is always 2 if no barriers are in use, but is equal to
-         * workgroups_x_shift is barriers are in use. */
+                if (quirk_graphics && (num_z <= 1))
+                        cfg.workgroups_z_shift = 32;
 
 
-        unsigned shift_2 = shifts[3];
+                /* Quirk: for graphics, >= 2.  For compute, 2 without barriers
+                 * but equal to workgroups_x_shift with barriers */
 
 
-        if (quirk_graphics)
-                shift_2 = MAX2(shift_2, 2);
-
-        /* Pack them in */
-        uint32_t packed_shifts =
-                (shifts[1] << 0) |
-                (shifts[2] << 5) |
-                (shifts[3] << 10) |
-                (shifts[4] << 16) |
-                (shifts[5] << 22) |
-                (shift_2 << 28);
-
-        /* Upload the packed bitfields */
-        out->invocation_count = packed;
-        out->invocation_shifts = packed_shifts;
-}
-
-/* Packs vertex/tiler descriptors simultaneously */
-void
-panfrost_pack_work_groups_fused(
-        struct mali_vertex_tiler_prefix *vertex,
-        struct mali_vertex_tiler_prefix *tiler,
-        unsigned num_x,
-        unsigned num_y,
-        unsigned num_z,
-        unsigned size_x,
-        unsigned size_y,
-        unsigned size_z)
-{
-        panfrost_pack_work_groups_compute(vertex, num_x, num_y, num_z, size_x, size_y, size_z, true);
-
-        /* Copy results over */
-        tiler->invocation_count = vertex->invocation_count;
-        tiler->invocation_shifts = vertex->invocation_shifts;
+                cfg.unknown_shift = quirk_graphics ? 2 : cfg.workgroups_x_shift;
+        }
 }
 }
-