panfrost: XMLify Bifrost preload
author: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 21 Aug 2020 00:42:32 +0000 (20:42 -0400)
committer: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Tue, 25 Aug 2020 15:05:36 +0000 (17:05 +0200)
There's a lot of code here since the meaning of this field changes
depending on shader state. The good news is that our careful handling
allows preload registers to be decoded now, which pandecode could not
previously do. Likewise, the cmdstream code to emit this is now much
more obvious.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6440>

src/gallium/drivers/panfrost/pan_cmdstream.c
src/panfrost/bifrost/test/bi_submit.c
src/panfrost/include/panfrost-job.h
src/panfrost/lib/decode.c
src/panfrost/lib/midgard.xml

index 8f9771ba3bc47980e39f55cabcff58e16be29871..620425196ba5ce1f9540b2e9fc733e69414eedd7 100644 (file)
@@ -323,16 +323,22 @@ panfrost_emit_compute_shader(struct panfrost_context *ctx,
 
         if (dev->quirks & IS_BIFROST) {
                 struct mali_bifrost_properties_packed prop;
 
         if (dev->quirks & IS_BIFROST) {
                 struct mali_bifrost_properties_packed prop;
+                struct mali_preload_vertex_packed preload;
 
                 pan_pack(&prop, BIFROST_PROPERTIES, cfg) {
                         cfg.unknown = 0x800000; /* XXX */
                         cfg.uniform_buffer_count = panfrost_ubo_count(ctx, st);
                 }
 
 
                 pan_pack(&prop, BIFROST_PROPERTIES, cfg) {
                         cfg.unknown = 0x800000; /* XXX */
                         cfg.uniform_buffer_count = panfrost_ubo_count(ctx, st);
                 }
 
-                memcpy(&meta->bifrost_props, &prop, sizeof(prop));
+                /* TODO: True compute shaders */
+                pan_pack(&preload, PRELOAD_VERTEX, cfg) {
+                        cfg.uniform_count = ss->uniform_count;
+                        cfg.vertex_id = true;
+                        cfg.instance_id = true;
+                }
 
 
-                meta->bifrost2.preload_regs = 0xC0;
-                meta->bifrost2.uniform_count = ss->uniform_count;
+                memcpy(&meta->bifrost_props, &prop, sizeof(prop));
+                memcpy(&meta->bifrost_preload, &preload, sizeof(preload));
         } else {
                 struct mali_midgard_properties_packed prop;
 
         } else {
                 struct mali_midgard_properties_packed prop;
 
@@ -572,6 +578,7 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx,
 
         if (dev->quirks & IS_BIFROST) {
                 struct mali_bifrost_properties_packed prop;
 
         if (dev->quirks & IS_BIFROST) {
                 struct mali_bifrost_properties_packed prop;
+                struct mali_preload_fragment_packed preload;
 
                 bool no_blend = true;
 
 
                 bool no_blend = true;
 
@@ -584,12 +591,13 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx,
                         cfg.early_z_enable = !fs->can_discard && !fs->writes_depth && no_blend;
                 }
 
                         cfg.early_z_enable = !fs->can_discard && !fs->writes_depth && no_blend;
                 }
 
-                memcpy(&fragmeta->bifrost_props, &prop, sizeof(prop));
-
-                fragmeta->bifrost2.preload_regs = 0x1;
-                SET_BIT(fragmeta->bifrost2.preload_regs, 0x10, fs->reads_frag_coord);
+                pan_pack(&preload, PRELOAD_FRAGMENT, cfg) {
+                        cfg.uniform_count = fs->uniform_count;
+                        cfg.fragment_position = fs->reads_frag_coord;
+                }
 
 
-                fragmeta->bifrost2.uniform_count = fs->uniform_count;
+                memcpy(&fragmeta->bifrost_props, &prop, sizeof(prop));
+                memcpy(&fragmeta->bifrost_preload, &preload, sizeof(preload));
         } else {
                 struct mali_midgard_properties_packed prop;
 
         } else {
                 struct mali_midgard_properties_packed prop;
 
index 3688e2be1a586632d2211844631c9f25a5da8412..542ed72c16ccf33a129a8733e6fc9d18efdea532 100644 (file)
@@ -176,12 +176,7 @@ bit_vertex(struct panfrost_device *dev, panfrost_program prog,
                 .attribute_count = 1,
                 .varying_count = 1,
                 .bifrost_props = { .opaque = { 0x80020001 } },
                 .attribute_count = 1,
                 .varying_count = 1,
                 .bifrost_props = { .opaque = { 0x80020001 } },
-                .bifrost2 = {
-                        .unk3 = 0x0,
-                        .preload_regs = 0xc0,
-                        .uniform_count = sz_ubo / 16,
-                        .unk4 = 0x0,
-                },
+                .bifrost_preload = { .opaque = { (sz_ubo / 16) << 15 } },
         };
 
         memcpy(shader_desc->cpu, &meta, sizeof(meta));
         };
 
         memcpy(shader_desc->cpu, &meta, sizeof(meta));
index af70f56be3ab8ce730938678ca814377a934604e..e2267069860b1028362b515208c623b15c6a32c6 100644 (file)
@@ -326,52 +326,7 @@ struct mali_shader_meta {
         struct mali_stencil_packed stencil_back;
 
         union {
         struct mali_stencil_packed stencil_back;
 
         union {
-                struct {
-                        u32 unk3 : 7;
-                        /* On Bifrost, some system values are preloaded in
-                         * registers R55-R62 by the thread dispatcher prior to
-                         * the start of shader execution. This is a bitfield
-                         * with one entry for each register saying which
-                         * registers need to be preloaded. Right now, the known
-                         * values are:
-                         *
-                         * Vertex/compute:
-                         * - R55 : gl_LocalInvocationID.xy
-                         * - R56 : gl_LocalInvocationID.z + unknown in high 16 bits
-                         * - R57 : gl_WorkGroupID.x
-                         * - R58 : gl_WorkGroupID.y
-                         * - R59 : gl_WorkGroupID.z
-                         * - R60 : gl_GlobalInvocationID.x
-                         * - R61 : gl_GlobalInvocationID.y/gl_VertexID (without base)
-                         * - R62 : gl_GlobalInvocationID.z/gl_InstanceID (without base)
-                         *
-                         * Fragment:
-                         * - R55 : unknown, never seen (but the bit for this is
-                         *   always set?)
-                         * - R56 : unknown (bit always unset)
-                         * - R57 : gl_PrimitiveID
-                         * - R58 : gl_FrontFacing in low bit, potentially other stuff
-                         * - R59 : u16 fragment coordinates (used to compute
-                         *   gl_FragCoord.xy, together with sample positions)
-                         * - R60 : gl_SampleMask (used in epilog, so pretty
-                         *   much always used, but the bit is always 0 -- is
-                         *   this just always pushed?)
-                         * - R61 : gl_SampleMaskIn and gl_SampleID, used by
-                         *   varying interpolation.
-                         * - R62 : unknown (bit always unset).
-                         *
-                         * Later GPUs (starting with Mali-G52?) support
-                         * preloading float varyings into r0-r7. This is
-                         * indicated by setting 0x40. There is no distinction
-                         * here between 1 varying and 2.
-                         */
-                        u32 preload_regs : 8;
-                        /* In units of 8 bytes or 64 bits, since the
-                         * uniform/const port loads 64 bits at a time.
-                         */
-                        u32 uniform_count : 7;
-                        u32 unk4 : 10; // = 2
-                } bifrost2;
+                struct mali_preload_packed bifrost_preload;
                 struct {
                         u32 unknown2_7;
                 } midgard2;
                 struct {
                         u32 unknown2_7;
                 } midgard2;
index 6b2e71ae12bd67ec0cb8e6ed9b639c2d7c7c078f..fcfc590745181f1818dd647641dc82a618b765c6 100644 (file)
@@ -1731,6 +1731,7 @@ pandecode_vertex_tiler_postfix_pre(
 
                 struct MALI_MIDGARD_PROPERTIES midg_props;
                 struct MALI_BIFROST_PROPERTIES bi_props;
 
                 struct MALI_MIDGARD_PROPERTIES midg_props;
                 struct MALI_BIFROST_PROPERTIES bi_props;
+                struct MALI_PRELOAD bi_preload;
 
                 pandecode_log("struct mali_shader_meta shader_meta_%"PRIx64"_%d%s = {\n", p->shader, job_no, suffix);
                 pandecode_indent++;
 
                 pandecode_log("struct mali_shader_meta shader_meta_%"PRIx64"_%d%s = {\n", p->shader, job_no, suffix);
                 pandecode_indent++;
@@ -1745,7 +1746,10 @@ pandecode_vertex_tiler_postfix_pre(
                         uint32_t opaque = s->bifrost_props.opaque[0];
                         MALI_BIFROST_PROPERTIES_unpack((const uint8_t *) &opaque, &bi_props);
 
                         uint32_t opaque = s->bifrost_props.opaque[0];
                         MALI_BIFROST_PROPERTIES_unpack((const uint8_t *) &opaque, &bi_props);
 
-                        uniform_count = s->bifrost2.uniform_count;
+                        opaque = s->bifrost_preload.opaque[0];
+                        MALI_PRELOAD_unpack((const uint8_t *) &opaque, &bi_preload);
+
+                        uniform_count = bi_preload.uniform_count;
                         uniform_buffer_count = bi_props.uniform_buffer_count;
                 } else {
                         uint32_t opaque = s->midgard_props.opaque[0];
                         uniform_buffer_count = bi_props.uniform_buffer_count;
                 } else {
                         uint32_t opaque = s->midgard_props.opaque[0];
@@ -1767,6 +1771,24 @@ pandecode_vertex_tiler_postfix_pre(
                 else
                         MALI_MIDGARD_PROPERTIES_print(pandecode_dump_stream, &midg_props, 2);
 
                 else
                         MALI_MIDGARD_PROPERTIES_print(pandecode_dump_stream, &midg_props, 2);
 
+                if (is_bifrost) {
+                        uint32_t opaque = s->bifrost_preload.opaque[0];
+                        switch (job_type) {
+                        case MALI_JOB_TYPE_VERTEX:
+                                DUMP_CL("Preload", PRELOAD_VERTEX, &opaque, 2);
+                                break;
+                        case MALI_JOB_TYPE_TILER:
+                                DUMP_CL("Preload", PRELOAD_FRAGMENT, &opaque, 2);
+                                break;
+                        case MALI_JOB_TYPE_COMPUTE:
+                                DUMP_CL("Preload", PRELOAD_COMPUTE, &opaque, 2);
+                                break;
+                        default:
+                                DUMP_CL("Preload", PRELOAD, &opaque, 2);
+                                break;
+                        }
+                }
+
                 if (s->depth_units || s->depth_factor) {
                         pandecode_prop("depth_factor = %f", s->depth_factor);
                         pandecode_prop("depth_units = %f", s->depth_units);
                 if (s->depth_units || s->depth_factor) {
                         pandecode_prop("depth_factor = %f", s->depth_factor);
                         pandecode_prop("depth_units = %f", s->depth_units);
@@ -1809,18 +1831,7 @@ pandecode_vertex_tiler_postfix_pre(
                 DUMP_CL("Stencil front", STENCIL, &s->stencil_front, 1);
                 DUMP_CL("Stencil back", STENCIL, &s->stencil_back, 1);
 
                 DUMP_CL("Stencil front", STENCIL, &s->stencil_front, 1);
                 DUMP_CL("Stencil back", STENCIL, &s->stencil_back, 1);
 
-                if (is_bifrost) {
-                        pandecode_log(".bifrost2 = {\n");
-                        pandecode_indent++;
-
-                        pandecode_prop("unk3 = 0x%" PRIx32, s->bifrost2.unk3);
-                        pandecode_prop("preload_regs = 0x%" PRIx32, s->bifrost2.preload_regs);
-                        pandecode_prop("uniform_count = %" PRId32, s->bifrost2.uniform_count);
-                        pandecode_prop("unk4 = 0x%" PRIx32, s->bifrost2.unk4);
-
-                        pandecode_indent--;
-                        pandecode_log("},\n");
-                } else if (s->midgard2.unknown2_7) {
+                if (!is_bifrost && s->midgard2.unknown2_7) {
                         pandecode_log(".midgard2 = {\n");
                         pandecode_indent++;
 
                         pandecode_log(".midgard2 = {\n");
                         pandecode_indent++;
 
index 352398b515f3e203ae83ab5b79d81edef005f6b9..d6fa7e4a134c0ae7374c888fdf32eaf29b822b70 100644 (file)
     <field name="Unknown" size="32" start="0" type="uint"/>
   </struct>
 
     <field name="Unknown" size="32" start="0" type="uint"/>
   </struct>
 
+  <struct name="Preload" size="1">
+    <field name="Untyped" size="15" start="0" type="uint"/>
+    <field name="Uniform count" size="7" start="15" type="uint"/>
+  </struct>
+
+  <struct name="Preload Compute" size="1">
+    <field name="Local Invocation XY" size="1" start="7" type="bool"/>
+    <field name="Local Invocation Z" size="1" start="8" type="bool"/>
+    <field name="Work group X" size="1" start="9" type="bool"/>
+    <field name="Work group Y" size="1" start="10" type="bool"/>
+    <field name="Work group Z" size="1" start="11" type="bool"/>
+    <field name="Global Invocation X" size="1" start="12" type="bool"/>
+    <field name="Global Invocation Y" size="1" start="13" type="bool"/>
+    <field name="Global Invocation Z" size="1" start="14" type="bool"/>
+    <field name="Uniform count" size="7" start="15" type="uint"/>
+  </struct>
+
+  <struct name="Preload Vertex" size="1">
+    <field name="Vertex ID" size="1" start="13" type="bool"/>
+    <field name="Instance ID" size="1" start="14" type="bool"/>
+    <field name="Uniform count" size="7" start="15" type="uint"/>
+  </struct>
+
+  <struct name="Preload Fragment" size="1">
+    <field name="Unknown" size="1" start="7" type="bool" default="true"/>
+    <field name="Primitive ID" size="1" start="9" type="bool"/>
+    <field name="Front facing" size="1" start="10" type="bool"/>
+    <field name="Fragment position" size="1" start="11" type="bool"/>
+    <field name="Sample mask/ID" size="1" start="12" type="bool"/>
+    <field name="Uniform count" size="7" start="15" type="uint"/>
+  </struct>
+
   <struct name="Stencil">
     <field name="Reference Value" size="8" start="0" type="uint"/>
     <field name="Mask" size="8" start="8" type="uint" default="0xFF"/>
   <struct name="Stencil">
     <field name="Reference Value" size="8" start="0" type="uint"/>
     <field name="Mask" size="8" start="8" type="uint" default="0xFF"/>