From 7a95ed2ecf4883bb637dc865aeb28ff393480ecc Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 20 Aug 2020 20:42:32 -0400 Subject: [PATCH] panfrost: XMLify Bifrost preload There's a lot of code here since the meaning of this field changes depending on shader state. The good news is that our careful handling allows preload registers to be decoded now, which pandecode could not previously do. Likewise, the cmdstream code to emit this is now much more obvious. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Tomeu Vizoso Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 24 ++++++---- src/panfrost/bifrost/test/bi_submit.c | 7 +-- src/panfrost/include/panfrost-job.h | 47 +------------------- src/panfrost/lib/decode.c | 37 +++++++++------ src/panfrost/lib/midgard.xml | 32 +++++++++++++ 5 files changed, 74 insertions(+), 73 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 8f9771ba3bc..620425196ba 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -323,16 +323,22 @@ panfrost_emit_compute_shader(struct panfrost_context *ctx, if (dev->quirks & IS_BIFROST) { struct mali_bifrost_properties_packed prop; + struct mali_preload_vertex_packed preload; pan_pack(&prop, BIFROST_PROPERTIES, cfg) { cfg.unknown = 0x800000; /* XXX */ cfg.uniform_buffer_count = panfrost_ubo_count(ctx, st); } - memcpy(&meta->bifrost_props, &prop, sizeof(prop)); + /* TODO: True compute shaders */ + pan_pack(&preload, PRELOAD_VERTEX, cfg) { + cfg.uniform_count = ss->uniform_count; + cfg.vertex_id = true; + cfg.instance_id = true; + } - meta->bifrost2.preload_regs = 0xC0; - meta->bifrost2.uniform_count = ss->uniform_count; + memcpy(&meta->bifrost_props, &prop, sizeof(prop)); + memcpy(&meta->bifrost_preload, &preload, sizeof(preload)); } else { struct mali_midgard_properties_packed prop; @@ -572,6 +578,7 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx, if (dev->quirks & IS_BIFROST) { struct mali_bifrost_properties_packed prop; + struct mali_preload_fragment_packed preload; bool no_blend = true; @@ -584,12 +591,13 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx, cfg.early_z_enable = !fs->can_discard && !fs->writes_depth && no_blend; } - memcpy(&fragmeta->bifrost_props, &prop, sizeof(prop)); - - fragmeta->bifrost2.preload_regs = 0x1; - SET_BIT(fragmeta->bifrost2.preload_regs, 0x10, fs->reads_frag_coord); + pan_pack(&preload, PRELOAD_FRAGMENT, cfg) { + cfg.uniform_count = fs->uniform_count; + cfg.fragment_position = fs->reads_frag_coord; + } - fragmeta->bifrost2.uniform_count = fs->uniform_count; + memcpy(&fragmeta->bifrost_props, &prop, sizeof(prop)); + memcpy(&fragmeta->bifrost_preload, &preload, sizeof(preload)); } else { struct mali_midgard_properties_packed prop; diff --git a/src/panfrost/bifrost/test/bi_submit.c b/src/panfrost/bifrost/test/bi_submit.c index 3688e2be1a5..542ed72c16c 100644 --- a/src/panfrost/bifrost/test/bi_submit.c +++ b/src/panfrost/bifrost/test/bi_submit.c @@ -176,12 +176,7 @@ bit_vertex(struct panfrost_device *dev, panfrost_program prog, .attribute_count = 1, .varying_count = 1, .bifrost_props = { .opaque = { 0x80020001 } }, - .bifrost2 = { - .unk3 = 0x0, - .preload_regs = 0xc0, - .uniform_count = sz_ubo / 16, - .unk4 = 0x0, - }, + .bifrost_preload = { .opaque = { (sz_ubo / 16) << 15 } }, }; memcpy(shader_desc->cpu, &meta, sizeof(meta)); diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h index af70f56be3a..e2267069860 100644 --- a/src/panfrost/include/panfrost-job.h +++ b/src/panfrost/include/panfrost-job.h @@ -326,52 +326,7 @@ struct mali_shader_meta { struct mali_stencil_packed stencil_back; union { - struct { - u32 unk3 : 7; - /* On Bifrost, some system values are preloaded in - * registers R55-R62 by the thread dispatcher prior to - * the start of shader execution. This is a bitfield - * with one entry for each register saying which - * registers need to be preloaded. Right now, the known - * values are: - * - * Vertex/compute: - * - R55 : gl_LocalInvocationID.xy - * - R56 : gl_LocalInvocationID.z + unknown in high 16 bits - * - R57 : gl_WorkGroupID.x - * - R58 : gl_WorkGroupID.y - * - R59 : gl_WorkGroupID.z - * - R60 : gl_GlobalInvocationID.x - * - R61 : gl_GlobalInvocationID.y/gl_VertexID (without base) - * - R62 : gl_GlobalInvocationID.z/gl_InstanceID (without base) - * - * Fragment: - * - R55 : unknown, never seen (but the bit for this is - * always set?) - * - R56 : unknown (bit always unset) - * - R57 : gl_PrimitiveID - * - R58 : gl_FrontFacing in low bit, potentially other stuff - * - R59 : u16 fragment coordinates (used to compute - * gl_FragCoord.xy, together with sample positions) - * - R60 : gl_SampleMask (used in epilog, so pretty - * much always used, but the bit is always 0 -- is - * this just always pushed?) - * - R61 : gl_SampleMaskIn and gl_SampleID, used by - * varying interpolation. - * - R62 : unknown (bit always unset). - * - * Later GPUs (starting with Mali-G52?) support - * preloading float varyings into r0-r7. This is - * indicated by setting 0x40. There is no distinction - * here between 1 varying and 2. - */ - u32 preload_regs : 8; - /* In units of 8 bytes or 64 bits, since the - * uniform/const port loads 64 bits at a time. - */ - u32 uniform_count : 7; - u32 unk4 : 10; // = 2 - } bifrost2; + struct mali_preload_packed bifrost_preload; struct { u32 unknown2_7; } midgard2; diff --git a/src/panfrost/lib/decode.c b/src/panfrost/lib/decode.c index 6b2e71ae12b..fcfc5907451 100644 --- a/src/panfrost/lib/decode.c +++ b/src/panfrost/lib/decode.c @@ -1731,6 +1731,7 @@ pandecode_vertex_tiler_postfix_pre( struct MALI_MIDGARD_PROPERTIES midg_props; struct MALI_BIFROST_PROPERTIES bi_props; + struct MALI_PRELOAD bi_preload; pandecode_log("struct mali_shader_meta shader_meta_%"PRIx64"_%d%s = {\n", p->shader, job_no, suffix); pandecode_indent++; @@ -1745,7 +1746,10 @@ pandecode_vertex_tiler_postfix_pre( uint32_t opaque = s->bifrost_props.opaque[0]; MALI_BIFROST_PROPERTIES_unpack((const uint8_t *) &opaque, &bi_props); - uniform_count = s->bifrost2.uniform_count; + opaque = s->bifrost_preload.opaque[0]; + MALI_PRELOAD_unpack((const uint8_t *) &opaque, &bi_preload); + + uniform_count = bi_preload.uniform_count; uniform_buffer_count = bi_props.uniform_buffer_count; } else { uint32_t opaque = s->midgard_props.opaque[0]; @@ -1767,6 +1771,24 @@ pandecode_vertex_tiler_postfix_pre( else MALI_MIDGARD_PROPERTIES_print(pandecode_dump_stream, &midg_props, 2); + if (is_bifrost) { + uint32_t opaque = s->bifrost_preload.opaque[0]; + switch (job_type) { + case MALI_JOB_TYPE_VERTEX: + DUMP_CL("Preload", PRELOAD_VERTEX, &opaque, 2); + break; + case MALI_JOB_TYPE_TILER: + DUMP_CL("Preload", PRELOAD_FRAGMENT, &opaque, 2); + break; + case MALI_JOB_TYPE_COMPUTE: + DUMP_CL("Preload", PRELOAD_COMPUTE, &opaque, 2); + break; + default: + DUMP_CL("Preload", PRELOAD, &opaque, 2); + break; + } + } + if (s->depth_units || s->depth_factor) { pandecode_prop("depth_factor = %f", s->depth_factor); pandecode_prop("depth_units = %f", s->depth_units); @@ -1809,18 +1831,7 @@ pandecode_vertex_tiler_postfix_pre( DUMP_CL("Stencil front", STENCIL, &s->stencil_front, 1); DUMP_CL("Stencil back", STENCIL, &s->stencil_back, 1); - if (is_bifrost) { - pandecode_log(".bifrost2 = {\n"); - pandecode_indent++; - - pandecode_prop("unk3 = 0x%" PRIx32, s->bifrost2.unk3); - pandecode_prop("preload_regs = 0x%" PRIx32, s->bifrost2.preload_regs); - pandecode_prop("uniform_count = %" PRId32, s->bifrost2.uniform_count); - pandecode_prop("unk4 = 0x%" PRIx32, s->bifrost2.unk4); - - pandecode_indent--; - pandecode_log("},\n"); - } else if (s->midgard2.unknown2_7) { + if (!is_bifrost && s->midgard2.unknown2_7) { pandecode_log(".midgard2 = {\n"); pandecode_indent++; diff --git a/src/panfrost/lib/midgard.xml b/src/panfrost/lib/midgard.xml index 352398b515f..d6fa7e4a134 100644 --- a/src/panfrost/lib/midgard.xml +++ b/src/panfrost/lib/midgard.xml @@ -363,6 +363,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- 2.30.2