panfrost: Inline bifrost_tiler_only
[mesa.git] / src / panfrost / lib / decode.c
index 53f6f301b9c6cbe4f125b2d47d6cd2284bf32813..f3705094f2e499971fa1599ecccaab81b18d8ea8 100644 (file)
@@ -228,29 +228,6 @@ static const struct pandecode_flag_info clear_flag_info[] = {
 };
 #undef FLAG_INFO
 
-#define FLAG_INFO(flag) { MALI_##flag, "MALI_" #flag }
-static const struct pandecode_flag_info u3_flag_info[] = {
-        FLAG_INFO(HAS_MSAA),
-        FLAG_INFO(PER_SAMPLE),
-        FLAG_INFO(CAN_DISCARD),
-        FLAG_INFO(HAS_BLEND_SHADER),
-        FLAG_INFO(DEPTH_WRITEMASK),
-        FLAG_INFO(DEPTH_CLIP_NEAR),
-        FLAG_INFO(DEPTH_CLIP_FAR),
-        {}
-};
-
-static const struct pandecode_flag_info u4_flag_info[] = {
-        FLAG_INFO(NO_MSAA),
-        FLAG_INFO(NO_DITHER),
-        FLAG_INFO(DEPTH_RANGE_A),
-        FLAG_INFO(DEPTH_RANGE_B),
-        FLAG_INFO(STENCIL_TEST),
-        FLAG_INFO(ALPHA_TO_COVERAGE),
-        {}
-};
-#undef FLAG_INFO
-
 #define FLAG_INFO(flag) { MALI_MFBD_FORMAT_##flag, "MALI_MFBD_FORMAT_" #flag }
 static const struct pandecode_flag_info mfbd_fmt_flag_info[] = {
         FLAG_INFO(SRGB),
@@ -279,16 +256,6 @@ static const struct pandecode_flag_info mfbd_extra_flag_lo_info[] = {
 };
 #undef FLAG_INFO
 
-#define FLAG_INFO(flag) { MALI_BIFROST_##flag, "MALI_BIFROST_" #flag }
-static const struct pandecode_flag_info shader_bifrost_info [] = {
-        FLAG_INFO(FULL_THREAD),
-        FLAG_INFO(EARLY_Z),
-        FLAG_INFO(FIRST_ATEST),
-        {}
-};
-
-#undef FLAG_INFO
-
 #define FLAG_INFO(flag) { MALI_MFBD_##flag, "MALI_MFBD_" #flag }
 static const struct pandecode_flag_info mfbd_flag_info [] = {
         FLAG_INFO(DEPTH_WRITE),
@@ -1281,21 +1248,17 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo
         /* Decode invocation_count. See the comment before the definition of
          * invocation_count for an explanation.
          */
+        struct MALI_INVOCATION invocation;
+        struct mali_invocation_packed invocation_packed = p->invocation;
+        MALI_INVOCATION_unpack((const uint8_t *) &invocation_packed, &invocation);
 
-        unsigned size_y_shift = bits(p->invocation_shifts, 0, 5);
-        unsigned size_z_shift = bits(p->invocation_shifts, 5, 10);
-        unsigned workgroups_x_shift = bits(p->invocation_shifts, 10, 16);
-        unsigned workgroups_y_shift = bits(p->invocation_shifts, 16, 22);
-        unsigned workgroups_z_shift = bits(p->invocation_shifts, 22, 28);
-        unsigned workgroups_x_shift_2 = bits(p->invocation_shifts, 28, 32);
+        unsigned size_x = bits(invocation.invocations, 0, invocation.size_y_shift) + 1;
+        unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, invocation.size_z_shift) + 1;
+        unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, invocation.workgroups_x_shift) + 1;
 
-        unsigned size_x = bits(p->invocation_count, 0, size_y_shift) + 1;
-        unsigned size_y = bits(p->invocation_count, size_y_shift, size_z_shift) + 1;
-        unsigned size_z = bits(p->invocation_count, size_z_shift, workgroups_x_shift) + 1;
-
-        unsigned groups_x = bits(p->invocation_count, workgroups_x_shift, workgroups_y_shift) + 1;
-        unsigned groups_y = bits(p->invocation_count, workgroups_y_shift, workgroups_z_shift) + 1;
-        unsigned groups_z = bits(p->invocation_count, workgroups_z_shift, 32) + 1;
+        unsigned groups_x = bits(invocation.invocations, invocation.workgroups_x_shift, invocation.workgroups_y_shift) + 1;
+        unsigned groups_y = bits(invocation.invocations, invocation.workgroups_y_shift, invocation.workgroups_z_shift) + 1;
+        unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1;
 
         /* Even though we have this decoded, we want to ensure that the
          * representation is "unique" so we don't lose anything by printing only
@@ -1305,76 +1268,44 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo
          * decode and pack it ourselves! If it is bit exact with what we
          * decoded, we're good to go. */
 
-        struct mali_vertex_tiler_prefix ref;
+        struct mali_invocation_packed ref;
         panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, graphics);
 
-        bool canonical =
-                (p->invocation_count == ref.invocation_count) &&
-                (p->invocation_shifts == ref.invocation_shifts);
-
-        if (!canonical) {
+        if (memcmp(&ref, &invocation_packed, sizeof(ref))) {
                 pandecode_msg("XXX: non-canonical workgroups packing\n");
-                pandecode_msg("expected: %X, %X",
-                                ref.invocation_count,
-                                ref.invocation_shifts);
-
-                pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
-                pandecode_prop("size_y_shift = %d", size_y_shift);
-                pandecode_prop("size_z_shift = %d", size_z_shift);
-                pandecode_prop("workgroups_x_shift = %d", workgroups_x_shift);
-                pandecode_prop("workgroups_y_shift = %d", workgroups_y_shift);
-                pandecode_prop("workgroups_z_shift = %d", workgroups_z_shift);
-                pandecode_prop("workgroups_x_shift_2 = %d", workgroups_x_shift_2);
+                MALI_INVOCATION_print(pandecode_dump_stream, &invocation, 1 * 2);
         }
 
         /* Regardless, print the decode */
-        pandecode_msg("size (%d, %d, %d), count (%d, %d, %d)\n",
+        fprintf(pandecode_dump_stream,
+                        "Invocation (%d, %d, %d) x (%d, %d, %d)\n",
                         size_x, size_y, size_z,
                         groups_x, groups_y, groups_z);
 
-        /* TODO: Decode */
-        if (p->unknown_draw)
-                pandecode_prop("unknown_draw = 0x%" PRIx32, p->unknown_draw);
-
-        pandecode_prop("workgroups_x_shift_3 = 0x%" PRIx32, p->workgroups_x_shift_3);
-
-        if (p->draw_mode != MALI_DRAW_MODE_NONE)
-                pandecode_prop("draw_mode = %s", mali_draw_mode_as_str(p->draw_mode));
-
-        /* Index count only exists for tiler jobs anyway */
-
-        if (p->index_count)
-                pandecode_prop("index_count = MALI_POSITIVE(%" PRId32 ")", p->index_count + 1);
-
-
-        unsigned index_raw_size = (p->unknown_draw & MALI_DRAW_INDEXED_SIZE);
-        index_raw_size >>= MALI_DRAW_INDEXED_SHIFT;
+        fprintf(pandecode_dump_stream, "Primitive\n");
+        struct MALI_PRIMITIVE primitive;
+        struct mali_primitive_packed prim_packed = p->primitive;
+        MALI_PRIMITIVE_unpack((const uint8_t *) &prim_packed, &primitive);
+        MALI_PRIMITIVE_print(pandecode_dump_stream, &primitive, 1 * 2);
 
         /* Validate an index buffer is present if we need one. TODO: verify
          * relationship between invocation_count and index_count */
 
-        if (p->indices) {
-                unsigned count = p->index_count;
-
+        if (primitive.indices) {
                 /* Grab the size */
-                unsigned size = (index_raw_size == 0x3) ? 4 : index_raw_size;
+                unsigned size = (primitive.index_type == MALI_INDEX_TYPE_UINT32) ?
+                        sizeof(uint32_t) : primitive.index_type;
 
                 /* Ensure we got a size, and if so, validate the index buffer
                  * is large enough to hold a full set of indices of the given
                  * size */
 
-                if (!index_raw_size)
+                if (!size)
                         pandecode_msg("XXX: index size missing\n");
                 else
-                        pandecode_validate_buffer(p->indices, count * size);
-        } else if (index_raw_size)
-                pandecode_msg("XXX: unexpected index size %u\n", index_raw_size);
-
-        if (p->offset_bias_correction)
-                pandecode_prop("offset_bias_correction = %d", p->offset_bias_correction);
-
-        /* TODO: Figure out what this is. It's not zero */
-        pandecode_prop("zero1 = 0x%" PRIx32, p->zero1);
+                        pandecode_validate_buffer(primitive.indices, primitive.index_count * size);
+        } else if (primitive.index_type)
+                pandecode_msg("XXX: unexpected index size\n");
 
         pandecode_indent--;
         pandecode_log("},\n");
@@ -1719,7 +1650,7 @@ pandecode_vertex_tiler_postfix_pre(
 
         if (p->shader) {
                 struct pandecode_mapped_memory *smem = pandecode_find_mapped_gpu_mem_containing(p->shader);
-                struct mali_shader_meta *PANDECODE_PTR_VAR(s, smem, p->shader);
+                uint32_t *cl = pandecode_fetch_gpu_mem(smem, p->shader, MALI_STATE_LENGTH);
 
                 /* Disassemble ahead-of-time to get stats. Initialize with
                  * stats for the missing-shader case so we get validation
@@ -1736,131 +1667,78 @@ pandecode_vertex_tiler_postfix_pre(
                         .uniform_buffer_count = 0
                 };
 
-                if (s->shader & ~0xF)
-                        info = pandecode_shader_disassemble(s->shader & ~0xF, job_no, job_type, is_bifrost, gpu_id);
-
+                struct MALI_STATE state;
                 struct MALI_MIDGARD_PROPERTIES midg_props;
+                struct MALI_BIFROST_PROPERTIES bi_props;
 
-                pandecode_log("struct mali_shader_meta shader_meta_%"PRIx64"_%d%s = {\n", p->shader, job_no, suffix);
-                pandecode_indent++;
+                MALI_STATE_unpack((const uint8_t *) cl, &state);
+
+                if (state.shader.shader & ~0xF)
+                        info = pandecode_shader_disassemble(state.shader.shader & ~0xF, job_no, job_type, is_bifrost, gpu_id);
+
+                fprintf(pandecode_dump_stream, "State %"PRIx64"\n", p->shader);
+                MALI_STATE_print(pandecode_dump_stream, &state, 1 * 2);
 
                 /* Save for dumps */
-                attribute_count = s->attribute_count;
-                varying_count = s->varying_count;
-                texture_count = s->texture_count;
-                sampler_count = s->sampler_count;
+                attribute_count = state.shader.attribute_count;
+                varying_count = state.shader.varying_count;
+                texture_count = state.shader.texture_count;
+                sampler_count = state.shader.sampler_count;
 
+                fprintf(pandecode_dump_stream, "  Properties\n");
                 if (is_bifrost) {
-                        uniform_count = s->bifrost2.uniform_count;
-                        uniform_buffer_count = s->bifrost1.uniform_buffer_count;
+                        MALI_BIFROST_PROPERTIES_unpack((const uint8_t *) &state.properties, &bi_props);
+                        MALI_BIFROST_PROPERTIES_print(pandecode_dump_stream, &bi_props, 2 * 2);
+
+                        uniform_count = state.preload.uniform_count;
+                        uniform_buffer_count = bi_props.uniform_buffer_count;
                 } else {
-                        uint32_t opaque = s->midgard_props.opaque[0];
-                        MALI_MIDGARD_PROPERTIES_unpack((const uint8_t *) &opaque, &midg_props);
+                        MALI_MIDGARD_PROPERTIES_unpack((const uint8_t *) &state.properties, &midg_props);
+                        MALI_MIDGARD_PROPERTIES_print(pandecode_dump_stream, &midg_props, 2 * 2);
 
                         uniform_count = midg_props.uniform_count;
                         uniform_buffer_count = midg_props.uniform_buffer_count;
                 }
 
-                pandecode_shader_address("shader", s->shader);
-
-                pandecode_shader_prop("texture_count", s->texture_count, info.texture_count, false);
-                pandecode_shader_prop("sampler_count", s->sampler_count, info.sampler_count, false);
-                pandecode_shader_prop("attribute_count", s->attribute_count, info.attribute_count, false);
-                pandecode_shader_prop("varying_count", s->varying_count, info.varying_count, false);
+                pandecode_shader_prop("texture_count", texture_count, info.texture_count, false);
+                pandecode_shader_prop("sampler_count", sampler_count, info.sampler_count, false);
+                pandecode_shader_prop("attribute_count", attribute_count, info.attribute_count, false);
+                pandecode_shader_prop("varying_count", varying_count, info.varying_count, false);
 
                 if (is_bifrost) {
-                        pandecode_log("bifrost1.unk1 = ");
-                        pandecode_log_decoded_flags(shader_bifrost_info, s->bifrost1.unk1);
-                        pandecode_log_cont(",\n");
-                } else {
-                        MALI_MIDGARD_PROPERTIES_print(pandecode_dump_stream, &midg_props, 2);
-                }
-
-                if (s->depth_units || s->depth_factor) {
-                        pandecode_prop("depth_factor = %f", s->depth_factor);
-                        pandecode_prop("depth_units = %f", s->depth_units);
-                }
-
-                if (s->coverage_mask)
-                        pandecode_prop("coverage_mask = 0x%X", s->coverage_mask);
-
-                if (s->unknown2_2)
-                        pandecode_prop(".unknown2_2 = %X", s->unknown2_2);
-
-                if (s->unknown2_3 || s->unknown2_4) {
-                        pandecode_log(".unknown2_3 = ");
-
-                        int unknown2_3 = s->unknown2_3;
-                        int unknown2_4 = s->unknown2_4;
-
-                        /* We're not quite sure what these flags mean without the depth test, if anything */
-
-                        if (unknown2_3 & (MALI_DEPTH_WRITEMASK | MALI_DEPTH_FUNC_MASK)) {
-                                const char *func = mali_func_as_str(MALI_GET_DEPTH_FUNC(unknown2_3));
-                                unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
-
-                                pandecode_log_cont("MALI_DEPTH_FUNC(%s) | ", func);
+                        uint32_t opaque = state.preload.uniform_count << 15
+                                | state.preload.untyped;
+
+                        switch (job_type) {
+                        case MALI_JOB_TYPE_VERTEX:
+                                DUMP_CL("Preload", PRELOAD_VERTEX, &opaque, 2);
+                                break;
+                        case MALI_JOB_TYPE_TILER:
+                                DUMP_CL("Preload", PRELOAD_FRAGMENT, &opaque, 2);
+                                break;
+                        case MALI_JOB_TYPE_COMPUTE:
+                                DUMP_CL("Preload", PRELOAD_COMPUTE, &opaque, 2);
+                                break;
+                        default:
+                                DUMP_CL("Preload", PRELOAD, &opaque, 2);
+                                break;
                         }
-
-                        pandecode_log_decoded_flags(u3_flag_info, unknown2_3);
-                        pandecode_log_cont(",\n");
-
-                        pandecode_log(".unknown2_4 = ");
-                        pandecode_log_decoded_flags(u4_flag_info, unknown2_4);
-                        pandecode_log_cont(",\n");
-                }
-
-                if (s->stencil_mask_front || s->stencil_mask_back) {
-                        pandecode_prop("stencil_mask_front = 0x%02X", s->stencil_mask_front);
-                        pandecode_prop("stencil_mask_back = 0x%02X", s->stencil_mask_back);
-                }
-
-                DUMP_CL("Stencil front", STENCIL, &s->stencil_front, 1);
-                DUMP_CL("Stencil back", STENCIL, &s->stencil_back, 1);
-
-                if (is_bifrost) {
-                        pandecode_log(".bifrost2 = {\n");
-                        pandecode_indent++;
-
-                        pandecode_prop("unk3 = 0x%" PRIx32, s->bifrost2.unk3);
-                        pandecode_prop("preload_regs = 0x%" PRIx32, s->bifrost2.preload_regs);
-                        pandecode_prop("uniform_count = %" PRId32, s->bifrost2.uniform_count);
-                        pandecode_prop("unk4 = 0x%" PRIx32, s->bifrost2.unk4);
-
-                        pandecode_indent--;
-                        pandecode_log("},\n");
-                } else if (s->midgard2.unknown2_7) {
-                        pandecode_log(".midgard2 = {\n");
-                        pandecode_indent++;
-
-                        pandecode_prop("unknown2_7 = 0x%" PRIx32, s->midgard2.unknown2_7);
-                        pandecode_indent--;
-                        pandecode_log("},\n");
-                }
-
-                if (s->padding) {
-                        pandecode_msg("XXX: shader padding tripped\n");
-                        pandecode_prop("padding = 0x%" PRIx32, s->padding);
                 }
 
                 if (!is_bifrost) {
                         /* TODO: Blend shaders routing/disasm */
-                        union midgard_blend blend = s->blend;
-                        mali_ptr shader = pandecode_midgard_blend(&blend, s->unknown2_3 & MALI_HAS_BLEND_SHADER);
+                        union midgard_blend blend;
+                        memcpy(&blend, &state.sfbd_blend, sizeof(blend));
+                        mali_ptr shader = pandecode_midgard_blend(&blend, state.multisample_misc.sfbd_blend_shader);
                         if (shader & ~0xF)
                                 pandecode_blend_shader_disassemble(shader, job_no, job_type, false, gpu_id);
-                } else {
-                        pandecode_msg("mdg_blend = %" PRIx64 "\n", s->blend.shader);
                 }
 
-                pandecode_indent--;
-                pandecode_log("};\n");
-
                 /* MRT blend fields are used whenever MFBD is used, with
                  * per-RT descriptors */
 
                 if (job_type == MALI_JOB_TYPE_TILER && (is_bifrost || p->shared_memory & MALI_MFBD)) {
-                        void* blend_base = (void *) (s + 1);
+                        void *blend_base = ((uint8_t *) cl) + MALI_STATE_LENGTH;
 
                         for (unsigned i = 0; i < fbd_info.rt_count; i++) {
                                 mali_ptr shader = 0;
@@ -2091,30 +1969,6 @@ pandecode_primitive_size(union midgard_primitive_size u, bool constant)
         pandecode_log("},\n");
 }
 
-static void
-pandecode_tiler_only_bfr(const struct bifrost_tiler_only *t, int job_no)
-{
-        pandecode_log_cont("{\n");
-        pandecode_indent++;
-
-        /* TODO: gl_PointSize on Bifrost */
-        pandecode_primitive_size(t->primitive_size, true);
-
-        if (t->zero1 || t->zero2 || t->zero3 || t->zero4 || t->zero5
-            || t->zero6) {
-                pandecode_msg("XXX: tiler only zero tripped\n");
-                pandecode_prop("zero1 = 0x%" PRIx64, t->zero1);
-                pandecode_prop("zero2 = 0x%" PRIx64, t->zero2);
-                pandecode_prop("zero3 = 0x%" PRIx64, t->zero3);
-                pandecode_prop("zero4 = 0x%" PRIx64, t->zero4);
-                pandecode_prop("zero5 = 0x%" PRIx64, t->zero5);
-                pandecode_prop("zero6 = 0x%" PRIx64, t->zero6);
-        }
-
-        pandecode_indent--;
-        pandecode_log("},\n");
-}
-
 static int
 pandecode_vertex_job_bfr(const struct mali_job_descriptor_header *h,
                                 const struct pandecode_mapped_memory *mem,
@@ -2144,15 +1998,26 @@ pandecode_tiler_job_bfr(const struct mali_job_descriptor_header *h,
         struct bifrost_payload_tiler *PANDECODE_PTR_VAR(t, mem, payload);
 
         pandecode_vertex_tiler_postfix_pre(&t->postfix, job_no, h->job_type, "", true, gpu_id);
-        pandecode_tiler_meta(t->tiler.tiler_meta, job_no);
+        pandecode_tiler_meta(t->tiler_meta, job_no);
 
         pandecode_log("struct bifrost_payload_tiler payload_%"PRIx64"_%d = {\n", payload, job_no);
         pandecode_indent++;
 
         pandecode_vertex_tiler_prefix(&t->prefix, job_no, false);
 
-        pandecode_log(".tiler = ");
-        pandecode_tiler_only_bfr(&t->tiler, job_no);
+        /* TODO: gl_PointSize on Bifrost */
+        pandecode_primitive_size(t->primitive_size, true);
+
+        if (t->zero1 || t->zero2 || t->zero3 || t->zero4 || t->zero5
+            || t->zero6) {
+                pandecode_msg("XXX: tiler only zero tripped\n");
+                pandecode_prop("zero1 = 0x%" PRIx64, t->zero1);
+                pandecode_prop("zero2 = 0x%" PRIx64, t->zero2);
+                pandecode_prop("zero3 = 0x%" PRIx64, t->zero3);
+                pandecode_prop("zero4 = 0x%" PRIx64, t->zero4);
+                pandecode_prop("zero5 = 0x%" PRIx64, t->zero5);
+                pandecode_prop("zero6 = 0x%" PRIx64, t->zero6);
+        }
 
         pandecode_vertex_tiler_postfix(&t->postfix, job_no, true);
 
@@ -2178,8 +2043,11 @@ pandecode_vertex_or_tiler_job_mdg(const struct mali_job_descriptor_header *h,
         pandecode_vertex_tiler_prefix(&v->prefix, job_no, is_graphics);
         pandecode_vertex_tiler_postfix(&v->postfix, job_no, false);
 
-        bool has_primitive_pointer = v->prefix.unknown_draw & MALI_DRAW_VARYING_SIZE;
-        pandecode_primitive_size(v->primitive_size, !has_primitive_pointer);
+        struct MALI_PRIMITIVE primitive;
+        struct mali_primitive_packed prim_packed = v->prefix.primitive;
+        MALI_PRIMITIVE_unpack((const uint8_t *) &prim_packed, &primitive);
+
+        pandecode_primitive_size(v->primitive_size, primitive.point_size_array == 0);
 
         pandecode_indent--;
         pandecode_log("};\n");
@@ -2302,7 +2170,7 @@ pandecode_jc(mali_ptr jc_gpu_va, bool bifrost, unsigned gpu_id, bool minimal)
                 h = PANDECODE_PTR(mem, jc_gpu_va, struct mali_job_descriptor_header);
 
                 mali_ptr payload_ptr = jc_gpu_va + sizeof(*h);
-                payload = pandecode_fetch_gpu_mem(mem, payload_ptr, 256);
+                payload = pandecode_fetch_gpu_mem(mem, payload_ptr, 64);
 
                 int job_no = job_descriptor_number++;