pan/decode: Verify and omit polygon size
[mesa.git] / src / panfrost / pandecode / decode.c
index 71eb2e972e23b600f4d9543de0518f7a819b3f60..17f974ef33e0a3a8f5a158f25ce15fcd92af424d 100644 (file)
@@ -37,6 +37,8 @@
 #include "midgard/disassemble.h"
 #include "bifrost/disassemble.h"
 
+#include "pan_encoder.h"
+
 int pandecode_jc(mali_ptr jc_gpu_va, bool bifrost);
 
 #define MEMORY_PROP(obj, p) {\
@@ -55,11 +57,6 @@ int pandecode_jc(mali_ptr jc_gpu_va, bool bifrost);
         } \
 }
 
-#define DYN_MEMORY_PROP(obj, no, p) { \
-       if (obj->p) \
-               pandecode_prop("%s = %s_%d_p", #p, #p, no); \
-}
-
 /* Semantic logging type.
  *
  * Raw: for raw messages to be printed as is.
@@ -459,34 +456,81 @@ pandecode_mfbd_block_format(enum mali_mfbd_block_format fmt)
 }
 #undef DEFINE_CASE
 
+#define DEFINE_CASE(name) case MALI_EXCEPTION_ACCESS_## name: return ""#name
+static char * /* Returns the enumerator's suffix as a string literal (e.g. "READ"); callers must not free or modify it */
+pandecode_exception_access(enum mali_exception_access fmt) /* fmt: access type to name when printing exception_status */
+{
+        switch (fmt) {
+                DEFINE_CASE(NONE);
+                DEFINE_CASE(EXECUTE);
+                DEFINE_CASE(READ);
+                DEFINE_CASE(WRITE);
+                /* NOTE(review): the caller passes a 2-bit field; presumably these four enumerators cover 0..3 - confirm against the enum */
+        default:
+                unreachable("Invalid case");
+        }
+}
+#undef DEFINE_CASE
+
 /* Midgard's tiler descriptor is embedded within the
  * larger FBD */
 
 static void
-pandecode_midgard_tiler_descriptor(const struct midgard_tiler_descriptor *t)
+pandecode_midgard_tiler_descriptor(
+                const struct midgard_tiler_descriptor *t,
+                unsigned width,
+                unsigned height)
 {
         pandecode_log(".tiler = {\n");
         pandecode_indent++;
 
         pandecode_prop("hierarchy_mask = 0x%" PRIx16, t->hierarchy_mask);
         pandecode_prop("flags = 0x%" PRIx16, t->flags);
-        pandecode_prop("polygon_list_size = 0x%x", t->polygon_list_size);
 
         MEMORY_PROP(t, polygon_list);
-        MEMORY_PROP(t, polygon_list_body);
 
-        MEMORY_PROP(t, heap_start);
+        /* The body is offset from the base of the polygon list */
+        assert(t->polygon_list_body > t->polygon_list);
+        unsigned body_offset = t->polygon_list_body - t->polygon_list;
 
-        if (t->heap_start == t->heap_end) {
-              /* Print identically to show symmetry for empty tiler heaps */  
-                MEMORY_PROP(t, heap_end);
-        } else {
-                /* Points to the end of a buffer */
-                char *a = pointer_as_memory_reference(t->heap_end - 1);
-                pandecode_prop("heap_end = %s + 1", a);
-                free(a);
+        /* It needs to fit inside the reported size */
+        assert(t->polygon_list_size >= body_offset);
+
+        /* Check that we fit */
+        struct pandecode_mapped_memory *plist =
+                pandecode_find_mapped_gpu_mem_containing(t->polygon_list);
+
+        assert(t->polygon_list_size <= plist->length);
+
+        /* Now that we've sanity checked, we'll try to calculate the sizes
+         * ourselves for comparison */
+
+        unsigned ref_header = panfrost_tiler_header_size(width, height, t->hierarchy_mask);
+        unsigned ref_body = panfrost_tiler_body_size(width, height, t->hierarchy_mask);
+        unsigned ref_size = ref_header + ref_body;
+
+        if (!((ref_header == body_offset) && (ref_size == t->polygon_list_size))) {
+                pandecode_msg("XXX: bad polygon list size (expected %d / 0x%x)\n",
+                                ref_header, ref_size);
+                pandecode_prop("polygon_list_size = 0x%x", t->polygon_list_size);
+                pandecode_msg("body offset %d\n", body_offset);
         }
 
+        /* The tiler heap has a start and end specified, so check that
+         * everything fits in a contiguous BO (otherwise, we risk out-of-bounds
+         * reads) */
+
+        MEMORY_PROP(t, heap_start);
+        assert(t->heap_end >= t->heap_start);
+
+        struct pandecode_mapped_memory *heap =
+                pandecode_find_mapped_gpu_mem_containing(t->heap_start);
+
+        unsigned heap_size = t->heap_end - t->heap_start;
+        assert(heap_size <= heap->length);
+
+        pandecode_msg("heap size %d\n", heap_size);
+
         bool nonzero_weights = false;
 
         for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
@@ -566,7 +610,7 @@ pandecode_sfbd(uint64_t gpu_va, int job_no)
 
         MEMORY_PROP(s, unknown_address_0);
         const struct midgard_tiler_descriptor t = s->tiler;
-        pandecode_midgard_tiler_descriptor(&t);
+        pandecode_midgard_tiler_descriptor(&t, s->width + 1, s->height + 1);
 
         pandecode_indent--;
         pandecode_log("};\n");
@@ -795,7 +839,7 @@ pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
         pandecode_prop("unknown2 = 0x%x", fb->unknown2);
         MEMORY_PROP(fb, scratchpad);
         const struct midgard_tiler_descriptor t = fb->tiler;
-        pandecode_midgard_tiler_descriptor(&t);
+        pandecode_midgard_tiler_descriptor(&t, fb->width1 + 1, fb->height1 + 1);
 
         if (fb->zero3 || fb->zero4) {
                 pandecode_msg("framebuffer zeros tripped\n");
@@ -1301,34 +1345,67 @@ bits(u32 word, u32 lo, u32 hi)
 }
 
 static void
-pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no)
+pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bool noninstanced)
 {
         pandecode_log_cont("{\n");
         pandecode_indent++;
 
-        pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
-        pandecode_prop("size_y_shift = %d", p->size_y_shift);
-        pandecode_prop("size_z_shift = %d", p->size_z_shift);
-        pandecode_prop("workgroups_x_shift = %d", p->workgroups_x_shift);
-        pandecode_prop("workgroups_y_shift = %d", p->workgroups_y_shift);
-        pandecode_prop("workgroups_z_shift = %d", p->workgroups_z_shift);
-        pandecode_prop("workgroups_x_shift_2 = 0x%" PRIx32, p->workgroups_x_shift_2);
-
         /* Decode invocation_count. See the comment before the definition of
          * invocation_count for an explanation.
          */
-        pandecode_msg("size: (%d, %d, %d)\n",
-                      bits(p->invocation_count, 0, p->size_y_shift) + 1,
-                      bits(p->invocation_count, p->size_y_shift, p->size_z_shift) + 1,
-                      bits(p->invocation_count, p->size_z_shift,
-                           p->workgroups_x_shift) + 1);
-        pandecode_msg("workgroups: (%d, %d, %d)\n",
-                      bits(p->invocation_count, p->workgroups_x_shift,
-                           p->workgroups_y_shift) + 1,
-                      bits(p->invocation_count, p->workgroups_y_shift,
-                           p->workgroups_z_shift) + 1,
-                      bits(p->invocation_count, p->workgroups_z_shift,
-                           32) + 1);
+
+        unsigned size_x = bits(p->invocation_count, 0, p->size_y_shift) + 1;
+        unsigned size_y = bits(p->invocation_count, p->size_y_shift, p->size_z_shift) + 1;
+        unsigned size_z = bits(p->invocation_count, p->size_z_shift, p->workgroups_x_shift) + 1;
+
+        unsigned groups_x = bits(p->invocation_count, p->workgroups_x_shift, p->workgroups_y_shift) + 1;
+        unsigned groups_y = bits(p->invocation_count, p->workgroups_y_shift, p->workgroups_z_shift) + 1;
+        unsigned groups_z = bits(p->invocation_count, p->workgroups_z_shift, 32) + 1;
+
+        /* Even though we have this decoded, we want to ensure that the
+         * representation is "unique" so we don't lose anything by printing only
+         * the final result. More specifically, we need to check that we were
+         * passed something in canonical form, since the definition per the
+         * hardware is inherently not unique. How? Well, take the resulting
+         * decode and pack it ourselves! If it is bit exact with what we
+         * decoded, we're good to go. */
+
+        struct mali_vertex_tiler_prefix ref;
+        panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, noninstanced);
+
+        bool canonical =
+                (p->invocation_count == ref.invocation_count) &&
+                (p->size_y_shift == ref.size_y_shift) &&
+                (p->size_z_shift == ref.size_z_shift) &&
+                (p->workgroups_x_shift == ref.workgroups_x_shift) &&
+                (p->workgroups_y_shift == ref.workgroups_y_shift) &&
+                (p->workgroups_z_shift == ref.workgroups_z_shift) &&
+                (p->workgroups_x_shift_2 == ref.workgroups_x_shift_2);
+
+        if (!canonical) { /* raw fields are dumped only when re-packing the decode disagrees with the input */
+                pandecode_msg("XXX: non-canonical workgroups packing\n");
+                pandecode_msg("expected: %X, %d, %d, %d, %d, %d, %d\n", /* one conversion per ref field below (7); the last was previously dropped */
+                                ref.invocation_count,
+                                ref.size_y_shift,
+                                ref.size_z_shift,
+                                ref.workgroups_x_shift,
+                                ref.workgroups_y_shift,
+                                ref.workgroups_z_shift,
+                                ref.workgroups_x_shift_2);
+
+                pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
+                pandecode_prop("size_y_shift = %d", p->size_y_shift);
+                pandecode_prop("size_z_shift = %d", p->size_z_shift);
+                pandecode_prop("workgroups_x_shift = %d", p->workgroups_x_shift);
+                pandecode_prop("workgroups_y_shift = %d", p->workgroups_y_shift);
+                pandecode_prop("workgroups_z_shift = %d", p->workgroups_z_shift);
+                pandecode_prop("workgroups_x_shift_2 = %d", p->workgroups_x_shift_2);
+        }
+
+        /* Regardless, print the decode */
+        pandecode_msg("size (%d, %d, %d), count (%d, %d, %d)\n",
+                        size_x, size_y, size_z,
+                        groups_x, groups_y, groups_z);
 
         /* TODO: Decode */
         if (p->unknown_draw)
@@ -1336,7 +1413,8 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no)
 
         pandecode_prop("workgroups_x_shift_3 = 0x%" PRIx32, p->workgroups_x_shift_3);
 
-        pandecode_prop("draw_mode = %s", pandecode_draw_mode(p->draw_mode));
+        if (p->draw_mode != MALI_DRAW_NONE)
+                pandecode_prop("draw_mode = %s", pandecode_draw_mode(p->draw_mode));
 
         /* Index count only exists for tiler jobs anyway */
 
@@ -1346,8 +1424,6 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no)
         if (p->offset_bias_correction)
                 pandecode_prop("offset_bias_correction = %d", p->offset_bias_correction);
 
-        DYN_MEMORY_PROP(p, job_no, indices);
-
         if (p->zero1) {
                 pandecode_msg("Zero tripped\n");
                 pandecode_prop("zero1 = 0x%" PRIx32, p->zero1);
@@ -1932,28 +2008,17 @@ pandecode_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix *p,
 static void
 pandecode_vertex_tiler_postfix(const struct mali_vertex_tiler_postfix *p, int job_no, bool is_bifrost)
 {
-        pandecode_log_cont("{\n");
+        if (!(p->position_varying || p->occlusion_counter || p->flags))
+                return;
+
+        pandecode_log(".postfix = {\n");
         pandecode_indent++;
 
         MEMORY_PROP(p, position_varying);
-        DYN_MEMORY_PROP(p, job_no, uniform_buffers);
-        DYN_MEMORY_PROP(p, job_no, texture_trampoline);
-        DYN_MEMORY_PROP(p, job_no, sampler_descriptor);
-        DYN_MEMORY_PROP(p, job_no, uniforms);
-        DYN_MEMORY_PROP(p, job_no, attributes);
-        DYN_MEMORY_PROP(p, job_no, attribute_meta);
-        DYN_MEMORY_PROP(p, job_no, varyings);
-        DYN_MEMORY_PROP(p, job_no, varying_meta);
-        DYN_MEMORY_PROP(p, job_no, viewport);
-        DYN_MEMORY_PROP(p, job_no, occlusion_counter);
-
-        if (is_bifrost)
-                pandecode_prop("framebuffer = scratchpad_%d_p", job_no);
-        else
-                pandecode_prop("framebuffer = framebuffer_%d_p | %s", job_no, p->framebuffer & MALI_MFBD ? "MALI_MFBD" : "0");
+        MEMORY_PROP(p, occlusion_counter);
 
-        pandecode_prop("_shader_upper = (shader_meta_%d_p) >> 4", job_no);
-        pandecode_prop("flags = %d", p->flags);
+        if (p->flags)
+                pandecode_prop("flags = %d", p->flags);
 
         pandecode_indent--;
         pandecode_log("},\n");
@@ -2042,7 +2107,6 @@ pandecode_tiler_meta(mali_ptr gpu_va, int job_no)
 
         pandecode_prop("width = MALI_POSITIVE(%d)", t->width + 1);
         pandecode_prop("height = MALI_POSITIVE(%d)", t->height + 1);
-        DYN_MEMORY_PROP(t, job_no, tiler_heap_meta);
 
         for (int i = 0; i < 12; i++) {
                 if (t->zeros[i] != 0) {
@@ -2093,7 +2157,6 @@ pandecode_tiler_only_bfr(const struct bifrost_tiler_only *t, int job_no)
         /* TODO: gl_PointSize on Bifrost */
         pandecode_primitive_size(t->primitive_size, true);
 
-        DYN_MEMORY_PROP(t, job_no, tiler_meta);
         pandecode_gl_enables(t->gl_enables, JOB_TYPE_TILER);
 
         if (t->zero1 || t->zero2 || t->zero3 || t->zero4 || t->zero5
@@ -2126,12 +2189,11 @@ pandecode_vertex_job_bfr(const struct mali_job_descriptor_header *h,
         pandecode_indent++;
 
         pandecode_log(".prefix = ");
-        pandecode_vertex_tiler_prefix(&v->prefix, job_no);
+        pandecode_vertex_tiler_prefix(&v->prefix, job_no, false);
 
         pandecode_log(".vertex = ");
         pandecode_vertex_only_bfr(&v->vertex);
 
-        pandecode_log(".postfix = ");
         pandecode_vertex_tiler_postfix(&v->postfix, job_no, true);
 
         pandecode_indent--;
@@ -2156,12 +2218,11 @@ pandecode_tiler_job_bfr(const struct mali_job_descriptor_header *h,
         pandecode_indent++;
 
         pandecode_log(".prefix = ");
-        pandecode_vertex_tiler_prefix(&t->prefix, job_no);
+        pandecode_vertex_tiler_prefix(&t->prefix, job_no, false);
 
         pandecode_log(".tiler = ");
         pandecode_tiler_only_bfr(&t->tiler, job_no);
 
-        pandecode_log(".postfix = ");
         pandecode_vertex_tiler_postfix(&t->postfix, job_no, true);
 
         pandecode_indent--;
@@ -2187,8 +2248,11 @@ pandecode_vertex_or_tiler_job_mdg(const struct mali_job_descriptor_header *h,
         bool has_primitive_pointer = v->prefix.unknown_draw & MALI_DRAW_VARYING_SIZE;
         pandecode_primitive_size(v->primitive_size, !has_primitive_pointer);
 
+        bool instanced = v->instance_shift || v->instance_odd;
+        bool is_graphics = (h->job_type == JOB_TYPE_VERTEX) || (h->job_type == JOB_TYPE_TILER);
+
         pandecode_log(".prefix = ");
-        pandecode_vertex_tiler_prefix(&v->prefix, job_no);
+        pandecode_vertex_tiler_prefix(&v->prefix, job_no, !instanced && is_graphics);
 
         pandecode_gl_enables(v->gl_enables, h->job_type);
 
@@ -2209,7 +2273,6 @@ pandecode_vertex_or_tiler_job_mdg(const struct mali_job_descriptor_header *h,
                 pandecode_prop("zero5 = 0x%" PRIx64, v->zero5);
         }
 
-        pandecode_log(".postfix = ");
         pandecode_vertex_tiler_postfix(&v->postfix, job_no, false);
 
         pandecode_indent--;
@@ -2274,10 +2337,15 @@ pandecode_fragment_job(const struct pandecode_mapped_memory *mem,
 
         const char *fbd_type = s->framebuffer & MALI_MFBD ? "MALI_MFBD" : "MALI_SFBD";
 
+        /* TODO: Decode */
+        unsigned extra_flags = (s->framebuffer & ~FBD_MASK) & ~MALI_MFBD;
+
         if (fbd_dumped)
-                pandecode_prop("framebuffer = framebuffer_%d_p | %s", job_no, fbd_type);
+                pandecode_prop("framebuffer = framebuffer_%d_p | %s | 0x%X", job_no,
+                                fbd_type, extra_flags);
         else
-                pandecode_prop("framebuffer = %s | %s", pointer_as_memory_reference(p), fbd_type);
+                pandecode_prop("framebuffer = %s | %s | 0x%X", pointer_as_memory_reference(p),
+                                fbd_type, extra_flags);
 
         pandecode_indent--;
         pandecode_log("};\n");
@@ -2332,11 +2400,11 @@ pandecode_jc(mali_ptr jc_gpu_va, bool bifrost)
                 if (h->job_descriptor_size)
                         pandecode_prop("job_descriptor_size = %d", h->job_descriptor_size);
 
-                if (h->exception_status != 0x1)
-                        pandecode_prop("exception_status = %x (source ID: 0x%x access: 0x%x exception: 0x%x)",
+                if (h->exception_status && h->exception_status != 0x1)
+                        pandecode_prop("exception_status = %x (source ID: 0x%x access: %s exception: 0x%x)",
                                        h->exception_status,
                                        (h->exception_status >> 16) & 0xFFFF,
-                                       (h->exception_status >> 8) & 0x3,
+                                       pandecode_exception_access((h->exception_status >> 8) & 0x3),
                                        h->exception_status  & 0xFF);
 
                 if (h->first_incomplete_task)