pan/decode: Silence workgroups_x_shift_2
[mesa.git] / src / panfrost / pandecode / decode.c
index 75fa19712370a5219b8ec1f25c40e625ca727d42..905fa316698344952091e96b59a8f53bf8a2dfe4 100644 (file)
@@ -37,6 +37,8 @@
 #include "midgard/disassemble.h"
 #include "bifrost/disassemble.h"
 
+#include "pan_encoder.h"
+
 int pandecode_jc(mali_ptr jc_gpu_va, bool bifrost);
 
 #define MEMORY_PROP(obj, p) {\
@@ -55,11 +57,6 @@ int pandecode_jc(mali_ptr jc_gpu_va, bool bifrost);
         } \
 }
 
-#define DYN_MEMORY_PROP(obj, no, p) { \
-       if (obj->p) \
-               pandecode_prop("%s = %s_%d_p", #p, #p, no); \
-}
-
 /* Semantic logging type.
  *
  * Raw: for raw messages to be printed as is.
@@ -459,6 +456,22 @@ pandecode_mfbd_block_format(enum mali_mfbd_block_format fmt)
 }
 #undef DEFINE_CASE
 
+#define DEFINE_CASE(name) case MALI_EXCEPTION_ACCESS_## name: return ""#name
+static char *
+pandecode_exception_access(enum mali_exception_access fmt)
+{
+        switch (fmt) {
+                DEFINE_CASE(NONE);
+                DEFINE_CASE(EXECUTE);
+                DEFINE_CASE(READ);
+                DEFINE_CASE(WRITE);
+                /* Each case returns the access name as a string literal; the caller masks to 2 bits, so presumably these four are exhaustive */
+        default:
+                unreachable("Invalid case");
+        }
+}
+#undef DEFINE_CASE
+
 /* Midgard's tiler descriptor is embedded within the
  * larger FBD */
 
@@ -479,7 +492,7 @@ pandecode_midgard_tiler_descriptor(const struct midgard_tiler_descriptor *t)
 
         if (t->heap_start == t->heap_end) {
               /* Print identically to show symmetry for empty tiler heaps */  
-                MEMORY_PROP(t, heap_start);
+                MEMORY_PROP(t, heap_end);
         } else {
                 /* Points to the end of a buffer */
                 char *a = pointer_as_memory_reference(t->heap_end - 1);
@@ -1301,34 +1314,67 @@ bits(u32 word, u32 lo, u32 hi)
 }
 
 static void
-pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no)
+pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bool noninstanced)
 {
         pandecode_log_cont("{\n");
         pandecode_indent++;
 
-        pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
-        pandecode_prop("size_y_shift = %d", p->size_y_shift);
-        pandecode_prop("size_z_shift = %d", p->size_z_shift);
-        pandecode_prop("workgroups_x_shift = %d", p->workgroups_x_shift);
-        pandecode_prop("workgroups_y_shift = %d", p->workgroups_y_shift);
-        pandecode_prop("workgroups_z_shift = %d", p->workgroups_z_shift);
-        pandecode_prop("workgroups_x_shift_2 = 0x%" PRIx32, p->workgroups_x_shift_2);
-
         /* Decode invocation_count. See the comment before the definition of
          * invocation_count for an explanation.
          */
-        pandecode_msg("size: (%d, %d, %d)\n",
-                      bits(p->invocation_count, 0, p->size_y_shift) + 1,
-                      bits(p->invocation_count, p->size_y_shift, p->size_z_shift) + 1,
-                      bits(p->invocation_count, p->size_z_shift,
-                           p->workgroups_x_shift) + 1);
-        pandecode_msg("workgroups: (%d, %d, %d)\n",
-                      bits(p->invocation_count, p->workgroups_x_shift,
-                           p->workgroups_y_shift) + 1,
-                      bits(p->invocation_count, p->workgroups_y_shift,
-                           p->workgroups_z_shift) + 1,
-                      bits(p->invocation_count, p->workgroups_z_shift,
-                           32) + 1);
+
+        unsigned size_x = bits(p->invocation_count, 0, p->size_y_shift) + 1;
+        unsigned size_y = bits(p->invocation_count, p->size_y_shift, p->size_z_shift) + 1;
+        unsigned size_z = bits(p->invocation_count, p->size_z_shift, p->workgroups_x_shift) + 1;
+
+        unsigned groups_x = bits(p->invocation_count, p->workgroups_x_shift, p->workgroups_y_shift) + 1;
+        unsigned groups_y = bits(p->invocation_count, p->workgroups_y_shift, p->workgroups_z_shift) + 1;
+        unsigned groups_z = bits(p->invocation_count, p->workgroups_z_shift, 32) + 1;
+
+        /* Even though we have this decoded, we want to ensure that the
+         * representation is "unique" so we don't lose anything by printing only
+         * the final result. More specifically, we need to check that we were
+         * passed something in canonical form, since the definition per the
+         * hardware is inherently not unique. How? Well, take the resulting
+         * decode and pack it ourselves! If it is bit exact with what we
+         * decoded, we're good to go. */
+
+        struct mali_vertex_tiler_prefix ref;
+        panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, noninstanced);
+
+        bool canonical =
+                (p->invocation_count == ref.invocation_count) &&
+                (p->size_y_shift == ref.size_y_shift) &&
+                (p->size_z_shift == ref.size_z_shift) &&
+                (p->workgroups_x_shift == ref.workgroups_x_shift) &&
+                (p->workgroups_y_shift == ref.workgroups_y_shift) &&
+                (p->workgroups_z_shift == ref.workgroups_z_shift) &&
+                (p->workgroups_x_shift_2 == ref.workgroups_x_shift_2);
+
+        if (!canonical) {
+                pandecode_msg("XXX: non-canonical workgroups packing\n");
+                pandecode_msg("expected: %X, %d, %d, %d, %d, %d, %d\n",
+                                ref.invocation_count,
+                                ref.size_y_shift,
+                                ref.size_z_shift,
+                                ref.workgroups_x_shift,
+                                ref.workgroups_y_shift,
+                                ref.workgroups_z_shift,
+                                ref.workgroups_x_shift_2);
+
+                pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
+                pandecode_prop("size_y_shift = %d", p->size_y_shift);
+                pandecode_prop("size_z_shift = %d", p->size_z_shift);
+                pandecode_prop("workgroups_x_shift = %d", p->workgroups_x_shift);
+                pandecode_prop("workgroups_y_shift = %d", p->workgroups_y_shift);
+                pandecode_prop("workgroups_z_shift = %d", p->workgroups_z_shift);
+                pandecode_prop("workgroups_x_shift_2 = %d", p->workgroups_x_shift_2);
+        }
+
+        /* Regardless, print the decode */
+        pandecode_msg("size (%d, %d, %d), count (%d, %d, %d)\n",
+                        size_x, size_y, size_z,
+                        groups_x, groups_y, groups_z);
 
         /* TODO: Decode */
         if (p->unknown_draw)
@@ -1336,7 +1382,8 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no)
 
         pandecode_prop("workgroups_x_shift_3 = 0x%" PRIx32, p->workgroups_x_shift_3);
 
-        pandecode_prop("draw_mode = %s", pandecode_draw_mode(p->draw_mode));
+        if (p->draw_mode != MALI_DRAW_NONE)
+                pandecode_prop("draw_mode = %s", pandecode_draw_mode(p->draw_mode));
 
         /* Index count only exists for tiler jobs anyway */
 
@@ -1346,8 +1393,6 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no)
         if (p->offset_bias_correction)
                 pandecode_prop("offset_bias_correction = %d", p->offset_bias_correction);
 
-        DYN_MEMORY_PROP(p, job_no, indices);
-
         if (p->zero1) {
                 pandecode_msg("Zero tripped\n");
                 pandecode_prop("zero1 = 0x%" PRIx32, p->zero1);
@@ -1932,28 +1977,17 @@ pandecode_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix *p,
 static void
 pandecode_vertex_tiler_postfix(const struct mali_vertex_tiler_postfix *p, int job_no, bool is_bifrost)
 {
-        pandecode_log_cont("{\n");
+        if (!(p->position_varying || p->occlusion_counter || p->flags))
+                return;
+
+        pandecode_log(".postfix = {\n");
         pandecode_indent++;
 
         MEMORY_PROP(p, position_varying);
-        DYN_MEMORY_PROP(p, job_no, uniform_buffers);
-        DYN_MEMORY_PROP(p, job_no, texture_trampoline);
-        DYN_MEMORY_PROP(p, job_no, sampler_descriptor);
-        DYN_MEMORY_PROP(p, job_no, uniforms);
-        DYN_MEMORY_PROP(p, job_no, attributes);
-        DYN_MEMORY_PROP(p, job_no, attribute_meta);
-        DYN_MEMORY_PROP(p, job_no, varyings);
-        DYN_MEMORY_PROP(p, job_no, varying_meta);
-        DYN_MEMORY_PROP(p, job_no, viewport);
-        DYN_MEMORY_PROP(p, job_no, occlusion_counter);
+        MEMORY_PROP(p, occlusion_counter);
 
-        if (is_bifrost)
-                pandecode_prop("framebuffer = scratchpad_%d_p", job_no);
-        else
-                pandecode_prop("framebuffer = framebuffer_%d_p | %s", job_no, p->framebuffer & MALI_MFBD ? "MALI_MFBD" : "0");
-
-        pandecode_prop("_shader_upper = (shader_meta_%d_p) >> 4", job_no);
-        pandecode_prop("flags = %d", p->flags);
+        if (p->flags)
+                pandecode_prop("flags = %d", p->flags);
 
         pandecode_indent--;
         pandecode_log("},\n");
@@ -2042,7 +2076,6 @@ pandecode_tiler_meta(mali_ptr gpu_va, int job_no)
 
         pandecode_prop("width = MALI_POSITIVE(%d)", t->width + 1);
         pandecode_prop("height = MALI_POSITIVE(%d)", t->height + 1);
-        DYN_MEMORY_PROP(t, job_no, tiler_heap_meta);
 
         for (int i = 0; i < 12; i++) {
                 if (t->zeros[i] != 0) {
@@ -2093,7 +2126,6 @@ pandecode_tiler_only_bfr(const struct bifrost_tiler_only *t, int job_no)
         /* TODO: gl_PointSize on Bifrost */
         pandecode_primitive_size(t->primitive_size, true);
 
-        DYN_MEMORY_PROP(t, job_no, tiler_meta);
         pandecode_gl_enables(t->gl_enables, JOB_TYPE_TILER);
 
         if (t->zero1 || t->zero2 || t->zero3 || t->zero4 || t->zero5
@@ -2126,12 +2158,11 @@ pandecode_vertex_job_bfr(const struct mali_job_descriptor_header *h,
         pandecode_indent++;
 
         pandecode_log(".prefix = ");
-        pandecode_vertex_tiler_prefix(&v->prefix, job_no);
+        pandecode_vertex_tiler_prefix(&v->prefix, job_no, false);
 
         pandecode_log(".vertex = ");
         pandecode_vertex_only_bfr(&v->vertex);
 
-        pandecode_log(".postfix = ");
         pandecode_vertex_tiler_postfix(&v->postfix, job_no, true);
 
         pandecode_indent--;
@@ -2156,12 +2187,11 @@ pandecode_tiler_job_bfr(const struct mali_job_descriptor_header *h,
         pandecode_indent++;
 
         pandecode_log(".prefix = ");
-        pandecode_vertex_tiler_prefix(&t->prefix, job_no);
+        pandecode_vertex_tiler_prefix(&t->prefix, job_no, false);
 
         pandecode_log(".tiler = ");
         pandecode_tiler_only_bfr(&t->tiler, job_no);
 
-        pandecode_log(".postfix = ");
         pandecode_vertex_tiler_postfix(&t->postfix, job_no, true);
 
         pandecode_indent--;
@@ -2187,8 +2217,11 @@ pandecode_vertex_or_tiler_job_mdg(const struct mali_job_descriptor_header *h,
         bool has_primitive_pointer = v->prefix.unknown_draw & MALI_DRAW_VARYING_SIZE;
         pandecode_primitive_size(v->primitive_size, !has_primitive_pointer);
 
+        bool instanced = v->instance_shift || v->instance_odd;
+        bool is_graphics = (h->job_type == JOB_TYPE_VERTEX) || (h->job_type == JOB_TYPE_TILER);
+
         pandecode_log(".prefix = ");
-        pandecode_vertex_tiler_prefix(&v->prefix, job_no);
+        pandecode_vertex_tiler_prefix(&v->prefix, job_no, !instanced && is_graphics);
 
         pandecode_gl_enables(v->gl_enables, h->job_type);
 
@@ -2209,7 +2242,6 @@ pandecode_vertex_or_tiler_job_mdg(const struct mali_job_descriptor_header *h,
                 pandecode_prop("zero5 = 0x%" PRIx64, v->zero5);
         }
 
-        pandecode_log(".postfix = ");
         pandecode_vertex_tiler_postfix(&v->postfix, job_no, false);
 
         pandecode_indent--;
@@ -2274,10 +2306,15 @@ pandecode_fragment_job(const struct pandecode_mapped_memory *mem,
 
         const char *fbd_type = s->framebuffer & MALI_MFBD ? "MALI_MFBD" : "MALI_SFBD";
 
+        /* TODO: Decode */
+        unsigned extra_flags = (s->framebuffer & ~FBD_MASK) & ~MALI_MFBD;
+
         if (fbd_dumped)
-                pandecode_prop("framebuffer = framebuffer_%d_p | %s", job_no, fbd_type);
+                pandecode_prop("framebuffer = framebuffer_%d_p | %s | 0x%X", job_no,
+                                fbd_type, extra_flags);
         else
-                pandecode_prop("framebuffer = %s | %s", pointer_as_memory_reference(p), fbd_type);
+                pandecode_prop("framebuffer = %s | %s | 0x%X", pointer_as_memory_reference(p),
+                                fbd_type, extra_flags);
 
         pandecode_indent--;
         pandecode_log("};\n");
@@ -2333,10 +2370,10 @@ pandecode_jc(mali_ptr jc_gpu_va, bool bifrost)
                         pandecode_prop("job_descriptor_size = %d", h->job_descriptor_size);
 
                 if (h->exception_status != 0x1)
-                        pandecode_prop("exception_status = %x (source ID: 0x%x access: 0x%x exception: 0x%x)",
+                        pandecode_prop("exception_status = %x (source ID: 0x%x access: %s exception: 0x%x)",
                                        h->exception_status,
                                        (h->exception_status >> 16) & 0xFFFF,
-                                       (h->exception_status >> 8) & 0x3,
+                                       pandecode_exception_access((h->exception_status >> 8) & 0x3),
                                        h->exception_status  & 0xFF);
 
                 if (h->first_incomplete_task)