switch (type) {
DEFINE_CASE(NULL);
- DEFINE_CASE(SET_VALUE);
+ DEFINE_CASE(WRITE_VALUE);
DEFINE_CASE(CACHE_FLUSH);
DEFINE_CASE(COMPUTE);
DEFINE_CASE(VERTEX);
}
#undef DEFINE_CASE
-/* Why is this duplicated? Who knows... */
-#define DEFINE_CASE(name) case MALI_ALT_FUNC_ ## name: return "MALI_ALT_FUNC_" #name
-static char *
-pandecode_alt_func(enum mali_alt_func mode)
-{
- switch (mode) {
- DEFINE_CASE(NEVER);
- DEFINE_CASE(LESS);
- DEFINE_CASE(EQUAL);
- DEFINE_CASE(LEQUAL);
- DEFINE_CASE(GREATER);
- DEFINE_CASE(NOTEQUAL);
- DEFINE_CASE(GEQUAL);
- DEFINE_CASE(ALWAYS);
-
- default:
- pandecode_msg("XXX: invalid alt func %X\n", mode);
- return "";
- }
-}
-#undef DEFINE_CASE
-
#define DEFINE_CASE(name) case MALI_STENCIL_ ## name: return "MALI_STENCIL_" #name
static char *
pandecode_stencil_op(enum mali_stencil_op op)
return "instanced_npot";
case MALI_ATTR_IMAGE:
return "image";
- case MALI_ATTR_INTERNAL:
- return "internal";
default:
pandecode_msg("XXX: invalid attribute mode %X\n", mode);
return "";
}
static const char *
-pandecode_special_varying(uint64_t v)
+pandecode_special_record(uint64_t v, bool* attribute)
{
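+ /* gl_VertexID and gl_InstanceID live in the attribute arrays; the
+ * remaining special records are varyings, so report which kind we
+ * matched for the caller's sanity check */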
switch(v) {
+ case MALI_ATTR_VERTEXID:
+ *attribute = true;
+ return "gl_VertexID";
+ case MALI_ATTR_INSTANCEID:
+ *attribute = true;
+ return "gl_InstanceID";
case MALI_VARYING_FRAG_COORD:
return "gl_FragCoord";
case MALI_VARYING_FRONT_FACING:
case MALI_VARYING_POINT_COORD:
return "gl_PointCoord";
default:
- pandecode_msg("XXX: invalid special varying %" PRIx64 "\n", v);
+ pandecode_msg("XXX: invalid special record %" PRIx64 "\n", v);
return "";
}
}
#undef DEFINE_CASE
#define DEFINE_CASE(name) case MALI_EXCEPTION_ACCESS_## name: return ""#name
-static char *
+char *
pandecode_exception_access(enum mali_exception_access access)
{
switch (access) {
const struct midgard_tiler_descriptor *t,
unsigned width,
unsigned height,
- bool is_fragment)
+ bool is_fragment,
+ bool has_hierarchy)
{
pandecode_log(".tiler = {\n");
pandecode_indent++;
/* Now that we've sanity checked, we'll try to calculate the sizes
* ourselves for comparison */
- unsigned ref_header = panfrost_tiler_header_size(width, height, t->hierarchy_mask);
- unsigned ref_size = panfrost_tiler_full_size(width, height, t->hierarchy_mask);
+ unsigned ref_header = panfrost_tiler_header_size(width, height, t->hierarchy_mask, has_hierarchy);
+ unsigned ref_size = panfrost_tiler_full_size(width, height, t->hierarchy_mask, has_hierarchy);
if (!((ref_header == body_offset) && (ref_size == t->polygon_list_size))) {
pandecode_msg("XXX: bad polygon list size (expected %d / 0x%x)\n",
pandecode_log("}\n");
}
-static void
-pandecode_midgard_tiler_descriptor_0x20(
- const struct midgard_tiler_descriptor *t)
-{
- pandecode_log(".tiler = {\n");
- pandecode_indent++;
-
- pandecode_prop("hierarchy_mask = 0x%" PRIx16, t->hierarchy_mask);
- pandecode_prop("flags = 0x%" PRIx16, t->flags);
- MEMORY_PROP(t, polygon_list);
- MEMORY_PROP(t, polygon_list_body);
- pandecode_prop("polygon_list_size = 0x%x", t->polygon_list_size);
- MEMORY_PROP(t, heap_start);
- MEMORY_PROP(t, heap_end);
-
- /* We've never seen weights used in practice, but we know from the
- * kernel these fields are there */
-
- bool nonzero_weights = false;
-
- for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
- nonzero_weights |= t->weights[w] != 0x0;
- }
-
- if (nonzero_weights) {
- pandecode_log(".weights = {");
-
- for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
- pandecode_log("%d, ", t->weights[w]);
- }
-
- pandecode_log("},");
- }
-
- pandecode_indent--;
- pandecode_log("}\n");
-}
-
/* Information about the framebuffer passed back for
* additional analysis */
pandecode_log_cont(",\n");
pandecode_prop("nr_channels = MALI_POSITIVE(%d)",
- MALI_NEGATIVE(format.nr_channels));
+ (format.nr_channels + 1));
pandecode_log(".unk2 = ");
pandecode_log_decoded_flags(sfbd_unk2_info, format.unk2);
pandecode_prop("clear_stencil = 0x%x", s->clear_stencil);
}
- MEMORY_PROP(s, unknown_address_0);
+ MEMORY_PROP(s, scratchpad);
const struct midgard_tiler_descriptor t = s->tiler;
- if (gpu_id == 0x0720 || gpu_id == 0x0820 || gpu_id == 0x0830)
- /* These ones don't have an "Advanced Tiling Unit" */
- pandecode_midgard_tiler_descriptor_0x20(&t);
- else
- pandecode_midgard_tiler_descriptor(&t, s->width + 1, s->height + 1, is_fragment);
+
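+ /* T720/T820/T830 have no Advanced Tiling Unit, so the hierarchy mask
+ * does not apply to their size calculations */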
+ bool has_hierarchy = !(gpu_id == 0x0720 || gpu_id == 0x0820 || gpu_id == 0x0830);
+ pandecode_midgard_tiler_descriptor(&t, s->width + 1, s->height + 1, is_fragment, has_hierarchy);
pandecode_indent--;
pandecode_log("};\n");
pandecode_log_cont(",\n");
pandecode_prop("nr_channels = MALI_POSITIVE(%d)",
- MALI_NEGATIVE(format.nr_channels));
+ (format.nr_channels + 1));
pandecode_log(".flags = ");
pandecode_log_decoded_flags(mfbd_fmt_flag_info, format.flags);
pandecode_log("struct bifrost_render_target rts_list_%"PRIx64"_%d[] = {\n", gpu_va, job_no);
pandecode_indent++;
- for (int i = 0; i < MALI_NEGATIVE(fb->rt_count_1); i++) {
+ for (int i = 0; i < (fb->rt_count_1 + 1); i++) {
mali_ptr rt_va = gpu_va + i * sizeof(struct bifrost_render_target);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing(rt_va);
pandecode_log("struct bifrost_framebuffer framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no);
pandecode_indent++;
+ pandecode_prop("stack_shift = 0x%x", fb->stack_shift);
pandecode_prop("unk0 = 0x%x", fb->unk0);
if (fb->sample_locations)
pandecode_prop("unknown2 = 0x%x", fb->unknown2);
MEMORY_PROP(fb, scratchpad);
const struct midgard_tiler_descriptor t = fb->tiler;
- pandecode_midgard_tiler_descriptor(&t, fb->width1 + 1, fb->height1 + 1, is_fragment);
+ pandecode_midgard_tiler_descriptor(&t, fb->width1 + 1, fb->height1 + 1, is_fragment, true);
if (fb->zero3 || fb->zero4) {
pandecode_msg("XXX: framebuffer zeros tripped\n");
for (int i = 0; i < count; ++i) {
/* First, check for special records */
- if (attr[i].elements < MALI_VARYING_SPECIAL) {
- /* Special records are always varyings */
+ if (attr[i].elements < MALI_RECORD_SPECIAL) {
+ if (attr[i].size)
+ pandecode_msg("XXX: tripped size=%d\n", attr[i].size);
+
+ if (attr[i].stride) {
+ /* gl_InstanceID passes a magic divisor in the
+ * stride field to divide by the padded vertex
+ * count. No other records should do so, so
+ * stride should otherwise be zero. Note that
+ * stride in the usual attribute sense doesn't
+ * apply to special records. */
+
+ bool has_divisor = attr[i].elements == MALI_ATTR_INSTANCEID;
+
+ pandecode_log_cont("/* %smagic divisor = %X */ ",
+ has_divisor ? "" : "XXX: ", attr[i].stride);
+ }
- if (!varying)
- pandecode_msg("XXX: Special varying in attribute field\n");
+ if (attr[i].shift || attr[i].extra_flags) {
+ /* Attributes use these fields for
+ * instancing/padding/etc type issues, but
+ * varyings don't */
- if (job_type != JOB_TYPE_TILER)
- pandecode_msg("XXX: Special varying in non-FS\n");
+ pandecode_log_cont("/* %sshift=%d, extra=%d */ ",
+ varying ? "XXX: " : "",
+ attr[i].shift, attr[i].extra_flags);
+ }
- /* We're special, so all fields should be zero */
- unsigned zero = attr[i].stride | attr[i].size;
- zero |= attr[i].shift | attr[i].extra_flags;
+ /* Print the special record name */
+ bool attribute = false;
+ pandecode_log("%s_%d = %s;\n", prefix, i, pandecode_special_record(attr[i].elements, &attribute));
- if (zero)
- pandecode_msg("XXX: Special varying has non-zero fields\n");
- else {
- /* Print the special varying name */
- pandecode_log("varying_%d = %s;", i, pandecode_special_varying(attr[i].elements));
- continue;
- }
- }
+ /* Sanity check: attribute-type special records should only show up
+ * in attribute arrays, and varying-type ones only in varying arrays */
+ if (attribute == varying)
+ pandecode_msg("XXX: mismatched special record\n");
+
+ continue;
+ }
enum mali_attr_mode mode = attr[i].elements & 7;
}
static void
-pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bool noninstanced)
+pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bool graphics)
{
pandecode_log_cont("{\n");
pandecode_indent++;
* invocation_count for an explanation.
*/
- unsigned size_x = bits(p->invocation_count, 0, p->size_y_shift) + 1;
- unsigned size_y = bits(p->invocation_count, p->size_y_shift, p->size_z_shift) + 1;
- unsigned size_z = bits(p->invocation_count, p->size_z_shift, p->workgroups_x_shift) + 1;
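+ /* invocation_shifts packs the six shift fields that partition
+ * invocation_count: size_y [0:5), size_z [5:10), workgroups_x [10:16),
+ * workgroups_y [16:22), workgroups_z [22:28), workgroups_x_2 [28:32) */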
+ unsigned size_y_shift = bits(p->invocation_shifts, 0, 5);
+ unsigned size_z_shift = bits(p->invocation_shifts, 5, 10);
+ unsigned workgroups_x_shift = bits(p->invocation_shifts, 10, 16);
+ unsigned workgroups_y_shift = bits(p->invocation_shifts, 16, 22);
+ unsigned workgroups_z_shift = bits(p->invocation_shifts, 22, 28);
+ unsigned workgroups_x_shift_2 = bits(p->invocation_shifts, 28, 32);
- unsigned groups_x = bits(p->invocation_count, p->workgroups_x_shift, p->workgroups_y_shift) + 1;
- unsigned groups_y = bits(p->invocation_count, p->workgroups_y_shift, p->workgroups_z_shift) + 1;
- unsigned groups_z = bits(p->invocation_count, p->workgroups_z_shift, 32) + 1;
+ unsigned size_x = bits(p->invocation_count, 0, size_y_shift) + 1;
+ unsigned size_y = bits(p->invocation_count, size_y_shift, size_z_shift) + 1;
+ unsigned size_z = bits(p->invocation_count, size_z_shift, workgroups_x_shift) + 1;
+
+ unsigned groups_x = bits(p->invocation_count, workgroups_x_shift, workgroups_y_shift) + 1;
+ unsigned groups_y = bits(p->invocation_count, workgroups_y_shift, workgroups_z_shift) + 1;
+ unsigned groups_z = bits(p->invocation_count, workgroups_z_shift, 32) + 1;
/* Even though we have this decoded, we want to ensure that the
* representation is "unique" so we don't lose anything by printing only
* decoded, we're good to go. */
struct mali_vertex_tiler_prefix ref;
- panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, noninstanced);
+ panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, graphics);
bool canonical =
(p->invocation_count == ref.invocation_count) &&
- (p->size_y_shift == ref.size_y_shift) &&
- (p->size_z_shift == ref.size_z_shift) &&
- (p->workgroups_x_shift == ref.workgroups_x_shift) &&
- (p->workgroups_y_shift == ref.workgroups_y_shift) &&
- (p->workgroups_z_shift == ref.workgroups_z_shift) &&
- (p->workgroups_x_shift_2 == ref.workgroups_x_shift_2);
+ (p->invocation_shifts == ref.invocation_shifts);
if (!canonical) {
pandecode_msg("XXX: non-canonical workgroups packing\n");
- pandecode_msg("expected: %X, %d, %d, %d, %d, %d, %d\n",
+ pandecode_msg("expected: %X, %X",
ref.invocation_count,
- ref.size_y_shift,
- ref.size_z_shift,
- ref.workgroups_x_shift,
- ref.workgroups_y_shift,
- ref.workgroups_z_shift,
- ref.workgroups_x_shift_2);
+ ref.invocation_shifts);
pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
- pandecode_prop("size_y_shift = %d", p->size_y_shift);
- pandecode_prop("size_z_shift = %d", p->size_z_shift);
- pandecode_prop("workgroups_x_shift = %d", p->workgroups_x_shift);
- pandecode_prop("workgroups_y_shift = %d", p->workgroups_y_shift);
- pandecode_prop("workgroups_z_shift = %d", p->workgroups_z_shift);
- pandecode_prop("workgroups_x_shift_2 = %d", p->workgroups_x_shift_2);
+ pandecode_prop("size_y_shift = %d", size_y_shift);
+ pandecode_prop("size_z_shift = %d", size_z_shift);
+ pandecode_prop("workgroups_x_shift = %d", workgroups_x_shift);
+ pandecode_prop("workgroups_y_shift = %d", workgroups_y_shift);
+ pandecode_prop("workgroups_z_shift = %d", workgroups_z_shift);
+ pandecode_prop("workgroups_x_shift_2 = %d", workgroups_x_shift_2);
}
/* Regardless, print the decode */
pandecode_log("};\n");
}
+static const char *
+shader_type_for_job(unsigned type)
+{
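+ /* For shader-db naming purposes, the shader attached to a TILER job
+ * is the fragment shader */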
+ switch (type) {
+ case JOB_TYPE_VERTEX: return "VERTEX";
+ case JOB_TYPE_TILER: return "FRAGMENT";
+ case JOB_TYPE_COMPUTE: return "COMPUTE";
+ default:
+ return "UNKNOWN";
+ }
+}
+
static unsigned shader_id = 0;
static struct midgard_disasm_stats
MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX);
}
- /* Print shader-db stats */
+ /* Print shader-db stats. Skip COMPUTE jobs, since the blob uses them
+ * for driver-internal purposes and they would skew the stats */
+
+ bool should_shaderdb = type != JOB_TYPE_COMPUTE;
- unsigned nr_threads =
- (stats.work_count <= 4) ? 4 :
- (stats.work_count <= 8) ? 2 :
- 1;
+ if (should_shaderdb) {
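+ /* Midgard occupancy: up to 4 work registers runs 4 threads,
+ * up to 8 runs 2 threads, otherwise just 1 */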
+ unsigned nr_threads =
+ (stats.work_count <= 4) ? 4 :
+ (stats.work_count <= 8) ? 2 :
+ 1;
- printf("shader%d - %s shader: "
- "%u inst, %u bundles, %u quadwords, "
- "%u registers, %u threads, 0 loops\n\n\n",
- shader_id++,
- (type == JOB_TYPE_TILER) ? "FRAGMENT" : "VERTEX",
- stats.instruction_count, stats.bundle_count, stats.quadword_count,
- stats.work_count, nr_threads);
+ printf("shader%d - MESA_SHADER_%s shader: "
+ "%u inst, %u bundles, %u quadwords, "
+ "%u registers, %u threads, 0 loops, 0:0 spills:fills\n\n\n",
+ shader_id++,
+ shader_type_for_job(type),
+ stats.instruction_count, stats.bundle_count, stats.quadword_count,
+ stats.work_count, nr_threads);
+ }
return stats;
* properties, but dump extra
* possibilities to futureproof */
- int bitmap_count = MALI_NEGATIVE(t->levels);
+ int bitmap_count = t->levels + 1;
/* Miptree for each face */
if (f.type == MALI_TEX_CUBE)
bitmap_count *= 6;
+ else if (f.type == MALI_TEX_3D)
+ bitmap_count *= t->depth;
/* Array of textures */
- bitmap_count *= MALI_NEGATIVE(t->array_size);
+ bitmap_count *= (t->array_size + 1);
/* Stride for each element */
if (f.manual_stride)
bitmap_count *= 2;
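+ /* For example, a cube map with levels = 7 (8 mips), array_size = 0
+ * (one layer) and manual strides needs (7 + 1) * 6 * (0 + 1) * 2 = 96
+ * payload entries */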
- /* Sanity check the size */
- int max_count = sizeof(t->payload) / sizeof(t->payload[0]);
- assert (bitmap_count <= max_count);
-
+ mali_ptr *pointers_and_strides = pandecode_fetch_gpu_mem(tmem,
+ u + sizeof(*t), sizeof(mali_ptr) * bitmap_count);
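+ /* The payload of pointers (interleaved with strides when
+ * manual_stride is set) follows immediately after the descriptor in
+ * GPU memory */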
for (int i = 0; i < bitmap_count; ++i) {
/* How we dump depends if this is a stride or a pointer */
if (f.manual_stride && (i & 1)) {
/* signed 32-bit snuck in as a 64-bit pointer */
- uint64_t stride_set = t->payload[i];
+ uint64_t stride_set = pointers_and_strides[i];
uint32_t clamped_stride = stride_set;
int32_t stride = clamped_stride;
assert(stride_set == clamped_stride);
pandecode_log("(mali_ptr) %d /* stride */, \n", stride);
} else {
- char *a = pointer_as_memory_reference(t->payload[i]);
+ char *a = pointer_as_memory_reference(pointers_and_strides[i]);
pandecode_log("%s, \n", a);
free(a);
}
};
if (is_bifrost)
- pandecode_scratchpad(p->framebuffer & ~FBD_TYPE, job_no, suffix);
+ pandecode_scratchpad(p->framebuffer & ~1, job_no, suffix);
else if (p->framebuffer & MALI_MFBD)
fbd_info = pandecode_mfbd_bfr((u64) ((uintptr_t) p->framebuffer) & FBD_MASK, job_no, false);
else if (job_type == JOB_TYPE_COMPUTE)
pandecode_prop("wrap_t = %s", pandecode_wrap_mode(s->wrap_t));
pandecode_prop("wrap_r = %s", pandecode_wrap_mode(s->wrap_r));
- pandecode_prop("compare_func = %s", pandecode_alt_func(s->compare_func));
+ pandecode_prop("compare_func = %s", pandecode_func(s->compare_func));
if (s->zero || s->zero2) {
pandecode_msg("XXX: sampler zero tripped\n");
pandecode_vertex_tiler_postfix(const struct mali_vertex_tiler_postfix *p, int job_no, bool is_bifrost)
{
if (p->shader & 0xF)
- pandecode_msg("warn: shader tagged %X\n", p->shader & 0xF);
+ pandecode_msg("warn: shader tagged %X\n", (unsigned) (p->shader & 0xF));
if (!(p->position_varying || p->occlusion_counter))
return;
bool has_primitive_pointer = v->prefix.unknown_draw & MALI_DRAW_VARYING_SIZE;
pandecode_primitive_size(v->primitive_size, !has_primitive_pointer);
- bool instanced = v->instance_shift || v->instance_odd;
bool is_graphics = (h->job_type == JOB_TYPE_VERTEX) || (h->job_type == JOB_TYPE_TILER);
pandecode_log(".prefix = ");
- pandecode_vertex_tiler_prefix(&v->prefix, job_no, !instanced && is_graphics);
+ pandecode_vertex_tiler_prefix(&v->prefix, job_no, is_graphics);
pandecode_gl_enables(v->gl_enables, h->job_type);
{
const struct mali_payload_fragment *PANDECODE_PTR_VAR(s, mem, payload);
- bool is_mfbd = (s->framebuffer & FBD_TYPE) == MALI_MFBD;
+ bool is_mfbd = s->framebuffer & MALI_MFBD;
/* Bifrost theoretically may retain support for SFBD on compute jobs,
* but for graphics workloads with a FRAGMENT payload, use MFBD */
* additional structures follow the MFBD header (an extra payload or
* not, as well as a count of render targets) */
- unsigned expected_tag = is_mfbd ? MALI_MFBD : MALI_SFBD;
+ unsigned expected_tag = is_mfbd ? MALI_MFBD : 0;
if (is_mfbd) {
if (info.has_extra)
* reason. */
switch (h->job_type) {
- case JOB_TYPE_SET_VALUE: {
- struct mali_payload_set_value *s = payload;
- pandecode_log("struct mali_payload_set_value payload_%"PRIx64"_%d = {\n", payload_ptr, job_no);
+ case JOB_TYPE_WRITE_VALUE: {
+ struct mali_payload_write_value *s = payload;
+ pandecode_log("struct mali_payload_write_value payload_%"PRIx64"_%d = {\n", payload_ptr, job_no);
pandecode_indent++;
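+ /* A WRITE_VALUE job stores a value at `address`; so far only the
+ * ZERO value descriptor has been observed, hence the warning below
+ * for anything else */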
- MEMORY_PROP(s, out);
- pandecode_prop("unknown = 0x%" PRIX64, s->unknown);
+ MEMORY_PROP(s, address);
+
+ if (s->value_descriptor != MALI_WRITE_VALUE_ZERO) {
+ pandecode_msg("XXX: unknown value descriptor\n");
+ pandecode_prop("value_descriptor = 0x%" PRIX32, s->value_descriptor);
+ }
+
+ if (s->reserved) {
+ pandecode_msg("XXX: set value tripped\n");
+ pandecode_prop("reserved = 0x%" PRIX32, s->reserved);
+ }
+
+ pandecode_prop("immediate = 0x%" PRIX64, s->immediate);
pandecode_indent--;
pandecode_log("};\n");
if (!first) {
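+ /* Patch the previous job's next_job pointer so the generated replay
+ * chains to this job */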
pandecode_log("((struct mali_job_descriptor_header *) (uintptr_t) job_%d_p)->", job_no - 1);
-
- if (last_size)
- pandecode_log_cont("next_job_64 = job_%d_p;\n\n", job_no);
- else
- pandecode_log_cont("next_job_32 = (u32) (uintptr_t) job_%d_p;\n\n", job_no);
+ pandecode_log_cont("next_job = job_%d_p;\n\n", job_no);
}
first = false;
- } while ((jc_gpu_va = h->job_descriptor_size ? h->next_job_64 : h->next_job_32));
+ } while ((jc_gpu_va = h->next_job));
return start_number;
}