From 31fc52a4e74e11452f93498b64b4e235b23b4ecc Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Wed, 10 Jul 2019 07:22:19 -0700
Subject: [PATCH] panfrost: Identify shared tiler structure

This is identical across SFBD/MFBD so pull it out to allow for better
code sharing.

Signed-off-by: Alyssa Rosenzweig
---
 .../drivers/panfrost/include/panfrost-job.h   |  76 ++++++-----
 src/gallium/drivers/panfrost/pan_context.c    | 127 +++++++++---------
 .../drivers/panfrost/pandecode/decode.c       |  80 +++++++-----
 3 files changed, 148 insertions(+), 135 deletions(-)

diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h b/src/gallium/drivers/panfrost/include/panfrost-job.h
index 5c93f319b65..0c559309946 100644
--- a/src/gallium/drivers/panfrost/include/panfrost-job.h
+++ b/src/gallium/drivers/panfrost/include/panfrost-job.h
@@ -1334,6 +1334,40 @@ struct mali_payload_fragment {
 #define MALI_CLEAR_SLOW (1 << 28)
 #define MALI_CLEAR_SLOW_STENCIL (1 << 31)
 
+/* Configures hierarchical tiling on Midgard for both SFBD/MFBD (embedded
+ * within the larger framebuffer descriptor). Analogous to
+ * bifrost_tiler_heap_meta and bifrost_tiler_meta */
+
+struct midgard_tiler_descriptor {
+        /* Size of the entire polygon list; see pan_tiler.c for the
+         * computation. It's based on hierarchical tiling */
+
+        u32 polygon_list_size;
+
+        /* Name known from the replay workaround in the kernel. What exactly is
+         * flagged here is less known. We do know that (hierarchy_mask & 0x1ff)
+         * specifies a mask of hierarchy weights, which explains some of the
+         * performance mysteries around setting it. We also see the bottom bit
+         * of flags set in the kernel, but no comment why. */
+
+        u16 hierarchy_mask;
+        u16 flags;
+
+        /* See mali_tiler.c for an explanation */
+        mali_ptr polygon_list;
+        mali_ptr polygon_list_body;
+
+        /* Names are based on the symmetry we see with replay jobs, which name
+         * these explicitly */
+
+        mali_ptr heap_start; /* tiler heap_free_address */
+        mali_ptr heap_end;
+
+        /* Hierarchy weights. We know these are weights based on the kernel,
+         * but I've never seen them be anything other than zero */
+        u32 weights[8];
+};
+
 struct mali_single_framebuffer {
         u32 unknown1;
         u32 unknown2;
@@ -1394,22 +1428,7 @@ struct mali_single_framebuffer {
 
         u32 zero6[7];
 
-        /* Logically, by symmetry to the MFBD, this ought to be the size of the
-         * polygon list. But this doesn't quite compute up. More investigation
-         * is needed. */
-
-        u32 tiler_resolution_check;
-
-        u16 tiler_hierarchy_mask;
-        u16 tiler_flags;
-
-        /* See pan_tiler.c */
-        mali_ptr tiler_polygon_list;
-        mali_ptr tiler_polygon_list_body;
-
-        /* See mali_kbase_replay.c */
-        mali_ptr tiler_heap_free;
-        mali_ptr tiler_heap_end;
+        struct midgard_tiler_descriptor tiler;
 
         /* More below this, maybe */
 } __attribute__((packed));
@@ -1574,30 +1593,7 @@ struct bifrost_framebuffer {
         u32 mfbd_flags : 24; // = 0x100
 
         float clear_depth;
-
-        /* Tiler section begins here */
-        u32 tiler_polygon_list_size;
-
-        /* Name known from the replay workaround in the kernel. What exactly is
-         * flagged here is less known. We do that (tiler_hierarchy_mask & 0x1ff)
-         * specifies a mask of hierarchy weights, which explains some of the
-         * performance mysteries around setting it. We also see the bottom bit
-         * of tiler_flags set in the kernel, but no comment why. */
-
-        u16 tiler_hierarchy_mask;
-        u16 tiler_flags;
-
-        /* See mali_tiler.c for an explanation */
-        mali_ptr tiler_polygon_list;
-        mali_ptr tiler_polygon_list_body;
-
-        /* Names based on we see symmetry with replay jobs which name these
-         * explicitly */
-
-        mali_ptr tiler_heap_start; /* tiler heap_free_address */
-        mali_ptr tiler_heap_end;
-
-        u32 tiler_weights[8];
+        struct midgard_tiler_descriptor tiler;
 
         /* optional: struct bifrost_fb_extra extra */
         /* struct bifrost_render_target rts[] */
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 29e36d02f8b..7d2a75482b8 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -78,38 +78,84 @@ panfrost_job_type_for_pipe(enum pipe_shader_type type)
 
 /* Framebuffer descriptor */
 
-static void
-panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h)
+/* Emits the tiler descriptor shared by the SFBD and MFBD. The hierarchy mask
+ * is chosen from the framebuffer size and vertex count. A nonzero mask points
+ * the descriptor at the context's polygon list and the whole tiler heap; a
+ * zero mask substitutes the dummy polygon list, an empty heap range, and
+ * sets bit 0x1000 in the mask. */
+
+static struct midgard_tiler_descriptor
+panfrost_emit_midg_tiler(
+        struct panfrost_context *ctx,
+        unsigned width,
+        unsigned height,
+        unsigned vertex_count)
 {
-        fb->width = MALI_POSITIVE(w);
-        fb->height = MALI_POSITIVE(h);
+        struct midgard_tiler_descriptor t = {};
 
-        /* No idea why this is needed, but it's how resolution_check is
-         * calculated. It's not clear to us yet why the hardware wants this.
-         * The formula itself was discovered mostly by manual bruteforce and
-         * aggressive algebraic simplification. */
+        t.hierarchy_mask =
+                panfrost_choose_hierarchy_mask(width, height, vertex_count);
 
-        fb->tiler_resolution_check = ((w + h) / 3) << 4;
+        /* Compute the polygon header size and use that to offset the body */
+
+        unsigned header_size = panfrost_tiler_header_size(
+                        width, height, t.hierarchy_mask);
+
+        unsigned body_size = panfrost_tiler_body_size(
+                        width, height, t.hierarchy_mask);
+
+        /* Sanity check */
+
+        unsigned total_size = header_size + body_size;
+
+        if (t.hierarchy_mask) {
+                assert(ctx->tiler_polygon_list.bo->size >= total_size);
+
+                /* Specify allocated tiler structures */
+                t.polygon_list = ctx->tiler_polygon_list.bo->gpu;
+
+                /* Allow the entire tiler heap */
+                t.heap_start = ctx->tiler_heap.bo->gpu;
+                t.heap_end =
+                        ctx->tiler_heap.bo->gpu + ctx->tiler_heap.bo->size;
+        } else {
+                /* The tiler is disabled, so don't allow the tiler heap */
+                t.heap_start = ctx->tiler_heap.bo->gpu;
+                t.heap_end = t.heap_start;
+
+                /* Use a dummy polygon list */
+                t.polygon_list = ctx->tiler_dummy.bo->gpu;
+
+                /* Also, set a "tiler disabled?" flag? */
+                t.hierarchy_mask |= 0x1000;
+        }
+
+        t.polygon_list_body =
+                t.polygon_list + header_size;
+
+        t.polygon_list_size =
+                header_size + body_size;
+
+        return t;
 }
 
 struct mali_single_framebuffer
 panfrost_emit_sfbd(struct panfrost_context *ctx, unsigned vertex_count)
 {
+        unsigned width = ctx->pipe_framebuffer.width;
+        unsigned height = ctx->pipe_framebuffer.height;
+
         struct mali_single_framebuffer framebuffer = {
+                .width = MALI_POSITIVE(width),
+                .height = MALI_POSITIVE(height),
                 .unknown2 = 0x1f,
                 .format = 0x30000000,
                 .clear_flags = 0x1000,
                 .unknown_address_0 = ctx->scratchpad.bo->gpu,
-                .tiler_polygon_list = ctx->tiler_polygon_list.bo->gpu,
-                .tiler_polygon_list_body = ctx->tiler_polygon_list.bo->gpu + 40960,
-                .tiler_hierarchy_mask = 0xF0,
-                .tiler_flags = 0x0,
-                .tiler_heap_free = ctx->tiler_heap.bo->gpu,
-                .tiler_heap_end = ctx->tiler_heap.bo->gpu + ctx->tiler_heap.bo->size,
+                .tiler = panfrost_emit_midg_tiler(ctx,
+                                width, height, vertex_count),
         };
 
-        panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);
-
         return framebuffer;
 }
 
@@ -134,53 +180,10 @@ panfrost_emit_mfbd(struct panfrost_context *ctx, unsigned vertex_count)
 
                 .unknown2 = 0x1f,
                 .scratchpad = ctx->scratchpad.bo->gpu,
+                .tiler = panfrost_emit_midg_tiler(ctx,
+                                width, height, vertex_count)
         };
 
-        framebuffer.tiler_hierarchy_mask =
-                panfrost_choose_hierarchy_mask(width, height, vertex_count);
-
-        /* Compute the polygon header size and use that to offset the body */
-
-        unsigned header_size = panfrost_tiler_header_size(
-                        width, height, framebuffer.tiler_hierarchy_mask);
-
-        unsigned body_size = panfrost_tiler_body_size(
-                        width, height, framebuffer.tiler_hierarchy_mask);
-
-        /* Sanity check */
-
-        unsigned total_size = header_size + body_size;
-
-        if (framebuffer.tiler_hierarchy_mask) {
-                assert(ctx->tiler_polygon_list.bo->size >= total_size);
-
-                /* Specify allocated tiler structures */
-                framebuffer.tiler_polygon_list = ctx->tiler_polygon_list.bo->gpu;
-
-                /* Allow the entire tiler heap */
-                framebuffer.tiler_heap_start = ctx->tiler_heap.bo->gpu;
-                framebuffer.tiler_heap_end =
-                        ctx->tiler_heap.bo->gpu + ctx->tiler_heap.bo->size;
-        } else {
-                /* The tiler is disabled, so don't allow the tiler heap */
-                framebuffer.tiler_heap_start = ctx->tiler_heap.bo->gpu;
-                framebuffer.tiler_heap_end = framebuffer.tiler_heap_start;
-
-                /* Use a dummy polygon list */
-                framebuffer.tiler_polygon_list = ctx->tiler_dummy.bo->gpu;
-
-                /* Also, set a "tiler disabled?" flag? */
-                framebuffer.tiler_hierarchy_mask |= 0x1000;
-        }
-
-        framebuffer.tiler_polygon_list_body =
-                framebuffer.tiler_polygon_list + header_size;
-
-        framebuffer.tiler_polygon_list_size =
-                header_size + body_size;
-
-
-
         return framebuffer;
 }
 
diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c b/src/gallium/drivers/panfrost/pandecode/decode.c
index 189c2482316..1326fd1a916 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -444,6 +444,51 @@ pandecode_decode_fbd_type(enum mali_fbd_type type)
         else return "WATFBD /* XXX */";
 }
 
+/* Midgard's tiler descriptor is embedded within the
+ * larger FBD */
+
+static void
+pandecode_midgard_tiler_descriptor(const struct midgard_tiler_descriptor *t)
+{
+        pandecode_log(".tiler = {\n");
+        pandecode_indent++;
+
+        pandecode_prop("hierarchy_mask = 0x%" PRIx16, t->hierarchy_mask);
+        pandecode_prop("flags = 0x%" PRIx16, t->flags);
+        pandecode_prop("polygon_list_size = 0x%x", t->polygon_list_size);
+
+        MEMORY_PROP(t, polygon_list);
+        MEMORY_PROP(t, polygon_list_body);
+
+        MEMORY_PROP(t, heap_start);
+
+        {
+                /* Points to the end of a buffer */
+                char *a = pointer_as_memory_reference(t->heap_end - 1);
+                pandecode_prop("heap_end = %s + 1", a);
+                free(a);
+        }
+
+        bool nonzero_weights = false;
+
+        for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
+                nonzero_weights |= t->weights[w] != 0x0;
+        }
+
+        if (nonzero_weights) {
+                pandecode_log(".weights = {");
+
+                for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
+                        pandecode_log("%" PRIu32 ", ", t->weights[w]);
+                }
+
+                pandecode_log("},");
+        }
+
+        pandecode_indent--;
+        pandecode_log("}\n");
+}
+
 static void
 pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
 {
@@ -502,15 +547,7 @@ pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
         }
 
         MEMORY_PROP(s, unknown_address_0);
-        MEMORY_PROP(s, tiler_polygon_list);
-        MEMORY_PROP(s, tiler_polygon_list_body);
-
-        pandecode_prop("tiler_resolution_check = 0x%" PRIx32, s->tiler_resolution_check);
-        pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, s->tiler_hierarchy_mask);
-        pandecode_prop("tiler_flags = 0x%" PRIx16, s->tiler_flags);
-
-        MEMORY_PROP(s, tiler_heap_free);
-        MEMORY_PROP(s, tiler_heap_end);
+        pandecode_midgard_tiler_descriptor(&s->tiler);
 
         pandecode_indent--;
         pandecode_log("};\n");
@@ -716,10 +753,6 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
          * now */
         MEMORY_PROP(fb, unknown1);
 
-        pandecode_prop("tiler_polygon_list_size = 0x%x", fb->tiler_polygon_list_size);
-        pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, fb->tiler_hierarchy_mask);
-        pandecode_prop("tiler_flags = 0x%" PRIx16, fb->tiler_flags);
-
         pandecode_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1);
         pandecode_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1);
         pandecode_prop("width2 = MALI_POSITIVE(%d)", fb->width2 + 1);
@@ -739,10 +772,7 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
 
         pandecode_prop("unknown2 = 0x%x", fb->unknown2);
         MEMORY_PROP(fb, scratchpad);
-        MEMORY_PROP(fb, tiler_polygon_list);
-        MEMORY_PROP(fb, tiler_polygon_list_body);
-        MEMORY_PROP(fb, tiler_heap_start);
-        MEMORY_PROP(fb, tiler_heap_end);
+        pandecode_midgard_tiler_descriptor(&fb->tiler);
 
         if (fb->zero3 || fb->zero4) {
                 pandecode_msg("framebuffer zeros tripped\n");
@@ -750,22 +780,6 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
                 pandecode_prop("zero4 = 0x%" PRIx32, fb->zero4);
         }
 
-        bool nonzero_weights = false;
-
-        for (unsigned w = 0; w < ARRAY_SIZE(fb->tiler_weights); ++w) {
-                nonzero_weights |= fb->tiler_weights[w] != 0x0;
-        }
-
-        if (nonzero_weights) {
-                pandecode_log(".tiler_weights = {");
-
-                for (unsigned w = 0; w < ARRAY_SIZE(fb->tiler_weights); ++w) {
-                        pandecode_log("%d, ", fb->tiler_weights[w]);
-                }
-
-                pandecode_log("},");
-        }
-
         pandecode_indent--;
         pandecode_log("};\n");
 
-- 
2.30.2