From 9603126b74d03bc6974ea116ce4f7d80fb9573aa Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 6 Feb 2020 14:29:42 -0500 Subject: [PATCH] panfrost: Allocate RAM backing of shared memory Unlike other GPUs, Mali does not have dedicated shared memory for compute workloads. Instead, we allocate shared memory (backed by RAM), and the general memory access functions have modes to access shared memory (essentially, think of these modes as adding this allocation's base + workgroupid * stride in hardware). So let's allocate enough memory based on the shared_size parameter and supply it. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_assemble.c | 1 + src/gallium/drivers/panfrost/pan_compute.c | 14 +++++++++++++- src/gallium/drivers/panfrost/pan_context.h | 1 + src/gallium/drivers/panfrost/pan_job.c | 18 ++++++++++++++++++ src/gallium/drivers/panfrost/pan_job.h | 11 ++++++++++- 5 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 6f84248b2f1..31156c48fd5 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -124,6 +124,7 @@ panfrost_shader_compile( /* TODO: images */ meta->attribute_count = 0; meta->varying_count = 0; + state->shared_size = s->info.cs.shared_size; break; default: unreachable("Unknown shader state"); diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index f4c28c36429..1901f58dda7 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -27,6 +27,7 @@ */ #include "pan_context.h" +#include "pan_bo.h" #include "util/u_memory.h" #include "nir_serialize.h" @@ -111,6 +112,8 @@ panfrost_launch_grid(struct pipe_context *pipe, /* TODO: Stub */ struct midgard_payload_vertex_tiler *payload = &ctx->payloads[PIPE_SHADER_COMPUTE]; + struct panfrost_shader_variants *all = 
ctx->shader[PIPE_SHADER_COMPUTE]; + struct panfrost_shader_state *ss = &all->variants[all->active_variant]; /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so * reuse the graphics path for this by lowering to Gallium */ @@ -127,8 +130,17 @@ panfrost_launch_grid(struct pipe_context *pipe, panfrost_emit_for_draw(ctx, false); + unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size, 128)); + unsigned shared_size = single_size * info->grid[0] * info->grid[1] * info->grid[2] * 4; + struct mali_shared_memory shared = { - .shared_workgroup_count = ~0 + .shared_memory = panfrost_batch_get_shared_memory(batch, shared_size, 1)->gpu, + .shared_workgroup_count = + util_logbase2_ceil(info->grid[0]) + + util_logbase2_ceil(info->grid[1]) + + util_logbase2_ceil(info->grid[2]), + .shared_unk1 = 0x2, + .shared_shift = util_logbase2(single_size) - 1 }; payload->postfix.shared_memory = diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index fcfcafb135a..b2736d46d24 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -216,6 +216,7 @@ struct panfrost_shader_state { bool reads_face; bool reads_frag_coord; unsigned stack_size; + unsigned shared_size; struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS]; gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index 8f3acd24e1b..fb9812d29fd 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -670,6 +670,24 @@ panfrost_batch_get_scratchpad(struct panfrost_batch *batch, return batch->scratchpad; } +struct panfrost_bo * +panfrost_batch_get_shared_memory(struct panfrost_batch *batch, + unsigned size, + unsigned workgroup_count) +{ + if (batch->shared_memory) { + assert(batch->shared_memory->size >= size); + } else { + batch->shared_memory = panfrost_batch_create_bo(batch, size, + 
PAN_BO_INVISIBLE, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_RW | + PAN_BO_ACCESS_VERTEX_TILER); + } + + return batch->shared_memory; +} + struct panfrost_bo * panfrost_batch_get_tiler_heap(struct panfrost_batch *batch) { diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index ab2db010ef9..55da6455302 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -86,6 +86,9 @@ struct panfrost_batch { /* Amount of thread local storage required per thread */ unsigned stack_size; + /* Amount of shared memory needed per workgroup (for compute) */ + unsigned shared_size; + /* Whether this job uses the corresponding requirement (PAN_REQ_* * bitmask) */ unsigned requirements; @@ -142,9 +145,12 @@ struct panfrost_batch { /* Polygon list bound to the batch, or NULL if none bound yet */ struct panfrost_bo *polygon_list; - /* Scratchpath BO bound to the batch, or NULL if none bound yet */ + /* Scratchpad BO bound to the batch, or NULL if none bound yet */ struct panfrost_bo *scratchpad; + /* Shared memory BO bound to the batch, or NULL if none bound yet */ + struct panfrost_bo *shared_memory; + /* Tiler heap BO bound to the batch, or NULL if none bound yet */ struct panfrost_bo *tiler_heap; @@ -205,6 +211,9 @@ panfrost_batch_set_requirements(struct panfrost_batch *batch); struct panfrost_bo * panfrost_batch_get_scratchpad(struct panfrost_batch *batch, unsigned shift, unsigned thread_tls_alloc, unsigned core_count); +struct panfrost_bo * +panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, unsigned workgroup_count); + mali_ptr panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size); -- 2.30.2