panfrost: Allocate RAM backing of shared memory
authorAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 6 Feb 2020 19:29:42 +0000 (14:29 -0500)
committerAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Sun, 16 Feb 2020 14:16:46 +0000 (09:16 -0500)
Unlike other GPUs, Mali does not have dedicated shared memory for
compute workloads. Instead, we allocate shared memory (backed by RAM),
and the general memory access functions have modes to access shared
memory (essentially, think of these modes as adding base +
workgroup_id * stride in hardware). So let's allocate enough memory
based on the shared_size parameter and supply it.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3835>

src/gallium/drivers/panfrost/pan_assemble.c
src/gallium/drivers/panfrost/pan_compute.c
src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_job.c
src/gallium/drivers/panfrost/pan_job.h

index 6f84248b2f1fb1ddfd8b5315f550054d9c6cedc2..31156c48fd5b04c779935a6b707aa9af55f67f1f 100644 (file)
@@ -124,6 +124,7 @@ panfrost_shader_compile(
                 /* TODO: images */
                 meta->attribute_count = 0;
                 meta->varying_count = 0;
+                state->shared_size = s->info.cs.shared_size;
                 break;
         default:
                 unreachable("Unknown shader state");
index f4c28c3642998537f0f6b8cb201ec4b3b5b5adea..1901f58dda7f73654b268f818d5ab406805ef452 100644 (file)
@@ -27,6 +27,7 @@
  */
 
 #include "pan_context.h"
+#include "pan_bo.h"
 #include "util/u_memory.h"
 #include "nir_serialize.h"
 
@@ -111,6 +112,8 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         /* TODO: Stub */
         struct midgard_payload_vertex_tiler *payload = &ctx->payloads[PIPE_SHADER_COMPUTE];
+        struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
+        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
 
         /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
          * reuse the graphics path for this by lowering to Gallium */
@@ -127,8 +130,17 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         panfrost_emit_for_draw(ctx, false);
 
+        unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size, 128));
+        unsigned shared_size = single_size * info->grid[0] * info->grid[1] * info->grid[2] * 4;
+
         struct mali_shared_memory shared = {
-                .shared_workgroup_count = ~0
+                .shared_memory = panfrost_batch_get_shared_memory(batch, shared_size, 1)->gpu,
+                .shared_workgroup_count =
+                        util_logbase2_ceil(info->grid[0]) +
+                        util_logbase2_ceil(info->grid[1]) +
+                        util_logbase2_ceil(info->grid[2]),
+                .shared_unk1 = 0x2,
+                .shared_shift = util_logbase2(single_size) - 1
         };
 
         payload->postfix.shared_memory =
index fcfcafb135a484a49f44ff136b151080cd48777b..b2736d46d2471d16e1b9deea6b0284739f841290 100644 (file)
@@ -216,6 +216,7 @@ struct panfrost_shader_state {
         bool reads_face;
         bool reads_frag_coord;
         unsigned stack_size;
+        unsigned shared_size;
 
         struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];
         gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS];
index 8f3acd24e1b6194517d47011303496a3e569c2e7..fb9812d29fdf1a7f15364df54ca2e2f4770eeb57 100644 (file)
@@ -670,6 +670,24 @@ panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
         return batch->scratchpad;
 }
 
+struct panfrost_bo *
+panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
+                unsigned size,
+                unsigned workgroup_count)
+{
+        if (batch->shared_memory) {
+                assert(batch->shared_memory->size >= size);
+        } else {
+                batch->shared_memory = panfrost_batch_create_bo(batch, size,
+                                             PAN_BO_INVISIBLE,
+                                             PAN_BO_ACCESS_PRIVATE |
+                                             PAN_BO_ACCESS_RW |
+                                             PAN_BO_ACCESS_VERTEX_TILER);
+        }
+
+        return batch->shared_memory;
+}
+
 struct panfrost_bo *
 panfrost_batch_get_tiler_heap(struct panfrost_batch *batch)
 {
index ab2db010ef951ae71b77332b8b6060c6bb098954..55da645530296c82e53fc0f1cdd9c32c1826a2ed 100644 (file)
@@ -86,6 +86,9 @@ struct panfrost_batch {
         /* Amount of thread local storage required per thread */
         unsigned stack_size;
 
+        /* Amount of shared memory needed per workgroup (for compute) */
+        unsigned shared_size;
+
         /* Whether this job uses the corresponding requirement (PAN_REQ_*
          * bitmask) */
         unsigned requirements;
@@ -142,9 +145,12 @@ struct panfrost_batch {
         /* Polygon list bound to the batch, or NULL if none bound yet */
         struct panfrost_bo *polygon_list;
 
-        /* Scratchpath BO bound to the batch, or NULL if none bound yet */
+        /* Scratchpad BO bound to the batch, or NULL if none bound yet */
         struct panfrost_bo *scratchpad;
 
+        /* Shared memory BO bound to the batch, or NULL if none bound yet */
+        struct panfrost_bo *shared_memory;
+
         /* Tiler heap BO bound to the batch, or NULL if none bound yet */
         struct panfrost_bo *tiler_heap;
 
@@ -205,6 +211,9 @@ panfrost_batch_set_requirements(struct panfrost_batch *batch);
 struct panfrost_bo *
 panfrost_batch_get_scratchpad(struct panfrost_batch *batch, unsigned shift, unsigned thread_tls_alloc, unsigned core_count);
 
+struct panfrost_bo *
+panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, unsigned workgroup_count);
+
 mali_ptr
 panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size);