panfrost: Allocate polygon lists on-demand
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 2 Aug 2019 17:18:48 +0000 (19:18 +0200)
committer Boris Brezillon <boris.brezillon@collabora.com>
Fri, 2 Aug 2019 19:54:58 +0000 (21:54 +0200)
Rather than allocating a huge (64MB) polygon list on context creation
and sharing it across framebuffers, we instead allocate polygon lists as
BOs (which consistently hit the cache) sized appropriately; for about a
month, we've known how to calculate the polygon list size so this has
only recently become possible.

The good news is we can render to truly massive framebuffers without
crashing and, more importantly, we eliminate the 64MB upfront overhead.
If a list that size isn't actually needed, it's not allocated.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
src/gallium/drivers/panfrost/pan_context.c
src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_drm.c
src/gallium/drivers/panfrost/pan_job.c
src/gallium/drivers/panfrost/pan_job.h
src/gallium/drivers/panfrost/pan_scoreboard.c

index d261a284212494ff0bc35edc043f9991018d58b9..e9d18605dd021b70f223c56ebcd1e9bbe4661c7f 100644 (file)
@@ -62,6 +62,7 @@ panfrost_emit_midg_tiler(
         unsigned vertex_count)
 {
         struct midgard_tiler_descriptor t = {};
+        struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);
 
         t.hierarchy_mask =
                 panfrost_choose_hierarchy_mask(width, height, vertex_count);
@@ -77,10 +78,7 @@ panfrost_emit_midg_tiler(
         /* Sanity check */
 
         if (t.hierarchy_mask) {
-                assert(ctx->tiler_polygon_list.bo->size >= (header_size + body_size));
-
-                /* Specify allocated tiler structures */
-                t.polygon_list = ctx->tiler_polygon_list.bo->gpu;
+                t.polygon_list = panfrost_job_get_polygon_list(batch, header_size + body_size);
 
                 /* Allow the entire tiler heap */
                 t.heap_start = ctx->tiler_heap.bo->gpu;
@@ -2532,7 +2530,6 @@ panfrost_destroy(struct pipe_context *pipe)
         panfrost_drm_free_slab(screen, &panfrost->scratchpad);
         panfrost_drm_free_slab(screen, &panfrost->shaders);
         panfrost_drm_free_slab(screen, &panfrost->tiler_heap);
-        panfrost_drm_free_slab(screen, &panfrost->tiler_polygon_list);
         panfrost_drm_free_slab(screen, &panfrost->tiler_dummy);
 
         ralloc_free(pipe);
@@ -2678,7 +2675,6 @@ panfrost_setup_hardware(struct panfrost_context *ctx)
         panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64*4, false, 0, 0, 0);
         panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
         panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 4096, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
-        panfrost_drm_allocate_slab(screen, &ctx->tiler_polygon_list, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
         panfrost_drm_allocate_slab(screen, &ctx->tiler_dummy, 1, false, PAN_ALLOCATE_INVISIBLE, 0, 0);
 }
 
index d8dbd4d66f6c21591ecfb3ffa2c00e5a4b4f578b..e8d2417e0cb2ba6f274f1876ad2aec6ce0e73fc5 100644 (file)
@@ -111,7 +111,6 @@ struct panfrost_context {
         struct panfrost_memory shaders;
         struct panfrost_memory scratchpad;
         struct panfrost_memory tiler_heap;
-        struct panfrost_memory tiler_polygon_list;
         struct panfrost_memory tiler_dummy;
         struct panfrost_memory depth_stencil_buffer;
 
index 89c7019dd9c75f6f5b0a1298ad0dfd134f9565d1..42cf175033448ac944e4bb9171efc90e788b9ba1 100644 (file)
@@ -288,7 +288,7 @@ panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool
         panfrost_job_add_bo(job, ctx->shaders.bo);
         panfrost_job_add_bo(job, ctx->scratchpad.bo);
         panfrost_job_add_bo(job, ctx->tiler_heap.bo);
-        panfrost_job_add_bo(job, ctx->tiler_polygon_list.bo);
+        panfrost_job_add_bo(job, job->polygon_list);
 
         if (job->first_job.gpu) {
                 ret = panfrost_drm_submit_job(ctx, job->first_job.gpu, 0);
index d75af0c5330834f4f0b9d47145b4605c2777a676..661f8ae154e269e6aee93d0d5dd67553729f0dda 100644 (file)
@@ -72,6 +72,9 @@ panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job *job)
                 BITSET_SET(screen->free_transient, *index);
         }
 
+        /* Unreference the polygon list */
+        panfrost_bo_unreference(ctx->base.screen, job->polygon_list);
+
         _mesa_hash_table_remove_key(ctx->jobs, &job->key);
 
         if (ctx->job == job)
@@ -160,6 +163,27 @@ panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo *bo)
         _mesa_set_add(job->bos, bo);
 }
 
+/* Returns the GPU address of the batch's polygon list. The first call creates
+ * a BO of `size` bytes; later calls only assert the BO is at least `size`.
+ * Direct BO alloc/free is fast: it hits the BO cache and is one-per-batch. */
+
+mali_ptr
+panfrost_job_get_polygon_list(struct panfrost_job *batch, unsigned size)
+{
+        if (batch->polygon_list) {
+                assert(batch->polygon_list->size >= size);
+        } else {
+                struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen);
+
+                /* Create the BO as invisible, as there's no reason to map */
+
+                batch->polygon_list = panfrost_drm_create_bo(screen,
+                                size, PAN_ALLOCATE_INVISIBLE);
+        }
+
+        return batch->polygon_list->gpu;
+}
+
 void
 panfrost_flush_jobs_writing_resource(struct panfrost_context *panfrost,
                                      struct pipe_resource *prsc)
index c6ae2a4eb9f4039ed149d69fb6b64f2baadf4dff..a5ea5bf41cc152adf61e1f06083bb975bb565e2c 100644 (file)
@@ -112,6 +112,9 @@ struct panfrost_job {
 
         /* Within the topmost transient BO, how much has been used? */
         unsigned transient_offset;
+
+        /* Polygon list bound to the batch, or NULL if none bound yet */
+        struct panfrost_bo *polygon_list;
 };
 
 /* Functions for managing the above */
@@ -150,6 +153,9 @@ void
 panfrost_job_set_requirements(struct panfrost_context *ctx,
                               struct panfrost_job *job);
 
+mali_ptr
+panfrost_job_get_polygon_list(struct panfrost_job *batch, unsigned size);
+
 void
 panfrost_job_clear(struct panfrost_context *ctx,
                    struct panfrost_job *job,
index 1d15ca8033814cca4b027dc3f6f11bdfae069312..bae9dfb3c251186887ababb09c6fbceeff54ca16 100644 (file)
@@ -303,10 +303,11 @@ panfrost_scoreboard_set_value(struct panfrost_job *batch)
         if (!batch->last_tiler.gpu)
                 return;
 
-        /* Okay, we do. Let's generate it */
+        /* Okay, we do. Let's generate it. We'll need the job's polygon list
+         * regardless of size. */
 
         struct panfrost_context *ctx = batch->ctx;
-        mali_ptr polygon_list = ctx->tiler_polygon_list.bo->gpu;
+        mali_ptr polygon_list = panfrost_job_get_polygon_list(batch, 0);
 
         struct panfrost_transfer job =
                 panfrost_set_value_job(ctx, polygon_list);