From d8deb1eb6a2244e765a1789c87b32ff43bd5349c Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 17 Aug 2020 13:14:54 -0400 Subject: [PATCH] panfrost: Share tiler_heap across batches/contexts There's only one tiler, so this is safe. (The blob does the same optimization.) This avoids allocating multiple heaps for multiple batches, which wastes memory and CPU time. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Tomeu Vizoso Part-of: --- src/gallium/drivers/panfrost/pan_context.c | 10 ++----- src/gallium/drivers/panfrost/pan_job.c | 34 +++++++--------------- src/gallium/drivers/panfrost/pan_job.h | 3 -- src/panfrost/lib/pan_device.h | 8 +++++ src/panfrost/lib/pan_props.c | 8 +++++ 5 files changed, 28 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index e9ddba22256..fa4fd00d1a9 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -76,20 +76,14 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count) t.polygon_list_size = panfrost_tiler_full_size( width, height, t.hierarchy_mask, hierarchy); - /* Sanity check */ - if (vertex_count) { - struct panfrost_bo *tiler_heap; - - tiler_heap = panfrost_batch_get_tiler_heap(batch); t.polygon_list = panfrost_batch_get_polygon_list(batch, header_size + t.polygon_list_size); - /* Allow the entire tiler heap */ - t.heap_start = tiler_heap->gpu; - t.heap_end = tiler_heap->gpu + tiler_heap->size; + t.heap_start = device->tiler_heap->gpu; + t.heap_end = device->tiler_heap->gpu + device->tiler_heap->size; } else { struct panfrost_bo *tiler_dummy; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index b1c6805b6e6..115afade0b6 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -679,23 +679,6 @@ panfrost_batch_get_shared_memory(struct panfrost_batch *batch, return 
batch->shared_memory; } -struct panfrost_bo * -panfrost_batch_get_tiler_heap(struct panfrost_batch *batch) -{ - if (batch->tiler_heap) - return batch->tiler_heap; - - batch->tiler_heap = panfrost_batch_create_bo(batch, 4096 * 4096, - PAN_BO_INVISIBLE | - PAN_BO_GROWABLE, - PAN_BO_ACCESS_PRIVATE | - PAN_BO_ACCESS_RW | - PAN_BO_ACCESS_VERTEX_TILER | - PAN_BO_ACCESS_FRAGMENT); - assert(batch->tiler_heap); - return batch->tiler_heap; -} - mali_ptr panfrost_batch_get_tiler_meta(struct panfrost_batch *batch, unsigned vertex_count) { @@ -705,14 +688,13 @@ panfrost_batch_get_tiler_meta(struct panfrost_batch *batch, unsigned vertex_coun if (batch->tiler_meta) return batch->tiler_meta; - struct panfrost_bo *tiler_heap; - tiler_heap = panfrost_batch_get_tiler_heap(batch); + struct panfrost_device *dev = pan_device(batch->ctx->base.screen); struct bifrost_tiler_heap_meta tiler_heap_meta = { - .heap_size = tiler_heap->size, - .tiler_heap_start = tiler_heap->gpu, - .tiler_heap_free = tiler_heap->gpu, - .tiler_heap_end = tiler_heap->gpu + tiler_heap->size, + .heap_size = dev->tiler_heap->size, + .tiler_heap_start = dev->tiler_heap->gpu, + .tiler_heap_free = dev->tiler_heap->gpu, + .tiler_heap_end = dev->tiler_heap->gpu + dev->tiler_heap->size, .unk1 = 0x1, .unk7e007e = 0x7e007e, }; @@ -997,7 +979,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch, submit.jc = first_job_desc; submit.requirements = reqs; - bo_handles = calloc(batch->pool.bos->entries + batch->invisible_pool.bos->entries + batch->bos->entries, sizeof(*bo_handles)); + bo_handles = calloc(batch->pool.bos->entries + batch->invisible_pool.bos->entries + batch->bos->entries + 1, sizeof(*bo_handles)); assert(bo_handles); hash_table_foreach(batch->bos, entry) @@ -1009,6 +991,10 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch, hash_table_foreach(batch->invisible_pool.bos, entry) panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++); + /* Used by all tiler jobs (XXX: skip for 
compute-only) */ + if (!(reqs & PANFROST_JD_REQ_FS)) + bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle; + submit.bo_handles = (u64) (uintptr_t) bo_handles; ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit); free(bo_handles); diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index 664d5da6683..eb3cc58d573 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -182,9 +182,6 @@ panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, un mali_ptr panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size); -struct panfrost_bo * -panfrost_batch_get_tiler_heap(struct panfrost_batch *batch); - struct panfrost_bo * panfrost_batch_get_tiler_dummy(struct panfrost_batch *batch); diff --git a/src/panfrost/lib/pan_device.h b/src/panfrost/lib/pan_device.h index 29eb599e1bf..d3d0706f02a 100644 --- a/src/panfrost/lib/pan_device.h +++ b/src/panfrost/lib/pan_device.h @@ -127,6 +127,14 @@ struct panfrost_device { } bo_cache; struct pan_blit_shaders blit_shaders; + + /* Tiler heap shared across all tiler jobs, allocated against the + * device since there's only a single tiler. Since this is invisible to + * the CPU, it's okay for multiple contexts to reference it + * simultaneously; by keeping it on the device struct, we eliminate a + * costly per-context allocation. 
*/ + + struct panfrost_bo *tiler_heap; }; void diff --git a/src/panfrost/lib/pan_props.c b/src/panfrost/lib/pan_props.c index a4ff28506df..7176f9ba115 100644 --- a/src/panfrost/lib/pan_props.c +++ b/src/panfrost/lib/pan_props.c @@ -172,12 +172,20 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) list_inithead(&dev->bo_cache.buckets[i]); + + /* Tiler heap is internally required by the tiler, which can only be + * active for a single job chain at once, so a single heap can be + * shared across batches/contexts */ + + dev->tiler_heap = panfrost_bo_create(dev, 4096 * 4096, + PAN_BO_INVISIBLE | PAN_BO_GROWABLE); } void panfrost_close_device(struct panfrost_device *dev) { panfrost_bo_unreference(dev->blit_shaders.bo); + panfrost_bo_unreference(dev->tiler_heap); panfrost_bo_cache_evict_all(dev); pthread_mutex_destroy(&dev->bo_cache.lock); drmFreeVersion(dev->kernel_version); -- 2.30.2