From: Alyssa Rosenzweig Date: Mon, 17 Aug 2020 14:31:02 +0000 (-0400) Subject: panfrost: Introduce invisible pool X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=17c617cdb7f9710651b10f5b00669fc31c372c50 panfrost: Introduce invisible pool Whereas the main batch->pool is CPU read/write, the new batch->invisible_pool is not. This enables GPU-internal structures that the CPU must allocate from a pool dynamically but does not read, corresponding to the BO_INVISIBLE create flag. The use case is speeding up varying allocation by skipping the CPU-side mmap/munmap. We simultaneously halve the pool's minimal allocation to avoid negatively affecting memory usage. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Tomeu Vizoso Part-of: --- diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 90ff477ec66..02e50cb3c78 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1492,7 +1492,7 @@ panfrost_emit_varyings(struct panfrost_batch *batch, unsigned stride, unsigned count) { unsigned size = stride * count; - mali_ptr ptr = panfrost_pool_alloc(&batch->pool, size).gpu; + mali_ptr ptr = panfrost_pool_alloc(&batch->invisible_pool, size).gpu; pan_pack(slot, ATTRIBUTE_BUFFER, cfg) { cfg.stride = stride; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index 50616eb9017..a0160293f95 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -100,6 +100,7 @@ panfrost_create_batch(struct panfrost_context *ctx, const struct pipe_framebuffer_state *key) { struct panfrost_batch *batch = rzalloc(ctx, struct panfrost_batch); + struct panfrost_device *dev = pan_device(ctx->base.screen); batch->ctx = ctx; @@ -112,7 +113,15 @@ panfrost_create_batch(struct panfrost_context *ctx, batch->out_sync = panfrost_create_batch_fence(batch); util_copy_framebuffer_state(&batch->key, key); - 
batch->pool = panfrost_create_pool(batch, pan_device(ctx->base.screen), 0, true); + /* Preallocate the main pool, since every batch has at least one job + * structure so it will be used */ + batch->pool = panfrost_create_pool(batch, dev, 0, true); + + /* Don't preallocate the invisible pool, since not every batch will use + * the pre-allocation, particularly if the varyings are larger than the + * preallocation and a reallocation is needed after anyway. */ + batch->invisible_pool = + panfrost_create_pool(batch, dev, PAN_BO_INVISIBLE, false); panfrost_batch_add_fbo_bos(batch); @@ -170,6 +179,9 @@ panfrost_free_batch(struct panfrost_batch *batch) hash_table_foreach(batch->pool.bos, entry) panfrost_bo_unreference((struct panfrost_bo *)entry->key); + hash_table_foreach(batch->invisible_pool.bos, entry) + panfrost_bo_unreference((struct panfrost_bo *)entry->key); + util_dynarray_foreach(&batch->dependencies, struct panfrost_batch_fence *, dep) { panfrost_batch_fence_unreference(*dep); @@ -985,7 +997,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch, submit.jc = first_job_desc; submit.requirements = reqs; - bo_handles = calloc(batch->pool.bos->entries + batch->bos->entries, sizeof(*bo_handles)); + bo_handles = calloc(batch->pool.bos->entries + batch->invisible_pool.bos->entries + batch->bos->entries, sizeof(*bo_handles)); assert(bo_handles); hash_table_foreach(batch->bos, entry) @@ -994,6 +1006,9 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch, hash_table_foreach(batch->pool.bos, entry) panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++); + hash_table_foreach(batch->invisible_pool.bos, entry) + panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++); + submit.bo_handles = (u64) (uintptr_t) bo_handles; ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit); free(bo_handles); diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index e94dd76ad0c..00edd9574cb 100644 --- 
a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -95,6 +95,11 @@ struct panfrost_batch { /* Pool owned by this batch (released when the batch is released) used for temporary descriptors */ struct pan_pool pool; + /* Pool also owned by this batch that is not CPU mapped (created as + * INVISIBLE) used for private GPU-internal structures, particularly + * varyings */ + struct pan_pool invisible_pool; + /* Job scoreboarding state */ struct pan_scoreboard scoreboard; diff --git a/src/panfrost/lib/pan_device.h b/src/panfrost/lib/pan_device.h index b1996d0a645..29eb599e1bf 100644 --- a/src/panfrost/lib/pan_device.h +++ b/src/panfrost/lib/pan_device.h @@ -45,7 +45,7 @@ /* Transient slab size. This is a balance between fragmentation against cache * locality and ease of bookkeeping */ -#define TRANSIENT_SLAB_PAGES (32) /* 128kb */ +#define TRANSIENT_SLAB_PAGES (16) /* 64kb */ #define TRANSIENT_SLAB_SIZE (4096 * TRANSIENT_SLAB_PAGES) /* Maximum number of transient slabs so we don't need dynamic arrays. Most