X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fpan_job.c;h=43aee264b3c394d99eb8abffe719ba2382f4ff3f;hp=7075b439c6f69c385085b56a4553dfa89114f306;hb=4047c691bff192fdcac20a16fb9a4f4071de4af9;hpb=85a2216fe4fb72a1caf3474493243d41b072b052

diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 7075b439c6f..43aee264b3c 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -36,7 +36,7 @@
 #include "util/rounding.h"
 #include "pan_util.h"
 #include "pan_blending.h"
-#include "pandecode/decode.h"
+#include "decode.h"
 #include "panfrost-quirks.h"

 /* panfrost_bo_access is here to help us keep track of batch accesses to BOs
@@ -64,16 +64,11 @@ static struct panfrost_batch_fence *
 panfrost_create_batch_fence(struct panfrost_batch *batch)
 {
         struct panfrost_batch_fence *fence;
-        ASSERTED int ret;

         fence = rzalloc(NULL, struct panfrost_batch_fence);
         assert(fence);
         pipe_reference_init(&fence->reference, 1);
-        fence->ctx = batch->ctx;
         fence->batch = batch;
-        ret = drmSyncobjCreate(pan_device(batch->ctx->base.screen)->fd, 0,
-                               &fence->syncobj);
-        assert(!ret);

         return fence;
 }
@@ -81,8 +76,6 @@ panfrost_create_batch_fence(struct panfrost_batch *batch)
 static void
 panfrost_free_batch_fence(struct panfrost_batch_fence *fence)
 {
-        drmSyncobjDestroy(pan_device(fence->ctx->base.screen)->fd,
-                          fence->syncobj);
         ralloc_free(fence);
 }

@@ -99,11 +92,15 @@ panfrost_batch_fence_reference(struct panfrost_batch_fence *fence)
         pipe_reference(NULL, &fence->reference);
 }

+static void
+panfrost_batch_add_fbo_bos(struct panfrost_batch *batch);
+
 static struct panfrost_batch *
 panfrost_create_batch(struct panfrost_context *ctx,
                       const struct pipe_framebuffer_state *key)
 {
         struct panfrost_batch *batch = rzalloc(ctx, struct panfrost_batch);
+        struct panfrost_device *dev = pan_device(ctx->base.screen);

         batch->ctx = ctx;

@@ -116,7 +113,16 @@ panfrost_create_batch(struct panfrost_context *ctx,
         batch->out_sync = panfrost_create_batch_fence(batch);
         util_copy_framebuffer_state(&batch->key, key);

-        batch->pool = panfrost_create_pool(batch, pan_device(ctx->base.screen));
+        /* Preallocate the main pool, since every batch has at least one job
+         * structure so it will be used */
+        panfrost_pool_init(&batch->pool, batch, dev, 0, true);
+
+        /* Don't preallocate the invisible pool, since not every batch will use
+         * the pre-allocation, particularly if the varyings are larger than the
+         * preallocation and a reallocation is needed after anyway. */
+        panfrost_pool_init(&batch->invisible_pool, batch, dev, PAN_BO_INVISIBLE, false);
+
+        panfrost_batch_add_fbo_bos(batch);

         return batch;
 }
@@ -128,22 +134,15 @@ panfrost_freeze_batch(struct panfrost_batch *batch)
         struct hash_entry *entry;

         /* Remove the entry in the FBO -> batch hash table if the batch
-         * matches. This way, next draws/clears targeting this FBO will trigger
-         * the creation of a new batch.
+         * matches and drop the context reference. This way, next draws/clears
+         * targeting this FBO will trigger the creation of a new batch.
          */
         entry = _mesa_hash_table_search(ctx->batches, &batch->key);
         if (entry && entry->data == batch)
                 _mesa_hash_table_remove(ctx->batches, entry);

-        /* If this is the bound batch, the panfrost_context parameters are
-         * relevant so submitting it invalidates those parameters, but if it's
-         * not bound, the context parameters are for some other batch so we
-         * can't invalidate them.
-         */
-        if (ctx->batch == batch) {
-                panfrost_invalidate_frame(ctx);
+        if (ctx->batch == batch)
                 ctx->batch = NULL;
-        }
 }

 #ifdef PAN_BATCH_DEBUG
@@ -179,11 +178,16 @@ panfrost_free_batch(struct panfrost_batch *batch)
         hash_table_foreach(batch->pool.bos, entry)
                 panfrost_bo_unreference((struct panfrost_bo *)entry->key);

+        hash_table_foreach(batch->invisible_pool.bos, entry)
+                panfrost_bo_unreference((struct panfrost_bo *)entry->key);
+
         util_dynarray_foreach(&batch->dependencies,
                               struct panfrost_batch_fence *, dep) {
                 panfrost_batch_fence_unreference(*dep);
         }

+        util_dynarray_fini(&batch->dependencies);
+
         /* The out_sync fence lifetime is different from the the batch one
          * since other batches might want to wait on a fence of already
          * submitted/signaled batch. All we need to do here is make sure the
@@ -320,30 +324,12 @@ panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx)
         return panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
 }

-static bool
-panfrost_batch_fence_is_signaled(struct panfrost_batch_fence *fence)
-{
-        if (fence->signaled)
-                return true;
-
-        /* Batch has not been submitted yet. */
-        if (fence->batch)
-                return false;
-
-        int ret = drmSyncobjWait(pan_device(fence->ctx->base.screen)->fd,
-                                 &fence->syncobj, 1, 0, 0, NULL);
-
-        /* Cache whether the fence was signaled */
-        fence->signaled = ret >= 0;
-        return fence->signaled;
-}
-
 static void
 panfrost_bo_access_gc_fences(struct panfrost_context *ctx,
                              struct panfrost_bo_access *access,
                              const struct panfrost_bo *bo)
 {
-        if (access->writer && panfrost_batch_fence_is_signaled(access->writer)) {
+        if (access->writer) {
                 panfrost_batch_fence_unreference(access->writer);
                 access->writer = NULL;
         }
@@ -356,13 +342,8 @@ panfrost_bo_access_gc_fences(struct panfrost_context *ctx,
                 if (!(*reader))
                         continue;

-                if (panfrost_batch_fence_is_signaled(*reader)) {
-                        panfrost_batch_fence_unreference(*reader);
-                        *reader = NULL;
-                } else {
-                        /* Build a new array of only unsignaled fences in-place */
-                        *(new_readers++) = *reader;
-                }
+                panfrost_batch_fence_unreference(*reader);
+                *reader = NULL;
         }

         if (!util_dynarray_resize(&access->readers, struct panfrost_batch_fence *,
@@ -596,7 +577,8 @@ panfrost_batch_add_resource_bos(struct panfrost_batch *batch,
                 panfrost_batch_add_bo(batch, rsrc->separate_stencil->bo, flags);
 }

-void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
+static void
+panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
 {
         uint32_t flags = PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_WRITE |
                          PAN_BO_ACCESS_VERTEX_TILER |
@@ -658,11 +640,11 @@ panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size)

 struct panfrost_bo *
 panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
-                unsigned shift,
+                unsigned size_per_thread,
                 unsigned thread_tls_alloc,
                 unsigned core_count)
 {
-        unsigned size = panfrost_get_total_stack_size(shift,
+        unsigned size = panfrost_get_total_stack_size(size_per_thread,
                         thread_tls_alloc,
                         core_count);

@@ -698,23 +680,6 @@ panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
         return batch->shared_memory;
 }

-struct panfrost_bo *
-panfrost_batch_get_tiler_heap(struct panfrost_batch *batch)
-{
-        if (batch->tiler_heap)
-                return batch->tiler_heap;
-
-        batch->tiler_heap = panfrost_batch_create_bo(batch, 4096 * 4096,
-                                                     PAN_BO_INVISIBLE |
-                                                     PAN_BO_GROWABLE,
-                                                     PAN_BO_ACCESS_PRIVATE |
-                                                     PAN_BO_ACCESS_RW |
-                                                     PAN_BO_ACCESS_VERTEX_TILER |
-                                                     PAN_BO_ACCESS_FRAGMENT);
-        assert(batch->tiler_heap);
-        return batch->tiler_heap;
-}
-
 mali_ptr
 panfrost_batch_get_tiler_meta(struct panfrost_batch *batch, unsigned vertex_count)
 {
@@ -724,14 +689,13 @@ panfrost_batch_get_tiler_meta(struct panfrost_batch *batch, unsigned vertex_coun
         if (batch->tiler_meta)
                 return batch->tiler_meta;

-        struct panfrost_bo *tiler_heap;
-        tiler_heap = panfrost_batch_get_tiler_heap(batch);
+        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

         struct bifrost_tiler_heap_meta tiler_heap_meta = {
-                .heap_size = tiler_heap->size,
-                .tiler_heap_start = tiler_heap->gpu,
-                .tiler_heap_free = tiler_heap->gpu,
-                .tiler_heap_end = tiler_heap->gpu + tiler_heap->size,
+                .heap_size = dev->tiler_heap->size,
+                .tiler_heap_start = dev->tiler_heap->gpu,
+                .tiler_heap_free = dev->tiler_heap->gpu,
+                .tiler_heap_end = dev->tiler_heap->gpu + dev->tiler_heap->size,
                 .unk1 = 0x1,
                 .unk7e007e = 0x7e007e,
         };
@@ -741,10 +705,10 @@ panfrost_batch_get_tiler_meta(struct panfrost_batch *batch, unsigned vertex_coun
                 .flags = 0x0,
                 .width = MALI_POSITIVE(batch->key.width),
                 .height = MALI_POSITIVE(batch->key.height),
-                .tiler_heap_meta = panfrost_pool_upload(&batch->pool, &tiler_heap_meta, sizeof(tiler_heap_meta)),
+                .tiler_heap_meta = panfrost_pool_upload_aligned(&batch->pool, &tiler_heap_meta, sizeof(tiler_heap_meta), 64)
         };

-        batch->tiler_meta = panfrost_pool_upload(&batch->pool, &tiler_meta, sizeof(tiler_meta));
+        batch->tiler_meta = panfrost_pool_upload_aligned(&batch->pool, &tiler_meta, sizeof(tiler_meta), 64);
         return batch->tiler_meta;
 }

@@ -783,7 +747,7 @@ panfrost_batch_reserve_framebuffer(struct panfrost_batch *batch)
                         sizeof(struct mali_single_framebuffer) :
                         sizeof(struct mali_framebuffer);

-        batch->framebuffer = panfrost_pool_alloc(&batch->pool, size);
+        batch->framebuffer = panfrost_pool_alloc_aligned(&batch->pool, size, 64);

         /* Tag the pointer */
         if (!(dev->quirks & MIDGARD_SFBD))
@@ -866,27 +830,22 @@ panfrost_load_surface(struct panfrost_batch *batch, struct pipe_surface *surf, u
                 format = util_format_stencil_only(format);
         }

-        enum mali_texture_type type =
-                panfrost_translate_texture_type(rsrc->base.target);
-
-        unsigned nr_samples = surf->nr_samples;
-
-        if (!nr_samples)
-                nr_samples = surf->texture->nr_samples;
+        enum mali_texture_dimension dim =
+                panfrost_translate_texture_dimension(rsrc->base.target);

         struct pan_image img = {
                 .width0 = rsrc->base.width0,
                 .height0 = rsrc->base.height0,
                 .depth0 = rsrc->base.depth0,
                 .format = format,
-                .type = type,
-                .layout = rsrc->layout,
+                .dim = dim,
+                .modifier = rsrc->modifier,
                 .array_size = rsrc->base.array_size,
                 .first_level = level,
                 .last_level = level,
                 .first_layer = surf->u.tex.first_layer,
                 .last_layer = surf->u.tex.last_layer,
-                .nr_samples = nr_samples,
+                .nr_samples = rsrc->base.nr_samples,
                 .cubemap_stride = rsrc->cubemap_stride,
                 .bo = rsrc->bo,
                 .slices = rsrc->slices
@@ -910,8 +869,8 @@ panfrost_load_surface(struct panfrost_batch *batch, struct pipe_surface *surf, u
                 blend_shader = bo->gpu | b->first_tag;
         }

-        struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
-                        4 * 4 * 6 * rsrc->damage.inverted_len);
+        struct panfrost_transfer transfer = panfrost_pool_alloc_aligned(&batch->pool,
+                        4 * 4 * 6 * rsrc->damage.inverted_len, 64);

         for (unsigned i = 0; i < rsrc->damage.inverted_len; ++i) {
                 float *o = (float *) (transfer.cpu + (4 * 4 * 6 * i));
@@ -995,7 +954,8 @@ panfrost_batch_record_bo(struct hash_entry *entry, unsigned *bo_handles, unsigne
 static int
 panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                             mali_ptr first_job_desc,
-                            uint32_t reqs)
+                            uint32_t reqs,
+                            uint32_t out_sync)
 {
         struct panfrost_context *ctx = batch->ctx;
         struct pipe_context *gallium = (struct pipe_context *) ctx;
@@ -1004,11 +964,23 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
         uint32_t *bo_handles;
         int ret;

-        submit.out_sync = batch->out_sync->syncobj;
+        /* If we trace, we always need a syncobj, so make one of our own if we
+         * weren't given one to use. Remember that we did so, so we can free it
+         * after we're done but preventing double-frees if we were given a
+         * syncobj */
+
+        bool our_sync = false;
+
+        if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
+                drmSyncobjCreate(dev->fd, 0, &out_sync);
+                our_sync = true;
+        }
+
+        submit.out_sync = out_sync;
         submit.jc = first_job_desc;
         submit.requirements = reqs;

-        bo_handles = calloc(batch->pool.bos->entries + batch->bos->entries, sizeof(*bo_handles));
+        bo_handles = calloc(batch->pool.bos->entries + batch->invisible_pool.bos->entries + batch->bos->entries + 1, sizeof(*bo_handles));
         assert(bo_handles);

         hash_table_foreach(batch->bos, entry)
@@ -1017,6 +989,13 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
         hash_table_foreach(batch->pool.bos, entry)
                 panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++);

+        hash_table_foreach(batch->invisible_pool.bos, entry)
+                panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++);
+
+        /* Used by all tiler jobs (XXX: skip for compute-only) */
+        if (!(reqs & PANFROST_JD_REQ_FS))
+                bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;
+
         submit.bo_handles = (u64) (uintptr_t) bo_handles;
         ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
         free(bo_handles);
@@ -1031,7 +1010,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
         /* Trace the job if we're doing that */
         if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
                 /* Wait so we can get errors reported back */
-                drmSyncobjWait(dev->fd, &batch->out_sync->syncobj, 1,
+                drmSyncobjWait(dev->fd, &out_sync, 1,
                                INT64_MAX, 0, NULL);

                 /* Trace gets priority over sync */
@@ -1039,21 +1018,31 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                 pandecode_jc(submit.jc, dev->quirks & IS_BIFROST, dev->gpu_id, minimal);
         }

+        /* Cleanup if we created the syncobj */
+        if (our_sync)
+                drmSyncobjDestroy(dev->fd, out_sync);
+
         return 0;
 }

+/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
+ * outsync corresponding to the later of the two (since there will be an
+ * implicit dep between them) */
+
 static int
-panfrost_batch_submit_jobs(struct panfrost_batch *batch)
+panfrost_batch_submit_jobs(struct panfrost_batch *batch, uint32_t out_sync)
 {
         bool has_draws = batch->scoreboard.first_job;
+        bool has_frag = batch->scoreboard.tiler_dep || batch->clear;
         int ret = 0;

         if (has_draws) {
-                ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job, 0);
+                ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job,
+                                                  0, has_frag ? 0 : out_sync);
                 assert(!ret);
         }

-        if (batch->scoreboard.tiler_dep || batch->clear) {
+        if (has_frag) {
                 /* Whether we program the fragment job for draws or not depends
                  * on whether there is any *tiler* activity (so fragment
                  * shaders). If there are draws but entirely RASTERIZER_DISCARD
@@ -1063,7 +1052,8 @@ panfrost_batch_submit_jobs(struct panfrost_batch *batch)
                 mali_ptr fragjob = panfrost_fragment_job(batch,
                                 batch->scoreboard.tiler_dep != 0);

-                ret = panfrost_batch_submit_ioctl(batch, fragjob, PANFROST_JD_REQ_FS);
+                ret = panfrost_batch_submit_ioctl(batch, fragjob,
+                                                  PANFROST_JD_REQ_FS, out_sync);
                 assert(!ret);
         }

@@ -1071,28 +1061,27 @@ panfrost_batch_submit_jobs(struct panfrost_batch *batch)
 }

 static void
-panfrost_batch_submit(struct panfrost_batch *batch)
+panfrost_batch_submit(struct panfrost_batch *batch, uint32_t out_sync)
 {
         assert(batch);
         struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

-        /* Submit the dependencies first. */
+        /* Submit the dependencies first. Don't pass along the out_sync since
+         * they are guaranteed to terminate sooner */
         util_dynarray_foreach(&batch->dependencies,
                               struct panfrost_batch_fence *, dep) {
                 if ((*dep)->batch)
-                        panfrost_batch_submit((*dep)->batch);
+                        panfrost_batch_submit((*dep)->batch, 0);
         }

         int ret;

         /* Nothing to do! */
         if (!batch->scoreboard.first_job && !batch->clear) {
-                /* Mark the fence as signaled so the fence logic does not try
-                 * to wait on it.
-                 */
-                batch->out_sync->signaled = true;
+                if (out_sync)
+                        drmSyncobjSignal(dev->fd, &out_sync, 1);
                 goto out;
-        }
+        } 

         panfrost_batch_draw_wallpaper(batch);

@@ -1115,7 +1104,7 @@ panfrost_batch_submit(struct panfrost_batch *batch)

         panfrost_scoreboard_initialize_tiler(&batch->pool, &batch->scoreboard, polygon_list);

-        ret = panfrost_batch_submit_jobs(batch);
+        ret = panfrost_batch_submit_jobs(batch, out_sync);

         if (ret && dev->debug & PAN_DBG_MSGS)
                 fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
@@ -1142,14 +1131,19 @@ out:
         panfrost_free_batch(batch);
 }

+/* Submit all batches, applying the out_sync to the currently bound batch */
+
 void
-panfrost_flush_all_batches(struct panfrost_context *ctx)
+panfrost_flush_all_batches(struct panfrost_context *ctx, uint32_t out_sync)
 {
+        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+        panfrost_batch_submit(batch, out_sync);
+
         hash_table_foreach(ctx->batches, hentry) {
                 struct panfrost_batch *batch = hentry->data;
                 assert(batch);

-                panfrost_batch_submit(batch);
+                panfrost_batch_submit(batch, 0);
         }

         assert(!ctx->batches->entries);
@@ -1198,7 +1192,7 @@ panfrost_flush_batches_accessing_bo(struct panfrost_context *ctx,
                 return;

         if (access->writer && access->writer->batch)
-                panfrost_batch_submit(access->writer->batch);
+                panfrost_batch_submit(access->writer->batch, 0);

         if (!flush_readers)
                 return;
@@ -1206,7 +1200,7 @@ panfrost_flush_batches_accessing_bo(struct panfrost_context *ctx,
         util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
                               reader) {
                 if (*reader && (*reader)->batch)
-                        panfrost_batch_submit((*reader)->batch);
+                        panfrost_batch_submit((*reader)->batch, 0);
         }
 }

@@ -1215,15 +1209,15 @@ panfrost_batch_set_requirements(struct panfrost_batch *batch)
 {
         struct panfrost_context *ctx = batch->ctx;

-        if (ctx->rasterizer && ctx->rasterizer->base.multisample)
+        if (ctx->rasterizer->base.multisample)
                 batch->requirements |= PAN_REQ_MSAA;

-        if (ctx->depth_stencil && ctx->depth_stencil->depth.writemask) {
+        if (ctx->depth_stencil && ctx->depth_stencil->base.depth.writemask) {
                 batch->requirements |= PAN_REQ_DEPTH_WRITE;
                 batch->draws |= PIPE_CLEAR_DEPTH;
         }

-        if (ctx->depth_stencil && ctx->depth_stencil->stencil[0].enabled)
+        if (ctx->depth_stencil && ctx->depth_stencil->base.stencil[0].enabled)
                 batch->draws |= PIPE_CLEAR_STENCIL;
 }