#include "util/rounding.h"
#include "pan_util.h"
#include "pan_blending.h"
-#include "pandecode/decode.h"
+#include "decode.h"
#include "panfrost-quirks.h"
/* panfrost_bo_access is here to help us keep track of batch accesses to BOs
panfrost_create_batch_fence(struct panfrost_batch *batch)
{
struct panfrost_batch_fence *fence;
- ASSERTED int ret;
fence = rzalloc(NULL, struct panfrost_batch_fence);
assert(fence);
pipe_reference_init(&fence->reference, 1);
- fence->ctx = batch->ctx;
fence->batch = batch;
- ret = drmSyncobjCreate(pan_device(batch->ctx->base.screen)->fd, 0,
- &fence->syncobj);
- assert(!ret);
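+ /* No per-fence syncobj anymore: synchronization goes through the
+ * out_sync passed at submit time */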
return fence;
}
static void
panfrost_free_batch_fence(struct panfrost_batch_fence *fence)
{
- drmSyncobjDestroy(pan_device(fence->ctx->base.screen)->fd,
- fence->syncobj);
ralloc_free(fence);
}
pipe_reference(NULL, &fence->reference);
}
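+/* Forward-declared so batch creation can pin the framebuffer BOs; the
+ * definition lives below with the rest of the BO-tracking helpers */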
+static void
+panfrost_batch_add_fbo_bos(struct panfrost_batch *batch);
+
static struct panfrost_batch *
panfrost_create_batch(struct panfrost_context *ctx,
const struct pipe_framebuffer_state *key)
{
struct panfrost_batch *batch = rzalloc(ctx, struct panfrost_batch);
+ struct panfrost_device *dev = pan_device(ctx->base.screen);
batch->ctx = ctx;
batch->out_sync = panfrost_create_batch_fence(batch);
util_copy_framebuffer_state(&batch->key, key);
- batch->pool = panfrost_create_pool(batch, pan_device(ctx->base.screen));
+ /* Preallocate the main pool, since every batch has at least one job
+ * structure, so the preallocation is always used */
+ panfrost_pool_init(&batch->pool, batch, dev, 0, true);
+
+ /* Don't preallocate the invisible pool: not every batch will use the
+ * preallocation, particularly if the varyings are larger than the
+ * preallocated size and a reallocation is needed anyway. */
+ panfrost_pool_init(&batch->invisible_pool, batch, dev, PAN_BO_INVISIBLE, false);
+
+ panfrost_batch_add_fbo_bos(batch);
return batch;
}
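+/* Usage sketch for the two pools, assuming panfrost_transfer carries both
+ * .cpu and .gpu pointers as the call sites below suggest: descriptors are
+ * CPU-written from batch->pool, varyings live in the GPU-only pool:
+ *
+ * struct panfrost_transfer t =
+ * panfrost_pool_alloc_aligned(&batch->pool, sz, 64);
+ * memcpy(t.cpu, desc, sz); // CPU-visible descriptor memory
+ * mali_ptr vary = panfrost_pool_alloc_aligned(&batch->invisible_pool,
+ * vary_sz, 64).gpu; // never mapped on the CPU
+ */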
struct hash_entry *entry;
/* Remove the entry in the FBO -> batch hash table if the batch
- * matches. This way, next draws/clears targeting this FBO will trigger
- * the creation of a new batch.
+ * matches and drop the context reference. This way, next draws/clears
+ * targeting this FBO will trigger the creation of a new batch.
*/
entry = _mesa_hash_table_search(ctx->batches, &batch->key);
if (entry && entry->data == batch)
_mesa_hash_table_remove(ctx->batches, entry);
- /* If this is the bound batch, the panfrost_context parameters are
- * relevant so submitting it invalidates those parameters, but if it's
- * not bound, the context parameters are for some other batch so we
- * can't invalidate them.
- */
- if (ctx->batch == batch) {
- panfrost_invalidate_frame(ctx);
+ if (ctx->batch == batch)
ctx->batch = NULL;
- }
}
#ifdef PAN_BATCH_DEBUG
hash_table_foreach(batch->bos, entry)
panfrost_bo_unreference((struct panfrost_bo *)entry->key);
- hash_table_foreach(batch->pool.bos, entry)
- panfrost_bo_unreference((struct panfrost_bo *)entry->key);
+ panfrost_pool_cleanup(&batch->pool);
+ panfrost_pool_cleanup(&batch->invisible_pool);
util_dynarray_foreach(&batch->dependencies,
struct panfrost_batch_fence *, dep) {
panfrost_batch_fence_unreference(*dep);
}
+ util_dynarray_fini(&batch->dependencies);
+
/* The out_sync fence lifetime is different from the batch one
* since other batches might want to wait on a fence of already
* submitted/signaled batch. All we need to do here is make sure the
return panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
}
-static bool
-panfrost_batch_fence_is_signaled(struct panfrost_batch_fence *fence)
-{
- if (fence->signaled)
- return true;
-
- /* Batch has not been submitted yet. */
- if (fence->batch)
- return false;
-
- int ret = drmSyncobjWait(pan_device(fence->ctx->base.screen)->fd,
- &fence->syncobj, 1, 0, 0, NULL);
-
- /* Cache whether the fence was signaled */
- fence->signaled = ret >= 0;
- return fence->signaled;
-}
-
static void
panfrost_bo_access_gc_fences(struct panfrost_context *ctx,
struct panfrost_bo_access *access,
const struct panfrost_bo *bo)
{
- if (access->writer && panfrost_batch_fence_is_signaled(access->writer)) {
+ if (access->writer) {
panfrost_batch_fence_unreference(access->writer);
access->writer = NULL;
}
if (!(*reader))
continue;
- if (panfrost_batch_fence_is_signaled(*reader)) {
- panfrost_batch_fence_unreference(*reader);
- *reader = NULL;
- } else {
- /* Build a new array of only unsignaled fences in-place */
- *(new_readers++) = *reader;
- }
+ panfrost_batch_fence_unreference(*reader);
+ *reader = NULL;
}
if (!util_dynarray_resize(&access->readers, struct panfrost_batch_fence *,
panfrost_batch_add_bo(batch, rsrc->separate_stencil->bo, flags);
}
-void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
+static void
+panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
{
uint32_t flags = PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_WRITE |
PAN_BO_ACCESS_VERTEX_TILER |
struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
- unsigned shift,
+ unsigned size_per_thread,
unsigned thread_tls_alloc,
unsigned core_count)
{
- unsigned size = panfrost_get_total_stack_size(shift,
+ unsigned size = panfrost_get_total_stack_size(size_per_thread,
thread_tls_alloc,
core_count);
return batch->shared_memory;
}
-struct panfrost_bo *
-panfrost_batch_get_tiler_heap(struct panfrost_batch *batch)
-{
- if (batch->tiler_heap)
- return batch->tiler_heap;
-
- batch->tiler_heap = panfrost_batch_create_bo(batch, 4096 * 4096,
- PAN_BO_INVISIBLE |
- PAN_BO_GROWABLE,
- PAN_BO_ACCESS_PRIVATE |
- PAN_BO_ACCESS_RW |
- PAN_BO_ACCESS_VERTEX_TILER |
- PAN_BO_ACCESS_FRAGMENT);
- assert(batch->tiler_heap);
- return batch->tiler_heap;
-}
-
mali_ptr
panfrost_batch_get_tiler_meta(struct panfrost_batch *batch, unsigned vertex_count)
{
if (batch->tiler_meta)
return batch->tiler_meta;
- struct panfrost_bo *tiler_heap;
- tiler_heap = panfrost_batch_get_tiler_heap(batch);
+ struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
struct bifrost_tiler_heap_meta tiler_heap_meta = {
- .heap_size = tiler_heap->size,
- .tiler_heap_start = tiler_heap->gpu,
- .tiler_heap_free = tiler_heap->gpu,
- .tiler_heap_end = tiler_heap->gpu + tiler_heap->size,
+ .heap_size = dev->tiler_heap->size,
+ .tiler_heap_start = dev->tiler_heap->gpu,
+ .tiler_heap_free = dev->tiler_heap->gpu,
+ .tiler_heap_end = dev->tiler_heap->gpu + dev->tiler_heap->size,
.unk1 = 0x1,
.unk7e007e = 0x7e007e,
};
.flags = 0x0,
.width = MALI_POSITIVE(batch->key.width),
.height = MALI_POSITIVE(batch->key.height),
- .tiler_heap_meta = panfrost_pool_upload(&batch->pool, &tiler_heap_meta, sizeof(tiler_heap_meta)),
+ .tiler_heap_meta = panfrost_pool_upload_aligned(&batch->pool, &tiler_heap_meta, sizeof(tiler_heap_meta), 64)
};
- batch->tiler_meta = panfrost_pool_upload(&batch->pool, &tiler_meta, sizeof(tiler_meta));
+ batch->tiler_meta = panfrost_pool_upload_aligned(&batch->pool, &tiler_meta, sizeof(tiler_meta), 64);
return batch->tiler_meta;
}
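+/* Sketch of the new explicit alignment: descriptors are assumed to need
+ * 64-byte alignment (every call site in this file passes 64):
+ *
+ * mali_ptr m = panfrost_pool_upload_aligned(&batch->pool, &meta,
+ * sizeof(meta), 64);
+ * assert((m & 63) == 0);
+ */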
sizeof(struct mali_single_framebuffer) :
sizeof(struct mali_framebuffer);
- batch->framebuffer = panfrost_pool_alloc(&batch->pool, size);
+ batch->framebuffer = panfrost_pool_alloc_aligned(&batch->pool, size, 64);
/* Tag the pointer */
if (!(dev->quirks & MIDGARD_SFBD))
format = util_format_stencil_only(format);
}
- enum mali_texture_type type =
- panfrost_translate_texture_type(rsrc->base.target);
-
- unsigned nr_samples = surf->nr_samples;
-
- if (!nr_samples)
- nr_samples = surf->texture->nr_samples;
+ enum mali_texture_dimension dim =
+ panfrost_translate_texture_dimension(rsrc->base.target);
struct pan_image img = {
.width0 = rsrc->base.width0,
.height0 = rsrc->base.height0,
.depth0 = rsrc->base.depth0,
.format = format,
- .type = type,
- .layout = rsrc->layout,
+ .dim = dim,
+ .modifier = rsrc->modifier,
.array_size = rsrc->base.array_size,
.first_level = level,
.last_level = level,
.first_layer = surf->u.tex.first_layer,
.last_layer = surf->u.tex.last_layer,
- .nr_samples = nr_samples,
+ .nr_samples = rsrc->base.nr_samples,
.cubemap_stride = rsrc->cubemap_stride,
.bo = rsrc->bo,
.slices = rsrc->slices
blend_shader = bo->gpu | b->first_tag;
}
- struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
- 4 * 4 * 6 * rsrc->damage.inverted_len);
+ struct panfrost_transfer transfer = panfrost_pool_alloc_aligned(&batch->pool,
+ 4 * 4 * 6 * rsrc->damage.inverted_len, 64);
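+ /* 4 bytes per float, 4 floats per vertex, 6 vertices per damage
+ * rectangle (two triangles) */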
for (unsigned i = 0; i < rsrc->damage.inverted_len; ++i) {
float *o = (float *) (transfer.cpu + (4 * 4 * 6 * i));
static int
panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
mali_ptr first_job_desc,
- uint32_t reqs)
+ uint32_t reqs,
+ uint32_t out_sync)
{
struct panfrost_context *ctx = batch->ctx;
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_device *dev = pan_device(gallium->screen);
struct drm_panfrost_submit submit = {0,};
- uint32_t *bo_handles, *in_syncs = NULL;
- bool is_fragment_shader;
+ uint32_t *bo_handles;
int ret;
- is_fragment_shader = (reqs & PANFROST_JD_REQ_FS) && batch->scoreboard.first_job;
- if (is_fragment_shader)
- submit.in_sync_count = 1;
- else
- submit.in_sync_count = util_dynarray_num_elements(&batch->dependencies,
- struct panfrost_batch_fence *);
-
- if (submit.in_sync_count) {
- in_syncs = calloc(submit.in_sync_count, sizeof(*in_syncs));
- assert(in_syncs);
- }
+ /* If we trace, we always need a syncobj, so make one of our own if we
+ * weren't given one to use. Remember that we did, so we can free it
+ * when we're done without double-freeing a syncobj we were given */
- /* The fragment job always depends on the vertex/tiler job if there's
- * one
- */
- if (is_fragment_shader) {
- in_syncs[0] = batch->out_sync->syncobj;
- } else {
- unsigned int i = 0;
+ bool our_sync = false;
- util_dynarray_foreach(&batch->dependencies,
- struct panfrost_batch_fence *, dep)
- in_syncs[i++] = (*dep)->syncobj;
+ if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
+ drmSyncobjCreate(dev->fd, 0, &out_sync);
+ our_sync = true;
}
- submit.in_syncs = (uintptr_t)in_syncs;
- submit.out_sync = batch->out_sync->syncobj;
+ submit.out_sync = out_sync;
submit.jc = first_job_desc;
submit.requirements = reqs;
- bo_handles = calloc(batch->pool.bos->entries + batch->bos->entries, sizeof(*bo_handles));
+ bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
+ panfrost_pool_num_bos(&batch->invisible_pool) +
+ batch->bos->entries + 1,
+ sizeof(*bo_handles));
assert(bo_handles);
hash_table_foreach(batch->bos, entry)
panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++);
- hash_table_foreach(batch->pool.bos, entry)
- panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++);
+ panfrost_pool_get_bo_handles(&batch->pool, bo_handles + submit.bo_handle_count);
+ submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
+ panfrost_pool_get_bo_handles(&batch->invisible_pool, bo_handles + submit.bo_handle_count);
+ submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);
+
+ /* Used by all tiler jobs (XXX: skip for compute-only) */
+ if (!(reqs & PANFROST_JD_REQ_FS))
+ bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;
submit.bo_handles = (u64) (uintptr_t) bo_handles;
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
free(bo_handles);
- free(in_syncs);
if (ret) {
if (dev->debug & PAN_DBG_MSGS)
/* Trace the job if we're doing that */
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
/* Wait so we can get errors reported back */
- drmSyncobjWait(dev->fd, &batch->out_sync->syncobj, 1,
+ drmSyncobjWait(dev->fd, &out_sync, 1,
INT64_MAX, 0, NULL);
/* Trace gets priority over sync */
pandecode_jc(submit.jc, dev->quirks & IS_BIFROST, dev->gpu_id, minimal);
}
+ /* Cleanup if we created the syncobj */
+ if (our_sync)
+ drmSyncobjDestroy(dev->fd, out_sync);
+
return 0;
}
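+/* Ownership sketch for the syncobj above ("tracing" stands in for the
+ * PAN_DBG_TRACE | PAN_DBG_SYNC check): only a syncobj created here is
+ * destroyed here, so a caller's out_sync is never freed.
+ *
+ * uint32_t sync = out_sync;
+ * bool ours = false;
+ * if (!sync && tracing) {
+ * drmSyncobjCreate(dev->fd, 0, &sync); // ours, temporary
+ * ours = true;
+ * }
+ * // ... submit; if tracing, drmSyncobjWait(dev->fd, &sync, 1,
+ * // INT64_MAX, 0, NULL) to get errors reported back ...
+ * if (ours)
+ * drmSyncobjDestroy(dev->fd, sync);
+ */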
+/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
+ * out_sync attached to the later of the two (there is an implicit
+ * dependency between them, so waiting on the later job suffices) */
+
static int
-panfrost_batch_submit_jobs(struct panfrost_batch *batch)
+panfrost_batch_submit_jobs(struct panfrost_batch *batch, uint32_t out_sync)
{
bool has_draws = batch->scoreboard.first_job;
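+ /* A fragment job is needed iff there is tiler work or a clear; the
+ * out_sync must attach to whichever job is submitted last */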
+ bool has_frag = batch->scoreboard.tiler_dep || batch->clear;
int ret = 0;
if (has_draws) {
- ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job, 0);
+ ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job,
+ 0, has_frag ? 0 : out_sync);
assert(!ret);
}
- if (batch->scoreboard.tiler_dep || batch->clear) {
+ if (has_frag) {
/* Whether we program the fragment job for draws or not depends
* on whether there is any *tiler* activity (so fragment
* shaders). If there are draws but entirely RASTERIZER_DISCARD
mali_ptr fragjob = panfrost_fragment_job(batch,
batch->scoreboard.tiler_dep != 0);
- ret = panfrost_batch_submit_ioctl(batch, fragjob, PANFROST_JD_REQ_FS);
+ ret = panfrost_batch_submit_ioctl(batch, fragjob,
+ PANFROST_JD_REQ_FS, out_sync);
assert(!ret);
}
}
static void
-panfrost_batch_submit(struct panfrost_batch *batch)
+panfrost_batch_submit(struct panfrost_batch *batch, uint32_t out_sync)
{
assert(batch);
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
- /* Submit the dependencies first. */
+ /* Submit the dependencies first. Don't pass along the out_sync since
+ * they are guaranteed to terminate sooner */
util_dynarray_foreach(&batch->dependencies,
struct panfrost_batch_fence *, dep) {
if ((*dep)->batch)
- panfrost_batch_submit((*dep)->batch);
+ panfrost_batch_submit((*dep)->batch, 0);
}
int ret;
/* Nothing to do! */
if (!batch->scoreboard.first_job && !batch->clear) {
- /* Mark the fence as signaled so the fence logic does not try
- * to wait on it.
- */
- batch->out_sync->signaled = true;
+ if (out_sync)
+ drmSyncobjSignal(dev->fd, &out_sync, 1);
goto out;
- }
+ }
panfrost_batch_draw_wallpaper(batch);
panfrost_scoreboard_initialize_tiler(&batch->pool, &batch->scoreboard, polygon_list);
- ret = panfrost_batch_submit_jobs(batch);
+ ret = panfrost_batch_submit_jobs(batch, out_sync);
if (ret && dev->debug & PAN_DBG_MSGS)
fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
panfrost_free_batch(batch);
}
+/* Submit all batches, applying the out_sync to the currently bound batch */
+
void
-panfrost_flush_all_batches(struct panfrost_context *ctx)
+panfrost_flush_all_batches(struct panfrost_context *ctx, uint32_t out_sync)
{
+ struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+ panfrost_batch_submit(batch, out_sync);
+
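+ /* The bound batch was just submitted (and removed from the table), so
+ * the remaining batches can flush without a sync object */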
hash_table_foreach(ctx->batches, hentry) {
struct panfrost_batch *batch = hentry->data;
assert(batch);
- panfrost_batch_submit(batch);
+ panfrost_batch_submit(batch, 0);
}
assert(!ctx->batches->entries);
return;
if (access->writer && access->writer->batch)
- panfrost_batch_submit(access->writer->batch);
+ panfrost_batch_submit(access->writer->batch, 0);
if (!flush_readers)
return;
util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
reader) {
if (*reader && (*reader)->batch)
- panfrost_batch_submit((*reader)->batch);
+ panfrost_batch_submit((*reader)->batch, 0);
}
}
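+/* Caller-side sketch (hypothetical caller, using only libdrm calls already
+ * present here) for turning the out_sync parameter into a waitable fence:
+ *
+ * uint32_t syncobj;
+ * drmSyncobjCreate(dev->fd, 0, &syncobj);
+ * panfrost_flush_all_batches(ctx, syncobj);
+ * drmSyncobjWait(dev->fd, &syncobj, 1, INT64_MAX, 0, NULL);
+ * drmSyncobjDestroy(dev->fd, syncobj);
+ */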
{
struct panfrost_context *ctx = batch->ctx;
- if (ctx->rasterizer && ctx->rasterizer->base.multisample)
+ if (ctx->rasterizer->base.multisample)
batch->requirements |= PAN_REQ_MSAA;
- if (ctx->depth_stencil && ctx->depth_stencil->depth.writemask) {
+ if (ctx->depth_stencil && ctx->depth_stencil->base.depth.writemask) {
batch->requirements |= PAN_REQ_DEPTH_WRITE;
batch->draws |= PIPE_CLEAR_DEPTH;
}
- if (ctx->depth_stencil && ctx->depth_stencil->stencil[0].enabled)
+ if (ctx->depth_stencil && ctx->depth_stencil->base.stencil[0].enabled)
batch->draws |= PIPE_CLEAR_STENCIL;
}