panfrost: Track BO lifetime with jobs and reference counts
authorAlyssa Rosenzweig <alyssa@rosenzweig.io>
Sun, 14 Apr 2019 22:42:44 +0000 (22:42 +0000)
committerAlyssa Rosenzweig <alyssa@rosenzweig.io>
Fri, 19 Apr 2019 22:50:20 +0000 (22:50 +0000)
This (fairly large) patch continues work surrounding the panfrost_job
abstraction to improve job lifetime management. In particular, we add
infrastructure to track which BOs are used by a particular job
(currently limited to the vertex buffer BOs), to reference count these
BOs, and to automatically manage the BOs memory based on the reference
count. This set of changes serves as a code cleanup, as a way of future
proofing for allowing flushing BOs, and immediately as a bugfix to
workaround the missing reference counting for vertex buffer BOs.
Meanwhile, there are a few cleanups to vertex buffer handling code
itself, so in the short-term, this allows us to remove the costly VBO
staging workaround, since this patch addresses the underlying causes.

v2: Use pipe_reference for BO reference counting, rather than managing
it ourselves. Don't duplicate hash-table key removal. Fix vertex buffer
counting.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
src/gallium/drivers/panfrost/pan_context.c
src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_drm.c
src/gallium/drivers/panfrost/pan_job.c
src/gallium/drivers/panfrost/pan_job.h
src/gallium/drivers/panfrost/pan_resource.c
src/gallium/drivers/panfrost/pan_resource.h
src/gallium/drivers/panfrost/pan_screen.c
src/gallium/drivers/panfrost/pan_screen.h

index 0c97c0a2b4378952003d0ad1b04c970b8f7ddb9c..1319b7944aba4e12da6d3f5f59cf79f26af66991 100644 (file)
@@ -36,6 +36,8 @@
 #include "util/u_memory.h"
 #include "util/u_vbuf.h"
 #include "util/half_float.h"
+#include "util/u_helpers.h"
+#include "util/u_format.h"
 #include "indices/u_primconvert.h"
 #include "tgsi/tgsi_parse.h"
 
@@ -737,14 +739,18 @@ panfrost_emit_varying_descriptor(
  * excepting some obscure circumstances */
 
 static void
-panfrost_emit_vertex_data(struct panfrost_context *ctx)
+panfrost_emit_vertex_data(struct panfrost_context *ctx, struct panfrost_job *job)
 {
-        /* TODO: Only update the dirtied buffers */
+        /* Staged mali_attr, and index into them. i =/= k, depending on the
+         * vertex buffer mask */
         union mali_attr attrs[PIPE_MAX_ATTRIBS];
+        unsigned k = 0;
 
         unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);
 
-        for (int i = 0; i < ctx->vertex_buffer_count; ++i) {
+        for (int i = 0; i < ARRAY_SIZE(ctx->vertex_buffers); ++i) {
+                if (!(ctx->vb_mask & (1 << i))) continue;
+
                 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
                 struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
 
@@ -767,8 +773,8 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx)
                 }
 
                 /* Offset vertex count by draw_start to make sure we upload enough */
-                attrs[i].stride = buf->stride;
-                attrs[i].size = buf->stride * (ctx->payload_vertex.draw_start + invocation_count) + max_src_offset;
+                attrs[k].stride = buf->stride;
+                attrs[k].size = buf->stride * (ctx->payload_vertex.draw_start + invocation_count) + max_src_offset;
 
                 /* Vertex elements are -already- GPU-visible, at
                  * rsrc->gpu. However, attribute buffers must be 64 aligned. If
@@ -776,16 +782,19 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx)
 
                 mali_ptr effective_address = rsrc ? (rsrc->bo->gpu + buf->buffer_offset) : 0;
 
-                if (effective_address) {
-                        attrs[i].elements = panfrost_upload_transient(ctx, rsrc->bo->cpu + buf->buffer_offset, attrs[i].size) | MALI_ATTR_LINEAR;
+                if (effective_address & 63) {
+                        attrs[k].elements = panfrost_upload_transient(ctx, rsrc->bo->cpu + buf->buffer_offset, attrs[i].size) | MALI_ATTR_LINEAR;
                 } else if (effective_address) {
-                        attrs[i].elements = effective_address | MALI_ATTR_LINEAR;
+                        panfrost_job_add_bo(job, rsrc->bo);
+                        attrs[k].elements = effective_address | MALI_ATTR_LINEAR;
                 } else {
                         /* Leave unset? */
                 }
+
+                ++k;
         }
 
-        ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, ctx->vertex_buffer_count * sizeof(union mali_attr));
+        ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, k * sizeof(union mali_attr));
 
         panfrost_emit_varying_descriptor(ctx, invocation_count);
 }
@@ -807,7 +816,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
 
         if (with_vertex_data) {
-                panfrost_emit_vertex_data(ctx);
+                panfrost_emit_vertex_data(ctx, job);
         }
 
         bool msaa = ctx->rasterizer->base.multisample;
@@ -1252,7 +1261,8 @@ panfrost_link_jobs(struct panfrost_context *ctx)
 
 static void
 panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate,
-                     struct pipe_fence_handle **fence)
+                     struct pipe_fence_handle **fence,
+                      struct panfrost_job *job)
 {
         struct pipe_context *gallium = (struct pipe_context *) ctx;
         struct panfrost_screen *screen = pan_screen(gallium->screen);
@@ -1274,15 +1284,15 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate,
 #ifndef DRY_RUN
         
         bool is_scanout = panfrost_is_scanout(ctx);
-        int fragment_id = screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout);
+        screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout);
 
         /* If visual, we can stall a frame */
 
         if (!flush_immediate)
                 screen->driver->force_flush_fragment(ctx, fence);
 
-        screen->last_fragment_id = fragment_id;
         screen->last_fragment_flushed = false;
+        screen->last_job = job;
 
         /* If readback, flush now (hurts the pipelined performance) */
         if (flush_immediate)
@@ -1317,7 +1327,7 @@ panfrost_flush(
         bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME;
 
         /* Submit the frame itself */
-        panfrost_submit_frame(ctx, flush_immediate, fence);
+        panfrost_submit_frame(ctx, flush_immediate, fence, job);
 
         /* Prepare for the next frame */
         panfrost_invalidate_frame(ctx);
@@ -1793,22 +1803,8 @@ panfrost_set_vertex_buffers(
         const struct pipe_vertex_buffer *buffers)
 {
         struct panfrost_context *ctx = pan_context(pctx);
-        assert(num_buffers <= PIPE_MAX_ATTRIBS);
-
-        /* XXX: Dirty tracking? etc */
-        if (buffers) {
-                size_t sz = sizeof(buffers[0]) * num_buffers;
-                ctx->vertex_buffers = malloc(sz);
-                ctx->vertex_buffer_count = num_buffers;
-                memcpy(ctx->vertex_buffers, buffers, sz);
-        } else {
-                if (ctx->vertex_buffers) {
-                        free(ctx->vertex_buffers);
-                        ctx->vertex_buffers = NULL;
-                }
 
-                ctx->vertex_buffer_count = 0;
-        }
+        util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);
 }
 
 static void
index d071da1c62fad36cfd32f506597c74fb54ff806e..a6a542776328857b54e548c0713c250191662c41 100644 (file)
@@ -114,6 +114,9 @@ struct panfrost_context {
         struct panfrost_job *job;
         struct hash_table *jobs;
 
+        /* panfrost_resource -> panfrost_job */
+        struct hash_table *write_jobs;
+
         /* Bit mask for supported PIPE_DRAW for this hardware */
         unsigned draw_modes;
 
@@ -187,8 +190,8 @@ struct panfrost_context {
 
         struct panfrost_vertex_state *vertex;
 
-        struct pipe_vertex_buffer *vertex_buffers;
-        unsigned vertex_buffer_count;
+        struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
+        uint32_t vb_mask;
 
         struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
         unsigned sampler_count[PIPE_SHADER_TYPES];
index 70d8d7498d44b5fa10962bbb34200cc302e3c686..981b1c6ce13fe8a41c2b4c09bafe999a70405d34 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * © Copyright 2019 Collabora, Ltd.
+ * Copyright 2019 Alyssa Rosenzweig
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -138,6 +139,7 @@ panfrost_drm_import_bo(struct panfrost_screen *screen, struct winsys_handle *wha
 
        bo->gem_handle = gem_handle;
         bo->gpu = (mali_ptr) get_bo_offset.offset;
+        pipe_reference_init(&bo->reference, 1);
 
        // TODO map and unmap on demand?
        mmap_bo.handle = gem_handle;
@@ -227,6 +229,7 @@ panfrost_drm_submit_job(struct panfrost_context *ctx, u64 job_desc, int reqs, st
        }
 
        /* TODO: Add here the transient pools */
+        /* TODO: Add here the BOs listed in the panfrost_job */
        bo_handles[submit.bo_handle_count++] = ctx->shaders.gem_handle;
        bo_handles[submit.bo_handle_count++] = ctx->scratchpad.gem_handle;
        bo_handles[submit.bo_handle_count++] = ctx->tiler_heap.gem_handle;
@@ -303,6 +306,9 @@ panfrost_drm_force_flush_fragment(struct panfrost_context *ctx,
         if (!screen->last_fragment_flushed) {
                drmSyncobjWait(drm->fd, &ctx->out_sync, 1, INT64_MAX, 0, NULL);
                 screen->last_fragment_flushed = true;
+
+                /* The job finished up, so we're safe to clean it up now */
+                panfrost_free_job(ctx, screen->last_job);
        }
 
         if (fence) {
index 66a8b0d4b07d618d7a6b3fe40d149d0e50e04325..3b03cd650c8ecea1e2eca19a2bb0a526a3784855 100644 (file)
@@ -35,9 +35,32 @@ panfrost_create_job(struct panfrost_context *ctx)
 
         job->ctx = ctx;
 
+        job->bos = _mesa_set_create(job,
+                                    _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
         return job;
 }
 
+void
+panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job *job)
+{
+        if (!job)
+                return;
+
+        set_foreach(job->bos, entry) {
+                struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
+                panfrost_bo_unreference(ctx->base.screen, bo);
+        }
+
+        _mesa_hash_table_remove_key(ctx->jobs, &job->key);
+
+        if (ctx->job == job)
+                ctx->job = NULL;
+
+        ralloc_free(job);
+}
+
 struct panfrost_job *
 panfrost_get_job(struct panfrost_context *ctx,
                 struct pipe_surface **cbufs, struct pipe_surface *zsbuf)
@@ -90,6 +113,53 @@ panfrost_get_job_for_fbo(struct panfrost_context *ctx)
         return job;
 }
 
+void
+panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo *bo)
+{
+        if (!bo)
+                return;
+
+        if (_mesa_set_search(job->bos, bo))
+                return;
+
+        panfrost_bo_reference(bo);
+        _mesa_set_add(job->bos, bo);
+}
+
+void
+panfrost_flush_jobs_writing_resource(struct panfrost_context *panfrost,
+                                struct pipe_resource *prsc)
+{
+#if 0
+        struct hash_entry *entry = _mesa_hash_table_search(panfrost->write_jobs,
+                                                           prsc);
+        if (entry) {
+                struct panfrost_job *job = entry->data;
+                panfrost_job_submit(panfrost, job);
+        }
+#endif
+        /* TODO stub */
+}
+
+void
+panfrost_flush_jobs_reading_resource(struct panfrost_context *panfrost,
+                                struct pipe_resource *prsc)
+{
+        struct panfrost_resource *rsc = pan_resource(prsc);
+
+        panfrost_flush_jobs_writing_resource(panfrost, prsc);
+
+        hash_table_foreach(panfrost->jobs, entry) {
+                struct panfrost_job *job = entry->data;
+
+                if (_mesa_set_search(job->bos, rsc->bo)) {
+                        printf("TODO: submit job for flush\n");
+                        //panfrost_job_submit(panfrost, job);
+                        continue;
+                }
+        }
+}
+
 static bool
 panfrost_job_compare(const void *a, const void *b)
 {
@@ -109,4 +179,9 @@ panfrost_job_init(struct panfrost_context *ctx)
         ctx->jobs = _mesa_hash_table_create(NULL,
                                             panfrost_job_hash,
                                             panfrost_job_compare);
+
+        ctx->write_jobs = _mesa_hash_table_create(NULL,
+                                            _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+
 }
index 30f1cf4bd5c6ca594533380a529b8571e4f9ac5b..1b28084c599a020c0f07e38df033915b9cf776f6 100644 (file)
@@ -55,6 +55,8 @@ struct panfrost_job {
          * bitmask) */
         unsigned requirements;
 
+        /* BOs referenced -- will be used for flushing logic */
+        struct set *bos;
 };
 
 /* Functions for managing the above */
@@ -62,6 +64,9 @@ struct panfrost_job {
 struct panfrost_job *
 panfrost_create_job(struct panfrost_context *ctx);
 
+void
+panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job *job);
+
 struct panfrost_job *
 panfrost_get_job(struct panfrost_context *ctx,
                 struct pipe_surface **cbufs, struct pipe_surface *zsbuf);
@@ -72,4 +77,15 @@ panfrost_get_job_for_fbo(struct panfrost_context *ctx);
 void
 panfrost_job_init(struct panfrost_context *ctx);
 
+void
+panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo *bo);
+
+void
+panfrost_flush_jobs_writing_resource(struct panfrost_context *panfrost,
+                                struct pipe_resource *prsc);
+
+void
+panfrost_flush_jobs_reading_resource(struct panfrost_context *panfrost,
+                                struct pipe_resource *prsc);
+
 #endif
index 15d522f19639d94ac7dc262680d31c8f38a79ead..73f987c45bcf0160a913fa3790acd9334459d0b0 100644 (file)
@@ -228,6 +228,7 @@ static struct panfrost_bo *
 panfrost_create_bo(struct panfrost_screen *screen, const struct pipe_resource *template)
 {
        struct panfrost_bo *bo = CALLOC_STRUCT(panfrost_bo);
+        pipe_reference_init(&bo->reference, 1);
 
         /* Based on the usage, figure out what storing will be used. There are
          * various tradeoffs:
@@ -297,6 +298,8 @@ panfrost_resource_create(struct pipe_screen *screen,
                         assert(0);
         }
 
+        util_range_init(&so->valid_buffer_range);
+
         if (template->bind & PIPE_BIND_DISPLAY_TARGET ||
             template->bind & PIPE_BIND_SCANOUT ||
             template->bind & PIPE_BIND_SHARED) {
@@ -359,6 +362,22 @@ panfrost_destroy_bo(struct panfrost_screen *screen, struct panfrost_bo *pbo)
         }
 }
 
+void
+panfrost_bo_reference(struct panfrost_bo *bo)
+{
+        pipe_reference(NULL, &bo->reference);
+}
+
+void
+panfrost_bo_unreference(struct pipe_screen *screen, struct panfrost_bo *bo)
+{
+        /* When the reference count goes to zero, we need to cleanup */
+
+        if (pipe_reference(&bo->reference, NULL)) {
+                panfrost_destroy_bo(pan_screen(screen), bo);
+        }
+}
+
 static void
 panfrost_resource_destroy(struct pipe_screen *screen,
                           struct pipe_resource *pt)
@@ -370,8 +389,9 @@ panfrost_resource_destroy(struct pipe_screen *screen,
                renderonly_scanout_destroy(rsrc->scanout, pscreen->ro);
 
        if (rsrc->bo)
-               panfrost_destroy_bo(pscreen, rsrc->bo);
+                panfrost_bo_unreference(screen, rsrc->bo);
 
+        util_range_destroy(&rsrc->valid_buffer_range);
        FREE(rsrc);
 }
 
@@ -384,7 +404,8 @@ panfrost_transfer_map(struct pipe_context *pctx,
                       struct pipe_transfer **out_transfer)
 {
         int bytes_per_pixel = util_format_get_blocksize(resource->format);
-        struct panfrost_bo *bo = pan_resource(resource)->bo;
+        struct panfrost_resource *rsrc = pan_resource(resource);
+        struct panfrost_bo *bo = rsrc->bo;
 
         struct panfrost_gtransfer *transfer = CALLOC_STRUCT(panfrost_gtransfer);
         transfer->base.level = level;
@@ -405,6 +426,25 @@ panfrost_transfer_map(struct pipe_context *pctx,
                 panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
         }
 
+        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+                /* TODO: reallocate */
+                //printf("debug: Missed reallocate\n");
+        } else if ((usage & PIPE_TRANSFER_WRITE)
+                        && resource->target == PIPE_BUFFER
+                        && !util_ranges_intersect(&rsrc->valid_buffer_range, box->x, box->x + box->width)) {
+                /* No flush for writes to uninitialized */
+        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+                if (usage & PIPE_TRANSFER_WRITE) {
+                        /* STUB: flush reading */
+                        //printf("debug: missed reading flush %d\n", resource->target);
+                } else if (usage & PIPE_TRANSFER_READ) {
+                        /* STUB: flush writing */
+                        //printf("debug: missed writing flush %d (%d-%d)\n", resource->target, box->x, box->x + box->width);
+                } else {
+                        /* Why are you even mapping?! */
+                }
+        }
+
         if (bo->layout != PAN_LINEAR) {
                 /* Non-linear resources need to be indirectly mapped */
 
@@ -460,9 +500,9 @@ panfrost_transfer_unmap(struct pipe_context *pctx,
         /* Gallium expects writeback here, so we tile */
 
         struct panfrost_gtransfer *trans = pan_transfer(transfer);
+        struct panfrost_resource *prsrc = (struct panfrost_resource *) transfer->resource;
 
         if (trans->map) {
-                struct panfrost_resource *prsrc = (struct panfrost_resource *) transfer->resource;
                 struct panfrost_bo *bo = prsrc->bo;
 
                 if (transfer->usage & PIPE_TRANSFER_WRITE) {
@@ -480,6 +520,11 @@ panfrost_transfer_unmap(struct pipe_context *pctx,
                 free(trans->map);
         }
 
+
+       util_range_add(&prsrc->valid_buffer_range,
+                        transfer->box.x,
+                        transfer->box.x + transfer->box.width);
+
         /* Derefence the resource */
         pipe_resource_reference(&transfer->resource, NULL);
 
@@ -487,6 +532,20 @@ panfrost_transfer_unmap(struct pipe_context *pctx,
         free(transfer);
 }
 
+static void
+panfrost_transfer_flush_region(struct pipe_context *pctx,
+               struct pipe_transfer *transfer,
+               const struct pipe_box *box)
+{
+       struct panfrost_resource *rsc = pan_resource(transfer->resource);
+
+       if (transfer->resource->target == PIPE_BUFFER) {
+               util_range_add(&rsc->valid_buffer_range,
+                                          transfer->box.x + box->x,
+                                          transfer->box.x + box->x + box->width);
+        }
+}
+
 static struct pb_slab *
 panfrost_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index)
 {
@@ -564,7 +623,7 @@ static const struct u_transfer_vtbl transfer_vtbl = {
         .resource_destroy         = panfrost_resource_destroy,
         .transfer_map             = panfrost_transfer_map,
         .transfer_unmap           = panfrost_transfer_unmap,
-        .transfer_flush_region    = u_default_transfer_flush_region,
+        .transfer_flush_region    = panfrost_transfer_flush_region,
         .get_internal_format      = panfrost_resource_get_internal_format,
         .set_stencil              = panfrost_resource_set_stencil,
         .get_stencil              = panfrost_resource_get_stencil,
index a1315ab1b43ede8e96217fbddee3dcfa0f003d21..269268e9d2213cfeea784168dc397855a66ac95a 100644 (file)
@@ -30,6 +30,7 @@
 #include "pan_screen.h"
 #include "pan_allocate.h"
 #include "drm-uapi/drm.h"
+#include "util/u_range.h"
 
 /* Describes the memory layout of a BO */
 
@@ -45,6 +46,9 @@ struct panfrost_slice {
 };
 
 struct panfrost_bo {
+        struct pipe_reference reference;
+
+        /* Description of the mip levels */
         struct panfrost_slice slices[MAX_MIP_LEVELS];
 
         /* Mapping for the entire object (all levels) */
@@ -83,6 +87,12 @@ struct panfrost_bo {
         int gem_handle;
 };
 
+void
+panfrost_bo_reference(struct panfrost_bo *bo);
+
+void
+panfrost_bo_unreference(struct pipe_screen *screen, struct panfrost_bo *bo);
+
 struct panfrost_resource {
         struct pipe_resource base;
 
@@ -90,6 +100,8 @@ struct panfrost_resource {
         struct renderonly_scanout *scanout;
 
         struct panfrost_resource *separate_stencil;
+
+        struct util_range valid_buffer_range;
 };
 
 static inline struct panfrost_resource *
index 71c6175d06970ce703def7dac72090b4d12fff09..6d3aca594f1a88c502a73ee05f71016e892bd293 100644 (file)
@@ -597,8 +597,8 @@ panfrost_create_screen(int fd, struct renderonly *ro)
         screen->base.fence_reference = panfrost_fence_reference;
         screen->base.fence_finish = panfrost_fence_finish;
 
-       screen->last_fragment_id = -1;
        screen->last_fragment_flushed = true;
+        screen->last_job = NULL;
 
         panfrost_resource_screen_init(screen);
 
index cbadf8136751cb81002191019e06f1641b605ba3..4e5efa91f22f184979f9dd1294f11c1a6210431c 100644 (file)
@@ -92,8 +92,11 @@ struct panfrost_screen {
         /* TODO: Where? */
         struct panfrost_resource *display_target;
 
-       int last_fragment_id;
+        /* While we're busy building up the job for frame N, the GPU is
+         * still busy executing frame N-1. So hold a reference to
+         * yesterjob */
        int last_fragment_flushed;
+        struct panfrost_job *last_job;
 };
 
 #endif /* PAN_SCREEN_H */