panfrost: Pass IS_BIFROST to pandecode_jc
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 3ccf4bb6b3e9d9b31d49e0b9625a2bcc8370a4d6..cf74a73a86d11f88f626c44576abe50372596c64 100644
 #include "pan_context.h"
 #include "util/hash_table.h"
 #include "util/ralloc.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_pack_color.h"
 #include "pan_util.h"
 #include "pandecode/decode.h"
+#include "panfrost-quirks.h"
 
 /* panfrost_bo_access is here to help us keep track of batch accesses to BOs
  * and build a proper dependency graph such that batches can be pipelined for
@@ -70,7 +71,7 @@ panfrost_create_batch_fence(struct panfrost_batch *batch)
         pipe_reference_init(&fence->reference, 1);
         fence->ctx = batch->ctx;
         fence->batch = batch;
-        ret = drmSyncobjCreate(pan_screen(batch->ctx->base.screen)->fd, 0,
+        ret = drmSyncobjCreate(pan_device(batch->ctx->base.screen)->fd, 0,
                                &fence->syncobj);
         assert(!ret);
 
@@ -80,7 +81,7 @@ panfrost_create_batch_fence(struct panfrost_batch *batch)
 static void
 panfrost_free_batch_fence(struct panfrost_batch_fence *fence)
 {
-        drmSyncobjDestroy(pan_screen(fence->ctx->base.screen)->fd,
+        drmSyncobjDestroy(pan_device(fence->ctx->base.screen)->fd,
                           fence->syncobj);
         ralloc_free(fence);
 }
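Both helpers are thin wrappers over the libdrm syncobj API. A condensed sketch of the lifecycle they bracket (the waits appear further down, in the submit and flush paths):

    #include <assert.h>
    #include <stdint.h>
    #include <xf86drm.h>

    /* Condensed view of the syncobj lifecycle the two helpers above manage:
     * create an unsignaled syncobj, have a kernel submission signal it, wait,
     * then destroy. "fd" stands in for the DRM device fd (dev->fd here). */
    static void
    syncobj_lifecycle_sketch(int fd)
    {
            uint32_t syncobj;
            int ret = drmSyncobjCreate(fd, 0, &syncobj);
            assert(!ret);

            /* ... a submit ioctl elsewhere names this syncobj as its out_sync ... */

            drmSyncobjWait(fd, &syncobj, 1, INT64_MAX, 0, NULL);
            drmSyncobjDestroy(fd, syncobj);
    }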
@@ -113,9 +114,6 @@ panfrost_create_batch(struct panfrost_context *ctx,
         batch->maxx = batch->maxy = 0;
         batch->transient_offset = 0;
 
-        util_dynarray_init(&batch->headers, batch);
-        util_dynarray_init(&batch->gpu_headers, batch);
-        util_dynarray_init(&batch->dependencies, batch);
         batch->out_sync = panfrost_create_batch_fence(batch);
         util_copy_framebuffer_state(&batch->key, key);
 
@@ -293,6 +291,27 @@ panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
         return batch;
 }
 
+struct panfrost_batch *
+panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx)
+{
+        struct panfrost_batch *batch;
+
+        batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
+
+        /* The batch has no draws queued, so we can return it directly.
+         * Note that it's perfectly fine to re-use a batch with an
+         * existing clear: we'll just update it with the new clear request.
+         */
+        if (!batch->first_job)
+                return batch;
+
+        /* Otherwise, we need to freeze the existing one and instantiate a new
+         * one.
+         */
+        panfrost_freeze_batch(batch);
+        return panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
+}
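A hypothetical caller, to show why this entry point exists (the real clear path lives in pan_context.c): an unconditional clear must not apply retroactively to draws already queued, so it asks for a draw-free batch and merges its clear bits in.

    /* Hypothetical sketch; "buffers" would be PIPE_CLEAR_* bits. */
    static void
    clear_sketch(struct panfrost_context *ctx, unsigned buffers)
    {
            struct panfrost_batch *batch =
                    panfrost_get_fresh_batch_for_fbo(ctx);

            /* Merging into an existing clear is fine, per the comment above */
            batch->clear |= buffers;
    }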
+
 static bool
 panfrost_batch_fence_is_signaled(struct panfrost_batch_fence *fence)
 {
@@ -303,7 +322,7 @@ panfrost_batch_fence_is_signaled(struct panfrost_batch_fence *fence)
         if (fence->batch)
                 return false;
 
-        int ret = drmSyncobjWait(pan_screen(fence->ctx->base.screen)->fd,
+        int ret = drmSyncobjWait(pan_device(fence->ctx->base.screen)->fd,
                                  &fence->syncobj, 1, 0, 0, NULL);
 
         /* Cache whether the fence was signaled */
@@ -321,7 +340,9 @@ panfrost_bo_access_gc_fences(struct panfrost_context *ctx,
                 access->writer = NULL;
         }
 
-        unsigned nreaders = 0;
+        struct panfrost_batch_fence **readers_array = util_dynarray_begin(&access->readers);
+        struct panfrost_batch_fence **new_readers = readers_array;
+
         util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
                               reader) {
                 if (!(*reader))
@@ -331,12 +352,15 @@ panfrost_bo_access_gc_fences(struct panfrost_context *ctx,
                         panfrost_batch_fence_unreference(*reader);
                         *reader = NULL;
                 } else {
-                        nreaders++;
+                        /* Build a new array of only unsignaled fences in-place */
+                        *(new_readers++) = *reader;
                 }
         }
 
-        if (!nreaders)
-                util_dynarray_clear(&access->readers);
+        if (!util_dynarray_resize(&access->readers, struct panfrost_batch_fence *,
+                                  new_readers - readers_array) &&
+            new_readers != readers_array)
+                unreachable("Invalid dynarray access->readers");
 }
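The hunk above swaps the old count-then-clear logic for an in-place compaction. The same idiom on a plain array, as a minimal standalone sketch:

    #include <stdbool.h>
    #include <stddef.h>

    /* Keep only the elements for which keep() returns true and report the new
     * count. The write cursor can never overtake the read cursor, so survivors
     * are copied toward the front without clobbering anything unread -- exactly
     * what the readers compaction above does on a util_dynarray. */
    static size_t
    filter_in_place(void **arr, size_t count, bool (*keep)(void *))
    {
            void **out = arr;

            for (size_t i = 0; i < count; i++) {
                    if (keep(arr[i]))
                            *(out++) = arr[i];
            }

            return out - arr;
    }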
 
 /* Collect signaled fences to keep the kernel-side syncobj-map small. The
@@ -358,8 +382,10 @@ panfrost_gc_fences(struct panfrost_context *ctx)
                 panfrost_bo_access_gc_fences(ctx, access, entry->key);
                 if (!util_dynarray_num_elements(&access->readers,
                                                 struct panfrost_batch_fence *) &&
-                    !access->writer)
+                    !access->writer) {
+                        ralloc_free(access);
                         _mesa_hash_table_remove(ctx->accessed_bos, entry);
+                }
         }
 }
 
@@ -577,12 +603,12 @@ panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
 {
         struct panfrost_bo *bo;
 
-        bo = panfrost_bo_create(pan_screen(batch->ctx->base.screen), size,
+        bo = pan_bo_create(pan_device(batch->ctx->base.screen), size,
                                 create_flags);
         panfrost_batch_add_bo(batch, bo, access_flags);
 
         /* panfrost_batch_add_bo() has retained a reference and
-         * panfrost_bo_create() initialize the refcnt to 1, so let's
+         * pan_bo_create() initializes the refcnt to 1, so let's
          * unreference the BO here so it gets released when the batch is
          * destroyed (unless it's retained by someone else in the meantime).
          */
@@ -601,6 +627,7 @@ panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size)
                 assert(batch->polygon_list->size >= size);
         } else {
                 /* Create the BO as invisible, as there's no reason to map */
+                size = util_next_power_of_two(size);
 
                 batch->polygon_list = panfrost_batch_create_bo(batch, size,
                                                                PAN_BO_INVISIBLE,
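Rounding the request up to a power of two gives later, slightly larger polygon lists in the same batch headroom: the existing BO keeps satisfying the size assert above instead of being outgrown by every small increase. util_next_power_of_two() returns the smallest power of two >= its argument; a loop-based equivalent, for illustration:

    /* Illustrative equivalent of the rounding: next_pow2(5000) == 8192, so a
     * later 6000-byte request in the same batch still fits the cached BO. */
    static unsigned
    next_pow2(unsigned x)
    {
            unsigned p = 1;

            while (p < x)
                    p <<= 1;

            return p;
    }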
@@ -614,21 +641,47 @@ panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size)
 }
 
 struct panfrost_bo *
-panfrost_batch_get_scratchpad(struct panfrost_batch *batch)
+panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
+                unsigned shift,
+                unsigned thread_tls_alloc,
+                unsigned core_count)
 {
-        if (batch->scratchpad)
-                return batch->scratchpad;
+        unsigned size = panfrost_get_total_stack_size(shift,
+                        thread_tls_alloc,
+                        core_count);
+
+        if (batch->scratchpad) {
+                assert(batch->scratchpad->size >= size);
+        } else {
+                batch->scratchpad = panfrost_batch_create_bo(batch, size,
+                                             PAN_BO_INVISIBLE,
+                                             PAN_BO_ACCESS_PRIVATE |
+                                             PAN_BO_ACCESS_RW |
+                                             PAN_BO_ACCESS_VERTEX_TILER |
+                                             PAN_BO_ACCESS_FRAGMENT);
+        }
 
-        batch->scratchpad = panfrost_batch_create_bo(batch, 64 * 4 * 4096,
-                                                     PAN_BO_INVISIBLE,
-                                                     PAN_BO_ACCESS_PRIVATE |
-                                                     PAN_BO_ACCESS_RW |
-                                                     PAN_BO_ACCESS_VERTEX_TILER |
-                                                     PAN_BO_ACCESS_FRAGMENT);
-        assert(batch->scratchpad);
         return batch->scratchpad;
 }
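A sketch of the sizing this relies on, under the assumption that panfrost_get_total_stack_size() scales a per-thread slice derived from the shift by the per-core thread count and the core count; the exact per-thread derivation lives in the shared encoder code and may differ:

    /* Hypothetical model only: bytes_per_thread is an assumption, not the
     * encoder's exact formula. */
    static unsigned
    total_stack_size_sketch(unsigned shift, unsigned thread_tls_alloc,
                            unsigned core_count)
    {
            unsigned bytes_per_thread = 1u << shift;

            return bytes_per_thread * thread_tls_alloc * core_count;
    }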
 
+struct panfrost_bo *
+panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
+                unsigned size,
+                unsigned workgroup_count)
+{
+        if (batch->shared_memory) {
+                assert(batch->shared_memory->size >= size);
+        } else {
+                batch->shared_memory = panfrost_batch_create_bo(batch, size,
+                                             PAN_BO_INVISIBLE,
+                                             PAN_BO_ACCESS_PRIVATE |
+                                             PAN_BO_ACCESS_RW |
+                                             PAN_BO_ACCESS_VERTEX_TILER);
+        }
+
+        return batch->shared_memory;
+}
+
 struct panfrost_bo *
 panfrost_batch_get_tiler_heap(struct panfrost_batch *batch)
 {
@@ -649,11 +702,18 @@ panfrost_batch_get_tiler_heap(struct panfrost_batch *batch)
 struct panfrost_bo *
 panfrost_batch_get_tiler_dummy(struct panfrost_batch *batch)
 {
+        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
+
+        uint32_t create_flags = 0;
+
         if (batch->tiler_dummy)
                 return batch->tiler_dummy;
 
+        if (!(dev->quirks & MIDGARD_NO_HIER_TILING))
+                create_flags = PAN_BO_INVISIBLE;
+
         batch->tiler_dummy = panfrost_batch_create_bo(batch, 4096,
-                                                      PAN_BO_INVISIBLE,
+                                                      create_flags,
                                                       PAN_BO_ACCESS_PRIVATE |
                                                       PAN_BO_ACCESS_RW |
                                                       PAN_BO_ACCESS_VERTEX_TILER |
@@ -665,10 +725,23 @@ panfrost_batch_get_tiler_dummy(struct panfrost_batch *batch)
 static void
 panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
 {
+        /* Color 0 is cleared, no need to draw the wallpaper.
+         * TODO: MRT wallpapers.
+         */
+        if (batch->clear & PIPE_CLEAR_COLOR0)
+                return;
+
         /* Nothing to reload? TODO: MRT wallpapers */
         if (batch->key.cbufs[0] == NULL)
                 return;
 
+        /* No draw calls, and no clear on the depth/stencil bufs.
+         * Drawing the wallpaper would be useless.
+         */
+        if (!batch->tiler_dep &&
+            !(batch->clear & PIPE_CLEAR_DEPTHSTENCIL))
+                return;
+
         /* Check if the buffer has any content on it worth preserving */
 
         struct pipe_surface *surf = batch->key.cbufs[0];
@@ -716,9 +789,11 @@ panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
         damage.maxx = MIN2(batch->maxx,
                            rsrc->damage.biggest_rect.x +
                            rsrc->damage.biggest_rect.width);
+        damage.maxx = MAX2(damage.maxx, damage.minx);
         damage.maxy = MIN2(batch->maxy,
                            rsrc->damage.biggest_rect.y +
                            rsrc->damage.biggest_rect.height);
+        damage.maxy = MAX2(damage.maxy, damage.miny);
 
         /* One damage rectangle means we can end up with at most 4 reload
          * regions:
@@ -769,13 +844,13 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 {
         struct panfrost_context *ctx = batch->ctx;
         struct pipe_context *gallium = (struct pipe_context *) ctx;
-        struct panfrost_screen *screen = pan_screen(gallium->screen);
+        struct panfrost_device *dev = pan_device(gallium->screen);
         struct drm_panfrost_submit submit = {0,};
         uint32_t *bo_handles, *in_syncs = NULL;
         bool is_fragment_shader;
         int ret;
 
-        is_fragment_shader = (reqs & PANFROST_JD_REQ_FS) && batch->first_job.gpu;
+        is_fragment_shader = (reqs & PANFROST_JD_REQ_FS) && batch->first_job;
         if (is_fragment_shader)
                 submit.in_sync_count = 1;
         else
@@ -810,26 +885,40 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 
         hash_table_foreach(batch->bos, entry) {
                 struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
+                uint32_t flags = (uintptr_t)entry->data;
+
                 assert(bo->gem_handle > 0);
                 bo_handles[submit.bo_handle_count++] = bo->gem_handle;
+
+                /* Update the BO access flags so that panfrost_bo_wait() knows
+                 * about all pending accesses.
+                 * We only keep the READ/WRITE info since this is all the BO
+                 * wait logic cares about.
+                 * We also preserve existing flags as this batch might not
+                 * be the first one to access the BO.
+                 */
+                bo->gpu_access |= flags & (PAN_BO_ACCESS_RW);
         }
 
         submit.bo_handles = (u64) (uintptr_t) bo_handles;
-        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
+        ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
         free(bo_handles);
         free(in_syncs);
 
         if (ret) {
-                fprintf(stderr, "Error submitting: %m\n");
+                DBG("Error submitting: %m\n");
                 return errno;
         }
 
         /* Trace the job if we're doing that */
-        if (pan_debug & PAN_DBG_TRACE) {
+        if (pan_debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
                 /* Wait so we can get errors reported back */
-                drmSyncobjWait(screen->fd, &batch->out_sync->syncobj, 1,
+                drmSyncobjWait(dev->fd, &batch->out_sync->syncobj, 1,
                                INT64_MAX, 0, NULL);
-                pandecode_jc(submit.jc, FALSE);
+
+                /* Trace gets priority over sync */
+                bool minimal = !(pan_debug & PAN_DBG_TRACE);
+                pandecode_jc(submit.jc, dev->quirks & IS_BIFROST, dev->gpu_id, minimal);
         }
 
         return 0;
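The gpu_access accumulation above is what lets later waits be elided cheaply. A hypothetical illustration of the check it enables (panfrost_bo_wait()'s real logic lives in pan_bo.c; this only shows the shape):

    /* Sketch: can a CPU-side wait on this BO be skipped entirely? */
    static bool
    bo_wait_can_skip_sketch(const struct panfrost_bo *bo, bool wait_readers)
    {
            /* The GPU was never handed this BO at all */
            if (!(bo->gpu_access & PAN_BO_ACCESS_RW))
                    return true;

            /* Only reads are pending, and the caller only reads too */
            if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
                    return true;

            return false;
    }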
@@ -838,17 +927,16 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 static int
 panfrost_batch_submit_jobs(struct panfrost_batch *batch)
 {
-        bool has_draws = batch->first_job.gpu;
+        bool has_draws = batch->first_job;
         int ret = 0;
 
         if (has_draws) {
-                ret = panfrost_batch_submit_ioctl(batch, batch->first_job.gpu, 0);
+                ret = panfrost_batch_submit_ioctl(batch, batch->first_job, 0);
                 assert(!ret);
         }
 
-        if (batch->first_tiler.gpu || batch->clear) {
+        if (batch->tiler_dep || batch->clear) {
                 mali_ptr fragjob = panfrost_fragment_job(batch, has_draws);
-
                 ret = panfrost_batch_submit_ioctl(batch, fragjob, PANFROST_JD_REQ_FS);
                 assert(!ret);
         }
@@ -868,11 +956,10 @@ panfrost_batch_submit(struct panfrost_batch *batch)
                         panfrost_batch_submit((*dep)->batch);
         }
 
-        struct panfrost_context *ctx = batch->ctx;
         int ret;
 
         /* Nothing to do! */
-        if (!batch->last_job.gpu && !batch->clear) {
+        if (!batch->first_job && !batch->clear) {
                 /* Mark the fence as signaled so the fence logic does not try
                  * to wait on it.
                  */
@@ -880,30 +967,51 @@ panfrost_batch_submit(struct panfrost_batch *batch)
                 goto out;
         }
 
-        if (!batch->clear && batch->last_tiler.gpu)
-                panfrost_batch_draw_wallpaper(batch);
+        panfrost_batch_draw_wallpaper(batch);
+
+        /* Now that all draws are in, we can finally prepare the
+         * FBD for the batch */
 
-        panfrost_scoreboard_link_batch(batch);
+        if (batch->framebuffer.gpu && batch->first_job) {
+                struct panfrost_context *ctx = batch->ctx;
+                struct pipe_context *gallium = (struct pipe_context *) ctx;
+                struct panfrost_device *dev = pan_device(gallium->screen);
+
+                if (dev->quirks & MIDGARD_SFBD)
+                        panfrost_attach_sfbd(batch, ~0);
+                else
+                        panfrost_attach_mfbd(batch, ~0);
+        }
+
+        panfrost_scoreboard_initialize_tiler(batch);
 
         ret = panfrost_batch_submit_jobs(batch);
 
         if (ret)
-                fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
+                DBG("panfrost_batch_submit failed: %d\n", ret);
+
+        /* We must reset the damage info of our render targets here even
+         * though a damage reset normally happens when the DRI layer swaps
+         * buffers. That's because there can be implicit flushes the GL
+         * app is not aware of, and those might impact the damage region: if
+         * part of the damaged portion is drawn during those implicit flushes,
+         * those areas must be reloaded before the next draws are pushed, and
+         * since the driver can't easily know what's been modified by the draws
+         * it flushed, the easiest solution is to reload everything.
+         */
+        for (unsigned i = 0; i < batch->key.nr_cbufs; i++) {
+                struct panfrost_resource *res;
 
-out:
-        panfrost_freeze_batch(batch);
+                if (!batch->key.cbufs[i])
+                        continue;
 
-        /* We always stall the pipeline for correct results since pipelined
-         * rendering is quite broken right now (to be fixed by the panfrost_job
-         * refactor, just take the perf hit for correctness)
-         */
-        if (!batch->out_sync->signaled)
-                drmSyncobjWait(pan_screen(ctx->base.screen)->fd,
-                               &batch->out_sync->syncobj, 1, INT64_MAX, 0,
-                               NULL);
+                res = pan_resource(batch->key.cbufs[i]->texture);
+                panfrost_resource_reset_damage(res);
+        }
 
+out:
+        panfrost_freeze_batch(batch);
         panfrost_free_batch(batch);
-
 }
 
 void
@@ -940,7 +1048,7 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, bool wait)
         if (!wait)
                 return;
 
-        drmSyncobjWait(pan_screen(ctx->base.screen)->fd,
+        drmSyncobjWait(pan_device(ctx->base.screen)->fd,
                        util_dynarray_begin(&syncobjs),
                        util_dynarray_num_elements(&syncobjs, uint32_t),
                        INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
@@ -952,6 +1060,61 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, bool wait)
         util_dynarray_fini(&syncobjs);
 }
 
+bool
+panfrost_pending_batches_access_bo(struct panfrost_context *ctx,
+                                   const struct panfrost_bo *bo)
+{
+        struct panfrost_bo_access *access;
+        struct hash_entry *hentry;
+
+        hentry = _mesa_hash_table_search(ctx->accessed_bos, bo);
+        access = hentry ? hentry->data : NULL;
+        if (!access)
+                return false;
+
+        if (access->writer && access->writer->batch)
+                return true;
+
+        util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
+                              reader) {
+                if (*reader && (*reader)->batch)
+                        return true;
+        }
+
+        return false;
+}
+
+void
+panfrost_flush_batches_accessing_bo(struct panfrost_context *ctx,
+                                    struct panfrost_bo *bo,
+                                    uint32_t access_type)
+{
+        struct panfrost_bo_access *access;
+        struct hash_entry *hentry;
+
+        /* It doesn't make any sense to flush only the readers. */
+        assert(access_type == PAN_BO_ACCESS_WRITE ||
+               access_type == PAN_BO_ACCESS_RW);
+
+        hentry = _mesa_hash_table_search(ctx->accessed_bos, bo);
+        access = hentry ? hentry->data : NULL;
+        if (!access)
+                return;
+
+        if (access_type & PAN_BO_ACCESS_WRITE && access->writer &&
+            access->writer->batch)
+                panfrost_batch_submit(access->writer->batch);
+
+        if (!(access_type & PAN_BO_ACCESS_READ))
+                return;
+
+        util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
+                              reader) {
+                if (*reader && (*reader)->batch)
+                        panfrost_batch_submit((*reader)->batch);
+        }
+}
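A typical use, sketched (the real call sites are in the transfer-map path): a CPU read only conflicts with pending GPU writes, while a CPU write conflicts with pending GPU reads as well, hence RW.

    /* Hypothetical helper: flush whatever conflicts with the CPU access. */
    static void
    sync_bo_for_cpu_sketch(struct panfrost_context *ctx,
                           struct panfrost_bo *bo, bool cpu_writes)
    {
            uint32_t access = cpu_writes ? PAN_BO_ACCESS_RW
                                         : PAN_BO_ACCESS_WRITE;

            panfrost_flush_batches_accessing_bo(ctx, bo, access);
    }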
+
 void
 panfrost_batch_set_requirements(struct panfrost_batch *batch)
 {
@@ -964,6 +1127,22 @@ panfrost_batch_set_requirements(struct panfrost_batch *batch)
                 batch->requirements |= PAN_REQ_DEPTH_WRITE;
 }
 
+void
+panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
+{
+        struct panfrost_context *ctx = batch->ctx;
+
+        for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
+                struct panfrost_shader_state *ss;
+
+                ss = panfrost_get_shader_state(ctx, i);
+                if (!ss)
+                        continue;
+
+                batch->stack_size = MAX2(batch->stack_size, ss->stack_size);
+        }
+}
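How this ties into panfrost_batch_get_scratchpad() further up, sketched. panfrost_get_stack_shift() and the two device fields are assumptions about surrounding code not shown in this diff:

    /* Hypothetical submit-time flow: worst-case shader stack -> shift -> BO. */
    static void
    emit_stack_sketch(struct panfrost_batch *batch,
                      struct panfrost_device *dev)
    {
            panfrost_batch_adjust_stack_size(batch);

            unsigned shift = panfrost_get_stack_shift(batch->stack_size);
            struct panfrost_bo *bo =
                    panfrost_batch_get_scratchpad(batch, shift,
                                                  dev->thread_tls_alloc,
                                                  dev->core_count);

            /* bo->gpu would then be written into the TLS/framebuffer descriptor */
            (void)bo;
    }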
+
 /* Helper to smear a 32-bit color across 128-bit components */
 
 static void
@@ -997,10 +1176,10 @@ pan_pack_color(uint32_t *packed, const union pipe_color_union *color, enum pipe_
 
         if (util_format_is_rgba8_variant(desc)) {
                 pan_pack_color_32(packed,
-                                  (float_to_ubyte(clear_alpha) << 24) |
-                                  (float_to_ubyte(color->f[2]) << 16) |
-                                  (float_to_ubyte(color->f[1]) <<  8) |
-                                  (float_to_ubyte(color->f[0]) <<  0));
+                                  ((uint32_t) float_to_ubyte(clear_alpha) << 24) |
+                                  ((uint32_t) float_to_ubyte(color->f[2]) << 16) |
+                                  ((uint32_t) float_to_ubyte(color->f[1]) <<  8) |
+                                  ((uint32_t) float_to_ubyte(color->f[0]) <<  0));
         } else if (format == PIPE_FORMAT_B5G6R5_UNORM) {
                 /* First, we convert the components to R5, G6, B5 separately */
                 unsigned r5 = CLAMP(color->f[0], 0.0, 1.0) * 31.0;
@@ -1046,8 +1225,10 @@ pan_pack_color(uint32_t *packed, const union pipe_color_union *color, enum pipe_
                         pan_pack_color_32(packed, s | (s << 16));
                 } else if (size == 2)
                         pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
-                else if (size == 4)
+                else if (size == 3 || size == 4)
                         pan_pack_color_32(packed, out.ui[0]);
+                else if (size == 6)
+                        pan_pack_color_64(packed, out.ui[0], out.ui[1] | (out.ui[1] << 16)); /* RGB16F: halves packed as R|G|B|B */
                 else if (size == 8)
                         pan_pack_color_64(packed, out.ui[0], out.ui[1]);
                 else if (size == 16)
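The casts added at the top of this hunk fix a signed-shift hazard: float_to_ubyte() produces a value that promotes to int, and shifting a byte with bit 7 set left by 24 lands in the sign bit, which is undefined behaviour. A minimal standalone demonstration of the fix:

    #include <stdint.h>

    /* (uint32_t)a << 24 is defined for every a; a plain int shift is UB once
     * a >= 0x80, which is exactly the hazard the casts above remove. */
    static uint32_t
    pack_byte_hi_sketch(uint8_t a)
    {
            return (uint32_t)a << 24;
    }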
@@ -1132,7 +1313,7 @@ panfrost_batch_intersection_scissor(struct panfrost_batch *batch,
         batch->maxy = MIN2(batch->maxy, maxy);
 }
 
 /* Are we currently rendering to the screen (rather than an FBO)? */
 
 bool
 panfrost_batch_is_scanout(struct panfrost_batch *batch)