#include "pan_context.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
#include "util/u_pack_color.h"
#include "pan_util.h"
#include "pandecode/decode.h"
+#include "panfrost-quirks.h"
/* panfrost_bo_access is here to help us keep track of batch accesses to BOs
* and build a proper dependency graph such that batches can be pipelined for
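/* For orientation only -- a sketch of the per-BO access record the code
 * below manipulates: one writer fence plus a dynarray of reader fences.
 * The exact definition is an assumption here and may differ:
 *
 *     struct panfrost_bo_access {
 *             struct util_dynarray readers;        // panfrost_batch_fence *
 *             struct panfrost_batch_fence *writer; // NULL if no writer
 *     };
 */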
pipe_reference_init(&fence->reference, 1);
fence->ctx = batch->ctx;
fence->batch = batch;
- ret = drmSyncobjCreate(pan_screen(batch->ctx->base.screen)->fd, 0,
+ ret = drmSyncobjCreate(pan_device(batch->ctx->base.screen)->fd, 0,
&fence->syncobj);
assert(!ret);
static void
panfrost_free_batch_fence(struct panfrost_batch_fence *fence)
{
- drmSyncobjDestroy(pan_screen(fence->ctx->base.screen)->fd,
+ drmSyncobjDestroy(pan_device(fence->ctx->base.screen)->fd,
fence->syncobj);
ralloc_free(fence);
}
batch->maxx = batch->maxy = 0;
batch->transient_offset = 0;
- util_dynarray_init(&batch->headers, batch);
- util_dynarray_init(&batch->gpu_headers, batch);
- util_dynarray_init(&batch->dependencies, batch);
batch->out_sync = panfrost_create_batch_fence(batch);
util_copy_framebuffer_state(&batch->key, key);
* Note that it's perfectly fine to re-use a batch with an
* existing clear; we'll just update it with the new clear request.
*/
- if (!batch->last_job.gpu)
+ if (!batch->first_job)
return batch;
/* Otherwise, we need to freeze the existing one and instantiate a new
if (fence->batch)
return false;
- int ret = drmSyncobjWait(pan_screen(fence->ctx->base.screen)->fd,
+ int ret = drmSyncobjWait(pan_device(fence->ctx->base.screen)->fd,
&fence->syncobj, 1, 0, 0, NULL);
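/* A zero timeout turns drmSyncobjWait() into a non-blocking poll: a
 * non-zero return means the fence is still pending (or an error occurred). */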
/* Cache whether the fence was signaled */
access->writer = NULL;
}
- unsigned nreaders = 0;
+ struct panfrost_batch_fence **readers_array = util_dynarray_begin(&access->readers);
+ struct panfrost_batch_fence **new_readers = readers_array;
+
util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
reader) {
if (!(*reader))
panfrost_batch_fence_unreference(*reader);
*reader = NULL;
} else {
- nreaders++;
+ /* Build a new array of only unsignaled fences in-place */
+ *(new_readers++) = *reader;
}
}
- if (!nreaders)
- util_dynarray_clear(&access->readers);
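+ /* Shrink the readers array to its compacted size. The only NULL
+ * return tolerated is the resize-to-zero case (new_readers ==
+ * readers_array), where there is no data to point at. */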
+ if (!util_dynarray_resize(&access->readers, struct panfrost_batch_fence *,
+ new_readers - readers_array) &&
+ new_readers != readers_array)
+ unreachable("Invalid dynarray access->readers");
}
/* Collect signaled fences to keep the kernel-side syncobj-map small. The
panfrost_bo_access_gc_fences(ctx, access, entry->key);
if (!util_dynarray_num_elements(&access->readers,
struct panfrost_batch_fence *) &&
- !access->writer)
+ !access->writer) {
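+ /* No readers and no writer left: release the access record
+ * along with its hash-table entry. */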
+ ralloc_free(access);
_mesa_hash_table_remove(ctx->accessed_bos, entry);
+ }
}
}
{
struct panfrost_bo *bo;
- bo = panfrost_bo_create(pan_screen(batch->ctx->base.screen), size,
+ bo = pan_bo_create(pan_device(batch->ctx->base.screen), size,
create_flags);
panfrost_batch_add_bo(batch, bo, access_flags);
/* panfrost_batch_add_bo() has retained a reference and
- * panfrost_bo_create() initialize the refcnt to 1, so let's
+ * pan_bo_create() initializes the refcnt to 1, so let's
* unreference the BO here so it gets released when the batch is
* destroyed (unless it's retained by someone else in the meantime).
*/
assert(batch->polygon_list->size >= size);
} else {
/* Create the BO as invisible, as there's no reason to map it */
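+ /* Round up to a power of two, presumably so polygon lists of
+ * slightly different sizes land in the same BO-cache bucket and
+ * get reused across frames. */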
+ size = util_next_power_of_two(size);
batch->polygon_list = panfrost_batch_create_bo(batch, size,
PAN_BO_INVISIBLE,
}
struct panfrost_bo *
-panfrost_batch_get_scratchpad(struct panfrost_batch *batch)
+panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
+ unsigned shift,
+ unsigned thread_tls_alloc,
+ unsigned core_count)
{
- if (batch->scratchpad)
- return batch->scratchpad;
+ unsigned size = panfrost_get_total_stack_size(shift,
+ thread_tls_alloc,
+ core_count);
+
+ if (batch->scratchpad) {
+ assert(batch->scratchpad->size >= size);
+ } else {
+ batch->scratchpad = panfrost_batch_create_bo(batch, size,
+ PAN_BO_INVISIBLE,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_RW |
+ PAN_BO_ACCESS_VERTEX_TILER |
+ PAN_BO_ACCESS_FRAGMENT);
+ }
- batch->scratchpad = panfrost_batch_create_bo(batch, 64 * 4 * 4096,
- PAN_BO_INVISIBLE,
- PAN_BO_ACCESS_PRIVATE |
- PAN_BO_ACCESS_RW |
- PAN_BO_ACCESS_VERTEX_TILER |
- PAN_BO_ACCESS_FRAGMENT);
- assert(batch->scratchpad);
return batch->scratchpad;
}
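/* Illustrative call site only -- panfrost_get_stack_shift() and the
 * dev->thread_tls_alloc / dev->core_count fields are assumptions here:
 *
 *     struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
 *     unsigned shift = panfrost_get_stack_shift(batch->stack_size);
 *
 *     panfrost_batch_get_scratchpad(batch, shift,
 *                                   dev->thread_tls_alloc,
 *                                   dev->core_count);
 */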
+struct panfrost_bo *
+panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
+ unsigned size,
+ unsigned workgroup_count)
+{
+ if (batch->shared_memory) {
+ assert(batch->shared_memory->size >= size);
+ } else {
+ batch->shared_memory = panfrost_batch_create_bo(batch, size,
+ PAN_BO_INVISIBLE,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_RW |
+ PAN_BO_ACCESS_VERTEX_TILER);
+ }
+
+ return batch->shared_memory;
+}
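+/* Hypothetical usage (all names assumed): a compute launch would size
+ * this from its per-workgroup shared-memory footprint, e.g.
+ *
+ *     panfrost_batch_get_shared_memory(batch,
+ *             shared_size_per_group * num_groups, num_groups);
+ */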
+
struct panfrost_bo *
panfrost_batch_get_tiler_heap(struct panfrost_batch *batch)
{
struct panfrost_bo *
panfrost_batch_get_tiler_dummy(struct panfrost_batch *batch)
{
+ struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
+
+ uint32_t create_flags = 0;
+
if (batch->tiler_dummy)
return batch->tiler_dummy;
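+ /* With hierarchical tiling the dummy BO is only ever touched by the
+ * GPU, so it can stay invisible; on MIDGARD_NO_HIER_TILING hardware
+ * it presumably needs to remain CPU-mappable. */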
+ if (!(dev->quirks & MIDGARD_NO_HIER_TILING))
+ create_flags = PAN_BO_INVISIBLE;
+
batch->tiler_dummy = panfrost_batch_create_bo(batch, 4096,
- PAN_BO_INVISIBLE,
+ create_flags,
PAN_BO_ACCESS_PRIVATE |
PAN_BO_ACCESS_RW |
PAN_BO_ACCESS_VERTEX_TILER |
/* No draw calls, and no clear on the depth/stencil bufs.
* Drawing the wallpaper would be useless.
*/
- if (!batch->last_tiler.gpu &&
+ if (!batch->tiler_dep &&
!(batch->clear & PIPE_CLEAR_DEPTHSTENCIL))
return;
damage.maxx = MIN2(batch->maxx,
rsrc->damage.biggest_rect.x +
rsrc->damage.biggest_rect.width);
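+ /* Clamp both axes so the rectangle cannot invert (max < min) when
+ * the biggest damage rect lies entirely outside the batch bounds. */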
+ damage.maxx = MAX2(damage.maxx, damage.minx);
damage.maxy = MIN2(batch->maxy,
rsrc->damage.biggest_rect.y +
rsrc->damage.biggest_rect.height);
+ damage.maxy = MAX2(damage.maxy, damage.miny);
/* One damage rectangle means we can end up with at most 4 reload
* regions:
{
struct panfrost_context *ctx = batch->ctx;
struct pipe_context *gallium = (struct pipe_context *) ctx;
- struct panfrost_screen *screen = pan_screen(gallium->screen);
+ struct panfrost_device *dev = pan_device(gallium->screen);
struct drm_panfrost_submit submit = {0,};
uint32_t *bo_handles, *in_syncs = NULL;
bool is_fragment_shader;
int ret;
- is_fragment_shader = (reqs & PANFROST_JD_REQ_FS) && batch->first_job.gpu;
+ is_fragment_shader = (reqs & PANFROST_JD_REQ_FS) && batch->first_job;
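+ /* A fragment job submitted after draws must wait on the
+ * vertex/tiler chain, presumably via the batch's own out_sync. */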
if (is_fragment_shader)
submit.in_sync_count = 1;
else
}
submit.bo_handles = (u64) (uintptr_t) bo_handles;
- ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
+ ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
free(bo_handles);
free(in_syncs);
if (ret) {
- fprintf(stderr, "Error submitting: %m\n");
+ DBG("Error submitting: %m\n");
return errno;
}
/* Trace the job if we're doing that */
- if (pan_debug & PAN_DBG_TRACE) {
+ if (pan_debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
/* Wait so we can get errors reported back */
- drmSyncobjWait(screen->fd, &batch->out_sync->syncobj, 1,
+ drmSyncobjWait(dev->fd, &batch->out_sync->syncobj, 1,
INT64_MAX, 0, NULL);
- pandecode_jc(submit.jc, FALSE);
+
+ /* Trace gets priority over sync */
+ bool minimal = !(pan_debug & PAN_DBG_TRACE);
+ pandecode_jc(submit.jc, dev->quirks & IS_BIFROST, dev->gpu_id, minimal);
}
return 0;
static int
panfrost_batch_submit_jobs(struct panfrost_batch *batch)
{
- bool has_draws = batch->first_job.gpu;
+ bool has_draws = batch->first_job;
int ret = 0;
if (has_draws) {
- ret = panfrost_batch_submit_ioctl(batch, batch->first_job.gpu, 0);
+ ret = panfrost_batch_submit_ioctl(batch, batch->first_job, 0);
assert(!ret);
}
- if (batch->first_tiler.gpu || batch->clear) {
+ if (batch->tiler_dep || batch->clear) {
mali_ptr fragjob = panfrost_fragment_job(batch, has_draws);
-
ret = panfrost_batch_submit_ioctl(batch, fragjob, PANFROST_JD_REQ_FS);
assert(!ret);
}
int ret;
/* Nothing to do! */
- if (!batch->last_job.gpu && !batch->clear) {
+ if (!batch->first_job && !batch->clear) {
/* Mark the fence as signaled so the fence logic does not try
* to wait on it.
*/
panfrost_batch_draw_wallpaper(batch);
- panfrost_scoreboard_link_batch(batch);
+ /* Now that all draws are in, we can finally prepare the
+ * framebuffer descriptor (FBD) for the batch */
+
+ if (batch->framebuffer.gpu && batch->first_job) {
+ struct panfrost_context *ctx = batch->ctx;
+ struct pipe_context *gallium = (struct pipe_context *) ctx;
+ struct panfrost_device *dev = pan_device(gallium->screen);
+
+ if (dev->quirks & MIDGARD_SFBD)
+ panfrost_attach_sfbd(batch, ~0);
+ else
+ panfrost_attach_mfbd(batch, ~0);
+ }
+
+ panfrost_scoreboard_initialize_tiler(batch);
ret = panfrost_batch_submit_jobs(batch);
if (ret)
- fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
+ DBG("panfrost_batch_submit failed: %d\n", ret);
+
+ /* We must reset the damage info of our render targets here even
+ * though a damage reset normally happens when the DRI layer swaps
+ * buffers. That's because there can be implicit flushes the GL
+ * app is not aware of, and those might impact the damage region: if
+ * part of the damaged portion is drawn during those implicit flushes,
+ * you have to reload those areas before next draws are pushed, and
+ * since the driver can't easily know what's been modified by the draws
+ * it flushed, the easiest solution is to reload everything.
+ */
+ for (unsigned i = 0; i < batch->key.nr_cbufs; i++) {
+ struct panfrost_resource *res;
+
+ if (!batch->key.cbufs[i])
+ continue;
+
+ res = pan_resource(batch->key.cbufs[i]->texture);
+ panfrost_resource_reset_damage(res);
+ }
out:
panfrost_freeze_batch(batch);
if (!wait)
return;
- drmSyncobjWait(pan_screen(ctx->base.screen)->fd,
+ drmSyncobjWait(pan_device(ctx->base.screen)->fd,
util_dynarray_begin(&syncobjs),
util_dynarray_num_elements(&syncobjs, uint32_t),
INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
batch->requirements |= PAN_REQ_DEPTH_WRITE;
}
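+/* Track the maximum stack size needed across every shader stage bound
+ * to the batch; presumably called while emitting a draw, before the
+ * scratchpad BO is sized from batch->stack_size. */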
+void
+panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
+{
+ struct panfrost_context *ctx = batch->ctx;
+
+ for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
+ struct panfrost_shader_state *ss;
+
+ ss = panfrost_get_shader_state(ctx, i);
+ if (!ss)
+ continue;
+
+ batch->stack_size = MAX2(batch->stack_size, ss->stack_size);
+ }
+}
+
/* Helper to smear a 32-bit color across 128-bit components */
static void
if (util_format_is_rgba8_variant(desc)) {
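+ /* Cast to uint32_t before shifting: float_to_ubyte() promotes to
+ * int, and shifting 0xff into bit 31 of a signed int is undefined
+ * behaviour. */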
pan_pack_color_32(packed,
- (float_to_ubyte(clear_alpha) << 24) |
- (float_to_ubyte(color->f[2]) << 16) |
- (float_to_ubyte(color->f[1]) << 8) |
- (float_to_ubyte(color->f[0]) << 0));
+ ((uint32_t) float_to_ubyte(clear_alpha) << 24) |
+ ((uint32_t) float_to_ubyte(color->f[2]) << 16) |
+ ((uint32_t) float_to_ubyte(color->f[1]) << 8) |
+ ((uint32_t) float_to_ubyte(color->f[0]) << 0));
} else if (format == PIPE_FORMAT_B5G6R5_UNORM) {
/* First, we convert the components to R5, G6, B5 separately */
unsigned r5 = CLAMP(color->f[0], 0.0, 1.0) * 31.0;
pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
else if (size == 3 || size == 4)
pan_pack_color_32(packed, out.ui[0]);
+ else if (size == 6)
+ pan_pack_color_64(packed, out.ui[0], out.ui[1] | (out.ui[1] << 16)); /* RGB16F: pad 48 bits to 64 by repeating blue (RGBB) */
else if (size == 8)
pan_pack_color_64(packed, out.ui[0], out.ui[1]);
else if (size == 16)
batch->maxy = MIN2(batch->maxy, maxy);
}
/* Are we currently rendering to the screen (rather than an FBO)? */
bool
panfrost_batch_is_scanout(struct panfrost_batch *batch)