panfrost: Rewrite scoreboarding routines
author     Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
           Sun, 16 Feb 2020 19:59:11 +0000 (14:59 -0500)
committer  Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
           Tue, 18 Feb 2020 13:45:21 +0000 (08:45 -0500)
Rather than manipulating job descriptor headers as fat pointers (slow),
using fancy manipulation functions to build the tree programmatically in
arbitrary order (slow and complicated), and then doing a topological sort
at runtime every frame (slow) that requires traversing said headers in
GPU memory (slow!)... we finally know enough about the hardware to just
get things right the first time (or the second, for next_job linking).
So rip out all that code and replace it with a much better routine that
creates, uploads, and queues a job all in one, since those are now
essentially the same operation (which is much better for memory access
patterns, by the way), and almost everything falls into place gracefully
according to the rules we've set out. Even wallpapering isn't *so*
terrible if you just... move that one little... giant... hack out of
sight... ahem....

panfrost_scoreboard_link_batch is no longer a bottleneck, mostly because
it no longer exists :-)
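
As a sketch of the shape change (both halves lifted from the
pan_compute.c hunk below; the full panfrost_new_job signature is in the
pan_job.h hunk):

    /* Before: build the header on the CPU, upload it fused with the
     * payload, then queue the fat pointer for sorting at submit time */
    struct mali_job_descriptor_header job = {
            .job_type = JOB_TYPE_COMPUTE,
            .job_descriptor_size = 1,
            .job_barrier = 1
    };

    struct panfrost_transfer transfer =
            panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload));
    memcpy(transfer.cpu, &job, sizeof(job));
    memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload));
    panfrost_scoreboard_queue_compute_job(batch, transfer);

    /* After: one call creates, uploads, and queues, and returns the job
     * index for dependency management */
    panfrost_new_job(batch, JOB_TYPE_COMPUTE, true, 0,
                     payload, sizeof(*payload), false);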

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3836>

src/gallium/drivers/panfrost/pan_compute.c
src/gallium/drivers/panfrost/pan_context.c
src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_fragment.c
src/gallium/drivers/panfrost/pan_job.c
src/gallium/drivers/panfrost/pan_job.h
src/gallium/drivers/panfrost/pan_scoreboard.c

src/gallium/drivers/panfrost/pan_compute.c
index 1901f58dda7f73654b268f818d5ab406805ef452..9747c5cdab7bae9fb764280b6b5b43c4249403b5 100644
@@ -104,12 +104,6 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         ctx->compute_grid = info;
 
-        struct mali_job_descriptor_header job = {
-                .job_type = JOB_TYPE_COMPUTE,
-                .job_descriptor_size = 1,
-                .job_barrier = 1
-        };
-
         /* TODO: Stub */
         struct midgard_payload_vertex_tiler *payload = &ctx->payloads[PIPE_SHADER_COMPUTE];
         struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
@@ -152,15 +146,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
                         info->grid[0], info->grid[1], info->grid[2],
                         info->block[0], info->block[1], info->block[2], false);
 
-        /* Upload the payload */
-
-        struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload));
-        memcpy(transfer.cpu, &job, sizeof(job));
-        memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload));
-
-        /* Queue the job */
-        panfrost_scoreboard_queue_compute_job(batch, transfer);
-
+        panfrost_new_job(batch, JOB_TYPE_COMPUTE, true, 0, payload, sizeof(*payload), false);
         panfrost_flush_all_batches(ctx, true);
 }
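
For reference, the new call above with its parameters annotated (a
readability gloss; meanings follow the panfrost_new_job declaration in
the pan_job.h hunk below):

        panfrost_new_job(batch,
                         JOB_TYPE_COMPUTE,           /* enum mali_job_type type */
                         true,                       /* barrier: sets job_barrier */
                         0,                          /* local_dep: no dependency */
                         payload, sizeof(*payload),  /* uploaded after the header */
                         false);                     /* inject: append, don't prepend */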
 
src/gallium/drivers/panfrost/pan_context.c
index 9bb2cf7923ab12332aabe026307873b28bb27130..6e03839e8ea24e582d0ba7021aca339d56274858 100644
@@ -359,29 +359,6 @@ panfrost_default_shader_backend(struct panfrost_context *ctx)
         memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
 }
 
-/* Generates a vertex/tiler job. This is, in some sense, the heart of the
- * graphics command stream. It should be called once per draw, accordding to
- * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in
- * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for
- * vertex jobs. */
-
-struct panfrost_transfer
-panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler)
-{
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        struct mali_job_descriptor_header job = {
-                .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
-                .job_descriptor_size = 1,
-        };
-
-        struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payloads[PIPE_SHADER_FRAGMENT] : &ctx->payloads[PIPE_SHADER_VERTEX];
-
-        struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload));
-        memcpy(transfer.cpu, &job, sizeof(job));
-        memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload));
-        return transfer;
-}
-
 mali_ptr
 panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i)
 {
@@ -1272,20 +1249,23 @@ panfrost_queue_draw(struct panfrost_context *ctx)
         bool rasterizer_discard = ctx->rasterizer
                                   && ctx->rasterizer->base.rasterizer_discard;
 
-        struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false);
-        struct panfrost_transfer tiler;
 
-        if (!rasterizer_discard)
-                tiler = panfrost_vertex_tiler_job(ctx, true);
+        struct midgard_payload_vertex_tiler *vertex_payload = &ctx->payloads[PIPE_SHADER_VERTEX];
+        struct midgard_payload_vertex_tiler *tiler_payload = &ctx->payloads[PIPE_SHADER_FRAGMENT];
 
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+        bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
 
-        if (rasterizer_discard)
-                panfrost_scoreboard_queue_vertex_job(batch, vertex, FALSE);
-        else if (ctx->wallpaper_batch && batch->first_tiler.gpu)
-                panfrost_scoreboard_queue_fused_job_prepend(batch, vertex, tiler);
-        else
-                panfrost_scoreboard_queue_fused_job(batch, vertex, tiler);
+        if (wallpapering) {
+                /* Inject in reverse order, with "predicted" job indices. THIS IS A HACK XXX */
+                panfrost_new_job(batch, JOB_TYPE_TILER, false, batch->job_index + 2, tiler_payload, sizeof(*tiler_payload), true);
+                panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), true);
+        } else {
+                unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), false);
+
+                if (!rasterizer_discard)
+                        panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tiler_payload, sizeof(*tiler_payload), false);
+        }
 
         for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
                 struct panfrost_shader_variants *all = ctx->shader[i];
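
The "predicted" indices above work out as follows: panfrost_new_job (see
the pan_scoreboard.c hunk below) assigns index = ++batch->job_index, so
with job_index == N when wallpapering begins, a hedged trace:

        /* Hypothetical trace, batch->job_index == N at entry:
         *
         *   panfrost_new_job(.., JOB_TYPE_TILER,  .., local_dep = N + 2, .., inject)
         *        -> the tiler job receives index N + 1
         *   panfrost_new_job(.., JOB_TYPE_VERTEX, .., local_dep = 0, .., inject)
         *        -> the vertex job receives index N + 2
         *
         * So the tiler's local_dep (N + 2) is exactly the index the vertex
         * job is about to receive. Each inject prepends via
         * job.next_job = batch->first_job, so the chain order ends up
         * VERTEX -> TILER -> (previous chain head), dependencies intact. */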
src/gallium/drivers/panfrost/pan_context.h
index b2736d46d2471d16e1b9deea6b0284739f841290..f33f6ec846ccb4775a803b4abef3c8d9a195086a 100644
@@ -317,8 +317,7 @@ struct midgard_tiler_descriptor
 panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count);
 
 mali_ptr
-panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws,
-                      struct mali_job_descriptor_header **header_cpu);
+panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws);
 
 void
 panfrost_shader_compile(
src/gallium/drivers/panfrost/pan_fragment.c
index 88b2db3c94900deeae95c1714006d218de500853..e2d71c57a10b2271b206785a97a1271f5bae205c 100644
@@ -49,8 +49,7 @@ panfrost_initialize_surface(
  * presentations, this is supposed to correspond to eglSwapBuffers) */
 
 mali_ptr
-panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws,
-                      struct mali_job_descriptor_header **header_cpu)
+panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws)
 {
         struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen);
 
@@ -105,6 +104,5 @@ panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws,
         struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(header) + sizeof(payload));
         memcpy(transfer.cpu, &header, sizeof(header));
         memcpy(transfer.cpu + sizeof(header), &payload, sizeof(payload));
-        *header_cpu = (struct mali_job_descriptor_header *)transfer.cpu;
         return transfer.gpu;
 }
src/gallium/drivers/panfrost/pan_job.c
index 35d5e0be67e6caa5b71ba69a5bb54483fe21a8b4..62ce2e36bc9b8e4cc48117cc967480a39b586562 100644
@@ -114,9 +114,6 @@ panfrost_create_batch(struct panfrost_context *ctx,
         batch->maxx = batch->maxy = 0;
         batch->transient_offset = 0;
 
-        util_dynarray_init(&batch->headers, batch);
-        util_dynarray_init(&batch->gpu_headers, batch);
-        util_dynarray_init(&batch->dependencies, batch);
         batch->out_sync = panfrost_create_batch_fence(batch);
         util_copy_framebuffer_state(&batch->key, key);
 
@@ -181,9 +178,6 @@ panfrost_free_batch(struct panfrost_batch *batch)
                 panfrost_batch_fence_unreference(*dep);
         }
 
-        util_dynarray_fini(&batch->headers);
-        util_dynarray_fini(&batch->gpu_headers);
-
         /* The out_sync fence lifetime is different from the batch one
          * since other batches might want to wait on a fence of already
          * submitted/signaled batch. All we need to do here is make sure the
@@ -308,7 +302,7 @@ panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx)
          * Note that it's perfectly fine to re-use a batch with an
          * existing clear, we'll just update it with the new clear request.
          */
-        if (!batch->last_job.gpu)
+        if (!batch->first_job)
                 return batch;
 
         /* Otherwise, we need to freeze the existing one and instantiate a new
@@ -744,7 +738,7 @@ panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
         /* No draw calls, and no clear on the depth/stencil bufs.
          * Drawing the wallpaper would be useless.
          */
-        if (!batch->last_tiler.gpu &&
+        if (!batch->tiler_dep &&
             !(batch->clear & PIPE_CLEAR_DEPTHSTENCIL))
                 return;
 
@@ -846,8 +840,7 @@ panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
 static int
 panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                             mali_ptr first_job_desc,
-                            uint32_t reqs,
-                            struct mali_job_descriptor_header *header)
+                            uint32_t reqs)
 {
         struct panfrost_context *ctx = batch->ctx;
         struct pipe_context *gallium = (struct pipe_context *) ctx;
@@ -857,7 +850,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
         bool is_fragment_shader;
         int ret;
 
-        is_fragment_shader = (reqs & PANFROST_JD_REQ_FS) && batch->first_job.gpu;
+        is_fragment_shader = (reqs & PANFROST_JD_REQ_FS) && batch->first_job;
         if (is_fragment_shader)
                 submit.in_sync_count = 1;
         else
@@ -934,20 +927,17 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 static int
 panfrost_batch_submit_jobs(struct panfrost_batch *batch)
 {
-        bool has_draws = batch->first_job.gpu;
-        struct mali_job_descriptor_header *header;
+        bool has_draws = batch->first_job;
         int ret = 0;
 
         if (has_draws) {
-                header = (struct mali_job_descriptor_header *)batch->first_job.cpu;
-                ret = panfrost_batch_submit_ioctl(batch, batch->first_job.gpu, 0, header);
+                ret = panfrost_batch_submit_ioctl(batch, batch->first_job, 0);
                 assert(!ret);
         }
 
-        if (batch->first_tiler.gpu || batch->clear) {
-                mali_ptr fragjob = panfrost_fragment_job(batch, has_draws, &header);
-
-                ret = panfrost_batch_submit_ioctl(batch, fragjob, PANFROST_JD_REQ_FS, header);
+        if (batch->tiler_dep || batch->clear) {
+                mali_ptr fragjob = panfrost_fragment_job(batch, has_draws);
+                ret = panfrost_batch_submit_ioctl(batch, fragjob, PANFROST_JD_REQ_FS);
                 assert(!ret);
         }
 
@@ -969,7 +959,7 @@ panfrost_batch_submit(struct panfrost_batch *batch)
         int ret;
 
         /* Nothing to do! */
-        if (!batch->last_job.gpu && !batch->clear) {
+        if (!batch->first_job && !batch->clear) {
                 /* Mark the fence as signaled so the fence logic does not try
                  * to wait on it.
                  */
@@ -982,7 +972,7 @@ panfrost_batch_submit(struct panfrost_batch *batch)
         /* Now that all draws are in, we can finally prepare the
          * FBD for the batch */
 
-        if (batch->framebuffer.gpu && batch->first_job.gpu) {
+        if (batch->framebuffer.gpu && batch->first_job) {
                 struct panfrost_context *ctx = batch->ctx;
                 struct pipe_context *gallium = (struct pipe_context *) ctx;
                 struct panfrost_screen *screen = pan_screen(gallium->screen);
@@ -993,7 +983,7 @@ panfrost_batch_submit(struct panfrost_batch *batch)
                         panfrost_attach_mfbd(batch, ~0);
         }
 
-        panfrost_scoreboard_link_batch(batch);
+        panfrost_scoreboard_initialize_tiler(batch);
 
         ret = panfrost_batch_submit_jobs(batch);
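
Distilled, the submit path after this patch looks like the following (a
simplified sketch of the panfrost_batch_submit / panfrost_batch_submit_jobs
hunks above, with error handling elided):

        /* Prepend the WRITE_VALUE job iff tiler work was queued */
        panfrost_scoreboard_initialize_tiler(batch);

        /* Primary chain (WRITE_VALUE -> VERTEX/TILER jobs), one ioctl */
        bool has_draws = batch->first_job;

        if (has_draws)
                panfrost_batch_submit_ioctl(batch, batch->first_job, 0);

        /* Secondary chain: the single FRAGMENT job, one more ioctl */
        if (batch->tiler_dep || batch->clear)
                panfrost_batch_submit_ioctl(batch,
                        panfrost_fragment_job(batch, has_draws),
                        PANFROST_JD_REQ_FS);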
 
src/gallium/drivers/panfrost/pan_job.h
index 55da645530296c82e53fc0f1cdd9c32c1826a2ed..59279925e36412ed351ab8f5915defad98ad22f9 100644
@@ -99,40 +99,22 @@ struct panfrost_batch {
         unsigned minx, miny;
         unsigned maxx, maxy;
 
-        /* CPU pointers to the job descriptor headers. next_job is only
-         * set at submit time (since only then are all the dependencies
-         * known). The upshot is that this is append-only.
-         *
-         * These arrays contain the headers for the "primary batch", our jargon
-         * referring to the part of the panfrost_job that actually contains
-         * meaningful work. In an OpenGL ES setting, that means the
-         * WRITE_VALUE/VERTEX/TILER jobs. Excluded is specifically the FRAGMENT
-         * job, which is sent on as a secondary batch containing only a single
-         * hardware job. Since there's one and only one FRAGMENT job issued per
-         * panfrost_job, there is no need to do any scoreboarding / management;
-         * it's easy enough to open-code it and it's not like we can get any
-         * better anyway. */
-        struct util_dynarray headers;
-
-        /* (And the GPU versions; TODO maybe combine) */
-        struct util_dynarray gpu_headers;
-
-        /* The last job in the primary batch */
-        struct panfrost_transfer last_job;
-
-        /* The first/last tiler job */
-        struct panfrost_transfer first_tiler;
-        struct panfrost_transfer last_tiler;
-
-        /* The first vertex job used as the input to a tiler job */
-        struct panfrost_transfer first_vertex_for_tiler;
-
-        /* The first job. Notice we've created a linked list */
-        struct panfrost_transfer first_job;
+        /* The first job in the batch */
+        mali_ptr first_job;
 
         /* The number of jobs in the primary batch, essentially */
         unsigned job_index;
 
+        /* A CPU-side pointer to the previous job for next_job linking */
+        struct mali_job_descriptor_header *prev_job;
+
+        /* The dependency for tiler jobs (i.e. the index of the last emitted
+         * tiler job, or zero if none have been emitted) */
+        unsigned tiler_dep;
+
+        /* The job index of the WRITE_VALUE job (before it has been created) */
+        unsigned write_value_index;
+
         /* BOs referenced -- will be used for flushing logic */
         struct hash_table *bos;
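
Given the replacement fields above, a hedged worked example of how the
scoreboard state evolves for a fresh batch with a single draw (indices
follow the panfrost_new_job logic in the pan_scoreboard.c hunk below):

        /* Fresh batch: job_index = 0, first_job = 0, prev_job = NULL,
         * tiler_dep = 0, write_value_index = 0.
         *
         * Vertex job: index = 1                 (job_index: 0 -> 1)
         * Tiler job:  write_value_index = 2     (reserved; job_index: 1 -> 2)
         *             index = 3, tiler_dep = 3  (job_index: 2 -> 3)
         *             dep_1 = 1 (vertex), dep_2 = 2 (write value)
         *
         * At submit, the WRITE_VALUE job is emitted with the reserved index
         * 2 and prepended, so the final chain reads
         * WRITE_VALUE(2) -> VERTEX(1) -> TILER(3). */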
 
@@ -241,35 +223,16 @@ panfrost_batch_intersection_scissor(struct panfrost_batch *batch,
 
 /* Scoreboarding */
 
-void
-panfrost_scoreboard_queue_compute_job(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer job);
-
-void
-panfrost_scoreboard_queue_vertex_job(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer vertex,
-        bool requires_tiling);
-
-void
-panfrost_scoreboard_queue_tiler_job(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer tiler);
-
-void
-panfrost_scoreboard_queue_fused_job(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer vertex,
-        struct panfrost_transfer tiler);
-void
-panfrost_scoreboard_queue_fused_job_prepend(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer vertex,
-        struct panfrost_transfer tiler);
+unsigned
+panfrost_new_job(
+                struct panfrost_batch *batch,
+                enum mali_job_type type,
+                bool barrier,
+                unsigned local_dep,
+                void *payload, size_t payload_size,
+                bool inject);
 
-void
-panfrost_scoreboard_link_batch(struct panfrost_batch *batch);
+void panfrost_scoreboard_initialize_tiler(struct panfrost_batch *batch);
 
 bool
 panfrost_batch_is_scanout(struct panfrost_batch *batch);
src/gallium/drivers/panfrost/pan_scoreboard.c
index 927a6f61f6b3313f9d630defdbd832d7df53493b..0e27a0ae64ce7b854f5fd623ee2f6437561c8d4a 100644
  *
  */
 
-/* Coerce a panfrost_transfer to a header */
-
-static inline struct mali_job_descriptor_header *
-job_descriptor_header(struct panfrost_transfer t)
+/* Generates, uploads, and queues a new job. All fields are written in order
+ * except for next_job accounting (TODO: should we be clever and defer the
+ * upload of the header here until the next job, to keep the access pattern
+ * totally linear? Or is that just a micro-optimization at this point?).
+ * Returns the generated index for dep management.
+ *
+ * Inject is used to inject a job at the front, for wallpapering. If you are
+ * not wallpapering and set this, dragons will eat you. */
+
+unsigned
+panfrost_new_job(
+                struct panfrost_batch *batch,
+                enum mali_job_type type,
+                bool barrier,
+                unsigned local_dep,
+                void *payload, size_t payload_size,
+                bool inject)
 {
-        return (struct mali_job_descriptor_header *) t.cpu;
-}
+        unsigned global_dep = 0;
+
+        if (type == JOB_TYPE_TILER) {
+                /* Tiler jobs must be chained, and the first tiler job must
+                 * depend on the write value job, whose index we reserve now */
+
+                if (batch->tiler_dep)
+                        global_dep = batch->tiler_dep;
+                else {
+                        batch->write_value_index = ++batch->job_index;
+                        global_dep = batch->write_value_index;
+                }
+        }
 
-static void
-panfrost_assign_index(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer transfer)
-{
         /* Assign the index */
         unsigned index = ++batch->job_index;
-        job_descriptor_header(transfer)->job_index = index;
-}
 
-/* Helper to add a dependency to a job */
+        struct mali_job_descriptor_header job = {
+                .job_descriptor_size = 1,
+                .job_type = type,
+                .job_barrier = barrier,
+                .job_index = index,
+                .job_dependency_index_1 = local_dep,
+                .job_dependency_index_2 = global_dep,
+        };
 
-static void
-panfrost_add_dependency(
-        struct panfrost_transfer depender,
-        struct panfrost_transfer dependent)
-{
+        if (inject)
+                job.next_job = batch->first_job;
 
-        struct mali_job_descriptor_header *first =
-                job_descriptor_header(dependent);
+        struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + payload_size);
+        memcpy(transfer.cpu, &job, sizeof(job));
+        memcpy(transfer.cpu + sizeof(job), payload, payload_size);
 
-        struct mali_job_descriptor_header *second =
-                job_descriptor_header(depender);
+        if (inject) {
+                batch->first_job = transfer.gpu;
+                return index;
+        }
 
-        /* Look for an open slot */
+        /* Form a chain */
+        if (type == JOB_TYPE_TILER)
+                batch->tiler_dep = index;
 
-        if (!second->job_dependency_index_1)
-                second->job_dependency_index_1 = first->job_index;
-        else if (!second->job_dependency_index_2)
-                second->job_dependency_index_2 = first->job_index;
+        if (batch->prev_job)
+                batch->prev_job->next_job = transfer.gpu;
         else
-                unreachable("No available slot for new dependency");
-}
-
-/* Queues a job WITHOUT updating pointers. Be careful. */
-
-static void
-panfrost_scoreboard_queue_job_internal(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer job)
-{
-        panfrost_assign_index(batch, job);
-
-        /* Queue a pointer to the job */
-        util_dynarray_append(&batch->headers, void*, job.cpu);
-        util_dynarray_append(&batch->gpu_headers, mali_ptr, job.gpu);
-}
-
-
-/* Queues a compute job, with no special dependencies. This is a bit of a
- * misnomer -- internally, all job types are queued with this function, but
- * outside of this file, it's for pure compute jobs */
-
-void
-panfrost_scoreboard_queue_compute_job(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer job)
-{
-        panfrost_scoreboard_queue_job_internal(batch, job);
-
-        /* Update the linked list metadata as appropriate */
-        batch->last_job = job;
-
-        if (!batch->first_job.gpu)
-                batch->first_job = job;
-}
-
-/* Queues a vertex job. There are no special dependencies yet, but if
- * tiling is required (anytime 'rasterize discard' is disabled), we have
- * some extra bookkeeping for later */
-
-void
-panfrost_scoreboard_queue_vertex_job(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer vertex,
-        bool requires_tiling)
-{
-        panfrost_scoreboard_queue_compute_job(batch, vertex);
+                batch->first_job = transfer.gpu;
 
-        if (requires_tiling && !batch->first_vertex_for_tiler.gpu)
-                batch->first_vertex_for_tiler = vertex;
+        batch->prev_job = (struct mali_job_descriptor_header *) transfer.cpu;
+        return index;
 }
 
-/* Queues a tiler job, respecting the dependency of each tiler job on the
- * previous */
+/* Generates a write value job, used to initialize the tiler structures. Note
+ * this is called right before frame submission. */
 
 void
-panfrost_scoreboard_queue_tiler_job(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer tiler)
-{
-        panfrost_scoreboard_queue_compute_job(batch, tiler);
-
-        if (!batch->first_tiler.gpu)
-                batch->first_tiler = tiler;
-
-        if (batch->last_tiler.gpu)
-                panfrost_add_dependency(tiler, batch->last_tiler);
-
-        batch->last_tiler = tiler;
-}
-
-/* Queues a fused (vertex/tiler) job, or a pair of vertex/tiler jobs if
- * fused jobs are not supported (the default until Bifrost rolls out) */
-
-void
-panfrost_scoreboard_queue_fused_job(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer vertex,
-        struct panfrost_transfer tiler)
-{
-        panfrost_scoreboard_queue_vertex_job(batch, vertex, true);
-        panfrost_scoreboard_queue_tiler_job(batch, tiler);
-        panfrost_add_dependency(tiler, vertex);
-}
-
-/* Queues a fused (vertex/tiler) job prepended *before* the usual set, used for
- * wallpaper blits */
-
-void
-panfrost_scoreboard_queue_fused_job_prepend(
-        struct panfrost_batch *batch,
-        struct panfrost_transfer vertex,
-        struct panfrost_transfer tiler)
+panfrost_scoreboard_initialize_tiler(struct panfrost_batch *batch)
 {
-        /* Sanity check */
-        assert(batch->last_tiler.gpu);
-        assert(batch->first_tiler.gpu);
-
-        /* First, we add the vertex job directly to the queue, forcing it to
-         * the front */
-
-        panfrost_scoreboard_queue_job_internal(batch, vertex);
-        batch->first_job = vertex;
-        batch->first_vertex_for_tiler = vertex;
-
-        /* Similarly, we add the tiler job directly to the queue, forcing it to
-         * the front (second place), manually setting the tiler on vertex
-         * dependency (since this is pseudofused) and forcing a dependency of
-         * the now-second tiler on us (since all tiler jobs are linked in order
-         * and we're injecting ourselves at the front) */
+        /* Check if we even need tiling */
+        if (!batch->tiler_dep)
+                return;
 
-        panfrost_scoreboard_queue_job_internal(batch, tiler);
-        panfrost_add_dependency(tiler, vertex);
-        panfrost_add_dependency(batch->first_tiler, tiler);
-        batch->first_tiler = tiler;
-}
+        /* Okay, we do. Let's generate it. We'll need the job's polygon list
+         * regardless of size. */
 
-/* Generates a write value job, used to initialize the tiler structures. */
+        mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch,
+                MALI_TILER_MINIMUM_HEADER_SIZE);
 
-static struct panfrost_transfer
-panfrost_write_value_job(struct panfrost_batch *batch, mali_ptr polygon_list)
-{
         struct mali_job_descriptor_header job = {
                 .job_type = JOB_TYPE_WRITE_VALUE,
+                .job_index = batch->write_value_index,
                 .job_descriptor_size = 1,
+                .next_job = batch->first_job
         };
 
         struct mali_payload_write_value payload = {
@@ -275,224 +201,5 @@ panfrost_write_value_job(struct panfrost_batch *batch, mali_ptr polygon_list)
         memcpy(transfer.cpu, &job, sizeof(job));
         memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));
 
-        return transfer;
-}
-
-/* If there are any tiler jobs, we need to initialize the tiler by writing
- * zeroes to a magic tiler structure. We do so via a WRITE_VALUE job linked to
- * the first vertex job feeding into tiling. */
-
-static void
-panfrost_scoreboard_initialize_tiler(struct panfrost_batch *batch)
-{
-        /* Check if we even need tiling */
-        if (!batch->last_tiler.gpu)
-                return;
-
-        /* Okay, we do. Let's generate it. We'll need the job's polygon list
-         * regardless of size. */
-
-        mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch,
-                MALI_TILER_MINIMUM_HEADER_SIZE);
-
-        struct panfrost_transfer job =
-                panfrost_write_value_job(batch, polygon_list);
-
-        /* Queue it */
-        panfrost_scoreboard_queue_compute_job(batch, job);
-
-        /* Tiler jobs need us */
-        panfrost_add_dependency(batch->first_tiler, job);
-}
-
-/* Once all jobs have been added to a batch and we're ready to submit, we need
- * to order them to set each of the next_job fields, obeying the golden rule:
- * "A job's dependencies must appear earlier in the job chain than itself".
- * Fortunately, computing this job chain is a well-studied graph theory problem
- * known as "topological sorting", which has linear time algorithms. We let
- * each job represent a node, each dependency a directed edge, and the entire
- * set of jobs to be a dependency graph. This graph is inherently acyclic, as
- * otherwise there are unresolveable dependencies.
- *
- * We implement Kahn's algorithm here to compute the next_job chain:
- * https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
- *
- * A few implementation notes: we represent S explicitly with a bitset, L
- * implicitly in the next_job fields. The indices of the bitset are off-by-one:
- * nodes are numbered [0, node_count - 1], whereas in reality job_index in the
- * hardware and dependencies are [1, node_count].
- *
- * We represent edge removal implicitly with another pair of bitsets, rather
- * than explicitly removing the edges, since we need to keep the dependencies
- * there for the hardware.
- */
-
-#define DESCRIPTOR_FOR_NODE(count) \
-        *(util_dynarray_element(&batch->headers, \
-                struct mali_job_descriptor_header*, count))
-
-#define GPU_ADDRESS_FOR_NODE(count) \
-        *(util_dynarray_element(&batch->gpu_headers, \
-                mali_ptr, count))
-
-void
-panfrost_scoreboard_link_batch(struct panfrost_batch *batch)
-{
-        /* Finalize the batch */
-        panfrost_scoreboard_initialize_tiler(batch);
-
-        /* Let no_incoming represent the set S described. */
-
-        unsigned node_count = batch->job_index;
-
-        size_t sz = BITSET_WORDS(node_count) * sizeof(BITSET_WORD);
-        BITSET_WORD *no_incoming = calloc(sz, 1);
-
-        /* Sets for edges being removed in dep 1 or 2 respectively */
-
-        BITSET_WORD *edge_removal_1 = calloc(sz, 1);
-        BITSET_WORD *edge_removal_2 = calloc(sz, 1);
-
-        /* We compute no_incoming by traversing the batch. Simultaneously, we
-         * would like to keep track of a parity-reversed version of the
-         * dependency graph. Dependency indices are 16-bit and in practice (for
-         * ES3.0, at least), we can guarantee a given node will be depended on
-         * by no more than one other nodes. P.f:
-         *
-         * Proposition: Given a node N of type T, no more than one other node
-         * depends on N.
-         *
-         * If type is WRITE_VALUE: The only dependency added against us is from
-         * the first tiler job, so there is 1 dependent.
-         *
-         * If type is VERTEX: If there is a tiler node, that tiler node depends
-         * on us; if there is not (transform feedback), nothing depends on us.
-         * Therefore there is at most 1 dependent.
-         *
-         * If type is TILER: If there is another TILER job in succession, that
-         * node depends on us. No other job type depends on us. Therefore there
-         * is at most 1 dependent.
-         *
-         * If type is FRAGMENT: This type cannot be in a primary chain, so it
-         * is irrelevant. Just for kicks, nobody would depend on us, so there
-         * are zero dependents, so it holds anyway.
-         *
-         * TODO: Revise this logic for ES3.1 and above. This result may not
-         * hold for COMPUTE/FUSED/GEOMETRY jobs; we might need to special case
-         * those. Can FBO dependencies be expressed within a chain?
-         * ---
-         *
-         * Point is, we only need to hold a single dependent, which is a pretty
-         * helpful result.
-         */
-
-        unsigned *dependents = calloc(node_count, sizeof(unsigned));
-
-        for (unsigned i = 0; i < node_count; ++i) {
-                struct mali_job_descriptor_header *node = DESCRIPTOR_FOR_NODE(i);
-
-                unsigned dep_1 = node->job_dependency_index_1;
-                unsigned dep_2 = node->job_dependency_index_2;
-
-                /* Record no_incoming info for this node */
-
-                if (!(dep_1 || dep_2))
-                        BITSET_SET(no_incoming, i);
-
-                /* Record this node as the dependent of each of its
-                 * dependencies */
-
-                if (dep_1) {
-                        assert(!dependents[dep_1 - 1]);
-                        dependents[dep_1 - 1] = i + 1;
-                }
-
-                if (dep_2) {
-                        assert(!dependents[dep_2 - 1]);
-                        dependents[dep_2 - 1] = i + 1;
-                }
-        }
-
-        /* No next_job fields are set at the beginning, so L is implciitly the
-         * empty set. As next_job fields are filled, L is implicitly set. Tail
-         * is the tail of L, however. */
-
-        struct mali_job_descriptor_header *tail = NULL;
-
-        /* We iterate, popping off elements of S. A simple foreach won't do,
-         * since we mutate S as we go (even adding elements) */
-
-        unsigned arr_size = BITSET_WORDS(node_count);
-
-        for (unsigned node_n_1 = __bitset_ffs(no_incoming, arr_size);
-             (node_n_1 != 0);
-             node_n_1 = __bitset_ffs(no_incoming, arr_size)) {
-
-                unsigned node_n = node_n_1 - 1;
-
-                /* We've got a node n, pop it off */
-                BITSET_CLEAR(no_incoming, node_n);
-
-                /* Add it to the list */
-                struct mali_job_descriptor_header *n =
-                        DESCRIPTOR_FOR_NODE(node_n);
-
-                mali_ptr addr = GPU_ADDRESS_FOR_NODE(node_n);
-
-                if (tail) {
-                        /* Link us to the last node */
-                        tail->next_job = addr;
-                } else {
-                        /* We are the first/last node */
-                        batch->first_job.cpu = (uint8_t *) n;
-                        batch->first_job.gpu = addr;
-                }
-
-                tail = n;
-
-                /* Grab the dependent, if there is one */
-                unsigned node_m_1 = dependents[node_n];
-
-                if (node_m_1) {
-                        unsigned node_m = node_m_1 - 1;
-
-                        struct mali_job_descriptor_header *m =
-                                DESCRIPTOR_FOR_NODE(node_m);
-
-                        /* Get the deps, accounting for removal */
-                        unsigned dep_1 = m->job_dependency_index_1;
-                        unsigned dep_2 = m->job_dependency_index_2;
-
-                        if (BITSET_TEST(edge_removal_1, node_m))
-                                dep_1 = 0;
-
-                        if (BITSET_TEST(edge_removal_2, node_m))
-                                dep_2 = 0;
-
-                        /* Pretend to remove edges */
-                        if (dep_1 == node_n_1) {
-                                BITSET_SET(edge_removal_1, node_m);
-                                dep_1 = 0;
-                        } else if (dep_2 == node_n_1) {
-                                BITSET_SET(edge_removal_2, node_m);
-                                dep_2 = 0;
-                        } else {
-                                /* This node has no relevant dependencies */
-                                assert(0);
-                        }
-
-                        /* Are there edges left? If not, add us to S */
-                        bool has_edges = dep_1 || dep_2;
-
-                        if (!has_edges)
-                                BITSET_SET(no_incoming, node_m);
-                }
-        }
-
-        /* Cleanup */
-        free(no_incoming);
-        free(dependents);
-        free(edge_removal_1);
-        free(edge_removal_2);
-
+        batch->first_job = transfer.gpu;
 }
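
For readability, here is the new panfrost_new_job reassembled from the
added and context lines in the hunk above (diff markers stripped, one
added clarifying comment; behavior identical to the patch, relying on
the surrounding pan_scoreboard.c context for declarations):

unsigned
panfrost_new_job(
                struct panfrost_batch *batch,
                enum mali_job_type type,
                bool barrier,
                unsigned local_dep,
                void *payload, size_t payload_size,
                bool inject)
{
        unsigned global_dep = 0;

        if (type == JOB_TYPE_TILER) {
                /* Tiler jobs must be chained, and the first tiler job must
                 * depend on the write value job, whose index we reserve now */

                if (batch->tiler_dep)
                        global_dep = batch->tiler_dep;
                else {
                        batch->write_value_index = ++batch->job_index;
                        global_dep = batch->write_value_index;
                }
        }

        /* Assign the index */
        unsigned index = ++batch->job_index;

        struct mali_job_descriptor_header job = {
                .job_descriptor_size = 1,
                .job_type = type,
                .job_barrier = barrier,
                .job_index = index,
                .job_dependency_index_1 = local_dep,
                .job_dependency_index_2 = global_dep,
        };

        /* Injected jobs are prepended, so they point at the old head */
        if (inject)
                job.next_job = batch->first_job;

        struct panfrost_transfer transfer =
                panfrost_allocate_transient(batch, sizeof(job) + payload_size);
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job), payload, payload_size);

        if (inject) {
                batch->first_job = transfer.gpu;
                return index;
        }

        /* Form a chain */
        if (type == JOB_TYPE_TILER)
                batch->tiler_dep = index;

        if (batch->prev_job)
                batch->prev_job->next_job = transfer.gpu;
        else
                batch->first_job = transfer.gpu;

        batch->prev_job = (struct mali_job_descriptor_header *) transfer.cpu;
        return index;
}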