panfrost: Remove 32-bit next_job path
[mesa.git] / src / gallium / drivers / panfrost / pan_scoreboard.c
index 0c4cbfe5d9b40a17db61c437e34c296adc953012..927a6f61f6b3313f9d630defdbd832d7df53493b 100644 (file)
@@ -30,7 +30,7 @@
 /*
  * Within a batch (panfrost_job), there are various types of Mali jobs:
  *
- *  - SET_VALUE: initializes tiler
+ *  - WRITE_VALUE: generic write primitive, used to zero a tiler field
  *  - VERTEX: runs a vertex shader
  *  - TILER: runs tiling and sets up a fragment shader
  *  - FRAGMENT: runs fragment shaders and writes out
  *
  */
 
-/* Accessor to set the next job field */
-
-static void
-panfrost_set_job_next(struct mali_job_descriptor_header *first, mali_ptr next)
-{
-        if (first->job_descriptor_size)
-                first->next_job_64 = (u64) (uintptr_t) next;
-        else
-                first->next_job_32 = (u32) (uintptr_t) next;
-}
-
 /* Coerce a panfrost_transfer to a header */
 
 static inline struct mali_job_descriptor_header *
@@ -121,11 +110,11 @@ job_descriptor_header(struct panfrost_transfer t)
 
 static void
 panfrost_assign_index(
-                struct panfrost_job *job,
-                struct panfrost_transfer transfer)
+        struct panfrost_batch *batch,
+        struct panfrost_transfer transfer)
 {
         /* Assign the index */
-        unsigned index = ++job->job_index;
+        unsigned index = ++batch->job_index;
         job_descriptor_header(transfer)->job_index = index;
 }
 
@@ -133,8 +122,8 @@ panfrost_assign_index(
 
 static void
 panfrost_add_dependency(
-                struct panfrost_transfer depender,
-                struct panfrost_transfer dependent)
+        struct panfrost_transfer depender,
+        struct panfrost_transfer dependent)
 {
 
         struct mali_job_descriptor_header *first =
@@ -143,10 +132,6 @@ panfrost_add_dependency(
         struct mali_job_descriptor_header *second =
                 job_descriptor_header(depender);
 
-        /* Ensure we're ready for dependencies */
-        assert(second->job_index);
-        assert(first->job_index);
-
         /* Look for an open slot */
 
         if (!second->job_dependency_index_1)
@@ -161,8 +146,8 @@ panfrost_add_dependency(
 
 static void
 panfrost_scoreboard_queue_job_internal(
-                struct panfrost_job *batch,
-                struct panfrost_transfer job)
+        struct panfrost_batch *batch,
+        struct panfrost_transfer job)
 {
         panfrost_assign_index(batch, job);
 
@@ -178,8 +163,8 @@ panfrost_scoreboard_queue_job_internal(
 
 void
 panfrost_scoreboard_queue_compute_job(
-                struct panfrost_job *batch,
-                struct panfrost_transfer job)
+        struct panfrost_batch *batch,
+        struct panfrost_transfer job)
 {
         panfrost_scoreboard_queue_job_internal(batch, job);
 
@@ -196,9 +181,9 @@ panfrost_scoreboard_queue_compute_job(
 
 void
 panfrost_scoreboard_queue_vertex_job(
-                struct panfrost_job *batch,
-                struct panfrost_transfer vertex,
-                bool requires_tiling)
+        struct panfrost_batch *batch,
+        struct panfrost_transfer vertex,
+        bool requires_tiling)
 {
         panfrost_scoreboard_queue_compute_job(batch, vertex);
 
@@ -211,8 +196,8 @@ panfrost_scoreboard_queue_vertex_job(
 
 void
 panfrost_scoreboard_queue_tiler_job(
-                struct panfrost_job *batch,
-                struct panfrost_transfer tiler)
+        struct panfrost_batch *batch,
+        struct panfrost_transfer tiler)
 {
         panfrost_scoreboard_queue_compute_job(batch, tiler);
 
@@ -230,9 +215,9 @@ panfrost_scoreboard_queue_tiler_job(
 
 void
 panfrost_scoreboard_queue_fused_job(
-                struct panfrost_job *batch,
-                struct panfrost_transfer vertex,
-                struct panfrost_transfer tiler)
+        struct panfrost_batch *batch,
+        struct panfrost_transfer vertex,
+        struct panfrost_transfer tiler)
 {
         panfrost_scoreboard_queue_vertex_job(batch, vertex, true);
         panfrost_scoreboard_queue_tiler_job(batch, tiler);
@@ -244,9 +229,9 @@ panfrost_scoreboard_queue_fused_job(
 
 void
 panfrost_scoreboard_queue_fused_job_prepend(
-                struct panfrost_job *batch,
-                struct panfrost_transfer vertex,
-                struct panfrost_transfer tiler)
+        struct panfrost_batch *batch,
+        struct panfrost_transfer vertex,
+        struct panfrost_transfer tiler)
 {
         /* Sanity check */
         assert(batch->last_tiler.gpu);
@@ -271,45 +256,47 @@ panfrost_scoreboard_queue_fused_job_prepend(
         batch->first_tiler = tiler;
 }
 
-/* Generates a set value job, used below as part of TILER job scheduling. */
+/* Generates a write value job, used to initialize the tiler structures. */
 
 static struct panfrost_transfer
-panfrost_set_value_job(struct panfrost_context *ctx, mali_ptr polygon_list)
+panfrost_write_value_job(struct panfrost_batch *batch, mali_ptr polygon_list)
 {
         struct mali_job_descriptor_header job = {
-                .job_type = JOB_TYPE_SET_VALUE,
+                .job_type = JOB_TYPE_WRITE_VALUE,
                 .job_descriptor_size = 1,
         };
 
-        struct mali_payload_set_value payload = {
-                .out = polygon_list,
-                .unknown = 0x3,
+        struct mali_payload_write_value payload = {
+                .address = polygon_list,
+                .value_descriptor = MALI_WRITE_VALUE_ZERO,
         };
 
-        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload));
+        struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(payload));
         memcpy(transfer.cpu, &job, sizeof(job));
         memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));
 
         return transfer;
 }
 
-/* If there are any tiler jobs, there needs to be a corresponding set value job
- * linked to the first vertex job feeding into tiling. */
+/* If there are any tiler jobs, we need to initialize the tiler by writing
+ * zeroes to a magic tiler structure. We do so via a WRITE_VALUE job linked to
+ * the first vertex job feeding into tiling. */
 
 static void
-panfrost_scoreboard_set_value(struct panfrost_job *batch)
+panfrost_scoreboard_initialize_tiler(struct panfrost_batch *batch)
 {
         /* Check if we even need tiling */
         if (!batch->last_tiler.gpu)
                 return;
 
-        /* Okay, we do. Let's generate it */
+        /* Okay, we do. Let's generate it. We'll need the job's polygon list
+         * regardless of size. */
 
-        struct panfrost_context *ctx = batch->ctx;
-        mali_ptr polygon_list = ctx->tiler_polygon_list.gpu;
+        mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch,
+                MALI_TILER_MINIMUM_HEADER_SIZE);
 
         struct panfrost_transfer job =
-                panfrost_set_value_job(ctx, polygon_list);
+                panfrost_write_value_job(batch, polygon_list);
 
         /* Queue it */
         panfrost_scoreboard_queue_compute_job(batch, job);
@@ -349,10 +336,10 @@ panfrost_scoreboard_set_value(struct panfrost_job *batch)
                 mali_ptr, count))
 
 void
-panfrost_scoreboard_link_batch(struct panfrost_job *batch)
+panfrost_scoreboard_link_batch(struct panfrost_batch *batch)
 {
         /* Finalize the batch */
-        panfrost_scoreboard_set_value(batch);
+        panfrost_scoreboard_initialize_tiler(batch);
 
         /* Let no_incoming represent the set S described. */
 
@@ -366,7 +353,40 @@ panfrost_scoreboard_link_batch(struct panfrost_job *batch)
         BITSET_WORD *edge_removal_1 = calloc(sz, 1);
         BITSET_WORD *edge_removal_2 = calloc(sz, 1);
 
-        /* We compute no_incoming by traversing the batch. */
+        /* We compute no_incoming by traversing the batch. Simultaneously, we
+         * would like to keep track of a parity-reversed version of the
+         * dependency graph. Dependency indices are 16-bit and in practice (for
+         * ES3.0, at least), we can guarantee a given node will be depended on
+         * by no more than one other node. Proof:
+         *
+         * Proposition: Given a node N of type T, no more than one other node
+         * depends on N.
+         *
+         * If type is WRITE_VALUE: The only dependency added against us is from
+         * the first tiler job, so there is 1 dependent.
+         *
+         * If type is VERTEX: If there is a tiler node, that tiler node depends
+         * on us; if there is not (transform feedback), nothing depends on us.
+         * Therefore there is at most 1 dependent.
+         *
+         * If type is TILER: If there is another TILER job in succession, that
+         * node depends on us. No other job type depends on us. Therefore there
+         * is at most 1 dependent.
+         *
+         * If type is FRAGMENT: This type cannot be in a primary chain, so it
+         * is irrelevant. Just for kicks, nobody would depend on us, so there
+         * are zero dependents, so it holds anyway.
+         *
+         * TODO: Revise this logic for ES3.1 and above. This result may not
+         * hold for COMPUTE/FUSED/GEOMETRY jobs; we might need to special case
+         * those. Can FBO dependencies be expressed within a chain?
+         * ---
+         *
+         * Point is, we only need to hold a single dependent, which is a pretty
+         * helpful result.
+         */
+
+        unsigned *dependents = calloc(node_count, sizeof(unsigned));
 
         for (unsigned i = 0; i < node_count; ++i) {
                 struct mali_job_descriptor_header *node = DESCRIPTOR_FOR_NODE(i);
@@ -374,8 +394,23 @@ panfrost_scoreboard_link_batch(struct panfrost_job *batch)
                 unsigned dep_1 = node->job_dependency_index_1;
                 unsigned dep_2 = node->job_dependency_index_2;
 
+                /* Record no_incoming info for this node */
+
                 if (!(dep_1 || dep_2))
                         BITSET_SET(no_incoming, i);
+
+                /* Record this node as the dependent of each of its
+                 * dependencies */
+
+                if (dep_1) {
+                        assert(!dependents[dep_1 - 1]);
+                        dependents[dep_1 - 1] = i + 1;
+                }
+
+                if (dep_2) {
+                        assert(!dependents[dep_2 - 1]);
+                        dependents[dep_2 - 1] = i + 1;
+                }
         }
 
         /* No next_job fields are set at the beginning, so L is implciitly the
@@ -390,8 +425,8 @@ panfrost_scoreboard_link_batch(struct panfrost_job *batch)
         unsigned arr_size = BITSET_WORDS(node_count);
 
         for (unsigned node_n_1 = __bitset_ffs(no_incoming, arr_size);
-                        (node_n_1 != 0);
-                        node_n_1 = __bitset_ffs(no_incoming, arr_size)) {
+             (node_n_1 != 0);
+             node_n_1 = __bitset_ffs(no_incoming, arr_size)) {
 
                 unsigned node_n = node_n_1 - 1;
 
@@ -406,7 +441,7 @@ panfrost_scoreboard_link_batch(struct panfrost_job *batch)
 
                 if (tail) {
                         /* Link us to the last node */
-                        panfrost_set_job_next(tail, addr);
+                        tail->next_job = addr;
                 } else {
                         /* We are the first/last node */
                         batch->first_job.cpu = (uint8_t *) n;
@@ -415,8 +450,12 @@ panfrost_scoreboard_link_batch(struct panfrost_job *batch)
 
                 tail = n;
 
-                /* Scan dependencies */
-                for (unsigned node_m = 0; node_m < node_count; ++node_m) {
+                /* Grab the dependent, if there is one */
+                unsigned node_m_1 = dependents[node_n];
+
+                if (node_m_1) {
+                        unsigned node_m = node_m_1 - 1;
+
                         struct mali_job_descriptor_header *m =
                                 DESCRIPTOR_FOR_NODE(node_m);
 
@@ -439,7 +478,7 @@ panfrost_scoreboard_link_batch(struct panfrost_job *batch)
                                 dep_2 = 0;
                         } else {
                                 /* This node has no relevant dependencies */
-                                continue;
+                                assert(0);
                         }
 
                         /* Are there edges left? If not, add us to S */
@@ -450,4 +489,10 @@ panfrost_scoreboard_link_batch(struct panfrost_job *batch)
                 }
         }
 
+        /* Cleanup */
+        free(no_incoming);
+        free(dependents);
+        free(edge_removal_1);
+        free(edge_removal_2);
+
 }