radeonsi: fix user fence space when MCBP is enabled

[mesa.git] / src / gallium / drivers / v3d / v3d_job.c
diff --git a/src/gallium/drivers/v3d/v3d_job.c b/src/gallium/drivers/v3d/v3d_job.c

index 3226059161acd4db99068d32de2b67b88d84acfc..58410484d3f93a44350564847f85fae814dd3ec7 100644 (file)
--- a/src/gallium/drivers/v3d/v3d_job.c
+++ b/src/gallium/drivers/v3d/v3d_job.c
@@ -37,7 +37,7 @@
  #include "util/set.h"
  #include "broadcom/clif/clif_dump.h"
  
-static void
+void
  v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
  {
          set_foreach(job->bos, entry) {
@@ -85,7 +85,7 @@ v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
          ralloc_free(job);
  }
  
-static struct v3d_job *
+struct v3d_job *
  v3d_job_create(struct v3d_context *v3d)
  {
          struct v3d_job *job = rzalloc(v3d, struct v3d_job);
@@ -184,30 +184,45 @@ v3d_job_writes_resource_from_tf(struct v3d_job *job,
  void
  v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                  struct pipe_resource *prsc,
-                                bool always_flush)
+                                enum v3d_flush_cond flush_cond,
+                                bool is_compute_pipeline)
  {
          struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                             prsc);
+        struct v3d_resource *rsc = v3d_resource(prsc);
+
+        /* We need to sync if graphics pipeline reads a resource written
+         * by the compute pipeline. The same would be needed for the case of
+         * graphics-compute dependency but nowadays all compute jobs
+         * are serialized with the previous submitted job.
+         */
+        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
+           v3d->sync_on_last_compute_job = true;
+           rsc->compute_written = false;
+        }
+
          if (!entry)
                  return;
  
          struct v3d_job *job = entry->data;
  
-        /* For writes from TF in the same job we use the "Wait for TF"
-         * feature provided by the hardware so we don't want to flush.
-         * The exception to this is when the caller is about to map the
-         * resource since in that case we don't have a 'Wait for TF' command
-         * the in command stream. In this scenario the caller is expected
-         * to set 'always_flush' to True.
-         */
          bool needs_flush;
-        if (always_flush) {
+        switch (flush_cond) {
+        case V3D_FLUSH_ALWAYS:
                  needs_flush = true;
-        } else if (!v3d->job || v3d->job != job) {
-                /* Write from a different job: always flush */
-                needs_flush = true;
-        } else {
-                /* Write from currrent job: flush if not TF */
+                break;
+        case V3D_FLUSH_NOT_CURRENT_JOB:
+                needs_flush = !v3d->job || v3d->job != job;
+                break;
+        case V3D_FLUSH_DEFAULT:
+        default:
+                /* For writes from TF in the same job we use the "Wait for TF"
+                 * feature provided by the hardware so we don't want to flush.
+                 * The exception to this is when the caller is about to map the
+                 * resource since in that case we don't have a 'Wait for TF'
+                 * command the in command stream. In this scenario the caller
+                 * is expected to set 'always_flush' to True.
+                 */
                  needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
          }
  
@@ -217,7 +232,9 @@ v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
  
  void
  v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
-                                struct pipe_resource *prsc)
+                                struct pipe_resource *prsc,
+                                enum v3d_flush_cond flush_cond,
+                                bool is_compute_pipeline)
  {
          struct v3d_resource *rsc = v3d_resource(prsc);
  
@@ -227,18 +244,33 @@ v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
           * caller intends to write to the resource, so we don't care if
           * there was a previous TF write to it.
           */
-        v3d_flush_jobs_writing_resource(v3d, prsc, false);
+        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
+                                        is_compute_pipeline);
  
          hash_table_foreach(v3d->jobs, entry) {
                  struct v3d_job *job = entry->data;
  
-                if (_mesa_set_search(job->bos, rsc->bo)) {
-                        v3d_job_submit(v3d, job);
-                        /* Reminder: v3d->jobs is safe to keep iterating even
-                         * after deletion of an entry.
-                         */
+                if (!_mesa_set_search(job->bos, rsc->bo))
                          continue;
+
+                bool needs_flush;
+                switch (flush_cond) {
+                case V3D_FLUSH_NOT_CURRENT_JOB:
+                        needs_flush = !v3d->job || v3d->job != job;
+                        break;
+                case V3D_FLUSH_ALWAYS:
+                case V3D_FLUSH_DEFAULT:
+                default:
+                        needs_flush = true;
                  }
+
+                if (needs_flush)
+                        v3d_job_submit(v3d, job);
+
+                /* Reminder: v3d->jobs is safe to keep iterating even
+                 * after deletion of an entry.
+                 */
+                continue;
          }
  }
  
@@ -311,7 +343,9 @@ v3d_get_job(struct v3d_context *v3d,
  
          for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                  if (cbufs[i]) {
-                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture);
+                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                          pipe_surface_reference(&job->cbufs[i], cbufs[i]);
  
                          if (cbufs[i]->texture->nr_samples > 1)
@@ -319,7 +353,9 @@ v3d_get_job(struct v3d_context *v3d,
                  }
          }
          if (zsbuf) {
-                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture);
+                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
+                                                V3D_FLUSH_DEFAULT,
+                                                false);
                  pipe_surface_reference(&job->zsbuf, zsbuf);
                  if (zsbuf->texture->nr_samples > 1)
                          job->msaa = true;
@@ -336,7 +372,9 @@ v3d_get_job(struct v3d_context *v3d,
                  struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                  if (rsc->separate_stencil) {
                          v3d_flush_jobs_reading_resource(v3d,
-                                                        &rsc->separate_stencil->base);
+                                                        &rsc->separate_stencil->base,
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                          _mesa_hash_table_insert(v3d->write_jobs,
                                                  &rsc->separate_stencil->base,
                                                  job);
@@ -429,6 +467,24 @@ v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)
          clif_dump_destroy(clif);
  }
  
+static void
+v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
+{
+        assert(v3d->prim_counts);
+
+        perf_debug("stalling on TF counts readback\n");
+        struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);
+        if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) {
+                uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
+                v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
+                /* When we only have a vertex shader we determine the primitive
+                 * count in the CPU so don't update it here again.
+                 */
+                if (v3d->prog.gs)
+                        v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
+        }
+}
+
  /**
   * Submits the job to the kernel and then reinitializes it.
   */
@@ -464,6 +520,10 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
          job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
          job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
  
+        job->submit.flags = 0;
+        if (job->tmu_dirty_rcl && screen->has_cache_flush)
+                job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
+
          /* On V3D 4.1, the tile alloc/state setup moved to register writes
           * instead of binner packets.
           */
@@ -488,6 +548,22 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
                                          "Expect corruption.\n", strerror(errno));
                          warned = true;
                  }
+
+                /* If we are submitting a job in the middle of transform
+                 * feedback we need to read the primitive counts and accumulate
+                 * them, otherwise they will be reset at the start of the next
+                 * draw when we emit the Tile Binning Mode Configuration packet.
+                 *
+                 * If the job doesn't have any TF draw calls, then we know
+                 * the primitive count must be zero and we can skip stalling
+                 * for this. This also fixes a problem because it seems that
+                 * in this scenario the counters are not reset with the Tile
+                 * Binning Mode Configuration packet, which would translate
+                 * to us reading an obsolete (possibly non-zero) value from
+                 * the GPU counters.
+                 */
+                if (v3d->streamout.num_targets && job->tf_draw_calls_queued > 0)
+                        v3d_read_and_accumulate_primitive_counters(v3d);
          }
  
  done: