radeonsi: use shader_info::cs::local_size_variable to clean up some code
[mesa.git] / src / gallium / drivers / v3d / v3d_job.c
index c5abfa7edb2e97e072e0e375bdb7a2f8ce8a04cd..58410484d3f93a44350564847f85fae814dd3ec7 100644 (file)
@@ -37,7 +37,7 @@
 #include "util/set.h"
 #include "broadcom/clif/clif_dump.h"
 
-static void
+void
 v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
 {
         set_foreach(job->bos, entry) {
@@ -85,7 +85,7 @@ v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
         ralloc_free(job);
 }
 
-static struct v3d_job *
+struct v3d_job *
 v3d_job_create(struct v3d_context *v3d)
 {
         struct v3d_job *job = rzalloc(v3d, struct v3d_job);
@@ -184,10 +184,23 @@ v3d_job_writes_resource_from_tf(struct v3d_job *job,
 void
 v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                 struct pipe_resource *prsc,
-                                enum v3d_flush_cond flush_cond)
+                                enum v3d_flush_cond flush_cond,
+                                bool is_compute_pipeline)
 {
         struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                            prsc);
+        struct v3d_resource *rsc = v3d_resource(prsc);
+
+        /* We need to sync if the graphics pipeline reads a resource written
+         * by the compute pipeline. The same would be needed for the case of
+         * a graphics-compute dependency, but currently all compute jobs
+         * are serialized with the previously submitted job.
+         */
+        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
+           v3d->sync_on_last_compute_job = true;
+           rsc->compute_written = false;
+        }
+
         if (!entry)
                 return;
 
@@ -220,7 +233,8 @@ v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
 void
 v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
                                 struct pipe_resource *prsc,
-                                enum v3d_flush_cond flush_cond)
+                                enum v3d_flush_cond flush_cond,
+                                bool is_compute_pipeline)
 {
         struct v3d_resource *rsc = v3d_resource(prsc);
 
@@ -230,7 +244,8 @@ v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
          * caller intends to write to the resource, so we don't care if
          * there was a previous TF write to it.
          */
-        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond);
+        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
+                                        is_compute_pipeline);
 
         hash_table_foreach(v3d->jobs, entry) {
                 struct v3d_job *job = entry->data;
@@ -329,7 +344,8 @@ v3d_get_job(struct v3d_context *v3d,
         for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                 if (cbufs[i]) {
                         v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                         pipe_surface_reference(&job->cbufs[i], cbufs[i]);
 
                         if (cbufs[i]->texture->nr_samples > 1)
@@ -338,7 +354,8 @@ v3d_get_job(struct v3d_context *v3d,
         }
         if (zsbuf) {
                 v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
-                                                V3D_FLUSH_DEFAULT);
+                                                V3D_FLUSH_DEFAULT,
+                                                false);
                 pipe_surface_reference(&job->zsbuf, zsbuf);
                 if (zsbuf->texture->nr_samples > 1)
                         job->msaa = true;
@@ -356,7 +373,8 @@ v3d_get_job(struct v3d_context *v3d,
                 if (rsc->separate_stencil) {
                         v3d_flush_jobs_reading_resource(v3d,
                                                         &rsc->separate_stencil->base,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                         _mesa_hash_table_insert(v3d->write_jobs,
                                                 &rsc->separate_stencil->base,
                                                 job);
@@ -459,6 +477,11 @@ v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
         if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) {
                 uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
                 v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
+                /* When we only have a vertex shader we determine the primitive
+                 * count on the CPU, so don't update it again here.
+                 */
+                if (v3d->prog.gs)
+                        v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
         }
 }
 
@@ -497,6 +520,10 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
         job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
         job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
 
+        job->submit.flags = 0;
+        if (job->tmu_dirty_rcl && screen->has_cache_flush)
+                job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
+
         /* On V3D 4.1, the tile alloc/state setup moved to register writes
          * instead of binner packets.
          */