#include "util/set.h"
#include "broadcom/clif/clif_dump.h"
-static void
-remove_from_ht(struct hash_table *ht, void *key)
-{
- struct hash_entry *entry = _mesa_hash_table_search(ht, key);
- _mesa_hash_table_remove(ht, entry);
-}
-
-static void
+/**
+ * Frees @job: drops the references on every BO in its BO set, removes the
+ * job from the context's jobs/write_jobs hash tables, releases its color
+ * and Z/S surface references, and finally ralloc_free()s the job itself.
+ */
+void
v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
{
 set_foreach(job->bos, entry) {
 v3d_bo_unreference(&bo);
 }
- remove_from_ht(v3d->jobs, &job->key);
+ _mesa_hash_table_remove_key(v3d->jobs, &job->key);
 if (job->write_prscs) {
 set_foreach(job->write_prscs, entry) {
 const struct pipe_resource *prsc = entry->key;
- remove_from_ht(v3d->write_jobs, (void *)prsc);
+ _mesa_hash_table_remove_key(v3d->write_jobs, prsc);
 }
 }
- for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
 if (job->cbufs[i]) {
- remove_from_ht(v3d->write_jobs, job->cbufs[i]->texture);
+ _mesa_hash_table_remove_key(v3d->write_jobs,
+ job->cbufs[i]->texture);
 pipe_surface_reference(&job->cbufs[i], NULL);
 }
 }
 if (job->zsbuf) {
 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
 if (rsc->separate_stencil)
- remove_from_ht(v3d->write_jobs,
- &rsc->separate_stencil->base);
+ _mesa_hash_table_remove_key(v3d->write_jobs,
+ &rsc->separate_stencil->base);
- remove_from_ht(v3d->write_jobs, job->zsbuf->texture);
+ _mesa_hash_table_remove_key(v3d->write_jobs,
+ job->zsbuf->texture);
 pipe_surface_reference(&job->zsbuf, NULL);
 }
 ralloc_free(job);
}
-static struct v3d_job *
+struct v3d_job *
v3d_job_create(struct v3d_context *v3d)
{
struct v3d_job *job = rzalloc(v3d, struct v3d_job);
}
+/**
+ * Submits every pending job in the context whose BO set contains @bo,
+ * since any such job may read or write that buffer.
+ */
void
-v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
+v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)
+{
+ hash_table_foreach(v3d->jobs, entry) {
+ struct v3d_job *job = entry->data;
+
+ if (_mesa_set_search(job->bos, bo))
+ v3d_job_submit(v3d, job);
+ }
+}
+
+/**
+ * Records @prsc as written by @job through transform feedback: adds it to
+ * the job's regular write set and to the lazily-created tf_write_prscs
+ * set, which is consulted later to decide whether a flush can be skipped
+ * in favor of the hardware "Wait for TF" mechanism.
+ */
+void
+v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
+{
+ v3d_job_add_write_resource(job, prsc);
+
+ if (!job->tf_write_prscs)
+ job->tf_write_prscs = _mesa_pointer_set_create(job);
+
+ _mesa_set_add(job->tf_write_prscs, prsc);
+}
+
+/* Returns true if @job has a transform feedback write pending for @prsc,
+ * i.e. TF is enabled in the job and the resource is in tf_write_prscs.
+ */
+static bool
+v3d_job_writes_resource_from_tf(struct v3d_job *job,
 struct pipe_resource *prsc)
+{
+ if (!job->tf_enabled)
+ return false;
+
+ if (!job->tf_write_prscs)
+ return false;
+
+ return _mesa_set_search(job->tf_write_prscs, prsc) != NULL;
+}
+
+/**
+ * Submits the job (if any) that writes @prsc, subject to @flush_cond.
+ * Also, when the graphics pipeline is about to consume a resource the
+ * compute pipeline wrote, requests a sync on the last compute job.
+ */
+void
+v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
+ struct pipe_resource *prsc,
+ enum v3d_flush_cond flush_cond,
+ bool is_compute_pipeline)
{
 struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
 prsc);
- if (entry) {
- struct v3d_job *job = entry->data;
- v3d_job_submit(v3d, job);
+ struct v3d_resource *rsc = v3d_resource(prsc);
+
+ /* We need to sync if graphics pipeline reads a resource written
+ * by the compute pipeline. The same would be needed for the case of
+ * graphics-compute dependency but nowadays all compute jobs
+ * are serialized with the previous submitted job.
+ */
+ if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
+ v3d->sync_on_last_compute_job = true;
+ rsc->compute_written = false;
+ }
+
+ if (!entry)
+ return;
+
+ struct v3d_job *job = entry->data;
+
+ bool needs_flush;
+ switch (flush_cond) {
+ case V3D_FLUSH_ALWAYS:
+ needs_flush = true;
+ break;
+ case V3D_FLUSH_NOT_CURRENT_JOB:
+ needs_flush = !v3d->job || v3d->job != job;
+ break;
+ case V3D_FLUSH_DEFAULT:
+ default:
+ /* For writes from TF in the same job we use the "Wait for TF"
+ * feature provided by the hardware so we don't want to flush.
+ * The exception to this is when the caller is about to map the
+ * resource since in that case we don't have a 'Wait for TF'
+ * command in the command stream. In this scenario the caller
+ * is expected to set 'always_flush' to True.
+ */
+ needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
 }
+
+ if (needs_flush)
+ v3d_job_submit(v3d, job);
}
+/**
+ * Submits any job whose BO set contains @prsc's BO (i.e. jobs reading the
+ * resource), after first flushing the job writing it. Used when the caller
+ * is about to write the resource, so pending readers must land first.
+ */
void
v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
- struct pipe_resource *prsc)
+ struct pipe_resource *prsc,
+ enum v3d_flush_cond flush_cond,
+ bool is_compute_pipeline)
{
 struct v3d_resource *rsc = v3d_resource(prsc);
- v3d_flush_jobs_writing_resource(v3d, prsc);
+ /* We only need to force the flush on TF writes, which is the only
+ * case where we might skip the flush to use the 'Wait for TF'
+ * command. Here we are flushing for a read, which means that the
+ * caller intends to write to the resource, so we don't care if
+ * there was a previous TF write to it.
+ */
+ v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
+ is_compute_pipeline);
 hash_table_foreach(v3d->jobs, entry) {
 struct v3d_job *job = entry->data;
- if (_mesa_set_search(job->bos, rsc->bo)) {
- v3d_job_submit(v3d, job);
- /* Reminder: v3d->jobs is safe to keep iterating even
- * after deletion of an entry.
- */
+ if (!_mesa_set_search(job->bos, rsc->bo))
 continue;
+
+ bool needs_flush;
+ switch (flush_cond) {
+ case V3D_FLUSH_NOT_CURRENT_JOB:
+ needs_flush = !v3d->job || v3d->job != job;
+ break;
+ case V3D_FLUSH_ALWAYS:
+ case V3D_FLUSH_DEFAULT:
+ default:
+ needs_flush = true;
 }
+
+ if (needs_flush)
+ v3d_job_submit(v3d, job);
+
+ /* Reminder: v3d->jobs is safe to keep iterating even
+ * after deletion of an entry.
+ */
+ continue;
 }
}
tile_size_index++;
int max_bpp = RENDER_TARGET_MAXIMUM_32BPP;
- for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
if (job->cbufs[i]) {
struct v3d_surface *surf = v3d_surface(job->cbufs[i]);
max_bpp = MAX2(max_bpp, surf->internal_bpp);
/**
* Returns a v3d_job struture for tracking V3D rendering to a particular FBO.
*
- * If we've already started rendering to this FBO, then return old same job,
+ * If we've already started rendering to this FBO, then return the same job,
* otherwise make a new one. If we're beginning rendering to an FBO, make
* sure that any previous reads of the FBO (or writes to its color/Z surfaces)
* have been flushed.
*/
struct v3d_job *job = v3d_job_create(v3d);
- for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
if (cbufs[i]) {
- v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture);
+ v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
+ V3D_FLUSH_DEFAULT,
+ false);
pipe_surface_reference(&job->cbufs[i], cbufs[i]);
if (cbufs[i]->texture->nr_samples > 1)
}
}
if (zsbuf) {
- v3d_flush_jobs_reading_resource(v3d, zsbuf->texture);
+ v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
+ V3D_FLUSH_DEFAULT,
+ false);
pipe_surface_reference(&job->zsbuf, zsbuf);
if (zsbuf->texture->nr_samples > 1)
job->msaa = true;
}
- v3d_job_set_tile_buffer_size(job);
-
- for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
if (cbufs[i])
_mesa_hash_table_insert(v3d->write_jobs,
cbufs[i]->texture, job);
struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
if (rsc->separate_stencil) {
v3d_flush_jobs_reading_resource(v3d,
- &rsc->separate_stencil->base);
+ &rsc->separate_stencil->base,
+ V3D_FLUSH_DEFAULT,
+ false);
_mesa_hash_table_insert(v3d->write_jobs,
&rsc->separate_stencil->base,
job);
struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;
struct v3d_job *job = v3d_get_job(v3d, cbufs, zsbuf);
+ if (v3d->framebuffer.samples >= 1)
+ job->msaa = true;
+
+ v3d_job_set_tile_buffer_size(job);
+
/* The dirty flags are tracking what's been updated while v3d->job has
* been bound, so set them all to ~0 when switching between jobs. We
* also need to reset all state at the start of rendering.
if (zsbuf) {
struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
if (!rsc->writes)
- job->clear |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+ job->clear |= PIPE_CLEAR_DEPTH;
+
+ if (rsc->separate_stencil)
+ rsc = rsc->separate_stencil;
+
+ if (!rsc->writes)
+ job->clear |= PIPE_CLEAR_STENCIL;
}
job->draw_tiles_x = DIV_ROUND_UP(v3d->framebuffer.width,
clif_dump_destroy(clif);
}
+/* Stalls on the prim_counts BO and folds the GPU-written transform
+ * feedback / primitive counters into the context's accumulated totals.
+ */
+static void
+v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
+{
+ assert(v3d->prim_counts);
+
+ perf_debug("stalling on TF counts readback\n");
+ struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);
+ if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) {
+ uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
+ v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
+ /* When we only have a vertex shader we determine the primitive
+ * count in the CPU so don't update it here again.
+ */
+ if (v3d->prog.gs)
+ v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
+ }
+}
+
/**
* Submits the job to the kernel and then reinitializes it.
*/
void
v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
{
- MAYBE_UNUSED struct v3d_screen *screen = v3d->screen;
+ struct v3d_screen *screen = v3d->screen;
if (!job->needs_flush)
goto done;
- if (v3d->screen->devinfo.ver >= 41)
+ if (screen->devinfo.ver >= 41)
v3d41_emit_rcl(job);
else
v3d33_emit_rcl(job);
v3d33_bcl_epilogue(v3d, job);
}
+ /* While the RCL will implicitly depend on the last RCL to have
+ * finished, we also need to block on any previous TFU job we may have
+ * dispatched.
+ */
+ job->submit.in_sync_rcl = v3d->out_sync;
+
+ /* Update the sync object for the last rendering by our context. */
job->submit.out_sync = v3d->out_sync;
+
job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
+ job->submit.flags = 0;
+ if (job->tmu_dirty_rcl && screen->has_cache_flush)
+ job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
+
/* On V3D 4.1, the tile alloc/state setup moved to register writes
* instead of binner packets.
*/
if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {
int ret;
-#ifndef USE_V3D_SIMULATOR
- ret = drmIoctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
-#else
- ret = v3d_simulator_flush(v3d, &job->submit, job);
-#endif
+ ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
static bool warned = false;
if (ret && !warned) {
fprintf(stderr, "Draw call returned %s. "
"Expect corruption.\n", strerror(errno));
warned = true;
}
+
+ /* If we are submitting a job in the middle of transform
+ * feedback we need to read the primitive counts and accumulate
+ * them, otherwise they will be reset at the start of the next
+ * draw when we emit the Tile Binning Mode Configuration packet.
+ *
+ * If the job doesn't have any TF draw calls, then we know
+ * the primitive count must be zero and we can skip stalling
+ * for this. This also fixes a problem because it seems that
+ * in this scenario the counters are not reset with the Tile
+ * Binning Mode Configuration packet, which would translate
+ * to us reading an obsolete (possibly non-zero) value from
+ * the GPU counters.
+ */
+ if (v3d->streamout.num_targets && job->tf_draw_calls_queued > 0)
+ v3d_read_and_accumulate_primitive_counters(v3d);
}
done: