*/
#include <xf86drm.h>
+#include "vc4_cl_dump.h"
#include "vc4_context.h"
+#include "util/hash_table.h"
+
+static void
+remove_from_ht(struct hash_table *ht, void *key)
+{
+ struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+ _mesa_hash_table_remove(ht, entry);
+}
+
+static void
+vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
+{
+ struct vc4_bo **referenced_bos = job->bo_pointers.base;
+ for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
+ vc4_bo_unreference(&referenced_bos[i]);
+ }
+
+ remove_from_ht(vc4->jobs, &job->key);
+
+ if (job->color_write) {
+ remove_from_ht(vc4->write_jobs, job->color_write->texture);
+ pipe_surface_reference(&job->color_write, NULL);
+ }
+ if (job->msaa_color_write) {
+ remove_from_ht(vc4->write_jobs, job->msaa_color_write->texture);
+ pipe_surface_reference(&job->msaa_color_write, NULL);
+ }
+ if (job->zs_write) {
+ remove_from_ht(vc4->write_jobs, job->zs_write->texture);
+ pipe_surface_reference(&job->zs_write, NULL);
+ }
+ if (job->msaa_zs_write) {
+ remove_from_ht(vc4->write_jobs, job->msaa_zs_write->texture);
+ pipe_surface_reference(&job->msaa_zs_write, NULL);
+ }
+
+ pipe_surface_reference(&job->color_read, NULL);
+ pipe_surface_reference(&job->zs_read, NULL);
+
+ if (vc4->job == job)
+ vc4->job = NULL;
+
+ ralloc_free(job);
+}
+
+static struct vc4_job *
+vc4_job_create(struct vc4_context *vc4)
+{
+ struct vc4_job *job = rzalloc(vc4, struct vc4_job);
+
+ vc4_init_cl(job, &job->bcl);
+ vc4_init_cl(job, &job->shader_rec);
+ vc4_init_cl(job, &job->uniforms);
+ vc4_init_cl(job, &job->bo_handles);
+ vc4_init_cl(job, &job->bo_pointers);
+
+ job->draw_min_x = ~0;
+ job->draw_min_y = ~0;
+ job->draw_max_x = 0;
+ job->draw_max_y = 0;
+
+ job->last_gem_handle_hindex = ~0;
+
+ return job;
+}
void
-vc4_job_init(struct vc4_context *vc4)
+vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
+ struct pipe_resource *prsc)
{
- vc4_init_cl(vc4, &vc4->bcl);
- vc4_init_cl(vc4, &vc4->shader_rec);
- vc4_init_cl(vc4, &vc4->uniforms);
- vc4_init_cl(vc4, &vc4->bo_handles);
- vc4_init_cl(vc4, &vc4->bo_pointers);
- vc4_job_reset(vc4);
+ struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
+ prsc);
+ if (entry) {
+ struct vc4_job *job = entry->data;
+ vc4_job_submit(vc4, job);
+ }
}
void
-vc4_job_reset(struct vc4_context *vc4)
+vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
+ struct pipe_resource *prsc)
{
- struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
- for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
- vc4_bo_unreference(&referenced_bos[i]);
+ struct vc4_resource *rsc = vc4_resource(prsc);
+
+ vc4_flush_jobs_writing_resource(vc4, prsc);
+
+ struct hash_entry *entry;
+ hash_table_foreach(vc4->jobs, entry) {
+ struct vc4_job *job = entry->data;
+
+ struct vc4_bo **referenced_bos = job->bo_pointers.base;
+ bool found = false;
+ for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
+ if (referenced_bos[i] == rsc->bo) {
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ vc4_job_submit(vc4, job);
+ continue;
+ }
+
+ /* Also check for the Z/color buffers, since the references to
+ * those are only added immediately before submit.
+ */
+ if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
+ struct vc4_resource *ctex =
+ vc4_resource(job->color_read->texture);
+ if (ctex->bo == rsc->bo) {
+ vc4_job_submit(vc4, job);
+ continue;
+ }
+ }
+
+ if (job->zs_read && !(job->cleared &
+ (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+ struct vc4_resource *ztex =
+ vc4_resource(job->zs_read->texture);
+ if (ztex->bo == rsc->bo) {
+ vc4_job_submit(vc4, job);
+ continue;
+ }
+ }
}
- vc4_reset_cl(&vc4->bcl);
- vc4_reset_cl(&vc4->shader_rec);
- vc4_reset_cl(&vc4->uniforms);
- vc4_reset_cl(&vc4->bo_handles);
- vc4_reset_cl(&vc4->bo_pointers);
- vc4->shader_rec_count = 0;
+}
+
+/**
+ * Returns a vc4_job struture for tracking V3D rendering to a particular FBO.
+ *
+ * If we've already started rendering to this FBO, then return old same job,
+ * otherwise make a new one. If we're beginning rendering to an FBO, make
+ * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
+ * have been flushed.
+ */
+struct vc4_job *
+vc4_get_job(struct vc4_context *vc4,
+ struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
+{
+ /* Return the existing job for this FBO if we have one */
+ struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
+ struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
+ &local_key);
+ if (entry)
+ return entry->data;
+
+ /* Creating a new job. Make sure that any previous jobs reading or
+ * writing these buffers are flushed.
+ */
+ if (cbuf)
+ vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
+ if (zsbuf)
+ vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
+
+ struct vc4_job *job = vc4_job_create(vc4);
+
+ if (cbuf) {
+ if (cbuf->texture->nr_samples > 1) {
+ job->msaa = true;
+ pipe_surface_reference(&job->msaa_color_write, cbuf);
+ } else {
+ pipe_surface_reference(&job->color_write, cbuf);
+ }
+ }
+
+ if (zsbuf) {
+ if (zsbuf->texture->nr_samples > 1) {
+ job->msaa = true;
+ pipe_surface_reference(&job->msaa_zs_write, zsbuf);
+ } else {
+ pipe_surface_reference(&job->zs_write, zsbuf);
+ }
+ }
+
+ if (job->msaa) {
+ job->tile_width = 32;
+ job->tile_height = 32;
+ } else {
+ job->tile_width = 64;
+ job->tile_height = 64;
+ }
+
+ if (cbuf)
+ _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
+ if (zsbuf)
+ _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
+
+ job->key.cbuf = cbuf;
+ job->key.zsbuf = zsbuf;
+ _mesa_hash_table_insert(vc4->jobs, &job->key, job);
+
+ return job;
+}
+
+struct vc4_job *
+vc4_get_job_for_fbo(struct vc4_context *vc4)
+{
+ if (vc4->job)
+ return vc4->job;
- vc4->needs_flush = false;
- vc4->draw_calls_queued = 0;
+ struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
+ struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
+ struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
- /* We have no hardware context saved between our draw calls, so we
- * need to flag the next draw as needing all state emitted. Emitting
- * all state at the start of our draws is also what ensures that we
- * return to the state we need after a previous tile has finished.
+ /* The dirty flags are tracking what's been updated while vc4->job has
+ * been bound, so set them all to ~0 when switching between jobs. We
+ * also need to reset all state at the start of rendering.
*/
vc4->dirty = ~0;
- vc4->resolve = 0;
- vc4->cleared = 0;
- vc4->draw_min_x = ~0;
- vc4->draw_min_y = ~0;
- vc4->draw_max_x = 0;
- vc4->draw_max_y = 0;
+ /* Set up the read surfaces in the job. If they aren't actually
+ * getting read (due to a clear starting the frame), job->cleared will
+ * mask out the read.
+ */
+ pipe_surface_reference(&job->color_read, cbuf);
+ pipe_surface_reference(&job->zs_read, zsbuf);
+
+ /* If we're binding to uninitialized buffers, no need to load their
+ * contents before drawing.
+ */
+ if (cbuf) {
+ struct vc4_resource *rsc = vc4_resource(cbuf->texture);
+ if (!rsc->writes)
+ job->cleared |= PIPE_CLEAR_COLOR0;
+ }
+
+ if (zsbuf) {
+ struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
+ if (!rsc->writes)
+ job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+ }
+
+ job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
+ job->tile_width);
+ job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
+ job->tile_height);
+
+ /* Initialize the job with the raster order flags -- each draw will
+ * check that we haven't changed the flags, since that requires a
+ * flush.
+ */
+ if (vc4->rasterizer)
+ job->flags = vc4->rasterizer->tile_raster_order_flags;
+
+ vc4->job = job;
+
+ return job;
}
static void
-vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
+vc4_submit_setup_rcl_surface(struct vc4_job *job,
struct drm_vc4_submit_rcl_surface *submit_surf,
struct pipe_surface *psurf,
bool is_depth, bool is_write)
{
struct vc4_surface *surf = vc4_surface(psurf);
- if (!surf) {
- submit_surf->hindex = ~0;
+ if (!surf)
return;
- }
struct vc4_resource *rsc = vc4_resource(psurf->texture);
- submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+ submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
submit_surf->offset = surf->offset;
- if (is_depth) {
- submit_surf->bits =
- VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
- VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+ if (psurf->texture->nr_samples <= 1) {
+ if (is_depth) {
+ submit_surf->bits =
+ VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+ } else {
+ submit_surf->bits =
+ VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
+ VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
+ VC4_LOADSTORE_TILE_BUFFER_BGR565 :
+ VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
+ VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+ }
+ submit_surf->bits |=
+ VC4_SET_FIELD(surf->tiling,
+ VC4_LOADSTORE_TILE_BUFFER_TILING);
} else {
- submit_surf->bits =
- VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
- VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
- VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
- VC4_LOADSTORE_TILE_BUFFER_BGR565 :
- VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
- VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+ assert(!is_write);
+ submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
}
- submit_surf->bits |=
- VC4_SET_FIELD(surf->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING);
if (is_write)
rsc->writes++;
}
static void
-vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4,
+vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,
struct drm_vc4_submit_rcl_surface *submit_surf,
struct pipe_surface *psurf)
{
struct vc4_surface *surf = vc4_surface(psurf);
- if (!surf) {
- submit_surf->hindex = ~0;
+ if (!surf)
return;
- }
struct vc4_resource *rsc = vc4_resource(psurf->texture);
- submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+ submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
submit_surf->offset = surf->offset;
- submit_surf->bits =
- VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
- VC4_RENDER_CONFIG_FORMAT_BGR565 :
- VC4_RENDER_CONFIG_FORMAT_RGBA8888,
- VC4_RENDER_CONFIG_FORMAT) |
- VC4_SET_FIELD(surf->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
+ if (psurf->texture->nr_samples <= 1) {
+ submit_surf->bits =
+ VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
+ VC4_RENDER_CONFIG_FORMAT_BGR565 :
+ VC4_RENDER_CONFIG_FORMAT_RGBA8888,
+ VC4_RENDER_CONFIG_FORMAT) |
+ VC4_SET_FIELD(surf->tiling,
+ VC4_RENDER_CONFIG_MEMORY_FORMAT);
+ }
rsc->writes++;
}
+static void
+vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
+ struct drm_vc4_submit_rcl_surface *submit_surf,
+ struct pipe_surface *psurf)
+{
+ struct vc4_surface *surf = vc4_surface(psurf);
+
+ if (!surf)
+ return;
+
+ struct vc4_resource *rsc = vc4_resource(psurf->texture);
+ submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
+ submit_surf->offset = surf->offset;
+ submit_surf->bits = 0;
+ rsc->writes++;
+}
+
/**
* Submits the job to the kernel and then reinitializes it.
*/
void
-vc4_job_submit(struct vc4_context *vc4)
+vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
{
+ if (!job->needs_flush)
+ goto done;
+
+ /* The RCL setup would choke if the draw bounds cause no drawing, so
+ * just drop the drawing if that's the case.
+ */
+ if (job->draw_max_x <= job->draw_min_x ||
+ job->draw_max_y <= job->draw_min_y) {
+ goto done;
+ }
+
if (vc4_debug & VC4_DEBUG_CL) {
fprintf(stderr, "BCL:\n");
- vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false);
- }
-
- struct drm_vc4_submit_cl submit;
- memset(&submit, 0, sizeof(submit));
-
- cl_ensure_space(&vc4->bo_handles, 4 * sizeof(uint32_t));
- cl_ensure_space(&vc4->bo_pointers, 4 * sizeof(struct vc4_bo *));
-
- vc4_submit_setup_rcl_surface(vc4, &submit.color_read,
- vc4->color_read, false, false);
- vc4_submit_setup_rcl_render_config_surface(vc4, &submit.color_write,
- vc4->color_write);
- vc4_submit_setup_rcl_surface(vc4, &submit.zs_read,
- vc4->zs_read, true, false);
- vc4_submit_setup_rcl_surface(vc4, &submit.zs_write,
- vc4->zs_write, true, true);
- submit.msaa_color_write.hindex = ~0;
- submit.msaa_zs_write.hindex = ~0;
-
- submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
- submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4;
- submit.bin_cl = (uintptr_t)vc4->bcl.base;
- submit.bin_cl_size = cl_offset(&vc4->bcl);
- submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
- submit.shader_rec_size = cl_offset(&vc4->shader_rec);
- submit.shader_rec_count = vc4->shader_rec_count;
- submit.uniforms = (uintptr_t)vc4->uniforms.base;
- submit.uniforms_size = cl_offset(&vc4->uniforms);
-
- assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
- submit.min_x_tile = vc4->draw_min_x / 64;
- submit.min_y_tile = vc4->draw_min_y / 64;
- submit.max_x_tile = (vc4->draw_max_x - 1) / 64;
- submit.max_y_tile = (vc4->draw_max_y - 1) / 64;
- submit.width = vc4->draw_width;
- submit.height = vc4->draw_height;
- if (vc4->cleared) {
+ vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);
+ }
+
+ if (cl_offset(&job->bcl) > 0) {
+ /* Increment the semaphore indicating that binning is done and
+ * unblocking the render thread. Note that this doesn't act
+ * until the FLUSH completes.
+ */
+ cl_ensure_space(&job->bcl, 8);
+ cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
+ /* The FLUSH caps all of our bin lists with a
+ * VC4_PACKET_RETURN.
+ */
+ cl_emit(&job->bcl, FLUSH, flush);
+ }
+ struct drm_vc4_submit_cl submit = {
+ .color_read.hindex = ~0,
+ .zs_read.hindex = ~0,
+ .color_write.hindex = ~0,
+ .msaa_color_write.hindex = ~0,
+ .zs_write.hindex = ~0,
+ .msaa_zs_write.hindex = ~0,
+ };
+
+ cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));
+ cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));
+
+ if (job->resolve & PIPE_CLEAR_COLOR) {
+ if (!(job->cleared & PIPE_CLEAR_COLOR)) {
+ vc4_submit_setup_rcl_surface(job, &submit.color_read,
+ job->color_read,
+ false, false);
+ }
+ vc4_submit_setup_rcl_render_config_surface(job,
+ &submit.color_write,
+ job->color_write);
+ vc4_submit_setup_rcl_msaa_surface(job,
+ &submit.msaa_color_write,
+ job->msaa_color_write);
+ }
+ if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+ vc4_submit_setup_rcl_surface(job, &submit.zs_read,
+ job->zs_read, true, false);
+ }
+ vc4_submit_setup_rcl_surface(job, &submit.zs_write,
+ job->zs_write, true, true);
+ vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,
+ job->msaa_zs_write);
+ }
+
+ if (job->msaa) {
+ /* This bit controls how many pixels the general
+ * (i.e. subsampled) loads/stores are iterating over
+ * (multisample loads replicate out to the other samples).
+ */
+ submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
+ /* Controls whether color_write's
+ * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
+ */
+ submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
+ }
+
+ submit.bo_handles = (uintptr_t)job->bo_handles.base;
+ submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;
+ submit.bin_cl = (uintptr_t)job->bcl.base;
+ submit.bin_cl_size = cl_offset(&job->bcl);
+ submit.shader_rec = (uintptr_t)job->shader_rec.base;
+ submit.shader_rec_size = cl_offset(&job->shader_rec);
+ submit.shader_rec_count = job->shader_rec_count;
+ submit.uniforms = (uintptr_t)job->uniforms.base;
+ submit.uniforms_size = cl_offset(&job->uniforms);
+
+ assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
+ submit.min_x_tile = job->draw_min_x / job->tile_width;
+ submit.min_y_tile = job->draw_min_y / job->tile_height;
+ submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;
+ submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;
+ submit.width = job->draw_width;
+ submit.height = job->draw_height;
+ if (job->cleared) {
submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
- submit.clear_color[0] = vc4->clear_color[0];
- submit.clear_color[1] = vc4->clear_color[1];
- submit.clear_z = vc4->clear_depth;
- submit.clear_s = vc4->clear_stencil;
+ submit.clear_color[0] = job->clear_color[0];
+ submit.clear_color[1] = job->clear_color[1];
+ submit.clear_z = job->clear_depth;
+ submit.clear_s = job->clear_stencil;
}
+ submit.flags |= job->flags;
if (!(vc4_debug & VC4_DEBUG_NORAST)) {
int ret;
#ifndef USE_VC4_SIMULATOR
ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
#else
- ret = vc4_simulator_flush(vc4, &submit);
+ ret = vc4_simulator_flush(vc4, &submit, job);
#endif
- if (ret) {
- fprintf(stderr, "VC4 submit failed\n");
- abort();
+ static bool warned = false;
+ if (ret && !warned) {
+ fprintf(stderr, "Draw call returned %s. "
+ "Expect corruption.\n", strerror(errno));
+ warned = true;
+ } else if (!ret) {
+ vc4->last_emit_seqno = submit.seqno;
}
}
- vc4->last_emit_seqno = submit.seqno;
+ if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
+ if (!vc4_wait_seqno(vc4->screen,
+ vc4->last_emit_seqno - 5,
+ PIPE_TIMEOUT_INFINITE,
+ "job throttling")) {
+ fprintf(stderr, "Job throttling failed\n");
+ }
+ }
if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
}
}
- vc4_job_reset(vc4);
+done:
+ vc4_job_free(vc4, job);
+}
+
+static bool
+vc4_job_compare(const void *a, const void *b)
+{
+ return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
}
+
+static uint32_t
+vc4_job_hash(const void *key)
+{
+ return _mesa_hash_data(key, sizeof(struct vc4_job_key));
+}
+
+void
+vc4_job_init(struct vc4_context *vc4)
+{
+ vc4->jobs = _mesa_hash_table_create(vc4,
+ vc4_job_hash,
+ vc4_job_compare);
+ vc4->write_jobs = _mesa_hash_table_create(vc4,
+ _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+}
+