X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_job.c;h=5d18cb9bb8261ea72f103c109985b18b34c1050e;hb=09c1c13c4442148e45a4aeac3425382bbe90e8cd;hp=9ad79c2ea108062933d2f6648c42cada6e53961f;hpb=7d7fbcdf4e1683d1aef19c7ee08cc222d8279672;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index 9ad79c2ea10..5d18cb9bb82 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -27,165 +27,450 @@ */ #include +#include "vc4_cl_dump.h" #include "vc4_context.h" +#include "util/hash_table.h" + +static void +remove_from_ht(struct hash_table *ht, void *key) +{ + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + _mesa_hash_table_remove(ht, entry); +} + +static void +vc4_job_free(struct vc4_context *vc4, struct vc4_job *job) +{ + struct vc4_bo **referenced_bos = job->bo_pointers.base; + for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) { + vc4_bo_unreference(&referenced_bos[i]); + } + + remove_from_ht(vc4->jobs, &job->key); + + if (job->color_write) { + remove_from_ht(vc4->write_jobs, job->color_write->texture); + pipe_surface_reference(&job->color_write, NULL); + } + if (job->msaa_color_write) { + remove_from_ht(vc4->write_jobs, job->msaa_color_write->texture); + pipe_surface_reference(&job->msaa_color_write, NULL); + } + if (job->zs_write) { + remove_from_ht(vc4->write_jobs, job->zs_write->texture); + pipe_surface_reference(&job->zs_write, NULL); + } + if (job->msaa_zs_write) { + remove_from_ht(vc4->write_jobs, job->msaa_zs_write->texture); + pipe_surface_reference(&job->msaa_zs_write, NULL); + } + + pipe_surface_reference(&job->color_read, NULL); + pipe_surface_reference(&job->zs_read, NULL); + + if (vc4->job == job) + vc4->job = NULL; + + ralloc_free(job); +} + +static struct vc4_job * +vc4_job_create(struct vc4_context *vc4) +{ + struct vc4_job *job = rzalloc(vc4, struct vc4_job); + + vc4_init_cl(job, &job->bcl); + vc4_init_cl(job, &job->shader_rec); + vc4_init_cl(job, &job->uniforms); + vc4_init_cl(job, &job->bo_handles); + vc4_init_cl(job, &job->bo_pointers); + + job->draw_min_x = ~0; + job->draw_min_y = ~0; + job->draw_max_x = 0; + job->draw_max_y = 0; + + job->last_gem_handle_hindex = ~0; + + return job; +} void -vc4_job_init(struct vc4_context *vc4) +vc4_flush_jobs_writing_resource(struct vc4_context *vc4, + struct pipe_resource *prsc) { - vc4_init_cl(vc4, &vc4->bcl); - vc4_init_cl(vc4, &vc4->shader_rec); - vc4_init_cl(vc4, &vc4->uniforms); - vc4_init_cl(vc4, &vc4->bo_handles); - vc4_init_cl(vc4, &vc4->bo_pointers); - vc4_job_reset(vc4); + struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs, + prsc); + if (entry) { + struct vc4_job *job = entry->data; + vc4_job_submit(vc4, job); + } } void -vc4_job_reset(struct vc4_context *vc4) +vc4_flush_jobs_reading_resource(struct vc4_context *vc4, + struct pipe_resource *prsc) { - struct vc4_bo **referenced_bos = vc4->bo_pointers.base; - for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) { - vc4_bo_unreference(&referenced_bos[i]); + struct vc4_resource *rsc = vc4_resource(prsc); + + vc4_flush_jobs_writing_resource(vc4, prsc); + + struct hash_entry *entry; + hash_table_foreach(vc4->jobs, entry) { + struct vc4_job *job = entry->data; + + struct vc4_bo **referenced_bos = job->bo_pointers.base; + bool found = false; + for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) { + if (referenced_bos[i] == rsc->bo) { + found = true; + break; + } + } + if (found) { + vc4_job_submit(vc4, job); + continue; + } + + /* Also check for the Z/color buffers, since the references to + * those are only added immediately before submit. + */ + if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) { + struct vc4_resource *ctex = + vc4_resource(job->color_read->texture); + if (ctex->bo == rsc->bo) { + vc4_job_submit(vc4, job); + continue; + } + } + + if (job->zs_read && !(job->cleared & + (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + struct vc4_resource *ztex = + vc4_resource(job->zs_read->texture); + if (ztex->bo == rsc->bo) { + vc4_job_submit(vc4, job); + continue; + } + } } - vc4_reset_cl(&vc4->bcl); - vc4_reset_cl(&vc4->shader_rec); - vc4_reset_cl(&vc4->uniforms); - vc4_reset_cl(&vc4->bo_handles); - vc4_reset_cl(&vc4->bo_pointers); - vc4->shader_rec_count = 0; +} + +/** + * Returns a vc4_job struture for tracking V3D rendering to a particular FBO. + * + * If we've already started rendering to this FBO, then return old same job, + * otherwise make a new one. If we're beginning rendering to an FBO, make + * sure that any previous reads of the FBO (or writes to its color/Z surfaces) + * have been flushed. + */ +struct vc4_job * +vc4_get_job(struct vc4_context *vc4, + struct pipe_surface *cbuf, struct pipe_surface *zsbuf) +{ + /* Return the existing job for this FBO if we have one */ + struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf}; + struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs, + &local_key); + if (entry) + return entry->data; + + /* Creating a new job. Make sure that any previous jobs reading or + * writing these buffers are flushed. + */ + if (cbuf) + vc4_flush_jobs_reading_resource(vc4, cbuf->texture); + if (zsbuf) + vc4_flush_jobs_reading_resource(vc4, zsbuf->texture); + + struct vc4_job *job = vc4_job_create(vc4); + + if (cbuf) { + if (cbuf->texture->nr_samples > 1) { + job->msaa = true; + pipe_surface_reference(&job->msaa_color_write, cbuf); + } else { + pipe_surface_reference(&job->color_write, cbuf); + } + } + + if (zsbuf) { + if (zsbuf->texture->nr_samples > 1) { + job->msaa = true; + pipe_surface_reference(&job->msaa_zs_write, zsbuf); + } else { + pipe_surface_reference(&job->zs_write, zsbuf); + } + } + + if (job->msaa) { + job->tile_width = 32; + job->tile_height = 32; + } else { + job->tile_width = 64; + job->tile_height = 64; + } + + if (cbuf) + _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job); + if (zsbuf) + _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job); + + job->key.cbuf = cbuf; + job->key.zsbuf = zsbuf; + _mesa_hash_table_insert(vc4->jobs, &job->key, job); + + return job; +} + +struct vc4_job * +vc4_get_job_for_fbo(struct vc4_context *vc4) +{ + if (vc4->job) + return vc4->job; - vc4->needs_flush = false; - vc4->draw_calls_queued = 0; + struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0]; + struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf; + struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf); - /* We have no hardware context saved between our draw calls, so we - * need to flag the next draw as needing all state emitted. Emitting - * all state at the start of our draws is also what ensures that we - * return to the state we need after a previous tile has finished. + /* The dirty flags are tracking what's been updated while vc4->job has + * been bound, so set them all to ~0 when switching between jobs. We + * also need to reset all state at the start of rendering. */ vc4->dirty = ~0; - vc4->resolve = 0; - vc4->cleared = 0; - vc4->draw_min_x = ~0; - vc4->draw_min_y = ~0; - vc4->draw_max_x = 0; - vc4->draw_max_y = 0; + /* Set up the read surfaces in the job. If they aren't actually + * getting read (due to a clear starting the frame), job->cleared will + * mask out the read. + */ + pipe_surface_reference(&job->color_read, cbuf); + pipe_surface_reference(&job->zs_read, zsbuf); + + /* If we're binding to uninitialized buffers, no need to load their + * contents before drawing. + */ + if (cbuf) { + struct vc4_resource *rsc = vc4_resource(cbuf->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_COLOR0; + } + + if (zsbuf) { + struct vc4_resource *rsc = vc4_resource(zsbuf->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; + } + + job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width, + job->tile_width); + job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height, + job->tile_height); + + /* Initialize the job with the raster order flags -- each draw will + * check that we haven't changed the flags, since that requires a + * flush. + */ + if (vc4->rasterizer) + job->flags = vc4->rasterizer->tile_raster_order_flags; + + vc4->job = job; + + return job; } static void -vc4_submit_setup_rcl_surface(struct vc4_context *vc4, +vc4_submit_setup_rcl_surface(struct vc4_job *job, struct drm_vc4_submit_rcl_surface *submit_surf, struct pipe_surface *psurf, bool is_depth, bool is_write) { struct vc4_surface *surf = vc4_surface(psurf); - if (!surf) { - submit_surf->hindex = ~0; + if (!surf) return; - } struct vc4_resource *rsc = vc4_resource(psurf->texture); - submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); + submit_surf->hindex = vc4_gem_hindex(job, rsc->bo); submit_surf->offset = surf->offset; - if (is_depth) { - submit_surf->bits = - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, - VC4_LOADSTORE_TILE_BUFFER_BUFFER); + if (psurf->texture->nr_samples <= 1) { + if (is_depth) { + submit_surf->bits = + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, + VC4_LOADSTORE_TILE_BUFFER_BUFFER); + } else { + submit_surf->bits = + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ? + VC4_LOADSTORE_TILE_BUFFER_BGR565 : + VC4_LOADSTORE_TILE_BUFFER_RGBA8888, + VC4_LOADSTORE_TILE_BUFFER_FORMAT); + } + submit_surf->bits |= + VC4_SET_FIELD(surf->tiling, + VC4_LOADSTORE_TILE_BUFFER_TILING); } else { - submit_surf->bits = - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ? - VC4_LOADSTORE_TILE_BUFFER_BGR565 : - VC4_LOADSTORE_TILE_BUFFER_RGBA8888, - VC4_LOADSTORE_TILE_BUFFER_FORMAT); + assert(!is_write); + submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES; } - submit_surf->bits |= - VC4_SET_FIELD(surf->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING); if (is_write) rsc->writes++; } static void -vc4_submit_setup_ms_rcl_surface(struct vc4_context *vc4, - struct drm_vc4_submit_rcl_surface *submit_surf, - struct pipe_surface *psurf) +vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job, + struct drm_vc4_submit_rcl_surface *submit_surf, + struct pipe_surface *psurf) { struct vc4_surface *surf = vc4_surface(psurf); - if (!surf) { - submit_surf->hindex = ~0; + if (!surf) return; - } struct vc4_resource *rsc = vc4_resource(psurf->texture); - submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); + submit_surf->hindex = vc4_gem_hindex(job, rsc->bo); submit_surf->offset = surf->offset; - submit_surf->bits = - VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ? - VC4_RENDER_CONFIG_FORMAT_BGR565 : - VC4_RENDER_CONFIG_FORMAT_RGBA8888, - VC4_RENDER_CONFIG_FORMAT) | - VC4_SET_FIELD(surf->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); + if (psurf->texture->nr_samples <= 1) { + submit_surf->bits = + VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ? + VC4_RENDER_CONFIG_FORMAT_BGR565 : + VC4_RENDER_CONFIG_FORMAT_RGBA8888, + VC4_RENDER_CONFIG_FORMAT) | + VC4_SET_FIELD(surf->tiling, + VC4_RENDER_CONFIG_MEMORY_FORMAT); + } rsc->writes++; } +static void +vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job, + struct drm_vc4_submit_rcl_surface *submit_surf, + struct pipe_surface *psurf) +{ + struct vc4_surface *surf = vc4_surface(psurf); + + if (!surf) + return; + + struct vc4_resource *rsc = vc4_resource(psurf->texture); + submit_surf->hindex = vc4_gem_hindex(job, rsc->bo); + submit_surf->offset = surf->offset; + submit_surf->bits = 0; + rsc->writes++; +} + /** * Submits the job to the kernel and then reinitializes it. */ void -vc4_job_submit(struct vc4_context *vc4) +vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job) { + if (!job->needs_flush) + goto done; + + /* The RCL setup would choke if the draw bounds cause no drawing, so + * just drop the drawing if that's the case. + */ + if (job->draw_max_x <= job->draw_min_x || + job->draw_max_y <= job->draw_min_y) { + goto done; + } + if (vc4_debug & VC4_DEBUG_CL) { fprintf(stderr, "BCL:\n"); - vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false); - } - - struct drm_vc4_submit_cl submit; - memset(&submit, 0, sizeof(submit)); - - cl_ensure_space(&vc4->bo_handles, 4 * sizeof(uint32_t)); - cl_ensure_space(&vc4->bo_pointers, 4 * sizeof(struct vc4_bo *)); - - vc4_submit_setup_rcl_surface(vc4, &submit.color_read, - vc4->color_read, false, false); - vc4_submit_setup_ms_rcl_surface(vc4, &submit.color_ms_write, - vc4->color_write); - vc4_submit_setup_rcl_surface(vc4, &submit.zs_read, - vc4->zs_read, true, false); - vc4_submit_setup_rcl_surface(vc4, &submit.zs_write, - vc4->zs_write, true, true); - - submit.bo_handles = (uintptr_t)vc4->bo_handles.base; - submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4; - submit.bin_cl = (uintptr_t)vc4->bcl.base; - submit.bin_cl_size = cl_offset(&vc4->bcl); - submit.shader_rec = (uintptr_t)vc4->shader_rec.base; - submit.shader_rec_size = cl_offset(&vc4->shader_rec); - submit.shader_rec_count = vc4->shader_rec_count; - submit.uniforms = (uintptr_t)vc4->uniforms.base; - submit.uniforms_size = cl_offset(&vc4->uniforms); - - assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0); - submit.min_x_tile = vc4->draw_min_x / 64; - submit.min_y_tile = vc4->draw_min_y / 64; - submit.max_x_tile = (vc4->draw_max_x - 1) / 64; - submit.max_y_tile = (vc4->draw_max_y - 1) / 64; - submit.width = vc4->draw_width; - submit.height = vc4->draw_height; - if (vc4->cleared) { + vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false); + } + + if (cl_offset(&job->bcl) > 0) { + /* Increment the semaphore indicating that binning is done and + * unblocking the render thread. Note that this doesn't act + * until the FLUSH completes. + */ + cl_ensure_space(&job->bcl, 8); + cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr); + /* The FLUSH caps all of our bin lists with a + * VC4_PACKET_RETURN. + */ + cl_emit(&job->bcl, FLUSH, flush); + } + struct drm_vc4_submit_cl submit = { + .color_read.hindex = ~0, + .zs_read.hindex = ~0, + .color_write.hindex = ~0, + .msaa_color_write.hindex = ~0, + .zs_write.hindex = ~0, + .msaa_zs_write.hindex = ~0, + }; + + cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t)); + cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *)); + + if (job->resolve & PIPE_CLEAR_COLOR) { + if (!(job->cleared & PIPE_CLEAR_COLOR)) { + vc4_submit_setup_rcl_surface(job, &submit.color_read, + job->color_read, + false, false); + } + vc4_submit_setup_rcl_render_config_surface(job, + &submit.color_write, + job->color_write); + vc4_submit_setup_rcl_msaa_surface(job, + &submit.msaa_color_write, + job->msaa_color_write); + } + if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { + if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + vc4_submit_setup_rcl_surface(job, &submit.zs_read, + job->zs_read, true, false); + } + vc4_submit_setup_rcl_surface(job, &submit.zs_write, + job->zs_write, true, true); + vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write, + job->msaa_zs_write); + } + + if (job->msaa) { + /* This bit controls how many pixels the general + * (i.e. subsampled) loads/stores are iterating over + * (multisample loads replicate out to the other samples). + */ + submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X; + /* Controls whether color_write's + * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation + */ + submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X; + } + + submit.bo_handles = (uintptr_t)job->bo_handles.base; + submit.bo_handle_count = cl_offset(&job->bo_handles) / 4; + submit.bin_cl = (uintptr_t)job->bcl.base; + submit.bin_cl_size = cl_offset(&job->bcl); + submit.shader_rec = (uintptr_t)job->shader_rec.base; + submit.shader_rec_size = cl_offset(&job->shader_rec); + submit.shader_rec_count = job->shader_rec_count; + submit.uniforms = (uintptr_t)job->uniforms.base; + submit.uniforms_size = cl_offset(&job->uniforms); + + assert(job->draw_min_x != ~0 && job->draw_min_y != ~0); + submit.min_x_tile = job->draw_min_x / job->tile_width; + submit.min_y_tile = job->draw_min_y / job->tile_height; + submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width; + submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height; + submit.width = job->draw_width; + submit.height = job->draw_height; + if (job->cleared) { submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; - submit.clear_color[0] = vc4->clear_color[0]; - submit.clear_color[1] = vc4->clear_color[1]; - submit.clear_z = vc4->clear_depth; - submit.clear_s = vc4->clear_stencil; + submit.clear_color[0] = job->clear_color[0]; + submit.clear_color[1] = job->clear_color[1]; + submit.clear_z = job->clear_depth; + submit.clear_s = job->clear_stencil; } + submit.flags |= job->flags; if (!(vc4_debug & VC4_DEBUG_NORAST)) { int ret; @@ -193,15 +478,26 @@ vc4_job_submit(struct vc4_context *vc4) #ifndef USE_VC4_SIMULATOR ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); #else - ret = vc4_simulator_flush(vc4, &submit); + ret = vc4_simulator_flush(vc4, &submit, job); #endif - if (ret) { - fprintf(stderr, "VC4 submit failed\n"); - abort(); + static bool warned = false; + if (ret && !warned) { + fprintf(stderr, "Draw call returned %s. " + "Expect corruption.\n", strerror(errno)); + warned = true; + } else if (!ret) { + vc4->last_emit_seqno = submit.seqno; } } - vc4->last_emit_seqno = submit.seqno; + if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) { + if (!vc4_wait_seqno(vc4->screen, + vc4->last_emit_seqno - 5, + PIPE_TIMEOUT_INFINITE, + "job throttling")) { + fprintf(stderr, "Job throttling failed\n"); + } + } if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) { if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno, @@ -211,5 +507,30 @@ vc4_job_submit(struct vc4_context *vc4) } } - vc4_job_reset(vc4); +done: + vc4_job_free(vc4, job); +} + +static bool +vc4_job_compare(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct vc4_job_key)) == 0; } + +static uint32_t +vc4_job_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct vc4_job_key)); +} + +void +vc4_job_init(struct vc4_context *vc4) +{ + vc4->jobs = _mesa_hash_table_create(vc4, + vc4_job_hash, + vc4_job_compare); + vc4->write_jobs = _mesa_hash_table_create(vc4, + _mesa_hash_pointer, + _mesa_key_pointer_equal); +} +