From 5ad0c88dbe3e5805a10d8f1fef9d0cf1bbecdd46 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 12 Mar 2019 14:51:22 -0700 Subject: [PATCH] iris: Replace buffer backing storage and rebind to update addresses. This implements PIPE_CAP_INVALIDATE_BUFFER and invalidate_resource(), as well as the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag. When either of these happens, we swap out the backing storage of the buffer for a new idle BO, allowing us to write to it immediately without stalling or queueing a blit. On my Skylake GT4e at 1920x1080, this improves performance in games: ----------------------------------------------- | DiRT Rally | +25% (avg) | +17% (max) | | Bioshock Infinite | +22% (avg) | +11% (max) | | Shadow of Mordor | +27% (avg) | +83% (max) | ----------------------------------------------- --- src/gallium/drivers/iris/iris_context.h | 3 + src/gallium/drivers/iris/iris_resource.c | 47 ++++++++- src/gallium/drivers/iris/iris_screen.c | 1 + src/gallium/drivers/iris/iris_state.c | 125 +++++++++++++++++++++++ 4 files changed, 171 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 1b69b256947..ab70fc58718 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -355,6 +355,9 @@ struct iris_vtable { void (*upload_compute_state)(struct iris_context *ice, struct iris_batch *batch, const struct pipe_grid_info *grid); + void (*rebind_buffer)(struct iris_context *ice, + struct iris_resource *res, + uint64_t old_address); void (*load_register_reg32)(struct iris_batch *batch, uint32_t dst, uint32_t src); void (*load_register_reg64)(struct iris_batch *batch, uint32_t dst, diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c index 0011439949e..293f71aa1f9 100644 --- a/src/gallium/drivers/iris/iris_resource.c +++ b/src/gallium/drivers/iris/iris_resource.c @@ -38,6 +38,7 @@ #include 
"util/u_cpu_detect.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_threaded_context.h" #include "util/u_transfer.h" #include "util/u_transfer_helper.h" #include "util/u_upload_mgr.h" @@ -877,6 +878,37 @@ iris_resource_get_handle(struct pipe_screen *pscreen, return false; } +static void +iris_invalidate_resource(struct pipe_context *ctx, + struct pipe_resource *resource) +{ + struct iris_screen *screen = (void *) ctx->screen; + struct iris_context *ice = (void *) ctx; + struct iris_resource *res = (void *) resource; + + if (resource->target != PIPE_BUFFER) + return; + + /* We can't reallocate memory we didn't allocate in the first place. */ + if (res->bo->userptr) + return; + + // XXX: We should support this. + if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) + return; + + struct iris_bo *old_bo = res->bo; + struct iris_bo *new_bo = + iris_bo_alloc(screen->bufmgr, res->bo->name, resource->width0, + iris_memzone_for_address(old_bo->gtt_offset)); + if (!new_bo) + return; + + res->bo = new_bo; + ice->vtbl.rebind_buffer(ice, res, old_bo->gtt_offset); + iris_bo_unreference(old_bo); +} + static void iris_flush_staging_region(struct pipe_transfer *xfer, const struct pipe_box *flush_box) @@ -1280,11 +1312,15 @@ iris_transfer_map(struct pipe_context *ctx, struct iris_resource *res = (struct iris_resource *)resource; struct isl_surf *surf = &res->surf; - /* If we can discard the whole resource, we can also discard the - * subrange being accessed. - */ - if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) - usage |= PIPE_TRANSFER_DISCARD_RANGE; + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + /* Replace the backing storage with a fresh buffer for non-async maps */ + if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | + TC_TRANSFER_MAP_NO_INVALIDATE))) + iris_invalidate_resource(ctx, resource); + + /* If we can discard the whole resource, we can discard the range. 
*/ + usage |= PIPE_TRANSFER_DISCARD_RANGE; + } bool map_would_stall = false; @@ -1536,6 +1572,7 @@ void iris_init_resource_functions(struct pipe_context *ctx) { ctx->flush_resource = iris_flush_resource; + ctx->invalidate_resource = iris_invalidate_resource; ctx->transfer_map = u_transfer_helper_transfer_map; ctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; ctx->transfer_unmap = u_transfer_helper_transfer_unmap; diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 38cdbc1507d..1ede9c4335f 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -178,6 +178,7 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DRAW_PARAMETERS: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES: + case PIPE_CAP_INVALIDATE_BUFFER: return true; case PIPE_CAP_TGSI_FS_FBFETCH: case PIPE_CAP_POST_DEPTH_COVERAGE: diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 073da79a28a..d6a8ba4fb47 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -5460,6 +5460,130 @@ iris_destroy_state(struct iris_context *ice) /* ------------------------------------------------------------------- */ +static void +iris_rebind_buffer(struct iris_context *ice, + struct iris_resource *res, + uint64_t old_address) +{ + struct pipe_context *ctx = &ice->ctx; + struct iris_screen *screen = (void *) ctx->screen; + struct iris_genx_state *genx = ice->state.genx; + + assert(res->base.target == PIPE_BUFFER); + + /* Buffers can't be framebuffer attachments, nor display related, + * and we don't have upstream Clover support. 
+ */ + assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_BLENDABLE | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_CURSOR | + PIPE_BIND_COMPUTE_RESOURCE | + PIPE_BIND_GLOBAL))); + + if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) { + uint64_t bound_vbs = ice->state.bound_vertex_buffers; + while (bound_vbs) { + const int i = u_bit_scan64(&bound_vbs); + struct iris_vertex_buffer_state *state = &genx->vertex_buffers[i]; + + /* Update the CPU struct */ + STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_start) == 32); + STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) == 64); + uint64_t *addr = (uint64_t *) &state->state[1]; + + if (*addr == old_address) { + *addr = res->bo->gtt_offset; + ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS; + } + } + } + + /* No need to handle these: + * - PIPE_BIND_INDEX_BUFFER (emitted for every indexed draw) + * - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw) + * - PIPE_BIND_QUERY_BUFFER (no persistent state references) + */ + + if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) { + /* XXX: be careful about resetting vs appending... 
*/ + assert(false); + } + + for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) { + struct iris_shader_state *shs = &ice->state.shaders[s]; + enum pipe_shader_type p_stage = stage_to_pipe(s); + + if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) { + /* Skip constant buffer 0, it's for regular uniforms, not UBOs */ + uint32_t bound_cbufs = shs->bound_cbufs & ~1u; + while (bound_cbufs) { + const int i = u_bit_scan(&bound_cbufs); + struct pipe_shader_buffer *cbuf = &shs->constbuf[i]; + struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i]; + + if (res->bo == iris_resource_bo(cbuf->buffer)) { + upload_ubo_ssbo_surf_state(ice, cbuf, surf_state, false); + ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << s; + } + } + } + + if (res->bind_history & PIPE_BIND_SHADER_BUFFER) { + uint32_t bound_ssbos = shs->bound_ssbos; + while (bound_ssbos) { + const int i = u_bit_scan(&bound_ssbos); + struct pipe_shader_buffer *ssbo = &shs->ssbo[i]; + + if (res->bo == iris_resource_bo(ssbo->buffer)) { + struct pipe_shader_buffer buf = { + .buffer = &res->base, + .buffer_offset = ssbo->buffer_offset, + .buffer_size = ssbo->buffer_size, + }; + iris_set_shader_buffers(ctx, p_stage, i, 1, &buf, + (shs->writable_ssbos >> i) & 1); + } + } + } + + if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) { + uint32_t bound_sampler_views = shs->bound_sampler_views; + while (bound_sampler_views) { + const int i = u_bit_scan(&bound_sampler_views); + struct iris_sampler_view *isv = shs->textures[i]; + + if (res->bo == iris_resource_bo(isv->base.texture)) { + void *map = alloc_surface_states(ice->state.surface_uploader, + &isv->surface_state, + isv->res->aux.sampler_usages); + assert(map); + fill_buffer_surface_state(&screen->isl_dev, isv->res->bo, map, + isv->view.format, isv->view.swizzle, + isv->base.u.buf.offset, + isv->base.u.buf.size); + ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << s; + } + } + } + + if (res->bind_history & PIPE_BIND_SHADER_IMAGE) { + uint32_t bound_image_views = 
shs->bound_image_views; + while (bound_image_views) { + const int i = u_bit_scan(&bound_image_views); + struct iris_image_view *iv = &shs->image[i]; + + if (res->bo == iris_resource_bo(iv->base.resource)) { + iris_set_shader_images(ctx, p_stage, i, 1, &iv->base); + } + } + } + } +} + +/* ------------------------------------------------------------------- */ + static void iris_load_register_reg32(struct iris_batch *batch, uint32_t dst, uint32_t src) @@ -6075,6 +6199,7 @@ genX(init_state)(struct iris_context *ice) ice->vtbl.update_surface_base_address = iris_update_surface_base_address; ice->vtbl.upload_compute_state = iris_upload_compute_state; ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control; + ice->vtbl.rebind_buffer = iris_rebind_buffer; ice->vtbl.load_register_reg32 = iris_load_register_reg32; ice->vtbl.load_register_reg64 = iris_load_register_reg64; ice->vtbl.load_register_imm32 = iris_load_register_imm32; -- 2.30.2