From 06a993dac2aa2c73435105d06dc8260aef7afc77 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Fri, 19 Oct 2018 01:14:38 -0700
Subject: [PATCH] iris: rewrite grid surface handling

now we only upload a new grid when it's actually changed, which saves us
from having to emit a new binding table every time it changes.

this also moves a bunch of non-gen-specific stuff out of iris_state.c
---
Review notes (ignored by git am, not part of the commit message):
  * iris_update_grid_size_resource() now takes a real reference on the
    indirect buffer and clears last_grid after an indirect dispatch.
  * The post-image blob hash on the iris_draw.c "index" line predates these
    edits and was not recomputed.

 src/gallium/drivers/iris/iris_context.h |  7 +++
 src/gallium/drivers/iris/iris_draw.c    | 72 +++++++++++++++++++++++-
 src/gallium/drivers/iris/iris_state.c   | 71 ++++++------------------
 3 files changed, 93 insertions(+), 57 deletions(-)

diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index cc28af26375..096a3d2ff51 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -412,6 +412,13 @@ struct iris_context {
       enum pipe_prim_type prim_mode:8;
       uint8_t vertices_per_patch;
 
+      /** The last compute grid size */
+      uint32_t last_grid[3];
+      /** Reference to the BO containing the compute grid size */
+      struct iris_state_ref grid_size;
+      /** Reference to the SURFACE_STATE for the compute grid resource */
+      struct iris_state_ref grid_surf_state;
+
       /** Are depth writes enabled? (Depth buffer may or may not exist.) */
       bool depth_writes_enabled;
 
diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c
index 2bde382b49f..c2216a22bed 100644
--- a/src/gallium/drivers/iris/iris_draw.c
+++ b/src/gallium/drivers/iris/iris_draw.c
@@ -34,6 +34,7 @@
 #include "pipe/p_screen.h"
 #include "util/u_inlines.h"
 #include "util/u_transfer.h"
+#include "util/u_upload_mgr.h"
 #include "intel/compiler/brw_compiler.h"
 #include "iris_context.h"
 
@@ -93,8 +94,73 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
    iris_postdraw_update_resolve_tracking(ice, batch);
 }
 
+/**
+ * Update the buffer holding the compute grid size (the source of
+ * gl_NumWorkGroups) and the SURFACE_STATE that describes it.
+ *
+ * Direct dispatches only re-upload when the grid size differs from the last
+ * one uploaded; indirect dispatches point at the caller's indirect buffer.
+ */
+static void
+iris_update_grid_size_resource(struct iris_context *ice,
+                               const struct pipe_grid_info *grid)
+{
+   const struct iris_screen *screen = (void *) ice->ctx.screen;
+   const struct isl_device *isl_dev = &screen->isl_dev;
+   struct iris_state_ref *grid_ref = &ice->state.grid_size;
+   struct iris_state_ref *state_ref = &ice->state.grid_surf_state;
+
+   // XXX: if the shader doesn't actually care about the grid info,
+   // don't bother uploading the surface?
+
+   if (grid->indirect) {
+      /* Hold a real reference.  grid_ref->res may own a reference handed out
+       * by u_upload_data(), and the next upload releases whatever is stored
+       * here, so a bare pointer assignment would unbalance the refcounts.
+       */
+      pipe_resource_reference(&grid_ref->res, grid->indirect);
+      grid_ref->offset = grid->indirect_offset;
+
+      /* Forget the cached size so the next direct dispatch re-uploads its
+       * grid rather than leaving the surface pointed at this buffer.
+       */
+      memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
+   } else {
+      /* If the size is the same, we don't need to upload anything. */
+      if (memcmp(ice->state.last_grid, grid->grid, sizeof(grid->grid)) == 0)
+         return;
+
+      memcpy(ice->state.last_grid, grid->grid, sizeof(grid->grid));
+
+      u_upload_data(ice->state.dynamic_uploader, 0, sizeof(grid->grid), 4,
+                    grid->grid, &grid_ref->offset, &grid_ref->res);
+      grid_ref->offset +=
+         iris_bo_offset_from_base_address(iris_resource_bo(grid_ref->res));
+   }
+
+   void *surf_map = NULL;
+   u_upload_alloc(ice->state.surface_uploader, 0, isl_dev->ss.size,
+                  isl_dev->ss.align, &state_ref->offset, &state_ref->res,
+                  &surf_map);
+   state_ref->offset +=
+      iris_bo_offset_from_base_address(iris_resource_bo(state_ref->res));
+   /* NOTE(review): in the direct path grid_ref->offset already had
+    * iris_bo_offset_from_base_address() added above, and gtt_offset is added
+    * again here -- confirm the address is not double-counted.
+    */
+   isl_buffer_fill_state(&screen->isl_dev, surf_map,
+                         .address = grid_ref->offset +
+                            iris_resource_bo(grid_ref->res)->gtt_offset,
+                         .size_B = sizeof(grid->grid),
+                         .format = ISL_FORMAT_RAW,
+                         .stride_B = 1,
+                         .mocs = 4); // XXX: MOCS
+
+   ice->state.dirty |= IRIS_DIRTY_BINDINGS_CS;
+}
+
 void
-iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
+iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid)
 {
    struct iris_context *ice = (struct iris_context *) ctx;
    struct iris_batch *batch = &ice->compute_batch;
@@ -111,9 +177,11 @@ iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
 
    // XXX: predraw resolves / cache flushing
 
+   iris_update_grid_size_resource(ice, grid);
+
    iris_binder_reserve_compute(ice);
    ice->vtbl.update_surface_base_address(batch, &ice->state.binder);
-   ice->vtbl.upload_compute_state(ice, batch, info);
+   ice->vtbl.upload_compute_state(ice, batch, grid);
 
    // XXX: this is wrong. we need separate dirty tracking for compute/render
    ice->state.dirty = 0ull;
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 5f85600e70a..c11e4e3e87b 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -3361,8 +3361,7 @@ static void
 iris_populate_binding_table(struct iris_context *ice,
                             struct iris_batch *batch,
                             gl_shader_stage stage,
-                            bool pin_only,
-                            struct iris_state_ref *grid_size_surf)
+                            bool pin_only)
 {
    const struct iris_binder *binder = &ice->state.binder;
    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
@@ -3385,12 +3384,11 @@ iris_populate_binding_table(struct iris_context *ice,
 
    if (stage == MESA_SHADER_COMPUTE) {
       /* surface for gl_NumWorkGroups */
-      assert(grid_size_surf || pin_only);
-      if (grid_size_surf) {
-         struct iris_bo *bo = iris_resource_bo(grid_size_surf->res);
-         iris_use_pinned_bo(batch, bo, false);
-         push_bt_entry(grid_size_surf->offset);
-      }
+      struct iris_state_ref *grid_data = &ice->state.grid_size;
+      struct iris_state_ref *grid_state = &ice->state.grid_surf_state;
+      iris_use_pinned_bo(batch, iris_resource_bo(grid_data->res), false);
+      iris_use_pinned_bo(batch, iris_resource_bo(grid_state->res), false);
+      push_bt_entry(grid_state->offset);
    }
 
    if (stage == MESA_SHADER_FRAGMENT) {
@@ -3536,7 +3534,7 @@ iris_restore_render_saved_bos(struct iris_context *ice,
    for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
       if (clean & (IRIS_DIRTY_BINDINGS_VS << stage)) {
          /* Re-pin any buffers referred to by the binding table. */
-         iris_populate_binding_table(ice, batch, stage, true, NULL);
+         iris_populate_binding_table(ice, batch, stage, true);
       }
    }
 
@@ -3622,7 +3620,7 @@ iris_restore_compute_saved_bos(struct iris_context *ice,
 
    if (clean & IRIS_DIRTY_BINDINGS_CS) {
       /* Re-pin any buffers referred to by the binding table. */
-      iris_populate_binding_table(ice, batch, stage, true, NULL);
+      iris_populate_binding_table(ice, batch, stage, true);
    }
 
    struct pipe_resource *sampler_res = shs->sampler_table.res;
@@ -3871,7 +3869,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
 
    for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
       if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
-         iris_populate_binding_table(ice, batch, stage, false, NULL);
+         iris_populate_binding_table(ice, batch, stage, false);
       }
    }
 
@@ -4277,46 +4275,8 @@ iris_upload_compute_state(struct iris_context *ice,
    // XXX: L3 configuration not set up for SLM
    assert(prog_data->total_shared == 0);
 
-   struct pipe_resource *grid_size_res = NULL;
-   uint32_t grid_size_offset;
-   if (grid->indirect) {
-      grid_size_res = grid->indirect;
-      grid_size_offset = grid->indirect_offset;
-   } else {
-      uint32_t *grid_size_map =
-         stream_state(batch, ice->state.surface_uploader, &grid_size_res, 12, 4,
-                      &grid_size_offset);
-      grid_size_map[0] = grid->grid[0];
-      grid_size_map[1] = grid->grid[1];
-      grid_size_map[2] = grid->grid[2];
-      struct iris_bo *grid_size_bo = iris_resource_bo(grid_size_res);
-      grid_size_offset -= iris_bo_offset_from_base_address(grid_size_bo);
-   }
-
-   struct iris_state_ref grid_size_surf;
-   memset(&grid_size_surf, 0, sizeof(grid_size_surf));
-   void *grid_surf_state_map =
-      upload_state(ice->state.surface_uploader,
-                   &grid_size_surf,
-                   4 * GENX(RENDER_SURFACE_STATE_length), 64);
-   assert(grid_surf_state_map);
-   struct iris_bo *grid_size_bo = iris_resource_bo(grid_size_res);
-   iris_use_pinned_bo(batch, grid_size_bo, false);
-   grid_size_surf.offset +=
-      iris_bo_offset_from_base_address(iris_resource_bo(grid_size_surf.res));
-   isl_buffer_fill_state(&screen->isl_dev, grid_surf_state_map,
-                         .address =
-                            grid_size_bo->gtt_offset + grid_size_offset,
-                         .size_B = 12,
-                         .format = ISL_FORMAT_RAW,
-                         .stride_B = 1,
-                         .mocs = MOCS_WB);
-
-   // XXX: this will update the binding table on every dispatch, should
-   // XXX: check if the grid size actually changed (or indirect buf changed)
-   if (dirty & IRIS_DIRTY_BINDINGS_CS || grid_size_res)
-      iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false,
-                                  &grid_size_surf);
+   if (dirty & IRIS_DIRTY_BINDINGS_CS)
+      iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
 
    iris_use_optional_res(batch, shs->sampler_table.res, false);
    iris_use_pinned_bo(batch, iris_resource_bo(shader->assembly.res), false);
@@ -4419,18 +4379,19 @@ iris_upload_compute_state(struct iris_context *ice,
 #define GPGPU_DISPATCHDIMZ 0x2508
 
    if (grid->indirect) {
-      struct iris_bo *bo = iris_resource_bo(grid_size_res);
+      struct iris_state_ref *grid_size = &ice->state.grid_size;
+      struct iris_bo *bo = iris_resource_bo(grid_size->res);
       iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
          lrm.RegisterAddress = GPGPU_DISPATCHDIMX;
-         lrm.MemoryAddress = ro_bo(bo, grid_size_offset + 0);
+         lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0);
       }
       iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
          lrm.RegisterAddress = GPGPU_DISPATCHDIMY;
-         lrm.MemoryAddress = ro_bo(bo, grid_size_offset + 4);
+         lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4);
       }
       iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
          lrm.RegisterAddress = GPGPU_DISPATCHDIMZ;
-         lrm.MemoryAddress = ro_bo(bo, grid_size_offset + 8);
+         lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8);
       }
    }
 
-- 
2.30.2