From 9b9f973ca6d3cc1ec5be27857def00a83c032464 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kristian=20H=C3=B8gsberg=20Kristensen?= Date: Fri, 19 Jun 2015 15:41:30 -0700 Subject: [PATCH] vk: Implement scratch buffers to make spilling work --- src/vulkan/compiler.cpp | 67 ++++++++++++++++++++++++----------------- src/vulkan/device.c | 19 ++++++++++-- src/vulkan/pipeline.c | 23 +++++++------- src/vulkan/private.h | 10 +++--- 4 files changed, 72 insertions(+), 47 deletions(-) diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 5f5dacb5406..19a403aa1c1 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -248,13 +248,6 @@ really_do_vs_prog(struct brw_context *brw, ralloc_free(mem_ctx); - if (stage_prog_data->total_scratch > 0) - if (!anv_bo_init_new(&pipeline->vs_scratch_bo, - pipeline->device, - stage_prog_data->total_scratch)) - return false; - - return true; } @@ -535,12 +528,6 @@ really_do_wm_prog(struct brw_context *brw, ralloc_free(mem_ctx); - if (prog_data->base.total_scratch > 0) - if (!anv_bo_init_new(&pipeline->ps_scratch_bo, - pipeline->device, - prog_data->base.total_scratch)) - return false; - return true; } @@ -594,13 +581,6 @@ really_do_gs_prog(struct brw_context *brw, ralloc_free(output.mem_ctx); - if (output.prog_data.base.base.total_scratch) { - if (!anv_bo_init_new(&pipeline->gs_scratch_bo, - pipeline->device, - output.prog_data.base.base.total_scratch)) - return false; - } - return true; } @@ -684,6 +664,7 @@ fail_on_compile_error(int status, const char *msg) } struct anv_compiler { + struct anv_device *device; struct intel_screen *screen; struct brw_context *brw; struct gl_pipeline_object pipeline; @@ -710,6 +691,8 @@ anv_compiler_create(struct anv_device *device) if (compiler->brw == NULL) goto fail; + compiler->device = device; + compiler->brw->optionCache.info = NULL; compiler->brw->bufmgr = NULL; compiler->brw->gen = devinfo->gen; @@ -967,6 +950,28 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, 
unreachable("SPIR-V is not supported yet!"); } +static void +add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, + [VK_SHADER_STAGE_TESS_CONTROL] = 0, + [VK_SHADER_STAGE_TESS_EVALUATION] = 0, + [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, + [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, + [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= 1 << stage; + /* Align before recording so start and size share one base offset. */ + pipeline->scratch_start[stage] = ALIGN_U32(pipeline->total_scratch, 1024); + pipeline->total_scratch = pipeline->scratch_start[stage] + + prog_data->total_scratch * max_threads[stage]; +} + int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) { @@ -978,6 +983,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) * of various prog_data pointers. Make them NULL by default. 
*/ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); brw->use_rep_send = pipeline->use_repclear; brw->no_simd8 = pipeline->use_repclear; @@ -1024,6 +1030,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) bool success; pipeline->active_stages = 0; + pipeline->total_scratch = 0; if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { struct brw_vs_prog_key vs_key; @@ -1035,8 +1042,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); fail_if(!success, "do_wm_prog failed\n"); - pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base; - pipeline->active_stages |= VK_SHADER_STAGE_VERTEX_BIT;; + add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, + &pipeline->vs_prog_data.base.base); } else { memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); pipeline->vs_simd8 = NO_KERNEL; @@ -1053,8 +1060,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline); fail_if(!success, "do_gs_prog failed\n"); - pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT; - pipeline->prog_data[VK_SHADER_STAGE_GEOMETRY] = &pipeline->gs_prog_data.base.base; + add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, + &pipeline->gs_prog_data.base.base); } else { pipeline->gs_vec4 = NO_KERNEL; } @@ -1069,8 +1076,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); fail_if(!success, "do_wm_prog failed\n"); - pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base; - pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT; + add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, + &pipeline->wm_prog_data.base); } if 
(pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { @@ -1083,12 +1090,16 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); fail_if(!success, "brw_codegen_cs_prog failed\n"); - pipeline->prog_data[VK_SHADER_STAGE_COMPUTE] = &pipeline->cs_prog_data.base; - pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT; + add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, + &pipeline->cs_prog_data.base); } brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); + struct anv_device *device = compiler->device; + while (device->scratch_block_pool.bo.size < pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + gen7_compute_urb_partition(pipeline); return 0; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e55e66fd74f..c9e66743def 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -404,6 +404,8 @@ VkResult anv_CreateDevice( anv_state_pool_init(&device->surface_state_pool, &device->surface_state_block_pool); + anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); + device->info = *physicalDevice->info; device->compiler = anv_compiler_create(device); @@ -2387,9 +2389,14 @@ static void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->scratch_size = device->scratch_block_pool.size; + if (cmd_buffer->scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, - .GeneralStateBaseAddress = { NULL, 0 }, + .GeneralStateBaseAddress = { scratch_bo, 0 }, .GeneralStateMemoryObjectControlState = GEN8_MOCS, .GeneralStateBaseAddressModifyEnable = true, .GeneralStateBufferSize = 0xfffff, @@ -3213,8 +3220,16 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } } - if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + if 
(cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } if (cmd_buffer->descriptors_dirty) flush_descriptor_sets(cmd_buffer); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index aa24ad43004..8be47c91205 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -573,8 +573,8 @@ anv_pipeline_create( .BindingTableEntryCount = 0, .ExpectedVertexCount = pipeline->gs_vertex_count, - .PerThreadScratchSpace = 0, - .ScratchSpaceBasePointer = 0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, .OutputTopology = gs_prog_data->output_topology, @@ -628,11 +628,8 @@ anv_pipeline_create( .AccessesUAV = false, .SoftwareExceptionEnable = false, - /* FIXME: pointer needs to be assigned outside as it aliases - * PerThreadScratchSpace. 
- */ - .ScratchSpaceBasePointer = 0, - .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), .DispatchGRFStartRegisterForURBData = vue_prog_data->base.dispatch_grf_start_reg, @@ -676,8 +673,8 @@ anv_pipeline_create( .VectorMaskEnable = true, .SamplerCount = 1, - .ScratchSpaceBasePointer = 0, - .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), .MaximumNumberofThreadsPerPSD = 64 - 2, .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? @@ -757,11 +754,13 @@ VkResult anv_CreateComputePipeline( anv_compiler_run(device->compiler, pipeline); + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, - .ScratchSpaceBasePointer = 0, /* FIXME: Scratch bo, this should be a reloc? 
*/ .StackSize = 0, - .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_COMPUTE], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, .MaximumNumberofThreads = device->info.max_cs_threads - 1, .NumberofURBEntries = 2, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 08dea1526e8..abc0cfb8ec2 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -384,6 +384,8 @@ struct anv_device { struct anv_queue queue; + struct anv_block_pool scratch_block_pool; + struct anv_compiler * compiler; struct anv_aub_writer * aub_writer; pthread_mutex_t mutex; @@ -655,6 +657,7 @@ struct anv_cmd_buffer { uint32_t dirty; uint32_t compute_dirty; uint32_t descriptors_dirty; + uint32_t scratch_size; struct anv_pipeline * pipeline; struct anv_pipeline * compute_pipeline; struct anv_framebuffer * framebuffer; @@ -696,6 +699,8 @@ struct anv_pipeline { struct brw_gs_prog_data gs_prog_data; struct brw_cs_prog_data cs_prog_data; struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE]; + uint32_t scratch_start[VK_NUM_SHADER_STAGE]; + uint32_t total_scratch; struct { uint32_t vs_start; uint32_t vs_size; @@ -705,11 +710,6 @@ struct anv_pipeline { uint32_t nr_gs_entries; } urb; - struct anv_bo vs_scratch_bo; - struct anv_bo ps_scratch_bo; - struct anv_bo gs_scratch_bo; - struct anv_bo cs_scratch_bo; - uint32_t active_stages; struct anv_state_stream program_stream; struct anv_state blend_state; -- 2.30.2