From: Bas Nieuwenhuizen
Date: Sun, 12 Mar 2017 21:43:51 +0000 (+0100)
Subject: radv: Add suballocation for shaders.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c9d4b571ad5dc3e622603a0f50d293192850d749;p=mesa.git

radv: Add suballocation for shaders.

This reduces the number of BOs that we need for the BO lists during
a submission.

Currently uses a fairly simple linear search for finding free space,
that could eventually be improved to a binary tree, which with some
per-node info could make a check for space O(1) and finding it O(log n),
in the number of buffers in that slab.

Signed-off-by: Bas Nieuwenhuizen
Reviewed-by: Dave Airlie
---

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 7b41e16e916..e7bff2a88cf 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -546,7 +546,7 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
 		struct ac_vs_output_info *outinfo)
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	uint64_t va = ws->buffer_get_va(shader->bo);
+	uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
 	unsigned export_count;
 
 	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
@@ -596,7 +596,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
 		struct ac_es_output_info *outinfo)
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	uint64_t va = ws->buffer_get_va(shader->bo);
+	uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
 
 	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
 	radv_emit_prefetch(cmd_buffer, va, shader->code_size);
@@ -615,7 +615,7 @@ radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer,
 		struct radv_shader_variant *shader)
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	uint64_t va = ws->buffer_get_va(shader->bo);
+	uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
 	uint32_t rsrc2 = shader->rsrc2;
 
 	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
@@ -640,7 +640,7 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer,
 		struct radv_shader_variant *shader)
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
-	uint64_t va = ws->buffer_get_va(shader->bo);
+	uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
 
 	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
 	radv_emit_prefetch(cmd_buffer, va, shader->code_size);
@@ -775,7 +775,7 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
 			       S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
 			       S_028B90_ENABLE(gs_num_invocations > 0));
 
-	va = ws->buffer_get_va(gs->bo);
+	va = ws->buffer_get_va(gs->bo) + gs->bo_offset;
 	ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8);
 	radv_emit_prefetch(cmd_buffer, va, gs->code_size);
 
@@ -816,8 +816,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 	assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
 
 	ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
-	va = ws->buffer_get_va(ps->bo);
+	va = ws->buffer_get_va(ps->bo) + ps->bo_offset;
 	ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
 	radv_emit_prefetch(cmd_buffer, va, ps->code_size);
 
@@ -2256,7 +2255,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
 	cmd_buffer->state.emitted_compute_pipeline = pipeline;
 
 	compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
-	va = ws->buffer_get_va(compute_shader->bo);
+	va = ws->buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
 
 	ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8);
 	radv_emit_prefetch(cmd_buffer, va, compute_shader->code_size);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 19f1e105779..13fb19cf48b 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1080,6 +1080,9 @@ VkResult radv_CreateDevice(
 	else
 		device->alloc = physical_device->instance->alloc;
 
+	mtx_init(&device->shader_slab_mutex, mtx_plain);
+	list_inithead(&device->shader_slabs);
+
 	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
 		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
 		uint32_t qfi = queue_create->queueFamilyIndex;
@@ -1270,6 +1273,8 @@ void radv_DestroyDevice(
 	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
 	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
 
+	radv_destroy_shader_slabs(device);
+
 	vk_free(&device->alloc, device);
 }
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 28389368cec..bd5eeb776c4 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -381,7 +381,10 @@ void radv_shader_variant_destroy(struct radv_device *device,
 	if (!p_atomic_dec_zero(&variant->ref_count))
 		return;
 
-	device->ws->buffer_destroy(variant->bo);
+	mtx_lock(&device->shader_slab_mutex);
+	list_del(&variant->slab_list);
+	mtx_unlock(&device->shader_slab_mutex);
+
 	free(variant);
 }
 
@@ -431,14 +434,8 @@ static void radv_fill_shader_variant(struct radv_device *device,
 		S_00B848_DX10_CLAMP(1) |
 		S_00B848_FLOAT_MODE(variant->config.float_mode);
 
-	variant->bo = device->ws->buffer_create(device->ws, binary->code_size, 256,
-						RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
-
-	void *ptr = device->ws->buffer_map(variant->bo);
+	void *ptr = radv_alloc_shader_memory(device, variant);
 	memcpy(ptr, binary->code, binary->code_size);
-	device->ws->buffer_unmap(variant->bo);
-
-
 }
 
 static struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
@@ -2426,3 +2423,84 @@ VkResult radv_CreateComputePipelines(
 
 	return result;
 }
+
+/**
+ * Suballocate device memory for a shader's code out of the device's shader
+ * slabs.
+ *
+ * Each existing slab is searched first-fit for a 256-byte aligned gap of at
+ * least shader->code_size bytes. If no slab has room, a new slab is created
+ * and the shader is placed at offset 0 of it. On return shader->bo and
+ * shader->bo_offset describe the placement and the shader is linked into the
+ * slab's shader list, which is kept ordered by offset.
+ *
+ * shader->code_size must be set before calling.
+ *
+ * Returns the CPU address of the shader's range inside the slab mapping.
+ *
+ * NOTE(review): the calloc/buffer_create/buffer_map results are not checked
+ * here, and the callers copy into the returned pointer unconditionally.
+ */
+void *radv_alloc_shader_memory(struct radv_device *device,
+			       struct radv_shader_variant *shader)
+{
+	mtx_lock(&device->shader_slab_mutex);
+	list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
+		uint64_t offset = 0;
+		list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
+			if (s->bo_offset - offset >= shader->code_size) {
+				shader->bo = slab->bo;
+				shader->bo_offset = offset;
+				/* Link in front of s to keep the list sorted by offset. */
+				list_addtail(&shader->slab_list, &s->slab_list);
+				mtx_unlock(&device->shader_slab_mutex);
+				return slab->ptr + offset;
+			}
+			offset = align_u64(s->bo_offset + s->code_size, 256);
+		}
+		/* Check offset first so the unsigned subtraction cannot wrap. */
+		if (offset <= slab->size && slab->size - offset >= shader->code_size) {
+			shader->bo = slab->bo;
+			shader->bo_offset = offset;
+			list_addtail(&shader->slab_list, &slab->shaders);
+			mtx_unlock(&device->shader_slab_mutex);
+			return slab->ptr + offset;
+		}
+	}
+
+	/* The BO is created and mapped without holding the mutex. */
+	mtx_unlock(&device->shader_slab_mutex);
+	struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
+
+	/* Slabs are 256 KiB by default, but must never be smaller than the
+	 * shader placed in them or the caller's copy overruns the BO. */
+	slab->size = 256 * 1024;
+	if (shader->code_size > slab->size)
+		slab->size = align_u64(shader->code_size, 256);
+	slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
+	                                     RADEON_DOMAIN_VRAM, 0);
+	slab->ptr = (char*)device->ws->buffer_map(slab->bo);
+	list_inithead(&slab->shaders);
+
+	mtx_lock(&device->shader_slab_mutex);
+	list_add(&slab->slabs, &device->shader_slabs);
+
+	shader->bo = slab->bo;
+	shader->bo_offset = 0;
+	list_add(&shader->slab_list, &slab->shaders);
+	mtx_unlock(&device->shader_slab_mutex);
+	return slab->ptr;
+}
+
+/**
+ * Destroy every shader slab BO owned by the device, free the slab
+ * bookkeeping, and destroy the slab mutex.
+ */
+void radv_destroy_shader_slabs(struct radv_device *device)
+{
+	list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
+		device->ws->buffer_destroy(slab->bo);
+		free(slab);
+	}
+	mtx_destroy(&device->shader_slab_mutex);
+}
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index 86479f66236..3995434d903 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -168,6 +168,7 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
 		if (!variant)
 			return NULL;
 
+		variant->code_size = entry->code_size;
 		variant->config = entry->config;
 		variant->info = entry->variant_info;
 		variant->rsrc1 = entry->rsrc1;
@@ -175,12 +176,8 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
 		variant->code_size = entry->code_size;
 		variant->ref_count = 1;
 
-		variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
-							RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
-
-		void *ptr = device->ws->buffer_map(variant->bo);
+		void *ptr = radv_alloc_shader_memory(device, variant);
 		memcpy(ptr, entry->code, entry->code_size);
-		device->ws->buffer_unmap(variant->bo);
 
 		entry->variant = variant;
 	}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 25afd497da0..8e86f5c1d52 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -549,6 +549,9 @@ struct radv_device {
 	struct radv_pipeline_cache *                mem_cache;
 
 	uint32_t image_mrt_offset_counter;
+
+	struct list_head shader_slabs;
+	mtx_t shader_slab_mutex;
 };
 
 struct radv_device_memory {
@@ -981,17 +984,38 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
 	     stage = __builtin_ffs(__tmp) - 1, __tmp;			\
 	     __tmp &= ~(1 << (stage)))
 
+
+/* One buffer object that shader code is suballocated from. */
+struct radv_shader_slab {
+	struct list_head slabs;		/* link in radv_device::shader_slabs */
+	struct list_head shaders;	/* variants placed in this slab, ordered by bo_offset */
+	struct radeon_winsys_bo *bo;	/* backing buffer object */
+	uint64_t size;			/* size passed to buffer_create for bo */
+	char *ptr;			/* CPU mapping of bo */
+};
+
 struct radv_shader_variant {
 	uint32_t                                    ref_count;
 
 	struct radeon_winsys_bo *bo;
+	uint64_t bo_offset;		/* offset of the code inside bo */
 	struct ac_shader_config config;
 	struct ac_shader_variant_info info;
 	unsigned rsrc1;
 	unsigned rsrc2;
 	uint32_t code_size;
+
+	struct list_head slab_list;	/* link in radv_shader_slab::shaders */
 };
 
+
+/* Suballocate slab memory for a shader's code; returns its CPU address. */
+void *radv_alloc_shader_memory(struct radv_device *device,
+                               struct radv_shader_variant *shader);
+
+/* Destroy all shader slabs of the device and the slab mutex. */
+void radv_destroy_shader_slabs(struct radv_device *device);
+
 struct radv_depth_stencil_state {
 	uint32_t                                    db_depth_control;
 	uint32_t                                    db_stencil_control;