X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fvulkan%2Foverlay-layer%2Foverlay.cpp;h=652478407e7ac073b8dd50c1b67b45f8330cb40d;hb=3a28281bf8dfd2ad990ccc0a0903a3cc3cada8fa;hp=979bb7bf913011f00fbc9996da82e685385b85e7;hpb=4438188f492e1f7c32c2728ef26a3772cd80081d;p=mesa.git diff --git a/src/vulkan/overlay-layer/overlay.cpp b/src/vulkan/overlay-layer/overlay.cpp index 979bb7bf913..652478407e7 100644 --- a/src/vulkan/overlay-layer/overlay.cpp +++ b/src/vulkan/overlay-layer/overlay.cpp @@ -34,6 +34,7 @@ #include "util/debug.h" #include "util/hash_table.h" +#include "util/list.h" #include "util/ralloc.h" #include "util/os_time.h" #include "util/simple_mtx.h" @@ -47,6 +48,9 @@ struct instance_data { VkInstance instance; struct overlay_params params; + bool pipeline_statistics_enabled; + + bool first_line_printed; }; struct frame_stat { @@ -83,9 +87,12 @@ struct command_buffer_data { VkCommandBuffer cmd_buffer; VkQueryPool pipeline_query_pool; + VkQueryPool timestamp_query_pool; uint32_t query_index; struct frame_stat stats; + + struct list_head link; /* link into queue_data::running_command_buffer */ }; /* Mapped from VkQueue */ @@ -95,6 +102,28 @@ struct queue_data { VkQueue queue; VkQueueFlags flags; uint32_t family_index; + uint64_t timestamp_mask; + + VkFence queries_fence; + + struct list_head running_command_buffer; +}; + +struct overlay_draw { + struct list_head link; + + VkCommandBuffer command_buffer; + + VkSemaphore semaphore; + VkFence fence; + + VkBuffer vertex_buffer; + VkDeviceMemory vertex_buffer_mem; + VkDeviceSize vertex_buffer_size; + + VkBuffer index_buffer; + VkDeviceMemory index_buffer_mem; + VkDeviceSize index_buffer_size; }; /* Mapped from VkSwapchainKHR */ @@ -123,17 +152,7 @@ struct swapchain_data { VkCommandPool command_pool; - struct { - VkCommandBuffer command_buffer; - - VkBuffer vertex_buffer; - VkDeviceMemory vertex_buffer_mem; - VkDeviceSize vertex_buffer_size; - - VkBuffer index_buffer; - VkDeviceMemory 
index_buffer_mem; - VkDeviceSize index_buffer_size; - } frame_data[2]; + struct list_head draws; /* List of struct overlay_draw */ bool font_uploaded; VkImage font_image; @@ -142,9 +161,6 @@ struct swapchain_data { VkBuffer upload_font_buffer; VkDeviceMemory upload_font_buffer_mem; - VkFence fence; - VkSemaphore submission_semaphore; - /**/ ImGuiContext* imgui_context; ImVec2 window_size; @@ -158,6 +174,7 @@ struct swapchain_data { double fps; enum overlay_param_enabled stat_selector; + double time_dividor; struct frame_stat stats_min, stats_max; struct frame_stat frames_stats[200]; @@ -168,49 +185,55 @@ struct swapchain_data { struct frame_stat accumulated_stats; }; -static struct hash_table *vk_object_to_data = NULL; +static const VkQueryPipelineStatisticFlags overlay_query_flags = + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT; +#define OVERLAY_QUERY_COUNT (11) + +static struct hash_table_u64 *vk_object_to_data = NULL; static simple_mtx_t vk_object_to_data_mutex = _SIMPLE_MTX_INITIALIZER_NP; thread_local ImGuiContext* __MesaImGui; static inline void ensure_vk_object_map(void) { - if (!vk_object_to_data) { - vk_object_to_data = _mesa_hash_table_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); - } + if (!vk_object_to_data) + vk_object_to_data = _mesa_hash_table_u64_create(NULL); } -#define 
FIND_SWAPCHAIN_DATA(obj) ((struct swapchain_data *)find_object_data((void *) obj)) -#define FIND_CMD_BUFFER_DATA(obj) ((struct command_buffer_data *)find_object_data((void *) obj)) -#define FIND_DEVICE_DATA(obj) ((struct device_data *)find_object_data((void *) obj)) -#define FIND_QUEUE_DATA(obj) ((struct queue_data *)find_object_data((void *) obj)) -#define FIND_PHYSICAL_DEVICE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj)) -#define FIND_INSTANCE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj)) -static void *find_object_data(void *obj) +#define HKEY(obj) ((uint64_t)(obj)) +#define FIND(type, obj) ((type *)find_object_data(HKEY(obj))) + +static void *find_object_data(uint64_t obj) { simple_mtx_lock(&vk_object_to_data_mutex); ensure_vk_object_map(); - struct hash_entry *entry = _mesa_hash_table_search(vk_object_to_data, obj); - void *data = entry ? entry->data : NULL; + void *data = _mesa_hash_table_u64_search(vk_object_to_data, obj); simple_mtx_unlock(&vk_object_to_data_mutex); return data; } -static void map_object(void *obj, void *data) +static void map_object(uint64_t obj, void *data) { simple_mtx_lock(&vk_object_to_data_mutex); ensure_vk_object_map(); - _mesa_hash_table_insert(vk_object_to_data, obj, data); + _mesa_hash_table_u64_insert(vk_object_to_data, obj, data); simple_mtx_unlock(&vk_object_to_data_mutex); } -static void unmap_object(void *obj) +static void unmap_object(uint64_t obj) { simple_mtx_lock(&vk_object_to_data_mutex); - struct hash_entry *entry = _mesa_hash_table_search(vk_object_to_data, obj); - _mesa_hash_table_remove(vk_object_to_data, entry); + _mesa_hash_table_u64_remove(vk_object_to_data, obj); simple_mtx_unlock(&vk_object_to_data_mutex); } @@ -251,13 +274,45 @@ static VkLayerDeviceCreateInfo *get_device_chain_info(const VkDeviceCreateInfo * return NULL; } +static struct VkBaseOutStructure * +clone_chain(const struct VkBaseInStructure *chain) +{ + struct VkBaseOutStructure *head = NULL, *tail = NULL; + + 
vk_foreach_struct_const(item, chain) { + size_t item_size = vk_structure_type_size(item); + struct VkBaseOutStructure *new_item = + (struct VkBaseOutStructure *)malloc(item_size);; + + memcpy(new_item, item, item_size); + + if (!head) + head = new_item; + if (tail) + tail->pNext = new_item; + tail = new_item; + } + + return head; +} + +static void +free_chain(struct VkBaseOutStructure *chain) +{ + while (chain) { + void *node = chain; + chain = chain->pNext; + free(node); + } +} + /**/ static struct instance_data *new_instance_data(VkInstance instance) { struct instance_data *data = rzalloc(NULL, struct instance_data); data->instance = instance; - map_object(data->instance, data); + map_object(HKEY(data->instance), data); return data; } @@ -265,7 +320,7 @@ static void destroy_instance_data(struct instance_data *data) { if (data->params.output_file) fclose(data->params.output_file); - unmap_object(data->instance); + unmap_object(HKEY(data->instance)); ralloc_free(data); } @@ -284,9 +339,9 @@ static void instance_data_map_physical_devices(struct instance_data *instance_da for (uint32_t i = 0; i < physicalDeviceCount; i++) { if (map) - map_object(physicalDevices[i], instance_data); + map_object(HKEY(physicalDevices[i]), instance_data); else - unmap_object(physicalDevices[i]); + unmap_object(HKEY(physicalDevices[i])); } free(physicalDevices); @@ -298,7 +353,7 @@ static struct device_data *new_device_data(VkDevice device, struct instance_data struct device_data *data = rzalloc(NULL, struct device_data); data->instance = instance; data->device = device; - map_object(data->device, data); + map_object(HKEY(data->device), data); return data; } @@ -311,8 +366,19 @@ static struct queue_data *new_queue_data(VkQueue queue, data->device = device_data; data->queue = queue; data->flags = family_props->queueFlags; + data->timestamp_mask = (1ull << family_props->timestampValidBits) - 1; data->family_index = family_index; - map_object(data->queue, data); + 
list_inithead(&data->running_command_buffer); + map_object(HKEY(data->queue), data); + + /* Fence synchronizing access to queries on that queue. */ + VkFenceCreateInfo fence_info = {}; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; + VK_CHECK(device_data->vtable.CreateFence(device_data->device, + &fence_info, + NULL, + &data->queries_fence)); if (data->flags & VK_QUEUE_GRAPHICS_BIT) device_data->graphic_queue = data; @@ -320,6 +386,14 @@ static struct queue_data *new_queue_data(VkQueue queue, return data; } +static void destroy_queue(struct queue_data *data) +{ + struct device_data *device_data = data->device; + device_data->vtable.DestroyFence(device_data->device, data->queries_fence, NULL); + unmap_object(HKEY(data->queue)); + ralloc_free(data); +} + static void device_map_queues(struct device_data *data, const VkDeviceCreateInfo *pCreateInfo) { @@ -360,53 +434,122 @@ static void device_map_queues(struct device_data *data, static void device_unmap_queues(struct device_data *data) { for (uint32_t i = 0; i < data->n_queues; i++) - unmap_object(data->queues[i]->queue); + destroy_queue(data->queues[i]); } static void destroy_device_data(struct device_data *data) { - unmap_object(data->device); + unmap_object(HKEY(data->device)); ralloc_free(data); } /**/ static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer, VkCommandBufferLevel level, + VkQueryPool pipeline_query_pool, + VkQueryPool timestamp_query_pool, + uint32_t query_index, struct device_data *device_data) { struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data); data->device = device_data; data->cmd_buffer = cmd_buffer; data->level = level; - map_object((void *) data->cmd_buffer, data); + data->pipeline_query_pool = pipeline_query_pool; + data->timestamp_query_pool = timestamp_query_pool; + data->query_index = query_index; + list_inithead(&data->link); + map_object(HKEY(data->cmd_buffer), data); 
return data; } static void destroy_command_buffer_data(struct command_buffer_data *data) { - unmap_object((void *) data->cmd_buffer); + unmap_object(HKEY(data->cmd_buffer)); + list_delinit(&data->link); ralloc_free(data); } - /**/ static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain, struct device_data *device_data) { + struct instance_data *instance_data = device_data->instance; struct swapchain_data *data = rzalloc(NULL, struct swapchain_data); data->device = device_data; data->swapchain = swapchain; - data->window_size = ImVec2(300, 300); - map_object((void *) data->swapchain, data); + data->window_size = ImVec2(instance_data->params.width, instance_data->params.height); + list_inithead(&data->draws); + map_object(HKEY(data->swapchain), data); return data; } static void destroy_swapchain_data(struct swapchain_data *data) { - unmap_object((void *) data->swapchain); + unmap_object(HKEY(data->swapchain)); ralloc_free(data); } +struct overlay_draw *get_overlay_draw(struct swapchain_data *data) +{ + struct device_data *device_data = data->device; + struct overlay_draw *draw = list_is_empty(&data->draws) ? 
+ NULL : list_first_entry(&data->draws, struct overlay_draw, link); + + VkSemaphoreCreateInfo sem_info = {}; + sem_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + + if (draw && device_data->vtable.GetFenceStatus(device_data->device, draw->fence) == VK_SUCCESS) { + list_del(&draw->link); + VK_CHECK(device_data->vtable.ResetFences(device_data->device, + 1, &draw->fence)); + list_addtail(&draw->link, &data->draws); + return draw; + } + + draw = rzalloc(data, struct overlay_draw); + + VkCommandBufferAllocateInfo cmd_buffer_info = {}; + cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + cmd_buffer_info.commandPool = data->command_pool; + cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cmd_buffer_info.commandBufferCount = 1; + VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device, + &cmd_buffer_info, + &draw->command_buffer)); + VK_CHECK(device_data->set_device_loader_data(device_data->device, + draw->command_buffer)); + + + VkFenceCreateInfo fence_info = {}; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + VK_CHECK(device_data->vtable.CreateFence(device_data->device, + &fence_info, + NULL, + &draw->fence)); + + VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info, + NULL, &draw->semaphore)); + + list_addtail(&draw->link, &data->draws); + + return draw; +} + +static const char *param_unit(enum overlay_param_enabled param) +{ + switch (param) { + case OVERLAY_PARAM_ENABLED_frame_timing: + case OVERLAY_PARAM_ENABLED_acquire_timing: + case OVERLAY_PARAM_ENABLED_present_timing: + return "(us)"; + case OVERLAY_PARAM_ENABLED_gpu_timing: + return "(ns)"; + default: + return ""; + } +} + static void snapshot_swapchain_frame(struct swapchain_data *data) { struct device_data *device_data = data->device; @@ -430,7 +573,38 @@ static void snapshot_swapchain_frame(struct swapchain_data *data) if (elapsed >= instance_data->params.fps_sampling_period) { data->fps = 1000000.0f * 
data->n_frames_since_update / elapsed; if (instance_data->params.output_file) { - fprintf(instance_data->params.output_file, "%.2f\n", data->fps); + if (!instance_data->first_line_printed) { + bool first_column = true; + + instance_data->first_line_printed = true; + +#define OVERLAY_PARAM_BOOL(name) \ + if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_##name]) { \ + fprintf(instance_data->params.output_file, \ + "%s%s%s", first_column ? "" : ", ", #name, \ + param_unit(OVERLAY_PARAM_ENABLED_##name)); \ + first_column = false; \ + } +#define OVERLAY_PARAM_CUSTOM(name) + OVERLAY_PARAMS +#undef OVERLAY_PARAM_BOOL +#undef OVERLAY_PARAM_CUSTOM + fprintf(instance_data->params.output_file, "\n"); + } + + for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + if (!instance_data->params.enabled[s]) + continue; + if (s == OVERLAY_PARAM_ENABLED_fps) { + fprintf(instance_data->params.output_file, + "%s%.2f", s == 0 ? "" : ", ", data->fps); + } else { + fprintf(instance_data->params.output_file, + "%s%" PRIu64, s == 0 ? "" : ", ", + data->accumulated_stats.stats[s]); + } + } + fprintf(instance_data->params.output_file, "\n"); fflush(instance_data->params.output_file); } @@ -461,7 +635,7 @@ static float get_time_stat(void *_data, int _idx) _idx + data->n_frames; idx %= ARRAY_SIZE(data->frames_stats); /* Time stats are in us. 
*/ - return data->frames_stats[idx].stats[data->stat_selector] / 1000.0f; + return data->frames_stats[idx].stats[data->stat_selector] / data->time_dividor; } static float get_stat(void *_data, int _idx) @@ -482,24 +656,25 @@ static void position_layer(struct swapchain_data *data) { struct device_data *device_data = data->device; struct instance_data *instance_data = device_data->instance; + const float margin = 10.0f; ImGui::SetNextWindowBgAlpha(0.5); ImGui::SetNextWindowSize(data->window_size, ImGuiCond_Always); switch (instance_data->params.position) { case LAYER_POSITION_TOP_LEFT: - ImGui::SetNextWindowPos(ImVec2(0, 0), ImGuiCond_Always); + ImGui::SetNextWindowPos(ImVec2(margin, margin), ImGuiCond_Always); break; case LAYER_POSITION_TOP_RIGHT: - ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x, 0), + ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, margin), ImGuiCond_Always); break; case LAYER_POSITION_BOTTOM_LEFT: - ImGui::SetNextWindowPos(ImVec2(0, data->height - data->window_size.y), + ImGui::SetNextWindowPos(ImVec2(margin, data->height - data->window_size.y - margin), ImGuiCond_Always); break; case LAYER_POSITION_BOTTOM_RIGHT: - ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x, - data->height - data->window_size.y), + ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, + data->height - data->window_size.y - margin), ImGuiCond_Always); break; } @@ -542,17 +717,23 @@ static void compute_swapchain_display(struct swapchain_data *data) for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { if (!instance_data->params.enabled[s] || - s == OVERLAY_PARAM_ENABLED_fps) + s == OVERLAY_PARAM_ENABLED_fps || + s == OVERLAY_PARAM_ENABLED_frame) continue; char hash[40]; snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]); data->stat_selector = (enum overlay_param_enabled) s; + data->time_dividor = 1000.0f; + if (s == OVERLAY_PARAM_ENABLED_gpu_timing) + data->time_dividor = 
1000000.0f; if (s == OVERLAY_PARAM_ENABLED_frame_timing || - s == OVERLAY_PARAM_ENABLED_acquire_timing) { - double min_time = data->stats_min.stats[s] / 1000.0f; - double max_time = data->stats_max.stats[s] / 1000.0f; + s == OVERLAY_PARAM_ENABLED_acquire_timing || + s == OVERLAY_PARAM_ENABLED_present_timing || + s == OVERLAY_PARAM_ENABLED_gpu_timing) { + double min_time = data->stats_min.stats[s] / data->time_dividor; + double max_time = data->stats_max.stats[s] / data->time_dividor; ImGui::PlotHistogram(hash, get_time_stat, data, ARRAY_SIZE(data->frames_stats), 0, NULL, min_time, max_time, @@ -730,17 +911,20 @@ static void CreateOrResizeBuffer(struct device_data *data, *buffer_size = new_size; } -static void render_swapchain_display(struct swapchain_data *data, unsigned image_index) +static struct overlay_draw *render_swapchain_display(struct swapchain_data *data, + struct queue_data *present_queue, + const VkSemaphore *wait_semaphores, + unsigned n_wait_semaphores, + unsigned image_index) { ImDrawData* draw_data = ImGui::GetDrawData(); if (draw_data->TotalVtxCount == 0) - return; + return NULL; struct device_data *device_data = data->device; - uint32_t idx = data->n_frames % ARRAY_SIZE(data->frame_data); - VkCommandBuffer command_buffer = data->frame_data[idx].command_buffer; + struct overlay_draw *draw = get_overlay_draw(data); - device_data->vtable.ResetCommandBuffer(command_buffer, 0); + device_data->vtable.ResetCommandBuffer(draw->command_buffer, 0); VkRenderPassBeginInfo render_pass_info = {}; render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; @@ -752,9 +936,9 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image VkCommandBufferBeginInfo buffer_begin_info = {}; buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - device_data->vtable.BeginCommandBuffer(command_buffer, &buffer_begin_info); + device_data->vtable.BeginCommandBuffer(draw->command_buffer, &buffer_begin_info); - 
ensure_swapchain_fonts(data, command_buffer); + ensure_swapchain_fonts(data, draw->command_buffer); /* Bounce the image to display back to color attachment layout for * rendering on top of it. @@ -772,9 +956,9 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image imb.subresourceRange.levelCount = 1; imb.subresourceRange.baseArrayLayer = 0; imb.subresourceRange.layerCount = 1; - imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index; + imb.srcQueueFamilyIndex = present_queue->family_index; imb.dstQueueFamilyIndex = device_data->graphic_queue->family_index; - device_data->vtable.CmdPipelineBarrier(command_buffer, + device_data->vtable.CmdPipelineBarrier(draw->command_buffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, /* dependency flags */ @@ -782,37 +966,33 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image 0, nullptr, /* buffer memory barriers */ 1, &imb); /* image memory barriers */ - device_data->vtable.CmdBeginRenderPass(command_buffer, &render_pass_info, + device_data->vtable.CmdBeginRenderPass(draw->command_buffer, &render_pass_info, VK_SUBPASS_CONTENTS_INLINE); /* Create/Resize vertex & index buffers */ size_t vertex_size = draw_data->TotalVtxCount * sizeof(ImDrawVert); size_t index_size = draw_data->TotalIdxCount * sizeof(ImDrawIdx); - if (data->frame_data[idx].vertex_buffer_size < vertex_size) { + if (draw->vertex_buffer_size < vertex_size) { CreateOrResizeBuffer(device_data, - &data->frame_data[idx].vertex_buffer, - &data->frame_data[idx].vertex_buffer_mem, - &data->frame_data[idx].vertex_buffer_size, + &draw->vertex_buffer, + &draw->vertex_buffer_mem, + &draw->vertex_buffer_size, vertex_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); } - if (data->frame_data[idx].index_buffer_size < index_size) { + if (draw->index_buffer_size < index_size) { CreateOrResizeBuffer(device_data, - &data->frame_data[idx].index_buffer, - &data->frame_data[idx].index_buffer_mem, 
- &data->frame_data[idx].index_buffer_size, + &draw->index_buffer, + &draw->index_buffer_mem, + &draw->index_buffer_size, index_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT); } /* Upload vertex & index data */ - VkBuffer vertex_buffer = data->frame_data[idx].vertex_buffer; - VkDeviceMemory vertex_mem = data->frame_data[idx].vertex_buffer_mem; - VkBuffer index_buffer = data->frame_data[idx].index_buffer; - VkDeviceMemory index_mem = data->frame_data[idx].index_buffer_mem; ImDrawVert* vtx_dst = NULL; ImDrawIdx* idx_dst = NULL; - VK_CHECK(device_data->vtable.MapMemory(device_data->device, vertex_mem, + VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->vertex_buffer_mem, 0, vertex_size, 0, (void**)(&vtx_dst))); - VK_CHECK(device_data->vtable.MapMemory(device_data->device, index_mem, + VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->index_buffer_mem, 0, index_size, 0, (void**)(&idx_dst))); for (int n = 0; n < draw_data->CmdListsCount; n++) { @@ -824,26 +1004,26 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image } VkMappedMemoryRange range[2] = {}; range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range[0].memory = vertex_mem; + range[0].memory = draw->vertex_buffer_mem; range[0].size = VK_WHOLE_SIZE; range[1].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range[1].memory = index_mem; + range[1].memory = draw->index_buffer_mem; range[1].size = VK_WHOLE_SIZE; VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range)); - device_data->vtable.UnmapMemory(device_data->device, vertex_mem); - device_data->vtable.UnmapMemory(device_data->device, index_mem); + device_data->vtable.UnmapMemory(device_data->device, draw->vertex_buffer_mem); + device_data->vtable.UnmapMemory(device_data->device, draw->index_buffer_mem); /* Bind pipeline and descriptor sets */ - device_data->vtable.CmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline); + 
device_data->vtable.CmdBindPipeline(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline); VkDescriptorSet desc_set[1] = { data->descriptor_set }; - device_data->vtable.CmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + device_data->vtable.CmdBindDescriptorSets(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline_layout, 0, 1, desc_set, 0, NULL); /* Bind vertex & index buffers */ - VkBuffer vertex_buffers[1] = { vertex_buffer }; + VkBuffer vertex_buffers[1] = { draw->vertex_buffer }; VkDeviceSize vertex_offset[1] = { 0 }; - device_data->vtable.CmdBindVertexBuffers(command_buffer, 0, 1, vertex_buffers, vertex_offset); - device_data->vtable.CmdBindIndexBuffer(command_buffer, index_buffer, 0, VK_INDEX_TYPE_UINT16); + device_data->vtable.CmdBindVertexBuffers(draw->command_buffer, 0, 1, vertex_buffers, vertex_offset); + device_data->vtable.CmdBindIndexBuffer(draw->command_buffer, draw->index_buffer, 0, VK_INDEX_TYPE_UINT16); /* Setup viewport */ VkViewport viewport; @@ -853,7 +1033,7 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image viewport.height = draw_data->DisplaySize.y; viewport.minDepth = 0.0f; viewport.maxDepth = 1.0f; - device_data->vtable.CmdSetViewport(command_buffer, 0, 1, &viewport); + device_data->vtable.CmdSetViewport(draw->command_buffer, 0, 1, &viewport); /* Setup scale and translation through push constants : @@ -868,10 +1048,10 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image float translate[2]; translate[0] = -1.0f - draw_data->DisplayPos.x * scale[0]; translate[1] = -1.0f - draw_data->DisplayPos.y * scale[1]; - device_data->vtable.CmdPushConstants(command_buffer, data->pipeline_layout, + device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, sizeof(float) * 0, sizeof(float) * 2, scale); - device_data->vtable.CmdPushConstants(command_buffer, data->pipeline_layout, + 
device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, sizeof(float) * 2, sizeof(float) * 2, translate); @@ -892,42 +1072,57 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image scissor.offset.y = (int32_t)(pcmd->ClipRect.y - display_pos.y) > 0 ? (int32_t)(pcmd->ClipRect.y - display_pos.y) : 0; scissor.extent.width = (uint32_t)(pcmd->ClipRect.z - pcmd->ClipRect.x); scissor.extent.height = (uint32_t)(pcmd->ClipRect.w - pcmd->ClipRect.y + 1); // FIXME: Why +1 here? - device_data->vtable.CmdSetScissor(command_buffer, 0, 1, &scissor); + device_data->vtable.CmdSetScissor(draw->command_buffer, 0, 1, &scissor); // Draw - device_data->vtable.CmdDrawIndexed(command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0); + device_data->vtable.CmdDrawIndexed(draw->command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0); idx_offset += pcmd->ElemCount; } vtx_offset += cmd_list->VtxBuffer.Size; } - device_data->vtable.CmdEndRenderPass(command_buffer); - device_data->vtable.EndCommandBuffer(command_buffer); + device_data->vtable.CmdEndRenderPass(draw->command_buffer); - if (data->submission_semaphore) { - device_data->vtable.DestroySemaphore(device_data->device, - data->submission_semaphore, - NULL); - } - /* Submission semaphore */ - VkSemaphoreCreateInfo semaphore_info = {}; - semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &semaphore_info, - NULL, &data->submission_semaphore)); + /* Bounce the image to display back to present layout. 
*/ + imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imb.pNext = nullptr; + imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imb.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + imb.image = data->images[image_index]; + imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imb.subresourceRange.baseMipLevel = 0; + imb.subresourceRange.levelCount = 1; + imb.subresourceRange.baseArrayLayer = 0; + imb.subresourceRange.layerCount = 1; + imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index; + imb.dstQueueFamilyIndex = present_queue->family_index; + device_data->vtable.CmdPipelineBarrier(draw->command_buffer, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + 0, /* dependency flags */ + 0, nullptr, /* memory barriers */ + 0, nullptr, /* buffer memory barriers */ + 1, &imb); /* image memory barriers */ + + device_data->vtable.EndCommandBuffer(draw->command_buffer); VkSubmitInfo submit_info = {}; VkPipelineStageFlags stage_wait = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; + submit_info.pCommandBuffers = &draw->command_buffer; submit_info.pWaitDstStageMask = &stage_wait; + submit_info.waitSemaphoreCount = n_wait_semaphores; + submit_info.pWaitSemaphores = wait_semaphores; submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &data->submission_semaphore; + submit_info.pSignalSemaphores = &draw->semaphore; - device_data->vtable.WaitForFences(device_data->device, 1, &data->fence, VK_TRUE, UINT64_MAX); - device_data->vtable.ResetFences(device_data->device, 1, &data->fence); - device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, data->fence); + device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, 
&submit_info, draw->fence); + + return draw; } static const uint32_t overlay_vert_spv[] = { @@ -1292,7 +1487,7 @@ static void setup_swapchain_data(struct swapchain_data *data, NULL, &data->framebuffers[i])); } - /* Command buffer */ + /* Command buffer pool */ VkCommandPoolCreateInfo cmd_buffer_pool_info = {}; cmd_buffer_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cmd_buffer_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; @@ -1300,36 +1495,21 @@ static void setup_swapchain_data(struct swapchain_data *data, VK_CHECK(device_data->vtable.CreateCommandPool(device_data->device, &cmd_buffer_pool_info, NULL, &data->command_pool)); - - VkCommandBuffer cmd_bufs[ARRAY_SIZE(data->frame_data)]; - - VkCommandBufferAllocateInfo cmd_buffer_info = {}; - cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - cmd_buffer_info.commandPool = data->command_pool; - cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - cmd_buffer_info.commandBufferCount = 2; - VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device, - &cmd_buffer_info, - cmd_bufs)); - for (uint32_t i = 0; i < ARRAY_SIZE(data->frame_data); i++) { - VK_CHECK(device_data->set_device_loader_data(device_data->device, - cmd_bufs[i])); - - data->frame_data[i].command_buffer = cmd_bufs[i]; - } - - /* Submission fence */ - VkFenceCreateInfo fence_info = {}; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; - VK_CHECK(device_data->vtable.CreateFence(device_data->device, &fence_info, - NULL, &data->fence)); } static void shutdown_swapchain_data(struct swapchain_data *data) { struct device_data *device_data = data->device; + list_for_each_entry_safe(struct overlay_draw, draw, &data->draws, link) { + device_data->vtable.DestroySemaphore(device_data->device, draw->semaphore, NULL); + device_data->vtable.DestroyFence(device_data->device, draw->fence, NULL); + 
device_data->vtable.DestroyBuffer(device_data->device, draw->vertex_buffer, NULL); + device_data->vtable.DestroyBuffer(device_data->device, draw->index_buffer, NULL); + device_data->vtable.FreeMemory(device_data->device, draw->vertex_buffer_mem, NULL); + device_data->vtable.FreeMemory(device_data->device, draw->index_buffer_mem, NULL); + } + for (uint32_t i = 0; i < data->n_images; i++) { device_data->vtable.DestroyImageView(device_data->device, data->image_views[i], NULL); device_data->vtable.DestroyFramebuffer(device_data->device, data->framebuffers[i], NULL); @@ -1337,25 +1517,8 @@ static void shutdown_swapchain_data(struct swapchain_data *data) device_data->vtable.DestroyRenderPass(device_data->device, data->render_pass, NULL); - for (uint32_t i = 0; i < ARRAY_SIZE(data->frame_data); i++) { - device_data->vtable.FreeCommandBuffers(device_data->device, - data->command_pool, - 1, &data->frame_data[i].command_buffer); - if (data->frame_data[i].vertex_buffer) - device_data->vtable.DestroyBuffer(device_data->device, data->frame_data[i].vertex_buffer, NULL); - if (data->frame_data[i].index_buffer) - device_data->vtable.DestroyBuffer(device_data->device, data->frame_data[i].index_buffer, NULL); - if (data->frame_data[i].vertex_buffer_mem) - device_data->vtable.FreeMemory(device_data->device, data->frame_data[i].vertex_buffer_mem, NULL); - if (data->frame_data[i].index_buffer_mem) - device_data->vtable.FreeMemory(device_data->device, data->frame_data[i].index_buffer_mem, NULL); - } device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL); - device_data->vtable.DestroyFence(device_data->device, data->fence, NULL); - if (data->submission_semaphore) - device_data->vtable.DestroySemaphore(device_data->device, data->submission_semaphore, NULL); - device_data->vtable.DestroyPipeline(device_data->device, data->pipeline, NULL); device_data->vtable.DestroyPipelineLayout(device_data->device, data->pipeline_layout, NULL); @@ -1375,24 +1538,34 @@ 
static void shutdown_swapchain_data(struct swapchain_data *data) ImGui::DestroyContext(data->imgui_context); } -static void before_present(struct swapchain_data *swapchain_data, - unsigned imageIndex) +static struct overlay_draw *before_present(struct swapchain_data *swapchain_data, + struct queue_data *present_queue, + const VkSemaphore *wait_semaphores, + unsigned n_wait_semaphores, + unsigned imageIndex) { + struct instance_data *instance_data = swapchain_data->device->instance; + struct overlay_draw *draw = NULL; + snapshot_swapchain_frame(swapchain_data); - if (swapchain_data->n_frames > 0) { + if (!instance_data->params.no_display && swapchain_data->n_frames > 0) { compute_swapchain_display(swapchain_data); - render_swapchain_display(swapchain_data, imageIndex); + draw = render_swapchain_display(swapchain_data, present_queue, + wait_semaphores, n_wait_semaphores, + imageIndex); } + + return draw; } -VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateSwapchainKHR( +static VkResult overlay_CreateSwapchainKHR( VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain) { - struct device_data *device_data = FIND_DEVICE_DATA(device); + struct device_data *device_data = FIND(struct device_data, device); VkResult result = device_data->vtable.CreateSwapchainKHR(device, pCreateInfo, pAllocator, pSwapchain); if (result != VK_SUCCESS) return result; @@ -1401,58 +1574,141 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateSwapchainKHR( return result; } -VKAPI_ATTR void VKAPI_CALL overlay_DestroySwapchainKHR( +static void overlay_DestroySwapchainKHR( VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain); + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, swapchain); shutdown_swapchain_data(swapchain_data); swapchain_data->device->vtable.DestroySwapchainKHR(device, swapchain, pAllocator); 
destroy_swapchain_data(swapchain_data); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_QueuePresentKHR( +static VkResult overlay_QueuePresentKHR( VkQueue queue, const VkPresentInfoKHR* pPresentInfo) { - struct queue_data *queue_data = FIND_QUEUE_DATA(queue); + struct queue_data *queue_data = FIND(struct queue_data, queue); struct device_data *device_data = queue_data->device; - - /* If we present on the graphic queue this layer is using to draw an - * overlay, we don't need more than submitting the overlay draw prior to - * present. - */ - if (queue_data == device_data->graphic_queue) { - for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pPresentInfo->pSwapchains[i]); - before_present(swapchain_data, pPresentInfo->pImageIndices[i]); + struct instance_data *instance_data = device_data->instance; + uint32_t query_results[OVERLAY_QUERY_COUNT]; + + device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame]++; + + if (list_length(&queue_data->running_command_buffer) > 0) { + /* Before getting the query results, make sure the operations have + * completed. + */ + VK_CHECK(device_data->vtable.ResetFences(device_data->device, + 1, &queue_data->queries_fence)); + VK_CHECK(device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence)); + VK_CHECK(device_data->vtable.WaitForFences(device_data->device, + 1, &queue_data->queries_fence, + VK_FALSE, UINT64_MAX)); + + /* Now get the results. 
*/ + list_for_each_entry_safe(struct command_buffer_data, cmd_buffer_data, + &queue_data->running_command_buffer, link) { + list_delinit(&cmd_buffer_data->link); + + if (cmd_buffer_data->pipeline_query_pool) { + memset(query_results, 0, sizeof(query_results)); + VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index, 1, + sizeof(uint32_t) * OVERLAY_QUERY_COUNT, + query_results, 0, VK_QUERY_RESULT_WAIT_BIT)); + + for (uint32_t i = OVERLAY_PARAM_ENABLED_vertices; + i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) { + device_data->frame_stats.stats[i] += query_results[i - OVERLAY_PARAM_ENABLED_vertices]; + } + } + if (cmd_buffer_data->timestamp_query_pool) { + uint64_t gpu_timestamps[2] = { 0 }; + VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2, 2, + 2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t), + VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT)); + + gpu_timestamps[0] &= queue_data->timestamp_mask; + gpu_timestamps[1] &= queue_data->timestamp_mask; + device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] += + (gpu_timestamps[1] - gpu_timestamps[0]) * + device_data->properties.limits.timestampPeriod; + } } - return queue_data->device->vtable.QueuePresentKHR(queue, pPresentInfo); } - /* Otherwise we need to do cross queue synchronization to tie the overlay - * draw into the present queue. + /* Otherwise we need to add our overlay drawing semaphore to the list of + * semaphores to wait on. If we don't do that the presented picture might + * have incomplete overlay drawings. 
*/ - VkPresentInfoKHR present_info = *pPresentInfo; - VkSemaphore *semaphores = - (VkSemaphore *)malloc(sizeof(VkSemaphore) * (pPresentInfo->waitSemaphoreCount + pPresentInfo->swapchainCount)); - for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++) - semaphores[i] = pPresentInfo->pWaitSemaphores[i]; - for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pPresentInfo->pSwapchains[i]); - before_present(swapchain_data, pPresentInfo->pImageIndices[i]); - semaphores[pPresentInfo->waitSemaphoreCount + i] = swapchain_data->submission_semaphore; + VkResult result = VK_SUCCESS; + if (instance_data->params.no_display) { + for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { + VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i]; + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, swapchain); + + before_present(swapchain_data, + queue_data, + pPresentInfo->pWaitSemaphores, + pPresentInfo->waitSemaphoreCount, + pPresentInfo->pImageIndices[i]); + + VkPresentInfoKHR present_info = *pPresentInfo; + present_info.swapchainCount = 1; + present_info.pSwapchains = &swapchain; + + uint64_t ts0 = os_time_get(); + result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); + uint64_t ts1 = os_time_get(); + swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0; + } + } else { + for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { + VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i]; + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, swapchain); + VkPresentInfoKHR present_info = *pPresentInfo; + present_info.swapchainCount = 1; + present_info.pSwapchains = &swapchain; + + uint32_t image_index = pPresentInfo->pImageIndices[i]; + + struct overlay_draw *draw = before_present(swapchain_data, + queue_data, + pPresentInfo->pWaitSemaphores, + pPresentInfo->waitSemaphoreCount, + image_index); + + /* 
Because the submission of the overlay draw waits on the semaphores + * handed for present, we don't need to have this present operation + * wait on them as well, we can just wait on the overlay submission + * semaphore. + */ + present_info.pWaitSemaphores = &draw->semaphore; + present_info.waitSemaphoreCount = 1; + + uint64_t ts0 = os_time_get(); + VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); + uint64_t ts1 = os_time_get(); + swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0; + if (pPresentInfo->pResults) + pPresentInfo->pResults[i] = chain_result; + if (chain_result != VK_SUCCESS && result == VK_SUCCESS) + result = chain_result; + } } - present_info.pWaitSemaphores = semaphores; - present_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount + pPresentInfo->swapchainCount; - VkResult result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); - free(semaphores); return result; } -VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImageKHR( +static VkResult overlay_AcquireNextImageKHR( VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, @@ -1460,7 +1716,8 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImageKHR( VkFence fence, uint32_t* pImageIndex) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain); + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, swapchain); struct device_data *device_data = swapchain_data->device; uint64_t ts0 = os_time_get(); @@ -1474,12 +1731,13 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImageKHR( return result; } -VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImage2KHR( +static VkResult overlay_AcquireNextImage2KHR( VkDevice device, const VkAcquireNextImageInfoKHR* pAcquireInfo, uint32_t* pImageIndex) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pAcquireInfo->swapchain); + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, 
pAcquireInfo->swapchain); struct device_data *device_data = swapchain_data->device; uint64_t ts0 = os_time_get(); @@ -1492,21 +1750,22 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImage2KHR( return result; } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDraw( +static void overlay_CmdDraw( VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++; struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexed( +static void overlay_CmdDrawIndexed( VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, @@ -1514,40 +1773,43 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexed( int32_t vertexOffset, uint32_t firstInstance) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++; struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirect( +static void overlay_CmdDrawIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++; struct 
device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirect( +static void overlay_CmdDrawIndexedIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++; struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR( +static void overlay_CmdDrawIndirectCountKHR( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, @@ -1556,7 +1818,8 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR( uint32_t maxDrawCount, uint32_t stride) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++; struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDrawIndirectCountKHR(commandBuffer, buffer, offset, @@ -1564,7 +1827,7 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR( maxDrawCount, stride); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR( +static void overlay_CmdDrawIndexedIndirectCountKHR( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, @@ -1573,7 +1836,8 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR( uint32_t maxDrawCount, uint32_t stride) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct 
command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++; struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, @@ -1581,35 +1845,38 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR( maxDrawCount, stride); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatch( +static void overlay_CmdDispatch( VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++; struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatchIndirect( +static void overlay_CmdDispatchIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++; struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDispatchIndirect(commandBuffer, buffer, offset); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdBindPipeline( +static void overlay_CmdBindPipeline( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); switch (pipelineBindPoint) { case 
VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break; case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break; @@ -1620,22 +1887,95 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdBindPipeline( device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_BeginCommandBuffer( +static VkResult overlay_BeginCommandBuffer( VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo* pBeginInfo) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); struct device_data *device_data = cmd_buffer_data->device; - return device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); + memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); + + /* We don't record any query in secondary command buffers, just make sure + * we have the right inheritance. 
+ */ + if (cmd_buffer_data->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + VkCommandBufferBeginInfo *begin_info = (VkCommandBufferBeginInfo *) + clone_chain((const struct VkBaseInStructure *)pBeginInfo); + VkCommandBufferInheritanceInfo *parent_inhe_info = (VkCommandBufferInheritanceInfo *) + vk_find_struct(begin_info, COMMAND_BUFFER_INHERITANCE_INFO); + VkCommandBufferInheritanceInfo inhe_info = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, + NULL, + VK_NULL_HANDLE, + 0, + VK_NULL_HANDLE, + VK_FALSE, + 0, + overlay_query_flags, + }; + + if (parent_inhe_info) + parent_inhe_info->pipelineStatistics = overlay_query_flags; + else { + inhe_info.pNext = begin_info->pNext; + begin_info->pNext = &inhe_info; + } + + VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); + + if (!parent_inhe_info) + begin_info->pNext = inhe_info.pNext; + + free_chain((struct VkBaseOutStructure *)begin_info); + + return result; + } + + /* Otherwise record a begin query as first command. 
*/ + VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); + + if (result == VK_SUCCESS) { + if (cmd_buffer_data->pipeline_query_pool) { + device_data->vtable.CmdResetQueryPool(commandBuffer, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index, 1); + } + if (cmd_buffer_data->timestamp_query_pool) { + device_data->vtable.CmdResetQueryPool(commandBuffer, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2, 2); + } + if (cmd_buffer_data->pipeline_query_pool) { + device_data->vtable.CmdBeginQuery(commandBuffer, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index, 0); + } + if (cmd_buffer_data->timestamp_query_pool) { + device_data->vtable.CmdWriteTimestamp(commandBuffer, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2); + } + } + + return result; } -VKAPI_ATTR VkResult VKAPI_CALL overlay_EndCommandBuffer( +static VkResult overlay_EndCommandBuffer( VkCommandBuffer commandBuffer) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); struct device_data *device_data = cmd_buffer_data->device; + if (cmd_buffer_data->timestamp_query_pool) { + device_data->vtable.CmdWriteTimestamp(commandBuffer, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2 + 1); + } if (cmd_buffer_data->pipeline_query_pool) { device_data->vtable.CmdEndQuery(commandBuffer, cmd_buffer_data->pipeline_query_pool, @@ -1645,11 +1985,12 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_EndCommandBuffer( return device_data->vtable.EndCommandBuffer(commandBuffer); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_ResetCommandBuffer( +static VkResult overlay_ResetCommandBuffer( VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags) { - struct command_buffer_data 
*cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); struct device_data *device_data = cmd_buffer_data->device; memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); @@ -1657,17 +1998,19 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_ResetCommandBuffer( return device_data->vtable.ResetCommandBuffer(commandBuffer, flags); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdExecuteCommands( +static void overlay_CmdExecuteCommands( VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); struct device_data *device_data = cmd_buffer_data->device; /* Add the stats of the executed command buffers to the primary one. */ for (uint32_t c = 0; c < commandBufferCount; c++) { - struct command_buffer_data *sec_cmd_buffer_data = FIND_CMD_BUFFER_DATA(pCommandBuffers[c]); + struct command_buffer_data *sec_cmd_buffer_data = + FIND(struct command_buffer_data, pCommandBuffers[c]); for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s]; @@ -1676,42 +2019,104 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdExecuteCommands( device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_AllocateCommandBuffers( +static VkResult overlay_AllocateCommandBuffers( VkDevice device, const VkCommandBufferAllocateInfo* pAllocateInfo, VkCommandBuffer* pCommandBuffers) { - struct device_data *device_data = FIND_DEVICE_DATA(device); + struct device_data *device_data = FIND(struct device_data, device); VkResult result = device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers); if (result != VK_SUCCESS) return result; - 
for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) - new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level, device_data); + + VkQueryPool pipeline_query_pool = VK_NULL_HANDLE; + VkQueryPool timestamp_query_pool = VK_NULL_HANDLE; + if (device_data->instance->pipeline_statistics_enabled && + pAllocateInfo->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + VkQueryPoolCreateInfo pool_info = { + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + NULL, + 0, + VK_QUERY_TYPE_PIPELINE_STATISTICS, + pAllocateInfo->commandBufferCount, + overlay_query_flags, + }; + VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info, + NULL, &pipeline_query_pool)); + } + if (device_data->instance->params.enabled[OVERLAY_PARAM_ENABLED_gpu_timing]) { + VkQueryPoolCreateInfo pool_info = { + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + NULL, + 0, + VK_QUERY_TYPE_TIMESTAMP, + pAllocateInfo->commandBufferCount * 2, + 0, + }; + VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info, + NULL, &timestamp_query_pool)); + } + + for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) { + new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level, + pipeline_query_pool, timestamp_query_pool, + i, device_data); + } + + if (pipeline_query_pool) + map_object(HKEY(pipeline_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount); + if (timestamp_query_pool) + map_object(HKEY(timestamp_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount); + return result; } -VKAPI_ATTR void VKAPI_CALL overlay_FreeCommandBuffers(VkDevice device, - VkCommandPool commandPool, - uint32_t commandBufferCount, - const VkCommandBuffer* pCommandBuffers) +static void overlay_FreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) { - struct device_data *device_data = FIND_DEVICE_DATA(device); + struct device_data *device_data = FIND(struct device_data, device); 
for (uint32_t i = 0; i < commandBufferCount; i++) { - struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(pCommandBuffers[i]); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, pCommandBuffers[i]); + + /* It is legal to free a NULL command buffer*/ + if (!cmd_buffer_data) + continue; + + uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->pipeline_query_pool)); + if (count == 1) { + unmap_object(HKEY(cmd_buffer_data->pipeline_query_pool)); + device_data->vtable.DestroyQueryPool(device_data->device, + cmd_buffer_data->pipeline_query_pool, NULL); + } else if (count != 0) { + map_object(HKEY(cmd_buffer_data->pipeline_query_pool), (void *)(uintptr_t)(count - 1)); + } + count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->timestamp_query_pool)); + if (count == 1) { + unmap_object(HKEY(cmd_buffer_data->timestamp_query_pool)); + device_data->vtable.DestroyQueryPool(device_data->device, + cmd_buffer_data->timestamp_query_pool, NULL); + } else if (count != 0) { + map_object(HKEY(cmd_buffer_data->timestamp_query_pool), (void *)(uintptr_t)(count - 1)); + } destroy_command_buffer_data(cmd_buffer_data); } + device_data->vtable.FreeCommandBuffers(device, commandPool, commandBufferCount, pCommandBuffers); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_QueueSubmit( +static VkResult overlay_QueueSubmit( VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) { - struct queue_data *queue_data = FIND_QUEUE_DATA(queue); + struct queue_data *queue_data = FIND(struct queue_data, queue); struct device_data *device_data = queue_data->device; device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++; @@ -1719,24 +2124,40 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_QueueSubmit( for (uint32_t s = 0; s < submitCount; s++) { for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) { struct command_buffer_data *cmd_buffer_data = - FIND_CMD_BUFFER_DATA(pSubmits[s].pCommandBuffers[c]); + FIND(struct 
command_buffer_data, pSubmits[s].pCommandBuffers[c]); /* Merge the submitted command buffer stats into the device. */ for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++) device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st]; + + /* Attach the command buffer to the queue so we remember to read its + * pipeline statistics & timestamps at QueuePresent(). + */ + if (!cmd_buffer_data->pipeline_query_pool && + !cmd_buffer_data->timestamp_query_pool) + continue; + + if (list_is_empty(&cmd_buffer_data->link)) { + list_addtail(&cmd_buffer_data->link, + &queue_data->running_command_buffer); + } else { + fprintf(stderr, "Command buffer submitted multiple times before present.\n" + "This could lead to invalid data.\n"); + } } } return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateDevice( +static VkResult overlay_CreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { - struct instance_data *instance_data = FIND_PHYSICAL_DEVICE_DATA(physicalDevice); + struct instance_data *instance_data = + FIND(struct instance_data, physicalDevice); VkLayerDeviceCreateInfo *chain_info = get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); @@ -1751,7 +2172,19 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateDevice( // Advance the link info for the next element on the chain chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; - VkResult result = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice); + VkPhysicalDeviceFeatures device_features = {}; + VkDeviceCreateInfo device_info = *pCreateInfo; + + if (pCreateInfo->pEnabledFeatures) + device_features = *(pCreateInfo->pEnabledFeatures); + if (instance_data->pipeline_statistics_enabled) { + device_features.inheritedQueries = true; + device_features.pipelineStatisticsQuery = true; + } + device_info.pEnabledFeatures = &device_features; + + + 
VkResult result = fpCreateDevice(physicalDevice, &device_info, pAllocator, pDevice); if (result != VK_SUCCESS) return result; struct device_data *device_data = new_device_data(*pDevice, instance_data); @@ -1770,17 +2203,17 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateDevice( return result; } -VKAPI_ATTR void VKAPI_CALL overlay_DestroyDevice( +static void overlay_DestroyDevice( VkDevice device, const VkAllocationCallbacks* pAllocator) { - struct device_data *device_data = FIND_DEVICE_DATA(device); + struct device_data *device_data = FIND(struct device_data, device); device_unmap_queues(device_data); device_data->vtable.DestroyDevice(device, pAllocator); destroy_device_data(device_data); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateInstance( +static VkResult overlay_CreateInstance( const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) @@ -1811,14 +2244,22 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateInstance( parse_overlay_env(&instance_data->params, getenv("VK_LAYER_MESA_OVERLAY_CONFIG")); + for (int i = OVERLAY_PARAM_ENABLED_vertices; + i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) { + if (instance_data->params.enabled[i]) { + instance_data->pipeline_statistics_enabled = true; + break; + } + } + return result; } -VKAPI_ATTR void VKAPI_CALL overlay_DestroyInstance( +static void overlay_DestroyInstance( VkInstance instance, const VkAllocationCallbacks* pAllocator) { - struct instance_data *instance_data = FIND_INSTANCE_DATA(instance); + struct instance_data *instance_data = FIND(struct instance_data, instance); instance_data_map_physical_devices(instance_data, false); instance_data->vtable.DestroyInstance(instance, pAllocator); destroy_instance_data(instance_data); @@ -1839,6 +2280,7 @@ static const struct { ADD_HOOK(CmdDraw), ADD_HOOK(CmdDrawIndexed), + ADD_HOOK(CmdDrawIndirect), ADD_HOOK(CmdDrawIndexedIndirect), ADD_HOOK(CmdDispatch), ADD_HOOK(CmdDispatchIndirect), @@ -1854,10 +2296,12 @@ static 
const struct { ADD_HOOK(AcquireNextImage2KHR), ADD_HOOK(QueueSubmit), - ADD_HOOK(CreateInstance), - ADD_HOOK(DestroyInstance), + ADD_HOOK(CreateDevice), ADD_HOOK(DestroyDevice), + + ADD_HOOK(CreateInstance), + ADD_HOOK(DestroyInstance), #undef ADD_HOOK }; @@ -1879,7 +2323,7 @@ VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(VkD if (dev == NULL) return NULL; - struct device_data *device_data = FIND_DEVICE_DATA(dev); + struct device_data *device_data = FIND(struct device_data, dev); if (device_data->vtable.GetDeviceProcAddr == NULL) return NULL; return device_data->vtable.GetDeviceProcAddr(dev, funcName); } @@ -1892,7 +2336,7 @@ VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(V if (instance == NULL) return NULL; - struct instance_data *instance_data = FIND_INSTANCE_DATA(instance); + struct instance_data *instance_data = FIND(struct instance_data, instance); if (instance_data->vtable.GetInstanceProcAddr == NULL) return NULL; return instance_data->vtable.GetInstanceProcAddr(instance, funcName); }