X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fvulkan%2Foverlay-layer%2Foverlay.cpp;h=c415d6a82059b2133ed4f37b35659018f58d8a7d;hb=323d5bbfd9a1d6684bb5381b67b8965ea98d5fc0;hp=f3678198b00ccf541857fc11b296295c16d3e33d;hpb=20c370c6b16282424f3a2fc3166132c1ed82863a;p=mesa.git diff --git a/src/vulkan/overlay-layer/overlay.cpp b/src/vulkan/overlay-layer/overlay.cpp index f3678198b00..c415d6a8205 100644 --- a/src/vulkan/overlay-layer/overlay.cpp +++ b/src/vulkan/overlay-layer/overlay.cpp @@ -25,104 +25,57 @@ #include #include -#include #include -#include #include -#include "vk_layer_data.h" -#include "vk_layer_table.h" -#include "vk_layer_extension_utils.h" + +#include "git_sha1.h" #include "imgui.h" +#include "overlay_params.h" + #include "util/debug.h" #include "util/hash_table.h" +#include "util/list.h" #include "util/ralloc.h" #include "util/os_time.h" +#include "util/os_socket.h" #include "util/simple_mtx.h" #include "vk_enum_to_str.h" - -enum layer_position { - LAYER_POSITION_TOP_LEFT, - LAYER_POSITION_TOP_RIGHT, - LAYER_POSITION_BOTTOM_LEFT, - LAYER_POSITION_BOTTOM_RIGHT, -}; - -static enum layer_position -parse_layer_position(const char *str) -{ - if (!str || !strcmp(str, "top-left")) - return LAYER_POSITION_TOP_LEFT; - if (!strcmp(str, "top-right")) - return LAYER_POSITION_TOP_RIGHT; - if (!strcmp(str, "bottom-left")) - return LAYER_POSITION_BOTTOM_LEFT; - if (!strcmp(str, "bottom-right")) - return LAYER_POSITION_BOTTOM_RIGHT; - return LAYER_POSITION_TOP_LEFT; -} +#include "vk_util.h" /* Mapped from VkInstace/VkPhysicalDevice */ struct instance_data { - VkLayerInstanceDispatchTable vtable; + struct vk_instance_dispatch_table vtable; VkInstance instance; - enum layer_position position; - uint64_t enabled_stats; -}; + struct overlay_params params; + bool pipeline_statistics_enabled; -enum frame_stat_type { - FRAME_STAT_SUBMIT, - FRAME_STAT_DRAW, - FRAME_STAT_DRAW_INDEXED, - FRAME_STAT_DRAW_INDIRECT, - FRAME_STAT_DRAW_INDEXED_INDIRECT, - 
FRAME_STAT_DRAW_INDIRECT_COUNT, - FRAME_STAT_DRAW_INDEXED_INDIRECT_COUNT, - FRAME_STAT_DISPATCH, - FRAME_STAT_DISPATCH_INDIRECT, - FRAME_STAT_PIPELINE_GRAPHICS, - FRAME_STAT_PIPELINE_COMPUTE, - FRAME_STAT_PIPELINE_RAYTRACING, - - FRAME_STAT_COUNT, - - FRAME_STAT_ACQUIRE_TIMING = FRAME_STAT_COUNT, - FRAME_STAT_HELP, -}; + bool first_line_printed; + + int control_client; -#define FRAME_STAT_ENABLED(id) (1ULL << (FRAME_STAT_ ## id)) - -static struct debug_control enable_flags[] = { - { "submit", FRAME_STAT_ENABLED(SUBMIT) }, - { "draw", FRAME_STAT_ENABLED(DRAW) }, - { "draw-indexed", FRAME_STAT_ENABLED(DRAW_INDEXED) }, - { "draw-indirect", FRAME_STAT_ENABLED(DRAW_INDIRECT) }, - { "draw-indexed-indirect", FRAME_STAT_ENABLED(DRAW_INDEXED_INDIRECT) }, - { "draw-indirect-count", FRAME_STAT_ENABLED(DRAW_INDIRECT_COUNT) }, - { "draw-indexed-indirect-count", FRAME_STAT_ENABLED(DRAW_INDEXED_INDIRECT_COUNT) }, - { "dispatch", FRAME_STAT_ENABLED(DISPATCH) }, - { "dispatch-indirect", FRAME_STAT_ENABLED(DISPATCH_INDIRECT) }, - { "pipeline-graphics", FRAME_STAT_ENABLED(PIPELINE_GRAPHICS) }, - { "pipeline-compute", FRAME_STAT_ENABLED(PIPELINE_COMPUTE) }, - { "pipeline-raytracing", FRAME_STAT_ENABLED(PIPELINE_RAYTRACING) }, - { "acquire-timing", FRAME_STAT_ENABLED(ACQUIRE_TIMING) }, - { "help", FRAME_STAT_ENABLED(HELP) }, - { NULL, 0 }, + /* Dumping of frame stats to a file has been enabled. */ + bool capture_enabled; + + /* Dumping of frame stats to a file has been enabled and started. 
*/ + bool capture_started; }; struct frame_stat { - uint32_t stats[FRAME_STAT_COUNT]; + uint64_t stats[OVERLAY_PARAM_ENABLED_MAX]; }; -/* Mapped from VkDevice/VkCommandBuffer */ +/* Mapped from VkDevice */ struct queue_data; struct device_data { struct instance_data *instance; - VkLayerDispatchTable vtable; + PFN_vkSetDeviceLoaderData set_device_loader_data; + + struct vk_device_dispatch_table vtable; VkPhysicalDevice physical_device; VkDevice device; @@ -133,7 +86,24 @@ struct device_data { struct queue_data **queues; uint32_t n_queues; + /* For a single frame */ + struct frame_stat frame_stats; +}; + +/* Mapped from VkCommandBuffer */ +struct command_buffer_data { + struct device_data *device; + + VkCommandBufferLevel level; + + VkCommandBuffer cmd_buffer; + VkQueryPool pipeline_query_pool; + VkQueryPool timestamp_query_pool; + uint32_t query_index; + struct frame_stat stats; + + struct list_head link; /* link into queue_data::running_command_buffer */ }; /* Mapped from VkQueue */ @@ -143,6 +113,30 @@ struct queue_data { VkQueue queue; VkQueueFlags flags; uint32_t family_index; + uint64_t timestamp_mask; + + VkFence queries_fence; + + struct list_head running_command_buffer; +}; + +struct overlay_draw { + struct list_head link; + + VkCommandBuffer command_buffer; + + VkSemaphore cross_engine_semaphore; + + VkSemaphore semaphore; + VkFence fence; + + VkBuffer vertex_buffer; + VkDeviceMemory vertex_buffer_mem; + VkDeviceSize vertex_buffer_size; + + VkBuffer index_buffer; + VkDeviceMemory index_buffer_mem; + VkDeviceSize index_buffer_size; }; /* Mapped from VkSwapchainKHR */ @@ -171,17 +165,7 @@ struct swapchain_data { VkCommandPool command_pool; - struct { - VkCommandBuffer command_buffer; - - VkBuffer vertex_buffer; - VkDeviceMemory vertex_buffer_mem; - VkDeviceSize vertex_buffer_size; - - VkBuffer index_buffer; - VkDeviceMemory index_buffer_mem; - VkDeviceSize index_buffer_size; - } frame_data[2]; + struct list_head draws; /* List of struct overlay_draw */ bool 
font_uploaded; VkImage font_image; @@ -190,9 +174,6 @@ struct swapchain_data { VkBuffer upload_font_buffer; VkDeviceMemory upload_font_buffer_mem; - VkFence fence; - VkSemaphore submission_semaphore; - /**/ ImGuiContext* imgui_context; ImVec2 window_size; @@ -201,73 +182,161 @@ struct swapchain_data { uint64_t n_frames; uint64_t last_present_time; - double frame_times[200]; + unsigned n_frames_since_update; + uint64_t last_fps_update; + double fps; - double acquire_times[200]; - uint64_t n_acquire; - - enum frame_stat_type stat_selector; + enum overlay_param_enabled stat_selector; + double time_dividor; struct frame_stat stats_min, stats_max; - struct frame_stat stats[200]; + struct frame_stat frames_stats[200]; + + /* Over a single frame */ + struct frame_stat frame_stats; + + /* Over fps_sampling_period */ + struct frame_stat accumulated_stats; }; -static struct hash_table *vk_object_to_data = NULL; +static const VkQueryPipelineStatisticFlags overlay_query_flags = + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT; +#define OVERLAY_QUERY_COUNT (11) + +static struct hash_table_u64 *vk_object_to_data = NULL; static simple_mtx_t vk_object_to_data_mutex = _SIMPLE_MTX_INITIALIZER_NP; thread_local ImGuiContext* __MesaImGui; static inline void ensure_vk_object_map(void) { - if (!vk_object_to_data) { - vk_object_to_data = 
_mesa_hash_table_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); - } + if (!vk_object_to_data) + vk_object_to_data = _mesa_hash_table_u64_create(NULL); } -#define FIND_SWAPCHAIN_DATA(obj) ((struct swapchain_data *)find_object_data((void *) obj)) -#define FIND_DEVICE_DATA(obj) ((struct device_data *)find_object_data((void *) obj)) -#define FIND_QUEUE_DATA(obj) ((struct queue_data *)find_object_data((void *) obj)) -#define FIND_PHYSICAL_DEVICE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj)) -#define FIND_INSTANCE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj)) -static void *find_object_data(void *obj) +#define HKEY(obj) ((uint64_t)(obj)) +#define FIND(type, obj) ((type *)find_object_data(HKEY(obj))) + +static void *find_object_data(uint64_t obj) { simple_mtx_lock(&vk_object_to_data_mutex); ensure_vk_object_map(); - struct hash_entry *entry = _mesa_hash_table_search(vk_object_to_data, obj); - void *data = entry ? entry->data : NULL; + void *data = _mesa_hash_table_u64_search(vk_object_to_data, obj); simple_mtx_unlock(&vk_object_to_data_mutex); return data; } -static void map_object(void *obj, void *data) +static void map_object(uint64_t obj, void *data) { simple_mtx_lock(&vk_object_to_data_mutex); ensure_vk_object_map(); - _mesa_hash_table_insert(vk_object_to_data, obj, data); + _mesa_hash_table_u64_insert(vk_object_to_data, obj, data); simple_mtx_unlock(&vk_object_to_data_mutex); } -static void unmap_object(void *obj) +static void unmap_object(uint64_t obj) { simple_mtx_lock(&vk_object_to_data_mutex); - struct hash_entry *entry = _mesa_hash_table_search(vk_object_to_data, obj); - _mesa_hash_table_remove(vk_object_to_data, entry); + _mesa_hash_table_u64_remove(vk_object_to_data, obj); simple_mtx_unlock(&vk_object_to_data_mutex); } /**/ + +#define VK_CHECK(expr) \ + do { \ + VkResult __result = (expr); \ + if (__result != VK_SUCCESS) { \ + fprintf(stderr, "'%s' line %i failed with %s\n", \ + #expr, __LINE__, 
vk_Result_to_str(__result)); \ + } \ + } while (0) + +/**/ + +static VkLayerInstanceCreateInfo *get_instance_chain_info(const VkInstanceCreateInfo *pCreateInfo, + VkLayerFunction func) +{ + vk_foreach_struct(item, pCreateInfo->pNext) { + if (item->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && + ((VkLayerInstanceCreateInfo *) item)->function == func) + return (VkLayerInstanceCreateInfo *) item; + } + unreachable("instance chain info not found"); + return NULL; +} + +static VkLayerDeviceCreateInfo *get_device_chain_info(const VkDeviceCreateInfo *pCreateInfo, + VkLayerFunction func) +{ + vk_foreach_struct(item, pCreateInfo->pNext) { + if (item->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && + ((VkLayerDeviceCreateInfo *) item)->function == func) + return (VkLayerDeviceCreateInfo *)item; + } + unreachable("device chain info not found"); + return NULL; +} + +static struct VkBaseOutStructure * +clone_chain(const struct VkBaseInStructure *chain) +{ + struct VkBaseOutStructure *head = NULL, *tail = NULL; + + vk_foreach_struct_const(item, chain) { + size_t item_size = vk_structure_type_size(item); + struct VkBaseOutStructure *new_item = + (struct VkBaseOutStructure *)malloc(item_size);; + + memcpy(new_item, item, item_size); + + if (!head) + head = new_item; + if (tail) + tail->pNext = new_item; + tail = new_item; + } + + return head; +} + +static void +free_chain(struct VkBaseOutStructure *chain) +{ + while (chain) { + void *node = chain; + chain = chain->pNext; + free(node); + } +} + +/**/ + static struct instance_data *new_instance_data(VkInstance instance) { struct instance_data *data = rzalloc(NULL, struct instance_data); data->instance = instance; - map_object(data->instance, data); + data->control_client = -1; + map_object(HKEY(data->instance), data); return data; } static void destroy_instance_data(struct instance_data *data) { - unmap_object(data->instance); + if (data->params.output_file) + fclose(data->params.output_file); + if 
(data->params.control >= 0) + os_socket_close(data->params.control); + unmap_object(HKEY(data->instance)); ralloc_free(data); } @@ -286,9 +355,9 @@ static void instance_data_map_physical_devices(struct instance_data *instance_da for (uint32_t i = 0; i < physicalDeviceCount; i++) { if (map) - map_object(physicalDevices[i], instance_data); + map_object(HKEY(physicalDevices[i]), instance_data); else - unmap_object(physicalDevices[i]); + unmap_object(HKEY(physicalDevices[i])); } free(physicalDevices); @@ -300,7 +369,7 @@ static struct device_data *new_device_data(VkDevice device, struct instance_data struct device_data *data = rzalloc(NULL, struct device_data); data->instance = instance; data->device = device; - map_object(data->device, data); + map_object(HKEY(data->device), data); return data; } @@ -313,8 +382,19 @@ static struct queue_data *new_queue_data(VkQueue queue, data->device = device_data; data->queue = queue; data->flags = family_props->queueFlags; + data->timestamp_mask = (1ull << family_props->timestampValidBits) - 1; data->family_index = family_index; - map_object(data->queue, data); + list_inithead(&data->running_command_buffer); + map_object(HKEY(data->queue), data); + + /* Fence synchronizing access to queries on that queue. 
*/ + VkFenceCreateInfo fence_info = {}; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; + VK_CHECK(device_data->vtable.CreateFence(device_data->device, + &fence_info, + NULL, + &data->queries_fence)); if (data->flags & VK_QUEUE_GRAPHICS_BIT) device_data->graphic_queue = data; @@ -322,6 +402,14 @@ static struct queue_data *new_queue_data(VkQueue queue, return data; } +static void destroy_queue(struct queue_data *data) +{ + struct device_data *device_data = data->device; + device_data->vtable.DestroyFence(device_data->device, data->queries_fence, NULL); + unmap_object(HKEY(data->queue)); + ralloc_free(data); +} + static void device_map_queues(struct device_data *data, const VkDeviceCreateInfo *pCreateInfo) { @@ -347,6 +435,9 @@ static void device_map_queues(struct device_data *data, data->vtable.GetDeviceQueue(data->device, pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, j, &queue); + + VK_CHECK(data->set_device_loader_data(data->device, queue)); + data->queues[queue_index++] = new_queue_data(queue, &family_props[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex], pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, data); @@ -359,93 +450,444 @@ static void device_map_queues(struct device_data *data, static void device_unmap_queues(struct device_data *data) { for (uint32_t i = 0; i < data->n_queues; i++) - unmap_object(data->queues[i]->queue); + destroy_queue(data->queues[i]); } static void destroy_device_data(struct device_data *data) { - unmap_object(data->device); + unmap_object(HKEY(data->device)); ralloc_free(data); } -static void check_vk_result(VkResult err) +/**/ +static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer, + VkCommandBufferLevel level, + VkQueryPool pipeline_query_pool, + VkQueryPool timestamp_query_pool, + uint32_t query_index, + struct device_data *device_data) +{ + struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data); + 
data->device = device_data; + data->cmd_buffer = cmd_buffer; + data->level = level; + data->pipeline_query_pool = pipeline_query_pool; + data->timestamp_query_pool = timestamp_query_pool; + data->query_index = query_index; + list_inithead(&data->link); + map_object(HKEY(data->cmd_buffer), data); + return data; +} + +static void destroy_command_buffer_data(struct command_buffer_data *data) { - if (err != VK_SUCCESS) - printf("ERROR!\n"); + unmap_object(HKEY(data->cmd_buffer)); + list_delinit(&data->link); + ralloc_free(data); } /**/ static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain, struct device_data *device_data) { + struct instance_data *instance_data = device_data->instance; struct swapchain_data *data = rzalloc(NULL, struct swapchain_data); data->device = device_data; data->swapchain = swapchain; - data->window_size = ImVec2(300, 300); - map_object(data->swapchain, data); + data->window_size = ImVec2(instance_data->params.width, instance_data->params.height); + list_inithead(&data->draws); + map_object(HKEY(data->swapchain), data); return data; } static void destroy_swapchain_data(struct swapchain_data *data) { - unmap_object(data->swapchain); + unmap_object(HKEY(data->swapchain)); ralloc_free(data); } +struct overlay_draw *get_overlay_draw(struct swapchain_data *data) +{ + struct device_data *device_data = data->device; + struct overlay_draw *draw = list_is_empty(&data->draws) ? 
+ NULL : list_first_entry(&data->draws, struct overlay_draw, link); + + VkSemaphoreCreateInfo sem_info = {}; + sem_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + + if (draw && device_data->vtable.GetFenceStatus(device_data->device, draw->fence) == VK_SUCCESS) { + list_del(&draw->link); + VK_CHECK(device_data->vtable.ResetFences(device_data->device, + 1, &draw->fence)); + list_addtail(&draw->link, &data->draws); + return draw; + } + + draw = rzalloc(data, struct overlay_draw); + + VkCommandBufferAllocateInfo cmd_buffer_info = {}; + cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + cmd_buffer_info.commandPool = data->command_pool; + cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cmd_buffer_info.commandBufferCount = 1; + VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device, + &cmd_buffer_info, + &draw->command_buffer)); + VK_CHECK(device_data->set_device_loader_data(device_data->device, + draw->command_buffer)); + + + VkFenceCreateInfo fence_info = {}; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + VK_CHECK(device_data->vtable.CreateFence(device_data->device, + &fence_info, + NULL, + &draw->fence)); + + VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info, + NULL, &draw->semaphore)); + VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info, + NULL, &draw->cross_engine_semaphore)); + + list_addtail(&draw->link, &data->draws); + + return draw; +} + +static const char *param_unit(enum overlay_param_enabled param) +{ + switch (param) { + case OVERLAY_PARAM_ENABLED_frame_timing: + case OVERLAY_PARAM_ENABLED_acquire_timing: + case OVERLAY_PARAM_ENABLED_present_timing: + return "(us)"; + case OVERLAY_PARAM_ENABLED_gpu_timing: + return "(ns)"; + default: + return ""; + } +} + +static void parse_command(struct instance_data *instance_data, + const char *cmd, unsigned cmdlen, + const char *param, unsigned paramlen) +{ + if (!strncmp(cmd, "capture", cmdlen)) { 
+ int value = atoi(param); + bool enabled = value > 0; + + if (enabled) { + instance_data->capture_enabled = true; + } else { + instance_data->capture_enabled = false; + instance_data->capture_started = false; + } + } +} + +#define BUFSIZE 4096 + +/** + * This function will process commands through the control file. + * + * A command starts with a colon, followed by the command, and followed by an + * option '=' and a parameter. It has to end with a semi-colon. A full command + * + parameter looks like: + * + * :cmd=param; + */ +static void process_char(struct instance_data *instance_data, char c) +{ + static char cmd[BUFSIZE]; + static char param[BUFSIZE]; + + static unsigned cmdpos = 0; + static unsigned parampos = 0; + static bool reading_cmd = false; + static bool reading_param = false; + + switch (c) { + case ':': + cmdpos = 0; + parampos = 0; + reading_cmd = true; + reading_param = false; + break; + case ';': + if (!reading_cmd) + break; + cmd[cmdpos++] = '\0'; + param[parampos++] = '\0'; + parse_command(instance_data, cmd, cmdpos, param, parampos); + reading_cmd = false; + reading_param = false; + break; + case '=': + if (!reading_cmd) + break; + reading_param = true; + break; + default: + if (!reading_cmd) + break; + + if (reading_param) { + /* overflow means an invalid parameter */ + if (parampos >= BUFSIZE - 1) { + reading_cmd = false; + reading_param = false; + break; + } + + param[parampos++] = c; + } else { + /* overflow means an invalid command */ + if (cmdpos >= BUFSIZE - 1) { + reading_cmd = false; + break; + } + + cmd[cmdpos++] = c; + } + } +} + +static void control_send(struct instance_data *instance_data, + const char *cmd, unsigned cmdlen, + const char *param, unsigned paramlen) +{ + unsigned msglen = 0; + char buffer[BUFSIZE]; + + assert(cmdlen + paramlen + 3 < BUFSIZE); + + buffer[msglen++] = ':'; + + memcpy(&buffer[msglen], cmd, cmdlen); + msglen += cmdlen; + + if (paramlen > 0) { + buffer[msglen++] = '='; + memcpy(&buffer[msglen], param, 
paramlen); + msglen += paramlen; + buffer[msglen++] = ';'; + } + + os_socket_send(instance_data->control_client, buffer, msglen, 0); +} + +static void control_send_connection_string(struct device_data *device_data) +{ + struct instance_data *instance_data = device_data->instance; + + const char *controlVersionCmd = "MesaOverlayControlVersion"; + const char *controlVersionString = "1"; + + control_send(instance_data, controlVersionCmd, strlen(controlVersionCmd), + controlVersionString, strlen(controlVersionString)); + + const char *deviceCmd = "DeviceName"; + const char *deviceName = device_data->properties.deviceName; + + control_send(instance_data, deviceCmd, strlen(deviceCmd), + deviceName, strlen(deviceName)); + + const char *mesaVersionCmd = "MesaVersion"; + const char *mesaVersionString = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1; + + control_send(instance_data, mesaVersionCmd, strlen(mesaVersionCmd), + mesaVersionString, strlen(mesaVersionString)); +} + +static void control_client_check(struct device_data *device_data) +{ + struct instance_data *instance_data = device_data->instance; + + /* Already connected, just return. 
*/ + if (instance_data->control_client >= 0) + return; + + int socket = os_socket_accept(instance_data->params.control); + if (socket == -1) { + if (errno != EAGAIN && errno != EWOULDBLOCK && errno != ECONNABORTED) + fprintf(stderr, "ERROR on socket: %s\n", strerror(errno)); + return; + } + + if (socket >= 0) { + os_socket_block(socket, false); + instance_data->control_client = socket; + control_send_connection_string(device_data); + } +} + +static void control_client_disconnected(struct instance_data *instance_data) +{ + os_socket_close(instance_data->control_client); + instance_data->control_client = -1; +} + +static void process_control_socket(struct instance_data *instance_data) +{ + const int client = instance_data->control_client; + if (client >= 0) { + char buf[BUFSIZE]; + + while (true) { + ssize_t n = os_socket_recv(client, buf, BUFSIZE, 0); + + if (n == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + /* nothing to read, try again later */ + break; + } + + if (errno != ECONNRESET) + fprintf(stderr, "ERROR on connection: %s\n", strerror(errno)); + + control_client_disconnected(instance_data); + } else if (n == 0) { + /* recv() returns 0 when the client disconnects */ + control_client_disconnected(instance_data); + } + + for (ssize_t i = 0; i < n; i++) { + process_char(instance_data, buf[i]); + } + + /* If we try to read BUFSIZE and receive BUFSIZE bytes from the + * socket, there's a good chance that there's still more data to be + * read, so we will try again. Otherwise, simply be done for this + * iteration and try again on the next frame. 
+ */ + if (n < BUFSIZE) + break; + } + } +} + static void snapshot_swapchain_frame(struct swapchain_data *data) { - uint64_t now = os_time_get(); + struct device_data *device_data = data->device; + struct instance_data *instance_data = device_data->instance; + uint32_t f_idx = data->n_frames % ARRAY_SIZE(data->frames_stats); + uint64_t now = os_time_get(); /* us */ + + if (instance_data->params.control >= 0) { + control_client_check(device_data); + process_control_socket(instance_data); + } if (data->last_present_time) { - data->frame_times[(data->n_frames - 1) % ARRAY_SIZE(data->frame_times)] = - ((double)now - (double)data->last_present_time) / 1000.0; + data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame_timing] = + now - data->last_present_time; } - struct device_data *device_data = data->device; - data->stats[data->n_frames % ARRAY_SIZE(data->frame_times)] = device_data->stats; - memset(&device_data->stats, 0, sizeof(device_data->stats)); + memset(&data->frames_stats[f_idx], 0, sizeof(data->frames_stats[f_idx])); + for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + data->frames_stats[f_idx].stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s]; + data->accumulated_stats.stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s]; + } + + /* If capture has been enabled but it hasn't started yet, it means we are on + * the first snapshot after it has been enabled. At this point we want to + * use the stats captured so far to update the display, but we don't want + * this data to cause noise to the stats that we want to capture from now + * on. + * + * capture_begin == true will trigger an update of the fps on display, and a + * flush of the data, but no stats will be written to the output file. This + * way, we will have only stats from after the capture has been enabled + * written to the output_file. 
+ */ + const bool capture_begin = + instance_data->capture_enabled && !instance_data->capture_started; + + if (data->last_fps_update) { + double elapsed = (double)(now - data->last_fps_update); /* us */ + if (capture_begin || + elapsed >= instance_data->params.fps_sampling_period) { + data->fps = 1000000.0f * data->n_frames_since_update / elapsed; + if (instance_data->capture_started) { + if (!instance_data->first_line_printed) { + bool first_column = true; + + instance_data->first_line_printed = true; + +#define OVERLAY_PARAM_BOOL(name) \ + if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_##name]) { \ + fprintf(instance_data->params.output_file, \ + "%s%s%s", first_column ? "" : ", ", #name, \ + param_unit(OVERLAY_PARAM_ENABLED_##name)); \ + first_column = false; \ + } +#define OVERLAY_PARAM_CUSTOM(name) + OVERLAY_PARAMS +#undef OVERLAY_PARAM_BOOL +#undef OVERLAY_PARAM_CUSTOM + fprintf(instance_data->params.output_file, "\n"); + } + + for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + if (!instance_data->params.enabled[s]) + continue; + if (s == OVERLAY_PARAM_ENABLED_fps) { + fprintf(instance_data->params.output_file, + "%s%.2f", s == 0 ? "" : ", ", data->fps); + } else { + fprintf(instance_data->params.output_file, + "%s%" PRIu64, s == 0 ? 
"" : ", ", + data->accumulated_stats.stats[s]); + } + } + fprintf(instance_data->params.output_file, "\n"); + fflush(instance_data->params.output_file); + } + + memset(&data->accumulated_stats, 0, sizeof(data->accumulated_stats)); + data->n_frames_since_update = 0; + data->last_fps_update = now; + + if (capture_begin) + instance_data->capture_started = true; + } + } else { + data->last_fps_update = now; + } + + memset(&device_data->frame_stats, 0, sizeof(device_data->frame_stats)); + memset(&data->frame_stats, 0, sizeof(device_data->frame_stats)); data->last_present_time = now; data->n_frames++; + data->n_frames_since_update++; } -static float get_frame_timing(void *_data, int _idx) -{ - struct swapchain_data *data = (struct swapchain_data *) _data; - if ((ARRAY_SIZE(data->frame_times) - _idx) > (data->n_frames - 2)) - return 0.0f; - int idx = ARRAY_SIZE(data->frame_times) + - (data->n_frames - 2) < ARRAY_SIZE(data->frame_times) ? - _idx - (data->n_frames - 2) : - _idx + (data->n_frames - 2); - idx %= ARRAY_SIZE(data->frame_times); - return data->frame_times[idx]; -} - -static float get_acquire_timing(void *_data, int _idx) +static float get_time_stat(void *_data, int _idx) { struct swapchain_data *data = (struct swapchain_data *) _data; - if ((ARRAY_SIZE(data->acquire_times) - _idx) > data->n_acquire) + if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames) return 0.0f; - int idx = ARRAY_SIZE(data->acquire_times) + - data->n_acquire < ARRAY_SIZE(data->acquire_times) ? - _idx - data->n_acquire : - _idx + data->n_acquire; - idx %= ARRAY_SIZE(data->acquire_times); - return data->acquire_times[idx]; + int idx = ARRAY_SIZE(data->frames_stats) + + data->n_frames < ARRAY_SIZE(data->frames_stats) ? + _idx - data->n_frames : + _idx + data->n_frames; + idx %= ARRAY_SIZE(data->frames_stats); + /* Time stats are in us. 
*/ + return data->frames_stats[idx].stats[data->stat_selector] / data->time_dividor; } static float get_stat(void *_data, int _idx) { struct swapchain_data *data = (struct swapchain_data *) _data; - if ((ARRAY_SIZE(data->stats) - _idx) > data->n_frames) + if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames) return 0.0f; - int idx = ARRAY_SIZE(data->stats) + - data->n_frames < ARRAY_SIZE(data->stats) ? + int idx = ARRAY_SIZE(data->frames_stats) + + data->n_frames < ARRAY_SIZE(data->frames_stats) ? _idx - data->n_frames : _idx + data->n_frames; - idx %= ARRAY_SIZE(data->stats); - return data->stats[idx].stats[data->stat_selector]; + idx %= ARRAY_SIZE(data->frames_stats); + return data->frames_stats[idx].stats[data->stat_selector]; } static void position_layer(struct swapchain_data *data) @@ -453,24 +895,25 @@ static void position_layer(struct swapchain_data *data) { struct device_data *device_data = data->device; struct instance_data *instance_data = device_data->instance; + const float margin = 10.0f; ImGui::SetNextWindowBgAlpha(0.5); ImGui::SetNextWindowSize(data->window_size, ImGuiCond_Always); - switch (instance_data->position) { + switch (instance_data->params.position) { case LAYER_POSITION_TOP_LEFT: - ImGui::SetNextWindowPos(ImVec2(0, 0), ImGuiCond_Always); + ImGui::SetNextWindowPos(ImVec2(margin, margin), ImGuiCond_Always); break; case LAYER_POSITION_TOP_RIGHT: - ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x, 0), + ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, margin), ImGuiCond_Always); break; case LAYER_POSITION_BOTTOM_LEFT: - ImGui::SetNextWindowPos(ImVec2(0, data->height - data->window_size.y), + ImGui::SetNextWindowPos(ImVec2(margin, data->height - data->window_size.y - margin), ImGuiCond_Always); break; case LAYER_POSITION_BOTTOM_RIGHT: - ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x, - data->height - data->window_size.y), + ImGui::SetNextWindowPos(ImVec2(data->width - 
data->window_size.x - margin, + data->height - data->window_size.y - margin), ImGuiCond_Always); break; } @@ -490,68 +933,64 @@ static void compute_swapchain_display(struct swapchain_data *data) const char *format_name = vk_Format_to_str(data->format); format_name = format_name ? (format_name + strlen("VK_FORMAT_")) : "unknown"; ImGui::Text("Swapchain format: %s", format_name); - ImGui::Text("Frames: %lu", data->n_frames); - - { - double min_time = FLT_MAX, max_time = 0.0f; - for (uint32_t i = 0; i < MIN2(data->n_frames - 2, ARRAY_SIZE(data->frame_times)); i++) { - min_time = MIN2(min_time, data->frame_times[i]); - max_time = MAX2(max_time, data->frame_times[i]); - } - ImGui::PlotHistogram("##Frame timings", get_frame_timing, data, - ARRAY_SIZE(data->frame_times), 0, - NULL, min_time, max_time, - ImVec2(ImGui::GetContentRegionAvailWidth(), 30)); - ImGui::Text("Frame timing: %.3fms [%.3f, %.3f]", - get_frame_timing(data, ARRAY_SIZE(data->frame_times) - 1), - min_time, max_time); + ImGui::Text("Frames: %" PRIu64, data->n_frames); + if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_fps]) + ImGui::Text("FPS: %.2f" , data->fps); + + /* Recompute min/max */ + for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + data->stats_min.stats[s] = UINT64_MAX; + data->stats_max.stats[s] = 0; } - - if (instance_data->enabled_stats & FRAME_STAT_ENABLED(ACQUIRE_TIMING)) { - double min_time = FLT_MAX, max_time = 0.0f; - for (uint32_t i = 0; i < MIN2(data->n_acquire - 2, ARRAY_SIZE(data->acquire_times)); i++) { - min_time = MIN2(min_time, data->acquire_times[i]); - max_time = MAX2(max_time, data->acquire_times[i]); - } - ImGui::PlotHistogram("##Acquire timings", get_acquire_timing, data, - ARRAY_SIZE(data->acquire_times), 0, - NULL, min_time, max_time, - ImVec2(ImGui::GetContentRegionAvailWidth(), 30)); - ImGui::Text("Acquire timing: %.3fms [%.3f, %.3f]", - get_acquire_timing(data, ARRAY_SIZE(data->acquire_times) - 1), - min_time, max_time); - - for (uint32_t i = 0; i < 
ARRAY_SIZE(data->stats_min.stats); i++) { - data->stats_min.stats[i] = UINT32_MAX; - data->stats_max.stats[i] = 0; - } - for (uint32_t i = 0; i < MIN2(data->n_frames - 1, ARRAY_SIZE(data->stats)); i++) { - for (uint32_t j = 0; j < ARRAY_SIZE(data->stats[0].stats); j++) { - data->stats_min.stats[j] = MIN2(data->stats[i].stats[j], - data->stats_min.stats[j]); - data->stats_max.stats[j] = MAX2(data->stats[i].stats[j], - data->stats_max.stats[j]); - } + for (uint32_t f = 0; f < MIN2(data->n_frames, ARRAY_SIZE(data->frames_stats)); f++) { + for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + data->stats_min.stats[s] = MIN2(data->frames_stats[f].stats[s], + data->stats_min.stats[s]); + data->stats_max.stats[s] = MAX2(data->frames_stats[f].stats[s], + data->stats_max.stats[s]); } } + for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + assert(data->stats_min.stats[s] != UINT64_MAX); + } - for (uint32_t i = 0; i < ARRAY_SIZE(device_data->stats.stats); i++) { - if (!(instance_data->enabled_stats & (1ULL << i))) + for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + if (!instance_data->params.enabled[s] || + s == OVERLAY_PARAM_ENABLED_fps || + s == OVERLAY_PARAM_ENABLED_frame) continue; char hash[40]; - snprintf(hash, sizeof(hash), "##%s", enable_flags[i].string); - data->stat_selector = (enum frame_stat_type) i; - - ImGui::PlotHistogram(hash, get_stat, data, - ARRAY_SIZE(data->stats), 0, - NULL, - data->stats_min.stats[i], - data->stats_max.stats[i], - ImVec2(ImGui::GetContentRegionAvailWidth(), 30)); - ImGui::Text("%s: %.0f [%u, %u]", enable_flags[i].string, - get_stat(data, ARRAY_SIZE(data->stats) - 1), - data->stats_min.stats[i], data->stats_max.stats[i]); + snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]); + data->stat_selector = (enum overlay_param_enabled) s; + data->time_dividor = 1000.0f; + if (s == OVERLAY_PARAM_ENABLED_gpu_timing) + data->time_dividor = 1000000.0f; + + if (s == OVERLAY_PARAM_ENABLED_frame_timing || + s == 
OVERLAY_PARAM_ENABLED_acquire_timing || + s == OVERLAY_PARAM_ENABLED_present_timing || + s == OVERLAY_PARAM_ENABLED_gpu_timing) { + double min_time = data->stats_min.stats[s] / data->time_dividor; + double max_time = data->stats_max.stats[s] / data->time_dividor; + ImGui::PlotHistogram(hash, get_time_stat, data, + ARRAY_SIZE(data->frames_stats), 0, + NULL, min_time, max_time, + ImVec2(ImGui::GetContentRegionAvailWidth(), 30)); + ImGui::Text("%s: %.3fms [%.3f, %.3f]", overlay_param_names[s], + get_time_stat(data, ARRAY_SIZE(data->frames_stats) - 1), + min_time, max_time); + } else { + ImGui::PlotHistogram(hash, get_stat, data, + ARRAY_SIZE(data->frames_stats), 0, + NULL, + data->stats_min.stats[s], + data->stats_max.stats[s], + ImVec2(ImGui::GetContentRegionAvailWidth(), 30)); + ImGui::Text("%s: %.0f [%" PRIu64 ", %" PRIu64 "]", overlay_param_names[s], + get_stat(data, ARRAY_SIZE(data->frames_stats) - 1), + data->stats_min.stats[s], data->stats_max.stats[s]); + } } data->window_size = ImVec2(data->window_size.x, ImGui::GetCursorPosY() + 10.0f); ImGui::End(); @@ -580,7 +1019,6 @@ static void ensure_swapchain_fonts(struct swapchain_data *data, data->font_uploaded = true; struct device_data *device_data = data->device; - VkResult err; ImGuiIO& io = ImGui::GetIO(); unsigned char* pixels; int width, height; @@ -593,9 +1031,8 @@ static void ensure_swapchain_fonts(struct swapchain_data *data, buffer_info.size = upload_size; buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - err = device_data->vtable.CreateBuffer(device_data->device, &buffer_info, - NULL, &data->upload_font_buffer); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateBuffer(device_data->device, &buffer_info, + NULL, &data->upload_font_buffer)); VkMemoryRequirements upload_buffer_req; device_data->vtable.GetBufferMemoryRequirements(device_data->device, data->upload_font_buffer, @@ -606,29 +1043,25 @@ static void ensure_swapchain_fonts(struct 
swapchain_data *data, upload_alloc_info.memoryTypeIndex = vk_memory_type(device_data, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, upload_buffer_req.memoryTypeBits); - err = device_data->vtable.AllocateMemory(device_data->device, - &upload_alloc_info, - NULL, - &data->upload_font_buffer_mem); - check_vk_result(err); - err = device_data->vtable.BindBufferMemory(device_data->device, - data->upload_font_buffer, - data->upload_font_buffer_mem, 0); - check_vk_result(err); + VK_CHECK(device_data->vtable.AllocateMemory(device_data->device, + &upload_alloc_info, + NULL, + &data->upload_font_buffer_mem)); + VK_CHECK(device_data->vtable.BindBufferMemory(device_data->device, + data->upload_font_buffer, + data->upload_font_buffer_mem, 0)); /* Upload to Buffer */ char* map = NULL; - err = device_data->vtable.MapMemory(device_data->device, - data->upload_font_buffer_mem, - 0, upload_size, 0, (void**)(&map)); - check_vk_result(err); + VK_CHECK(device_data->vtable.MapMemory(device_data->device, + data->upload_font_buffer_mem, + 0, upload_size, 0, (void**)(&map))); memcpy(map, pixels, upload_size); VkMappedMemoryRange range[1] = {}; range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; range[0].memory = data->upload_font_buffer_mem; range[0].size = upload_size; - err = device_data->vtable.FlushMappedMemoryRanges(device_data->device, 1, range); - check_vk_result(err); + VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 1, range)); device_data->vtable.UnmapMemory(device_data->device, data->upload_font_buffer_mem); @@ -692,7 +1125,6 @@ static void CreateOrResizeBuffer(struct device_data *data, VkDeviceSize *buffer_size, size_t new_size, VkBufferUsageFlagBits usage) { - VkResult err; if (*buffer != VK_NULL_HANDLE) data->vtable.DestroyBuffer(data->device, *buffer, NULL); if (*buffer_memory) @@ -703,8 +1135,7 @@ static void CreateOrResizeBuffer(struct device_data *data, buffer_info.size = new_size; buffer_info.usage = usage; buffer_info.sharingMode = 
VK_SHARING_MODE_EXCLUSIVE; - err = data->vtable.CreateBuffer(data->device, &buffer_info, NULL, buffer); - check_vk_result(err); + VK_CHECK(data->vtable.CreateBuffer(data->device, &buffer_info, NULL, buffer)); VkMemoryRequirements req; data->vtable.GetBufferMemoryRequirements(data->device, *buffer, &req); @@ -713,26 +1144,26 @@ static void CreateOrResizeBuffer(struct device_data *data, alloc_info.allocationSize = req.size; alloc_info.memoryTypeIndex = vk_memory_type(data, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, req.memoryTypeBits); - err = data->vtable.AllocateMemory(data->device, &alloc_info, NULL, buffer_memory); - check_vk_result(err); + VK_CHECK(data->vtable.AllocateMemory(data->device, &alloc_info, NULL, buffer_memory)); - err = data->vtable.BindBufferMemory(data->device, *buffer, *buffer_memory, 0); - check_vk_result(err); + VK_CHECK(data->vtable.BindBufferMemory(data->device, *buffer, *buffer_memory, 0)); *buffer_size = new_size; } -static void render_swapchain_display(struct swapchain_data *data, unsigned image_index) +static struct overlay_draw *render_swapchain_display(struct swapchain_data *data, + struct queue_data *present_queue, + const VkSemaphore *wait_semaphores, + unsigned n_wait_semaphores, + unsigned image_index) { ImDrawData* draw_data = ImGui::GetDrawData(); if (draw_data->TotalVtxCount == 0) - return; + return NULL; struct device_data *device_data = data->device; - uint32_t idx = data->n_frames % ARRAY_SIZE(data->frame_data); - VkCommandBuffer command_buffer = data->frame_data[idx].command_buffer; - VkResult err; + struct overlay_draw *draw = get_overlay_draw(data); - device_data->vtable.ResetCommandBuffer(command_buffer, 0); + device_data->vtable.ResetCommandBuffer(draw->command_buffer, 0); VkRenderPassBeginInfo render_pass_info = {}; render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; @@ -744,9 +1175,9 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image VkCommandBufferBeginInfo buffer_begin_info = 
{}; buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - device_data->vtable.BeginCommandBuffer(command_buffer, &buffer_begin_info); + device_data->vtable.BeginCommandBuffer(draw->command_buffer, &buffer_begin_info); - ensure_swapchain_fonts(data, command_buffer); + ensure_swapchain_fonts(data, draw->command_buffer); /* Bounce the image to display back to color attachment layout for * rendering on top of it. @@ -754,8 +1185,8 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image VkImageMemoryBarrier imb; imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; imb.pNext = nullptr; - imb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; imb.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; imb.image = data->images[image_index]; @@ -764,50 +1195,44 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image imb.subresourceRange.levelCount = 1; imb.subresourceRange.baseArrayLayer = 0; imb.subresourceRange.layerCount = 1; - imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index; + imb.srcQueueFamilyIndex = present_queue->family_index; imb.dstQueueFamilyIndex = device_data->graphic_queue->family_index; - device_data->vtable.CmdPipelineBarrier(command_buffer, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + device_data->vtable.CmdPipelineBarrier(draw->command_buffer, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, /* dependency flags */ 0, nullptr, /* memory barriers */ 0, nullptr, /* buffer memory barriers */ 1, &imb); /* image memory barriers */ - device_data->vtable.CmdBeginRenderPass(command_buffer, &render_pass_info, + device_data->vtable.CmdBeginRenderPass(draw->command_buffer, &render_pass_info, VK_SUBPASS_CONTENTS_INLINE); /* Create/Resize vertex & index buffers */ 
size_t vertex_size = draw_data->TotalVtxCount * sizeof(ImDrawVert); size_t index_size = draw_data->TotalIdxCount * sizeof(ImDrawIdx); - if (data->frame_data[idx].vertex_buffer_size < vertex_size) { + if (draw->vertex_buffer_size < vertex_size) { CreateOrResizeBuffer(device_data, - &data->frame_data[idx].vertex_buffer, - &data->frame_data[idx].vertex_buffer_mem, - &data->frame_data[idx].vertex_buffer_size, + &draw->vertex_buffer, + &draw->vertex_buffer_mem, + &draw->vertex_buffer_size, vertex_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); } - if (data->frame_data[idx].index_buffer_size < index_size) { + if (draw->index_buffer_size < index_size) { CreateOrResizeBuffer(device_data, - &data->frame_data[idx].index_buffer, - &data->frame_data[idx].index_buffer_mem, - &data->frame_data[idx].index_buffer_size, + &draw->index_buffer, + &draw->index_buffer_mem, + &draw->index_buffer_size, index_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT); } /* Upload vertex & index data */ - VkBuffer vertex_buffer = data->frame_data[idx].vertex_buffer; - VkDeviceMemory vertex_mem = data->frame_data[idx].vertex_buffer_mem; - VkBuffer index_buffer = data->frame_data[idx].index_buffer; - VkDeviceMemory index_mem = data->frame_data[idx].index_buffer_mem; ImDrawVert* vtx_dst = NULL; ImDrawIdx* idx_dst = NULL; - err = device_data->vtable.MapMemory(device_data->device, vertex_mem, - 0, vertex_size, 0, (void**)(&vtx_dst)); - check_vk_result(err); - err = device_data->vtable.MapMemory(device_data->device, index_mem, - 0, index_size, 0, (void**)(&idx_dst)); - check_vk_result(err); + VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->vertex_buffer_mem, + 0, vertex_size, 0, (void**)(&vtx_dst))); + VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->index_buffer_mem, + 0, index_size, 0, (void**)(&idx_dst))); for (int n = 0; n < draw_data->CmdListsCount; n++) { const ImDrawList* cmd_list = draw_data->CmdLists[n]; @@ -818,27 +1243,26 @@ static void render_swapchain_display(struct 
swapchain_data *data, unsigned image } VkMappedMemoryRange range[2] = {}; range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range[0].memory = vertex_mem; + range[0].memory = draw->vertex_buffer_mem; range[0].size = VK_WHOLE_SIZE; range[1].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range[1].memory = index_mem; + range[1].memory = draw->index_buffer_mem; range[1].size = VK_WHOLE_SIZE; - err = device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range); - check_vk_result(err); - device_data->vtable.UnmapMemory(device_data->device, vertex_mem); - device_data->vtable.UnmapMemory(device_data->device, index_mem); + VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range)); + device_data->vtable.UnmapMemory(device_data->device, draw->vertex_buffer_mem); + device_data->vtable.UnmapMemory(device_data->device, draw->index_buffer_mem); /* Bind pipeline and descriptor sets */ - device_data->vtable.CmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline); + device_data->vtable.CmdBindPipeline(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline); VkDescriptorSet desc_set[1] = { data->descriptor_set }; - device_data->vtable.CmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + device_data->vtable.CmdBindDescriptorSets(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline_layout, 0, 1, desc_set, 0, NULL); /* Bind vertex & index buffers */ - VkBuffer vertex_buffers[1] = { vertex_buffer }; + VkBuffer vertex_buffers[1] = { draw->vertex_buffer }; VkDeviceSize vertex_offset[1] = { 0 }; - device_data->vtable.CmdBindVertexBuffers(command_buffer, 0, 1, vertex_buffers, vertex_offset); - device_data->vtable.CmdBindIndexBuffer(command_buffer, index_buffer, 0, VK_INDEX_TYPE_UINT16); + device_data->vtable.CmdBindVertexBuffers(draw->command_buffer, 0, 1, vertex_buffers, vertex_offset); + device_data->vtable.CmdBindIndexBuffer(draw->command_buffer, 
draw->index_buffer, 0, VK_INDEX_TYPE_UINT16); /* Setup viewport */ VkViewport viewport; @@ -848,7 +1272,7 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image viewport.height = draw_data->DisplaySize.y; viewport.minDepth = 0.0f; viewport.maxDepth = 1.0f; - device_data->vtable.CmdSetViewport(command_buffer, 0, 1, &viewport); + device_data->vtable.CmdSetViewport(draw->command_buffer, 0, 1, &viewport); /* Setup scale and translation through push constants : @@ -863,10 +1287,10 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image float translate[2]; translate[0] = -1.0f - draw_data->DisplayPos.x * scale[0]; translate[1] = -1.0f - draw_data->DisplayPos.y * scale[1]; - device_data->vtable.CmdPushConstants(command_buffer, data->pipeline_layout, + device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, sizeof(float) * 0, sizeof(float) * 2, scale); - device_data->vtable.CmdPushConstants(command_buffer, data->pipeline_layout, + device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, sizeof(float) * 2, sizeof(float) * 2, translate); @@ -887,41 +1311,99 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image scissor.offset.y = (int32_t)(pcmd->ClipRect.y - display_pos.y) > 0 ? (int32_t)(pcmd->ClipRect.y - display_pos.y) : 0; scissor.extent.width = (uint32_t)(pcmd->ClipRect.z - pcmd->ClipRect.x); scissor.extent.height = (uint32_t)(pcmd->ClipRect.w - pcmd->ClipRect.y + 1); // FIXME: Why +1 here? 
- device_data->vtable.CmdSetScissor(command_buffer, 0, 1, &scissor); + device_data->vtable.CmdSetScissor(draw->command_buffer, 0, 1, &scissor); // Draw - device_data->vtable.CmdDrawIndexed(command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0); + device_data->vtable.CmdDrawIndexed(draw->command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0); idx_offset += pcmd->ElemCount; } vtx_offset += cmd_list->VtxBuffer.Size; } - device_data->vtable.CmdEndRenderPass(command_buffer); - device_data->vtable.EndCommandBuffer(command_buffer); + device_data->vtable.CmdEndRenderPass(draw->command_buffer); - if (data->submission_semaphore) { - device_data->vtable.DestroySemaphore(device_data->device, - data->submission_semaphore, - NULL); + if (device_data->graphic_queue->family_index != present_queue->family_index) + { + /* Transfer the image back to the present queue family + * image layout was already changed to present by the render pass + */ + imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imb.pNext = nullptr; + imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + imb.image = data->images[image_index]; + imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imb.subresourceRange.baseMipLevel = 0; + imb.subresourceRange.levelCount = 1; + imb.subresourceRange.baseArrayLayer = 0; + imb.subresourceRange.layerCount = 1; + imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index; + imb.dstQueueFamilyIndex = present_queue->family_index; + device_data->vtable.CmdPipelineBarrier(draw->command_buffer, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + 0, /* dependency flags */ + 0, nullptr, /* memory barriers */ + 0, nullptr, /* buffer memory barriers */ + 1, &imb); /* image memory barriers */ } - /* Submission semaphore */ - VkSemaphoreCreateInfo 
semaphore_info = {}; - semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - err = device_data->vtable.CreateSemaphore(device_data->device, &semaphore_info, - NULL, &data->submission_semaphore); - check_vk_result(err); - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &data->submission_semaphore; + device_data->vtable.EndCommandBuffer(draw->command_buffer); + + /* When presenting on a different queue than where we're drawing the + * overlay *AND* when the application does not provide a semaphore to + * vkQueuePresent, insert our own cross engine synchronization + * semaphore. + */ + if (n_wait_semaphores == 0 && device_data->graphic_queue->queue != present_queue->queue) { + VkPipelineStageFlags stages_wait = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 0; + submit_info.pWaitDstStageMask = &stages_wait; + submit_info.waitSemaphoreCount = 0; + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &draw->cross_engine_semaphore; + + device_data->vtable.QueueSubmit(present_queue->queue, 1, &submit_info, VK_NULL_HANDLE); + + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pWaitDstStageMask = &stages_wait; + submit_info.pCommandBuffers = &draw->command_buffer; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &draw->cross_engine_semaphore; + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &draw->semaphore; + + device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence); + } else { + VkPipelineStageFlags *stages_wait = (VkPipelineStageFlags*) malloc(sizeof(VkPipelineStageFlags) * n_wait_semaphores); + for 
(unsigned i = 0; i < n_wait_semaphores; i++) + { + // wait in the fragment stage until the swapchain image is ready + stages_wait[i] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &draw->command_buffer; + submit_info.pWaitDstStageMask = stages_wait; + submit_info.waitSemaphoreCount = n_wait_semaphores; + submit_info.pWaitSemaphores = wait_semaphores; + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &draw->semaphore; - device_data->vtable.WaitForFences(device_data->device, 1, &data->fence, VK_TRUE, UINT64_MAX); - device_data->vtable.ResetFences(device_data->device, 1, &data->fence); - device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, data->fence); + device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence); + + free(stages_wait); + } + + return draw; } static const uint32_t overlay_vert_spv[] = { @@ -935,23 +1417,20 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) { struct device_data *device_data = data->device; VkShaderModule vert_module, frag_module; - VkResult err; /* Create shader modules */ VkShaderModuleCreateInfo vert_info = {}; vert_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; vert_info.codeSize = sizeof(overlay_vert_spv); vert_info.pCode = overlay_vert_spv; - err = device_data->vtable.CreateShaderModule(device_data->device, - &vert_info, NULL, &vert_module); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device, + &vert_info, NULL, &vert_module)); VkShaderModuleCreateInfo frag_info = {}; frag_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; frag_info.codeSize = sizeof(overlay_frag_spv); frag_info.pCode = (uint32_t*)overlay_frag_spv; - err = device_data->vtable.CreateShaderModule(device_data->device, - &frag_info, NULL, 
&frag_module); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device, + &frag_info, NULL, &frag_module)); /* Font sampler */ VkSamplerCreateInfo sampler_info = {}; @@ -965,9 +1444,8 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) sampler_info.minLod = -1000; sampler_info.maxLod = 1000; sampler_info.maxAnisotropy = 1.0f; - err = device_data->vtable.CreateSampler(device_data->device, &sampler_info, - NULL, &data->font_sampler); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateSampler(device_data->device, &sampler_info, + NULL, &data->font_sampler)); /* Descriptor pool */ VkDescriptorPoolSize sampler_pool_size = {}; @@ -978,10 +1456,9 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) desc_pool_info.maxSets = 1; desc_pool_info.poolSizeCount = 1; desc_pool_info.pPoolSizes = &sampler_pool_size; - err = device_data->vtable.CreateDescriptorPool(device_data->device, - &desc_pool_info, - NULL, &data->descriptor_pool); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateDescriptorPool(device_data->device, + &desc_pool_info, + NULL, &data->descriptor_pool)); /* Descriptor layout */ VkSampler sampler[1] = { data->font_sampler }; @@ -994,10 +1471,9 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; set_layout_info.bindingCount = 1; set_layout_info.pBindings = binding; - err = device_data->vtable.CreateDescriptorSetLayout(device_data->device, - &set_layout_info, - NULL, &data->descriptor_layout); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateDescriptorSetLayout(device_data->device, + &set_layout_info, + NULL, &data->descriptor_layout)); /* Descriptor set */ VkDescriptorSetAllocateInfo alloc_info = {}; @@ -1005,10 +1481,9 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) alloc_info.descriptorPool = data->descriptor_pool; 
alloc_info.descriptorSetCount = 1; alloc_info.pSetLayouts = &data->descriptor_layout; - err = device_data->vtable.AllocateDescriptorSets(device_data->device, - &alloc_info, - &data->descriptor_set); - check_vk_result(err); + VK_CHECK(device_data->vtable.AllocateDescriptorSets(device_data->device, + &alloc_info, + &data->descriptor_set)); /* Constants: we are using 'vec2 offset' and 'vec2 scale' instead of a full * 3d projection matrix @@ -1023,11 +1498,9 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) layout_info.pSetLayouts = &data->descriptor_layout; layout_info.pushConstantRangeCount = 1; layout_info.pPushConstantRanges = push_constants; - err = device_data->vtable.CreatePipelineLayout(device_data->device, - &layout_info, - NULL, &data->pipeline_layout); - check_vk_result(err); - + VK_CHECK(device_data->vtable.CreatePipelineLayout(device_data->device, + &layout_info, + NULL, &data->pipeline_layout)); VkPipelineShaderStageCreateInfo stage[2] = {}; stage[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -1124,10 +1597,10 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) info.pDynamicState = &dynamic_state; info.layout = data->pipeline_layout; info.renderPass = data->render_pass; - err = device_data->vtable.CreateGraphicsPipelines(device_data->device, VK_NULL_HANDLE, - 1, &info, - NULL, &data->pipeline); - check_vk_result(err); + VK_CHECK( + device_data->vtable.CreateGraphicsPipelines(device_data->device, VK_NULL_HANDLE, + 1, &info, + NULL, &data->pipeline)); device_data->vtable.DestroyShaderModule(device_data->device, vert_module, NULL); device_data->vtable.DestroyShaderModule(device_data->device, frag_module, NULL); @@ -1152,9 +1625,8 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - err 
= device_data->vtable.CreateImage(device_data->device, &image_info, - NULL, &data->font_image); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateImage(device_data->device, &image_info, + NULL, &data->font_image)); VkMemoryRequirements font_image_req; device_data->vtable.GetImageMemoryRequirements(device_data->device, data->font_image, &font_image_req); @@ -1164,13 +1636,11 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) image_alloc_info.memoryTypeIndex = vk_memory_type(device_data, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, font_image_req.memoryTypeBits); - err = device_data->vtable.AllocateMemory(device_data->device, &image_alloc_info, - NULL, &data->font_mem); - check_vk_result(err); - err = device_data->vtable.BindImageMemory(device_data->device, - data->font_image, - data->font_mem, 0); - check_vk_result(err); + VK_CHECK(device_data->vtable.AllocateMemory(device_data->device, &image_alloc_info, + NULL, &data->font_mem)); + VK_CHECK(device_data->vtable.BindImageMemory(device_data->device, + data->font_image, + data->font_mem, 0)); /* Font image view */ VkImageViewCreateInfo view_info = {}; @@ -1181,9 +1651,8 @@ static void setup_swapchain_data_pipeline(struct swapchain_data *data) view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; view_info.subresourceRange.levelCount = 1; view_info.subresourceRange.layerCount = 1; - err = device_data->vtable.CreateImageView(device_data->device, &view_info, - NULL, &data->font_image_view); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateImageView(device_data->device, &view_info, + NULL, &data->font_image_view)); /* Descriptor set */ VkDescriptorImageInfo desc_image[1] = {}; @@ -1213,7 +1682,6 @@ static void setup_swapchain_data(struct swapchain_data *data, ImGui::GetIO().DisplaySize = ImVec2((float)data->width, (float)data->height); struct device_data *device_data = data->device; - VkResult err; /* Render pass */ VkAttachmentDescription attachment_desc = {}; @@ 
-1247,25 +1715,25 @@ static void setup_swapchain_data(struct swapchain_data *data, render_pass_info.pSubpasses = &subpass; render_pass_info.dependencyCount = 1; render_pass_info.pDependencies = &dependency; - err = device_data->vtable.CreateRenderPass(device_data->device, - &render_pass_info, - NULL, &data->render_pass); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateRenderPass(device_data->device, + &render_pass_info, + NULL, &data->render_pass)); setup_swapchain_data_pipeline(data); - device_data->vtable.GetSwapchainImagesKHR(device_data->device, - data->swapchain, - &data->n_images, - NULL); + VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device, + data->swapchain, + &data->n_images, + NULL)); + data->images = ralloc_array(data, VkImage, data->n_images); data->image_views = ralloc_array(data, VkImageView, data->n_images); data->framebuffers = ralloc_array(data, VkFramebuffer, data->n_images); - device_data->vtable.GetSwapchainImagesKHR(device_data->device, - data->swapchain, - &data->n_images, - data->images); + VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device, + data->swapchain, + &data->n_images, + data->images)); /* Image views */ VkImageViewCreateInfo view_info = {}; @@ -1279,9 +1747,9 @@ static void setup_swapchain_data(struct swapchain_data *data, view_info.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; for (uint32_t i = 0; i < data->n_images; i++) { view_info.image = data->images[i]; - err = device_data->vtable.CreateImageView(device_data->device, &view_info, - NULL, &data->image_views[i]); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateImageView(device_data->device, + &view_info, NULL, + &data->image_views[i])); } /* Framebuffers */ @@ -1296,51 +1764,34 @@ static void setup_swapchain_data(struct swapchain_data *data, fb_info.layers = 1; for (uint32_t i = 0; i < data->n_images; i++) { attachment[0] = data->image_views[i]; - err = 
device_data->vtable.CreateFramebuffer(device_data->device, &fb_info, - NULL, &data->framebuffers[i]); - check_vk_result(err); + VK_CHECK(device_data->vtable.CreateFramebuffer(device_data->device, &fb_info, + NULL, &data->framebuffers[i])); } - /* Command buffer */ + /* Command buffer pool */ VkCommandPoolCreateInfo cmd_buffer_pool_info = {}; cmd_buffer_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cmd_buffer_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; cmd_buffer_pool_info.queueFamilyIndex = device_data->graphic_queue->family_index; - err = device_data->vtable.CreateCommandPool(device_data->device, - &cmd_buffer_pool_info, - NULL, &data->command_pool); - check_vk_result(err); - - VkCommandBuffer cmd_bufs[ARRAY_SIZE(data->frame_data)]; - - VkCommandBufferAllocateInfo cmd_buffer_info = {}; - cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - cmd_buffer_info.commandPool = data->command_pool; - cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - cmd_buffer_info.commandBufferCount = 2; - err = device_data->vtable.AllocateCommandBuffers(device_data->device, - &cmd_buffer_info, - cmd_bufs); - check_vk_result(err); - - for (uint32_t i = 0; i < ARRAY_SIZE(data->frame_data); i++) - data->frame_data[i].command_buffer = cmd_bufs[i]; - - - /* Submission fence */ - VkFenceCreateInfo fence_info = {}; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; - err = device_data->vtable.CreateFence(device_data->device, &fence_info, - NULL, &data->fence); - check_vk_result(err); - + VK_CHECK(device_data->vtable.CreateCommandPool(device_data->device, + &cmd_buffer_pool_info, + NULL, &data->command_pool)); } static void shutdown_swapchain_data(struct swapchain_data *data) { struct device_data *device_data = data->device; + list_for_each_entry_safe(struct overlay_draw, draw, &data->draws, link) { + device_data->vtable.DestroySemaphore(device_data->device, 
draw->cross_engine_semaphore, NULL); + device_data->vtable.DestroySemaphore(device_data->device, draw->semaphore, NULL); + device_data->vtable.DestroyFence(device_data->device, draw->fence, NULL); + device_data->vtable.DestroyBuffer(device_data->device, draw->vertex_buffer, NULL); + device_data->vtable.DestroyBuffer(device_data->device, draw->index_buffer, NULL); + device_data->vtable.FreeMemory(device_data->device, draw->vertex_buffer_mem, NULL); + device_data->vtable.FreeMemory(device_data->device, draw->index_buffer_mem, NULL); + } + for (uint32_t i = 0; i < data->n_images; i++) { device_data->vtable.DestroyImageView(device_data->device, data->image_views[i], NULL); device_data->vtable.DestroyFramebuffer(device_data->device, data->framebuffers[i], NULL); @@ -1348,30 +1799,11 @@ static void shutdown_swapchain_data(struct swapchain_data *data) device_data->vtable.DestroyRenderPass(device_data->device, data->render_pass, NULL); - for (uint32_t i = 0; i < ARRAY_SIZE(data->frame_data); i++) { - device_data->vtable.FreeCommandBuffers(device_data->device, - data->command_pool, - 1, &data->frame_data[i].command_buffer); - if (data->frame_data[i].vertex_buffer) - device_data->vtable.DestroyBuffer(device_data->device, data->frame_data[i].vertex_buffer, NULL); - if (data->frame_data[i].index_buffer) - device_data->vtable.DestroyBuffer(device_data->device, data->frame_data[i].index_buffer, NULL); - if (data->frame_data[i].vertex_buffer_mem) - device_data->vtable.FreeMemory(device_data->device, data->frame_data[i].vertex_buffer_mem, NULL); - if (data->frame_data[i].index_buffer_mem) - device_data->vtable.FreeMemory(device_data->device, data->frame_data[i].index_buffer_mem, NULL); - } device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL); - device_data->vtable.DestroyFence(device_data->device, data->fence, NULL); - if (data->submission_semaphore) - device_data->vtable.DestroySemaphore(device_data->device, data->submission_semaphore, NULL); - 
device_data->vtable.DestroyPipeline(device_data->device, data->pipeline, NULL); device_data->vtable.DestroyPipelineLayout(device_data->device, data->pipeline_layout, NULL); - device_data->vtable.FreeDescriptorSets(device_data->device, data->descriptor_pool, - 1, &data->descriptor_set); device_data->vtable.DestroyDescriptorPool(device_data->device, data->descriptor_pool, NULL); device_data->vtable.DestroyDescriptorSetLayout(device_data->device, @@ -1388,22 +1820,34 @@ static void shutdown_swapchain_data(struct swapchain_data *data) ImGui::DestroyContext(data->imgui_context); } -static void before_present(struct swapchain_data *swapchain_data, - unsigned imageIndex) +static struct overlay_draw *before_present(struct swapchain_data *swapchain_data, + struct queue_data *present_queue, + const VkSemaphore *wait_semaphores, + unsigned n_wait_semaphores, + unsigned imageIndex) { + struct instance_data *instance_data = swapchain_data->device->instance; + struct overlay_draw *draw = NULL; + snapshot_swapchain_frame(swapchain_data); - compute_swapchain_display(swapchain_data); - render_swapchain_display(swapchain_data, imageIndex); + if (!instance_data->params.no_display && swapchain_data->n_frames > 0) { + compute_swapchain_display(swapchain_data); + draw = render_swapchain_display(swapchain_data, present_queue, + wait_semaphores, n_wait_semaphores, + imageIndex); + } + + return draw; } -VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateSwapchainKHR( +static VkResult overlay_CreateSwapchainKHR( VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain) { - struct device_data *device_data = FIND_DEVICE_DATA(device); + struct device_data *device_data = FIND(struct device_data, device); VkResult result = device_data->vtable.CreateSwapchainKHR(device, pCreateInfo, pAllocator, pSwapchain); if (result != VK_SUCCESS) return result; @@ -1412,58 +1856,152 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateSwapchainKHR( 
return result; } -VKAPI_ATTR void VKAPI_CALL overlay_DestroySwapchainKHR( +static void overlay_DestroySwapchainKHR( VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain); + if (swapchain == VK_NULL_HANDLE) { + struct device_data *device_data = FIND(struct device_data, device); + device_data->vtable.DestroySwapchainKHR(device, swapchain, pAllocator); + return; + } + + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, swapchain); shutdown_swapchain_data(swapchain_data); swapchain_data->device->vtable.DestroySwapchainKHR(device, swapchain, pAllocator); destroy_swapchain_data(swapchain_data); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_QueuePresentKHR( +static VkResult overlay_QueuePresentKHR( VkQueue queue, const VkPresentInfoKHR* pPresentInfo) { - struct queue_data *queue_data = FIND_QUEUE_DATA(queue); + struct queue_data *queue_data = FIND(struct queue_data, queue); struct device_data *device_data = queue_data->device; - - /* If we present on the graphic queue this layer is using to draw an - * overlay, we don't need more than submitting the overlay draw prior to - * present. - */ - if (queue_data == device_data->graphic_queue) { - for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pPresentInfo->pSwapchains[i]); - before_present(swapchain_data, pPresentInfo->pImageIndices[i]); + struct instance_data *instance_data = device_data->instance; + uint32_t query_results[OVERLAY_QUERY_COUNT]; + + device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame]++; + + if (list_length(&queue_data->running_command_buffer) > 0) { + /* Before getting the query results, make sure the operations have + * completed. 
+ */ + VK_CHECK(device_data->vtable.ResetFences(device_data->device, + 1, &queue_data->queries_fence)); + VK_CHECK(device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence)); + VK_CHECK(device_data->vtable.WaitForFences(device_data->device, + 1, &queue_data->queries_fence, + VK_FALSE, UINT64_MAX)); + + /* Now get the results. */ + list_for_each_entry_safe(struct command_buffer_data, cmd_buffer_data, + &queue_data->running_command_buffer, link) { + list_delinit(&cmd_buffer_data->link); + + if (cmd_buffer_data->pipeline_query_pool) { + memset(query_results, 0, sizeof(query_results)); + VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index, 1, + sizeof(uint32_t) * OVERLAY_QUERY_COUNT, + query_results, 0, VK_QUERY_RESULT_WAIT_BIT)); + + for (uint32_t i = OVERLAY_PARAM_ENABLED_vertices; + i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) { + device_data->frame_stats.stats[i] += query_results[i - OVERLAY_PARAM_ENABLED_vertices]; + } + } + if (cmd_buffer_data->timestamp_query_pool) { + uint64_t gpu_timestamps[2] = { 0 }; + VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2, 2, + 2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t), + VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT)); + + gpu_timestamps[0] &= queue_data->timestamp_mask; + gpu_timestamps[1] &= queue_data->timestamp_mask; + device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] += + (gpu_timestamps[1] - gpu_timestamps[0]) * + device_data->properties.limits.timestampPeriod; + } } - return queue_data->device->vtable.QueuePresentKHR(queue, pPresentInfo); } - /* Otherwise we need to do cross queue synchronization to tie the overlay - * draw into the present queue. + /* Otherwise we need to add our overlay drawing semaphore to the list of + * semaphores to wait on. 
If we don't do that the presented picture might + * have incomplete overlay drawings. */ - VkPresentInfoKHR present_info = *pPresentInfo; - VkSemaphore *semaphores = - (VkSemaphore *)malloc(sizeof(VkSemaphore) * (pPresentInfo->waitSemaphoreCount + pPresentInfo->swapchainCount)); - for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++) - semaphores[i] = pPresentInfo->pWaitSemaphores[i]; - for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pPresentInfo->pSwapchains[i]); - before_present(swapchain_data, pPresentInfo->pImageIndices[i]); - semaphores[pPresentInfo->waitSemaphoreCount + i] = swapchain_data->submission_semaphore; + VkResult result = VK_SUCCESS; + if (instance_data->params.no_display) { + for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { + VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i]; + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, swapchain); + + uint32_t image_index = pPresentInfo->pImageIndices[i]; + + before_present(swapchain_data, + queue_data, + pPresentInfo->pWaitSemaphores, + pPresentInfo->waitSemaphoreCount, + image_index); + + VkPresentInfoKHR present_info = *pPresentInfo; + present_info.swapchainCount = 1; + present_info.pSwapchains = &swapchain; + present_info.pImageIndices = &image_index; + + uint64_t ts0 = os_time_get(); + result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); + uint64_t ts1 = os_time_get(); + swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0; + } + } else { + for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { + VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i]; + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, swapchain); + + uint32_t image_index = pPresentInfo->pImageIndices[i]; + + VkPresentInfoKHR present_info = *pPresentInfo; + present_info.swapchainCount = 1; + present_info.pSwapchains = &swapchain; 
+ present_info.pImageIndices = &image_index; + + struct overlay_draw *draw = before_present(swapchain_data, + queue_data, + pPresentInfo->pWaitSemaphores, + pPresentInfo->waitSemaphoreCount, + image_index); + + /* Because the submission of the overlay draw waits on the semaphores + * handed for present, we don't need to have this present operation + * wait on them as well, we can just wait on the overlay submission + * semaphore. + */ + present_info.pWaitSemaphores = &draw->semaphore; + present_info.waitSemaphoreCount = 1; + + uint64_t ts0 = os_time_get(); + VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); + uint64_t ts1 = os_time_get(); + swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0; + if (pPresentInfo->pResults) + pPresentInfo->pResults[i] = chain_result; + if (chain_result != VK_SUCCESS && result == VK_SUCCESS) + result = chain_result; + } } - present_info.pWaitSemaphores = semaphores; - present_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount + pPresentInfo->swapchainCount; - VkResult result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); - free(semaphores); return result; } -VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImageKHR( +static VkResult overlay_AcquireNextImageKHR( VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, @@ -1471,7 +2009,8 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImageKHR( VkFence fence, uint32_t* pImageIndex) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain); + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, swapchain); struct device_data *device_data = swapchain_data->device; uint64_t ts0 = os_time_get(); @@ -1479,48 +2018,47 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImageKHR( semaphore, fence, pImageIndex); uint64_t ts1 = os_time_get(); - swapchain_data->acquire_times[swapchain_data->n_acquire % - ARRAY_SIZE(swapchain_data->acquire_times)] = - 
((double)ts1 - (double)ts0) / 1000.0; - swapchain_data->n_acquire++; + swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0; + swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++; return result; } -VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImage2KHR( +static VkResult overlay_AcquireNextImage2KHR( VkDevice device, const VkAcquireNextImageInfoKHR* pAcquireInfo, uint32_t* pImageIndex) { - struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pAcquireInfo->swapchain); + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, pAcquireInfo->swapchain); struct device_data *device_data = swapchain_data->device; uint64_t ts0 = os_time_get(); VkResult result = device_data->vtable.AcquireNextImage2KHR(device, pAcquireInfo, pImageIndex); uint64_t ts1 = os_time_get(); - swapchain_data->acquire_times[swapchain_data->n_acquire % - ARRAY_SIZE(swapchain_data->acquire_times)] = - ((double)ts1 - (double)ts0) / 1000.0; - swapchain_data->n_acquire++; + swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0; + swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++; return result; } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDraw( +static void overlay_CmdDraw( VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance) { - struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++; + struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance); - device_data->stats.stats[FRAME_STAT_DRAW]++; } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexed( +static void overlay_CmdDrawIndexed( VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, 
@@ -1528,37 +2066,43 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexed( int32_t vertexOffset, uint32_t firstInstance) { - struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++; + struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); - device_data->stats.stats[FRAME_STAT_DRAW_INDEXED]++; } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirect( +static void overlay_CmdDrawIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride) { - struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++; + struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); - device_data->stats.stats[FRAME_STAT_DRAW_INDIRECT]++; } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirect( +static void overlay_CmdDrawIndexedIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride) { - struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++; + struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride); - device_data->stats.stats[FRAME_STAT_DRAW_INDEXED_INDIRECT]++; } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR( +static void 
overlay_CmdDrawIndirectCount( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, @@ -1567,14 +2111,16 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR( uint32_t maxDrawCount, uint32_t stride) { - struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer); - device_data->vtable.CmdDrawIndirectCountKHR(commandBuffer, buffer, offset, - countBuffer, countBufferOffset, - maxDrawCount, stride); - device_data->stats.stats[FRAME_STAT_DRAW_INDIRECT_COUNT]++; + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++; + struct device_data *device_data = cmd_buffer_data->device; + device_data->vtable.CmdDrawIndirectCount(commandBuffer, buffer, offset, + countBuffer, countBufferOffset, + maxDrawCount, stride); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR( +static void overlay_CmdDrawIndexedIndirectCount( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, @@ -1583,101 +2129,330 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR( uint32_t maxDrawCount, uint32_t stride) { - struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer); - device_data->vtable.CmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, - countBuffer, countBufferOffset, - maxDrawCount, stride); - device_data->stats.stats[FRAME_STAT_DRAW_INDEXED_INDIRECT_COUNT]++; + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++; + struct device_data *device_data = cmd_buffer_data->device; + device_data->vtable.CmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, + countBuffer, countBufferOffset, + maxDrawCount, stride); } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatch( +static void overlay_CmdDispatch( VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t 
groupCountY, uint32_t groupCountZ) { - struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++; + struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); - device_data->stats.stats[FRAME_STAT_DISPATCH]++; } -VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatchIndirect( +static void overlay_CmdDispatchIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) { - struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++; + struct device_data *device_data = cmd_buffer_data->device; device_data->vtable.CmdDispatchIndirect(commandBuffer, buffer, offset); - device_data->stats.stats[FRAME_STAT_DISPATCH_INDIRECT]++; } -VKAPI_ATTR void VKAPI_CALL overlay_CmdBindPipeline( +static void overlay_CmdBindPipeline( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline) { - struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer); - device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline); + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_GRAPHICS: device_data->stats.stats[FRAME_STAT_PIPELINE_GRAPHICS]++; break; - case VK_PIPELINE_BIND_POINT_COMPUTE: device_data->stats.stats[FRAME_STAT_PIPELINE_COMPUTE]++; break; - case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: device_data->stats.stats[FRAME_STAT_PIPELINE_RAYTRACING]++; break; + case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break; + case 
VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break; + case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break; default: break; } + struct device_data *device_data = cmd_buffer_data->device; + device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline); +} + +static VkResult overlay_BeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + struct device_data *device_data = cmd_buffer_data->device; + + memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); + + /* We don't record any query in secondary command buffers, just make sure + * we have the right inheritance. + */ + if (cmd_buffer_data->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + VkCommandBufferBeginInfo *begin_info = (VkCommandBufferBeginInfo *) + clone_chain((const struct VkBaseInStructure *)pBeginInfo); + VkCommandBufferInheritanceInfo *parent_inhe_info = (VkCommandBufferInheritanceInfo *) + vk_find_struct(begin_info, COMMAND_BUFFER_INHERITANCE_INFO); + VkCommandBufferInheritanceInfo inhe_info = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, + NULL, + VK_NULL_HANDLE, + 0, + VK_NULL_HANDLE, + VK_FALSE, + 0, + overlay_query_flags, + }; + + if (parent_inhe_info) + parent_inhe_info->pipelineStatistics = overlay_query_flags; + else { + inhe_info.pNext = begin_info->pNext; + begin_info->pNext = &inhe_info; + } + + VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); + + if (!parent_inhe_info) + begin_info->pNext = inhe_info.pNext; + + free_chain((struct VkBaseOutStructure *)begin_info); + + return result; + } + + /* Otherwise record a begin query as first command. 
*/ + VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); + + if (result == VK_SUCCESS) { + if (cmd_buffer_data->pipeline_query_pool) { + device_data->vtable.CmdResetQueryPool(commandBuffer, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index, 1); + } + if (cmd_buffer_data->timestamp_query_pool) { + device_data->vtable.CmdResetQueryPool(commandBuffer, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2, 2); + } + if (cmd_buffer_data->pipeline_query_pool) { + device_data->vtable.CmdBeginQuery(commandBuffer, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index, 0); + } + if (cmd_buffer_data->timestamp_query_pool) { + device_data->vtable.CmdWriteTimestamp(commandBuffer, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2); + } + } + + return result; +} + +static VkResult overlay_EndCommandBuffer( + VkCommandBuffer commandBuffer) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + struct device_data *device_data = cmd_buffer_data->device; + + if (cmd_buffer_data->timestamp_query_pool) { + device_data->vtable.CmdWriteTimestamp(commandBuffer, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2 + 1); + } + if (cmd_buffer_data->pipeline_query_pool) { + device_data->vtable.CmdEndQuery(commandBuffer, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index); + } + + return device_data->vtable.EndCommandBuffer(commandBuffer); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_AllocateCommandBuffers(VkDevice device, - const VkCommandBufferAllocateInfo* pAllocateInfo, - VkCommandBuffer* pCommandBuffers) +static VkResult overlay_ResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) { - struct device_data *device_data = FIND_DEVICE_DATA(device); + struct command_buffer_data 
*cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + struct device_data *device_data = cmd_buffer_data->device; + + memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); + return device_data->vtable.ResetCommandBuffer(commandBuffer, flags); +} + +static void overlay_CmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + struct device_data *device_data = cmd_buffer_data->device; + + /* Add the stats of the executed command buffers to the primary one. */ + for (uint32_t c = 0; c < commandBufferCount; c++) { + struct command_buffer_data *sec_cmd_buffer_data = + FIND(struct command_buffer_data, pCommandBuffers[c]); + + for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) + cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s]; + } + + device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers); +} + +static VkResult overlay_AllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers) +{ + struct device_data *device_data = FIND(struct device_data, device); VkResult result = device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers); - if (result != VK_SUCCESS) return result; + if (result != VK_SUCCESS) + return result; + + VkQueryPool pipeline_query_pool = VK_NULL_HANDLE; + VkQueryPool timestamp_query_pool = VK_NULL_HANDLE; + if (device_data->instance->pipeline_statistics_enabled && + pAllocateInfo->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + VkQueryPoolCreateInfo pool_info = { + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + NULL, + 0, + VK_QUERY_TYPE_PIPELINE_STATISTICS, + pAllocateInfo->commandBufferCount, + overlay_query_flags, + }; + VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info, + NULL, 
&pipeline_query_pool)); + } + if (device_data->instance->params.enabled[OVERLAY_PARAM_ENABLED_gpu_timing]) { + VkQueryPoolCreateInfo pool_info = { + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + NULL, + 0, + VK_QUERY_TYPE_TIMESTAMP, + pAllocateInfo->commandBufferCount * 2, + 0, + }; + VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info, + NULL, &timestamp_query_pool)); + } + + for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) { + new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level, + pipeline_query_pool, timestamp_query_pool, + i, device_data); + } - for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) - map_object(pCommandBuffers[i], device_data); + if (pipeline_query_pool) + map_object(HKEY(pipeline_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount); + if (timestamp_query_pool) + map_object(HKEY(timestamp_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount); return result; } -VKAPI_ATTR void VKAPI_CALL overlay_FreeCommandBuffers(VkDevice device, - VkCommandPool commandPool, - uint32_t commandBufferCount, - const VkCommandBuffer* pCommandBuffers) +static void overlay_FreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) { - struct device_data *device_data = FIND_DEVICE_DATA(device); + struct device_data *device_data = FIND(struct device_data, device); + for (uint32_t i = 0; i < commandBufferCount; i++) { + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, pCommandBuffers[i]); - for (uint32_t i = 0; i < commandBufferCount; i++) - unmap_object(pCommandBuffers[i]); + /* It is legal to free a NULL command buffer*/ + if (!cmd_buffer_data) + continue; + + uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->pipeline_query_pool)); + if (count == 1) { + unmap_object(HKEY(cmd_buffer_data->pipeline_query_pool)); + 
device_data->vtable.DestroyQueryPool(device_data->device, + cmd_buffer_data->pipeline_query_pool, NULL); + } else if (count != 0) { + map_object(HKEY(cmd_buffer_data->pipeline_query_pool), (void *)(uintptr_t)(count - 1)); + } + count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->timestamp_query_pool)); + if (count == 1) { + unmap_object(HKEY(cmd_buffer_data->timestamp_query_pool)); + device_data->vtable.DestroyQueryPool(device_data->device, + cmd_buffer_data->timestamp_query_pool, NULL); + } else if (count != 0) { + map_object(HKEY(cmd_buffer_data->timestamp_query_pool), (void *)(uintptr_t)(count - 1)); + } + destroy_command_buffer_data(cmd_buffer_data); + } device_data->vtable.FreeCommandBuffers(device, commandPool, commandBufferCount, pCommandBuffers); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_QueueSubmit( +static VkResult overlay_QueueSubmit( VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) { - struct queue_data *queue_data = FIND_QUEUE_DATA(queue); + struct queue_data *queue_data = FIND(struct queue_data, queue); struct device_data *device_data = queue_data->device; - device_data->stats.stats[FRAME_STAT_SUBMIT]++; + device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++; + + for (uint32_t s = 0; s < submitCount; s++) { + for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) { + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, pSubmits[s].pCommandBuffers[c]); + + /* Merge the submitted command buffer stats into the device. */ + for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++) + device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st]; + + /* Attach the command buffer to the queue so we remember to read its + * pipeline statistics & timestamps at QueuePresent(). 
+ */ + if (!cmd_buffer_data->pipeline_query_pool && + !cmd_buffer_data->timestamp_query_pool) + continue; + + if (list_is_empty(&cmd_buffer_data->link)) { + list_addtail(&cmd_buffer_data->link, + &queue_data->running_command_buffer); + } else { + fprintf(stderr, "Command buffer submitted multiple times before present.\n" + "This could lead to invalid data.\n"); + } + } + } return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateDevice( +static VkResult overlay_CreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { - struct instance_data *instance_data = FIND_PHYSICAL_DEVICE_DATA(physicalDevice); - VkLayerDeviceCreateInfo *chain_info = get_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); + struct instance_data *instance_data = + FIND(struct instance_data, physicalDevice); + VkLayerDeviceCreateInfo *chain_info = + get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); assert(chain_info->u.pLayerInfo); PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; @@ -1690,37 +2465,54 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateDevice( // Advance the link info for the next element on the chain chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; - VkResult result = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice); + VkPhysicalDeviceFeatures device_features = {}; + VkDeviceCreateInfo device_info = *pCreateInfo; + + if (pCreateInfo->pEnabledFeatures) + device_features = *(pCreateInfo->pEnabledFeatures); + if (instance_data->pipeline_statistics_enabled) { + device_features.inheritedQueries = true; + device_features.pipelineStatisticsQuery = true; + } + device_info.pEnabledFeatures = &device_features; + + + VkResult result = fpCreateDevice(physicalDevice, &device_info, pAllocator, pDevice); if (result != VK_SUCCESS) return result; struct device_data 
*device_data = new_device_data(*pDevice, instance_data); device_data->physical_device = physicalDevice; - layer_init_device_dispatch_table(*pDevice, &device_data->vtable, fpGetDeviceProcAddr); + vk_load_device_commands(*pDevice, fpGetDeviceProcAddr, &device_data->vtable); instance_data->vtable.GetPhysicalDeviceProperties(device_data->physical_device, &device_data->properties); + VkLayerDeviceCreateInfo *load_data_info = + get_device_chain_info(pCreateInfo, VK_LOADER_DATA_CALLBACK); + device_data->set_device_loader_data = load_data_info->u.pfnSetDeviceLoaderData; + device_map_queues(device_data, pCreateInfo); return result; } -VKAPI_ATTR void VKAPI_CALL overlay_DestroyDevice( +static void overlay_DestroyDevice( VkDevice device, const VkAllocationCallbacks* pAllocator) { - struct device_data *device_data = FIND_DEVICE_DATA(device); + struct device_data *device_data = FIND(struct device_data, device); device_unmap_queues(device_data); device_data->vtable.DestroyDevice(device, pAllocator); destroy_device_data(device_data); } -VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateInstance( +static VkResult overlay_CreateInstance( const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) { - VkLayerInstanceCreateInfo *chain_info = get_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); + VkLayerInstanceCreateInfo *chain_info = + get_instance_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); assert(chain_info->u.pLayerInfo); PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = @@ -1738,37 +2530,36 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateInstance( if (result != VK_SUCCESS) return result; struct instance_data *instance_data = new_instance_data(*pInstance); - layer_init_instance_dispatch_table(instance_data->instance, - &instance_data->vtable, - fpGetInstanceProcAddr); + vk_load_instance_commands(instance_data->instance, + fpGetInstanceProcAddr, + &instance_data->vtable); instance_data_map_physical_devices(instance_data, true); - const char 
*stats_config = getenv("VK_LAYER_MESA_OVERLAY_STATS"); - instance_data->enabled_stats = parse_debug_string(stats_config, - enable_flags); - - if (instance_data->enabled_stats & FRAME_STAT_ENABLED(HELP)) { - fprintf(stderr, "Available stats:\n"); - for (uint32_t i = 0; enable_flags[i].string != NULL; i++) - fprintf(stderr, "\t%s\n", enable_flags[i].string); - fprintf(stderr, "Position layer using VK_LAYER_MESA_OVERLAY_POSITION=\n" - "\ttop-left\n" - "\ttop-right\n" - "\tbottom-left\n" - "\tbottom-right\n"); - } + parse_overlay_env(&instance_data->params, getenv("VK_LAYER_MESA_OVERLAY_CONFIG")); - instance_data->position = - parse_layer_position(getenv("VK_LAYER_MESA_OVERLAY_POSITION")); + /* If there's no control file, and an output_file was specified, start + * capturing fps data right away. + */ + instance_data->capture_enabled = + instance_data->params.output_file && instance_data->params.control < 0; + instance_data->capture_started = instance_data->capture_enabled; + + for (int i = OVERLAY_PARAM_ENABLED_vertices; + i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) { + if (instance_data->params.enabled[i]) { + instance_data->pipeline_statistics_enabled = true; + break; + } + } return result; } -VKAPI_ATTR void VKAPI_CALL overlay_DestroyInstance( +static void overlay_DestroyInstance( VkInstance instance, const VkAllocationCallbacks* pAllocator) { - struct instance_data *instance_data = FIND_INSTANCE_DATA(instance); + struct instance_data *instance_data = FIND(struct instance_data, instance); instance_data_map_physical_devices(instance_data, false); instance_data->vtable.DestroyInstance(instance, pAllocator); destroy_instance_data(instance_data); @@ -1780,15 +2571,24 @@ static const struct { } name_to_funcptr_map[] = { { "vkGetDeviceProcAddr", (void *) vkGetDeviceProcAddr }, #define ADD_HOOK(fn) { "vk" # fn, (void *) overlay_ ## fn } +#define ADD_ALIAS_HOOK(alias, fn) { "vk" # alias, (void *) overlay_ ## fn } ADD_HOOK(AllocateCommandBuffers), + 
ADD_HOOK(FreeCommandBuffers), + ADD_HOOK(ResetCommandBuffer), + ADD_HOOK(BeginCommandBuffer), + ADD_HOOK(EndCommandBuffer), + ADD_HOOK(CmdExecuteCommands), ADD_HOOK(CmdDraw), ADD_HOOK(CmdDrawIndexed), + ADD_HOOK(CmdDrawIndirect), ADD_HOOK(CmdDrawIndexedIndirect), ADD_HOOK(CmdDispatch), ADD_HOOK(CmdDispatchIndirect), - ADD_HOOK(CmdDrawIndirectCountKHR), - ADD_HOOK(CmdDrawIndexedIndirectCountKHR), + ADD_HOOK(CmdDrawIndirectCount), + ADD_ALIAS_HOOK(CmdDrawIndirectCountKHR, CmdDrawIndirectCount), + ADD_HOOK(CmdDrawIndexedIndirectCount), + ADD_ALIAS_HOOK(CmdDrawIndexedIndirectCountKHR, CmdDrawIndexedIndirectCount), ADD_HOOK(CmdBindPipeline), @@ -1799,10 +2599,12 @@ static const struct { ADD_HOOK(AcquireNextImage2KHR), ADD_HOOK(QueueSubmit), - ADD_HOOK(CreateInstance), - ADD_HOOK(DestroyInstance), + ADD_HOOK(CreateDevice), ADD_HOOK(DestroyDevice), + + ADD_HOOK(CreateInstance), + ADD_HOOK(DestroyInstance), #undef ADD_HOOK }; @@ -1824,7 +2626,7 @@ VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(VkD if (dev == NULL) return NULL; - struct device_data *device_data = FIND_DEVICE_DATA(dev); + struct device_data *device_data = FIND(struct device_data, dev); if (device_data->vtable.GetDeviceProcAddr == NULL) return NULL; return device_data->vtable.GetDeviceProcAddr(dev, funcName); } @@ -1837,7 +2639,7 @@ VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(V if (instance == NULL) return NULL; - struct instance_data *instance_data = FIND_INSTANCE_DATA(instance); + struct instance_data *instance_data = FIND(struct instance_data, instance); if (instance_data->vtable.GetInstanceProcAddr == NULL) return NULL; return instance_data->vtable.GetInstanceProcAddr(instance, funcName); }