#include <stdlib.h>
#include <assert.h>
-#include <vk_loader_platform.h>
#include <vulkan/vulkan.h>
-#include <vk_dispatch_table_helper.h>
#include <vulkan/vk_layer.h>
-#include "vk_layer_data.h"
-#include "vk_layer_table.h"
-#include "vk_layer_extension_utils.h"
#include "imgui.h"
+#include "overlay_params.h"
+
#include "util/debug.h"
#include "util/hash_table.h"
+#include "util/list.h"
#include "util/ralloc.h"
#include "util/os_time.h"
#include "util/simple_mtx.h"
#include "vk_enum_to_str.h"
-
-enum layer_position {
- LAYER_POSITION_TOP_LEFT,
- LAYER_POSITION_TOP_RIGHT,
- LAYER_POSITION_BOTTOM_LEFT,
- LAYER_POSITION_BOTTOM_RIGHT,
-};
-
-static enum layer_position
-parse_layer_position(const char *str)
-{
- if (!str || !strcmp(str, "top-left"))
- return LAYER_POSITION_TOP_LEFT;
- if (!strcmp(str, "top-right"))
- return LAYER_POSITION_TOP_RIGHT;
- if (!strcmp(str, "bottom-left"))
- return LAYER_POSITION_BOTTOM_LEFT;
- if (!strcmp(str, "bottom-right"))
- return LAYER_POSITION_BOTTOM_RIGHT;
- return LAYER_POSITION_TOP_LEFT;
-}
+#include "vk_util.h"
/* Mapped from VkInstace/VkPhysicalDevice */
struct instance_data {
- VkLayerInstanceDispatchTable vtable;
+ struct vk_instance_dispatch_table vtable;
VkInstance instance;
- enum layer_position position;
- uint64_t enabled_stats;
-};
+ struct overlay_params params;
+ bool pipeline_statistics_enabled;
-enum frame_stat_type {
- FRAME_STAT_SUBMIT,
- FRAME_STAT_DRAW,
- FRAME_STAT_DRAW_INDEXED,
- FRAME_STAT_DRAW_INDIRECT,
- FRAME_STAT_DRAW_INDEXED_INDIRECT,
- FRAME_STAT_DRAW_INDIRECT_COUNT,
- FRAME_STAT_DRAW_INDEXED_INDIRECT_COUNT,
- FRAME_STAT_DISPATCH,
- FRAME_STAT_DISPATCH_INDIRECT,
- FRAME_STAT_PIPELINE_GRAPHICS,
- FRAME_STAT_PIPELINE_COMPUTE,
- FRAME_STAT_PIPELINE_RAYTRACING,
-
- FRAME_STAT_COUNT,
-
- FRAME_STAT_ACQUIRE_TIMING = FRAME_STAT_COUNT,
- FRAME_STAT_HELP,
-};
-
-#define FRAME_STAT_ENABLED(id) (1ULL << (FRAME_STAT_ ## id))
-
-static struct debug_control enable_flags[] = {
- { "submit", FRAME_STAT_ENABLED(SUBMIT) },
- { "draw", FRAME_STAT_ENABLED(DRAW) },
- { "draw-indexed", FRAME_STAT_ENABLED(DRAW_INDEXED) },
- { "draw-indirect", FRAME_STAT_ENABLED(DRAW_INDIRECT) },
- { "draw-indexed-indirect", FRAME_STAT_ENABLED(DRAW_INDEXED_INDIRECT) },
- { "draw-indirect-count", FRAME_STAT_ENABLED(DRAW_INDIRECT_COUNT) },
- { "draw-indexed-indirect-count", FRAME_STAT_ENABLED(DRAW_INDEXED_INDIRECT_COUNT) },
- { "dispatch", FRAME_STAT_ENABLED(DISPATCH) },
- { "dispatch-indirect", FRAME_STAT_ENABLED(DISPATCH_INDIRECT) },
- { "pipeline-graphics", FRAME_STAT_ENABLED(PIPELINE_GRAPHICS) },
- { "pipeline-compute", FRAME_STAT_ENABLED(PIPELINE_COMPUTE) },
- { "pipeline-raytracing", FRAME_STAT_ENABLED(PIPELINE_RAYTRACING) },
- { "acquire-timing", FRAME_STAT_ENABLED(ACQUIRE_TIMING) },
- { "help", FRAME_STAT_ENABLED(HELP) },
- { NULL, 0 },
+ bool first_line_printed;
};
struct frame_stat {
- uint32_t stats[FRAME_STAT_COUNT];
+ uint64_t stats[OVERLAY_PARAM_ENABLED_MAX]; /* One 64-bit counter per overlay parameter, indexed with OVERLAY_PARAM_ENABLED_* values. */
};
-/* Mapped from VkDevice/VkCommandBuffer */
+/* Mapped from VkDevice */
struct queue_data;
struct device_data {
struct instance_data *instance;
- VkLayerDispatchTable vtable;
+ PFN_vkSetDeviceLoaderData set_device_loader_data;
+
+ struct vk_device_dispatch_table vtable;
VkPhysicalDevice physical_device;
VkDevice device;
struct queue_data **queues;
uint32_t n_queues;
+ /* For a single frame */
+ struct frame_stat frame_stats;
+};
+
+/* Mapped from VkCommandBuffer */
+struct command_buffer_data { /* Per-VkCommandBuffer bookkeeping, filled in by new_command_buffer_data(). */
+ struct device_data *device; /* Device record handed to new_command_buffer_data(). */
+
+ VkCommandBufferLevel level;
+
+ VkCommandBuffer cmd_buffer; /* Handle under which this record is registered in the object map. */
+ VkQueryPool pipeline_query_pool;
+ VkQueryPool timestamp_query_pool;
+ uint32_t query_index; /* NOTE(review): presumably this command buffer's slot in the query pools above - confirm against the allocation code. */
+
struct frame_stat stats;
+
+ struct list_head link; /* link into queue_data::running_command_buffer */
};
/* Mapped from VkQueue */
VkQueue queue;
VkQueueFlags flags;
uint32_t family_index;
+ uint64_t timestamp_mask;
+
+ VkFence queries_fence;
+
+ struct list_head running_command_buffer;
+};
+
+struct overlay_draw { /* Resources for one overlay rendering submission; created and recycled by get_overlay_draw(). */
+ struct list_head link; /* Entry in swapchain_data::draws. */
+
+ VkCommandBuffer command_buffer; /* Primary command buffer allocated from the swapchain's command pool. */
+
+ VkSemaphore semaphore;
+ VkFence fence; /* Polled with GetFenceStatus in get_overlay_draw() to decide whether this draw can be reused. */
+
+ VkBuffer vertex_buffer;
+ VkDeviceMemory vertex_buffer_mem;
+ VkDeviceSize vertex_buffer_size;
+
+ VkBuffer index_buffer;
+ VkDeviceMemory index_buffer_mem;
+ VkDeviceSize index_buffer_size;
};
/* Mapped from VkSwapchainKHR */
VkCommandPool command_pool;
- struct {
- VkCommandBuffer command_buffer;
-
- VkBuffer vertex_buffer;
- VkDeviceMemory vertex_buffer_mem;
- VkDeviceSize vertex_buffer_size;
-
- VkBuffer index_buffer;
- VkDeviceMemory index_buffer_mem;
- VkDeviceSize index_buffer_size;
- } frame_data[2];
+ struct list_head draws; /* List of struct overlay_draw */
bool font_uploaded;
VkImage font_image;
VkBuffer upload_font_buffer;
VkDeviceMemory upload_font_buffer_mem;
- VkFence fence;
- VkSemaphore submission_semaphore;
-
/**/
ImGuiContext* imgui_context;
ImVec2 window_size;
uint64_t n_frames;
uint64_t last_present_time;
- double frame_times[200];
+ unsigned n_frames_since_update;
+ uint64_t last_fps_update;
+ double fps;
- double acquire_times[200];
- uint64_t n_acquire;
-
- enum frame_stat_type stat_selector;
+ enum overlay_param_enabled stat_selector;
+ double time_dividor;
struct frame_stat stats_min, stats_max;
- struct frame_stat stats[200];
+ struct frame_stat frames_stats[200];
+
+ /* Over a single frame */
+ struct frame_stat frame_stats;
+
+ /* Over fps_sampling_period */
+ struct frame_stat accumulated_stats;
};
-static struct hash_table *vk_object_to_data = NULL;
+static const VkQueryPipelineStatisticFlags overlay_query_flags = /* Set of pipeline statistics the overlay asks for. */
+ VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT |
+ VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
+#define OVERLAY_QUERY_COUNT (11) /* Must equal the number of bits OR-ed into overlay_query_flags (currently 11). */
+
+static struct hash_table_u64 *vk_object_to_data = NULL;
static simple_mtx_t vk_object_to_data_mutex = _SIMPLE_MTX_INITIALIZER_NP;
thread_local ImGuiContext* __MesaImGui;
static inline void ensure_vk_object_map(void)
{
- if (!vk_object_to_data) {
- vk_object_to_data = _mesa_hash_table_create(NULL,
- _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- }
+ if (!vk_object_to_data) /* Create the handle -> data table lazily; callers in this file hold vk_object_to_data_mutex. */
+ vk_object_to_data = _mesa_hash_table_u64_create(NULL);
}
-#define FIND_SWAPCHAIN_DATA(obj) ((struct swapchain_data *)find_object_data((void *) obj))
-#define FIND_DEVICE_DATA(obj) ((struct device_data *)find_object_data((void *) obj))
-#define FIND_QUEUE_DATA(obj) ((struct queue_data *)find_object_data((void *) obj))
-#define FIND_PHYSICAL_DEVICE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj))
-#define FIND_INSTANCE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj))
-static void *find_object_data(void *obj)
+#define HKEY(obj) ((uint64_t)(obj))
+#define FIND(type, obj) ((type *)find_object_data(HKEY(obj)))
+
+static void *find_object_data(uint64_t obj) /* Looks up the data registered for handle key obj; the search result is returned as-is. */
{
simple_mtx_lock(&vk_object_to_data_mutex);
ensure_vk_object_map();
- struct hash_entry *entry = _mesa_hash_table_search(vk_object_to_data, obj);
- void *data = entry ? entry->data : NULL;
+ void *data = _mesa_hash_table_u64_search(vk_object_to_data, obj); /* Performed while holding the map mutex. */
simple_mtx_unlock(&vk_object_to_data_mutex);
return data;
}
-static void map_object(void *obj, void *data)
+static void map_object(uint64_t obj, void *data) /* Registers data under handle key obj, creating the table on first use. */
{
simple_mtx_lock(&vk_object_to_data_mutex);
ensure_vk_object_map();
- _mesa_hash_table_insert(vk_object_to_data, obj, data);
+ _mesa_hash_table_u64_insert(vk_object_to_data, obj, data); /* Inserted while holding the map mutex. */
simple_mtx_unlock(&vk_object_to_data_mutex);
}
-static void unmap_object(void *obj)
+static void unmap_object(uint64_t obj) /* Removes the entry for handle key obj. Unlike map_object() this does not call ensure_vk_object_map(). */
{
simple_mtx_lock(&vk_object_to_data_mutex);
- struct hash_entry *entry = _mesa_hash_table_search(vk_object_to_data, obj);
- _mesa_hash_table_remove(vk_object_to_data, entry);
+ _mesa_hash_table_u64_remove(vk_object_to_data, obj); /* Removed while holding the map mutex. */
simple_mtx_unlock(&vk_object_to_data_mutex);
}
/**/
+
+#define VK_CHECK(expr) /* Evaluates expr once; if the result is not VK_SUCCESS, prints the expression text, line and result name to stderr. Execution continues either way. */ \
+ do { \
+ VkResult __result = (expr); \
+ if (__result != VK_SUCCESS) { \
+ fprintf(stderr, "'%s' line %i failed with %s\n", \
+ #expr, __LINE__, vk_Result_to_str(__result)); \
+ } \
+ } while (0)
+
+/**/
+
+/* Scans pCreateInfo->pNext for the loader's instance-create structure whose
+ * `function` field equals func and returns it. Reaching the end of the chain
+ * without a match is treated as unreachable.
+ */
+static VkLayerInstanceCreateInfo *get_instance_chain_info(const VkInstanceCreateInfo *pCreateInfo,
+                                                          VkLayerFunction func)
+{
+   vk_foreach_struct(item, pCreateInfo->pNext) {
+      if (item->sType != VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO)
+         continue;
+
+      VkLayerInstanceCreateInfo *info = (VkLayerInstanceCreateInfo *) item;
+      if (info->function == func)
+         return info;
+   }
+   unreachable("instance chain info not found");
+   return NULL;
+}
+
+/* Scans pCreateInfo->pNext for the loader's device-create structure whose
+ * `function` field equals func and returns it. Reaching the end of the chain
+ * without a match is treated as unreachable.
+ */
+static VkLayerDeviceCreateInfo *get_device_chain_info(const VkDeviceCreateInfo *pCreateInfo,
+                                                      VkLayerFunction func)
+{
+   vk_foreach_struct(item, pCreateInfo->pNext) {
+      if (item->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO)
+         continue;
+
+      VkLayerDeviceCreateInfo *info = (VkLayerDeviceCreateInfo *) item;
+      if (info->function == func)
+         return info;
+   }
+   unreachable("device chain info not found");
+   return NULL;
+}
+
+/* Duplicates a pNext chain node by node.
+ *
+ * Each node is copied with the size reported by vk_structure_type_size() and
+ * the copies are linked to each other in the original order. The result must
+ * be released with free_chain().
+ *
+ * Returns the head of the copy, or NULL when the input chain is empty or an
+ * allocation fails (any partially built copy is freed before returning).
+ */
+static struct VkBaseOutStructure *
+clone_chain(const struct VkBaseInStructure *chain)
+{
+   struct VkBaseOutStructure *head = NULL, *tail = NULL;
+
+   vk_foreach_struct_const(item, chain) {
+      size_t item_size = vk_structure_type_size(item);
+      struct VkBaseOutStructure *new_item =
+         (struct VkBaseOutStructure *)malloc(item_size);
+
+      if (!new_item) {
+         /* Out of memory: drop what was copied so far instead of passing
+          * NULL to memcpy below.
+          */
+         while (head) {
+            struct VkBaseOutStructure *next = head->pNext;
+            free(head);
+            head = next;
+         }
+         return NULL;
+      }
+
+      memcpy(new_item, item, item_size);
+      /* memcpy also copied the source's pNext; clear it so the copy is
+       * always NULL-terminated and never points into the original chain.
+       */
+      new_item->pNext = NULL;
+
+      if (!head)
+         head = new_item;
+      if (tail)
+         tail->pNext = new_item;
+      tail = new_item;
+   }
+
+   return head;
+}
+
+/* Frees every node of a chain; each node is released with free(). */
+static void
+free_chain(struct VkBaseOutStructure *chain)
+{
+   for (struct VkBaseOutStructure *next; chain; chain = next) {
+      /* Read the successor before the node is released. */
+      next = chain->pNext;
+      free(chain);
+   }
+}
+
+/**/
+
static struct instance_data *new_instance_data(VkInstance instance)
{
struct instance_data *data = rzalloc(NULL, struct instance_data);
data->instance = instance;
- map_object(data->instance, data);
+ map_object(HKEY(data->instance), data); /* Register the record so it can be found from the VkInstance handle. */
return data;
}
static void destroy_instance_data(struct instance_data *data)
{
- unmap_object(data->instance);
+ if (data->params.output_file) /* Close the statistics output stream when one is set. */
+ fclose(data->params.output_file);
+ unmap_object(HKEY(data->instance)); /* Drop the handle mapping before the record is freed. */
ralloc_free(data);
}
for (uint32_t i = 0; i < physicalDeviceCount; i++) {
if (map)
- map_object(physicalDevices[i], instance_data);
+ map_object(HKEY(physicalDevices[i]), instance_data);
else
- unmap_object(physicalDevices[i]);
+ unmap_object(HKEY(physicalDevices[i]));
}
free(physicalDevices);
struct device_data *data = rzalloc(NULL, struct device_data);
data->instance = instance;
data->device = device;
- map_object(data->device, data);
+ map_object(HKEY(data->device), data);
return data;
}
data->device = device_data;
data->queue = queue;
data->flags = family_props->queueFlags;
+ /* timestampValidBits may legally be 64; shifting a 64-bit value by 64 is
+ * undefined behaviour, so the all-ones mask is produced explicitly. */
+ data->timestamp_mask = family_props->timestampValidBits >= 64 ?
+ UINT64_MAX : (1ull << family_props->timestampValidBits) - 1;
data->family_index = family_index;
- map_object(data->queue, data);
+ LIST_INITHEAD(&data->running_command_buffer);
+ map_object(HKEY(data->queue), data);
+
+ /* Fence synchronizing access to queries on that queue. */
+ VkFenceCreateInfo fence_info = {};
+ fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+ fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
+ VK_CHECK(device_data->vtable.CreateFence(device_data->device,
+ &fence_info,
+ NULL,
+ &data->queries_fence));
if (data->flags & VK_QUEUE_GRAPHICS_BIT)
device_data->graphic_queue = data;
return data;
}
+/* Releases a queue record: destroys its queries fence, removes the VkQueue
+ * handle mapping and frees the record itself.
+ */
+static void destroy_queue(struct queue_data *data)
+{
+   struct device_data *owner = data->device;
+
+   owner->vtable.DestroyFence(owner->device, data->queries_fence, NULL);
+
+   unmap_object(HKEY(data->queue));
+   ralloc_free(data);
+}
+
static void device_map_queues(struct device_data *data,
const VkDeviceCreateInfo *pCreateInfo)
{
data->vtable.GetDeviceQueue(data->device,
pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex,
j, &queue);
+
+ VK_CHECK(data->set_device_loader_data(data->device, queue));
+
data->queues[queue_index++] =
new_queue_data(queue, &family_props[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex],
pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, data);
static void device_unmap_queues(struct device_data *data)
{
for (uint32_t i = 0; i < data->n_queues; i++)
- unmap_object(data->queues[i]->queue);
+ destroy_queue(data->queues[i]); /* Destroys the queue's fence, unmaps its handle and frees the queue record. */
}
static void destroy_device_data(struct device_data *data)
{
- unmap_object(data->device);
+ unmap_object(HKEY(data->device)); /* Drop the VkDevice handle mapping before the record is freed. */
ralloc_free(data);
}
-static void check_vk_result(VkResult err)
+/**/
+/* Allocates a zero-initialised command_buffer_data, stores the given handle,
+ * level, query pools, query index and owning device in it, and registers it
+ * in the object map under the command buffer handle.
+ */
+static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer,
+                                                           VkCommandBufferLevel level,
+                                                           VkQueryPool pipeline_query_pool,
+                                                           VkQueryPool timestamp_query_pool,
+                                                           uint32_t query_index,
+                                                           struct device_data *device_data)
+{
+   struct command_buffer_data *cmd_data =
+      rzalloc(NULL, struct command_buffer_data);
+
+   cmd_data->cmd_buffer = cmd_buffer;
+   cmd_data->level = level;
+   cmd_data->device = device_data;
+
+   cmd_data->pipeline_query_pool = pipeline_query_pool;
+   cmd_data->timestamp_query_pool = timestamp_query_pool;
+   cmd_data->query_index = query_index;
+
+   /* The record starts outside of any list. */
+   list_inithead(&cmd_data->link);
+
+   map_object(HKEY(cmd_buffer), cmd_data);
+   return cmd_data;
+}
+
+static void destroy_command_buffer_data(struct command_buffer_data *data) /* Unmaps, unlinks and frees a command buffer record. */
{
- if (err != VK_SUCCESS)
- printf("ERROR!\n");
+ unmap_object(HKEY(data->cmd_buffer));
+ list_delinit(&data->link); /* Detach from whatever list the record is currently linked into. */
+ ralloc_free(data);
}
/**/
static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain,
struct device_data *device_data)
{
+ struct instance_data *instance_data = device_data->instance;
struct swapchain_data *data = rzalloc(NULL, struct swapchain_data);
data->device = device_data;
data->swapchain = swapchain;
- data->window_size = ImVec2(300, 300);
- map_object((void *) data->swapchain, data);
+ data->window_size = ImVec2(instance_data->params.width, instance_data->params.height); /* Initial overlay window size comes from the instance's overlay parameters. */
+ list_inithead(&data->draws); /* No overlay_draw exists yet; get_overlay_draw() appends them. */
+ map_object(HKEY(data->swapchain), data);
return data;
}
static void destroy_swapchain_data(struct swapchain_data *data)
{
- unmap_object((void *) data->swapchain);
+ unmap_object(HKEY(data->swapchain)); /* NOTE(review): overlay_draw entries are allocated with data as ralloc parent, so presumably they are released together with it below - confirm. */
ralloc_free(data);
}
+/* Hands out an overlay_draw for the next overlay submission.
+ *
+ * If the oldest entry of data->draws has a signalled fence, that entry is
+ * recycled: its fence is reset and it moves to the tail of the list.
+ * Otherwise a new entry is allocated (as a ralloc child of data) together
+ * with a primary command buffer, a fence and a semaphore, and appended to the
+ * list.
+ */
+struct overlay_draw *get_overlay_draw(struct swapchain_data *data)
+{
+   struct device_data *device_data = data->device;
+   struct overlay_draw *draw = NULL;
+
+   if (!list_empty(&data->draws))
+      draw = list_first_entry(&data->draws, struct overlay_draw, link);
+
+   if (draw != NULL) {
+      VkResult fence_status =
+         device_data->vtable.GetFenceStatus(device_data->device, draw->fence);
+
+      if (fence_status == VK_SUCCESS) {
+         /* The oldest draw has completed: rotate it to the tail and reuse it. */
+         list_del(&draw->link);
+         VK_CHECK(device_data->vtable.ResetFences(device_data->device,
+                                                  1, &draw->fence));
+         list_addtail(&draw->link, &data->draws);
+         return draw;
+      }
+   }
+
+   /* Nothing reusable: build a fresh draw. */
+   draw = rzalloc(data, struct overlay_draw);
+
+   VkCommandBufferAllocateInfo cmd_buffer_info = {};
+   cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+   cmd_buffer_info.commandPool = data->command_pool;
+   cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+   cmd_buffer_info.commandBufferCount = 1;
+   VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device,
+                                                       &cmd_buffer_info,
+                                                       &draw->command_buffer));
+   VK_CHECK(device_data->set_device_loader_data(device_data->device,
+                                                draw->command_buffer));
+
+   VkFenceCreateInfo fence_info = {};
+   fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+   VK_CHECK(device_data->vtable.CreateFence(device_data->device,
+                                            &fence_info,
+                                            NULL,
+                                            &draw->fence));
+
+   VkSemaphoreCreateInfo sem_info = {};
+   sem_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+   VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info,
+                                                NULL, &draw->semaphore));
+
+   list_addtail(&draw->link, &data->draws);
+
+   return draw;
+}
+
+/* Returns the unit suffix written after a parameter's name in the output
+ * file header: "(ns)" for gpu_timing, "(us)" for the frame, acquire and
+ * present timings, and an empty string for every other parameter.
+ */
+static const char *param_unit(enum overlay_param_enabled param)
+{
+   if (param == OVERLAY_PARAM_ENABLED_gpu_timing)
+      return "(ns)";
+
+   const bool reported_in_us =
+      param == OVERLAY_PARAM_ENABLED_frame_timing ||
+      param == OVERLAY_PARAM_ENABLED_acquire_timing ||
+      param == OVERLAY_PARAM_ENABLED_present_timing;
+
+   return reported_in_us ? "(us)" : "";
+}
+
static void snapshot_swapchain_frame(struct swapchain_data *data)
{
- uint64_t now = os_time_get();
+ struct device_data *device_data = data->device;
+ struct instance_data *instance_data = device_data->instance;
+ uint32_t f_idx = data->n_frames % ARRAY_SIZE(data->frames_stats);
+ uint64_t now = os_time_get(); /* us */
if (data->last_present_time) {
- data->frame_times[(data->n_frames - 1) % ARRAY_SIZE(data->frame_times)] =
- ((double)now - (double)data->last_present_time) / 1000.0;
+ data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame_timing] =
+ now - data->last_present_time;
}
- struct device_data *device_data = data->device;
- data->stats[data->n_frames % ARRAY_SIZE(data->frame_times)] = device_data->stats;
- memset(&device_data->stats, 0, sizeof(device_data->stats));
+ memset(&data->frames_stats[f_idx], 0, sizeof(data->frames_stats[f_idx]));
+ for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ data->frames_stats[f_idx].stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
+ data->accumulated_stats.stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
+ }
+
+ if (data->last_fps_update) {
+ double elapsed = (double)(now - data->last_fps_update); /* us */
+ if (elapsed >= instance_data->params.fps_sampling_period) {
+ data->fps = 1000000.0f * data->n_frames_since_update / elapsed;
+ if (instance_data->params.output_file) {
+ if (!instance_data->first_line_printed) {
+ bool first_column = true;
+
+ instance_data->first_line_printed = true;
+
+#define OVERLAY_PARAM_BOOL(name) \
+ if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_##name]) { \
+ fprintf(instance_data->params.output_file, \
+ "%s%s%s", first_column ? "" : ", ", #name, \
+ param_unit(OVERLAY_PARAM_ENABLED_##name)); \
+ first_column = false; \
+ }
+#define OVERLAY_PARAM_CUSTOM(name)
+ OVERLAY_PARAMS
+#undef OVERLAY_PARAM_BOOL
+#undef OVERLAY_PARAM_CUSTOM
+ fprintf(instance_data->params.output_file, "\n");
+ }
+
+ for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ if (!instance_data->params.enabled[s])
+ continue;
+ if (s == OVERLAY_PARAM_ENABLED_fps) {
+ fprintf(instance_data->params.output_file,
+ "%s%.2f", s == 0 ? "" : ", ", data->fps);
+ } else {
+ fprintf(instance_data->params.output_file,
+ "%s%" PRIu64, s == 0 ? "" : ", ",
+ data->accumulated_stats.stats[s]);
+ }
+ }
+ fprintf(instance_data->params.output_file, "\n");
+ fflush(instance_data->params.output_file);
+ }
+
+ memset(&data->accumulated_stats, 0, sizeof(data->accumulated_stats));
+ data->n_frames_since_update = 0;
+ data->last_fps_update = now;
+ }
+ } else {
+ data->last_fps_update = now;
+ }
+
+ memset(&device_data->frame_stats, 0, sizeof(device_data->frame_stats));
+ memset(&data->frame_stats, 0, sizeof(device_data->frame_stats));
data->last_present_time = now;
data->n_frames++;
+ data->n_frames_since_update++;
}
-static float get_frame_timing(void *_data, int _idx)
-{
- struct swapchain_data *data = (struct swapchain_data *) _data;
- if ((ARRAY_SIZE(data->frame_times) - _idx) > (data->n_frames - 2))
- return 0.0f;
- int idx = ARRAY_SIZE(data->frame_times) +
- (data->n_frames - 2) < ARRAY_SIZE(data->frame_times) ?
- _idx - (data->n_frames - 2) :
- _idx + (data->n_frames - 2);
- idx %= ARRAY_SIZE(data->frame_times);
- return data->frame_times[idx];
-}
-
-static float get_acquire_timing(void *_data, int _idx)
+/* Histogram value getter for timing statistics.
+ *
+ * _data is the swapchain_data; _idx runs over the frames_stats ring with the
+ * highest index being the most recent frame. Slots for which no frame has
+ * been recorded yet yield 0. The raw counter of the statistic selected by
+ * stat_selector is divided by time_dividor, both set by the caller.
+ */
+static float get_time_stat(void *_data, int _idx)
{
struct swapchain_data *data = (struct swapchain_data *) _data;
- if ((ARRAY_SIZE(data->acquire_times) - _idx) > data->n_acquire)
+ if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames)
return 0.0f;
- int idx = ARRAY_SIZE(data->acquire_times) +
- data->n_acquire < ARRAY_SIZE(data->acquire_times) ?
- _idx - data->n_acquire :
- _idx + data->n_acquire;
- idx %= ARRAY_SIZE(data->acquire_times);
- return data->acquire_times[idx];
+ /* The previous "SIZE + n < SIZE ? a : b" form parsed as
+ * "(SIZE + n) < SIZE ? a : b", so only the "_idx + n_frames" branch ever
+ * ran. Spell that out and reduce in 64 bits before narrowing to int. */
+ int idx = (_idx + data->n_frames) % ARRAY_SIZE(data->frames_stats);
+ /* Scale the raw counter with the divisor chosen by the caller. */
+ return data->frames_stats[idx].stats[data->stat_selector] / data->time_dividor;
}
static float get_stat(void *_data, int _idx)
{
struct swapchain_data *data = (struct swapchain_data *) _data;
- if ((ARRAY_SIZE(data->stats) - _idx) > data->n_frames)
+ if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames)
return 0.0f;
- int idx = ARRAY_SIZE(data->stats) +
- data->n_frames < ARRAY_SIZE(data->stats) ?
+ int idx = ARRAY_SIZE(data->frames_stats) +
+ data->n_frames < ARRAY_SIZE(data->frames_stats) ?
_idx - data->n_frames :
_idx + data->n_frames;
- idx %= ARRAY_SIZE(data->stats);
- return data->stats[idx].stats[data->stat_selector];
+ idx %= ARRAY_SIZE(data->frames_stats);
+ return data->frames_stats[idx].stats[data->stat_selector];
}
static void position_layer(struct swapchain_data *data)
{
struct device_data *device_data = data->device;
struct instance_data *instance_data = device_data->instance;
+ const float margin = 10.0f;
ImGui::SetNextWindowBgAlpha(0.5);
ImGui::SetNextWindowSize(data->window_size, ImGuiCond_Always);
- switch (instance_data->position) {
+ switch (instance_data->params.position) {
case LAYER_POSITION_TOP_LEFT:
- ImGui::SetNextWindowPos(ImVec2(0, 0), ImGuiCond_Always);
+ ImGui::SetNextWindowPos(ImVec2(margin, margin), ImGuiCond_Always);
break;
case LAYER_POSITION_TOP_RIGHT:
- ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x, 0),
+ ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, margin),
ImGuiCond_Always);
break;
case LAYER_POSITION_BOTTOM_LEFT:
- ImGui::SetNextWindowPos(ImVec2(0, data->height - data->window_size.y),
+ ImGui::SetNextWindowPos(ImVec2(margin, data->height - data->window_size.y - margin),
ImGuiCond_Always);
break;
case LAYER_POSITION_BOTTOM_RIGHT:
- ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x,
- data->height - data->window_size.y),
+ ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin,
+ data->height - data->window_size.y - margin),
ImGuiCond_Always);
break;
}
format_name = format_name ? (format_name + strlen("VK_FORMAT_")) : "unknown";
ImGui::Text("Swapchain format: %s", format_name);
ImGui::Text("Frames: %" PRIu64, data->n_frames);
+ if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_fps])
+ ImGui::Text("FPS: %.2f" , data->fps);
- {
- double min_time = FLT_MAX, max_time = 0.0f;
- for (uint32_t i = 0; i < MIN2(data->n_frames - 2, ARRAY_SIZE(data->frame_times)); i++) {
- min_time = MIN2(min_time, data->frame_times[i]);
- max_time = MAX2(max_time, data->frame_times[i]);
- }
- ImGui::PlotHistogram("##Frame timings", get_frame_timing, data,
- ARRAY_SIZE(data->frame_times), 0,
- NULL, min_time, max_time,
- ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
- ImGui::Text("Frame timing: %.3fms [%.3f, %.3f]",
- get_frame_timing(data, ARRAY_SIZE(data->frame_times) - 1),
- min_time, max_time);
+ /* Recompute min/max */
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ data->stats_min.stats[s] = UINT64_MAX;
+ data->stats_max.stats[s] = 0;
}
-
- if (instance_data->enabled_stats & FRAME_STAT_ENABLED(ACQUIRE_TIMING)) {
- double min_time = FLT_MAX, max_time = 0.0f;
- for (uint32_t i = 0; i < MIN2(data->n_acquire - 2, ARRAY_SIZE(data->acquire_times)); i++) {
- min_time = MIN2(min_time, data->acquire_times[i]);
- max_time = MAX2(max_time, data->acquire_times[i]);
- }
- ImGui::PlotHistogram("##Acquire timings", get_acquire_timing, data,
- ARRAY_SIZE(data->acquire_times), 0,
- NULL, min_time, max_time,
- ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
- ImGui::Text("Acquire timing: %.3fms [%.3f, %.3f]",
- get_acquire_timing(data, ARRAY_SIZE(data->acquire_times) - 1),
- min_time, max_time);
-
- for (uint32_t i = 0; i < ARRAY_SIZE(data->stats_min.stats); i++) {
- data->stats_min.stats[i] = UINT32_MAX;
- data->stats_max.stats[i] = 0;
- }
- for (uint32_t i = 0; i < MIN2(data->n_frames - 1, ARRAY_SIZE(data->stats)); i++) {
- for (uint32_t j = 0; j < ARRAY_SIZE(data->stats[0].stats); j++) {
- data->stats_min.stats[j] = MIN2(data->stats[i].stats[j],
- data->stats_min.stats[j]);
- data->stats_max.stats[j] = MAX2(data->stats[i].stats[j],
- data->stats_max.stats[j]);
- }
+ for (uint32_t f = 0; f < MIN2(data->n_frames, ARRAY_SIZE(data->frames_stats)); f++) {
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ data->stats_min.stats[s] = MIN2(data->frames_stats[f].stats[s],
+ data->stats_min.stats[s]);
+ data->stats_max.stats[s] = MAX2(data->frames_stats[f].stats[s],
+ data->stats_max.stats[s]);
}
}
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ assert(data->stats_min.stats[s] != UINT64_MAX);
+ }
- for (uint32_t i = 0; i < ARRAY_SIZE(device_data->stats.stats); i++) {
- if (!(instance_data->enabled_stats & (1ULL << i)))
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ if (!instance_data->params.enabled[s] ||
+ s == OVERLAY_PARAM_ENABLED_fps ||
+ s == OVERLAY_PARAM_ENABLED_frame)
continue;
char hash[40];
- snprintf(hash, sizeof(hash), "##%s", enable_flags[i].string);
- data->stat_selector = (enum frame_stat_type) i;
-
- ImGui::PlotHistogram(hash, get_stat, data,
- ARRAY_SIZE(data->stats), 0,
- NULL,
- data->stats_min.stats[i],
- data->stats_max.stats[i],
- ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
- ImGui::Text("%s: %.0f [%u, %u]", enable_flags[i].string,
- get_stat(data, ARRAY_SIZE(data->stats) - 1),
- data->stats_min.stats[i], data->stats_max.stats[i]);
+ snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]);
+ data->stat_selector = (enum overlay_param_enabled) s;
+ data->time_dividor = 1000.0f;
+ if (s == OVERLAY_PARAM_ENABLED_gpu_timing)
+ data->time_dividor = 1000000.0f;
+
+ if (s == OVERLAY_PARAM_ENABLED_frame_timing ||
+ s == OVERLAY_PARAM_ENABLED_acquire_timing ||
+ s == OVERLAY_PARAM_ENABLED_present_timing ||
+ s == OVERLAY_PARAM_ENABLED_gpu_timing) {
+ double min_time = data->stats_min.stats[s] / data->time_dividor;
+ double max_time = data->stats_max.stats[s] / data->time_dividor;
+ ImGui::PlotHistogram(hash, get_time_stat, data,
+ ARRAY_SIZE(data->frames_stats), 0,
+ NULL, min_time, max_time,
+ ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
+ ImGui::Text("%s: %.3fms [%.3f, %.3f]", overlay_param_names[s],
+ get_time_stat(data, ARRAY_SIZE(data->frames_stats) - 1),
+ min_time, max_time);
+ } else {
+ ImGui::PlotHistogram(hash, get_stat, data,
+ ARRAY_SIZE(data->frames_stats), 0,
+ NULL,
+ data->stats_min.stats[s],
+ data->stats_max.stats[s],
+ ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
+ ImGui::Text("%s: %.0f [%" PRIu64 ", %" PRIu64 "]", overlay_param_names[s],
+ get_stat(data, ARRAY_SIZE(data->frames_stats) - 1),
+ data->stats_min.stats[s], data->stats_max.stats[s]);
+ }
}
data->window_size = ImVec2(data->window_size.x, ImGui::GetCursorPosY() + 10.0f);
ImGui::End();
data->font_uploaded = true;
struct device_data *device_data = data->device;
- VkResult err;
ImGuiIO& io = ImGui::GetIO();
unsigned char* pixels;
int width, height;
buffer_info.size = upload_size;
buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- err = device_data->vtable.CreateBuffer(device_data->device, &buffer_info,
- NULL, &data->upload_font_buffer);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateBuffer(device_data->device, &buffer_info,
+ NULL, &data->upload_font_buffer));
VkMemoryRequirements upload_buffer_req;
device_data->vtable.GetBufferMemoryRequirements(device_data->device,
data->upload_font_buffer,
upload_alloc_info.memoryTypeIndex = vk_memory_type(device_data,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
upload_buffer_req.memoryTypeBits);
- err = device_data->vtable.AllocateMemory(device_data->device,
- &upload_alloc_info,
- NULL,
- &data->upload_font_buffer_mem);
- check_vk_result(err);
- err = device_data->vtable.BindBufferMemory(device_data->device,
- data->upload_font_buffer,
- data->upload_font_buffer_mem, 0);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.AllocateMemory(device_data->device,
+ &upload_alloc_info,
+ NULL,
+ &data->upload_font_buffer_mem));
+ VK_CHECK(device_data->vtable.BindBufferMemory(device_data->device,
+ data->upload_font_buffer,
+ data->upload_font_buffer_mem, 0));
/* Upload to Buffer */
char* map = NULL;
- err = device_data->vtable.MapMemory(device_data->device,
- data->upload_font_buffer_mem,
- 0, upload_size, 0, (void**)(&map));
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.MapMemory(device_data->device,
+ data->upload_font_buffer_mem,
+ 0, upload_size, 0, (void**)(&map)));
memcpy(map, pixels, upload_size);
VkMappedMemoryRange range[1] = {};
range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range[0].memory = data->upload_font_buffer_mem;
range[0].size = upload_size;
- err = device_data->vtable.FlushMappedMemoryRanges(device_data->device, 1, range);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 1, range));
device_data->vtable.UnmapMemory(device_data->device,
data->upload_font_buffer_mem);
VkDeviceSize *buffer_size,
size_t new_size, VkBufferUsageFlagBits usage)
{
- VkResult err;
if (*buffer != VK_NULL_HANDLE)
data->vtable.DestroyBuffer(data->device, *buffer, NULL);
if (*buffer_memory)
buffer_info.size = new_size;
buffer_info.usage = usage;
buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- err = data->vtable.CreateBuffer(data->device, &buffer_info, NULL, buffer);
- check_vk_result(err);
+ VK_CHECK(data->vtable.CreateBuffer(data->device, &buffer_info, NULL, buffer));
VkMemoryRequirements req;
data->vtable.GetBufferMemoryRequirements(data->device, *buffer, &req);
alloc_info.allocationSize = req.size;
alloc_info.memoryTypeIndex =
vk_memory_type(data, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, req.memoryTypeBits);
- err = data->vtable.AllocateMemory(data->device, &alloc_info, NULL, buffer_memory);
- check_vk_result(err);
+ VK_CHECK(data->vtable.AllocateMemory(data->device, &alloc_info, NULL, buffer_memory));
- err = data->vtable.BindBufferMemory(data->device, *buffer, *buffer_memory, 0);
- check_vk_result(err);
+ VK_CHECK(data->vtable.BindBufferMemory(data->device, *buffer, *buffer_memory, 0));
*buffer_size = new_size;
}
-static void render_swapchain_display(struct swapchain_data *data, unsigned image_index)
+static struct overlay_draw *render_swapchain_display(struct swapchain_data *data,
+ const VkSemaphore *wait_semaphores,
+ unsigned n_wait_semaphores,
+ unsigned image_index)
{
ImDrawData* draw_data = ImGui::GetDrawData();
if (draw_data->TotalVtxCount == 0)
- return;
+ return NULL;
struct device_data *device_data = data->device;
- uint32_t idx = data->n_frames % ARRAY_SIZE(data->frame_data);
- VkCommandBuffer command_buffer = data->frame_data[idx].command_buffer;
- VkResult err;
+ struct overlay_draw *draw = get_overlay_draw(data);
- device_data->vtable.ResetCommandBuffer(command_buffer, 0);
+ device_data->vtable.ResetCommandBuffer(draw->command_buffer, 0);
VkRenderPassBeginInfo render_pass_info = {};
render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
VkCommandBufferBeginInfo buffer_begin_info = {};
buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
- device_data->vtable.BeginCommandBuffer(command_buffer, &buffer_begin_info);
+ device_data->vtable.BeginCommandBuffer(draw->command_buffer, &buffer_begin_info);
- ensure_swapchain_fonts(data, command_buffer);
+ ensure_swapchain_fonts(data, draw->command_buffer);
/* Bounce the image to display back to color attachment layout for
* rendering on top of it.
VkImageMemoryBarrier imb;
imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
imb.pNext = nullptr;
- imb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
imb.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
imb.image = data->images[image_index];
imb.subresourceRange.layerCount = 1;
imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index;
imb.dstQueueFamilyIndex = device_data->graphic_queue->family_index;
- device_data->vtable.CmdPipelineBarrier(command_buffer,
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ device_data->vtable.CmdPipelineBarrier(draw->command_buffer,
+ VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
+ VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
0, /* dependency flags */
0, nullptr, /* memory barriers */
0, nullptr, /* buffer memory barriers */
1, &imb); /* image memory barriers */
- device_data->vtable.CmdBeginRenderPass(command_buffer, &render_pass_info,
+ device_data->vtable.CmdBeginRenderPass(draw->command_buffer, &render_pass_info,
VK_SUBPASS_CONTENTS_INLINE);
/* Create/Resize vertex & index buffers */
size_t vertex_size = draw_data->TotalVtxCount * sizeof(ImDrawVert);
size_t index_size = draw_data->TotalIdxCount * sizeof(ImDrawIdx);
- if (data->frame_data[idx].vertex_buffer_size < vertex_size) {
+ if (draw->vertex_buffer_size < vertex_size) {
CreateOrResizeBuffer(device_data,
- &data->frame_data[idx].vertex_buffer,
- &data->frame_data[idx].vertex_buffer_mem,
- &data->frame_data[idx].vertex_buffer_size,
+ &draw->vertex_buffer,
+ &draw->vertex_buffer_mem,
+ &draw->vertex_buffer_size,
vertex_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
}
- if (data->frame_data[idx].index_buffer_size < index_size) {
+ if (draw->index_buffer_size < index_size) {
CreateOrResizeBuffer(device_data,
- &data->frame_data[idx].index_buffer,
- &data->frame_data[idx].index_buffer_mem,
- &data->frame_data[idx].index_buffer_size,
+ &draw->index_buffer,
+ &draw->index_buffer_mem,
+ &draw->index_buffer_size,
index_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
}
/* Upload vertex & index data */
- VkBuffer vertex_buffer = data->frame_data[idx].vertex_buffer;
- VkDeviceMemory vertex_mem = data->frame_data[idx].vertex_buffer_mem;
- VkBuffer index_buffer = data->frame_data[idx].index_buffer;
- VkDeviceMemory index_mem = data->frame_data[idx].index_buffer_mem;
ImDrawVert* vtx_dst = NULL;
ImDrawIdx* idx_dst = NULL;
- err = device_data->vtable.MapMemory(device_data->device, vertex_mem,
- 0, vertex_size, 0, (void**)(&vtx_dst));
- check_vk_result(err);
- err = device_data->vtable.MapMemory(device_data->device, index_mem,
- 0, index_size, 0, (void**)(&idx_dst));
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->vertex_buffer_mem,
+ 0, vertex_size, 0, (void**)(&vtx_dst)));
+ VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->index_buffer_mem,
+ 0, index_size, 0, (void**)(&idx_dst)));
for (int n = 0; n < draw_data->CmdListsCount; n++)
{
const ImDrawList* cmd_list = draw_data->CmdLists[n];
}
VkMappedMemoryRange range[2] = {};
range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
- range[0].memory = vertex_mem;
+ range[0].memory = draw->vertex_buffer_mem;
range[0].size = VK_WHOLE_SIZE;
range[1].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
- range[1].memory = index_mem;
+ range[1].memory = draw->index_buffer_mem;
range[1].size = VK_WHOLE_SIZE;
- err = device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range);
- check_vk_result(err);
- device_data->vtable.UnmapMemory(device_data->device, vertex_mem);
- device_data->vtable.UnmapMemory(device_data->device, index_mem);
+ VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range));
+ device_data->vtable.UnmapMemory(device_data->device, draw->vertex_buffer_mem);
+ device_data->vtable.UnmapMemory(device_data->device, draw->index_buffer_mem);
/* Bind pipeline and descriptor sets */
- device_data->vtable.CmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline);
+ device_data->vtable.CmdBindPipeline(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline);
VkDescriptorSet desc_set[1] = { data->descriptor_set };
- device_data->vtable.CmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device_data->vtable.CmdBindDescriptorSets(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
data->pipeline_layout, 0, 1, desc_set, 0, NULL);
/* Bind vertex & index buffers */
- VkBuffer vertex_buffers[1] = { vertex_buffer };
+ VkBuffer vertex_buffers[1] = { draw->vertex_buffer };
VkDeviceSize vertex_offset[1] = { 0 };
- device_data->vtable.CmdBindVertexBuffers(command_buffer, 0, 1, vertex_buffers, vertex_offset);
- device_data->vtable.CmdBindIndexBuffer(command_buffer, index_buffer, 0, VK_INDEX_TYPE_UINT16);
+ device_data->vtable.CmdBindVertexBuffers(draw->command_buffer, 0, 1, vertex_buffers, vertex_offset);
+ device_data->vtable.CmdBindIndexBuffer(draw->command_buffer, draw->index_buffer, 0, VK_INDEX_TYPE_UINT16);
/* Setup viewport */
VkViewport viewport;
viewport.height = draw_data->DisplaySize.y;
viewport.minDepth = 0.0f;
viewport.maxDepth = 1.0f;
- device_data->vtable.CmdSetViewport(command_buffer, 0, 1, &viewport);
+ device_data->vtable.CmdSetViewport(draw->command_buffer, 0, 1, &viewport);
/* Setup scale and translation through push constants :
float translate[2];
translate[0] = -1.0f - draw_data->DisplayPos.x * scale[0];
translate[1] = -1.0f - draw_data->DisplayPos.y * scale[1];
- device_data->vtable.CmdPushConstants(command_buffer, data->pipeline_layout,
+ device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout,
VK_SHADER_STAGE_VERTEX_BIT,
sizeof(float) * 0, sizeof(float) * 2, scale);
- device_data->vtable.CmdPushConstants(command_buffer, data->pipeline_layout,
+ device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout,
VK_SHADER_STAGE_VERTEX_BIT,
sizeof(float) * 2, sizeof(float) * 2, translate);
scissor.offset.y = (int32_t)(pcmd->ClipRect.y - display_pos.y) > 0 ? (int32_t)(pcmd->ClipRect.y - display_pos.y) : 0;
scissor.extent.width = (uint32_t)(pcmd->ClipRect.z - pcmd->ClipRect.x);
scissor.extent.height = (uint32_t)(pcmd->ClipRect.w - pcmd->ClipRect.y + 1); // FIXME: Why +1 here?
- device_data->vtable.CmdSetScissor(command_buffer, 0, 1, &scissor);
+ device_data->vtable.CmdSetScissor(draw->command_buffer, 0, 1, &scissor);
// Draw
- device_data->vtable.CmdDrawIndexed(command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0);
+ device_data->vtable.CmdDrawIndexed(draw->command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0);
idx_offset += pcmd->ElemCount;
}
vtx_offset += cmd_list->VtxBuffer.Size;
}
- device_data->vtable.CmdEndRenderPass(command_buffer);
- device_data->vtable.EndCommandBuffer(command_buffer);
-
- if (data->submission_semaphore) {
- device_data->vtable.DestroySemaphore(device_data->device,
- data->submission_semaphore,
- NULL);
- }
- /* Submission semaphore */
- VkSemaphoreCreateInfo semaphore_info = {};
- semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
- err = device_data->vtable.CreateSemaphore(device_data->device, &semaphore_info,
- NULL, &data->submission_semaphore);
- check_vk_result(err);
+ device_data->vtable.CmdEndRenderPass(draw->command_buffer);
+ device_data->vtable.EndCommandBuffer(draw->command_buffer);
VkSubmitInfo submit_info = {};
+ VkPipelineStageFlags stage_wait = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.commandBufferCount = 1;
- submit_info.pCommandBuffers = &command_buffer;
+ submit_info.pCommandBuffers = &draw->command_buffer;
+ submit_info.pWaitDstStageMask = &stage_wait;
+ submit_info.waitSemaphoreCount = n_wait_semaphores;
+ submit_info.pWaitSemaphores = wait_semaphores;
submit_info.signalSemaphoreCount = 1;
- submit_info.pSignalSemaphores = &data->submission_semaphore;
+ submit_info.pSignalSemaphores = &draw->semaphore;
+
+ device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence);
- device_data->vtable.WaitForFences(device_data->device, 1, &data->fence, VK_TRUE, UINT64_MAX);
- device_data->vtable.ResetFences(device_data->device, 1, &data->fence);
- device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, data->fence);
+ return draw;
}
static const uint32_t overlay_vert_spv[] = {
{
struct device_data *device_data = data->device;
VkShaderModule vert_module, frag_module;
- VkResult err;
/* Create shader modules */
VkShaderModuleCreateInfo vert_info = {};
vert_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
vert_info.codeSize = sizeof(overlay_vert_spv);
vert_info.pCode = overlay_vert_spv;
- err = device_data->vtable.CreateShaderModule(device_data->device,
- &vert_info, NULL, &vert_module);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device,
+ &vert_info, NULL, &vert_module));
VkShaderModuleCreateInfo frag_info = {};
frag_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
frag_info.codeSize = sizeof(overlay_frag_spv);
frag_info.pCode = (uint32_t*)overlay_frag_spv;
- err = device_data->vtable.CreateShaderModule(device_data->device,
- &frag_info, NULL, &frag_module);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device,
+ &frag_info, NULL, &frag_module));
/* Font sampler */
VkSamplerCreateInfo sampler_info = {};
sampler_info.minLod = -1000;
sampler_info.maxLod = 1000;
sampler_info.maxAnisotropy = 1.0f;
- err = device_data->vtable.CreateSampler(device_data->device, &sampler_info,
- NULL, &data->font_sampler);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateSampler(device_data->device, &sampler_info,
+ NULL, &data->font_sampler));
/* Descriptor pool */
VkDescriptorPoolSize sampler_pool_size = {};
desc_pool_info.maxSets = 1;
desc_pool_info.poolSizeCount = 1;
desc_pool_info.pPoolSizes = &sampler_pool_size;
- err = device_data->vtable.CreateDescriptorPool(device_data->device,
- &desc_pool_info,
- NULL, &data->descriptor_pool);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateDescriptorPool(device_data->device,
+ &desc_pool_info,
+ NULL, &data->descriptor_pool));
/* Descriptor layout */
VkSampler sampler[1] = { data->font_sampler };
set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
set_layout_info.bindingCount = 1;
set_layout_info.pBindings = binding;
- err = device_data->vtable.CreateDescriptorSetLayout(device_data->device,
- &set_layout_info,
- NULL, &data->descriptor_layout);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateDescriptorSetLayout(device_data->device,
+ &set_layout_info,
+ NULL, &data->descriptor_layout));
/* Descriptor set */
VkDescriptorSetAllocateInfo alloc_info = {};
alloc_info.descriptorPool = data->descriptor_pool;
alloc_info.descriptorSetCount = 1;
alloc_info.pSetLayouts = &data->descriptor_layout;
- err = device_data->vtable.AllocateDescriptorSets(device_data->device,
- &alloc_info,
- &data->descriptor_set);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.AllocateDescriptorSets(device_data->device,
+ &alloc_info,
+ &data->descriptor_set));
/* Constants: we are using 'vec2 offset' and 'vec2 scale' instead of a full
* 3d projection matrix
layout_info.pSetLayouts = &data->descriptor_layout;
layout_info.pushConstantRangeCount = 1;
layout_info.pPushConstantRanges = push_constants;
- err = device_data->vtable.CreatePipelineLayout(device_data->device,
- &layout_info,
- NULL, &data->pipeline_layout);
- check_vk_result(err);
-
+ VK_CHECK(device_data->vtable.CreatePipelineLayout(device_data->device,
+ &layout_info,
+ NULL, &data->pipeline_layout));
VkPipelineShaderStageCreateInfo stage[2] = {};
stage[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
info.pDynamicState = &dynamic_state;
info.layout = data->pipeline_layout;
info.renderPass = data->render_pass;
- err = device_data->vtable.CreateGraphicsPipelines(device_data->device, VK_NULL_HANDLE,
- 1, &info,
- NULL, &data->pipeline);
- check_vk_result(err);
+ VK_CHECK(
+ device_data->vtable.CreateGraphicsPipelines(device_data->device, VK_NULL_HANDLE,
+ 1, &info,
+ NULL, &data->pipeline));
device_data->vtable.DestroyShaderModule(device_data->device, vert_module, NULL);
device_data->vtable.DestroyShaderModule(device_data->device, frag_module, NULL);
image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
- err = device_data->vtable.CreateImage(device_data->device, &image_info,
- NULL, &data->font_image);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateImage(device_data->device, &image_info,
+ NULL, &data->font_image));
VkMemoryRequirements font_image_req;
device_data->vtable.GetImageMemoryRequirements(device_data->device,
data->font_image, &font_image_req);
image_alloc_info.memoryTypeIndex = vk_memory_type(device_data,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
font_image_req.memoryTypeBits);
- err = device_data->vtable.AllocateMemory(device_data->device, &image_alloc_info,
- NULL, &data->font_mem);
- check_vk_result(err);
- err = device_data->vtable.BindImageMemory(device_data->device,
- data->font_image,
- data->font_mem, 0);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.AllocateMemory(device_data->device, &image_alloc_info,
+ NULL, &data->font_mem));
+ VK_CHECK(device_data->vtable.BindImageMemory(device_data->device,
+ data->font_image,
+ data->font_mem, 0));
/* Font image view */
VkImageViewCreateInfo view_info = {};
view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
view_info.subresourceRange.levelCount = 1;
view_info.subresourceRange.layerCount = 1;
- err = device_data->vtable.CreateImageView(device_data->device, &view_info,
- NULL, &data->font_image_view);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateImageView(device_data->device, &view_info,
+ NULL, &data->font_image_view));
/* Descriptor set */
VkDescriptorImageInfo desc_image[1] = {};
ImGui::GetIO().DisplaySize = ImVec2((float)data->width, (float)data->height);
struct device_data *device_data = data->device;
- VkResult err;
/* Render pass */
VkAttachmentDescription attachment_desc = {};
render_pass_info.pSubpasses = &subpass;
render_pass_info.dependencyCount = 1;
render_pass_info.pDependencies = &dependency;
- err = device_data->vtable.CreateRenderPass(device_data->device,
- &render_pass_info,
- NULL, &data->render_pass);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateRenderPass(device_data->device,
+ &render_pass_info,
+ NULL, &data->render_pass));
setup_swapchain_data_pipeline(data);
- device_data->vtable.GetSwapchainImagesKHR(device_data->device,
- data->swapchain,
- &data->n_images,
- NULL);
+ VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device,
+ data->swapchain,
+ &data->n_images,
+ NULL));
+
data->images = ralloc_array(data, VkImage, data->n_images);
data->image_views = ralloc_array(data, VkImageView, data->n_images);
data->framebuffers = ralloc_array(data, VkFramebuffer, data->n_images);
- device_data->vtable.GetSwapchainImagesKHR(device_data->device,
- data->swapchain,
- &data->n_images,
- data->images);
+ VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device,
+ data->swapchain,
+ &data->n_images,
+ data->images));
/* Image views */
VkImageViewCreateInfo view_info = {};
view_info.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
for (uint32_t i = 0; i < data->n_images; i++) {
view_info.image = data->images[i];
- err = device_data->vtable.CreateImageView(device_data->device, &view_info,
- NULL, &data->image_views[i]);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateImageView(device_data->device,
+ &view_info, NULL,
+ &data->image_views[i]));
}
/* Framebuffers */
fb_info.layers = 1;
for (uint32_t i = 0; i < data->n_images; i++) {
attachment[0] = data->image_views[i];
- err = device_data->vtable.CreateFramebuffer(device_data->device, &fb_info,
- NULL, &data->framebuffers[i]);
- check_vk_result(err);
+ VK_CHECK(device_data->vtable.CreateFramebuffer(device_data->device, &fb_info,
+ NULL, &data->framebuffers[i]));
}
- /* Command buffer */
+ /* Command buffer pool */
VkCommandPoolCreateInfo cmd_buffer_pool_info = {};
cmd_buffer_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cmd_buffer_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
cmd_buffer_pool_info.queueFamilyIndex = device_data->graphic_queue->family_index;
- err = device_data->vtable.CreateCommandPool(device_data->device,
- &cmd_buffer_pool_info,
- NULL, &data->command_pool);
- check_vk_result(err);
-
- VkCommandBuffer cmd_bufs[ARRAY_SIZE(data->frame_data)];
-
- VkCommandBufferAllocateInfo cmd_buffer_info = {};
- cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
- cmd_buffer_info.commandPool = data->command_pool;
- cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
- cmd_buffer_info.commandBufferCount = 2;
- err = device_data->vtable.AllocateCommandBuffers(device_data->device,
- &cmd_buffer_info,
- cmd_bufs);
- check_vk_result(err);
-
- for (uint32_t i = 0; i < ARRAY_SIZE(data->frame_data); i++)
- data->frame_data[i].command_buffer = cmd_bufs[i];
-
-
- /* Submission fence */
- VkFenceCreateInfo fence_info = {};
- fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
- fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
- err = device_data->vtable.CreateFence(device_data->device, &fence_info,
- NULL, &data->fence);
- check_vk_result(err);
-
+ VK_CHECK(device_data->vtable.CreateCommandPool(device_data->device,
+ &cmd_buffer_pool_info,
+ NULL, &data->command_pool));
}
static void shutdown_swapchain_data(struct swapchain_data *data)
{
struct device_data *device_data = data->device;
+ list_for_each_entry_safe(struct overlay_draw, draw, &data->draws, link) {
+ device_data->vtable.DestroySemaphore(device_data->device, draw->semaphore, NULL);
+ device_data->vtable.DestroyFence(device_data->device, draw->fence, NULL);
+ device_data->vtable.DestroyBuffer(device_data->device, draw->vertex_buffer, NULL);
+ device_data->vtable.DestroyBuffer(device_data->device, draw->index_buffer, NULL);
+ device_data->vtable.FreeMemory(device_data->device, draw->vertex_buffer_mem, NULL);
+ device_data->vtable.FreeMemory(device_data->device, draw->index_buffer_mem, NULL);
+ }
+
for (uint32_t i = 0; i < data->n_images; i++) {
device_data->vtable.DestroyImageView(device_data->device, data->image_views[i], NULL);
device_data->vtable.DestroyFramebuffer(device_data->device, data->framebuffers[i], NULL);
device_data->vtable.DestroyRenderPass(device_data->device, data->render_pass, NULL);
- for (uint32_t i = 0; i < ARRAY_SIZE(data->frame_data); i++) {
- device_data->vtable.FreeCommandBuffers(device_data->device,
- data->command_pool,
- 1, &data->frame_data[i].command_buffer);
- if (data->frame_data[i].vertex_buffer)
- device_data->vtable.DestroyBuffer(device_data->device, data->frame_data[i].vertex_buffer, NULL);
- if (data->frame_data[i].index_buffer)
- device_data->vtable.DestroyBuffer(device_data->device, data->frame_data[i].index_buffer, NULL);
- if (data->frame_data[i].vertex_buffer_mem)
- device_data->vtable.FreeMemory(device_data->device, data->frame_data[i].vertex_buffer_mem, NULL);
- if (data->frame_data[i].index_buffer_mem)
- device_data->vtable.FreeMemory(device_data->device, data->frame_data[i].index_buffer_mem, NULL);
- }
device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL);
- device_data->vtable.DestroyFence(device_data->device, data->fence, NULL);
- if (data->submission_semaphore)
- device_data->vtable.DestroySemaphore(device_data->device, data->submission_semaphore, NULL);
-
device_data->vtable.DestroyPipeline(device_data->device, data->pipeline, NULL);
device_data->vtable.DestroyPipelineLayout(device_data->device, data->pipeline_layout, NULL);
- device_data->vtable.FreeDescriptorSets(device_data->device, data->descriptor_pool,
- 1, &data->descriptor_set);
device_data->vtable.DestroyDescriptorPool(device_data->device,
data->descriptor_pool, NULL);
device_data->vtable.DestroyDescriptorSetLayout(device_data->device,
ImGui::DestroyContext(data->imgui_context);
}
/* Per-swapchain work done right before a present.
 *
 * snapshot_swapchain_frame() is always called. The overlay is only computed
 * and rendered when display is not disabled (params.no_display) and at least
 * one frame has already been recorded (n_frames > 0).
 *
 * wait_semaphores / n_wait_semaphores are forwarded untouched to
 * render_swapchain_display(), as is imageIndex.
 *
 * Returns the overlay_draw handed back by render_swapchain_display(), or
 * NULL when nothing was rendered; callers have to handle the NULL case.
 */
-static void before_present(struct swapchain_data *swapchain_data,
- unsigned imageIndex)
+static struct overlay_draw *before_present(struct swapchain_data *swapchain_data,
+ const VkSemaphore *wait_semaphores,
+ unsigned n_wait_semaphores,
+ unsigned imageIndex)
{
+ struct instance_data *instance_data = swapchain_data->device->instance;
+ struct overlay_draw *draw = NULL; /* stays NULL when the overlay is skipped */
+
snapshot_swapchain_frame(swapchain_data);
- compute_swapchain_display(swapchain_data);
- render_swapchain_display(swapchain_data, imageIndex);
+ if (!instance_data->params.no_display && swapchain_data->n_frames > 0) {
+ compute_swapchain_display(swapchain_data);
+ draw = render_swapchain_display(swapchain_data,
+ wait_semaphores, n_wait_semaphores,
+ imageIndex);
+ }
+
+ return draw;
}
-VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateSwapchainKHR(
+static VkResult overlay_CreateSwapchainKHR(
VkDevice device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSwapchainKHR* pSwapchain)
{
- struct device_data *device_data = FIND_DEVICE_DATA(device);
+ struct device_data *device_data = FIND(struct device_data, device);
VkResult result = device_data->vtable.CreateSwapchainKHR(device, pCreateInfo, pAllocator, pSwapchain);
if (result != VK_SUCCESS) return result;
return result;
}
/* Layer entry point for vkDestroySwapchainKHR.
 *
 * Looks up the layer's swapchain_data for the handle, releases the Vulkan
 * objects the overlay created for it (shutdown_swapchain_data), forwards the
 * destroy call through the device dispatch table, and finally hands the
 * swapchain_data to destroy_swapchain_data().
 */
-VKAPI_ATTR void VKAPI_CALL overlay_DestroySwapchainKHR(
+static void overlay_DestroySwapchainKHR(
VkDevice device,
VkSwapchainKHR swapchain,
const VkAllocationCallbacks* pAllocator)
{
- struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain);
+ struct swapchain_data *swapchain_data =
+ FIND(struct swapchain_data, swapchain);
/* Overlay objects go first, while the swapchain handle is still alive. */
shutdown_swapchain_data(swapchain_data);
swapchain_data->device->vtable.DestroySwapchainKHR(device, swapchain, pAllocator);
/* NOTE(review): presumably frees swapchain_data itself - defined outside this hunk, confirm. */
destroy_swapchain_data(swapchain_data);
}
-VKAPI_ATTR VkResult VKAPI_CALL overlay_QueuePresentKHR(
+static VkResult overlay_QueuePresentKHR(
VkQueue queue,
const VkPresentInfoKHR* pPresentInfo)
{
- struct queue_data *queue_data = FIND_QUEUE_DATA(queue);
+ struct queue_data *queue_data = FIND(struct queue_data, queue);
struct device_data *device_data = queue_data->device;
-
- /* If we present on the graphic queue this layer is using to draw an
- * overlay, we don't need more than submitting the overlay draw prior to
- * present.
- */
- if (queue_data == device_data->graphic_queue) {
- for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
- struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pPresentInfo->pSwapchains[i]);
- before_present(swapchain_data, pPresentInfo->pImageIndices[i]);
+ struct instance_data *instance_data = device_data->instance;
+ uint32_t query_results[OVERLAY_QUERY_COUNT];
+
+ device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame]++;
+
+ if (list_length(&queue_data->running_command_buffer) > 0) {
+ /* Before getting the query results, make sure the operations have
+ * completed.
+ */
+ VK_CHECK(device_data->vtable.ResetFences(device_data->device,
+ 1, &queue_data->queries_fence));
+ VK_CHECK(device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence));
+ VK_CHECK(device_data->vtable.WaitForFences(device_data->device,
+ 1, &queue_data->queries_fence,
+ VK_FALSE, UINT64_MAX));
+
+ /* Now get the results. */
+ list_for_each_entry_safe(struct command_buffer_data, cmd_buffer_data,
+ &queue_data->running_command_buffer, link) {
+ list_delinit(&cmd_buffer_data->link);
+
+ if (cmd_buffer_data->pipeline_query_pool) {
+ memset(query_results, 0, sizeof(query_results));
+ VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device,
+ cmd_buffer_data->pipeline_query_pool,
+ cmd_buffer_data->query_index, 1,
+ sizeof(uint32_t) * OVERLAY_QUERY_COUNT,
+ query_results, 0, VK_QUERY_RESULT_WAIT_BIT));
+
+ for (uint32_t i = OVERLAY_PARAM_ENABLED_vertices;
+ i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) {
+ device_data->frame_stats.stats[i] += query_results[i - OVERLAY_PARAM_ENABLED_vertices];
+ }
+ }
+ if (cmd_buffer_data->timestamp_query_pool) {
+ uint64_t gpu_timestamps[2] = { 0 };
+ VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device,
+ cmd_buffer_data->timestamp_query_pool,
+ cmd_buffer_data->query_index * 2, 2,
+ 2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t),
+ VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT));
+
+ gpu_timestamps[0] &= queue_data->timestamp_mask;
+ gpu_timestamps[1] &= queue_data->timestamp_mask;
+ device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] +=
+ (gpu_timestamps[1] - gpu_timestamps[0]) *
+ device_data->properties.limits.timestampPeriod;
+ }
}
- return queue_data->device->vtable.QueuePresentKHR(queue, pPresentInfo);
}
- /* Otherwise we need to do cross queue synchronization to tie the overlay
- * draw into the present queue.
+ /* Otherwise we need to add our overlay drawing semaphore to the list of
+ * semaphores to wait on. If we don't do that the presented picture might
+ * be have incomplete overlay drawings.
*/
- VkPresentInfoKHR present_info = *pPresentInfo;
- VkSemaphore *semaphores =
- (VkSemaphore *)malloc(sizeof(VkSemaphore) * (pPresentInfo->waitSemaphoreCount + pPresentInfo->swapchainCount));
- for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++)
- semaphores[i] = pPresentInfo->pWaitSemaphores[i];
- for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
- struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pPresentInfo->pSwapchains[i]);
- before_present(swapchain_data, pPresentInfo->pImageIndices[i]);
- semaphores[pPresentInfo->waitSemaphoreCount + i] = swapchain_data->submission_semaphore;
+ VkResult result = VK_SUCCESS;
+ if (instance_data->params.no_display) {
+ for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
+ VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i];
+ struct swapchain_data *swapchain_data =
+ FIND(struct swapchain_data, swapchain);
+
+ before_present(swapchain_data,
+ pPresentInfo->pWaitSemaphores,
+ pPresentInfo->waitSemaphoreCount,
+ pPresentInfo->pImageIndices[i]);
+
+ VkPresentInfoKHR present_info = *pPresentInfo;
+ present_info.swapchainCount = 1;
+ present_info.pSwapchains = &swapchain;
+
+ uint64_t ts0 = os_time_get();
+ result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
+ uint64_t ts1 = os_time_get();
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0;
+ }
+ } else {
+ for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
+ VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i];
+ struct swapchain_data *swapchain_data =
+ FIND(struct swapchain_data, swapchain);
+ VkPresentInfoKHR present_info = *pPresentInfo;
+ present_info.swapchainCount = 1;
+ present_info.pSwapchains = &swapchain;
+
+ uint32_t image_index = pPresentInfo->pImageIndices[i];
+
+ struct overlay_draw *draw = before_present(swapchain_data,
+ pPresentInfo->pWaitSemaphores,
+ pPresentInfo->waitSemaphoreCount,
+ image_index);
+
+ /* Because the submission of the overlay draw waits on the semaphores
+ * handed for present, we don't need to have this present operation
+ * wait on them as well, we can just wait on the overlay submission
+ * semaphore.
+ */
+ present_info.pWaitSemaphores = &draw->semaphore;
+ present_info.waitSemaphoreCount = 1;
+
+ uint64_t ts0 = os_time_get();
+ VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
+ uint64_t ts1 = os_time_get();
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0;
+ if (pPresentInfo->pResults)
+ pPresentInfo->pResults[i] = chain_result;
+ if (chain_result != VK_SUCCESS && result == VK_SUCCESS)
+ result = chain_result;
+ }
}
- present_info.pWaitSemaphores = semaphores;
- present_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount + pPresentInfo->swapchainCount;
- VkResult result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
- free(semaphores);
return result;
}
-VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImageKHR(
+static VkResult overlay_AcquireNextImageKHR(
VkDevice device,
VkSwapchainKHR swapchain,
uint64_t timeout,
VkFence fence,
uint32_t* pImageIndex)
{
- struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain);
+ struct swapchain_data *swapchain_data =
+ FIND(struct swapchain_data, swapchain);
struct device_data *device_data = swapchain_data->device;
uint64_t ts0 = os_time_get();
semaphore, fence, pImageIndex);
uint64_t ts1 = os_time_get();
- swapchain_data->acquire_times[swapchain_data->n_acquire %
- ARRAY_SIZE(swapchain_data->acquire_times)] =
- ((double)ts1 - (double)ts0) / 1000.0;
- swapchain_data->n_acquire++;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
return result;
}
/* Layer entry point for vkAcquireNextImage2KHR.
 *
 * Forwards the call through the device dispatch table, bracketing it with
 * os_time_get() so the elapsed time can be added to the swapchain's
 * acquire_timing stat, and bumps the acquire counter. Both stats are updated
 * whatever the call returned; the result is handed back unchanged.
 */
-VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImage2KHR(
+static VkResult overlay_AcquireNextImage2KHR(
VkDevice device,
const VkAcquireNextImageInfoKHR* pAcquireInfo,
uint32_t* pImageIndex)
{
- struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pAcquireInfo->swapchain);
+ struct swapchain_data *swapchain_data =
+ FIND(struct swapchain_data, pAcquireInfo->swapchain);
struct device_data *device_data = swapchain_data->device;
uint64_t ts0 = os_time_get();
VkResult result = device_data->vtable.AcquireNextImage2KHR(device, pAcquireInfo, pImageIndex);
uint64_t ts1 = os_time_get();
- swapchain_data->acquire_times[swapchain_data->n_acquire %
- ARRAY_SIZE(swapchain_data->acquire_times)] =
- ((double)ts1 - (double)ts0) / 1000.0;
- swapchain_data->n_acquire++;
/* Accumulate the raw os_time_get() delta; no unit conversion is done here. */
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
return result;
}
-VKAPI_ATTR void VKAPI_CALL overlay_CmdDraw(
+static void overlay_CmdDraw( /* Layer hook for vkCmdDraw.
+ * Counts one draw in the stats of the command buffer the handle maps to,
+ * then forwards the command through the owning device's dispatch table. */
VkCommandBuffer commandBuffer,
uint32_t vertexCount,
uint32_t instanceCount,
uint32_t firstVertex,
uint32_t firstInstance)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++; /* per command buffer; replaces the per-device counter removed below */
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDraw(commandBuffer, vertexCount, instanceCount,
firstVertex, firstInstance);
- device_data->stats.stats[FRAME_STAT_DRAW]++;
}
-VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexed(
+static void overlay_CmdDrawIndexed( /* Layer hook for vkCmdDrawIndexed.
+ * Counts one indexed draw in the stats of the command buffer the handle
+ * maps to, then forwards the command through the device dispatch table.
+ * NOTE(review): firstIndex is forwarded below but is not declared in the
+ * parameter list visible here - presumably an elided context line;
+ * confirm against the full file. */
VkCommandBuffer commandBuffer,
uint32_t indexCount,
uint32_t instanceCount,
int32_t vertexOffset,
uint32_t firstInstance)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++; /* per command buffer; replaces the per-device counter removed below */
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndexed(commandBuffer, indexCount, instanceCount,
firstIndex, vertexOffset, firstInstance);
- device_data->stats.stats[FRAME_STAT_DRAW_INDEXED]++;
}
-VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirect(
+static void overlay_CmdDrawIndirect( /* Layer hook for vkCmdDrawIndirect.
+ * Counts one indirect draw command (not drawCount draws) in the stats of
+ * the command buffer the handle maps to, then forwards the command. */
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++; /* incremented by one regardless of drawCount */
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride);
- device_data->stats.stats[FRAME_STAT_DRAW_INDIRECT]++;
}
-VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirect(
+static void overlay_CmdDrawIndexedIndirect( /* Layer hook for vkCmdDrawIndexedIndirect.
+ * Counts one indexed indirect draw command (not drawCount draws) in the
+ * stats of the command buffer the handle maps to, then forwards it. */
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++; /* incremented by one regardless of drawCount */
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride);
- device_data->stats.stats[FRAME_STAT_DRAW_INDEXED_INDIRECT]++;
}
-VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR(
+static void overlay_CmdDrawIndirectCountKHR( /* Layer hook for vkCmdDrawIndirectCountKHR.
+ * Counts one indirect-count draw command in the stats of the command
+ * buffer the handle maps to, then forwards the command.
+ * NOTE(review): countBuffer and countBufferOffset are forwarded below but
+ * are not declared in the parameter list visible here - presumably
+ * elided context lines; confirm against the full file. */
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t maxDrawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++; /* incremented by one regardless of maxDrawCount */
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndirectCountKHR(commandBuffer, buffer, offset,
countBuffer, countBufferOffset,
maxDrawCount, stride);
- device_data->stats.stats[FRAME_STAT_DRAW_INDIRECT_COUNT]++;
}
-VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR(
+static void overlay_CmdDrawIndexedIndirectCountKHR( /* Layer hook for vkCmdDrawIndexedIndirectCountKHR.
+ * Counts one indexed indirect-count draw command in the stats of the
+ * command buffer the handle maps to, then forwards the command.
+ * NOTE(review): countBuffer and countBufferOffset are forwarded below but
+ * are not declared in the parameter list visible here - presumably
+ * elided context lines; confirm against the full file. */
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t maxDrawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++; /* incremented by one regardless of maxDrawCount */
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset,
countBuffer, countBufferOffset,
maxDrawCount, stride);
- device_data->stats.stats[FRAME_STAT_DRAW_INDEXED_INDIRECT_COUNT]++;
}
-VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatch(
+static void overlay_CmdDispatch( /* Layer hook for vkCmdDispatch.
+ * Counts one dispatch in the stats of the command buffer the handle maps
+ * to, then forwards the command through the device dispatch table. */
VkCommandBuffer commandBuffer,
uint32_t groupCountX,
uint32_t groupCountY,
uint32_t groupCountZ)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++; /* per command buffer; replaces the per-device counter removed below */
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
- device_data->stats.stats[FRAME_STAT_DISPATCH]++;
}
-VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatchIndirect(
+static void overlay_CmdDispatchIndirect( /* Layer hook for vkCmdDispatchIndirect.
+ * Counts one indirect dispatch in the stats of the command buffer the
+ * handle maps to, then forwards the command. */
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++; /* per command buffer; replaces the per-device counter removed below */
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDispatchIndirect(commandBuffer, buffer, offset);
- device_data->stats.stats[FRAME_STAT_DISPATCH_INDIRECT]++;
}
-VKAPI_ATTR void VKAPI_CALL overlay_CmdBindPipeline(
+static void overlay_CmdBindPipeline( /* Layer hook for vkCmdBindPipeline.
+ * Counts the bind under the graphics, compute or raytracing stat that
+ * matches pipelineBindPoint (other bind points are not counted), then
+ * forwards the command through the device dispatch table. */
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline pipeline)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
- device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
switch (pipelineBindPoint) {
- case VK_PIPELINE_BIND_POINT_GRAPHICS: device_data->stats.stats[FRAME_STAT_PIPELINE_GRAPHICS]++; break;
- case VK_PIPELINE_BIND_POINT_COMPUTE: device_data->stats.stats[FRAME_STAT_PIPELINE_COMPUTE]++; break;
- case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: device_data->stats.stats[FRAME_STAT_PIPELINE_RAYTRACING]++; break;
+ case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break;
+ case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break;
+ case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break;
default: break;
}
+ struct device_data *device_data = cmd_buffer_data->device; /* the forward now happens after counting; the old code forwarded first */
+ device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
}
-VKAPI_ATTR VkResult VKAPI_CALL overlay_AllocateCommandBuffers(VkDevice device,
- const VkCommandBufferAllocateInfo* pAllocateInfo,
- VkCommandBuffer* pCommandBuffers)
+/* Layer hook for vkBeginCommandBuffer.
+ *
+ * Clears the stats accumulated on the command buffer, then forwards the
+ * call to the next layer and returns its result.
+ *
+ * Secondary command buffers record no overlay query. When pipeline
+ * statistics are enabled, the inheritance info handed down the chain is
+ * patched (on a local copy) so that the secondary declares the overlay's
+ * pipeline statistics flags, which lets it execute while the primary's
+ * pipeline statistics query is active.
+ *
+ * For every other command buffer, after a successful begin, the query
+ * slots owned by this command buffer are reset and the opening pipeline
+ * statistics query / timestamp are recorded as the first commands.
+ */
+static VkResult overlay_BeginCommandBuffer(
+   VkCommandBuffer commandBuffer,
+   const VkCommandBufferBeginInfo* pBeginInfo)
{
- struct device_data *device_data = FIND_DEVICE_DATA(device);
+   struct command_buffer_data *cmd_buffer_data =
+      FIND(struct command_buffer_data, commandBuffer);
+   struct device_data *device_data = cmd_buffer_data->device;
+
+   memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats));
+
+   /* We don't record any query in secondary command buffers, just make sure
+    * we have the right inheritance.
+    */
+   if (cmd_buffer_data->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+      /* The inheritance info is reached through pInheritanceInfo, not the
+       * pNext chain, so shallow copies of the two structs are enough; the
+       * application's own structures are never written to.
+       */
+      VkCommandBufferBeginInfo begin_info = *pBeginInfo;
+      VkCommandBufferInheritanceInfo inhe_info;
+
+      /* Only touch the flags when the matching device features were
+       * requested in overlay_CreateDevice (same condition).
+       */
+      if (pBeginInfo->pInheritanceInfo &&
+          device_data->instance->pipeline_statistics_enabled) {
+         inhe_info = *pBeginInfo->pInheritanceInfo;
+         inhe_info.pipelineStatistics |= overlay_query_flags;
+         begin_info.pInheritanceInfo = &inhe_info;
+      }
+
+      /* Forward the patched copy; the original code forwarded pBeginInfo,
+       * which silently dropped the inheritance change.
+       */
+      return device_data->vtable.BeginCommandBuffer(commandBuffer, &begin_info);
+   }
+
+   /* Otherwise record a begin query as first command. */
+   VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
+
+   if (result == VK_SUCCESS) {
+      /* Resets first: one pipeline statistics slot and a pair of timestamp
+       * slots (begin/end) per command buffer.
+       */
+      if (cmd_buffer_data->pipeline_query_pool) {
+         device_data->vtable.CmdResetQueryPool(commandBuffer,
+                                               cmd_buffer_data->pipeline_query_pool,
+                                               cmd_buffer_data->query_index, 1);
+      }
+      if (cmd_buffer_data->timestamp_query_pool) {
+         device_data->vtable.CmdResetQueryPool(commandBuffer,
+                                               cmd_buffer_data->timestamp_query_pool,
+                                               cmd_buffer_data->query_index * 2, 2);
+      }
+      if (cmd_buffer_data->pipeline_query_pool) {
+         device_data->vtable.CmdBeginQuery(commandBuffer,
+                                           cmd_buffer_data->pipeline_query_pool,
+                                           cmd_buffer_data->query_index, 0);
+      }
+      if (cmd_buffer_data->timestamp_query_pool) {
+         device_data->vtable.CmdWriteTimestamp(commandBuffer,
+                                               VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                               cmd_buffer_data->timestamp_query_pool,
+                                               cmd_buffer_data->query_index * 2);
+      }
+   }
+
+   return result;
+}
+
+static VkResult overlay_EndCommandBuffer( /* Layer hook for vkEndCommandBuffer.
+ * Before forwarding, records the closing timestamp and ends the pipeline
+ * statistics query for whichever query pools this command buffer carries.
+ * Returns the result of the forwarded EndCommandBuffer.
+ * NOTE(review): overlay_BeginCommandBuffer returns early for secondary
+ * command buffers without resetting or writing any query, while the
+ * timestamp pool is created without a level check at allocation - confirm
+ * that writing the closing timestamp here is intended for secondaries. */
+ VkCommandBuffer commandBuffer)
+{
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ if (cmd_buffer_data->timestamp_query_pool) {
+ device_data->vtable.CmdWriteTimestamp(commandBuffer,
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ cmd_buffer_data->timestamp_query_pool,
+ cmd_buffer_data->query_index * 2 + 1); /* second slot of the pair; overlay_BeginCommandBuffer writes slot query_index * 2 */
+ }
+ if (cmd_buffer_data->pipeline_query_pool) {
+ device_data->vtable.CmdEndQuery(commandBuffer,
+ cmd_buffer_data->pipeline_query_pool,
+ cmd_buffer_data->query_index);
+ }
+
+ return device_data->vtable.EndCommandBuffer(commandBuffer);
+}
+
+static VkResult overlay_ResetCommandBuffer( /* Layer hook for vkResetCommandBuffer.
+ * Zeroes the stats accumulated on the command buffer, then forwards the
+ * reset and returns the result of the forwarded call. */
+ VkCommandBuffer commandBuffer,
+ VkCommandBufferResetFlags flags)
+{
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); /* cleared before forwarding, whatever the forwarded call returns */
+
+ return device_data->vtable.ResetCommandBuffer(commandBuffer, flags);
+}
+
+static void overlay_CmdExecuteCommands( /* Layer hook for vkCmdExecuteCommands.
+ * Adds every stat of each executed command buffer to the stats of the
+ * command buffer executing them, then forwards the command. The values
+ * added are those held by the executed command buffers at the time of
+ * this call. */
+ VkCommandBuffer commandBuffer,
+ uint32_t commandBufferCount,
+ const VkCommandBuffer* pCommandBuffers)
+{
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ /* Add the stats of the executed command buffers to the primary one. */
+ for (uint32_t c = 0; c < commandBufferCount; c++) {
+ struct command_buffer_data *sec_cmd_buffer_data =
+ FIND(struct command_buffer_data, pCommandBuffers[c]);
+
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) /* every slot is summed, not only the draw/dispatch counters */
+ cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s];
+ }
+
+ device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers);
+}
+
+/* Layer hook for vkAllocateCommandBuffers.
+ *
+ * Forwards the allocation and, on success, creates the query pools shared
+ * by the command buffers of this allocation:
+ *  - a pipeline statistics pool with one query per command buffer, only
+ *    for primary command buffers and only when pipeline statistics are
+ *    enabled on the instance;
+ *  - a timestamp pool with two queries per command buffer when the
+ *    gpu_timing parameter is enabled.
+ * Each command buffer gets overlay data holding its level, the pools and
+ * its index `i` within the allocation. Each created pool is registered in
+ * the object map with the number of command buffers using it, which
+ * overlay_FreeCommandBuffers counts down before destroying the pool.
+ *
+ * Returns the result of the forwarded AllocateCommandBuffers.
+ */
+static VkResult overlay_AllocateCommandBuffers(
+ VkDevice device,
+ const VkCommandBufferAllocateInfo* pAllocateInfo,
+ VkCommandBuffer* pCommandBuffers)
+{
+ struct device_data *device_data = FIND(struct device_data, device);
VkResult result =
device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers);
- if (result != VK_SUCCESS) return result;
+ if (result != VK_SUCCESS)
+ return result;
+
+ VkQueryPool pipeline_query_pool = VK_NULL_HANDLE;
+ VkQueryPool timestamp_query_pool = VK_NULL_HANDLE;
+ if (device_data->instance->pipeline_statistics_enabled &&
+ pAllocateInfo->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+ VkQueryPoolCreateInfo pool_info = {
+ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ NULL,
+ 0,
+ VK_QUERY_TYPE_PIPELINE_STATISTICS,
+ pAllocateInfo->commandBufferCount,
+ overlay_query_flags,
+ };
+ VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info,
+ NULL, &pipeline_query_pool));
+ }
+ /* NOTE(review): unlike the pipeline statistics pool there is no level
+ * check here, so secondary command buffers also receive a timestamp pool
+ * - confirm this is intended.
+ */
+ if (device_data->instance->params.enabled[OVERLAY_PARAM_ENABLED_gpu_timing]) {
+ VkQueryPoolCreateInfo pool_info = {
+ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ NULL,
+ 0,
+ VK_QUERY_TYPE_TIMESTAMP,
+ pAllocateInfo->commandBufferCount * 2,
+ 0,
+ };
+ VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info,
+ NULL, &timestamp_query_pool));
+ }
+
+ for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+ new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level,
+ pipeline_query_pool, timestamp_query_pool,
+ i, device_data);
+ }
- for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++)
- map_object(pCommandBuffers[i], device_data);
+ /* The value stored for a pool is a user count smuggled through the
+ * pointer-sized map slot, not a real pointer.
+ */
+ if (pipeline_query_pool)
+ map_object(HKEY(pipeline_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
+ if (timestamp_query_pool)
+ map_object(HKEY(timestamp_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
return result;
}
-VKAPI_ATTR void VKAPI_CALL overlay_FreeCommandBuffers(VkDevice device,
- VkCommandPool commandPool,
- uint32_t commandBufferCount,
- const VkCommandBuffer* pCommandBuffers)
+static void overlay_FreeCommandBuffers( /* Layer hook for vkFreeCommandBuffers.
+ * For each command buffer with overlay data: counts down the user count
+ * stored in the object map for its pipeline statistics and timestamp
+ * query pools, destroys a pool when the count it held was 1, then
+ * destroys the overlay data. Finally forwards the free. */
+ VkDevice device,
+ VkCommandPool commandPool,
+ uint32_t commandBufferCount,
+ const VkCommandBuffer* pCommandBuffers)
{
- struct device_data *device_data = FIND_DEVICE_DATA(device);
+ struct device_data *device_data = FIND(struct device_data, device);
+ for (uint32_t i = 0; i < commandBufferCount; i++) {
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, pCommandBuffers[i]);
- for (uint32_t i = 0; i < commandBufferCount; i++)
- unmap_object(pCommandBuffers[i]);
+ /* It is legal to free a NULL command buffer; skip entries with no overlay data. */
+ if (!cmd_buffer_data)
+ continue;
+
+ uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->pipeline_query_pool)); /* remaining users of the pool; 0 is treated as "nothing to release" */
+ if (count == 1) {
+ unmap_object(HKEY(cmd_buffer_data->pipeline_query_pool));
+ device_data->vtable.DestroyQueryPool(device_data->device,
+ cmd_buffer_data->pipeline_query_pool, NULL);
+ } else if (count != 0) {
+ map_object(HKEY(cmd_buffer_data->pipeline_query_pool), (void *)(uintptr_t)(count - 1));
+ }
+ count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->timestamp_query_pool)); /* same countdown for the timestamp pool */
+ if (count == 1) {
+ unmap_object(HKEY(cmd_buffer_data->timestamp_query_pool));
+ device_data->vtable.DestroyQueryPool(device_data->device,
+ cmd_buffer_data->timestamp_query_pool, NULL);
+ } else if (count != 0) {
+ map_object(HKEY(cmd_buffer_data->timestamp_query_pool), (void *)(uintptr_t)(count - 1));
+ }
+ destroy_command_buffer_data(cmd_buffer_data);
+ }
device_data->vtable.FreeCommandBuffers(device, commandPool,
commandBufferCount, pCommandBuffers);
}
-VKAPI_ATTR VkResult VKAPI_CALL overlay_QueueSubmit(
+static VkResult overlay_QueueSubmit( /* Layer hook for vkQueueSubmit.
+ * Counts one submit on the device, merges the stats of every submitted
+ * command buffer into the device's frame stats, and links command buffers
+ * that own a query pool onto the queue's running_command_buffer list.
+ * The submit is then forwarded and its result returned. */
VkQueue queue,
uint32_t submitCount,
const VkSubmitInfo* pSubmits,
VkFence fence)
{
- struct queue_data *queue_data = FIND_QUEUE_DATA(queue);
+ struct queue_data *queue_data = FIND(struct queue_data, queue);
struct device_data *device_data = queue_data->device;
- device_data->stats.stats[FRAME_STAT_SUBMIT]++;
+ device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++; /* one per call, independent of submitCount */
+
+ for (uint32_t s = 0; s < submitCount; s++) {
+ for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) {
+ struct command_buffer_data *cmd_buffer_data =
+ FIND(struct command_buffer_data, pSubmits[s].pCommandBuffers[c]);
+
+ /* Merge the submitted command buffer stats into the device. */
+ for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++)
+ device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st];
+
+ /* Attach the command buffer to the queue so we remember to read its
+ * pipeline statistics & timestamps at QueuePresent().
+ */
+ if (!cmd_buffer_data->pipeline_query_pool &&
+ !cmd_buffer_data->timestamp_query_pool)
+ continue;
+
+ if (list_empty(&cmd_buffer_data->link)) { /* an empty link means the command buffer is not on a list yet */
+ list_addtail(&cmd_buffer_data->link,
+ &queue_data->running_command_buffer);
+ } else {
+ fprintf(stderr, "Command buffer submitted multiple times before present.\n"
+ "This could lead to invalid data.\n");
+ }
+ }
+ }
return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
}
-VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateDevice(
+static VkResult overlay_CreateDevice( /* Layer hook for vkCreateDevice.
+ * Advances the layer link info, creates the device through the next
+ * element with a copy of the create info whose enabled features
+ * additionally request inheritedQueries and pipelineStatisticsQuery when
+ * pipeline statistics are enabled, then sets up the overlay's device
+ * data: dispatch table, physical device properties, loader data callback
+ * and queues. Returns the result of the downstream device creation.
+ * NOTE(review): fpCreateDevice and fpGetDeviceProcAddr are used below but
+ * their declarations are not visible in this excerpt - presumably elided
+ * context lines; confirm against the full file. */
VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkDevice* pDevice)
{
- struct instance_data *instance_data = FIND_PHYSICAL_DEVICE_DATA(physicalDevice);
- VkLayerDeviceCreateInfo *chain_info = get_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
+ struct instance_data *instance_data =
+ FIND(struct instance_data, physicalDevice);
+ VkLayerDeviceCreateInfo *chain_info =
+ get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
assert(chain_info->u.pLayerInfo);
PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
// Advance the link info for the next element on the chain
chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext;
- VkResult result = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice);
+ VkPhysicalDeviceFeatures device_features = {}; /* all features off unless copied from the application or forced below */
+ VkDeviceCreateInfo device_info = *pCreateInfo; /* shallow copy; only its pEnabledFeatures pointer is replaced below */
+
+ if (pCreateInfo->pEnabledFeatures)
+ device_features = *(pCreateInfo->pEnabledFeatures);
+ if (instance_data->pipeline_statistics_enabled) {
+ device_features.inheritedQueries = true;
+ device_features.pipelineStatisticsQuery = true;
+ }
+ device_info.pEnabledFeatures = &device_features; /* NOTE(review): set even when the application passed NULL; if it supplies features through a pNext structure instead, this may conflict - confirm */
+
+
+ VkResult result = fpCreateDevice(physicalDevice, &device_info, pAllocator, pDevice);
if (result != VK_SUCCESS) return result;
struct device_data *device_data = new_device_data(*pDevice, instance_data);
device_data->physical_device = physicalDevice;
- layer_init_device_dispatch_table(*pDevice, &device_data->vtable, fpGetDeviceProcAddr);
+ vk_load_device_commands(*pDevice, fpGetDeviceProcAddr, &device_data->vtable);
instance_data->vtable.GetPhysicalDeviceProperties(device_data->physical_device,
&device_data->properties);
+ VkLayerDeviceCreateInfo *load_data_info =
+ get_device_chain_info(pCreateInfo, VK_LOADER_DATA_CALLBACK);
+ device_data->set_device_loader_data = load_data_info->u.pfnSetDeviceLoaderData; /* NOTE(review): load_data_info is dereferenced without a NULL check - confirm the lookup cannot fail */
+
device_map_queues(device_data, pCreateInfo);
return result;
}
-VKAPI_ATTR void VKAPI_CALL overlay_DestroyDevice(
+static void overlay_DestroyDevice( /* Layer hook for vkDestroyDevice.
+ * Unmaps the device's queues, forwards the destroy through the device
+ * dispatch table, then releases the overlay's device data. */
VkDevice device,
const VkAllocationCallbacks* pAllocator)
{
- struct device_data *device_data = FIND_DEVICE_DATA(device);
+ struct device_data *device_data = FIND(struct device_data, device); /* still needed after the forward: the table and data are freed last */
device_unmap_queues(device_data);
device_data->vtable.DestroyDevice(device, pAllocator);
destroy_device_data(device_data);
}
-VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateInstance(
+static VkResult overlay_CreateInstance( /* Layer hook for vkCreateInstance.
+ * After the downstream creation succeeds: allocates the overlay's
+ * instance data, loads the instance dispatch table, maps the physical
+ * devices, parses the overlay parameters from the
+ * VK_LAYER_MESA_OVERLAY_CONFIG environment variable and records whether
+ * any pipeline statistics parameter is enabled. Returns `result`.
+ * NOTE(review): the lines that initialise fpGetInstanceProcAddr and
+ * produce `result` are not visible in this excerpt - presumably elided
+ * context lines; confirm against the full file. */
const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkInstance* pInstance)
{
- VkLayerInstanceCreateInfo *chain_info = get_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
+ VkLayerInstanceCreateInfo *chain_info =
+ get_instance_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
assert(chain_info->u.pLayerInfo);
PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr =
if (result != VK_SUCCESS) return result;
struct instance_data *instance_data = new_instance_data(*pInstance);
- layer_init_instance_dispatch_table(instance_data->instance,
- &instance_data->vtable,
- fpGetInstanceProcAddr);
+ vk_load_instance_commands(instance_data->instance,
+ fpGetInstanceProcAddr,
+ &instance_data->vtable);
instance_data_map_physical_devices(instance_data, true);
- const char *stats_config = getenv("VK_LAYER_MESA_OVERLAY_STATS");
- instance_data->enabled_stats = parse_debug_string(stats_config,
- enable_flags);
-
- if (instance_data->enabled_stats & FRAME_STAT_ENABLED(HELP)) {
- fprintf(stderr, "Available stats:\n");
- for (uint32_t i = 0; enable_flags[i].string != NULL; i++)
- fprintf(stderr, "\t%s\n", enable_flags[i].string);
- fprintf(stderr, "Position layer using VK_LAYER_MESA_OVERLAY_POSITION=\n"
- "\ttop-left\n"
- "\ttop-right\n"
- "\tbottom-left\n"
- "\tbottom-right\n");
- }
+ parse_overlay_env(&instance_data->params, getenv("VK_LAYER_MESA_OVERLAY_CONFIG")); /* single variable replacing the two removed ones (stats and position) */
- instance_data->position =
- parse_layer_position(getenv("VK_LAYER_MESA_OVERLAY_POSITION"));
+ for (int i = OVERLAY_PARAM_ENABLED_vertices; /* scans the enum range vertices..compute_invocations, both ends included */
+ i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) {
+ if (instance_data->params.enabled[i]) {
+ instance_data->pipeline_statistics_enabled = true; /* one enabled parameter in the range is enough */
+ break;
+ }
+ }
return result;
}
-VKAPI_ATTR void VKAPI_CALL overlay_DestroyInstance(
+static void overlay_DestroyInstance( /* Layer hook for vkDestroyInstance.
+ * Unmaps the instance's physical devices, forwards the destroy through
+ * the instance dispatch table, then releases the overlay's instance
+ * data. The closing brace of this function is not part of this excerpt. */
VkInstance instance,
const VkAllocationCallbacks* pAllocator)
{
- struct instance_data *instance_data = FIND_INSTANCE_DATA(instance);
+ struct instance_data *instance_data = FIND(struct instance_data, instance); /* still needed after the forward: the table and data are freed last */
instance_data_map_physical_devices(instance_data, false);
instance_data->vtable.DestroyInstance(instance, pAllocator);
destroy_instance_data(instance_data);
{ "vkGetDeviceProcAddr", (void *) vkGetDeviceProcAddr },
#define ADD_HOOK(fn) { "vk" # fn, (void *) overlay_ ## fn }
ADD_HOOK(AllocateCommandBuffers),
+ ADD_HOOK(FreeCommandBuffers),
+ ADD_HOOK(ResetCommandBuffer),
+ ADD_HOOK(BeginCommandBuffer),
+ ADD_HOOK(EndCommandBuffer),
+ ADD_HOOK(CmdExecuteCommands),
ADD_HOOK(CmdDraw),
ADD_HOOK(CmdDrawIndexed),
+ ADD_HOOK(CmdDrawIndirect),
ADD_HOOK(CmdDrawIndexedIndirect),
ADD_HOOK(CmdDispatch),
ADD_HOOK(CmdDispatchIndirect),
ADD_HOOK(AcquireNextImage2KHR),
ADD_HOOK(QueueSubmit),
- ADD_HOOK(CreateInstance),
- ADD_HOOK(DestroyInstance),
+
ADD_HOOK(CreateDevice),
ADD_HOOK(DestroyDevice),
+
+ ADD_HOOK(CreateInstance),
+ ADD_HOOK(DestroyInstance),
#undef ADD_HOOK
};
if (dev == NULL) return NULL;
- struct device_data *device_data = FIND_DEVICE_DATA(dev);
+ struct device_data *device_data = FIND(struct device_data, dev);
if (device_data->vtable.GetDeviceProcAddr == NULL) return NULL;
return device_data->vtable.GetDeviceProcAddr(dev, funcName);
}
if (instance == NULL) return NULL;
- struct instance_data *instance_data = FIND_INSTANCE_DATA(instance);
+ struct instance_data *instance_data = FIND(struct instance_data, instance);
if (instance_data->vtable.GetInstanceProcAddr == NULL) return NULL;
return instance_data->vtable.GetInstanceProcAddr(instance, funcName);
}