+ struct instance_data *instance_data = device_data->instance;
+ uint32_t query_results[OVERLAY_QUERY_COUNT];
+
+ device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame]++;
+
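+   /* Nothing to read back unless command buffers carrying overlay
+    * queries were submitted on this queue.
+    */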
+ if (list_length(&queue_data->running_command_buffer) > 0) {
+      /* Before reading the query results back, make sure the submitted
+       * operations have completed: submit an empty batch whose only
+       * effect is to signal the fence, then wait on it. Submission
+       * order guarantees the fence signals only once all previously
+       * submitted work on this queue has finished.
+       */
+ VK_CHECK(device_data->vtable.ResetFences(device_data->device,
+ 1, &queue_data->queries_fence));
+ VK_CHECK(device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence));
+ VK_CHECK(device_data->vtable.WaitForFences(device_data->device,
+ 1, &queue_data->queries_fence,
+ VK_FALSE, UINT64_MAX));
+
+ /* Now get the results. */
+ list_for_each_entry_safe(struct command_buffer_data, cmd_buffer_data,
+ &queue_data->running_command_buffer, link) {
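+         /* The fence wait above ensures this command buffer's queries
+          * have completed, so it can safely be removed from the running
+          * list.
+          */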
+ list_delinit(&cmd_buffer_data->link);
+
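+         /* A pipeline statistics query returns one 32-bit counter per
+          * enabled statistic, OVERLAY_QUERY_COUNT of them per query
+          * slot.
+          */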
+ if (cmd_buffer_data->pipeline_query_pool) {
+ memset(query_results, 0, sizeof(query_results));
+ VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device,
+ cmd_buffer_data->pipeline_query_pool,
+ cmd_buffer_data->query_index, 1,
+ sizeof(uint32_t) * OVERLAY_QUERY_COUNT,
+ query_results, 0, VK_QUERY_RESULT_WAIT_BIT));
+
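+            /* Accumulate each counter into its overlay parameter; the
+             * enum values from _vertices through _compute_invocations
+             * match the order of the query results.
+             */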
+ for (uint32_t i = OVERLAY_PARAM_ENABLED_vertices;
+ i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) {
+ device_data->frame_stats.stats[i] += query_results[i - OVERLAY_PARAM_ENABLED_vertices];
+ }
+ }
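+         /* Each command buffer records two timestamps (start and end),
+          * stored as consecutive 64-bit query results.
+          */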
+ if (cmd_buffer_data->timestamp_query_pool) {
+ uint64_t gpu_timestamps[2] = { 0 };
+ VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device,
+ cmd_buffer_data->timestamp_query_pool,
+ cmd_buffer_data->query_index * 2, 2,
+ 2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t),
+ VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT));
+
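+            /* Mask off the bits beyond the queue's valid timestamp bits
+             * (presumably derived from timestampValidBits), then convert
+             * the tick delta to nanoseconds with timestampPeriod.
+             */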
+ gpu_timestamps[0] &= queue_data->timestamp_mask;
+ gpu_timestamps[1] &= queue_data->timestamp_mask;
+ device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] +=
+ (gpu_timestamps[1] - gpu_timestamps[0]) *
+ device_data->properties.limits.timestampPeriod;
+ }