struct gen_aux_map_context;
struct gen_perf_config;
+struct gen_perf_counter_pass;
+struct gen_perf_query_result;
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_intel.h>
*/
#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
+/* We reserve this MI ALU register to pass around an offset computed from
+ * VkPerformanceQuerySubmitInfoKHR::counterPassIndex (see
+ * VK_KHR_performance_query). Other code which uses the MI ALU should leave
+ * it alone.
+ */
+#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
+
/* For gen12 we set the streamout buffers using 4 separate commands
* (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
* of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
*/
uintptr_t * fence_bos;
+ int perf_query_pass;
+
const VkAllocationCallbacks * alloc;
VkSystemAllocationScope alloc_scope;
_dst = NULL; \
}))
+/* #define __gen_get_batch_dwords anv_batch_emit_dwords */
+/* #define __gen_get_batch_address anv_batch_address */
+/* #define __gen_address_value anv_address_physical */
+/* #define __gen_address_offset anv_address_add */
+
struct anv_device_memory {
struct vk_object_base base;
VkCommandBufferUsageFlags usage_flags;
VkCommandBufferLevel level;
+ struct anv_query_pool *perf_query_pool;
+
struct anv_cmd_state state;
struct anv_address return_addr;
const VkSemaphore *out_semaphores,
const uint64_t *out_signal_values,
uint32_t num_out_semaphores,
- VkFence fence);
+ VkFence fence,
+ int perf_query_pass);
VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
#define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
+/* Size in bytes of a single OA snapshot. */
+#define OA_SNAPSHOT_SIZE (256)
+/* Size in bytes of one pass's region: two OA snapshots plus
+ * ALIGN(sizeof(uint64_t), 64) (presumably a uint64_t rounded up to 64 bytes;
+ * ALIGN is defined elsewhere - confirm).
+ */
+#define ANV_KHR_PERF_QUERY_SIZE \
+   (2 * OA_SNAPSHOT_SIZE + ALIGN(sizeof(uint64_t), 64))
+
struct anv_query_pool {
struct vk_object_base base;
/** Number of slots in this query pool */
uint32_t slots;
struct anv_bo * bo;
+
+ /* Perf queries: */
+ struct anv_bo reset_bo;
+ uint32_t n_counters;
+ struct gen_perf_counter_pass *counter_pass;
+ uint32_t n_passes;
+ struct gen_perf_query_info **pass_query;
};
+/* Returns the byte offset 8 + pass * ANV_KHR_PERF_QUERY_SIZE.
+ *
+ * `pool` is accepted but not read by this helper.
+ *
+ * NOTE(review): judging by the name, this locates the preamble for counter
+ * pass `pass`, 8 bytes into that pass's region - confirm against callers.
+ */
+static inline uint32_t
+khr_perf_query_preamble_offset(struct anv_query_pool *pool, uint32_t pass)
+{
+   return 8 + ANV_KHR_PERF_QUERY_SIZE * pass;
+}
+
int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);
int anv_get_physical_device_entrypoint_index(const char *name);
struct gen_perf_config *anv_get_perf(const struct gen_device_info *devinfo, int fd);
void anv_device_perf_init(struct anv_device *device);
+/* NOTE(review): the implementation is not visible here. Judging by the
+ * signature, this presumably converts `accumulated_results` for counter pass
+ * `pass` of `pool` into the caller-provided `results` - confirm.
+ */
+void anv_perf_write_pass_results(struct gen_perf_config *perf,
+ struct anv_query_pool *pool, uint32_t pass,
+ const struct gen_perf_query_result *accumulated_results,
+ union VkPerformanceCounterResultKHR *results);
#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
VK_FROM_HANDLE(__anv_type, __name, __handle)