X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_private.h;h=9500b41deadcc0afd39351d191a4afc7c4a86a5e;hb=4d44848c470c9d214c03906d8decd8056829c4ce;hp=28c2aa5633a9be9f8b7d382cfd86eeeea5ef02f7;hpb=bb3545a6ee419c4802ac4153eb690a93dc2f339d;p=mesa.git diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 28c2aa5633a..9500b41dead 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -79,6 +79,8 @@ struct anv_instance; struct gen_aux_map_context; struct gen_perf_config; +struct gen_perf_counter_pass; +struct gen_perf_query_result; #include #include @@ -221,6 +223,12 @@ struct gen_perf_config; */ #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */ +/* We reserve this MI ALU register to pass around an offset computed from + * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query. + * Other code which uses the MI ALU should leave it alone. + */ +#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */ + /* For gen12 we set the streamout buffers using 4 separate commands * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of @@ -1028,8 +1036,11 @@ struct anv_memory_heap { VkDeviceSize size; VkMemoryHeapFlags flags; - /* Driver-internal book-keeping */ - VkDeviceSize used; + /** Driver-internal book-keeping. + * + * Align it to 64 bits to make atomic operations faster on 32 bit platforms. + */ + VkDeviceSize used __attribute__ ((aligned (8))); }; struct anv_physical_device { @@ -1076,6 +1087,7 @@ struct anv_physical_device { bool use_softpin; bool always_use_bindless; + bool use_call_secondary; /** True if we can access buffers using A64 messages */ bool has_a64_buffer_access; @@ -1084,6 +1096,13 @@ struct anv_physical_device { /** True if we can use bindless access for samplers */ bool has_bindless_samplers; + /** True if we can read the GPU timestamp register + * + * When running in a virtual context, the timestamp register is unreadable + * on Gen12+. + */ + bool has_reg_timestamp; + /** True if this device has implicit AUX * * If true, CCS is handled as an implicit attachment to the BO rather than @@ -1193,6 +1212,8 @@ struct anv_queue_submit { */ uintptr_t * fence_bos; + int perf_query_pass; + const VkAllocationCallbacks * alloc; VkSystemAllocationScope alloc_scope; @@ -1223,6 +1244,8 @@ struct anv_pipeline_cache { struct hash_table * nir_cache; struct hash_table * cache; + + bool external_sync; }; struct nir_xfb_info; @@ -1230,7 +1253,8 @@ struct anv_pipeline_bind_map; void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, struct anv_device *device, - bool cache_enabled); + bool cache_enabled, + bool external_sync); void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); struct anv_shader_bin * @@ -1287,6 +1311,11 @@ anv_device_upload_nir(struct anv_device *device, const struct nir_shader *nir, unsigned char sha1_key[20]); +struct anv_address { + struct anv_bo *bo; + uint32_t offset; +}; + struct anv_device { struct vk_device vk; @@ -1330,6 +1359,8 @@ struct anv_device { * For that, we use the high bytes (>= 1024) of the workaround BO. */ struct anv_bo * workaround_bo; + struct anv_address workaround_address; + struct anv_bo * trivial_batch_bo; struct anv_bo * hiz_clear_bo; struct anv_state null_surface_state; @@ -1533,8 +1564,7 @@ bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling); int anv_gem_gpu_get_reset_stats(struct anv_device *device, uint32_t *active, uint32_t *pending); int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); -int anv_gem_reg_read(struct anv_device *device, - uint32_t offset, uint64_t *result); +int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result); uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, @@ -1595,6 +1625,8 @@ struct anv_batch_bo { struct anv_batch { const VkAllocationCallbacks * alloc; + struct anv_address start_addr; + void * start; void * end; void * next; @@ -1621,6 +1653,16 @@ void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); uint64_t anv_batch_emit_reloc(struct anv_batch *batch, void *location, struct anv_bo *bo, uint32_t offset); +struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location); + +static inline void +anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr, + void *map, size_t size) +{ + batch->start_addr = addr; + batch->next = batch->start = map; + batch->end = map + size; +} static inline VkResult anv_batch_set_error(struct anv_batch *batch, VkResult error) @@ -1637,11 +1679,6 @@ anv_batch_has_error(struct anv_batch *batch) return batch->status != VK_SUCCESS; } -struct anv_address { - struct anv_bo *bo; - uint32_t offset; -}; - #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 }) static inline bool @@ -1754,6 +1791,11 @@ _anv_combine_address(struct anv_batch *batch, void *location, _dst = NULL; \ })) +/* #define __gen_get_batch_dwords anv_batch_emit_dwords */ +/* #define __gen_get_batch_address anv_batch_address */ +/* #define __gen_address_value anv_address_physical */ +/* #define __gen_address_offset anv_address_add */ + struct anv_device_memory { struct vk_object_base base; @@ -2824,6 +2866,7 @@ enum anv_cmd_buffer_exec_mode { ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT, ANV_CMD_BUFFER_EXEC_MODE_CHAIN, ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, + ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN, }; struct anv_cmd_buffer { @@ -2871,8 +2914,12 @@ struct anv_cmd_buffer { VkCommandBufferUsageFlags usage_flags; VkCommandBufferLevel level; + struct anv_query_pool *perf_query_pool; + struct anv_cmd_state state; + struct anv_address return_addr; + /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */ uint64_t intel_perf_marker; }; @@ -2892,7 +2939,8 @@ VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue, const VkSemaphore *out_semaphores, const uint64_t *out_signal_values, uint32_t num_out_semaphores, - VkFence fence); + VkFence fence, + int perf_query_pass); VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer); @@ -3368,10 +3416,22 @@ anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline } VkResult -anv_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc); +anv_pipeline_init(struct anv_pipeline *pipeline, + struct anv_device *device, + enum anv_pipeline_type type, + VkPipelineCreateFlags flags, + const VkAllocationCallbacks *pAllocator); + +void +anv_pipeline_finish(struct anv_pipeline *pipeline, + struct anv_device *device, + const VkAllocationCallbacks *pAllocator); + +VkResult +anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc); VkResult anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, @@ -3381,11 +3441,14 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, const char *entrypoint, const VkSpecializationInfo *spec_info); -uint32_t -anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline); +struct anv_cs_parameters { + uint32_t group_size; + uint32_t simd_size; + uint32_t threads; +}; -uint32_t -anv_cs_threads(const struct anv_compute_pipeline *pipeline); +struct anv_cs_parameters +anv_cs_parameters(const struct anv_compute_pipeline *pipeline); struct anv_format_plane { enum isl_format isl_format:16; @@ -4221,6 +4284,9 @@ struct anv_render_pass { #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff +#define OA_SNAPSHOT_SIZE (256) +#define ANV_KHR_PERF_QUERY_SIZE (ALIGN(sizeof(uint64_t), 64) + 2 * OA_SNAPSHOT_SIZE) + struct anv_query_pool { struct vk_object_base base; @@ -4231,8 +4297,21 @@ struct anv_query_pool { /** Number of slots in this query pool */ uint32_t slots; struct anv_bo * bo; + + /* Perf queries : */ + struct anv_bo reset_bo; + uint32_t n_counters; + struct gen_perf_counter_pass *counter_pass; + uint32_t n_passes; + struct gen_perf_query_info **pass_query; }; +static inline uint32_t khr_perf_query_preamble_offset(struct anv_query_pool *pool, + uint32_t pass) +{ + return pass * ANV_KHR_PERF_QUERY_SIZE + 8; +} + int anv_get_instance_entrypoint_index(const char *name); int anv_get_device_entrypoint_index(const char *name); int anv_get_physical_device_entrypoint_index(const char *name); @@ -4252,6 +4331,8 @@ anv_device_entrypoint_is_enabled(int index, uint32_t core_version, const struct anv_instance_extension_table *instance, const struct anv_device_extension_table *device); +void *anv_resolve_device_entrypoint(const struct gen_device_info *devinfo, + uint32_t index); void *anv_lookup_entrypoint(const struct gen_device_info *devinfo, const char *name); @@ -4286,6 +4367,10 @@ anv_get_subpass_id(const struct anv_cmd_state * const cmd_state) struct gen_perf_config *anv_get_perf(const struct gen_device_info *devinfo, int fd); void anv_device_perf_init(struct anv_device *device); +void anv_perf_write_pass_results(struct gen_perf_config *perf, + struct anv_query_pool *pool, uint32_t pass, + const struct gen_perf_query_result *accumulated_results, + union VkPerformanceCounterResultKHR *results); #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ VK_FROM_HANDLE(__anv_type, __name, __handle)