X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_private.h;h=6b54a8ad60011d04923494e156b45fe40dcf07bd;hb=b1ef1c1211373f9336aaf87de865ef7f0c29fc44;hp=599c24b1e0f69ff741d70fe687ec077dc6130645;hpb=7bbf497b68430934ab34316823e0f59ee34f9a4f;p=mesa.git diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 599c24b1e0f..6b54a8ad600 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -49,7 +49,6 @@ #include "util/macros.h" #include "util/list.h" #include "util/xmlconfig.h" -#include "main/macros.h" #include "vk_alloc.h" #include "vk_debug_report.h" @@ -65,8 +64,6 @@ #include "radv_extensions.h" #include "sid.h" -#include - /* Pre-declarations needed for WSI entrypoints */ struct wl_surface; struct wl_display; @@ -109,25 +106,6 @@ struct gfx10_format { #include "gfx10_format_table.h" -enum radv_mem_heap { - RADV_MEM_HEAP_VRAM, - RADV_MEM_HEAP_VRAM_CPU_ACCESS, - RADV_MEM_HEAP_GTT, - RADV_MEM_HEAP_COUNT -}; - -enum radv_mem_type { - RADV_MEM_TYPE_VRAM, - RADV_MEM_TYPE_GTT_WRITE_COMBINE, - RADV_MEM_TYPE_VRAM_CPU_ACCESS, - RADV_MEM_TYPE_GTT_CACHED, - RADV_MEM_TYPE_VRAM_UNCACHED, - RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED, - RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED, - RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED, - RADV_MEM_TYPE_COUNT -}; - enum radv_secure_compile_type { RADV_SC_TYPE_INIT_SUCCESS, RADV_SC_TYPE_INIT_FAILURE, @@ -285,18 +263,30 @@ void radv_logi_v(const char *format, va_list va); return; \ } while (0) -void *radv_lookup_entrypoint_unchecked(const char *name); -void *radv_lookup_entrypoint_checked(const char *name, - uint32_t core_version, - const struct radv_instance_extension_table *instance, - const struct radv_device_extension_table *device); -void *radv_lookup_physical_device_entrypoint_checked(const char *name, - uint32_t core_version, - const struct radv_instance_extension_table *instance); +int radv_get_instance_entrypoint_index(const char *name); +int radv_get_device_entrypoint_index(const char *name); +int radv_get_physical_device_entrypoint_index(const char *name); + +const char *radv_get_instance_entry_name(int index); +const char *radv_get_physical_device_entry_name(int index); +const char *radv_get_device_entry_name(int index); + +bool radv_instance_entrypoint_is_enabled(int index, uint32_t core_version, + const struct radv_instance_extension_table *instance); +bool radv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version, + const struct radv_instance_extension_table *instance); +bool radv_device_entrypoint_is_enabled(int index, uint32_t core_version, + const struct radv_instance_extension_table *instance, + const struct radv_device_extension_table *device); + +void *radv_lookup_entrypoint(const char *name); struct radv_physical_device { VK_LOADER_DATA _loader_data; + /* Link in radv_instance::physical_devices */ + struct list_head link; + struct radv_instance * instance; struct radeon_winsys *ws; @@ -321,6 +311,9 @@ struct radv_physical_device { /* Whether to enable NGG. */ bool use_ngg; + /* Whether to enable NGG GS. */ + bool use_ngg_gs; + /* Whether to enable NGG streamout. */ bool use_ngg_streamout; @@ -338,7 +331,8 @@ struct radv_physical_device { struct disk_cache * disk_cache; VkPhysicalDeviceMemoryProperties memory_properties; - enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT]; + enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES]; + enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES]; drmPciBusInfo bus_info; @@ -351,8 +345,6 @@ struct radv_instance { VkAllocationCallbacks alloc; uint32_t apiVersion; - int physicalDeviceCount; - struct radv_physical_device physicalDevices[RADV_MAX_DRM_DEVICES]; char * engineName; uint32_t engineVersion; @@ -364,6 +356,12 @@ struct radv_instance { struct vk_debug_report_instance debug_report_callbacks; struct radv_instance_extension_table enabled_extensions; + struct radv_instance_dispatch_table dispatch; + struct radv_physical_device_dispatch_table physical_device_dispatch; + struct radv_device_dispatch_table device_dispatch; + + bool physical_devices_enumerated; + struct list_head physical_devices; struct driOptionCache dri_options; struct driOptionCache available_dri_options; @@ -721,6 +719,7 @@ struct radv_queue { uint32_t gsvs_ring_size; bool has_tess_rings; bool has_gds; + bool has_gds_oa; bool has_sample_positions; struct radeon_winsys_bo *scratch_bo; @@ -745,6 +744,11 @@ struct radv_bo_list { pthread_mutex_t mutex; }; +VkResult radv_bo_list_add(struct radv_device *device, + struct radeon_winsys_bo *bo); +void radv_bo_list_remove(struct radv_device *device, + struct radeon_winsys_bo *bo); + struct radv_secure_compile_process { /* Secure process file descriptors. Used to communicate between the * user facing device and the idle forked device used to fork a clean @@ -834,6 +838,7 @@ struct radv_device { uint64_t dmesg_timestamp; struct radv_device_extension_table enabled_extensions; + struct radv_device_dispatch_table dispatch; /* Whether the app has enabled the robustBufferAccess feature. */ bool robust_buffer_access; @@ -851,6 +856,19 @@ struct radv_device { /* Condition variable for legacy timelines, to notify waiters when a * new point gets submitted. */ pthread_cond_t timeline_cond; + + /* Thread trace. */ + struct radeon_cmdbuf *thread_trace_start_cs[2]; + struct radeon_cmdbuf *thread_trace_stop_cs[2]; + struct radeon_winsys_bo *thread_trace_bo; + void *thread_trace_ptr; + uint32_t thread_trace_buffer_size; + int thread_trace_start_frame; + + /* Overallocation. */ + bool overallocation_disallowed; + uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS]; + mtx_t overallocation_mutex; }; struct radv_device_memory { @@ -858,8 +876,8 @@ struct radv_device_memory { /* for dedicated allocations */ struct radv_image *image; struct radv_buffer *buffer; - uint32_t type_index; - VkDeviceSize map_size; + uint32_t heap_index; + uint64_t alloc_size; void * map; void * user_ptr; @@ -877,6 +895,7 @@ struct radv_descriptor_range { struct radv_descriptor_set { const struct radv_descriptor_set_layout *layout; uint32_t size; + uint32_t buffer_count; struct radeon_winsys_bo *bo; uint64_t va; @@ -970,7 +989,8 @@ enum radv_dynamic_state_bits { RADV_DYNAMIC_STENCIL_REFERENCE = 1 << 8, RADV_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, RADV_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10, - RADV_DYNAMIC_ALL = (1 << 11) - 1, + RADV_DYNAMIC_LINE_STIPPLE = 1 << 11, + RADV_DYNAMIC_ALL = (1 << 12) - 1, }; enum radv_cmd_dirty_bits { @@ -987,12 +1007,13 @@ enum radv_cmd_dirty_bits { RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10, - RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 11) - 1, - RADV_CMD_DIRTY_PIPELINE = 1 << 11, - RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 12, - RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 13, - RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 14, - RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 15, + RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1 << 11, + RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 12) - 1, + RADV_CMD_DIRTY_PIPELINE = 1 << 12, + RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 13, + RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 14, + RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 15, + RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 16, }; enum radv_cmd_flush_bits { @@ -1124,6 +1145,11 @@ struct radv_dynamic_state { struct radv_discard_rectangle_state discard_rectangle; struct radv_sample_locations_state sample_location; + + struct { + uint32_t factor; + uint16_t pattern; + } line_stipple; }; extern const struct radv_dynamic_state default_dynamic_state; @@ -1259,6 +1285,7 @@ struct radv_cmd_state { unsigned active_occlusion_queries; bool perfect_occlusion_queries_enabled; unsigned active_pipeline_queries; + unsigned active_pipeline_gds_queries; float offset_scale; uint32_t trace_id; uint32_t last_ia_multi_vgt_param; @@ -1267,6 +1294,10 @@ struct radv_cmd_state { uint32_t last_first_instance; uint32_t last_vertex_offset; + uint32_t last_sx_ps_downconvert; + uint32_t last_sx_blend_opt_epsilon; + uint32_t last_sx_blend_opt_control; + /* Whether CP DMA is busy/idle. */ bool dma_is_busy; @@ -1274,7 +1305,15 @@ struct radv_cmd_state { int predication_type; /* -1: disabled, 0: normal, 1: inverted */ uint64_t predication_va; + /* Inheritance info. */ + VkQueryPipelineStatisticFlags inherited_pipeline_statistics; + bool context_roll_without_scissor_emitted; + + /* SQTT related state. */ + uint32_t current_event_type; + uint32_t num_events; + uint32_t num_layout_transitions; }; struct radv_cmd_pool { @@ -1321,7 +1360,7 @@ struct radv_cmd_buffer { VkShaderStageFlags push_constant_stages; struct radv_descriptor_set meta_push_descriptors; - struct radv_descriptor_state descriptors[VK_PIPELINE_BIND_POINT_RANGE_SIZE]; + struct radv_descriptor_state descriptors[MAX_BIND_POINTS]; struct radv_cmd_buffer_upload upload; @@ -1332,7 +1371,8 @@ struct radv_cmd_buffer { uint32_t esgs_ring_size_needed; uint32_t gsvs_ring_size_needed; bool tess_rings_needed; - bool gds_needed; /* for GFX10 streamout */ + bool gds_needed; /* for GFX10 streamout and NGG GS queries */ + bool gds_oa_needed; /* for GFX10 streamout */ bool sample_positions_needed; VkResult record_result; @@ -1357,7 +1397,7 @@ struct radv_image_view; bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer); -void si_emit_graphics(struct radv_physical_device *physical_device, +void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs); void si_emit_compute(struct radv_physical_device *physical_device, struct radeon_cmdbuf *cs); @@ -1422,11 +1462,11 @@ void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer); void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer); void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects, - VkResolveModeFlagBitsKHR resolve_mode); + VkResolveModeFlagBits resolve_mode); void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer); void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects, - VkResolveModeFlagBitsKHR resolve_mode); + VkResolveModeFlagBits resolve_mode); void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples); unsigned radv_get_default_max_sample_dist(int log_samples); void radv_device_init_msaa(struct radv_device *device); @@ -1520,13 +1560,11 @@ struct radv_event { struct radv_shader_module; -#define RADV_HASH_SHADER_IS_GEOM_COPY_SHADER (1 << 0) -#define RADV_HASH_SHADER_SISCHED (1 << 1) -#define RADV_HASH_SHADER_NO_NGG (1 << 2) -#define RADV_HASH_SHADER_CS_WAVE32 (1 << 3) -#define RADV_HASH_SHADER_PS_WAVE32 (1 << 4) -#define RADV_HASH_SHADER_GE_WAVE32 (1 << 5) -#define RADV_HASH_SHADER_ACO (1 << 6) +#define RADV_HASH_SHADER_NO_NGG (1 << 0) +#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1) +#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2) +#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3) +#define RADV_HASH_SHADER_ACO (1 << 4) void radv_hash_shaders(unsigned char *hash, @@ -1630,6 +1668,7 @@ struct radv_pipeline { bool can_use_guardband; uint32_t needed_dynamic_state; bool disable_out_of_order_rast_for_occlusion; + uint8_t topology; /* Used for rbplus */ uint32_t col_format; @@ -1656,6 +1695,8 @@ static inline bool radv_pipeline_has_tess(const struct radv_pipeline *pipeline) bool radv_pipeline_has_ngg(const struct radv_pipeline *pipeline); +bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline); + bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline); struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline, @@ -1671,9 +1712,9 @@ struct radv_graphics_pipeline_create_info { bool db_stencil_clear; bool db_depth_disable_expclear; bool db_stencil_disable_expclear; - bool db_flush_depth_inplace; - bool db_flush_stencil_inplace; - bool db_resummarize; + bool depth_compress_disable; + bool stencil_compress_disable; + bool resummarize_enable; uint32_t custom_blend_mode; }; @@ -1685,6 +1726,15 @@ radv_graphics_pipeline_create(VkDevice device, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); +struct radv_binning_settings { + unsigned context_states_per_bin; /* allowed range: [1, 6] */ + unsigned persistent_states_per_bin; /* allowed range: [1, 32] */ + unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */ +}; + +struct radv_binning_settings +radv_get_binning_settings(const struct radv_physical_device *pdev); + struct vk_format_description; uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *desc, int first_non_void); @@ -1766,13 +1816,6 @@ struct radv_image { struct radv_image_plane planes[0]; }; -/* Whether the image has a htile that is known consistent with the contents of - * the image. */ -bool radv_layout_has_htile(const struct radv_image *image, - VkImageLayout layout, - bool in_render_loop, - unsigned queue_mask); - /* Whether the image has a htile that is known consistent with the contents of * the image and is allowed to be in compressed form. * @@ -2144,8 +2187,8 @@ struct radv_subpass { struct radv_subpass_attachment * resolve_attachments; struct radv_subpass_attachment * depth_stencil_attachment; struct radv_subpass_attachment * ds_resolve_attachment; - VkResolveModeFlagBitsKHR depth_resolve_mode; - VkResolveModeFlagBitsKHR stencil_resolve_mode; + VkResolveModeFlagBits depth_resolve_mode; + VkResolveModeFlagBits stencil_resolve_mode; /** Subpass has at least one color resolve attachment */ bool has_color_resolve; @@ -2260,6 +2303,9 @@ struct radv_semaphore { struct radv_semaphore_part temporary; }; +bool radv_queue_internal_submit(struct radv_queue *queue, + struct radeon_cmdbuf *cs); + void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, struct radv_descriptor_set *set, @@ -2307,16 +2353,11 @@ struct radv_fence { /* radv_nir_to_llvm.c */ struct radv_shader_args; -void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, - struct nir_shader *geom_shader, - struct radv_shader_binary **rbinary, - const struct radv_shader_args *args); - -void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, - struct radv_shader_binary **rbinary, - const struct radv_shader_args *args, - struct nir_shader *const *nir, - int nir_count); +void llvm_compile_shader(struct radv_device *device, + unsigned shader_count, + struct nir_shader *const *shaders, + struct radv_shader_binary **binary, + struct radv_shader_args *args); unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage, @@ -2329,10 +2370,101 @@ struct radv_shader_variant_key; void radv_nir_shader_info_pass(const struct nir_shader *nir, const struct radv_pipeline_layout *layout, const struct radv_shader_variant_key *key, - struct radv_shader_info *info); + struct radv_shader_info *info, + bool use_aco); void radv_nir_shader_info_init(struct radv_shader_info *info); +/* radv_sqtt.c */ +struct radv_thread_trace_info { + uint32_t cur_offset; + uint32_t trace_status; + union { + uint32_t gfx9_write_counter; + uint32_t gfx10_dropped_cntr; + }; +}; + +struct radv_thread_trace_se { + struct radv_thread_trace_info info; + void *data_ptr; + uint32_t shader_engine; + uint32_t compute_unit; +}; + +struct radv_thread_trace { + uint32_t num_traces; + struct radv_thread_trace_se traces[4]; +}; + +bool radv_thread_trace_init(struct radv_device *device); +void radv_thread_trace_finish(struct radv_device *device); +bool radv_begin_thread_trace(struct radv_queue *queue); +bool radv_end_thread_trace(struct radv_queue *queue); +bool radv_get_thread_trace(struct radv_queue *queue, + struct radv_thread_trace *thread_trace); +void radv_emit_thread_trace_userdata(struct radeon_cmdbuf *cs, + const void *data, uint32_t num_dwords); + +/* radv_rgp.c */ +int radv_dump_thread_trace(struct radv_device *device, + const struct radv_thread_trace *trace); + +/* radv_sqtt_layer_.c */ +struct radv_barrier_data { + union { + struct { + uint16_t depth_stencil_expand : 1; + uint16_t htile_hiz_range_expand : 1; + uint16_t depth_stencil_resummarize : 1; + uint16_t dcc_decompress : 1; + uint16_t fmask_decompress : 1; + uint16_t fast_clear_eliminate : 1; + uint16_t fmask_color_expand : 1; + uint16_t init_mask_ram : 1; + uint16_t reserved : 8; + }; + uint16_t all; + } layout_transitions; +}; + +/** + * Value for the reason field of an RGP barrier start marker originating from + * the Vulkan client (does not include PAL-defined values). (Table 15) + */ +enum rgp_barrier_reason { + RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF, + + /* External app-generated barrier reasons, i.e. API synchronization + * commands Range of valid values: [0x00000001 ... 0x7FFFFFFF]. + */ + RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001, + RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002, + RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003, + + /* Internal barrier reasons, i.e. implicit synchronization inserted by + * the Vulkan driver Range of valid values: [0xC0000000 ... 0xFFFFFFFE]. + */ + RGP_BARRIER_INTERNAL_BASE = 0xC0000000, + RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0, + RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1, + RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2, + RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3 +}; + +void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer); +void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer); +void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer); +void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z); +void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, + VkImageAspectFlagBits aspects); +void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer); +void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, + enum rgp_barrier_reason reason); +void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer); +void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, + const struct radv_barrier_data *barrier); + struct radeon_winsys_sem; uint64_t radv_get_current_time(void); @@ -2361,6 +2493,10 @@ si_conv_gl_prim_to_vertices(unsigned gl_prim) } } +void radv_cmd_buffer_begin_render_pass(struct radv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *pRenderPassBegin); +void radv_cmd_buffer_end_render_pass(struct radv_cmd_buffer *cmd_buffer); + #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \ \ static inline struct __radv_type * \