X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_private.h;h=bdd73e91a7e04d78dcd980ae8d67699bf160380d;hb=ccce8f591523236423af6a90afc22674993e2d99;hp=df335b43d8d9137488438d9acb2e52a8cc98b288;hpb=7a57c827675f3bceecd3b59968e9e5b37dcafcef;p=mesa.git diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index df335b43d8d..bdd73e91a7e 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -48,6 +48,7 @@ #include "compiler/shader_enums.h" #include "util/macros.h" #include "util/list.h" +#include "util/xmlconfig.h" #include "main/macros.h" #include "vk_alloc.h" #include "vk_debug_report.h" @@ -58,6 +59,7 @@ #include "ac_gpu_info.h" #include "ac_surface.h" #include "ac_llvm_build.h" +#include "ac_llvm_util.h" #include "radv_descriptor_set.h" #include "radv_extensions.h" #include "radv_cs.h" @@ -81,6 +83,17 @@ typedef uint32_t xcb_window_t; #include "wsi_common.h" #include "wsi_common_display.h" +struct gfx10_format { + unsigned img_format:9; + + /* Various formats are only supported with workarounds for vertex fetch, + * and some 32_32_32 formats are supported natively, but only for buffers + * (possibly with some image support, actually, but no filtering). */ + bool buffers_only:1; +}; + +#include "gfx10_format_table.h" + #define ATI_VENDOR_ID 0x1002 #define MAX_VBS 32 @@ -89,15 +102,21 @@ typedef uint32_t xcb_window_t; #define MAX_VIEWPORTS 16 #define MAX_SCISSORS 16 #define MAX_DISCARD_RECTANGLES 4 +#define MAX_SAMPLE_LOCATIONS 32 #define MAX_PUSH_CONSTANTS_SIZE 128 #define MAX_PUSH_DESCRIPTORS 32 #define MAX_DYNAMIC_UNIFORM_BUFFERS 16 #define MAX_DYNAMIC_STORAGE_BUFFERS 8 #define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS) #define MAX_SAMPLES_LOG2 4 -#define NUM_META_FS_KEYS 13 +#define NUM_META_FS_KEYS 12 #define RADV_MAX_DRM_DEVICES 8 #define MAX_VIEWS 8 +#define MAX_SO_STREAMS 4 +#define MAX_SO_BUFFERS 4 +#define MAX_SO_OUTPUTS 64 +#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024) +#define MAX_INLINE_UNIFORM_BLOCK_COUNT 64 #define NUM_DEPTH_CLEAR_PIPELINES 3 @@ -107,6 +126,8 @@ typedef uint32_t xcb_window_t; */ #define RADV_BUFFER_OPS_CS_THRESHOLD 4096 +#define RADV_BUFFER_UPDATE_THRESHOLD 1024 + enum radv_mem_heap { RADV_MEM_HEAP_VRAM, RADV_MEM_HEAP_VRAM_CPU_ACCESS, @@ -271,6 +292,9 @@ void *radv_lookup_entrypoint_checked(const char *name, uint32_t core_version, const struct radv_instance_extension_table *instance, const struct radv_device_extension_table *device); +void *radv_lookup_physical_device_entrypoint_checked(const char *name, + uint32_t core_version, + const struct radv_instance_extension_table *instance); struct radv_physical_device { VK_LOADER_DATA _loader_data; @@ -279,7 +303,6 @@ struct radv_physical_device { struct radeon_winsys *ws; struct radeon_info rad_info; - char path[20]; char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; uint8_t driver_uuid[VK_UUID_SIZE]; uint8_t device_uuid[VK_UUID_SIZE]; @@ -301,6 +324,15 @@ struct radv_physical_device { /* Whether DCC should be enabled for MSAA textures. */ bool dcc_msaa_allowed; + /* Whether LOAD_CONTEXT_REG packets are supported. */ + bool has_load_ctx_reg_pkt; + + /* Whether to enable the AMD_shader_ballot extension */ + bool use_shader_ballot; + + /* Whether DISABLE_CONSTANT_ENCODE_REG is supported. */ + bool has_dcc_constant_encode; + /* This is the drivers on-disk cache used as a fallback as opposed to * the pipeline cache defined by apps. 
*/ @@ -309,6 +341,8 @@ struct radv_physical_device { VkPhysicalDeviceMemoryProperties memory_properties; enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT]; + drmPciBusInfo bus_info; + struct radv_device_extension_table supported_extensions; }; @@ -327,6 +361,9 @@ struct radv_instance { struct vk_debug_report_instance debug_report_callbacks; struct radv_instance_extension_table enabled_extensions; + + struct driOptionCache dri_options; + struct driOptionCache available_dri_options; }; VkResult radv_init_wsi(struct radv_physical_device *physical_device); @@ -355,7 +392,12 @@ struct radv_pipeline_cache { struct radv_pipeline_key { uint32_t instance_rate_inputs; uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; + uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS]; + uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS]; + uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS]; + uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS]; uint64_t vertex_alpha_adjust; + uint32_t vertex_post_shuffle; unsigned tess_input_vertices; uint32_t col_format; uint32_t is_int8; @@ -366,30 +408,31 @@ struct radv_pipeline_key { uint32_t optimisations_disabled : 1; }; +struct radv_shader_binary; +struct radv_shader_variant; + void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device); void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache); -void +bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size); -struct radv_shader_variant; - bool radv_create_shader_variants_from_pipeline_cache(struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1, - struct radv_shader_variant **variants); + struct radv_shader_variant **variants, + bool *found_in_application_cache); void radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1, struct radv_shader_variant **variants, - const void *const *codes, - const unsigned *code_sizes); + struct radv_shader_binary *const *binaries); enum radv_blit_ds_layout { RADV_BLIT_DS_LAYOUT_TILE_ENABLE, @@ -428,6 +471,12 @@ struct radv_meta_state { struct radv_pipeline_cache cache; + /* + * For on-demand pipeline creation, makes sure that + * only one thread tries to build a pipeline at the same time. + */ + mtx_t mtx; + /** * Use array element `i` for images with `2^i` samples. */ @@ -443,6 +492,12 @@ struct radv_meta_state { VkPipelineLayout clear_color_p_layout; VkPipelineLayout clear_depth_p_layout; + + /* Optimized compute fast HTILE clear for stencil or depth only. 
*/ + VkPipeline clear_htile_mask_pipeline; + VkPipelineLayout clear_htile_mask_p_layout; + VkDescriptorSetLayout clear_htile_mask_ds_layout; + struct { VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT]; @@ -494,18 +549,33 @@ struct radv_meta_state { VkPipeline pipeline; VkPipeline pipeline_3d; } btoi; + struct { + VkPipelineLayout img_p_layout; + VkDescriptorSetLayout img_ds_layout; + VkPipeline pipeline; + } btoi_r32g32b32; struct { VkPipelineLayout img_p_layout; VkDescriptorSetLayout img_ds_layout; VkPipeline pipeline; VkPipeline pipeline_3d; } itoi; + struct { + VkPipelineLayout img_p_layout; + VkDescriptorSetLayout img_ds_layout; + VkPipeline pipeline; + } itoi_r32g32b32; struct { VkPipelineLayout img_p_layout; VkDescriptorSetLayout img_ds_layout; VkPipeline pipeline; VkPipeline pipeline_3d; } cleari; + struct { + VkPipelineLayout img_p_layout; + VkDescriptorSetLayout img_ds_layout; + VkPipeline pipeline; + } cleari_r32g32b32; struct { VkPipelineLayout p_layout; @@ -521,6 +591,19 @@ struct radv_meta_state { VkPipeline i_pipeline; VkPipeline srgb_pipeline; } rc[MAX_SAMPLES_LOG2]; + + VkPipeline depth_zero_pipeline; + struct { + VkPipeline average_pipeline; + VkPipeline max_pipeline; + VkPipeline min_pipeline; + } depth[MAX_SAMPLES_LOG2]; + + VkPipeline stencil_zero_pipeline; + struct { + VkPipeline max_pipeline; + VkPipeline min_pipeline; + } stencil[MAX_SAMPLES_LOG2]; } resolve_compute; struct { @@ -531,6 +614,21 @@ struct radv_meta_state { VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT]; VkPipeline pipeline[NUM_META_FS_KEYS]; } rc[MAX_SAMPLES_LOG2]; + + VkRenderPass depth_render_pass; + VkPipeline depth_zero_pipeline; + struct { + VkPipeline average_pipeline; + VkPipeline max_pipeline; + VkPipeline min_pipeline; + } depth[MAX_SAMPLES_LOG2]; + + VkRenderPass stencil_render_pass; + VkPipeline stencil_zero_pipeline; + struct { + VkPipeline max_pipeline; + VkPipeline min_pipeline; + } stencil[MAX_SAMPLES_LOG2]; } resolve_fragment; struct { @@ -566,7 +664,14 @@ struct radv_meta_state { VkPipelineLayout p_layout; VkPipeline occlusion_query_pipeline; VkPipeline pipeline_statistics_query_pipeline; + VkPipeline tfb_query_pipeline; } query; + + struct { + VkDescriptorSetLayout ds_layout; + VkPipelineLayout p_layout; + VkPipeline pipeline[MAX_SAMPLES_LOG2]; + } fmask_expand; }; /* queue types */ @@ -642,9 +747,8 @@ struct radv_device { float sample_locations_2x[2][2]; float sample_locations_4x[4][2]; float sample_locations_8x[8][2]; - float sample_locations_16x[16][2]; - /* CIK and later */ + /* GFX7 and later */ uint32_t gfx_init_size_dw; struct radeon_winsys_bo *gfx_init; @@ -677,6 +781,9 @@ struct radv_device { bool use_global_bo_list; struct radv_bo_list bo_list; + + /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). 
*/ + int force_aniso; }; struct radv_device_memory { @@ -750,7 +857,8 @@ struct radv_descriptor_update_template_entry { uint32_t buffer_offset; /* Only valid for combined image samplers and samplers */ - uint16_t has_sampler; + uint8_t has_sampler; + uint8_t sampler_offset; /* In bytes */ size_t src_offset; @@ -790,7 +898,8 @@ enum radv_dynamic_state_bits { RADV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, RADV_DYNAMIC_STENCIL_REFERENCE = 1 << 8, RADV_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, - RADV_DYNAMIC_ALL = (1 << 10) - 1, + RADV_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10, + RADV_DYNAMIC_ALL = (1 << 11) - 1, }; enum radv_cmd_dirty_bits { @@ -806,36 +915,43 @@ enum radv_cmd_dirty_bits { RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, - RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 10) - 1, - RADV_CMD_DIRTY_PIPELINE = 1 << 10, - RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 11, - RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 12, - RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 13, + RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10, + RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 11) - 1, + RADV_CMD_DIRTY_PIPELINE = 1 << 11, + RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 12, + RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 13, + RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 14, + RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 15, }; enum radv_cmd_flush_bits { - RADV_CMD_FLAG_INV_ICACHE = 1 << 0, - /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */ - RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1, - /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */ - RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2, - /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ - RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3, - /* Same as above, but only writes back and doesn't invalidate */ - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4, + /* Instruction cache. */ + RADV_CMD_FLAG_INV_ICACHE = 1 << 0, + /* Scalar L1 cache. */ + RADV_CMD_FLAG_INV_SCACHE = 1 << 1, + /* Vector L1 cache. */ + RADV_CMD_FLAG_INV_VCACHE = 1 << 2, + /* L2 cache + L2 metadata cache writeback & invalidate. + * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */ + RADV_CMD_FLAG_INV_L2 = 1 << 3, + /* L2 writeback (write dirty L2 lines to memory for non-L2 clients). + * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8. + * GFX6-7 will do complete invalidation, because the writeback is unsupported. */ + RADV_CMD_FLAG_WB_L2 = 1 << 4, /* Framebuffer caches */ - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5, - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6, - RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7, - RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8, + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5, + RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6, + RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7, + RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8, /* Engine synchronization. */ - RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9, - RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10, - RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11, - RADV_CMD_FLAG_VGT_FLUSH = 1 << 12, + RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9, + RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10, + RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11, + RADV_CMD_FLAG_VGT_FLUSH = 1 << 12, /* Pipeline query controls. 
*/ - RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13, - RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14, + RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13, + RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14, + RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15, RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | @@ -848,6 +964,29 @@ struct radv_vertex_binding { VkDeviceSize offset; }; +struct radv_streamout_binding { + struct radv_buffer *buffer; + VkDeviceSize offset; + VkDeviceSize size; +}; + +struct radv_streamout_state { + /* Mask of bound streamout buffers. */ + uint8_t enabled_mask; + + /* External state that comes from the last vertex stage, it must be + * set explicitely when binding a new graphics pipeline. + */ + uint16_t stride_in_dw[MAX_SO_BUFFERS]; + uint32_t enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */ + + /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */ + uint32_t hw_enabled_mask; + + /* State of VGT_STRMOUT_(CONFIG|EN) */ + bool streamout_enabled; +}; + struct radv_viewport_state { uint32_t count; VkViewport viewports[MAX_VIEWPORTS]; @@ -863,6 +1002,13 @@ struct radv_discard_rectangle_state { VkRect2D rectangles[MAX_DISCARD_RECTANGLES]; }; +struct radv_sample_locations_state { + VkSampleCountFlagBits per_pixel; + VkExtent2D grid_size; + uint32_t count; + VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS]; +}; + struct radv_dynamic_state { /** * Bitmask of (1 << VK_DYNAMIC_STATE_*). @@ -905,6 +1051,8 @@ struct radv_dynamic_state { } stencil_reference; struct radv_discard_rectangle_state discard_rectangle; + + struct radv_sample_locations_state sample_location; }; extern const struct radv_dynamic_state default_dynamic_state; @@ -925,6 +1073,7 @@ struct radv_attachment_state { uint32_t cleared_views; VkClearValue clear_value; VkImageLayout current_layout; + struct radv_sample_locations_state sample_location; }; struct radv_descriptor_state { @@ -936,6 +1085,11 @@ struct radv_descriptor_state { uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS]; }; +struct radv_subpass_sample_locs_state { + uint32_t subpass_idx; + struct radv_sample_locations_state sample_location; +}; + struct radv_cmd_state { /* Vertex descriptors */ uint64_t vb_va; @@ -955,8 +1109,12 @@ struct radv_cmd_state { const struct radv_subpass * subpass; struct radv_dynamic_state dynamic; struct radv_attachment_state * attachments; + struct radv_streamout_state streamout; VkRect2D render_area; + uint32_t num_subpass_sample_locs; + struct radv_subpass_sample_locs_state * subpass_sample_locs; + /* Index buffer */ struct radv_buffer *index_buffer; uint64_t index_offset; @@ -978,6 +1136,15 @@ struct radv_cmd_state { uint32_t last_num_instances; uint32_t last_first_instance; uint32_t last_vertex_offset; + + /* Whether CP DMA is busy/idle. */ + bool dma_is_busy; + + /* Conditional rendering info. 
*/ + int predication_type; /* -1: disabled, 0: normal, 1: inverted */ + uint64_t predication_va; + + bool context_roll_without_scissor_emitted; }; struct radv_cmd_pool { @@ -1017,6 +1184,7 @@ struct radv_cmd_buffer { struct radeon_cmdbuf *cs; struct radv_cmd_state state; struct radv_vertex_binding vertex_bindings[MAX_VBS]; + struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS]; uint32_t queue_family_index; uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE]; @@ -1036,23 +1204,30 @@ struct radv_cmd_buffer { VkResult record_result; - int ring_offsets_idx; /* just used for verification */ - uint32_t gfx9_fence_offset; - struct radeon_winsys_bo *gfx9_fence_bo; + uint64_t gfx9_fence_va; uint32_t gfx9_fence_idx; + uint64_t gfx9_eop_bug_va; /** * Whether a query pool has been resetted and we have to flush caches. */ bool pending_reset_query; + + /** + * Bitmask of pending active query flushes. + */ + enum radv_cmd_flush_bits active_query_flush_bits; }; struct radv_image; +struct radv_image_view; bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer); -void si_init_compute(struct radv_cmd_buffer *cmd_buffer); -void si_init_config(struct radv_cmd_buffer *cmd_buffer); +void si_emit_graphics(struct radv_physical_device *physical_device, + struct radeon_cmdbuf *cs); +void si_emit_compute(struct radv_physical_device *physical_device, + struct radeon_cmdbuf *cs); void cik_create_gfx_config(struct radv_device *device); @@ -1063,6 +1238,7 @@ void si_write_scissors(struct radeon_cmdbuf *cs, int first, const VkViewport *viewports, bool can_use_guardband); uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw, + bool count_from_stream_output, uint32_t draw_vertex_count); void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, @@ -1070,19 +1246,20 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, unsigned event, unsigned event_flags, unsigned data_sel, uint64_t va, - uint32_t old_fence, - uint32_t new_fence); + uint32_t new_fence, + uint64_t gfx9_eop_bug_va); -void si_emit_wait_fence(struct radeon_cmdbuf *cs, - uint64_t va, uint32_t ref, - uint32_t mask); +void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, + uint32_t ref, uint32_t mask); void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t *fence_ptr, uint64_t va, bool is_mec, - enum radv_cmd_flush_bits flush_bits); + enum radv_cmd_flush_bits flush_bits, + uint64_t gfx9_eop_bug_va); void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer); -void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va); +void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, + bool inverted, uint64_t va); void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size); @@ -1090,6 +1267,8 @@ void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size); void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value); +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); + void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer); bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, @@ -1099,8 +1278,7 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, void **ptr); void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, - const struct radv_subpass *subpass, - bool transitions); + 
const struct radv_subpass *subpass); bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned alignmnet, @@ -1109,9 +1287,15 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer); void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer); void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer); +void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer, + VkImageAspectFlags aspects, + VkResolveModeFlagBitsKHR resolve_mode); void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer); -void radv_cayman_emit_msaa_sample_locs(struct radeon_cmdbuf *cs, int nr_samples); -unsigned radv_cayman_get_maxdist(int log_samples); +void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer, + VkImageAspectFlags aspects, + VkResolveModeFlagBitsKHR resolve_mode); +void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples); +unsigned radv_get_default_max_sample_dist(int log_samples); void radv_device_init_msaa(struct radv_device *device); void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, @@ -1120,13 +1304,18 @@ void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects); void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, + const struct radv_image_view *iview, int cb_idx, uint32_t color_values[2]); -void radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - bool value); +void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *range, bool value); + +void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *range, bool value); + uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value); @@ -1164,7 +1353,7 @@ radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va, bool global) { - bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global; + bool use_32bit_pointers = !global; radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers); radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers); @@ -1229,6 +1418,7 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) stage = __builtin_ffs(__tmp) - 1, __tmp; \ __tmp &= ~(1 << (stage))) +extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS]; unsigned radv_format_meta_fs_key(VkFormat format); struct radv_multisample_state { @@ -1247,11 +1437,7 @@ struct radv_prim_vertex_count { }; struct radv_vertex_elements_info { - uint32_t rsrc_word3[MAX_VERTEX_ATTRIBS]; uint32_t format_size[MAX_VERTEX_ATTRIBS]; - uint32_t binding[MAX_VERTEX_ATTRIBS]; - uint32_t offset[MAX_VERTEX_ATTRIBS]; - uint32_t count; }; struct radv_ia_multi_vgt_param_helpers { @@ -1277,10 +1463,13 @@ struct radv_pipeline { VkShaderStageFlags active_stages; struct radeon_cmdbuf cs; + uint32_t ctx_cs_hash; + struct radeon_cmdbuf ctx_cs; struct radv_vertex_elements_info vertex_elements; uint32_t binding_stride[MAX_VBS]; + uint8_t num_vertex_bindings; uint32_t user_data_0[MESA_SHADER_STAGES]; union { @@ -1306,6 +1495,9 @@ struct radv_pipeline { unsigned max_waves; unsigned scratch_bytes_per_wave; + + /* Not NULL if graphics 
pipeline uses streamout. */ + struct radv_shader_variant *streamout_shader; }; static inline bool radv_pipeline_has_gs(const struct radv_pipeline *pipeline) @@ -1350,6 +1542,7 @@ uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *de int first_non_void); uint32_t radv_translate_buffer_numformat(const struct vk_format_description *desc, int first_non_void); +bool radv_is_buffer_format_supported(VkFormat format, bool *scaled); uint32_t radv_translate_colorformat(VkFormat format); uint32_t radv_translate_color_numformat(VkFormat format, const struct vk_format_description *desc, @@ -1369,6 +1562,7 @@ bool radv_format_pack_clear_color(VkFormat format, bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable); bool radv_dcc_formats_compatible(VkFormat format1, VkFormat format2); +bool radv_device_supports_etc(struct radv_physical_device *physical_device); struct radv_fmask_info { uint64_t offset; @@ -1379,6 +1573,7 @@ struct radv_fmask_info { unsigned slice_tile_max; unsigned tile_mode_index; unsigned tile_swizzle; + uint64_t slice_size; }; struct radv_cmask_info { @@ -1386,6 +1581,14 @@ struct radv_cmask_info { uint64_t size; unsigned alignment; unsigned slice_tile_max; + unsigned slice_size; +}; + + +struct radv_image_plane { + VkFormat format; + struct radeon_surf surface; + uint64_t offset; }; struct radv_image { @@ -1413,15 +1616,27 @@ struct radv_image { uint64_t dcc_offset; uint64_t htile_offset; bool tc_compatible_htile; - struct radeon_surf surface; + bool tc_compatible_cmask; struct radv_fmask_info fmask; struct radv_cmask_info cmask; uint64_t clear_value_offset; + uint64_t fce_pred_offset; uint64_t dcc_pred_offset; + /* + * Metadata for the TC-compat zrange workaround. If the 32-bit value + * stored at this offset is UINT_MAX, the driver will emit + * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the + * SET_CONTEXT_REG packet. + */ + uint64_t tc_compat_zrange_offset; + /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */ VkDeviceMemory owned_memory; + + unsigned plane_count; + struct radv_image_plane planes[0]; }; /* Whether the image has a htile that is known consistent with the contents of @@ -1472,7 +1687,16 @@ radv_image_has_fmask(const struct radv_image *image) static inline bool radv_image_has_dcc(const struct radv_image *image) { - return image->surface.dcc_size; + return image->planes[0].surface.dcc_size; +} + +/** + * Return whether the image is TC-compatible CMASK. + */ +static inline bool +radv_image_is_tc_compat_cmask(const struct radv_image *image) +{ + return radv_image_has_fmask(image) && image->tc_compatible_cmask; } /** @@ -1482,7 +1706,18 @@ static inline bool radv_dcc_enabled(const struct radv_image *image, unsigned level) { return radv_image_has_dcc(image) && - level < image->surface.num_dcc_levels; + level < image->planes[0].surface.num_dcc_levels; +} + +/** + * Return whether the image has CB metadata. 
+ */ +static inline bool +radv_image_has_CB_metadata(const struct radv_image *image) +{ + return radv_image_has_cmask(image) || + radv_image_has_fmask(image) || + radv_image_has_dcc(image); } /** @@ -1491,7 +1726,7 @@ radv_dcc_enabled(const struct radv_image *image, unsigned level) static inline bool radv_image_has_htile(const struct radv_image *image) { - return image->surface.htile_size; + return image->planes[0].surface.htile_size; } /** @@ -1512,6 +1747,33 @@ radv_image_is_tc_compat_htile(const struct radv_image *image) return radv_image_has_htile(image) && image->tc_compatible_htile; } +static inline uint64_t +radv_image_get_fast_clear_va(const struct radv_image *image, + uint32_t base_level) +{ + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->clear_value_offset + base_level * 8; + return va; +} + +static inline uint64_t +radv_image_get_fce_pred_va(const struct radv_image *image, + uint32_t base_level) +{ + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->fce_pred_offset + base_level * 8; + return va; +} + +static inline uint64_t +radv_image_get_dcc_pred_va(const struct radv_image *image, + uint32_t base_level) +{ + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->dcc_pred_offset + base_level * 8; + return va; +} + unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family); static inline uint32_t @@ -1536,6 +1798,21 @@ radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata); +void +radv_image_override_offset_stride(struct radv_device *device, + struct radv_image *image, + uint64_t offset, uint32_t stride); + +union radv_descriptor { + struct { + uint32_t plane0_descriptor[8]; + uint32_t fmask_descriptor[8]; + }; + struct { + uint32_t plane_descriptors[3][8]; + }; +}; + struct radv_image_view { struct radv_image *image; /**< VkImageViewCreateInfo::image */ struct radeon_winsys_bo *bo; @@ -1543,24 +1820,27 @@ struct radv_image_view { VkImageViewType type; VkImageAspectFlags aspect_mask; VkFormat vk_format; + unsigned plane_id; + bool multiple_planes; uint32_t base_layer; uint32_t layer_count; uint32_t base_mip; uint32_t level_count; VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ - uint32_t descriptor[16]; + union radv_descriptor descriptor; /* Descriptor for use as a storage image as opposed to a sampled image. * This has a few differences for cube maps (e.g. type). 
*/ - uint32_t storage_descriptor[16]; + union radv_descriptor storage_descriptor; }; struct radv_image_create_info { const VkImageCreateInfo *vk_info; bool scanout; bool no_metadata_planes; + const struct radeon_bo_metadata *bo_metadata; }; VkResult radv_image_create(VkDevice _device, @@ -1579,6 +1859,17 @@ void radv_image_view_init(struct radv_image_view *view, struct radv_device *device, const VkImageViewCreateInfo* pCreateInfo); +VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask); + +struct radv_sampler_ycbcr_conversion { + VkFormat format; + VkSamplerYcbcrModelConversion ycbcr_model; + VkSamplerYcbcrRange ycbcr_range; + VkComponentMapping components; + VkChromaLocation chroma_offsets[2]; + VkFilter chroma_filter; +}; + struct radv_buffer_view { struct radeon_winsys_bo *bo; VkFormat vk_format; @@ -1634,6 +1925,7 @@ radv_image_extent_compare(const struct radv_image *image, struct radv_sampler { uint32_t state[4]; + struct radv_sampler_ycbcr_conversion *ycbcr_sampler; }; struct radv_color_buffer_info { @@ -1641,7 +1933,6 @@ struct radv_color_buffer_info { uint64_t cb_color_cmask; uint64_t cb_color_fmask; uint64_t cb_dcc_base; - uint32_t cb_color_pitch; uint32_t cb_color_slice; uint32_t cb_color_view; uint32_t cb_color_info; @@ -1650,6 +1941,10 @@ struct radv_color_buffer_info { uint32_t cb_dcc_control; uint32_t cb_color_cmask_slice; uint32_t cb_color_fmask_slice; + union { + uint32_t cb_color_pitch; // GFX6-GFX8 + uint32_t cb_mrt_epitch; // GFX9+ + }; }; struct radv_ds_buffer_info { @@ -1694,16 +1989,33 @@ struct radv_subpass_barrier { VkAccessFlags dst_access_mask; }; +void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, + const struct radv_subpass_barrier *barrier); + +struct radv_subpass_attachment { + uint32_t attachment; + VkImageLayout layout; +}; + struct radv_subpass { + uint32_t attachment_count; + struct radv_subpass_attachment * attachments; + uint32_t input_count; uint32_t color_count; - VkAttachmentReference * input_attachments; - VkAttachmentReference * color_attachments; - VkAttachmentReference * resolve_attachments; - VkAttachmentReference depth_stencil_attachment; + struct radv_subpass_attachment * input_attachments; + struct radv_subpass_attachment * color_attachments; + struct radv_subpass_attachment * resolve_attachments; + struct radv_subpass_attachment * depth_stencil_attachment; + struct radv_subpass_attachment * ds_resolve_attachment; + VkResolveModeFlagBitsKHR depth_resolve_mode; + VkResolveModeFlagBitsKHR stencil_resolve_mode; - /** Subpass has at least one resolve attachment */ - bool has_resolve; + /** Subpass has at least one color resolve attachment */ + bool has_color_resolve; + + /** Subpass has at least one color attachment */ + bool has_color_att; struct radv_subpass_barrier start_barrier; @@ -1711,6 +2023,9 @@ struct radv_subpass { VkSampleCountFlagBits max_sample_count; }; +uint32_t +radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer); + struct radv_render_pass_attachment { VkFormat format; uint32_t samples; @@ -1718,13 +2033,16 @@ struct radv_render_pass_attachment { VkAttachmentLoadOp stencil_load_op; VkImageLayout initial_layout; VkImageLayout final_layout; - uint32_t view_mask; + + /* The subpass id in which the attachment will be used first/last. 
*/ + uint32_t first_subpass_idx; + uint32_t last_subpass_idx; }; struct radv_render_pass { uint32_t attachment_count; uint32_t subpass_count; - VkAttachmentReference * subpass_attachments; + struct radv_subpass_attachment * subpass_attachments; struct radv_render_pass_attachment * attachments; struct radv_subpass_barrier end_barrier; struct radv_subpass subpasses[0]; @@ -1768,7 +2086,7 @@ void radv_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData); void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, @@ -1779,13 +2097,16 @@ void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, const VkWriteDescriptorSet *pDescriptorWrites); void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, uint32_t value); + struct radv_image *image, + const VkImageSubresourceRange *range, uint32_t value); + +void radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *range); struct radv_fence { struct radeon_winsys_fence *fence; struct wsi_fence *fence_wsi; - bool submitted; - bool signalled; uint32_t syncobj; uint32_t temp_syncobj; @@ -1795,21 +2116,22 @@ struct radv_fence { struct radv_shader_variant_info; struct radv_nir_compiler_options; -void radv_compile_gs_copy_shader(LLVMTargetMachineRef tm, +void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader, - struct ac_shader_binary *binary, - struct ac_shader_config *config, + struct radv_shader_binary **rbinary, struct radv_shader_variant_info *shader_info, const struct radv_nir_compiler_options *option); -void radv_compile_nir_shader(LLVMTargetMachineRef tm, - struct ac_shader_binary *binary, - struct ac_shader_config *config, +void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, + struct radv_shader_binary **rbinary, struct radv_shader_variant_info *shader_info, struct nir_shader *const *nir, int nir_count, const struct radv_nir_compiler_options *options); +unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, + const struct nir_shader *nir); + /* radv_shader_info.h */ struct radv_shader_info; @@ -1817,8 +2139,12 @@ void radv_nir_shader_info_pass(const struct nir_shader *nir, const struct radv_nir_compiler_options *options, struct radv_shader_info *info); +void radv_nir_shader_info_init(struct radv_shader_info *info); + struct radeon_winsys_sem; +uint64_t radv_get_current_time(void); + #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \ \ static inline struct __radv_type * \ @@ -1862,7 +2188,7 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, VkBufferView) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, VkDescriptorPool) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, VkDescriptorSet) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, VkDescriptorSetLayout) -RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplateKHR) +RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplate) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, VkDeviceMemory) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_fence, VkFence) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_event, VkEvent) @@ -1875,6 +2201,7 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, VkPipelineLayout) 
RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, VkQueryPool) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler) +RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler_ycbcr_conversion, VkSamplerYcbcrConversion) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_semaphore, VkSemaphore)
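/*
 * Usage sketch (illustrative only, not part of radv_private.h): the three
 * per-level metadata helpers defined above all share one layout, i.e. one
 * 8-byte word per mip level stored in the image BO at clear_value_offset,
 * fce_pred_offset and dcc_pred_offset respectively.  Assuming an existing
 * "image" and a mip level "level", the addresses are obtained as:
 *
 *	uint64_t clear_va = radv_image_get_fast_clear_va(image, level);
 *	uint64_t fce_va   = radv_image_get_fce_pred_va(image, level);
 *	uint64_t dcc_va   = radv_image_get_dcc_pred_va(image, level);
 *
 * Each of these is radv_buffer_get_va(image->bo) + image->offset + the
 * corresponding *_offset + level * 8, exactly as the inline functions show.
 */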
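/*
 * Illustrative sketch (assumed usage, not taken from the RADV command-buffer
 * implementation): radv_streamout_state.enabled_mask is documented above as a
 * mask of bound streamout buffers, so marking transform-feedback buffer "idx"
 * (idx < MAX_SO_BUFFERS) bound or unbound amounts to toggling a single bit:
 *
 *	static void
 *	example_set_so_buffer_bound(struct radv_streamout_state *so,
 *				    unsigned idx, bool bound)
 *	{
 *		if (bound)
 *			so->enabled_mask |= 1u << idx;
 *		else
 *			so->enabled_mask &= ~(1u << idx);
 *	}
 *
 * The stride_in_dw[] entries come from the last vertex stage and must be
 * re-supplied whenever a new graphics pipeline is bound, as noted in the
 * struct's own comment.
 */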
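/*
 * Layout note with a hedged example (not from the upstream file): because
 * union radv_descriptor overlays plane0_descriptor[8]/fmask_descriptor[8]
 * with plane_descriptors[3][8], plane_descriptors[0] aliases the plane-0
 * image descriptor and plane_descriptors[1] aliases the FMASK descriptor,
 * while multi-planar (e.g. YCbCr) views presumably fill up to three plane
 * descriptors instead.  With <stddef.h>, C11 static assertions expressing
 * that aliasing would look like:
 *
 *	_Static_assert(offsetof(union radv_descriptor, fmask_descriptor) ==
 *	               offsetof(union radv_descriptor, plane_descriptors[1]),
 *	               "FMASK slot aliases plane 1");
 *	_Static_assert(sizeof(union radv_descriptor) == 3 * 8 * sizeof(uint32_t),
 *	               "three 8-dword plane descriptors");
 */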