X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fintel%2Fvulkan%2Fanv_private.h;h=fa40c71839a360aabc9bdd38fc61caa8e4288ae0;hb=0c4e89ad5b9cc9a3e2afdab86602f643e69e9412;hp=1316077ae09e1ffade29dad939484e667454ee7e;hpb=ce790c96a958615f7d3b42635e2c4bac1fdf7543;p=mesa.git diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 1316077ae09..fa40c71839a 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -57,6 +57,7 @@ #include "util/u_vector.h" #include "util/u_math.h" #include "util/vma.h" +#include "util/xmlconfig.h" #include "vk_alloc.h" #include "vk_debug_report.h" @@ -83,7 +84,7 @@ struct gen_l3_config; #include "anv_extensions.h" #include "isl/isl.h" -#include "common/gen_debug.h" +#include "dev/gen_debug.h" #include "common/intel_log.h" #include "wsi_common.h" @@ -121,12 +122,9 @@ struct gen_l3_config; #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */ #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL #define HIGH_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ -#define HIGH_HEAP_MAX_ADDRESS 0xfffeffffffffULL #define LOW_HEAP_SIZE \ (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1) -#define HIGH_HEAP_SIZE \ - (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1) #define DYNAMIC_STATE_POOL_SIZE \ (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1) #define BINDING_TABLE_POOL_SIZE \ @@ -161,11 +159,22 @@ struct gen_l3_config; #define MAX_PUSH_CONSTANTS_SIZE 128 #define MAX_DYNAMIC_BUFFERS 16 #define MAX_IMAGES 64 -#define MAX_GEN8_IMAGES 8 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */ #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 +/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model": + * + * "The surface state model is used when a Binding Table Index (specified + * in the message descriptor) of less than 240 is specified. In this model, + * the Binding Table Index is used to index into the binding table, and the + * binding table entry contains a pointer to the SURFACE_STATE." + * + * Binding table values above 240 are used for various things in the hardware + * such as stateless, stateless with incoherent cache, SLM, and bindless. + */ +#define MAX_BINDING_TABLE_SIZE 240 + /* The kernel relocation API has a limitation of a 32-bit delta value * applied to the address before it is written which, in spite of it being * unsigned, is treated as signed . Because of the way that this maps to @@ -191,7 +200,7 @@ struct gen_l3_config; /* We reserve this MI ALU register for the purpose of handling predication. * Other code which uses the MI ALU should leave it alone. */ -#define ANV_PREDICATE_RESULT_REG MI_ALU_REG15 +#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */ #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) @@ -721,7 +730,7 @@ struct anv_fixed_size_state_pool { }; #define ANV_MIN_STATE_SIZE_LOG2 6 -#define ANV_MAX_STATE_SIZE_LOG2 20 +#define ANV_MAX_STATE_SIZE_LOG2 21 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1) @@ -736,7 +745,7 @@ struct anv_state_table { struct anv_free_entry *map; uint32_t size; struct anv_block_state state; - struct u_vector mmap_cleanups; + struct u_vector cleanups; }; struct anv_state_pool { @@ -901,7 +910,10 @@ struct anv_memory_heap { VkMemoryHeapFlags flags; /* Driver-internal book-keeping */ + uint64_t vma_start; + uint64_t vma_size; bool supports_48bit_addresses; + VkDeviceSize used; }; struct anv_physical_device { @@ -939,6 +951,15 @@ struct anv_physical_device { bool has_context_priority; bool use_softpin; bool has_context_isolation; + bool has_mem_available; + bool always_use_bindless; + + /** True if we can access buffers using A64 messages */ + bool has_a64_buffer_access; + /** True if we can use bindless access for images */ + bool has_bindless_images; + /** True if we can use bindless access for samplers */ + bool has_bindless_samplers; struct anv_device_extension_table supported_extensions; @@ -989,6 +1010,9 @@ struct anv_instance { bool pipeline_cache_enabled; struct vk_debug_report_instance debug_report_callbacks; + + struct driOptionCache dri_options; + struct driOptionCache available_dri_options; }; VkResult anv_init_wsi(struct anv_physical_device *physical_device); @@ -1034,6 +1058,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const struct brw_compile_stats *stats, + uint32_t num_stats, const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); @@ -1052,6 +1078,8 @@ anv_device_upload_kernel(struct anv_device *device, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const struct brw_compile_stats *stats, + uint32_t num_stats, const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); @@ -1094,6 +1122,9 @@ struct anv_device { uint64_t vma_lo_available; uint64_t vma_hi_available; + /** List of all anv_device_memory objects */ + struct list_head memory_objects; + struct anv_bo_pool batch_bo_pool; struct anv_bo_cache bo_cache; @@ -1107,17 +1138,13 @@ struct anv_device { struct anv_bo trivial_batch_bo; struct anv_bo hiz_clear_bo; - /* Set of pointers to anv_buffer objects for all pinned buffers. Pinned - * buffers are always resident because they could be used at any time via - * VK_EXT_buffer_device_address. - */ - struct set * pinned_buffers; - struct anv_pipeline_cache default_pipeline_cache; struct blorp_context blorp; struct anv_state border_colors; + struct anv_state slice_hash; + struct anv_queue queue; struct anv_scratch_pool scratch_pool; @@ -1483,7 +1510,14 @@ _anv_combine_address(struct anv_batch *batch, void *location, #define GEN11_MOCS GEN9_MOCS #define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS +/* TigerLake MOCS */ +#define GEN12_MOCS GEN9_MOCS +/* TC=1/LLC Only, LeCC=1/Uncacheable, LRUM=0, L3CC=1/Uncacheable */ +#define GEN12_EXTERNAL_MOCS (3 << 1) + struct anv_device_memory { + struct list_head link; + struct anv_bo * bo; struct anv_memory_type * type; VkDeviceSize map_size; @@ -1508,6 +1542,62 @@ struct anv_vue_header { float PointWidth; }; +/** Struct representing a sampled image descriptor + * + * This descriptor layout is used for sampled images, bare sampler, and + * combined image/sampler descriptors. + */ +struct anv_sampled_image_descriptor { + /** Bindless image handle + * + * This is expected to already be shifted such that the 20-bit + * SURFACE_STATE table index is in the top 20 bits. + */ + uint32_t image; + + /** Bindless sampler handle + * + * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative + * to the dynamic state base address. + */ + uint32_t sampler; +}; + +struct anv_texture_swizzle_descriptor { + /** Texture swizzle + * + * See also nir_intrinsic_channel_select_intel + */ + uint8_t swizzle[4]; + + /** Unused padding to ensure the struct is a multiple of 64 bits */ + uint32_t _pad; +}; + +/** Struct representing a storage image descriptor */ +struct anv_storage_image_descriptor { + /** Bindless image handles + * + * These are expected to already be shifted such that the 20-bit + * SURFACE_STATE table index is in the top 20 bits. + */ + uint32_t read_write; + uint32_t write_only; +}; + +/** Struct representing a address/range descriptor + * + * The fields of this struct correspond directly to the data layout of + * nir_address_format_64bit_bounded_global addresses. The last field is the + * offset in the NIR address so it must be zero so that when you load the + * descriptor you get a pointer to the start of the range. + */ +struct anv_address_range_descriptor { + uint64_t address; + uint32_t range; + uint32_t zero; +}; + enum anv_descriptor_data { /** The descriptor contains a BTI reference to a surface state */ ANV_DESCRIPTOR_SURFACE_STATE = (1 << 0), @@ -1519,6 +1609,14 @@ enum anv_descriptor_data { ANV_DESCRIPTOR_IMAGE_PARAM = (1 << 3), /** The descriptor contains auxiliary image layout data */ ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4), + /** anv_address_range_descriptor with a buffer address and range */ + ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5), + /** Bindless surface handle */ + ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6), + /** Storage image handles */ + ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7), + /** Storage image handles */ + ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8), }; struct anv_descriptor_set_binding_layout { @@ -1527,9 +1625,15 @@ struct anv_descriptor_set_binding_layout { VkDescriptorType type; #endif + /* Flags provided when this binding was created */ + VkDescriptorBindingFlagsEXT flags; + /* Bitfield representing the type of data this descriptor contains */ enum anv_descriptor_data data; + /* Maximum number of YCbCr texture/sampler planes */ + uint8_t max_plane_count; + /* Number of array elements in this binding (or size in bytes for inline * uniform data) */ @@ -1556,6 +1660,14 @@ unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *lay unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice, VkDescriptorType type); +bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice, + const struct anv_descriptor_set_binding_layout *binding, + bool sampler); + +bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice, + const struct anv_descriptor_set_binding_layout *binding, + bool sampler); + struct anv_descriptor_set_layout { /* Descriptor set layouts can be destroyed at almost any time */ uint32_t ref_cnt; @@ -1850,14 +1962,57 @@ enum anv_cmd_dirty_bits { ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */ ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */ ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */ - ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1, ANV_CMD_DIRTY_PIPELINE = 1 << 9, ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12, + ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */ }; typedef uint32_t anv_cmd_dirty_mask_t; +#define ANV_CMD_DIRTY_DYNAMIC_ALL \ + (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT | \ + ANV_CMD_DIRTY_DYNAMIC_SCISSOR | \ + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | \ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | \ + ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | \ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | \ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | \ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | \ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | \ + ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) + +static inline enum anv_cmd_dirty_bits +anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state) +{ + switch (vk_state) { + case VK_DYNAMIC_STATE_VIEWPORT: + return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; + case VK_DYNAMIC_STATE_SCISSOR: + return ANV_CMD_DIRTY_DYNAMIC_SCISSOR; + case VK_DYNAMIC_STATE_LINE_WIDTH: + return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; + case VK_DYNAMIC_STATE_DEPTH_BIAS: + return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; + case VK_DYNAMIC_STATE_BLEND_CONSTANTS: + return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; + case VK_DYNAMIC_STATE_DEPTH_BOUNDS: + return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; + case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: + return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; + case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: + return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; + case VK_DYNAMIC_STATE_STENCIL_REFERENCE: + return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; + case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT: + return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE; + default: + assert(!"Unsupported dynamic state"); + return 0; + } +} + + enum anv_pipe_bits { ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0), ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1), @@ -2065,23 +2220,19 @@ struct anv_xfb_binding { }; #define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset)) +#define ANV_PARAM_IS_PUSH(param) ((uint32_t)(param) >> 16 == 1) #define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff) -struct anv_push_constants { - /* Current allocated size of this push constants data structure. - * Because a decent chunk of it may not be used (images on SKL, for - * instance), we won't actually allocate the entire structure up-front. - */ - uint32_t size; +#define ANV_PARAM_DYN_OFFSET(offset) ((2 << 16) | (uint32_t)(offset)) +#define ANV_PARAM_IS_DYN_OFFSET(param) ((uint32_t)(param) >> 16 == 2) +#define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff) +struct anv_push_constants { /* Push constant data provided by the client through vkPushConstants */ uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; /* Used for vkCmdDispatchBase */ uint32_t base_work_group_id[3]; - - /* Image data for image_load_store on pre-SKL */ - struct brw_image_param images[MAX_GEN8_IMAGES]; }; struct anv_dynamic_state { @@ -2124,13 +2275,18 @@ struct anv_dynamic_state { uint32_t front; uint32_t back; } stencil_reference; + + struct { + uint32_t factor; + uint16_t pattern; + } line_stipple; }; extern const struct anv_dynamic_state default_dynamic_state; -void anv_dynamic_state_copy(struct anv_dynamic_state *dest, - const struct anv_dynamic_state *src, - uint32_t copy_mask); +uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask); struct anv_surface_state { struct anv_state state; @@ -2180,6 +2336,7 @@ struct anv_attachment_state { * have not been cleared yet when multiview is active. */ uint32_t pending_clear_views; + struct anv_image_view * image_view; }; /** State tracking for particular pipeline bind point @@ -2259,7 +2416,7 @@ struct anv_cmd_state { bool xfb_enabled; struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS]; VkShaderStageFlags push_constant_stages; - struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; + struct anv_push_constants push_constants[MESA_SHADER_STAGES]; struct anv_state binding_tables[MESA_SHADER_STAGES]; struct anv_state samplers[MESA_SHADER_STAGES]; @@ -2279,6 +2436,12 @@ struct anv_cmd_state { bool conditional_render_enabled; + /** + * Last rendering scale argument provided to + * genX(cmd_buffer_emit_hashing_mode)(). + */ + unsigned current_hash_scale; + /** * Array length is anv_cmd_state::pass::attachment_count. Array content is * valid only when recording a render pass instance. @@ -2379,14 +2542,6 @@ VkResult anv_cmd_buffer_execbuf(struct anv_device *device, VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer); -VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size); -#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ - anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ - (offsetof(struct anv_push_constants, field) + \ - sizeof(cmd_buffer->state.push_constants[0]->field))) - struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, const void *data, uint32_t size, uint32_t alignment); struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, @@ -2589,7 +2744,6 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) struct anv_pipeline_bind_map { uint32_t surface_count; uint32_t sampler_count; - uint32_t image_param_count; struct anv_pipeline_binding * surface_to_descriptor; struct anv_pipeline_binding * sampler_to_descriptor; @@ -2614,6 +2768,9 @@ struct anv_shader_bin { const struct brw_stage_prog_data *prog_data; uint32_t prog_data_size; + struct brw_compile_stats stats[3]; + uint32_t num_stats; + struct nir_xfb_info *xfb_info; struct anv_pipeline_bind_map bind_map; @@ -2626,6 +2783,7 @@ anv_shader_bin_create(struct anv_device *device, const void *constant_data, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, const void *prog_data_param, + const struct brw_compile_stats *stats, uint32_t num_stats, const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); @@ -2647,20 +2805,37 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader) anv_shader_bin_destroy(device, shader); } +/* 5 possible simultaneous shader stages and FS may have up to 3 binaries */ +#define MAX_PIPELINE_EXECUTABLES 7 + +struct anv_pipeline_executable { + gl_shader_stage stage; + + struct brw_compile_stats stats; + + char *disasm; +}; + struct anv_pipeline { struct anv_device * device; struct anv_batch batch; uint32_t batch_data[512]; struct anv_reloc_list batch_relocs; - uint32_t dynamic_state_mask; + anv_cmd_dirty_mask_t dynamic_state_mask; struct anv_dynamic_state dynamic_state; + void * mem_ctx; + + VkPipelineCreateFlags flags; struct anv_subpass * subpass; bool needs_data_cache; struct anv_shader_bin * shaders[MESA_SHADER_STAGES]; + uint32_t num_executables; + struct anv_pipeline_executable executables[MAX_PIPELINE_EXECUTABLES]; + struct { const struct gen_l3_config * l3_config; uint32_t total_size; @@ -2894,7 +3069,8 @@ struct anv_image { uint32_t array_size; uint32_t samples; /**< VkImageCreateInfo::samples */ uint32_t n_planes; - VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ + VkImageUsageFlags usage; /**< VkImageCreateInfo::usage. */ + VkImageUsageFlags stencil_usage; VkImageCreateFlags create_flags; /* Flags used when creating image. */ VkImageTiling tiling; /** VkImageCreateInfo::tiling */ @@ -3116,7 +3292,13 @@ anv_can_sample_with_hiz(const struct gen_device_info * const devinfo, if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) return false; - if (devinfo->gen < 8) + /* Allow this feature on BDW even though it is disabled in the BDW devinfo + * struct. There's documentation which suggests that this feature actually + * reduces performance on BDW, but it has only been observed to help so + * far. Sampling fast-cleared blocks on BDW must also be handled with care + * (see depth_stencil_attachment_compute_aux_usage() for more info). + */ + if (devinfo->gen != 8 && !devinfo->has_sample_with_hiz) return false; return image->samples == 1; @@ -3195,6 +3377,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, void anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + VkImageAspectFlagBits aspect, uint32_t base_level, uint32_t level_count, uint32_t base_layer, uint32_t layer_count); @@ -3413,6 +3596,11 @@ struct anv_sampler { uint32_t state[3][4]; uint32_t n_planes; struct anv_ycbcr_conversion *conversion; + + /* Blob of sampler state data which is guaranteed to be 32-byte aligned + * and with a 32-byte stride for use as bindless samplers. + */ + struct anv_state bindless_state; }; struct anv_framebuffer { @@ -3530,8 +3718,7 @@ enum anv_dump_action { void anv_dump_start(struct anv_device *device, enum anv_dump_action actions); void anv_dump_finish(void); -void anv_dump_add_framebuffer(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); +void anv_dump_add_attachments(struct anv_cmd_buffer *cmd_buffer); static inline uint32_t anv_get_subpass_id(const struct anv_cmd_state * const cmd_state) @@ -3631,6 +3818,9 @@ ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, VkSamplerYcbcrConversion) # define genX(x) gen11_##x # include "anv_genX.h" # undef genX +# define genX(x) gen12_##x +# include "anv_genX.h" +# undef genX #endif #endif /* ANV_PRIVATE_H */