X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_private.h;h=67049cc37fe584a042c334b87af39dfe1f2decee;hb=9f0db069d3a507fbb4a64393d50df18fa9376b62;hp=6e89ab7a80b6fe6bb5f0fd1cb67b35adacb928bf;hpb=39925d60ecfb3d41c65b37d2d72710aff4aa83d1;p=mesa.git diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 6e89ab7a80b..488ffdec820 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -30,7 +30,7 @@ #include #include #include -#include +#include "drm-uapi/i915_drm.h" #ifdef HAVE_VALGRIND #include @@ -40,24 +40,29 @@ #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) #endif #else -#define VG(x) +#define VG(x) ((void)0) #endif #include "common/gen_clflush.h" +#include "common/gen_decoder.h" #include "common/gen_gem.h" +#include "common/gen_l3_config.h" #include "dev/gen_device_info.h" #include "blorp/blorp.h" #include "compiler/brw_compiler.h" +#include "util/bitset.h" #include "util/macros.h" #include "util/hash_table.h" #include "util/list.h" -#include "util/set.h" +#include "util/sparse_array.h" #include "util/u_atomic.h" #include "util/u_vector.h" #include "util/u_math.h" #include "util/vma.h" +#include "util/xmlconfig.h" #include "vk_alloc.h" #include "vk_debug_report.h" +#include "vk_object.h" /* Pre-declarations needed for WSI entrypoints */ struct wl_surface; @@ -66,12 +71,16 @@ typedef struct xcb_connection_t xcb_connection_t; typedef uint32_t xcb_visualid_t; typedef uint32_t xcb_window_t; +struct anv_batch; struct anv_buffer; struct anv_buffer_view; struct anv_image_view; struct anv_instance; -struct gen_l3_config; +struct gen_aux_map_context; +struct gen_perf_config; +struct gen_perf_counter_pass; +struct gen_perf_query_result; #include #include @@ -82,10 +91,12 @@ struct gen_l3_config; #include "anv_extensions.h" #include "isl/isl.h" -#include "common/gen_debug.h" +#include "dev/gen_debug.h" #include "common/intel_log.h" #include "wsi_common.h" +#define NSEC_PER_SEC 1000000000ull + /* anv Virtual Memory Layout * ========================= * @@ -119,13 +130,12 @@ struct gen_l3_config; #define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */ #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL -#define HIGH_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ -#define HIGH_HEAP_MAX_ADDRESS 0xfffeffffffffULL +#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ +#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x0002bfffffffULL +#define HIGH_HEAP_MIN_ADDRESS 0x0002c0000000ULL /* 11 GiB */ #define LOW_HEAP_SIZE \ (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1) -#define HIGH_HEAP_SIZE \ - (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1) #define DYNAMIC_STATE_POOL_SIZE \ (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1) #define BINDING_TABLE_POOL_SIZE \ @@ -134,6 +144,8 @@ struct gen_l3_config; (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1) #define INSTRUCTION_STATE_POOL_SIZE \ (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1) +#define CLIENT_VISIBLE_HEAP_SIZE \ + (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1) /* Allowing different clear colors requires us to perform a depth resolve at * the end of certain render passes. This is because while slow clears store @@ -151,6 +163,8 @@ struct gen_l3_config; #define ANV_HZ_FC_VAL 1.0f #define MAX_VBS 28 +#define MAX_XFB_BUFFERS 4 +#define MAX_XFB_STREAMS 4 #define MAX_SETS 8 #define MAX_RTS 8 #define MAX_VIEWPORTS 16 @@ -158,8 +172,29 @@ struct gen_l3_config; #define MAX_PUSH_CONSTANTS_SIZE 128 #define MAX_DYNAMIC_BUFFERS 16 #define MAX_IMAGES 64 -#define MAX_GEN8_IMAGES 8 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */ +#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096 +#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 +/* We need 16 for UBO block reads to work and 32 for push UBOs. However, we + * use 64 here to avoid cache issues. This could most likely bring it back to + * 32 if we had different virtual addresses for the different views on a given + * GEM object. + */ +#define ANV_UBO_ALIGNMENT 64 +#define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4 +#define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16 + +/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model": + * + * "The surface state model is used when a Binding Table Index (specified + * in the message descriptor) of less than 240 is specified. In this model, + * the Binding Table Index is used to index into the binding table, and the + * binding table entry contains a pointer to the SURFACE_STATE." + * + * Binding table values above 240 are used for various things in the hardware + * such as stateless, stateless with incoherent cache, SLM, and bindless. + */ +#define MAX_BINDING_TABLE_SIZE 240 /* The kernel relocation API has a limitation of a 32-bit delta value * applied to the address before it is written which, in spite of it being @@ -186,8 +221,23 @@ struct gen_l3_config; /* We reserve this MI ALU register for the purpose of handling predication. * Other code which uses the MI ALU should leave it alone. */ -#define ANV_PREDICATE_RESULT_REG MI_ALU_REG15 +#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */ +/* We reserve this MI ALU register to pass around an offset computed from + * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query. + * Other code which uses the MI ALU should leave it alone. + */ +#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */ + +/* For gen12 we set the streamout buffers using 4 separate commands + * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout + * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of + * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the + * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode. + * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for + * 3DSTATE_SO_BUFFER_INDEX_0. + */ +#define SO_BUFFER_INDEX_0_CMD 0x60 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) static inline uint32_t @@ -196,18 +246,31 @@ align_down_npot_u32(uint32_t v, uint32_t a) return v - (v % a); } +static inline uint32_t +align_down_u32(uint32_t v, uint32_t a) +{ + assert(a != 0 && a == (a & -a)); + return v & ~(a - 1); +} + static inline uint32_t align_u32(uint32_t v, uint32_t a) { assert(a != 0 && a == (a & -a)); - return (v + a - 1) & ~(a - 1); + return align_down_u32(v + a - 1, a); } static inline uint64_t -align_u64(uint64_t v, uint64_t a) +align_down_u64(uint64_t v, uint64_t a) { assert(a != 0 && a == (a & -a)); - return (v + a - 1) & ~(a - 1); + return v & ~(a - 1); +} + +static inline uint64_t +align_u64(uint64_t v, uint64_t a) +{ + return align_down_u64(v + a - 1, a); } static inline int32_t @@ -271,6 +334,20 @@ vk_to_isl_color(VkClearColorValue color) }; } +static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags) +{ + uintptr_t mask = (1ull << bits) - 1; + *flags = ptr & mask; + return (void *) (ptr & ~mask); +} + +static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags) +{ + uintptr_t value = (uintptr_t) ptr; + uintptr_t mask = (1ull << bits) - 1; + return value | (mask & flags); +} + #define for_each_bit(b, dword) \ for (uint32_t __dword = (dword); \ (b) = __builtin_ffs(__dword) - 1, __dword; \ @@ -394,21 +471,23 @@ VkResult __vk_errorv(struct anv_instance *instance, const void *object, VkResult __vk_errorf(struct anv_instance *instance, const void *object, VkDebugReportObjectTypeEXT type, VkResult error, - const char *file, int line, const char *format, ...); + const char *file, int line, const char *format, ...) + anv_printflike(7, 8); #ifdef DEBUG #define vk_error(error) __vk_errorf(NULL, NULL,\ VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,\ error, __FILE__, __LINE__, NULL) -#define vk_errorv(instance, obj, error, format, args)\ - __vk_errorv(instance, obj, REPORT_OBJECT_TYPE(obj), error,\ - __FILE__, __LINE__, format, args) -#define vk_errorf(instance, obj, error, format, ...)\ +#define vk_errorfi(instance, obj, error, format, ...)\ __vk_errorf(instance, obj, REPORT_OBJECT_TYPE(obj), error,\ __FILE__, __LINE__, format, ## __VA_ARGS__) +#define vk_errorf(device, obj, error, format, ...)\ + vk_errorfi(anv_device_instance_or_null(device),\ + obj, error, format, ## __VA_ARGS__) #else #define vk_error(error) error -#define vk_errorf(instance, obj, error, format, ...) error +#define vk_errorfi(instance, obj, error, format, ...) error +#define vk_errorf(device, obj, error, format, ...) error #endif /** @@ -429,7 +508,7 @@ VkResult __vk_errorf(struct anv_instance *instance, const void *object, #define anv_debug_ignored_stype(sType) \ intel_logd("%s: ignored VkStructureType %u\n", __func__, (sType)) -void __anv_perf_warn(struct anv_instance *instance, const void *object, +void __anv_perf_warn(struct anv_device *device, const void *object, VkDebugReportObjectTypeEXT type, const char *file, int line, const char *format, ...) anv_printflike(6, 7); @@ -570,41 +649,100 @@ anv_multialloc_alloc2(struct anv_multialloc *ma, return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope); } -/* Extra ANV-defined BO flags which won't be passed to the kernel */ -#define ANV_BO_EXTERNAL (1ull << 31) -#define ANV_BO_FLAG_MASK (1ull << 31) - struct anv_bo { uint32_t gem_handle; + uint32_t refcount; + /* Index into the current validation list. This is used by the * validation list building alrogithm to track which buffers are already * in the validation list so that we can ensure uniqueness. */ uint32_t index; + /* Index for use with util_sparse_array_free_list */ + uint32_t free_index; + /* Last known offset. This value is provided by the kernel when we * execbuf and is used as the presumed offset for the next bunch of * relocations. */ uint64_t offset; + /** Size of the buffer not including implicit aux */ uint64_t size; + + /* Map for internally mapped BOs. + * + * If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO. + */ void *map; + /** Size of the implicit CCS range at the end of the buffer + * + * On Gen12, CCS data is always a direct 1/256 scale-down. A single 64K + * page of main surface data maps to a 256B chunk of CCS data and that + * mapping is provided on TGL-LP by the AUX table which maps virtual memory + * addresses in the main surface to virtual memory addresses for CCS data. + * + * Because we can't change these maps around easily and because Vulkan + * allows two VkImages to be bound to overlapping memory regions (as long + * as the app is careful), it's not feasible to make this mapping part of + * the image. (On Gen11 and earlier, the mapping was provided via + * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.) + * Instead, we attach the CCS data directly to the buffer object and setup + * the AUX table mapping at BO creation time. + * + * This field is for internal tracking use by the BO allocator only and + * should not be touched by other parts of the code. If something wants to + * know if a BO has implicit CCS data, it should instead look at the + * has_implicit_ccs boolean below. + * + * This data is not included in maps of this buffer. + */ + uint32_t _ccs_size; + /** Flags to pass to the kernel through drm_i915_exec_object2::flags */ uint32_t flags; + + /** True if this BO may be shared with other processes */ + bool is_external:1; + + /** True if this BO is a wrapper + * + * When set to true, none of the fields in this BO are meaningful except + * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO. + * See also anv_bo_unwrap(). Wrapper BOs are not allowed when use_softpin + * is set in the physical device. + */ + bool is_wrapper:1; + + /** See also ANV_BO_ALLOC_FIXED_ADDRESS */ + bool has_fixed_address:1; + + /** True if this BO wraps a host pointer */ + bool from_host_ptr:1; + + /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */ + bool has_client_visible_address:1; + + /** True if this BO has implicit CCS data attached to it */ + bool has_implicit_ccs:1; }; -static inline void -anv_bo_init(struct anv_bo *bo, uint32_t gem_handle, uint64_t size) +static inline struct anv_bo * +anv_bo_ref(struct anv_bo *bo) +{ + p_atomic_inc(&bo->refcount); + return bo; +} + +static inline struct anv_bo * +anv_bo_unwrap(struct anv_bo *bo) { - bo->gem_handle = gem_handle; - bo->index = 0; - bo->offset = -1; - bo->size = size; - bo->map = NULL; - bo->flags = 0; + while (bo->is_wrapper) + bo = bo->map; + return bo; } /* Represents a lock-free linked list of "free" things. This is used by @@ -618,7 +756,10 @@ union anv_free_list { /* A simple count that is incremented every time the head changes. */ uint32_t count; }; - uint64_t u64; + /* Make sure it's aligned to 64 bits. This will make atomic operations + * faster on 32 bit platforms. + */ + uint64_t u64 __attribute__ ((aligned (8))); }; #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } }) @@ -629,21 +770,32 @@ struct anv_block_state { uint32_t next; uint32_t end; }; - uint64_t u64; + /* Make sure it's aligned to 64 bits. This will make atomic operations + * faster on 32 bit platforms. + */ + uint64_t u64 __attribute__ ((aligned (8))); }; }; #define anv_block_pool_foreach_bo(bo, pool) \ - for (bo = (pool)->bos; bo != &(pool)->bos[(pool)->nbos]; bo++) + for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \ + _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \ + _pp_bo++) #define ANV_MAX_BLOCK_POOL_BOS 20 struct anv_block_pool { struct anv_device *device; + bool use_softpin; - uint64_t bo_flags; + /* Wrapper BO for use in relocation lists. This BO is simply a wrapper + * around the actual BO so that we grow the pool after the wrapper BO has + * been put in a relocation list. This is only used in the non-softpin + * case. + */ + struct anv_bo wrapper_bo; - struct anv_bo bos[ANV_MAX_BLOCK_POOL_BOS]; + struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS]; struct anv_bo *bo; uint32_t nbos; @@ -661,6 +813,19 @@ struct anv_block_pool { */ uint32_t center_bo_offset; + /* Current memory map of the block pool. This pointer may or may not + * point to the actual beginning of the block pool memory. If + * anv_block_pool_alloc_back has ever been called, then this pointer + * will point to the "center" position of the buffer and all offsets + * (negative or positive) given out by the block pool alloc functions + * will be valid relative to this pointer. + * + * In particular, map == bo.map + center_offset + * + * DO NOT access this pointer directly. Use anv_block_pool_map() instead, + * since it will handle the softpin case as well, where this points to NULL. + */ + void *map; int fd; /** @@ -703,7 +868,7 @@ struct anv_fixed_size_state_pool { }; #define ANV_MIN_STATE_SIZE_LOG2 6 -#define ANV_MAX_STATE_SIZE_LOG2 20 +#define ANV_MAX_STATE_SIZE_LOG2 21 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1) @@ -718,12 +883,17 @@ struct anv_state_table { struct anv_free_entry *map; uint32_t size; struct anv_block_state state; - struct u_vector mmap_cleanups; + struct u_vector cleanups; }; struct anv_state_pool { struct anv_block_pool block_pool; + /* Offset into the relevant state base address where the state pool starts + * allocating memory. + */ + int32_t start_offset; + struct anv_state_table table; /* The size of blocks which will be allocated from the block pool */ @@ -735,7 +905,11 @@ struct anv_state_pool { struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; }; -struct anv_state_stream_block; +struct anv_state_reserved_pool { + struct anv_state_pool *pool; + union anv_free_list reserved_blocks; + uint32_t count; +}; struct anv_state_stream { struct anv_state_pool *state_pool; @@ -750,7 +924,7 @@ struct anv_state_stream { uint32_t next; /* List of all blocks allocated from this pool */ - struct anv_state_stream_block *block_list; + struct util_dynarray all_blocks; }; /* The block_pool functions exported for testing only. The block pool should @@ -759,20 +933,20 @@ struct anv_state_stream { VkResult anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint64_t start_address, - uint32_t initial_size, - uint64_t bo_flags); + uint32_t initial_size); void anv_block_pool_finish(struct anv_block_pool *pool); int32_t anv_block_pool_alloc(struct anv_block_pool *pool, uint32_t block_size, uint32_t *padding); int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool, uint32_t block_size); -void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset); +void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t +size); VkResult anv_state_pool_init(struct anv_state_pool *pool, struct anv_device *device, - uint64_t start_address, - uint32_t block_size, - uint64_t bo_flags); + uint64_t base_address, + int32_t start_offset, + uint32_t block_size); void anv_state_pool_finish(struct anv_state_pool *pool); struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, uint32_t state_size, uint32_t alignment); @@ -785,6 +959,15 @@ void anv_state_stream_finish(struct anv_state_stream *stream); struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, uint32_t size, uint32_t alignment); +void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool, + struct anv_state_pool *parent, + uint32_t count, uint32_t size, + uint32_t alignment); +void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool); +struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool); +void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool, + struct anv_state state); + VkResult anv_state_table_init(struct anv_state_table *table, struct anv_device *device, uint32_t initial_entries); @@ -810,26 +993,18 @@ anv_state_table_get(struct anv_state_table *table, uint32_t idx) struct anv_bo_pool { struct anv_device *device; - uint64_t bo_flags; - - void *free_list[16]; + struct util_sparse_array_free_list free_list[16]; }; -void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device, - uint64_t bo_flags); +void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device); void anv_bo_pool_finish(struct anv_bo_pool *pool); -VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, - uint32_t size); -void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); - -struct anv_scratch_bo { - bool exists; - struct anv_bo bo; -}; +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size, + struct anv_bo **bo_out); +void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo); struct anv_scratch_pool { /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */ - struct anv_scratch_bo bos[16][MESA_SHADER_STAGES]; + struct anv_bo *bos[16][MESA_SHADER_STAGES]; }; void anv_scratch_pool_init(struct anv_device *device, @@ -843,34 +1018,17 @@ struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device, /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */ struct anv_bo_cache { - struct hash_table *bo_map; + struct util_sparse_array bo_map; pthread_mutex_t mutex; }; VkResult anv_bo_cache_init(struct anv_bo_cache *cache); void anv_bo_cache_finish(struct anv_bo_cache *cache); -VkResult anv_bo_cache_alloc(struct anv_device *device, - struct anv_bo_cache *cache, - uint64_t size, uint64_t bo_flags, - struct anv_bo **bo); -VkResult anv_bo_cache_import(struct anv_device *device, - struct anv_bo_cache *cache, - int fd, uint64_t bo_flags, - struct anv_bo **bo); -VkResult anv_bo_cache_export(struct anv_device *device, - struct anv_bo_cache *cache, - struct anv_bo *bo_in, int *fd_out); -void anv_bo_cache_release(struct anv_device *device, - struct anv_bo_cache *cache, - struct anv_bo *bo); struct anv_memory_type { /* Standard bits passed on to the client */ VkMemoryPropertyFlags propertyFlags; uint32_t heapIndex; - - /* Driver-internal book-keeping */ - VkBufferUsageFlags valid_buffer_usage; }; struct anv_memory_heap { @@ -878,15 +1036,20 @@ struct anv_memory_heap { VkDeviceSize size; VkMemoryHeapFlags flags; - /* Driver-internal book-keeping */ - bool supports_48bit_addresses; + /** Driver-internal book-keeping. + * + * Align it to 64 bits to make atomic operations faster on 32 bit platforms. + */ + VkDeviceSize used __attribute__ ((aligned (8))); }; struct anv_physical_device { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; + + /* Link in anv_instance::physical_devices */ + struct list_head link; struct anv_instance * instance; - uint32_t chipset_id; bool no_hw; char path[20]; const char * name; @@ -908,15 +1071,46 @@ struct anv_physical_device { bool supports_48bit_addresses; struct brw_compiler * compiler; struct isl_device isl_dev; + struct gen_perf_config * perf; int cmd_parser_version; + bool has_softpin; bool has_exec_async; bool has_exec_capture; bool has_exec_fence; bool has_syncobj; bool has_syncobj_wait; bool has_context_priority; - bool use_softpin; bool has_context_isolation; + bool has_mem_available; + bool has_mmap_offset; + uint64_t gtt_size; + + bool use_softpin; + bool always_use_bindless; + bool use_call_secondary; + + /** True if we can access buffers using A64 messages */ + bool has_a64_buffer_access; + /** True if we can use bindless access for images */ + bool has_bindless_images; + /** True if we can use bindless access for samplers */ + bool has_bindless_samplers; + + /** True if we can read the GPU timestamp register + * + * When running in a virtual context, the timestamp register is unreadable + * on Gen12+. + */ + bool has_reg_timestamp; + + /** True if this device has implicit AUX + * + * If true, CCS is handled as an implicit attachment to the BO rather than + * as an explicitly bound surface. + */ + bool has_implicit_ccs; + + bool always_flush_cache; struct anv_device_extension_table supported_extensions; @@ -951,7 +1145,7 @@ struct anv_app_info { }; struct anv_instance { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; VkAllocationCallbacks alloc; @@ -959,14 +1153,18 @@ struct anv_instance { struct anv_instance_extension_table enabled_extensions; struct anv_instance_dispatch_table dispatch; + struct anv_physical_device_dispatch_table physical_device_dispatch; struct anv_device_dispatch_table device_dispatch; - int physicalDeviceCount; - struct anv_physical_device physicalDevice; + bool physical_devices_enumerated; + struct list_head physical_devices; bool pipeline_cache_enabled; struct vk_debug_report_instance debug_report_callbacks; + + struct driOptionCache dri_options; + struct driOptionCache available_dri_options; }; VkResult anv_init_wsi(struct anv_physical_device *physical_device); @@ -976,21 +1174,78 @@ uint32_t anv_physical_device_api_version(struct anv_physical_device *dev); bool anv_physical_device_extension_supported(struct anv_physical_device *dev, const char *name); +struct anv_queue_submit { + struct anv_cmd_buffer * cmd_buffer; + + uint32_t fence_count; + uint32_t fence_array_length; + struct drm_i915_gem_exec_fence * fences; + + uint32_t temporary_semaphore_count; + uint32_t temporary_semaphore_array_length; + struct anv_semaphore_impl * temporary_semaphores; + + /* Semaphores to be signaled with a SYNC_FD. */ + struct anv_semaphore ** sync_fd_semaphores; + uint32_t sync_fd_semaphore_count; + uint32_t sync_fd_semaphore_array_length; + + /* Allocated only with non shareable timelines. */ + struct anv_timeline ** wait_timelines; + uint32_t wait_timeline_count; + uint32_t wait_timeline_array_length; + uint64_t * wait_timeline_values; + + struct anv_timeline ** signal_timelines; + uint32_t signal_timeline_count; + uint32_t signal_timeline_array_length; + uint64_t * signal_timeline_values; + + int in_fence; + bool need_out_fence; + int out_fence; + + uint32_t fence_bo_count; + uint32_t fence_bo_array_length; + /* An array of struct anv_bo pointers with lower bit used as a flag to + * signal we will wait on that BO (see anv_(un)pack_ptr). + */ + uintptr_t * fence_bos; + + int perf_query_pass; + + const VkAllocationCallbacks * alloc; + VkSystemAllocationScope alloc_scope; + + struct anv_bo * simple_bo; + uint32_t simple_bo_size; + + struct list_head link; +}; + struct anv_queue { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; struct anv_device * device; + /* + * A list of struct anv_queue_submit to be submitted to i915. + */ + struct list_head queued_submits; + VkDeviceQueueCreateFlags flags; }; struct anv_pipeline_cache { + struct vk_object_base base; struct anv_device * device; pthread_mutex_t mutex; struct hash_table * nir_cache; struct hash_table * cache; + + bool external_sync; }; struct nir_xfb_info; @@ -998,7 +1253,8 @@ struct anv_pipeline_bind_map; void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, struct anv_device *device, - bool cache_enabled); + bool cache_enabled, + bool external_sync); void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); struct anv_shader_bin * @@ -1006,29 +1262,36 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, const void *key, uint32_t key_size); struct anv_shader_bin * anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + gl_shader_stage stage, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, const void *constant_data, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const struct brw_compile_stats *stats, + uint32_t num_stats, const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); struct anv_shader_bin * anv_device_search_for_kernel(struct anv_device *device, struct anv_pipeline_cache *cache, - const void *key_data, uint32_t key_size); + const void *key_data, uint32_t key_size, + bool *user_cache_bit); struct anv_shader_bin * anv_device_upload_kernel(struct anv_device *device, struct anv_pipeline_cache *cache, + gl_shader_stage stage, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, const void *constant_data, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const struct brw_compile_stats *stats, + uint32_t num_stats, const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); @@ -1048,13 +1311,15 @@ anv_device_upload_nir(struct anv_device *device, const struct nir_shader *nir, unsigned char sha1_key[20]); -struct anv_device { - VK_LOADER_DATA _loader_data; +struct anv_address { + struct anv_bo *bo; + uint32_t offset; +}; - VkAllocationCallbacks alloc; +struct anv_device { + struct vk_device vk; - struct anv_instance * instance; - uint32_t chipset_id; + struct anv_physical_device * physical; bool no_hw; struct gen_device_info info; struct isl_device isl_dev; @@ -1067,9 +1332,11 @@ struct anv_device { pthread_mutex_t vma_mutex; struct util_vma_heap vma_lo; + struct util_vma_heap vma_cva; struct util_vma_heap vma_hi; - uint64_t vma_lo_available; - uint64_t vma_hi_available; + + /** List of all anv_device_memory objects */ + struct list_head memory_objects; struct anv_bo_pool batch_bo_pool; @@ -1080,31 +1347,64 @@ struct anv_device { struct anv_state_pool binding_table_pool; struct anv_state_pool surface_state_pool; - struct anv_bo workaround_bo; - struct anv_bo trivial_batch_bo; - struct anv_bo hiz_clear_bo; + struct anv_state_reserved_pool custom_border_colors; + + /** BO used for various workarounds + * + * There are a number of workarounds on our hardware which require writing + * data somewhere and it doesn't really matter where. For that, we use + * this BO and just write to the first dword or so. + * + * We also need to be able to handle NULL buffers bound as pushed UBOs. + * For that, we use the high bytes (>= 1024) of the workaround BO. + */ + struct anv_bo * workaround_bo; + struct anv_address workaround_address; + + struct anv_bo * trivial_batch_bo; + struct anv_bo * hiz_clear_bo; + struct anv_state null_surface_state; struct anv_pipeline_cache default_pipeline_cache; struct blorp_context blorp; struct anv_state border_colors; + struct anv_state slice_hash; + struct anv_queue queue; struct anv_scratch_pool scratch_pool; - uint32_t default_mocs; - uint32_t external_mocs; - pthread_mutex_t mutex; pthread_cond_t queue_submit; - bool _lost; + int _lost; + + struct gen_batch_decode_ctx decoder_ctx; + /* + * When decoding a anv_cmd_buffer, we might need to search for BOs through + * the cmd_buffer's list. + */ + struct anv_cmd_buffer *cmd_buffer_being_decoded; + + int perf_fd; /* -1 if no opened */ + uint64_t perf_metric; /* 0 if unset */ + + struct gen_aux_map_context *aux_map_ctx; + + struct gen_debug_block_frame *debug_frame_desc; }; +static inline struct anv_instance * +anv_device_instance_or_null(const struct anv_device *device) +{ + return device ? device->physical->instance : NULL; +} + static inline struct anv_state_pool * anv_binding_table_pool(struct anv_device *device) { - if (device->instance->physicalDevice.use_softpin) + if (device->physical->use_softpin) return &device->binding_table_pool; else return &device->surface_state_pool; @@ -1112,7 +1412,7 @@ anv_binding_table_pool(struct anv_device *device) static inline struct anv_state anv_binding_table_pool_alloc(struct anv_device *device) { - if (device->instance->physicalDevice.use_softpin) + if (device->physical->use_softpin) return anv_state_pool_alloc(&device->binding_table_pool, device->binding_table_pool.block_size, 0); else @@ -1127,38 +1427,123 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) { static inline uint32_t anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo) { - if (bo->flags & ANV_BO_EXTERNAL) - return device->external_mocs; + if (bo->is_external) + return device->isl_dev.mocs.external; else - return device->default_mocs; + return device->isl_dev.mocs.internal; } void anv_device_init_blorp(struct anv_device *device); void anv_device_finish_blorp(struct anv_device *device); +void _anv_device_set_all_queue_lost(struct anv_device *device); VkResult _anv_device_set_lost(struct anv_device *device, const char *file, int line, - const char *msg, ...); + const char *msg, ...) + anv_printflike(4, 5); +VkResult _anv_queue_set_lost(struct anv_queue *queue, + const char *file, int line, + const char *msg, ...) + anv_printflike(4, 5); #define anv_device_set_lost(dev, ...) \ _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__) +#define anv_queue_set_lost(queue, ...) \ + _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) static inline bool anv_device_is_lost(struct anv_device *device) { - return unlikely(device->_lost); + return unlikely(p_atomic_read(&device->_lost)); } -VkResult anv_device_execbuf(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf, - struct anv_bo **execbuf_bos); VkResult anv_device_query_status(struct anv_device *device); + + +enum anv_bo_alloc_flags { + /** Specifies that the BO must have a 32-bit address + * + * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS. + */ + ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0), + + /** Specifies that the BO may be shared externally */ + ANV_BO_ALLOC_EXTERNAL = (1 << 1), + + /** Specifies that the BO should be mapped */ + ANV_BO_ALLOC_MAPPED = (1 << 2), + + /** Specifies that the BO should be snooped so we get coherency */ + ANV_BO_ALLOC_SNOOPED = (1 << 3), + + /** Specifies that the BO should be captured in error states */ + ANV_BO_ALLOC_CAPTURE = (1 << 4), + + /** Specifies that the BO will have an address assigned by the caller + * + * Such BOs do not exist in any VMA heap. + */ + ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5), + + /** Enables implicit synchronization on the BO + * + * This is the opposite of EXEC_OBJECT_ASYNC. + */ + ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6), + + /** Enables implicit synchronization on the BO + * + * This is equivalent to EXEC_OBJECT_WRITE. + */ + ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7), + + /** Has an address which is visible to the client */ + ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8), + + /** This buffer has implicit CCS data attached to it */ + ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9), +}; + +VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size, + enum anv_bo_alloc_flags alloc_flags, + uint64_t explicit_address, + struct anv_bo **bo); +VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device, + void *host_ptr, uint32_t size, + enum anv_bo_alloc_flags alloc_flags, + uint64_t client_address, + struct anv_bo **bo_out); +VkResult anv_device_import_bo(struct anv_device *device, int fd, + enum anv_bo_alloc_flags alloc_flags, + uint64_t client_address, + struct anv_bo **bo); +VkResult anv_device_export_bo(struct anv_device *device, + struct anv_bo *bo, int *fd_out); +void anv_device_release_bo(struct anv_device *device, + struct anv_bo *bo); + +static inline struct anv_bo * +anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle) +{ + return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle); +} + VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo); VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo, int64_t timeout); +VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue); +void anv_queue_finish(struct anv_queue *queue); + +VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit); +VkResult anv_queue_submit_simple_batch(struct anv_queue *queue, + struct anv_batch *batch); + +uint64_t anv_gettime_ns(void); +uint64_t anv_get_absolute_timeout(uint64_t timeout); + void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); -void anv_gem_munmap(void *p, uint64_t size); +void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size); uint32_t anv_gem_create(struct anv_device *device, uint64_t size); void anv_gem_close(struct anv_device *device, uint32_t gem_handle); uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); @@ -1178,12 +1563,10 @@ int anv_gem_get_context_param(int fd, int context, uint32_t param, int anv_gem_get_param(int fd, uint32_t param); int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle); bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling); -int anv_gem_get_aperture(int fd, uint64_t *size); int anv_gem_gpu_get_reset_stats(struct anv_device *device, uint32_t *active, uint32_t *pending); int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); -int anv_gem_reg_read(struct anv_device *device, - uint32_t offset, uint64_t *result); +int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result); uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, @@ -1203,17 +1586,20 @@ int anv_gem_syncobj_wait(struct anv_device *device, uint32_t *handles, uint32_t num_handles, int64_t abs_timeout_ns, bool wait_all); -bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo); -void anv_vma_free(struct anv_device *device, struct anv_bo *bo); - -VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); +uint64_t anv_vma_alloc(struct anv_device *device, + uint64_t size, uint64_t align, + enum anv_bo_alloc_flags alloc_flags, + uint64_t client_address); +void anv_vma_free(struct anv_device *device, + uint64_t address, uint64_t size); struct anv_reloc_list { uint32_t num_relocs; uint32_t array_length; struct drm_i915_gem_relocation_entry * relocs; struct anv_bo ** reloc_bos; - struct set * deps; + uint32_t dep_words; + BITSET_WORD * deps; }; VkResult anv_reloc_list_init(struct anv_reloc_list *list, @@ -1224,13 +1610,13 @@ void anv_reloc_list_finish(struct anv_reloc_list *list, VkResult anv_reloc_list_add(struct anv_reloc_list *list, const VkAllocationCallbacks *alloc, uint32_t offset, struct anv_bo *target_bo, - uint32_t delta); + uint32_t delta, uint64_t *address_u64_out); struct anv_batch_bo { /* Link in the anv_cmd_buffer.owned_batch_bos list */ struct list_head link; - struct anv_bo bo; + struct anv_bo * bo; /* Bytes actually consumed in this batch BO */ uint32_t length; @@ -1241,6 +1627,8 @@ struct anv_batch_bo { struct anv_batch { const VkAllocationCallbacks * alloc; + struct anv_address start_addr; + void * start; void * end; void * next; @@ -1267,8 +1655,16 @@ void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); uint64_t anv_batch_emit_reloc(struct anv_batch *batch, void *location, struct anv_bo *bo, uint32_t offset); -VkResult anv_device_submit_simple_batch(struct anv_device *device, - struct anv_batch *batch); +struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location); + +static inline void +anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr, + void *map, size_t size) +{ + batch->start_addr = addr; + batch->next = batch->start = map; + batch->end = map + size; +} static inline VkResult anv_batch_set_error(struct anv_batch *batch, VkResult error) @@ -1285,11 +1681,6 @@ anv_batch_has_error(struct anv_batch *batch) return batch->status != VK_SUCCESS; } -struct anv_address { - struct anv_bo *bo; - uint32_t offset; -}; - #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 }) static inline bool @@ -1402,52 +1793,16 @@ _anv_combine_address(struct anv_batch *batch, void *location, _dst = NULL; \ })) -/* MEMORY_OBJECT_CONTROL_STATE: - * .GraphicsDataTypeGFDT = 0, - * .LLCCacheabilityControlLLCCC = 0, - * .L3CacheabilityControlL3CC = 1, - */ -#define GEN7_MOCS 1 - -/* MEMORY_OBJECT_CONTROL_STATE: - * .LLCeLLCCacheabilityControlLLCCC = 0, - * .L3CacheabilityControlL3CC = 1, - */ -#define GEN75_MOCS 1 - -/* MEMORY_OBJECT_CONTROL_STATE: - * .MemoryTypeLLCeLLCCacheabilityControl = WB, - * .TargetCache = L3DefertoPATforLLCeLLCselection, - * .AgeforQUADLRU = 0 - */ -#define GEN8_MOCS 0x78 - -/* MEMORY_OBJECT_CONTROL_STATE: - * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle, - * .TargetCache = L3DefertoPATforLLCeLLCselection, - * .AgeforQUADLRU = 0 - */ -#define GEN8_EXTERNAL_MOCS 0x18 - -/* Skylake: MOCS is now an index into an array of 62 different caching - * configurations programmed by the kernel. - */ - -/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ -#define GEN9_MOCS 2 - -/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ -#define GEN9_EXTERNAL_MOCS 1 +/* #define __gen_get_batch_dwords anv_batch_emit_dwords */ +/* #define __gen_get_batch_address anv_batch_address */ +/* #define __gen_address_value anv_address_physical */ +/* #define __gen_address_offset anv_address_add */ -/* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */ -#define GEN10_MOCS GEN9_MOCS -#define GEN10_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS +struct anv_device_memory { + struct vk_object_base base; -/* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */ -#define GEN11_MOCS GEN9_MOCS -#define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS + struct list_head link; -struct anv_device_memory { struct anv_bo * bo; struct anv_memory_type * type; VkDeviceSize map_size; @@ -1457,6 +1812,9 @@ struct anv_device_memory { * which we must release when memory is freed. */ struct AHardwareBuffer * ahw; + + /* If set, this memory comes from a host pointer. */ + void * host_ptr; }; /** @@ -1469,13 +1827,101 @@ struct anv_vue_header { float PointWidth; }; +/** Struct representing a sampled image descriptor + * + * This descriptor layout is used for sampled images, bare sampler, and + * combined image/sampler descriptors. + */ +struct anv_sampled_image_descriptor { + /** Bindless image handle + * + * This is expected to already be shifted such that the 20-bit + * SURFACE_STATE table index is in the top 20 bits. + */ + uint32_t image; + + /** Bindless sampler handle + * + * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative + * to the dynamic state base address. + */ + uint32_t sampler; +}; + +struct anv_texture_swizzle_descriptor { + /** Texture swizzle + * + * See also nir_intrinsic_channel_select_intel + */ + uint8_t swizzle[4]; + + /** Unused padding to ensure the struct is a multiple of 64 bits */ + uint32_t _pad; +}; + +/** Struct representing a storage image descriptor */ +struct anv_storage_image_descriptor { + /** Bindless image handles + * + * These are expected to already be shifted such that the 20-bit + * SURFACE_STATE table index is in the top 20 bits. + */ + uint32_t read_write; + uint32_t write_only; +}; + +/** Struct representing a address/range descriptor + * + * The fields of this struct correspond directly to the data layout of + * nir_address_format_64bit_bounded_global addresses. The last field is the + * offset in the NIR address so it must be zero so that when you load the + * descriptor you get a pointer to the start of the range. + */ +struct anv_address_range_descriptor { + uint64_t address; + uint32_t range; + uint32_t zero; +}; + +enum anv_descriptor_data { + /** The descriptor contains a BTI reference to a surface state */ + ANV_DESCRIPTOR_SURFACE_STATE = (1 << 0), + /** The descriptor contains a BTI reference to a sampler state */ + ANV_DESCRIPTOR_SAMPLER_STATE = (1 << 1), + /** The descriptor contains an actual buffer view */ + ANV_DESCRIPTOR_BUFFER_VIEW = (1 << 2), + /** The descriptor contains auxiliary image layout data */ + ANV_DESCRIPTOR_IMAGE_PARAM = (1 << 3), + /** The descriptor contains auxiliary image layout data */ + ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4), + /** anv_address_range_descriptor with a buffer address and range */ + ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5), + /** Bindless surface handle */ + ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6), + /** Storage image handles */ + ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7), + /** Storage image handles */ + ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8), +}; + struct anv_descriptor_set_binding_layout { #ifndef NDEBUG /* The type of the descriptors in this binding */ VkDescriptorType type; #endif - /* Number of array elements in this binding */ + /* Flags provided when this binding was created */ + VkDescriptorBindingFlagsEXT flags; + + /* Bitfield representing the type of data this descriptor contains */ + enum anv_descriptor_data data; + + /* Maximum number of YCbCr texture/sampler planes */ + uint8_t max_plane_count; + + /* Number of array elements in this binding (or size in bytes for inline + * uniform data) + */ uint16_t array_size; /* Index into the flattend descriptor set */ @@ -1485,24 +1931,31 @@ struct anv_descriptor_set_binding_layout { int16_t dynamic_offset_index; /* Index into the descriptor set buffer views */ - int16_t buffer_index; + int16_t buffer_view_index; - struct { - /* Index into the binding table for the associated surface */ - int16_t surface_index; - - /* Index into the sampler table for the associated sampler */ - int16_t sampler_index; - - /* Index into the image table for the associated image */ - int16_t image_index; - } stage[MESA_SHADER_STAGES]; + /* Offset into the descriptor buffer where this descriptor lives */ + uint32_t descriptor_offset; /* Immutable samplers (or NULL if no immutable samplers) */ struct anv_sampler **immutable_samplers; }; +unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout); + +unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice, + VkDescriptorType type); + +bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice, + const struct anv_descriptor_set_binding_layout *binding, + bool sampler); + +bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice, + const struct anv_descriptor_set_binding_layout *binding, + bool sampler); + struct anv_descriptor_set_layout { + struct vk_object_base base; + /* Descriptor set layouts can be destroyed at almost any time */ uint32_t ref_cnt; @@ -1515,16 +1968,25 @@ struct anv_descriptor_set_layout { /* Shader stages affected by this descriptor set */ uint16_t shader_stages; - /* Number of buffers in this descriptor set */ - uint16_t buffer_count; + /* Number of buffer views in this descriptor set */ + uint16_t buffer_view_count; /* Number of dynamic offsets used by this descriptor set */ uint16_t dynamic_offset_count; + /* For each shader stage, which offsets apply to that stage */ + uint16_t stage_dynamic_offsets[MESA_SHADER_STAGES]; + + /* Size of the descriptor buffer for this descriptor set */ + uint32_t descriptor_buffer_size; + /* Bindings in this descriptor set */ struct anv_descriptor_set_binding_layout binding[0]; }; +void anv_descriptor_set_layout_destroy(struct anv_device *device, + struct anv_descriptor_set_layout *layout); + static inline void anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout) { @@ -1538,7 +2000,7 @@ anv_descriptor_set_layout_unref(struct anv_device *device, { assert(layout && layout->ref_cnt >= 1); if (p_atomic_dec_zero(&layout->ref_cnt)) - vk_free(&device->alloc, layout); + anv_descriptor_set_layout_destroy(device, layout); } struct anv_descriptor { @@ -1562,14 +2024,33 @@ struct anv_descriptor { }; struct anv_descriptor_set { + struct vk_object_base base; + + struct anv_descriptor_pool *pool; struct anv_descriptor_set_layout *layout; + + /* Amount of space occupied in the the pool by this descriptor set. It can + * be larger than the size of the descriptor set. + */ uint32_t size; - uint32_t buffer_count; + + /* State relative to anv_descriptor_pool::bo */ + struct anv_state desc_mem; + /* Surface state for the descriptor buffer */ + struct anv_state desc_surface_state; + + uint32_t buffer_view_count; struct anv_buffer_view *buffer_views; + + /* Link to descriptor pool's desc_sets list . */ + struct list_head pool_link; + struct anv_descriptor descriptors[0]; }; struct anv_buffer_view { + struct vk_object_base base; + enum isl_format format; /**< VkBufferViewCreateInfo::format */ uint64_t range; /**< VkBufferViewCreateInfo::range */ @@ -1588,17 +2069,30 @@ struct anv_push_descriptor_set { /* Put this field right behind anv_descriptor_set so it fills up the * descriptors[0] field. */ struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS]; + + /** True if the descriptor set buffer has been referenced by a draw or + * dispatch command. + */ + bool set_used_on_gpu; + struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS]; }; struct anv_descriptor_pool { + struct vk_object_base base; + uint32_t size; uint32_t next; uint32_t free_list; + struct anv_bo *bo; + struct util_vma_heap bo_heap; + struct anv_state_stream surface_state_stream; void *surface_state_free_list; + struct list_head desc_sets; + char data[0]; }; @@ -1629,6 +2123,8 @@ struct anv_descriptor_template_entry { }; struct anv_descriptor_update_template { + struct vk_object_base base; + VkPipelineBindPoint bind_point; /* The descriptor set this template corresponds to. This value is only @@ -1648,23 +2144,24 @@ size_t anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout); void -anv_descriptor_set_write_image_view(struct anv_descriptor_set *set, - const struct gen_device_info * const devinfo, +anv_descriptor_set_write_image_view(struct anv_device *device, + struct anv_descriptor_set *set, const VkDescriptorImageInfo * const info, VkDescriptorType type, uint32_t binding, uint32_t element); void -anv_descriptor_set_write_buffer_view(struct anv_descriptor_set *set, +anv_descriptor_set_write_buffer_view(struct anv_device *device, + struct anv_descriptor_set *set, VkDescriptorType type, struct anv_buffer_view *buffer_view, uint32_t binding, uint32_t element); void -anv_descriptor_set_write_buffer(struct anv_descriptor_set *set, - struct anv_device *device, +anv_descriptor_set_write_buffer(struct anv_device *device, + struct anv_descriptor_set *set, struct anv_state_stream *alloc_stream, VkDescriptorType type, struct anv_buffer *buffer, @@ -1672,10 +2169,17 @@ anv_descriptor_set_write_buffer(struct anv_descriptor_set *set, uint32_t element, VkDeviceSize offset, VkDeviceSize range); +void +anv_descriptor_set_write_inline_uniform_data(struct anv_device *device, + struct anv_descriptor_set *set, + uint32_t binding, + const void *data, + size_t offset, + size_t size); void -anv_descriptor_set_write_template(struct anv_descriptor_set *set, - struct anv_device *device, +anv_descriptor_set_write_template(struct anv_device *device, + struct anv_descriptor_set *set, struct anv_state_stream *alloc_stream, const struct anv_descriptor_update_template *template, const void *data); @@ -1691,33 +2195,68 @@ anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_pool *pool, struct anv_descriptor_set *set); +#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 5) +#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 4) +#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 3) +#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 2) #define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1) #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX struct anv_pipeline_binding { - /* The descriptor set this surface corresponds to. The special value of - * ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS indicates that the offset refers - * to a color attachment and not a regular descriptor. + /** Index in the descriptor set + * + * This is a flattened index; the descriptor set layout is already taken + * into account. + */ + uint32_t index; + + /** The descriptor set this surface corresponds to. + * + * The special ANV_DESCRIPTOR_SET_* values above indicates that this + * binding is not a normal descriptor set but something else. */ uint8_t set; - /* Binding in the descriptor set */ - uint32_t binding; + union { + /** Plane in the binding index for images */ + uint8_t plane; + + /** Input attachment index (relative to the subpass) */ + uint8_t input_attachment_index; + + /** Dynamic offset index (for dynamic UBOs and SSBOs) */ + uint8_t dynamic_offset_index; + }; + + /** For a storage image, whether it is write-only */ + uint8_t write_only; - /* Index in the binding */ + /** Pad to 64 bits so that there are no holes and we can safely memcmp + * assuming POD zero-initialization. + */ + uint8_t pad; +}; + +struct anv_push_range { + /** Index in the descriptor set */ uint32_t index; - /* Plane in the binding index */ - uint8_t plane; + /** Descriptor set index */ + uint8_t set; - /* Input attachment index (relative to the subpass) */ - uint8_t input_attachment_index; + /** Dynamic offset index (for dynamic UBOs) */ + uint8_t dynamic_offset_index; - /* For a storage image, whether it is write-only */ - bool write_only; + /** Start offset in units of 32B */ + uint8_t start; + + /** Range in units of 32B */ + uint8_t length; }; struct anv_pipeline_layout { + struct vk_object_base base; + struct { struct anv_descriptor_set_layout *layout; uint32_t dynamic_offset_start; @@ -1725,14 +2264,12 @@ struct anv_pipeline_layout { uint32_t num_sets; - struct { - bool has_dynamic_offsets; - } stage[MESA_SHADER_STAGES]; - unsigned char sha1[20]; }; struct anv_buffer { + struct vk_object_base base; + struct anv_device * device; VkDeviceSize size; @@ -1756,22 +2293,108 @@ anv_buffer_get_range(struct anv_buffer *buffer, uint64_t offset, uint64_t range) } enum anv_cmd_dirty_bits { - ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */ - ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */ - ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */ - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */ - ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */ - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */ - ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */ - ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */ - ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */ - ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1, - ANV_CMD_DIRTY_PIPELINE = 1 << 9, - ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, - ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, + ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */ + ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */ + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */ + ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */ + ANV_CMD_DIRTY_PIPELINE = 1 << 9, + ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, + ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, + ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12, + ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */ + ANV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1 << 14, /* VK_DYNAMIC_STATE_CULL_MODE_EXT */ + ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1 << 15, /* VK_DYNAMIC_STATE_FRONT_FACE_EXT */ + ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1 << 16, /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT */ + ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1 << 17, /* VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1 << 18, /* VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1 << 19, /* VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1 << 20, /* VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1 << 21, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */ }; typedef uint32_t anv_cmd_dirty_mask_t; +#define ANV_CMD_DIRTY_DYNAMIC_ALL \ + (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT | \ + ANV_CMD_DIRTY_DYNAMIC_SCISSOR | \ + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | \ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | \ + ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | \ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | \ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | \ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | \ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | \ + ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE | \ + ANV_CMD_DIRTY_DYNAMIC_CULL_MODE | \ + ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE | \ + ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | \ + ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | \ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | \ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | \ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | \ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | \ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | \ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) + +static inline enum anv_cmd_dirty_bits +anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state) +{ + switch (vk_state) { + case VK_DYNAMIC_STATE_VIEWPORT: + case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT: + return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; + case VK_DYNAMIC_STATE_SCISSOR: + case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT: + return ANV_CMD_DIRTY_DYNAMIC_SCISSOR; + case VK_DYNAMIC_STATE_LINE_WIDTH: + return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; + case VK_DYNAMIC_STATE_DEPTH_BIAS: + return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; + case VK_DYNAMIC_STATE_BLEND_CONSTANTS: + return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; + case VK_DYNAMIC_STATE_DEPTH_BOUNDS: + return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; + case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: + return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; + case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: + return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; + case VK_DYNAMIC_STATE_STENCIL_REFERENCE: + return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; + case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT: + return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE; + case VK_DYNAMIC_STATE_CULL_MODE_EXT: + return ANV_CMD_DIRTY_DYNAMIC_CULL_MODE; + case VK_DYNAMIC_STATE_FRONT_FACE_EXT: + return ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE; + case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT: + return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; + case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT: + return ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE; + case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT: + return ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE; + case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT: + return ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE; + case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT: + return ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP; + case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT: + return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE; + case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT: + return ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE; + case VK_DYNAMIC_STATE_STENCIL_OP_EXT: + return ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP; + default: + assert(!"Unsupported dynamic state"); + return 0; + } +} + + enum anv_pipe_bits { ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0), ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1), @@ -1779,18 +2402,20 @@ enum anv_pipe_bits { ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3), ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4), ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5), + ANV_PIPE_TILE_CACHE_FLUSH_BIT = (1 << 6), ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10), ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11), ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12), ANV_PIPE_DEPTH_STALL_BIT = (1 << 13), ANV_PIPE_CS_STALL_BIT = (1 << 20), + ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21), /* This bit does not exist directly in PIPE_CONTROL. Instead it means that * a flush has happened but not a CS stall. The next time we do any sort * of invalidation we need to insert a CS stall at that time. Otherwise, * we would have to CS stall on every flush which could be bad. */ - ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21), + ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT = (1 << 22), /* This bit does not exist directly in PIPE_CONTROL. It means that render * target operations related to transfer commands with VkBuffer as @@ -1798,13 +2423,26 @@ enum anv_pipe_bits { * streamer might need to be aware of this to trigger the appropriate stall * before they can proceed with the copy. */ - ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 22), + ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 23), + + /* This bit does not exist directly in PIPE_CONTROL. It means that Gen12 + * AUX-TT data has changed and we need to invalidate AUX-TT data. This is + * done by writing the AUX-TT register. + */ + ANV_PIPE_AUX_TABLE_INVALIDATE_BIT = (1 << 24), + + /* This bit does not exist directly in PIPE_CONTROL. It means that a + * PIPE_CONTROL with a post-sync operation will follow. This is used to + * implement a workaround for Gen9. + */ + ANV_PIPE_POST_SYNC_BIT = (1 << 25), }; #define ANV_PIPE_FLUSH_BITS ( \ ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \ ANV_PIPE_DATA_CACHE_FLUSH_BIT | \ - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT) + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \ + ANV_PIPE_TILE_CACHE_FLUSH_BIT) #define ANV_PIPE_STALL_BITS ( \ ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \ @@ -1817,7 +2455,8 @@ enum anv_pipe_bits { ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \ ANV_PIPE_DATA_CACHE_FLUSH_BIT | \ ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \ - ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT) + ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \ + ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) static inline enum anv_pipe_bits anv_pipe_flush_bits_for_access_flags(VkAccessFlags flags) @@ -1970,26 +2609,42 @@ anv_pipe_invalidate_bits_for_access_flags(VkAccessFlags flags) struct anv_vertex_binding { struct anv_buffer * buffer; VkDeviceSize offset; + VkDeviceSize stride; + VkDeviceSize size; }; -#define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset)) -#define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff) +struct anv_xfb_binding { + struct anv_buffer * buffer; + VkDeviceSize offset; + VkDeviceSize size; +}; struct anv_push_constants { - /* Current allocated size of this push constants data structure. - * Because a decent chunk of it may not be used (images on SKL, for - * instance), we won't actually allocate the entire structure up-front. - */ - uint32_t size; - - /* Push constant data provided by the client through vkPushConstants */ + /** Push constant data provided by the client through vkPushConstants */ uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; - /* Used for vkCmdDispatchBase */ - uint32_t base_work_group_id[3]; + /** Dynamic offsets for dynamic UBOs and SSBOs */ + uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS]; + + uint64_t push_reg_mask; + + /** Pad out to a multiple of 32 bytes */ + uint32_t pad[2]; + + struct { + /** Base workgroup ID + * + * Used for vkCmdDispatchBase. + */ + uint32_t base_work_group_id[3]; - /* Image data for image_load_store on pre-SKL */ - struct brw_image_param images[MAX_GEN8_IMAGES]; + /** Subgroup ID + * + * This is never set by software but is implicitly filled out when + * uploading the push constants for compute shaders. + */ + uint32_t subgroup_id; + } cs; }; struct anv_dynamic_state { @@ -2032,13 +2687,44 @@ struct anv_dynamic_state { uint32_t front; uint32_t back; } stencil_reference; + + struct { + struct { + VkStencilOp fail_op; + VkStencilOp pass_op; + VkStencilOp depth_fail_op; + VkCompareOp compare_op; + } front; + struct { + VkStencilOp fail_op; + VkStencilOp pass_op; + VkStencilOp depth_fail_op; + VkCompareOp compare_op; + } back; + } stencil_op; + + struct { + uint32_t factor; + uint16_t pattern; + } line_stipple; + + VkCullModeFlags cull_mode; + VkFrontFace front_face; + VkPrimitiveTopology primitive_topology; + bool depth_test_enable; + bool depth_write_enable; + VkCompareOp depth_compare_op; + bool depth_bounds_test_enable; + bool stencil_test_enable; + bool dyn_vbo_stride; + bool dyn_vbo_size; }; extern const struct anv_dynamic_state default_dynamic_state; -void anv_dynamic_state_copy(struct anv_dynamic_state *dest, - const struct anv_dynamic_state *src, - uint32_t copy_mask); +uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask); struct anv_surface_state { struct anv_state state; @@ -2069,17 +2755,15 @@ struct anv_surface_state { */ struct anv_attachment_state { enum isl_aux_usage aux_usage; - enum isl_aux_usage input_aux_usage; struct anv_surface_state color; struct anv_surface_state input; VkImageLayout current_layout; + VkImageLayout current_stencil_layout; VkImageAspectFlags pending_clear_aspects; VkImageAspectFlags pending_load_aspects; bool fast_clear; VkClearValue clear_value; - bool clear_color_is_zero_one; - bool clear_color_is_zero; /* When multiview is active, attachments with a renderpass clear * operation have their respective layers cleared on the first @@ -2088,6 +2772,28 @@ struct anv_attachment_state { * have not been cleared yet when multiview is active. */ uint32_t pending_clear_views; + struct anv_image_view * image_view; +}; + +/** State tracking for vertex buffer flushes + * + * On Gen8-9, the VF cache only considers the bottom 32 bits of memory + * addresses. If you happen to have two vertex buffers which get placed + * exactly 4 GiB apart and use them in back-to-back draw calls, you can get + * collisions. In order to solve this problem, we track vertex address ranges + * which are live in the cache and invalidate the cache if one ever exceeds 32 + * bits. + */ +struct anv_vb_cache_range { + /* Virtual address at which the live vertex buffer cache range starts for + * this vertex buffer index. + */ + uint64_t start; + + /* Virtual address of the byte after where vertex buffer cache range ends. + * This is exclusive such that end - start is the size of the range. + */ + uint64_t end; }; /** State tracking for particular pipeline bind point @@ -2099,12 +2805,7 @@ struct anv_attachment_state { * per-stage array in anv_cmd_state. */ struct anv_cmd_pipeline_state { - struct anv_pipeline *pipeline; - struct anv_pipeline_layout *layout; - struct anv_descriptor_set *descriptors[MAX_SETS]; - uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS]; - struct anv_push_descriptor_set *push_descriptors[MAX_SETS]; }; @@ -2118,11 +2819,20 @@ struct anv_cmd_pipeline_state { struct anv_cmd_graphics_state { struct anv_cmd_pipeline_state base; + struct anv_graphics_pipeline *pipeline; + anv_cmd_dirty_mask_t dirty; uint32_t vb_dirty; + struct anv_vb_cache_range ib_bound_range; + struct anv_vb_cache_range ib_dirty_range; + struct anv_vb_cache_range vb_bound_ranges[33]; + struct anv_vb_cache_range vb_dirty_ranges[33]; + struct anv_dynamic_state dynamic; + uint32_t primitive_topology; + struct { struct anv_buffer *index_buffer; uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ @@ -2140,6 +2850,8 @@ struct anv_cmd_graphics_state { struct anv_cmd_compute_state { struct anv_cmd_pipeline_state base; + struct anv_compute_pipeline *pipeline; + bool pipeline_dirty; struct anv_address num_workgroups; @@ -2150,6 +2862,7 @@ struct anv_cmd_state { /* PIPELINE_SELECT.PipelineSelection */ uint32_t current_pipeline; const struct gen_l3_config * current_l3_config; + uint32_t last_aux_map_state; struct anv_cmd_graphics_state gfx; struct anv_cmd_compute_state compute; @@ -2164,11 +2877,17 @@ struct anv_cmd_state { VkRect2D render_area; uint32_t restart_index; struct anv_vertex_binding vertex_bindings[MAX_VBS]; + bool xfb_enabled; + struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS]; VkShaderStageFlags push_constant_stages; - struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; + struct anv_push_constants push_constants[MESA_SHADER_STAGES]; struct anv_state binding_tables[MESA_SHADER_STAGES]; struct anv_state samplers[MESA_SHADER_STAGES]; + unsigned char sampler_sha1s[MESA_SHADER_STAGES][20]; + unsigned char surface_sha1s[MESA_SHADER_STAGES][20]; + unsigned char push_sha1s[MESA_SHADER_STAGES][20]; + /** * Whether or not the gen8 PMA fix is enabled. We ensure that, at the top * of any command buffer it is disabled by disabling it in EndCommandBuffer @@ -2185,6 +2904,12 @@ struct anv_cmd_state { bool conditional_render_enabled; + /** + * Last rendering scale argument provided to + * genX(cmd_buffer_emit_hashing_mode)(). + */ + unsigned current_hash_scale; + /** * Array length is anv_cmd_state::pass::attachment_count. Array content is * valid only when recording a render pass instance. @@ -2196,16 +2921,17 @@ struct anv_cmd_state { * flat array. For depth-stencil attachments, the surface state is simply * left blank. */ - struct anv_state render_pass_states; + struct anv_state attachment_states; /** * A null surface state of the right size to match the framebuffer. This - * is one of the states in render_pass_states. + * is one of the states in attachment_states. */ struct anv_state null_surface_state; }; struct anv_cmd_pool { + struct vk_object_base base; VkAllocationCallbacks alloc; struct list_head cmd_buffers; }; @@ -2218,10 +2944,11 @@ enum anv_cmd_buffer_exec_mode { ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT, ANV_CMD_BUFFER_EXEC_MODE_CHAIN, ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, + ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN, }; struct anv_cmd_buffer { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; struct anv_device * device; @@ -2249,7 +2976,7 @@ struct anv_cmd_buffer { * initialized by anv_cmd_buffer_init_batch_bo_chain() */ struct u_vector bt_block_states; - uint32_t bt_next; + struct anv_state bt_next; struct anv_reloc_list surface_relocs; /** Last seen surface state block pool center bo offset */ @@ -2265,7 +2992,14 @@ struct anv_cmd_buffer { VkCommandBufferUsageFlags usage_flags; VkCommandBufferLevel level; + struct anv_query_pool *perf_query_pool; + struct anv_cmd_state state; + + struct anv_address return_addr; + + /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */ + uint64_t intel_perf_marker; }; VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); @@ -2275,24 +3009,19 @@ void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_cmd_buffer *secondary); void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); -VkResult anv_cmd_buffer_execbuf(struct anv_device *device, +VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue, struct anv_cmd_buffer *cmd_buffer, const VkSemaphore *in_semaphores, + const uint64_t *in_wait_values, uint32_t num_in_semaphores, const VkSemaphore *out_semaphores, + const uint64_t *out_signal_values, uint32_t num_out_semaphores, - VkFence fence); + VkFence fence, + int perf_query_pass); VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer); -VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size); -#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ - anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ - (offsetof(struct anv_push_constants, field) + \ - sizeof(cmd_buffer->state.push_constants[0]->field))) - struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, const void *data, uint32_t size, uint32_t alignment); struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, @@ -2347,6 +3076,7 @@ void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer enum anv_fence_type { ANV_FENCE_TYPE_NONE = 0, ANV_FENCE_TYPE_BO, + ANV_FENCE_TYPE_WSI_BO, ANV_FENCE_TYPE_SYNCOBJ, ANV_FENCE_TYPE_WSI, }; @@ -2377,7 +3107,7 @@ struct anv_fence_impl { * will say it's idle in this case. */ struct { - struct anv_bo bo; + struct anv_bo *bo; enum anv_bo_fence_state state; } bo; @@ -2390,6 +3120,8 @@ struct anv_fence_impl { }; struct anv_fence { + struct vk_object_base base; + /* Permanent fence state. Every fence has some form of permanent state * (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on (for * cross-process fences) or it could just be a dummy for use internally. @@ -2406,7 +3138,11 @@ struct anv_fence { struct anv_fence_impl temporary; }; +void anv_fence_reset_temporary(struct anv_device *device, + struct anv_fence *fence); + struct anv_event { + struct vk_object_base base; uint64_t semaphore; struct anv_state state; }; @@ -2415,18 +3151,46 @@ enum anv_semaphore_type { ANV_SEMAPHORE_TYPE_NONE = 0, ANV_SEMAPHORE_TYPE_DUMMY, ANV_SEMAPHORE_TYPE_BO, + ANV_SEMAPHORE_TYPE_WSI_BO, ANV_SEMAPHORE_TYPE_SYNC_FILE, ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ, + ANV_SEMAPHORE_TYPE_TIMELINE, +}; + +struct anv_timeline_point { + struct list_head link; + + uint64_t serial; + + /* Number of waiter on this point, when > 0 the point should not be garbage + * collected. + */ + int waiting; + + /* BO used for synchronization. */ + struct anv_bo *bo; +}; + +struct anv_timeline { + pthread_mutex_t mutex; + pthread_cond_t cond; + + uint64_t highest_past; + uint64_t highest_pending; + + struct list_head points; + struct list_head free_points; }; struct anv_semaphore_impl { enum anv_semaphore_type type; union { - /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO. - * This BO will be added to the object list on any execbuf2 calls for - * which this semaphore is used as a wait or signal fence. When used as - * a signal fence, the EXEC_OBJECT_WRITE flag will be set. + /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO + * or type == ANV_SEMAPHORE_TYPE_WSI_BO. This BO will be added to the + * object list on any execbuf2 calls for which this semaphore is used as + * a wait or signal fence. When used as a signal fence or when type == + * ANV_SEMAPHORE_TYPE_WSI_BO, the EXEC_OBJECT_WRITE flag will be set. */ struct anv_bo *bo; @@ -2441,10 +3205,20 @@ struct anv_semaphore_impl { * import so we don't need to bother with a userspace cache. */ uint32_t syncobj; + + /* Non shareable timeline semaphore + * + * Used when kernel don't have support for timeline semaphores. + */ + struct anv_timeline timeline; }; }; struct anv_semaphore { + struct vk_object_base base; + + uint32_t refcount; + /* Permanent semaphore state. Every semaphore has some form of permanent * state (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on * (for cross-process semaphores0 or it could just be a dummy for use @@ -2466,6 +3240,8 @@ void anv_semaphore_reset_temporary(struct anv_device *device, struct anv_semaphore *semaphore); struct anv_shader_module { + struct vk_object_base base; + unsigned char sha1[20]; uint32_t size; char data[0]; @@ -2493,12 +3269,17 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) __tmp &= ~(1 << (stage))) struct anv_pipeline_bind_map { + unsigned char surface_sha1[20]; + unsigned char sampler_sha1[20]; + unsigned char push_sha1[20]; + uint32_t surface_count; uint32_t sampler_count; - uint32_t image_count; struct anv_pipeline_binding * surface_to_descriptor; struct anv_pipeline_binding * sampler_to_descriptor; + + struct anv_push_range push_ranges[4]; }; struct anv_shader_bin_key { @@ -2509,6 +3290,8 @@ struct anv_shader_bin_key { struct anv_shader_bin { uint32_t ref_cnt; + gl_shader_stage stage; + const struct anv_shader_bin_key *key; struct anv_state kernel; @@ -2520,6 +3303,9 @@ struct anv_shader_bin { const struct brw_stage_prog_data *prog_data; uint32_t prog_data_size; + struct brw_compile_stats stats[3]; + uint32_t num_stats; + struct nir_xfb_info *xfb_info; struct anv_pipeline_bind_map bind_map; @@ -2527,11 +3313,13 @@ struct anv_shader_bin { struct anv_shader_bin * anv_shader_bin_create(struct anv_device *device, + gl_shader_stage stage, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, const void *constant_data, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, - uint32_t prog_data_size, const void *prog_data_param, + uint32_t prog_data_size, + const struct brw_compile_stats *stats, uint32_t num_stats, const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); @@ -2553,51 +3341,83 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader) anv_shader_bin_destroy(device, shader); } +struct anv_pipeline_executable { + gl_shader_stage stage; + + struct brw_compile_stats stats; + + char *nir; + char *disasm; +}; + +enum anv_pipeline_type { + ANV_PIPELINE_GRAPHICS, + ANV_PIPELINE_COMPUTE, +}; + struct anv_pipeline { + struct vk_object_base base; + struct anv_device * device; + struct anv_batch batch; - uint32_t batch_data[512]; struct anv_reloc_list batch_relocs; - uint32_t dynamic_state_mask; - struct anv_dynamic_state dynamic_state; - struct anv_subpass * subpass; + void * mem_ctx; - bool needs_data_cache; + enum anv_pipeline_type type; + VkPipelineCreateFlags flags; - struct anv_shader_bin * shaders[MESA_SHADER_STAGES]; + struct util_dynarray executables; - struct { - const struct gen_l3_config * l3_config; - uint32_t total_size; - } urb; + const struct gen_l3_config * l3_config; +}; - VkShaderStageFlags active_stages; - struct anv_state blend_state; +struct anv_graphics_pipeline { + struct anv_pipeline base; - uint32_t vb_used; - struct anv_pipeline_vertex_binding { - uint32_t stride; - bool instanced; - uint32_t instance_divisor; - } vb[MAX_VBS]; + uint32_t batch_data[512]; + + anv_cmd_dirty_mask_t dynamic_state_mask; + struct anv_dynamic_state dynamic_state; - bool primitive_restart; uint32_t topology; - uint32_t cs_right_mask; + struct anv_subpass * subpass; + struct anv_shader_bin * shaders[MESA_SHADER_STAGES]; + + VkShaderStageFlags active_stages; + + bool primitive_restart; bool writes_depth; bool depth_test_enable; bool writes_stencil; bool stencil_test_enable; bool depth_clamp_enable; + bool depth_clip_enable; bool sample_shading_enable; bool kill_pixel; + bool depth_bounds_test_enable; + + /* When primitive replication is used, subpass->view_mask will describe what + * views to replicate. + */ + bool use_primitive_replication; + + struct anv_state blend_state; + + uint32_t vb_used; + struct anv_pipeline_vertex_binding { + uint32_t stride; + bool instanced; + uint32_t instance_divisor; + } vb[MAX_VBS]; struct { uint32_t sf[7]; uint32_t depth_stencil_state[3]; + uint32_t clip[4]; } gen7; struct { @@ -2609,38 +3429,62 @@ struct anv_pipeline { struct { uint32_t wm_depth_stencil[4]; } gen9; +}; + +struct anv_compute_pipeline { + struct anv_pipeline base; + struct anv_shader_bin * cs; + uint32_t cs_right_mask; + uint32_t batch_data[9]; uint32_t interface_descriptor_data[8]; }; +#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \ + static inline struct anv_##pipe_type##_pipeline * \ + anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \ + { \ + assert(pipeline->type == pipe_enum); \ + return (struct anv_##pipe_type##_pipeline *) pipeline; \ + } + +ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS) +ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE) + static inline bool -anv_pipeline_has_stage(const struct anv_pipeline *pipeline, +anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline, gl_shader_stage stage) { return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0; } -#define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage) \ -static inline const struct brw_##prefix##_prog_data * \ -get_##prefix##_prog_data(const struct anv_pipeline *pipeline) \ -{ \ - if (anv_pipeline_has_stage(pipeline, stage)) { \ - return (const struct brw_##prefix##_prog_data *) \ - pipeline->shaders[stage]->prog_data; \ - } else { \ - return NULL; \ - } \ +#define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \ +static inline const struct brw_##prefix##_prog_data * \ +get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \ +{ \ + if (anv_pipeline_has_stage(pipeline, stage)) { \ + return (const struct brw_##prefix##_prog_data *) \ + pipeline->shaders[stage]->prog_data; \ + } else { \ + return NULL; \ + } \ } -ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX) -ANV_DECL_GET_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL) -ANV_DECL_GET_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL) -ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY) -ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT) -ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE) +ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX) +ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL) +ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL) +ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY) +ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT) + +static inline const struct brw_cs_prog_data * +get_cs_prog_data(const struct anv_compute_pipeline *pipeline) +{ + assert(pipeline->cs); + return (const struct brw_cs_prog_data *) pipeline->cs->prog_data; +} static inline const struct brw_vue_prog_data * -anv_pipeline_get_last_vue_prog_data(const struct anv_pipeline *pipeline) +anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline) { if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) return &get_gs_prog_data(pipeline)->base; @@ -2651,19 +3495,40 @@ anv_pipeline_get_last_vue_prog_data(const struct anv_pipeline *pipeline) } VkResult -anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc); +anv_pipeline_init(struct anv_pipeline *pipeline, + struct anv_device *device, + enum anv_pipeline_type type, + VkPipelineCreateFlags flags, + const VkAllocationCallbacks *pAllocator); + +void +anv_pipeline_finish(struct anv_pipeline *pipeline, + struct anv_device *device, + const VkAllocationCallbacks *pAllocator); + +VkResult +anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc); VkResult -anv_pipeline_compile_cs(struct anv_pipeline *pipeline, +anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, struct anv_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, const struct anv_shader_module *module, const char *entrypoint, const VkSpecializationInfo *spec_info); +struct anv_cs_parameters { + uint32_t group_size; + uint32_t simd_size; + uint32_t threads; +}; + +struct anv_cs_parameters +anv_cs_parameters(const struct anv_compute_pipeline *pipeline); + struct anv_format_plane { enum isl_format isl_format:16; struct isl_swizzle swizzle; @@ -2689,6 +3554,12 @@ struct anv_format { bool can_ycbcr; }; +/** + * Return the aspect's _format_ plane, not its _memory_ plane (using the + * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a + * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain + * VK_IMAGE_ASPECT_MEMORY_PLANE_* . + */ static inline uint32_t anv_image_aspect_to_plane(VkImageAspectFlags image_aspects, VkImageAspectFlags aspect_mask) @@ -2752,6 +3623,12 @@ anv_get_isl_format(const struct gen_device_info *devinfo, VkFormat vk_format, return anv_get_format_plane(devinfo, vk_format, aspect, tiling).isl_format; } +bool anv_formats_ccs_e_compatible(const struct gen_device_info *devinfo, + VkImageCreateFlags create_flags, + VkFormat vk_format, + VkImageTiling vk_tiling, + const VkImageFormatListCreateInfoKHR *fmt_list); + static inline struct isl_swizzle anv_swizzle_for_render(struct isl_swizzle swizzle) { @@ -2784,7 +3661,9 @@ struct anv_surface { }; struct anv_image { - VkImageType type; + struct vk_object_base base; + + VkImageType type; /**< VkImageCreateInfo::imageType */ /* The original VkFormat provided by the client. This may not match any * of the actual surface formats. */ @@ -2797,7 +3676,8 @@ struct anv_image { uint32_t array_size; uint32_t samples; /**< VkImageCreateInfo::samples */ uint32_t n_planes; - VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ + VkImageUsageFlags usage; /**< VkImageCreateInfo::usage. */ + VkImageUsageFlags stencil_usage; VkImageCreateFlags create_flags; /* Flags used when creating image. */ VkImageTiling tiling; /** VkImageCreateInfo::tiling */ @@ -2824,11 +3704,6 @@ struct anv_image { */ bool disjoint; - /* All the formats that can be used when creating views of this image - * are CCS_E compatible. - */ - bool ccs_e_compatible; - /* Image was created with external format. */ bool external_format; @@ -2888,11 +3763,9 @@ struct anv_image { struct anv_surface shadow_surface; /** - * For color images, this is the aux usage for this image when not used - * as a color attachment. - * - * For depth/stencil images, this is set to ISL_AUX_USAGE_HIZ if the - * image has a HiZ buffer. + * The base aux usage for this image. For color images, this can be + * either CCS_E or CCS_D depending on whether or not we can reliably + * leave CCS on all the time. */ enum isl_aux_usage aux_usage; @@ -2932,8 +3805,13 @@ anv_image_aux_levels(const struct anv_image * const image, VkImageAspectFlagBits aspect) { uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); - return image->planes[plane].aux_surface.isl.size_B > 0 ? - image->planes[plane].aux_surface.isl.levels : 0; + if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) + return 0; + + /* The Gen12 CCS aux surface is represented with only one level. */ + return image->planes[plane].aux_surface.isl.tiling == ISL_TILING_GEN12_CCS ? + image->planes[plane].surface.isl.levels : + image->planes[plane].aux_surface.isl.levels; } /* Returns the number of auxiliary buffer layers attached to an image. */ @@ -2954,13 +3832,20 @@ anv_image_aux_layers(const struct anv_image * const image, return 0; } else { uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); - return MAX2(image->planes[plane].aux_surface.isl.logical_level0_px.array_len, - image->planes[plane].aux_surface.isl.logical_level0_px.depth >> miplevel); + + /* The Gen12 CCS aux surface is represented with only one layer. */ + const struct isl_extent4d *aux_logical_level0_px = + image->planes[plane].aux_surface.isl.tiling == ISL_TILING_GEN12_CCS ? + &image->planes[plane].surface.isl.logical_level0_px : + &image->planes[plane].aux_surface.isl.logical_level0_px; + + return MAX2(aux_logical_level0_px->array_len, + aux_logical_level0_px->depth >> miplevel); } } static inline struct anv_address -anv_image_get_clear_color_addr(const struct anv_device *device, +anv_image_get_clear_color_addr(UNUSED const struct anv_device *device, const struct anv_image *image, VkImageAspectFlagBits aspect) { @@ -3008,6 +3893,8 @@ anv_image_get_compression_state_addr(const struct anv_device *device, } addr.offset += array_layer * 4; + assert(addr.offset < + image->planes[plane].address.offset + image->planes[plane].size); return addr; } @@ -3019,12 +3906,37 @@ anv_can_sample_with_hiz(const struct gen_device_info * const devinfo, if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) return false; - if (devinfo->gen < 8) + /* For Gen8-11, there are some restrictions around sampling from HiZ. + * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode + * say: + * + * "If this field is set to AUX_HIZ, Number of Multisamples must + * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D." + */ + if (image->type == VK_IMAGE_TYPE_3D) + return false; + + /* Allow this feature on BDW even though it is disabled in the BDW devinfo + * struct. There's documentation which suggests that this feature actually + * reduces performance on BDW, but it has only been observed to help so + * far. Sampling fast-cleared blocks on BDW must also be handled with care + * (see depth_stencil_attachment_compute_aux_usage() for more info). + */ + if (devinfo->gen != 8 && !devinfo->has_sample_with_hiz) return false; return image->samples == 1; } +static inline bool +anv_image_plane_uses_aux_map(const struct anv_device *device, + const struct anv_image *image, + uint32_t plane) +{ + return device->info.has_aux_map && + isl_aux_usage_has_ccs(image->planes[plane].aux_usage); +} + void anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, @@ -3081,7 +3993,7 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, void anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, - enum isl_format format, + enum isl_format format, struct isl_swizzle swizzle, VkImageAspectFlagBits aspect, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op mcs_op, union isl_color_value *clear_value, @@ -3089,7 +4001,7 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, void anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, - enum isl_format format, + enum isl_format format, struct isl_swizzle swizzle, VkImageAspectFlagBits aspect, uint32_t level, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op ccs_op, union isl_color_value *clear_value, @@ -3098,13 +4010,21 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, void anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + VkImageAspectFlagBits aspect, uint32_t base_level, uint32_t level_count, uint32_t base_layer, uint32_t layer_count); +enum isl_aux_state +anv_layout_to_aux_state(const struct gen_device_info * const devinfo, + const struct anv_image *image, + const VkImageAspectFlagBits aspect, + const VkImageLayout layout); + enum isl_aux_usage anv_layout_to_aux_usage(const struct gen_device_info * const devinfo, const struct anv_image *image, const VkImageAspectFlagBits aspect, + const VkImageUsageFlagBits usage, const VkImageLayout layout); enum anv_fast_clear_type @@ -3159,6 +4079,8 @@ anv_image_aspects_compatible(VkImageAspectFlags aspects1, } struct anv_image_view { + struct vk_object_base base; + const struct anv_image *image; /**< VkImageViewCreateInfo::image */ VkImageAspectFlags aspect_mask; @@ -3230,16 +4152,12 @@ VkResult anv_image_create(VkDevice _device, const VkAllocationCallbacks* alloc, VkImage *pImage); -const struct anv_surface * -anv_image_get_surface_for_aspect_mask(const struct anv_image *image, - VkImageAspectFlags aspect_mask); - enum isl_format anv_isl_format_for_descriptor_type(VkDescriptorType type); -static inline struct VkExtent3D +static inline VkExtent3D anv_sanitize_image_extent(const VkImageType imageType, - const struct VkExtent3D imageExtent) + const VkExtent3D imageExtent) { switch (imageType) { case VK_IMAGE_TYPE_1D: @@ -3253,9 +4171,9 @@ anv_sanitize_image_extent(const VkImageType imageType, } } -static inline struct VkOffset3D +static inline VkOffset3D anv_sanitize_image_offset(const VkImageType imageType, - const struct VkOffset3D imageOffset) + const VkOffset3D imageOffset) { switch (imageType) { case VK_IMAGE_TYPE_1D: @@ -3302,7 +4220,37 @@ anv_clear_color_from_att_state(union isl_color_value *clear_color, } +/* Haswell border color is a bit of a disaster. Float and unorm formats use a + * straightforward 32-bit float color in the first 64 bytes. Instead of using + * a nice float/integer union like Gen8+, Haswell specifies the integer border + * color as a separate entry /after/ the float color. The layout of this entry + * also depends on the format's bpp (with extra hacks for RG32), and overlaps. + * + * Since we don't know the format/bpp, we can't make any of the border colors + * containing '1' work for all formats, as it would be in the wrong place for + * some of them. We opt to make 32-bit integers work as this seems like the + * most common option. Fortunately, transparent black works regardless, as + * all zeroes is the same in every bit-size. + */ +struct hsw_border_color { + float float32[4]; + uint32_t _pad0[12]; + uint32_t uint32[4]; + uint32_t _pad1[108]; +}; + +struct gen8_border_color { + union { + float float32[4]; + uint32_t uint32[4]; + }; + /* Pad out to 64 bytes */ + uint32_t _pad[12]; +}; + struct anv_ycbcr_conversion { + struct vk_object_base base; + const struct anv_format * format; VkSamplerYcbcrModelConversion ycbcr_model; VkSamplerYcbcrRange ycbcr_range; @@ -3313,12 +4261,23 @@ struct anv_ycbcr_conversion { }; struct anv_sampler { + struct vk_object_base base; + uint32_t state[3][4]; uint32_t n_planes; struct anv_ycbcr_conversion *conversion; + + /* Blob of sampler state data which is guaranteed to be 32-byte aligned + * and with a 32-byte stride for use as bindless samplers. + */ + struct anv_state bindless_state; + + struct anv_state custom_border_color; }; struct anv_framebuffer { + struct vk_object_base base; + uint32_t width; uint32_t height; uint32_t layers; @@ -3331,6 +4290,9 @@ struct anv_subpass_attachment { VkImageUsageFlagBits usage; uint32_t attachment; VkImageLayout layout; + + /* Used only with attachment containing stencil data. */ + VkImageLayout stencil_layout; }; struct anv_subpass { @@ -3381,11 +4343,16 @@ struct anv_render_pass_attachment { VkImageLayout final_layout; VkImageLayout first_subpass_layout; + VkImageLayout stencil_initial_layout; + VkImageLayout stencil_final_layout; + /* The subpass id in which the attachment will be used last. */ uint32_t last_subpass_idx; }; struct anv_render_pass { + struct vk_object_base base; + uint32_t attachment_count; uint32_t subpass_count; /* An array of subpass_count+1 flushes, one per subpass boundary */ @@ -3396,28 +4363,55 @@ struct anv_render_pass { #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff +#define OA_SNAPSHOT_SIZE (256) +#define ANV_KHR_PERF_QUERY_SIZE (ALIGN(sizeof(uint64_t), 64) + 2 * OA_SNAPSHOT_SIZE) + struct anv_query_pool { + struct vk_object_base base; + VkQueryType type; VkQueryPipelineStatisticFlags pipeline_statistics; /** Stride between slots, in bytes */ uint32_t stride; /** Number of slots in this query pool */ uint32_t slots; - struct anv_bo bo; + struct anv_bo * bo; + + /* Perf queries : */ + struct anv_bo reset_bo; + uint32_t n_counters; + struct gen_perf_counter_pass *counter_pass; + uint32_t n_passes; + struct gen_perf_query_info **pass_query; }; +static inline uint32_t khr_perf_query_preamble_offset(struct anv_query_pool *pool, + uint32_t pass) +{ + return pass * ANV_KHR_PERF_QUERY_SIZE + 8; +} + int anv_get_instance_entrypoint_index(const char *name); int anv_get_device_entrypoint_index(const char *name); +int anv_get_physical_device_entrypoint_index(const char *name); + +const char *anv_get_instance_entry_name(int index); +const char *anv_get_physical_device_entry_name(int index); +const char *anv_get_device_entry_name(int index); bool anv_instance_entrypoint_is_enabled(int index, uint32_t core_version, const struct anv_instance_extension_table *instance); - +bool +anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version, + const struct anv_instance_extension_table *instance); bool anv_device_entrypoint_is_enabled(int index, uint32_t core_version, const struct anv_instance_extension_table *instance, const struct anv_device_extension_table *device); +void *anv_resolve_device_entrypoint(const struct gen_device_info *devinfo, + uint32_t index); void *anv_lookup_entrypoint(const struct gen_device_info *devinfo, const char *name); @@ -3433,8 +4427,7 @@ enum anv_dump_action { void anv_dump_start(struct anv_device *device, enum anv_dump_action actions); void anv_dump_finish(void); -void anv_dump_add_framebuffer(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); +void anv_dump_add_attachments(struct anv_cmd_buffer *cmd_buffer); static inline uint32_t anv_get_subpass_id(const struct anv_cmd_state * const cmd_state) @@ -3451,66 +4444,68 @@ anv_get_subpass_id(const struct anv_cmd_state * const cmd_state) return subpass_id; } -#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ - \ - static inline struct __anv_type * \ - __anv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __anv_type *) _handle; \ - } \ - \ - static inline __VkType \ - __anv_type ## _to_handle(struct __anv_type *_obj) \ - { \ - return (__VkType) _obj; \ - } - -#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ - \ - static inline struct __anv_type * \ - __anv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __anv_type *)(uintptr_t) _handle; \ - } \ - \ - static inline __VkType \ - __anv_type ## _to_handle(struct __anv_type *_obj) \ - { \ - return (__VkType)(uintptr_t) _obj; \ - } +struct gen_perf_config *anv_get_perf(const struct gen_device_info *devinfo, int fd); +void anv_device_perf_init(struct anv_device *device); +void anv_perf_write_pass_results(struct gen_perf_config *perf, + struct anv_query_pool *pool, uint32_t pass, + const struct gen_perf_query_result *accumulated_results, + union VkPerformanceCounterResultKHR *results); #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ - struct __anv_type *__name = __anv_type ## _from_handle(__handle) - -ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCommandBuffer) -ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) -ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) -ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) -ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) - -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, VkDescriptorPool) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, VkDescriptorUpdateTemplate) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, VkPipelineCache) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, VkSemaphore) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) -ANV_DEFINE_NONDISP_HANDLE_CASTS(vk_debug_report_callback, VkDebugReportCallbackEXT) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, VkSamplerYcbcrConversion) + VK_FROM_HANDLE(__anv_type, __name, __handle) + +VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, base, VkCommandBuffer, + VK_OBJECT_TYPE_COMMAND_BUFFER) +VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) +VK_DEFINE_HANDLE_CASTS(anv_instance, base, VkInstance, VK_OBJECT_TYPE_INSTANCE) +VK_DEFINE_HANDLE_CASTS(anv_physical_device, base, VkPhysicalDevice, + VK_OBJECT_TYPE_PHYSICAL_DEVICE) +VK_DEFINE_HANDLE_CASTS(anv_queue, base, VkQueue, VK_OBJECT_TYPE_QUEUE) + +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, base, VkCommandPool, + VK_OBJECT_TYPE_COMMAND_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, base, VkBuffer, + VK_OBJECT_TYPE_BUFFER) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView, + VK_OBJECT_TYPE_BUFFER_VIEW) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool, + VK_OBJECT_TYPE_DESCRIPTOR_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet, + VK_OBJECT_TYPE_DESCRIPTOR_SET) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base, + VkDescriptorSetLayout, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base, + VkDescriptorUpdateTemplate, + VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory, + VK_OBJECT_TYPE_DEVICE_MEMORY) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, base, VkFramebuffer, + VK_OBJECT_TYPE_FRAMEBUFFER) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, base, VkImage, VK_OBJECT_TYPE_IMAGE) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, base, VkImageView, + VK_OBJECT_TYPE_IMAGE_VIEW); +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, base, VkPipelineCache, + VK_OBJECT_TYPE_PIPELINE_CACHE) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline, + VK_OBJECT_TYPE_PIPELINE) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout, + VK_OBJECT_TYPE_PIPELINE_LAYOUT) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool, + VK_OBJECT_TYPE_QUERY_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, base, VkRenderPass, + VK_OBJECT_TYPE_RENDER_PASS) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler, + VK_OBJECT_TYPE_SAMPLER) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, base, VkSemaphore, + VK_OBJECT_TYPE_SEMAPHORE) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, base, VkShaderModule, + VK_OBJECT_TYPE_SHADER_MODULE) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base, + VkSamplerYcbcrConversion, + VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION) /* Gen-specific function declarations */ #ifdef genX @@ -3534,6 +4529,9 @@ ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, VkSamplerYcbcrConversion) # define genX(x) gen11_##x # include "anv_genX.h" # undef genX +# define genX(x) gen12_##x +# include "anv_genX.h" +# undef genX #endif #endif /* ANV_PRIVATE_H */