#include "common/gen_clflush.h"
#include "common/gen_decoder.h"
#include "common/gen_gem.h"
+#include "common/gen_l3_config.h"
#include "dev/gen_device_info.h"
#include "blorp/blorp.h"
#include "compiler/brw_compiler.h"
+#include "util/bitset.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
-#include "util/set.h"
#include "util/sparse_array.h"
#include "util/u_atomic.h"
#include "util/u_vector.h"
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;
+struct anv_batch;
struct anv_buffer;
struct anv_buffer_view;
struct anv_image_view;
struct anv_instance;
struct gen_aux_map_context;
-struct gen_l3_config;
struct gen_perf_config;
#include <vulkan/vulkan.h>
#include "common/intel_log.h"
#include "wsi_common.h"
+#define NSEC_PER_SEC 1000000000ull
+
/* anv Virtual Memory Layout
* =========================
*
#define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
-#define HIGH_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */
+#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */
+#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x0002bfffffffULL
+#define HIGH_HEAP_MIN_ADDRESS 0x0002c0000000ULL /* 11 GiB */
#define LOW_HEAP_SIZE \
(LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
(SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define INSTRUCTION_STATE_POOL_SIZE \
(INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
+#define CLIENT_VISIBLE_HEAP_SIZE \
+ (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
/* Allowing different clear colors requires us to perform a depth resolve at
* the end of certain render passes. This is because while slow clears store
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
+#define ANV_UBO_BOUNDS_CHECK_ALIGNMENT 32
+#define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
*
return v - (v % a);
}
+/* Round v down to the nearest multiple of a.
+ *
+ * `a` must be a nonzero power of two (asserted via the a == (a & -a) trick);
+ * this lets us use a mask instead of a divide.
+ */
+static inline uint32_t
+align_down_u32(uint32_t v, uint32_t a)
+{
+   assert(a != 0 && a == (a & -a));
+   return v & ~(a - 1);
+}
+
+/* Round v up to the nearest multiple of a (a nonzero power of two; asserted).
+ *
+ * NOTE(review): v + a - 1 wraps for v near UINT32_MAX — callers presumably
+ * stay well below that; confirm at call sites if in doubt.
+ */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
-   return (v + a - 1) & ~(a - 1);
+   return align_down_u32(v + a - 1, a);
}
+/* Round v down to the nearest multiple of a.  `a` must be a nonzero power of
+ * two (asserted).  64-bit twin of align_down_u32 above.
+ */
static inline uint64_t
-align_u64(uint64_t v, uint64_t a)
+align_down_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));
-   return (v + a - 1) & ~(a - 1);
+   return v & ~(a - 1);
+}
+
+/* Round v up to the nearest multiple of a.  The power-of-two requirement on
+ * `a` is enforced by the assert inside align_down_u64.  NOTE(review):
+ * v + a - 1 can wrap for v near UINT64_MAX.
+ */
+static inline uint64_t
+align_u64(uint64_t v, uint64_t a)
+{
+   return align_down_u64(v + a - 1, a);
}
static inline int32_t
};
}
+/* Split a tagged pointer: the low `bits` bits are returned through *flags and
+ * the pointer with those bits cleared is returned.  Inverse of anv_pack_ptr().
+ */
+static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
+{
+   uintptr_t mask = (1ull << bits) - 1;
+   *flags = ptr & mask;
+   return (void *) (ptr & ~mask);
+}
+
+/* Stash `flags` in the low `bits` bits of `ptr`.
+ *
+ * Flag bits above `bits` are silently dropped by the mask.  NOTE(review):
+ * `ptr`'s low bits are not cleared first, so this assumes the pointer is
+ * aligned to at least 1 << bits — confirm at call sites (e.g. the fence_bos
+ * array, which uses the bottom bit).
+ */
+static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
+{
+   uintptr_t value = (uintptr_t) ptr;
+   uintptr_t mask = (1ull << bits) - 1;
+   return value | (mask & flags);
+}
+
#define for_each_bit(b, dword) \
for (uint32_t __dword = (dword); \
(b) = __builtin_ffs(__dword) - 1, __dword; \
#define vk_error(error) __vk_errorf(NULL, NULL,\
VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,\
error, __FILE__, __LINE__, NULL)
-#define vk_errorv(instance, obj, error, format, args)\
- __vk_errorv(instance, obj, REPORT_OBJECT_TYPE(obj), error,\
- __FILE__, __LINE__, format, args)
-#define vk_errorf(instance, obj, error, format, ...)\
+#define vk_errorfi(instance, obj, error, format, ...)\
__vk_errorf(instance, obj, REPORT_OBJECT_TYPE(obj), error,\
__FILE__, __LINE__, format, ## __VA_ARGS__)
+#define vk_errorf(device, obj, error, format, ...)\
+ vk_errorfi(anv_device_instance_or_null(device),\
+ obj, error, format, ## __VA_ARGS__)
#else
#define vk_error(error) error
-#define vk_errorf(instance, obj, error, format, ...) error
+#define vk_errorfi(instance, obj, error, format, ...) error
+#define vk_errorf(device, obj, error, format, ...) error
#endif
/**
#define anv_debug_ignored_stype(sType) \
intel_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
-void __anv_perf_warn(struct anv_instance *instance, const void *object,
+void __anv_perf_warn(struct anv_device *device, const void *object,
VkDebugReportObjectTypeEXT type, const char *file,
int line, const char *format, ...)
anv_printflike(6, 7);
*/
uint32_t index;
+ /* Index for use with util_sparse_array_free_list */
+ uint32_t free_index;
+
/* Last known offset. This value is provided by the kernel when we
* execbuf and is used as the presumed offset for the next bunch of
* relocations.
*/
uint64_t offset;
+ /** Size of the buffer not including implicit aux */
uint64_t size;
/* Map for internally mapped BOs.
*/
void *map;
+ /** Size of the implicit CCS range at the end of the buffer
+ *
+ * On Gen12, CCS data is always a direct 1/256 scale-down. A single 64K
+ * page of main surface data maps to a 256B chunk of CCS data and that
+ * mapping is provided on TGL-LP by the AUX table which maps virtual memory
+ * addresses in the main surface to virtual memory addresses for CCS data.
+ *
+ * Because we can't change these maps around easily and because Vulkan
+ * allows two VkImages to be bound to overlapping memory regions (as long
+ * as the app is careful), it's not feasible to make this mapping part of
+ * the image. (On Gen11 and earlier, the mapping was provided via
+ * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
+ * Instead, we attach the CCS data directly to the buffer object and setup
+ * the AUX table mapping at BO creation time.
+ *
+ * This field is for internal tracking use by the BO allocator only and
+ * should not be touched by other parts of the code. If something wants to
+ * know if a BO has implicit CCS data, it should instead look at the
+ * has_implicit_ccs boolean below.
+ *
+ * This data is not included in maps of this buffer.
+ */
+ uint32_t _ccs_size;
+
/** Flags to pass to the kernel through drm_i915_exec_object2::flags */
uint32_t flags;
/** True if this BO wraps a host pointer */
bool from_host_ptr:1;
+
+ /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
+ bool has_client_visible_address:1;
+
+ /** True if this BO has implicit CCS data attached to it */
+ bool has_implicit_ccs:1;
};
-static inline void
-anv_bo_init(struct anv_bo *bo, uint32_t gem_handle, uint64_t size)
+/* Take a reference on `bo` (atomic increment of bo->refcount) and return it,
+ * so the call can be chained: foo = anv_bo_ref(bo).  Must be balanced by
+ * whatever release path decrements the refcount (not visible in this hunk).
+ */
+static inline struct anv_bo *
+anv_bo_ref(struct anv_bo *bo)
{
-   bo->gem_handle = gem_handle;
-   bo->refcount = 1;
-   bo->index = 0;
-   bo->offset = -1;
-   bo->size = size;
-   bo->map = NULL;
-   bo->flags = 0;
-   bo->is_external = false;
-   bo->is_wrapper = false;
-   bo->has_fixed_address = false;
-   bo->from_host_ptr = false;
+   p_atomic_inc(&bo->refcount);
+   return bo;
}
static inline struct anv_bo *
struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
};
-struct anv_state_stream_block;
-
struct anv_state_stream {
struct anv_state_pool *state_pool;
uint32_t next;
/* List of all blocks allocated from this pool */
- struct anv_state_stream_block *block_list;
+ struct util_dynarray all_blocks;
};
/* The block_pool functions exported for testing only. The block pool should
uint32_t block_size, uint32_t *padding);
int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
uint32_t block_size);
-void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset);
+void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
+size);
VkResult anv_state_pool_init(struct anv_state_pool *pool,
struct anv_device *device,
struct anv_bo_pool {
struct anv_device *device;
- uint64_t bo_flags;
-
- void *free_list[16];
+ struct util_sparse_array_free_list free_list[16];
};
-void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
- uint64_t bo_flags);
+void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device);
void anv_bo_pool_finish(struct anv_bo_pool *pool);
-VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo,
- uint32_t size);
-void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo);
-
-struct anv_scratch_bo {
- bool exists;
- struct anv_bo bo;
-};
+VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
+ struct anv_bo **bo_out);
+void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
struct anv_scratch_pool {
/* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
- struct anv_scratch_bo bos[16][MESA_SHADER_STAGES];
+ struct anv_bo *bos[16][MESA_SHADER_STAGES];
};
void anv_scratch_pool_init(struct anv_device *device,
/* Standard bits passed on to the client */
VkMemoryPropertyFlags propertyFlags;
uint32_t heapIndex;
-
- /* Driver-internal book-keeping */
- VkBufferUsageFlags valid_buffer_usage;
};
struct anv_memory_heap {
VkMemoryHeapFlags flags;
/* Driver-internal book-keeping */
- uint64_t vma_start;
- uint64_t vma_size;
- bool supports_48bit_addresses;
VkDeviceSize used;
};
struct anv_physical_device {
VK_LOADER_DATA _loader_data;
+ /* Link in anv_instance::physical_devices */
+ struct list_head link;
+
struct anv_instance * instance;
- uint32_t chipset_id;
bool no_hw;
char path[20];
const char * name;
struct isl_device isl_dev;
struct gen_perf_config * perf;
int cmd_parser_version;
+ bool has_softpin;
bool has_exec_async;
bool has_exec_capture;
bool has_exec_fence;
bool has_syncobj;
bool has_syncobj_wait;
bool has_context_priority;
- bool use_softpin;
bool has_context_isolation;
bool has_mem_available;
+ uint64_t gtt_size;
+
+ bool use_softpin;
bool always_use_bindless;
/** True if we can access buffers using A64 messages */
/** True if we can use bindless access for samplers */
bool has_bindless_samplers;
+ /** True if this device has implicit AUX
+ *
+ * If true, CCS is handled as an implicit attachment to the BO rather than
+ * as an explicitly bound surface.
+ */
+ bool has_implicit_ccs;
+
+ bool always_flush_cache;
+
struct anv_device_extension_table supported_extensions;
- struct anv_physical_device_dispatch_table dispatch;
uint32_t eu_total;
uint32_t subslice_total;
struct anv_instance_extension_table enabled_extensions;
struct anv_instance_dispatch_table dispatch;
+ struct anv_physical_device_dispatch_table physical_device_dispatch;
struct anv_device_dispatch_table device_dispatch;
- int physicalDeviceCount;
- struct anv_physical_device physicalDevice;
+ bool physical_devices_enumerated;
+ struct list_head physical_devices;
bool pipeline_cache_enabled;
bool anv_physical_device_extension_supported(struct anv_physical_device *dev,
const char *name);
+struct anv_queue_submit {
+ struct anv_cmd_buffer * cmd_buffer;
+
+ uint32_t fence_count;
+ uint32_t fence_array_length;
+ struct drm_i915_gem_exec_fence * fences;
+
+ uint32_t temporary_semaphore_count;
+ uint32_t temporary_semaphore_array_length;
+ struct anv_semaphore_impl * temporary_semaphores;
+
+ /* Semaphores to be signaled with a SYNC_FD. */
+ struct anv_semaphore ** sync_fd_semaphores;
+ uint32_t sync_fd_semaphore_count;
+ uint32_t sync_fd_semaphore_array_length;
+
+   /* Allocated only with non-shareable timelines. */
+ struct anv_timeline ** wait_timelines;
+ uint32_t wait_timeline_count;
+ uint32_t wait_timeline_array_length;
+ uint64_t * wait_timeline_values;
+
+ struct anv_timeline ** signal_timelines;
+ uint32_t signal_timeline_count;
+ uint32_t signal_timeline_array_length;
+ uint64_t * signal_timeline_values;
+
+ int in_fence;
+ bool need_out_fence;
+ int out_fence;
+
+ uint32_t fence_bo_count;
+ uint32_t fence_bo_array_length;
+ /* An array of struct anv_bo pointers with lower bit used as a flag to
+ * signal we will wait on that BO (see anv_(un)pack_ptr).
+ */
+ uintptr_t * fence_bos;
+
+ const VkAllocationCallbacks * alloc;
+ VkSystemAllocationScope alloc_scope;
+
+ struct anv_bo * simple_bo;
+ uint32_t simple_bo_size;
+
+ struct list_head link;
+};
+
struct anv_queue {
VK_LOADER_DATA _loader_data;
struct anv_device * device;
+ /*
+ * A list of struct anv_queue_submit to be submitted to i915.
+ */
+ struct list_head queued_submits;
+
VkDeviceQueueCreateFlags flags;
};
const void *key, uint32_t key_size);
struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+ gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
const void *constant_data,
struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
struct anv_pipeline_cache *cache,
+ gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
const void *constant_data,
VkAllocationCallbacks alloc;
- struct anv_instance * instance;
- uint32_t chipset_id;
+ struct anv_physical_device * physical;
bool no_hw;
struct gen_device_info info;
struct isl_device isl_dev;
pthread_mutex_t vma_mutex;
struct util_vma_heap vma_lo;
+ struct util_vma_heap vma_cva;
struct util_vma_heap vma_hi;
- uint64_t vma_lo_available;
- uint64_t vma_hi_available;
/** List of all anv_device_memory objects */
struct list_head memory_objects;
struct anv_state_pool binding_table_pool;
struct anv_state_pool surface_state_pool;
- struct anv_bo workaround_bo;
- struct anv_bo trivial_batch_bo;
- struct anv_bo hiz_clear_bo;
+ struct anv_bo * workaround_bo;
+ struct anv_bo * trivial_batch_bo;
+ struct anv_bo * hiz_clear_bo;
struct anv_pipeline_cache default_pipeline_cache;
struct blorp_context blorp;
struct anv_scratch_pool scratch_pool;
- uint32_t default_mocs;
- uint32_t external_mocs;
-
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
- bool _lost;
+ int _lost;
struct gen_batch_decode_ctx decoder_ctx;
/*
struct gen_aux_map_context *aux_map_ctx;
};
+/* NULL-safe accessor for the instance owning `device`.
+ *
+ * Exists so the vk_errorf() macro can be used with a NULL device (it routes
+ * through this helper to obtain the instance for error reporting).
+ */
+static inline struct anv_instance *
+anv_device_instance_or_null(const struct anv_device *device)
+{
+   return device ? device->physical->instance : NULL;
+}
+
static inline struct anv_state_pool *
anv_binding_table_pool(struct anv_device *device)
{
- if (device->instance->physicalDevice.use_softpin)
+ if (device->physical->use_softpin)
return &device->binding_table_pool;
else
return &device->surface_state_pool;
static inline struct anv_state
anv_binding_table_pool_alloc(struct anv_device *device) {
- if (device->instance->physicalDevice.use_softpin)
+ if (device->physical->use_softpin)
return anv_state_pool_alloc(&device->binding_table_pool,
device->binding_table_pool.block_size, 0);
else
anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
{
if (bo->is_external)
- return device->external_mocs;
+ return device->isl_dev.mocs.external;
else
- return device->default_mocs;
+ return device->isl_dev.mocs.internal;
}
void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);
+void _anv_device_set_all_queue_lost(struct anv_device *device);
VkResult _anv_device_set_lost(struct anv_device *device,
const char *file, int line,
const char *msg, ...)
anv_printflike(4, 5);
+VkResult _anv_queue_set_lost(struct anv_queue *queue,
+ const char *file, int line,
+ const char *msg, ...)
+ anv_printflike(4, 5);
#define anv_device_set_lost(dev, ...) \
_anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
+#define anv_queue_set_lost(queue, ...) \
+ _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__)
static inline bool
anv_device_is_lost(struct anv_device *device)
{
- return unlikely(device->_lost);
+ return unlikely(p_atomic_read(&device->_lost));
}
-VkResult anv_device_execbuf(struct anv_device *device,
- struct drm_i915_gem_execbuffer2 *execbuf,
- struct anv_bo **execbuf_bos);
VkResult anv_device_query_status(struct anv_device *device);
/** Specifies that the BO should be captured in error states */
ANV_BO_ALLOC_CAPTURE = (1 << 4),
- /** Specifies that the BO will have an address assigned by the caller */
+ /** Specifies that the BO will have an address assigned by the caller
+ *
+ * Such BOs do not exist in any VMA heap.
+ */
ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
/** Enables implicit synchronization on the BO
* This is equivalent to EXEC_OBJECT_WRITE.
*/
ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
+
+ /** Has an address which is visible to the client */
+ ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
+
+ /** This buffer has implicit CCS data attached to it */
+ ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
};
VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size,
enum anv_bo_alloc_flags alloc_flags,
+ uint64_t explicit_address,
struct anv_bo **bo);
VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
void *host_ptr, uint32_t size,
enum anv_bo_alloc_flags alloc_flags,
+ uint64_t client_address,
struct anv_bo **bo_out);
VkResult anv_device_import_bo(struct anv_device *device, int fd,
enum anv_bo_alloc_flags alloc_flags,
+ uint64_t client_address,
struct anv_bo **bo);
VkResult anv_device_export_bo(struct anv_device *device,
struct anv_bo *bo, int *fd_out);
VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
int64_t timeout);
+VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue);
+void anv_queue_finish(struct anv_queue *queue);
+
+VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit);
+VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
+ struct anv_batch *batch);
+
+uint64_t anv_gettime_ns(void);
+uint64_t anv_get_absolute_timeout(uint64_t timeout);
+
void* anv_gem_mmap(struct anv_device *device,
uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
void anv_gem_munmap(void *p, uint64_t size);
uint32_t *handles, uint32_t num_handles,
int64_t abs_timeout_ns, bool wait_all);
-bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo);
-void anv_vma_free(struct anv_device *device, struct anv_bo *bo);
-
-VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size);
+uint64_t anv_vma_alloc(struct anv_device *device,
+ uint64_t size, uint64_t align,
+ enum anv_bo_alloc_flags alloc_flags,
+ uint64_t client_address);
+void anv_vma_free(struct anv_device *device,
+ uint64_t address, uint64_t size);
struct anv_reloc_list {
uint32_t num_relocs;
uint32_t array_length;
struct drm_i915_gem_relocation_entry * relocs;
struct anv_bo ** reloc_bos;
- struct set * deps;
+ uint32_t dep_words;
+ BITSET_WORD * deps;
};
VkResult anv_reloc_list_init(struct anv_reloc_list *list,
/* Link in the anv_cmd_buffer.owned_batch_bos list */
struct list_head link;
- struct anv_bo bo;
+ struct anv_bo * bo;
/* Bytes actually consumed in this batch BO */
uint32_t length;
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
void *location, struct anv_bo *bo, uint32_t offset);
-VkResult anv_device_submit_simple_batch(struct anv_device *device,
- struct anv_batch *batch);
static inline VkResult
anv_batch_set_error(struct anv_batch *batch, VkResult error)
_dst = NULL; \
}))
-/* MEMORY_OBJECT_CONTROL_STATE:
- * .GraphicsDataTypeGFDT = 0,
- * .LLCCacheabilityControlLLCCC = 0,
- * .L3CacheabilityControlL3CC = 1,
- */
-#define GEN7_MOCS 1
-
-/* MEMORY_OBJECT_CONTROL_STATE:
- * .LLCeLLCCacheabilityControlLLCCC = 0,
- * .L3CacheabilityControlL3CC = 1,
- */
-#define GEN75_MOCS 1
-
-/* MEMORY_OBJECT_CONTROL_STATE:
- * .MemoryTypeLLCeLLCCacheabilityControl = WB,
- * .TargetCache = L3DefertoPATforLLCeLLCselection,
- * .AgeforQUADLRU = 0
- */
-#define GEN8_MOCS 0x78
-
-/* MEMORY_OBJECT_CONTROL_STATE:
- * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
- * .TargetCache = L3DefertoPATforLLCeLLCselection,
- * .AgeforQUADLRU = 0
- */
-#define GEN8_EXTERNAL_MOCS 0x18
-
-/* Skylake: MOCS is now an index into an array of 62 different caching
- * configurations programmed by the kernel.
- */
-
-/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
-#define GEN9_MOCS (2 << 1)
-
-/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
-#define GEN9_EXTERNAL_MOCS (1 << 1)
-
-/* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
-#define GEN10_MOCS GEN9_MOCS
-#define GEN10_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
-
-/* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */
-#define GEN11_MOCS GEN9_MOCS
-#define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
-
-/* TigerLake MOCS */
-#define GEN12_MOCS GEN9_MOCS
-/* TC=1/LLC Only, LeCC=1/Uncacheable, LRUM=0, L3CC=1/Uncacheable */
-#define GEN12_EXTERNAL_MOCS (3 << 1)
-
struct anv_device_memory {
struct list_head link;
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
+ /* For each shader stage, which offsets apply to that stage */
+ uint16_t stage_dynamic_offsets[MESA_SHADER_STAGES];
+
/* Size of the descriptor buffer for this descriptor set */
uint32_t descriptor_buffer_size;
struct anv_descriptor_pool *pool,
struct anv_descriptor_set *set);
+#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 5)
+#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
struct anv_pipeline_binding {
- /* The descriptor set this surface corresponds to. The special value of
- * ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS indicates that the offset refers
- * to a color attachment and not a regular descriptor.
+ /** Index in the descriptor set
+ *
+ * This is a flattened index; the descriptor set layout is already taken
+ * into account.
+ */
+ uint32_t index;
+
+ /** The descriptor set this surface corresponds to.
+ *
+ * The special ANV_DESCRIPTOR_SET_* values above indicates that this
+ * binding is not a normal descriptor set but something else.
*/
uint8_t set;
- /* Binding in the descriptor set */
- uint32_t binding;
+ union {
+ /** Plane in the binding index for images */
+ uint8_t plane;
+
+ /** Input attachment index (relative to the subpass) */
+ uint8_t input_attachment_index;
+
+ /** Dynamic offset index (for dynamic UBOs and SSBOs) */
+ uint8_t dynamic_offset_index;
+ };
- /* Index in the binding */
+ /** For a storage image, whether it is write-only */
+ uint8_t write_only;
+
+ /** Pad to 64 bits so that there are no holes and we can safely memcmp
+ * assuming POD zero-initialization.
+ */
+ uint8_t pad;
+};
+
+struct anv_push_range {
+ /** Index in the descriptor set */
uint32_t index;
- /* Plane in the binding index */
- uint8_t plane;
+ /** Descriptor set index */
+ uint8_t set;
+
+ /** Dynamic offset index (for dynamic UBOs) */
+ uint8_t dynamic_offset_index;
- /* Input attachment index (relative to the subpass) */
- uint8_t input_attachment_index;
+ /** Start offset in units of 32B */
+ uint8_t start;
- /* For a storage image, whether it is write-only */
- bool write_only;
+ /** Range in units of 32B */
+ uint8_t length;
};
struct anv_pipeline_layout {
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
ANV_PIPE_DEPTH_STALL_BIT = (1 << 13),
ANV_PIPE_CS_STALL_BIT = (1 << 20),
+ ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21),
/* This bit does not exist directly in PIPE_CONTROL. Instead it means that
* a flush has happened but not a CS stall. The next time we do any sort
* of invalidation we need to insert a CS stall at that time. Otherwise,
* we would have to CS stall on every flush which could be bad.
*/
- ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21),
+ ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT = (1 << 22),
/* This bit does not exist directly in PIPE_CONTROL. It means that render
* target operations related to transfer commands with VkBuffer as
* streamer might need to be aware of this to trigger the appropriate stall
* before they can proceed with the copy.
*/
- ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 22),
+ ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 23),
+
+ /* This bit does not exist directly in PIPE_CONTROL. It means that Gen12
+ * AUX-TT data has changed and we need to invalidate AUX-TT data. This is
+ * done by writing the AUX-TT register.
+ */
+ ANV_PIPE_AUX_TABLE_INVALIDATE_BIT = (1 << 24),
+
+ /* This bit does not exist directly in PIPE_CONTROL. It means that a
+ * PIPE_CONTROL with a post-sync operation will follow. This is used to
+ * implement a workaround for Gen9.
+ */
+ ANV_PIPE_POST_SYNC_BIT = (1 << 25),
};
#define ANV_PIPE_FLUSH_BITS ( \
ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
- ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT)
+ ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
+ ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(VkAccessFlags flags)
VkDeviceSize size;
};
-#define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset))
-#define ANV_PARAM_IS_PUSH(param) ((uint32_t)(param) >> 16 == 1)
-#define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff)
-
-#define ANV_PARAM_DYN_OFFSET(offset) ((2 << 16) | (uint32_t)(offset))
-#define ANV_PARAM_IS_DYN_OFFSET(param) ((uint32_t)(param) >> 16 == 2)
-#define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff)
-
struct anv_push_constants {
- /* Push constant data provided by the client through vkPushConstants */
+ /** Push constant data provided by the client through vkPushConstants */
uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
- /* Used for vkCmdDispatchBase */
- uint32_t base_work_group_id[3];
+ /** Dynamic offsets for dynamic UBOs and SSBOs */
+ uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
+
+ /** Pad out to a multiple of 32 bytes */
+ uint32_t push_ubo_sizes[4];
+
+ struct {
+ /** Base workgroup ID
+ *
+ * Used for vkCmdDispatchBase.
+ */
+ uint32_t base_work_group_id[3];
+
+ /** Subgroup ID
+ *
+ * This is never set by software but is implicitly filled out when
+ * uploading the push constants for compute shaders.
+ */
+ uint32_t subgroup_id;
+ } cs;
};
struct anv_dynamic_state {
struct anv_surface_state input;
VkImageLayout current_layout;
+ VkImageLayout current_stencil_layout;
VkImageAspectFlags pending_clear_aspects;
VkImageAspectFlags pending_load_aspects;
bool fast_clear;
struct anv_image_view * image_view;
};
+/** State tracking for vertex buffer flushes
+ *
+ * On Gen8-9, the VF cache only considers the bottom 32 bits of memory
+ * addresses. If you happen to have two vertex buffers which get placed
+ * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
+ * collisions. In order to solve this problem, we track vertex address ranges
+ * which are live in the cache and invalidate the cache if one ever exceeds 32
+ * bits.
+ */
+struct anv_vb_cache_range {
+ /* Virtual address at which the live vertex buffer cache range starts for
+ * this vertex buffer index.
+ */
+ uint64_t start;
+
+ /* Virtual address of the byte after where vertex buffer cache range ends.
+ * This is exclusive such that end - start is the size of the range.
+ */
+ uint64_t end;
+};
+
/** State tracking for particular pipeline bind point
*
* This struct is the base struct for anv_cmd_graphics_state and
* per-stage array in anv_cmd_state.
*/
struct anv_cmd_pipeline_state {
- struct anv_pipeline *pipeline;
- struct anv_pipeline_layout *layout;
-
struct anv_descriptor_set *descriptors[MAX_SETS];
- uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
-
struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
};
struct anv_cmd_graphics_state {
struct anv_cmd_pipeline_state base;
+ struct anv_graphics_pipeline *pipeline;
+
anv_cmd_dirty_mask_t dirty;
uint32_t vb_dirty;
+ struct anv_vb_cache_range ib_bound_range;
+ struct anv_vb_cache_range ib_dirty_range;
+ struct anv_vb_cache_range vb_bound_ranges[33];
+ struct anv_vb_cache_range vb_dirty_ranges[33];
+
struct anv_dynamic_state dynamic;
struct {
struct anv_cmd_compute_state {
struct anv_cmd_pipeline_state base;
+ struct anv_compute_pipeline *pipeline;
+
bool pipeline_dirty;
struct anv_address num_workgroups;
struct anv_state binding_tables[MESA_SHADER_STAGES];
struct anv_state samplers[MESA_SHADER_STAGES];
+ unsigned char sampler_sha1s[MESA_SHADER_STAGES][20];
+ unsigned char surface_sha1s[MESA_SHADER_STAGES][20];
+ unsigned char push_sha1s[MESA_SHADER_STAGES][20];
+
/**
* Whether or not the gen8 PMA fix is enabled. We ensure that, at the top
* of any command buffer it is disabled by disabling it in EndCommandBuffer
* initialized by anv_cmd_buffer_init_batch_bo_chain()
*/
struct u_vector bt_block_states;
- uint32_t bt_next;
+ struct anv_state bt_next;
struct anv_reloc_list surface_relocs;
/** Last seen surface state block pool center bo offset */
void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
struct anv_cmd_buffer *secondary);
void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
-VkResult anv_cmd_buffer_execbuf(struct anv_device *device,
+VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
struct anv_cmd_buffer *cmd_buffer,
const VkSemaphore *in_semaphores,
+ const uint64_t *in_wait_values,
uint32_t num_in_semaphores,
const VkSemaphore *out_semaphores,
+ const uint64_t *out_signal_values,
uint32_t num_out_semaphores,
VkFence fence);
enum anv_fence_type {
ANV_FENCE_TYPE_NONE = 0,
ANV_FENCE_TYPE_BO,
+ ANV_FENCE_TYPE_WSI_BO,
ANV_FENCE_TYPE_SYNCOBJ,
ANV_FENCE_TYPE_WSI,
};
* will say it's idle in this case.
*/
struct {
- struct anv_bo bo;
+ struct anv_bo *bo;
enum anv_bo_fence_state state;
} bo;
struct anv_fence_impl temporary;
};
+void anv_fence_reset_temporary(struct anv_device *device,
+ struct anv_fence *fence);
+
struct anv_event {
uint64_t semaphore;
struct anv_state state;
ANV_SEMAPHORE_TYPE_NONE = 0,
ANV_SEMAPHORE_TYPE_DUMMY,
ANV_SEMAPHORE_TYPE_BO,
+ ANV_SEMAPHORE_TYPE_WSI_BO,
ANV_SEMAPHORE_TYPE_SYNC_FILE,
ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
+ ANV_SEMAPHORE_TYPE_TIMELINE,
+};
+
+struct anv_timeline_point {
+ struct list_head link;
+
+ uint64_t serial;
+
+   /* Number of waiters on this point; when > 0 the point should not be
+    * garbage collected.
+    */
+ int waiting;
+
+ /* BO used for synchronization. */
+ struct anv_bo *bo;
+};
+
+struct anv_timeline {
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ uint64_t highest_past;
+ uint64_t highest_pending;
+
+ struct list_head points;
+ struct list_head free_points;
};
struct anv_semaphore_impl {
enum anv_semaphore_type type;
union {
- /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO.
- * This BO will be added to the object list on any execbuf2 calls for
- * which this semaphore is used as a wait or signal fence. When used as
- * a signal fence, the EXEC_OBJECT_WRITE flag will be set.
+ /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO
+ * or type == ANV_SEMAPHORE_TYPE_WSI_BO. This BO will be added to the
+ * object list on any execbuf2 calls for which this semaphore is used as
+ * a wait or signal fence. When used as a signal fence or when type ==
+ * ANV_SEMAPHORE_TYPE_WSI_BO, the EXEC_OBJECT_WRITE flag will be set.
*/
struct anv_bo *bo;
* import so we don't need to bother with a userspace cache.
*/
uint32_t syncobj;
+
+   /* Non-shareable timeline semaphore
+    *
+    * Used when the kernel doesn't have support for timeline semaphores.
+    */
+ struct anv_timeline timeline;
};
};
struct anv_semaphore {
+ uint32_t refcount;
+
/* Permanent semaphore state. Every semaphore has some form of permanent
* state (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on
* (for cross-process semaphores0 or it could just be a dummy for use
__tmp &= ~(1 << (stage)))
struct anv_pipeline_bind_map {
+ unsigned char surface_sha1[20];
+ unsigned char sampler_sha1[20];
+ unsigned char push_sha1[20];
+
uint32_t surface_count;
uint32_t sampler_count;
struct anv_pipeline_binding * surface_to_descriptor;
struct anv_pipeline_binding * sampler_to_descriptor;
+
+ struct anv_push_range push_ranges[4];
};
struct anv_shader_bin_key {
struct anv_shader_bin {
uint32_t ref_cnt;
+ gl_shader_stage stage;
+
const struct anv_shader_bin_key *key;
struct anv_state kernel;
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
+ gl_shader_stage stage,
const void *key, uint32_t key_size,
const void *kernel, uint32_t kernel_size,
const void *constant_data, uint32_t constant_data_size,
const struct brw_stage_prog_data *prog_data,
- uint32_t prog_data_size, const void *prog_data_param,
+ uint32_t prog_data_size,
const struct brw_compile_stats *stats, uint32_t num_stats,
const struct nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map);
anv_shader_bin_destroy(device, shader);
}
-/* 5 possible simultaneous shader stages and FS may have up to 3 binaries */
-#define MAX_PIPELINE_EXECUTABLES 7
-
struct anv_pipeline_executable {
gl_shader_stage stage;
char *disasm;
};
+enum anv_pipeline_type {
+ ANV_PIPELINE_GRAPHICS,
+ ANV_PIPELINE_COMPUTE,
+};
+
struct anv_pipeline {
struct anv_device * device;
+
struct anv_batch batch;
- uint32_t batch_data[512];
struct anv_reloc_list batch_relocs;
- anv_cmd_dirty_mask_t dynamic_state_mask;
- struct anv_dynamic_state dynamic_state;
void * mem_ctx;
+ enum anv_pipeline_type type;
VkPipelineCreateFlags flags;
- struct anv_subpass * subpass;
- bool needs_data_cache;
+ struct util_dynarray executables;
- struct anv_shader_bin * shaders[MESA_SHADER_STAGES];
+ const struct gen_l3_config * l3_config;
+};
- uint32_t num_executables;
- struct anv_pipeline_executable executables[MAX_PIPELINE_EXECUTABLES];
+struct anv_graphics_pipeline {
+ struct anv_pipeline base;
- struct {
- const struct gen_l3_config * l3_config;
- uint32_t total_size;
- } urb;
+ uint32_t batch_data[512];
- VkShaderStageFlags active_stages;
- struct anv_state blend_state;
+ anv_cmd_dirty_mask_t dynamic_state_mask;
+ struct anv_dynamic_state dynamic_state;
- uint32_t vb_used;
- struct anv_pipeline_vertex_binding {
- uint32_t stride;
- bool instanced;
- uint32_t instance_divisor;
- } vb[MAX_VBS];
+ uint32_t topology;
- uint8_t xfb_used;
+ struct anv_subpass * subpass;
- bool primitive_restart;
- uint32_t topology;
+ struct anv_shader_bin * shaders[MESA_SHADER_STAGES];
- uint32_t cs_right_mask;
+ VkShaderStageFlags active_stages;
+ bool primitive_restart;
bool writes_depth;
bool depth_test_enable;
bool writes_stencil;
bool kill_pixel;
bool depth_bounds_test_enable;
+ struct anv_state blend_state;
+
+ uint32_t vb_used;
+ struct anv_pipeline_vertex_binding {
+ uint32_t stride;
+ bool instanced;
+ uint32_t instance_divisor;
+ } vb[MAX_VBS];
+
struct {
uint32_t sf[7];
uint32_t depth_stencil_state[3];
struct {
uint32_t wm_depth_stencil[4];
} gen9;
+};
+
+struct anv_compute_pipeline {
+ struct anv_pipeline base;
+ struct anv_shader_bin * cs;
+ uint32_t cs_right_mask;
+ uint32_t batch_data[9];
uint32_t interface_descriptor_data[8];
};
+#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
+ static inline struct anv_##pipe_type##_pipeline * \
+ anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \
+ { \
+ assert(pipeline->type == pipe_enum); \
+ return (struct anv_##pipe_type##_pipeline *) pipeline; \
+ }
+
+ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
+ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
+
static inline bool
-anv_pipeline_has_stage(const struct anv_pipeline *pipeline,
+anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
gl_shader_stage stage)
{
return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
}
-#define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage) \
-static inline const struct brw_##prefix##_prog_data * \
-get_##prefix##_prog_data(const struct anv_pipeline *pipeline) \
-{ \
- if (anv_pipeline_has_stage(pipeline, stage)) { \
- return (const struct brw_##prefix##_prog_data *) \
- pipeline->shaders[stage]->prog_data; \
- } else { \
- return NULL; \
- } \
+#define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \
+static inline const struct brw_##prefix##_prog_data * \
+get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \
+{ \
+ if (anv_pipeline_has_stage(pipeline, stage)) { \
+ return (const struct brw_##prefix##_prog_data *) \
+ pipeline->shaders[stage]->prog_data; \
+ } else { \
+ return NULL; \
+ } \
}
-ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
-ANV_DECL_GET_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
-ANV_DECL_GET_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
-ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
-ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
-ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)
+ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
+ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
+ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
+ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
+ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
+
+static inline const struct brw_cs_prog_data *
+get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
+{
+ assert(pipeline->cs);
+ return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
+}
static inline const struct brw_vue_prog_data *
-anv_pipeline_get_last_vue_prog_data(const struct anv_pipeline *pipeline)
+anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
{
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
return &get_gs_prog_data(pipeline)->base;
}
VkResult
-anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
+anv_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device,
struct anv_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *alloc);
VkResult
-anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
+anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
struct anv_pipeline_cache *cache,
const VkComputePipelineCreateInfo *info,
const struct anv_shader_module *module,
bool can_ycbcr;
};
+/**
+ * Return the aspect's _format_ plane, not its _memory_ plane (using the
+ * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
+ * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
+ * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
+ */
static inline uint32_t
anv_image_aspect_to_plane(VkImageAspectFlags image_aspects,
VkImageAspectFlags aspect_mask)
return anv_get_format_plane(devinfo, vk_format, aspect, tiling).isl_format;
}
+bool anv_formats_ccs_e_compatible(const struct gen_device_info *devinfo,
+ VkImageCreateFlags create_flags,
+ VkFormat vk_format,
+ VkImageTiling vk_tiling,
+ const VkImageFormatListCreateInfoKHR *fmt_list);
+
static inline struct isl_swizzle
anv_swizzle_for_render(struct isl_swizzle swizzle)
{
*/
bool disjoint;
- /* All the formats that can be used when creating views of this image
- * are CCS_E compatible.
- */
- bool ccs_e_compatible;
-
/* Image was created with external format. */
bool external_format;
struct anv_surface shadow_surface;
/**
- * For color images, this is the aux usage for this image when not used
- * as a color attachment.
- *
- * For depth/stencil images, this is set to ISL_AUX_USAGE_HIZ if the
- * image has a HiZ buffer.
+ * The base aux usage for this image. For color images, this can be
+ * either CCS_E or CCS_D depending on whether or not we can reliably
+ * leave CCS on all the time.
*/
enum isl_aux_usage aux_usage;
*/
struct anv_address address;
- /**
- * Address of the main surface used to fill the aux map table. This is
- * used at destruction of the image since the Vulkan spec does not
- * guarantee that the address.bo field we still be valid at destruction.
- */
- uint64_t aux_map_surface_address;
-
/**
* When destroying the image, also free the bo.
* */
VkImageAspectFlagBits aspect)
{
uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
+ if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
+ return 0;
/* The Gen12 CCS aux surface is represented with only one level. */
- const uint8_t aux_logical_levels =
- image->planes[plane].aux_surface.isl.tiling == ISL_TILING_GEN12_CCS ?
- image->planes[plane].surface.isl.levels :
- image->planes[plane].aux_surface.isl.levels;
-
- return image->planes[plane].aux_surface.isl.size_B > 0 ?
- aux_logical_levels : 0;
+ return image->planes[plane].aux_surface.isl.tiling == ISL_TILING_GEN12_CCS ?
+ image->planes[plane].surface.isl.levels :
+ image->planes[plane].aux_surface.isl.levels;
}
/* Returns the number of auxiliary buffer layers attached to an image. */
if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
return false;
+ /* For Gen8-11, there are some restrictions around sampling from HiZ.
+ * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
+ * say:
+ *
+ * "If this field is set to AUX_HIZ, Number of Multisamples must
+ * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
+ */
+ if (image->type == VK_IMAGE_TYPE_3D)
+ return false;
+
/* Allow this feature on BDW even though it is disabled in the BDW devinfo
* struct. There's documentation which suggests that this feature actually
* reduces performance on BDW, but it has only been observed to help so
void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
- enum isl_format format,
+ enum isl_format format, struct isl_swizzle swizzle,
VkImageAspectFlagBits aspect,
uint32_t base_layer, uint32_t layer_count,
enum isl_aux_op mcs_op, union isl_color_value *clear_value,
void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
- enum isl_format format,
+ enum isl_format format, struct isl_swizzle swizzle,
VkImageAspectFlagBits aspect, uint32_t level,
uint32_t base_layer, uint32_t layer_count,
enum isl_aux_op ccs_op, union isl_color_value *clear_value,
uint32_t base_level, uint32_t level_count,
uint32_t base_layer, uint32_t layer_count);
+enum isl_aux_state
+anv_layout_to_aux_state(const struct gen_device_info * const devinfo,
+ const struct anv_image *image,
+ const VkImageAspectFlagBits aspect,
+ const VkImageLayout layout);
+
enum isl_aux_usage
anv_layout_to_aux_usage(const struct gen_device_info * const devinfo,
const struct anv_image *image,
const VkImageAspectFlagBits aspect,
+ const VkImageUsageFlagBits usage,
const VkImageLayout layout);
enum anv_fast_clear_type
const VkAllocationCallbacks* alloc,
VkImage *pImage);
-const struct anv_surface *
-anv_image_get_surface_for_aspect_mask(const struct anv_image *image,
- VkImageAspectFlags aspect_mask);
-
enum isl_format
anv_isl_format_for_descriptor_type(VkDescriptorType type);
-static inline struct VkExtent3D
+static inline VkExtent3D
anv_sanitize_image_extent(const VkImageType imageType,
- const struct VkExtent3D imageExtent)
+ const VkExtent3D imageExtent)
{
switch (imageType) {
case VK_IMAGE_TYPE_1D:
}
}
-static inline struct VkOffset3D
+static inline VkOffset3D
anv_sanitize_image_offset(const VkImageType imageType,
- const struct VkOffset3D imageOffset)
+ const VkOffset3D imageOffset)
{
switch (imageType) {
case VK_IMAGE_TYPE_1D:
VkImageUsageFlagBits usage;
uint32_t attachment;
VkImageLayout layout;
+
+ /* Used only with attachments containing stencil data. */
+ VkImageLayout stencil_layout;
};
struct anv_subpass {
VkImageLayout final_layout;
VkImageLayout first_subpass_layout;
+ VkImageLayout stencil_initial_layout;
+ VkImageLayout stencil_final_layout;
+
/* The subpass id in which the attachment will be used last. */
uint32_t last_subpass_idx;
};