#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
#endif
#else
-#define VG(x)
+#define VG(x) ((void)0)
#endif
#include "common/gen_clflush.h"
#include "dev/gen_device_info.h"
#include "blorp/blorp.h"
#include "compiler/brw_compiler.h"
+#include "util/bitset.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
-#include "util/set.h"
+#include "util/sparse_array.h"
#include "util/u_atomic.h"
#include "util/u_vector.h"
#include "util/u_math.h"
#include "util/vma.h"
+#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;
+struct anv_batch;
struct anv_buffer;
struct anv_buffer_view;
struct anv_image_view;
struct anv_instance;
+struct gen_aux_map_context;
struct gen_l3_config;
+struct gen_perf_config;
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_intel.h>
#include "common/intel_log.h"
#include "wsi_common.h"
+#define NSEC_PER_SEC 1000000000ull
+
/* anv Virtual Memory Layout
* =========================
*
*/
#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
+/* For gen12 we set the streamout buffers using 4 separate commands
+ * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
+ * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
+ * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
+ * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
+ * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
+ * 3DSTATE_SO_BUFFER_INDEX_0.
+ */
+#define SO_BUFFER_INDEX_0_CMD 0x60
#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
static inline uint32_t
};
}
+/* Unpack a pointer previously packed with anv_pack_ptr(): the low `bits`
+ * bits are returned through `flags` and the remaining high bits are
+ * returned as the pointer.
+ */
+static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
+{
+   uintptr_t mask = (1ull << bits) - 1;
+   *flags = ptr & mask;
+   return (void *) (ptr & ~mask);
+}
+
+/* Pack `flags` into the low `bits` bits of `ptr`.  Assumes the pointer is
+ * sufficiently aligned that its low `bits` bits are zero (see
+ * anv_unpack_ptr() for the inverse operation).
+ */
+static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
+{
+   uintptr_t value = (uintptr_t) ptr;
+   uintptr_t mask = (1ull << bits) - 1;
+   return value | (mask & flags);
+}
+
#define for_each_bit(b, dword) \
for (uint32_t __dword = (dword); \
(b) = __builtin_ffs(__dword) - 1, __dword; \
VkResult __vk_errorf(struct anv_instance *instance, const void *object,
VkDebugReportObjectTypeEXT type, VkResult error,
- const char *file, int line, const char *format, ...);
+ const char *file, int line, const char *format, ...)
+ anv_printflike(7, 8);
#ifdef DEBUG
#define vk_error(error) __vk_errorf(NULL, NULL,\
return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
}
-/* Extra ANV-defined BO flags which won't be passed to the kernel */
-#define ANV_BO_EXTERNAL (1ull << 31)
-#define ANV_BO_FLAG_MASK (1ull << 31)
-
struct anv_bo {
uint32_t gem_handle;
+ uint32_t refcount;
+
/* Index into the current validation list. This is used by the
* validation list building alrogithm to track which buffers are already
* in the validation list so that we can ensure uniqueness.
*/
uint32_t index;
+ /* Index for use with util_sparse_array_free_list */
+ uint32_t free_index;
+
/* Last known offset. This value is provided by the kernel when we
* execbuf and is used as the presumed offset for the next bunch of
* relocations.
uint64_t offset;
uint64_t size;
+
+ /* Map for internally mapped BOs.
+ *
+ * If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
+ */
void *map;
/** Flags to pass to the kernel through drm_i915_exec_object2::flags */
uint32_t flags;
+
+ /** True if this BO may be shared with other processes */
+ bool is_external:1;
+
+ /** True if this BO is a wrapper
+ *
+ * When set to true, none of the fields in this BO are meaningful except
+ * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
+ * See also anv_bo_unwrap(). Wrapper BOs are not allowed when use_softpin
+ * is set in the physical device.
+ */
+ bool is_wrapper:1;
+
+ /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
+ bool has_fixed_address:1;
+
+ /** True if this BO wraps a host pointer */
+ bool from_host_ptr:1;
};
-static inline void
-anv_bo_init(struct anv_bo *bo, uint32_t gem_handle, uint64_t size)
+/* Follow the chain of wrapper BOs (anv_bo::is_wrapper, whose anv_bo::map
+ * points at the wrapped BO) down to the real BO.
+ */
+static inline struct anv_bo *
+anv_bo_unwrap(struct anv_bo *bo)
{
-   bo->gem_handle = gem_handle;
-   bo->index = 0;
-   bo->offset = -1;
-   bo->size = size;
-   bo->map = NULL;
-   bo->flags = 0;
+   while (bo->is_wrapper)
+      bo = bo->map;
+   return bo;
}
/* Represents a lock-free linked list of "free" things. This is used by
/* A simple count that is incremented every time the head changes. */
uint32_t count;
};
- uint64_t u64;
+ /* Make sure it's aligned to 64 bits. This will make atomic operations
+ * faster on 32 bit platforms.
+ */
+ uint64_t u64 __attribute__ ((aligned (8)));
};
#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
uint32_t next;
uint32_t end;
};
- uint64_t u64;
+ /* Make sure it's aligned to 64 bits. This will make atomic operations
+ * faster on 32 bit platforms.
+ */
+ uint64_t u64 __attribute__ ((aligned (8)));
};
};
#define anv_block_pool_foreach_bo(bo, pool) \
- for (bo = (pool)->bos; bo != &(pool)->bos[(pool)->nbos]; bo++)
+ for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
+ _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
+ _pp_bo++)
#define ANV_MAX_BLOCK_POOL_BOS 20
struct anv_block_pool {
struct anv_device *device;
+ bool use_softpin;
- uint64_t bo_flags;
+ /* Wrapper BO for use in relocation lists. This BO is simply a wrapper
+ * around the actual BO so that we grow the pool after the wrapper BO has
+ * been put in a relocation list. This is only used in the non-softpin
+ * case.
+ */
+ struct anv_bo wrapper_bo;
- struct anv_bo bos[ANV_MAX_BLOCK_POOL_BOS];
+ struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
struct anv_bo *bo;
uint32_t nbos;
VkResult anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t initial_size,
- uint64_t bo_flags);
+ uint32_t initial_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
uint32_t block_size, uint32_t *padding);
VkResult anv_state_pool_init(struct anv_state_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t block_size,
- uint64_t bo_flags);
+ uint32_t block_size);
void anv_state_pool_finish(struct anv_state_pool *pool);
struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
uint32_t state_size, uint32_t alignment);
struct anv_bo_pool {
struct anv_device *device;
- uint64_t bo_flags;
-
- void *free_list[16];
+ struct util_sparse_array_free_list free_list[16];
};
-void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
- uint64_t bo_flags);
+void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device);
void anv_bo_pool_finish(struct anv_bo_pool *pool);
-VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo,
- uint32_t size);
-void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo);
-
-struct anv_scratch_bo {
- bool exists;
- struct anv_bo bo;
-};
+VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
+ struct anv_bo **bo_out);
+void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
struct anv_scratch_pool {
/* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
- struct anv_scratch_bo bos[16][MESA_SHADER_STAGES];
+ struct anv_bo *bos[16][MESA_SHADER_STAGES];
};
void anv_scratch_pool_init(struct anv_device *device,
/** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
struct anv_bo_cache {
- struct hash_table *bo_map;
+ struct util_sparse_array bo_map;
pthread_mutex_t mutex;
};
VkResult anv_bo_cache_init(struct anv_bo_cache *cache);
void anv_bo_cache_finish(struct anv_bo_cache *cache);
-VkResult anv_bo_cache_alloc(struct anv_device *device,
- struct anv_bo_cache *cache,
- uint64_t size, uint64_t bo_flags,
- struct anv_bo **bo);
-VkResult anv_bo_cache_import_host_ptr(struct anv_device *device,
- struct anv_bo_cache *cache,
- void *host_ptr, uint32_t size,
- uint64_t bo_flags, struct anv_bo **bo_out);
-VkResult anv_bo_cache_import(struct anv_device *device,
- struct anv_bo_cache *cache,
- int fd, uint64_t bo_flags,
- struct anv_bo **bo);
-VkResult anv_bo_cache_export(struct anv_device *device,
- struct anv_bo_cache *cache,
- struct anv_bo *bo_in, int *fd_out);
-void anv_bo_cache_release(struct anv_device *device,
- struct anv_bo_cache *cache,
- struct anv_bo *bo);
struct anv_memory_type {
/* Standard bits passed on to the client */
bool supports_48bit_addresses;
struct brw_compiler * compiler;
struct isl_device isl_dev;
+ struct gen_perf_config * perf;
int cmd_parser_version;
bool has_exec_async;
bool has_exec_capture;
/** True if we can use bindless access for samplers */
bool has_bindless_samplers;
+ bool always_flush_cache;
+
struct anv_device_extension_table supported_extensions;
+ struct anv_physical_device_dispatch_table dispatch;
uint32_t eu_total;
uint32_t subslice_total;
bool pipeline_cache_enabled;
struct vk_debug_report_instance debug_report_callbacks;
+
+ struct driOptionCache dri_options;
+ struct driOptionCache available_dri_options;
};
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
bool anv_physical_device_extension_supported(struct anv_physical_device *dev,
const char *name);
+/* A deferred queue submission.
+ *
+ * These are built up and placed on anv_queue::queued_submits before being
+ * submitted to i915.
+ */
+struct anv_queue_submit {
+   struct anv_cmd_buffer *                   cmd_buffer;
+
+   uint32_t                                  fence_count;
+   uint32_t                                  fence_array_length;
+   struct drm_i915_gem_exec_fence *          fences;
+
+   uint32_t                                  temporary_semaphore_count;
+   uint32_t                                  temporary_semaphore_array_length;
+   struct anv_semaphore_impl *               temporary_semaphores;
+
+   /* Semaphores to be signaled with a SYNC_FD. */
+   struct anv_semaphore **                   sync_fd_semaphores;
+   uint32_t                                  sync_fd_semaphore_count;
+   uint32_t                                  sync_fd_semaphore_array_length;
+
+   /* Allocated only with non-shareable timelines. */
+   struct anv_timeline **                    wait_timelines;
+   uint32_t                                  wait_timeline_count;
+   uint32_t                                  wait_timeline_array_length;
+   uint64_t *                                wait_timeline_values;
+
+   struct anv_timeline **                    signal_timelines;
+   uint32_t                                  signal_timeline_count;
+   uint32_t                                  signal_timeline_array_length;
+   uint64_t *                                signal_timeline_values;
+
+   int                                       in_fence;
+   bool                                      need_out_fence;
+   int                                       out_fence;
+
+   uint32_t                                  fence_bo_count;
+   uint32_t                                  fence_bo_array_length;
+   /* An array of struct anv_bo pointers with lower bit used as a flag to
+    * signal we will wait on that BO (see anv_(un)pack_ptr).
+    */
+   uintptr_t *                               fence_bos;
+
+   const VkAllocationCallbacks *             alloc;
+   VkSystemAllocationScope                   alloc_scope;
+
+   struct anv_bo *                           simple_bo;
+   uint32_t                                  simple_bo_size;
+
+   struct list_head                          link;
+};
+
struct anv_queue {
VK_LOADER_DATA _loader_data;
struct anv_device * device;
+ /*
+ * A list of struct anv_queue_submit to be submitted to i915.
+ */
+ struct list_head queued_submits;
+
VkDeviceQueueCreateFlags flags;
};
struct anv_state_pool binding_table_pool;
struct anv_state_pool surface_state_pool;
- struct anv_bo workaround_bo;
- struct anv_bo trivial_batch_bo;
- struct anv_bo hiz_clear_bo;
+ struct anv_bo * workaround_bo;
+ struct anv_bo * trivial_batch_bo;
+ struct anv_bo * hiz_clear_bo;
struct anv_pipeline_cache default_pipeline_cache;
struct blorp_context blorp;
struct anv_scratch_pool scratch_pool;
- uint32_t default_mocs;
- uint32_t external_mocs;
-
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
- bool _lost;
+ int _lost;
struct gen_batch_decode_ctx decoder_ctx;
/*
* the cmd_buffer's list.
*/
struct anv_cmd_buffer *cmd_buffer_being_decoded;
+
+   int perf_fd; /* -1 if not opened */
+ uint64_t perf_metric; /* 0 if unset */
+
+ struct gen_aux_map_context *aux_map_ctx;
};
static inline struct anv_state_pool *
static inline uint32_t
anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
{
-   if (bo->flags & ANV_BO_EXTERNAL)
-      return device->external_mocs;
+   /* MOCS values now come from ISL: externally shared BOs use the external
+    * MOCS entry, everything else the internal one.
+    */
+   if (bo->is_external)
+      return device->isl_dev.mocs.external;
   else
-      return device->default_mocs;
+      return device->isl_dev.mocs.internal;
}
void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);
+void _anv_device_set_all_queue_lost(struct anv_device *device);
VkResult _anv_device_set_lost(struct anv_device *device,
const char *file, int line,
- const char *msg, ...);
+ const char *msg, ...)
+ anv_printflike(4, 5);
+VkResult _anv_queue_set_lost(struct anv_queue *queue,
+ const char *file, int line,
+ const char *msg, ...)
+ anv_printflike(4, 5);
#define anv_device_set_lost(dev, ...) \
_anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
+#define anv_queue_set_lost(queue, ...) \
+ _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__)
static inline bool
anv_device_is_lost(struct anv_device *device)
{
-   return unlikely(device->_lost);
+   /* _lost is now an int updated atomically; read it atomically so checks
+    * from multiple threads are well-defined.
+    */
+   return unlikely(p_atomic_read(&device->_lost));
}
-VkResult anv_device_execbuf(struct anv_device *device,
- struct drm_i915_gem_execbuffer2 *execbuf,
- struct anv_bo **execbuf_bos);
VkResult anv_device_query_status(struct anv_device *device);
+
+
+enum anv_bo_alloc_flags {
+   /** Specifies that the BO must have a 32-bit address
+    *
+    * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
+    */
+   ANV_BO_ALLOC_32BIT_ADDRESS =  (1 << 0),
+
+   /** Specifies that the BO may be shared externally */
+   ANV_BO_ALLOC_EXTERNAL =       (1 << 1),
+
+   /** Specifies that the BO should be mapped */
+   ANV_BO_ALLOC_MAPPED =         (1 << 2),
+
+   /** Specifies that the BO should be snooped so we get coherency */
+   ANV_BO_ALLOC_SNOOPED =        (1 << 3),
+
+   /** Specifies that the BO should be captured in error states */
+   ANV_BO_ALLOC_CAPTURE =        (1 << 4),
+
+   /** Specifies that the BO will have an address assigned by the caller */
+   ANV_BO_ALLOC_FIXED_ADDRESS =  (1 << 5),
+
+   /** Enables implicit synchronization on the BO
+    *
+    * This is the opposite of EXEC_OBJECT_ASYNC.
+    */
+   ANV_BO_ALLOC_IMPLICIT_SYNC =  (1 << 6),
+
+   /** Marks the BO as written for implicit synchronization purposes
+    *
+    * This is equivalent to EXEC_OBJECT_WRITE.
+    */
+   ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
+};
+
+VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size,
+ enum anv_bo_alloc_flags alloc_flags,
+ struct anv_bo **bo);
+VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
+ void *host_ptr, uint32_t size,
+ enum anv_bo_alloc_flags alloc_flags,
+ struct anv_bo **bo_out);
+VkResult anv_device_import_bo(struct anv_device *device, int fd,
+ enum anv_bo_alloc_flags alloc_flags,
+ struct anv_bo **bo);
+VkResult anv_device_export_bo(struct anv_device *device,
+ struct anv_bo *bo, int *fd_out);
+void anv_device_release_bo(struct anv_device *device,
+ struct anv_bo *bo);
+
+/* Return the anv_bo slot for a GEM handle from the device's BO cache
+ * sparse array.
+ */
+static inline struct anv_bo *
+anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
+{
+   return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
+}
+
VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo);
VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
int64_t timeout);
+VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue);
+void anv_queue_finish(struct anv_queue *queue);
+
+VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit);
+VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
+ struct anv_batch *batch);
+
+uint64_t anv_gettime_ns(void);
+uint64_t anv_get_absolute_timeout(uint64_t timeout);
+
void* anv_gem_mmap(struct anv_device *device,
uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
void anv_gem_munmap(void *p, uint64_t size);
bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo);
void anv_vma_free(struct anv_device *device, struct anv_bo *bo);
-VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size);
-
struct anv_reloc_list {
uint32_t num_relocs;
uint32_t array_length;
struct drm_i915_gem_relocation_entry * relocs;
struct anv_bo ** reloc_bos;
- struct set * deps;
+ uint32_t dep_words;
+ BITSET_WORD * deps;
};
VkResult anv_reloc_list_init(struct anv_reloc_list *list,
VkResult anv_reloc_list_add(struct anv_reloc_list *list,
const VkAllocationCallbacks *alloc,
uint32_t offset, struct anv_bo *target_bo,
- uint32_t delta);
+ uint32_t delta, uint64_t *address_u64_out);
struct anv_batch_bo {
/* Link in the anv_cmd_buffer.owned_batch_bos list */
struct list_head link;
- struct anv_bo bo;
+ struct anv_bo * bo;
/* Bytes actually consumed in this batch BO */
uint32_t length;
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
void *location, struct anv_bo *bo, uint32_t offset);
-VkResult anv_device_submit_simple_batch(struct anv_device *device,
- struct anv_batch *batch);
static inline VkResult
anv_batch_set_error(struct anv_batch *batch, VkResult error)
_dst = NULL; \
}))
-/* MEMORY_OBJECT_CONTROL_STATE:
- * .GraphicsDataTypeGFDT = 0,
- * .LLCCacheabilityControlLLCCC = 0,
- * .L3CacheabilityControlL3CC = 1,
- */
-#define GEN7_MOCS 1
-
-/* MEMORY_OBJECT_CONTROL_STATE:
- * .LLCeLLCCacheabilityControlLLCCC = 0,
- * .L3CacheabilityControlL3CC = 1,
- */
-#define GEN75_MOCS 1
-
-/* MEMORY_OBJECT_CONTROL_STATE:
- * .MemoryTypeLLCeLLCCacheabilityControl = WB,
- * .TargetCache = L3DefertoPATforLLCeLLCselection,
- * .AgeforQUADLRU = 0
- */
-#define GEN8_MOCS 0x78
-
-/* MEMORY_OBJECT_CONTROL_STATE:
- * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
- * .TargetCache = L3DefertoPATforLLCeLLCselection,
- * .AgeforQUADLRU = 0
- */
-#define GEN8_EXTERNAL_MOCS 0x18
-
-/* Skylake: MOCS is now an index into an array of 62 different caching
- * configurations programmed by the kernel.
- */
-
-/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
-#define GEN9_MOCS (2 << 1)
-
-/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
-#define GEN9_EXTERNAL_MOCS (1 << 1)
-
-/* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
-#define GEN10_MOCS GEN9_MOCS
-#define GEN10_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
-
-/* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */
-#define GEN11_MOCS GEN9_MOCS
-#define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
-
-/* TigerLake MOCS */
-#define GEN12_MOCS GEN9_MOCS
-/* TC=1/LLC Only, LeCC=1/Uncacheable, LRUM=0, L3CC=1/Uncacheable */
-#define GEN12_EXTERNAL_MOCS (3 << 1)
-
struct anv_device_memory {
struct list_head link;
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
+ /* For each shader stage, which offsets apply to that stage */
+ uint16_t stage_dynamic_offsets[MESA_SHADER_STAGES];
+
/* Size of the descriptor buffer for this descriptor set */
uint32_t descriptor_buffer_size;
uint32_t next;
uint32_t free_list;
- struct anv_bo bo;
+ struct anv_bo *bo;
struct util_vma_heap bo_heap;
struct anv_state_stream surface_state_stream;
struct anv_descriptor_pool *pool,
struct anv_descriptor_set *set);
+#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 5)
+#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
struct anv_pipeline_binding {
- /* The descriptor set this surface corresponds to. The special value of
- * ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS indicates that the offset refers
- * to a color attachment and not a regular descriptor.
+ /** Index in the descriptor set
+ *
+ * This is a flattened index; the descriptor set layout is already taken
+ * into account.
+ */
+ uint32_t index;
+
+ /** The descriptor set this surface corresponds to.
+ *
+ * The special ANV_DESCRIPTOR_SET_* values above indicates that this
+ * binding is not a normal descriptor set but something else.
*/
uint8_t set;
- /* Binding in the descriptor set */
- uint32_t binding;
+ union {
+ /** Plane in the binding index for images */
+ uint8_t plane;
+
+ /** Input attachment index (relative to the subpass) */
+ uint8_t input_attachment_index;
+
+ /** Dynamic offset index (for dynamic UBOs and SSBOs) */
+ uint8_t dynamic_offset_index;
+ };
- /* Index in the binding */
+ /** For a storage image, whether it is write-only */
+ uint8_t write_only;
+
+ /** Pad to 64 bits so that there are no holes and we can safely memcmp
+ * assuming POD zero-initialization.
+ */
+ uint8_t pad;
+};
+
+struct anv_push_range {
+ /** Index in the descriptor set */
uint32_t index;
- /* Plane in the binding index */
- uint8_t plane;
+ /** Descriptor set index */
+ uint8_t set;
+
+ /** Dynamic offset index (for dynamic UBOs) */
+ uint8_t dynamic_offset_index;
- /* Input attachment index (relative to the subpass) */
- uint8_t input_attachment_index;
+ /** Start offset in units of 32B */
+ uint8_t start;
- /* For a storage image, whether it is write-only */
- bool write_only;
+ /** Range in units of 32B */
+ uint8_t length;
};
struct anv_pipeline_layout {
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3),
ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4),
ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5),
+ ANV_PIPE_TILE_CACHE_FLUSH_BIT = (1 << 6),
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10),
ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
#define ANV_PIPE_FLUSH_BITS ( \
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
- ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
+ ANV_PIPE_TILE_CACHE_FLUSH_BIT)
#define ANV_PIPE_STALL_BITS ( \
ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
VkDeviceSize size;
};
-#define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset))
-#define ANV_PARAM_IS_PUSH(param) ((uint32_t)(param) >> 16 == 1)
-#define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff)
-
-#define ANV_PARAM_DYN_OFFSET(offset) ((2 << 16) | (uint32_t)(offset))
-#define ANV_PARAM_IS_DYN_OFFSET(param) ((uint32_t)(param) >> 16 == 2)
-#define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff)
-
struct anv_push_constants {
- /* Push constant data provided by the client through vkPushConstants */
+ /** Push constant data provided by the client through vkPushConstants */
uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
- /* Used for vkCmdDispatchBase */
- uint32_t base_work_group_id[3];
+ /** Dynamic offsets for dynamic UBOs and SSBOs */
+ uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
+
+ struct {
+ /** Base workgroup ID
+ *
+ * Used for vkCmdDispatchBase.
+ */
+ uint32_t base_work_group_id[3];
+
+ /** Subgroup ID
+ *
+ * This is never set by software but is implicitly filled out when
+ * uploading the push constants for compute shaders.
+ */
+ uint32_t subgroup_id;
+
+ /** Pad out to a multiple of 32 bytes */
+ uint32_t pad[4];
+ } cs;
};
struct anv_dynamic_state {
struct anv_surface_state input;
VkImageLayout current_layout;
+ VkImageLayout current_stencil_layout;
VkImageAspectFlags pending_clear_aspects;
VkImageAspectFlags pending_load_aspects;
bool fast_clear;
*/
struct anv_cmd_pipeline_state {
struct anv_pipeline *pipeline;
- struct anv_pipeline_layout *layout;
struct anv_descriptor_set *descriptors[MAX_SETS];
- uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
-
struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
};
/* PIPELINE_SELECT.PipelineSelection */
uint32_t current_pipeline;
const struct gen_l3_config * current_l3_config;
+ uint32_t last_aux_map_state;
struct anv_cmd_graphics_state gfx;
struct anv_cmd_compute_state compute;
struct anv_state binding_tables[MESA_SHADER_STAGES];
struct anv_state samplers[MESA_SHADER_STAGES];
+ unsigned char sampler_sha1s[MESA_SHADER_STAGES][20];
+ unsigned char surface_sha1s[MESA_SHADER_STAGES][20];
+ unsigned char push_sha1s[MESA_SHADER_STAGES][20];
+
/**
* Whether or not the gen8 PMA fix is enabled. We ensure that, at the top
* of any command buffer it is disabled by disabling it in EndCommandBuffer
* initialized by anv_cmd_buffer_init_batch_bo_chain()
*/
struct u_vector bt_block_states;
- uint32_t bt_next;
+ struct anv_state bt_next;
struct anv_reloc_list surface_relocs;
/** Last seen surface state block pool center bo offset */
VkCommandBufferLevel level;
struct anv_cmd_state state;
+
+ /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
+ uint64_t intel_perf_marker;
};
VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
struct anv_cmd_buffer *secondary);
void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
-VkResult anv_cmd_buffer_execbuf(struct anv_device *device,
+VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
struct anv_cmd_buffer *cmd_buffer,
const VkSemaphore *in_semaphores,
+ const uint64_t *in_wait_values,
uint32_t num_in_semaphores,
const VkSemaphore *out_semaphores,
+ const uint64_t *out_signal_values,
uint32_t num_out_semaphores,
VkFence fence);
* will say it's idle in this case.
*/
struct {
- struct anv_bo bo;
+ struct anv_bo *bo;
enum anv_bo_fence_state state;
} bo;
ANV_SEMAPHORE_TYPE_BO,
ANV_SEMAPHORE_TYPE_SYNC_FILE,
ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
+ ANV_SEMAPHORE_TYPE_TIMELINE,
+};
+
+/* A single point on an emulated timeline (see anv_timeline). */
+struct anv_timeline_point {
+   struct list_head link;
+
+   /* Timeline value associated with this point. */
+   uint64_t serial;
+
+   /* Number of waiters on this point; while > 0 the point must not be
+    * garbage-collected.
+    */
+   int waiting;
+
+   /* BO used for synchronization. */
+   struct anv_bo *bo;
+};
+
+/* Emulated (non-shareable) timeline, used when the kernel does not have
+ * support for timeline semaphores (ANV_SEMAPHORE_TYPE_TIMELINE).
+ */
+struct anv_timeline {
+   pthread_mutex_t mutex;
+   pthread_cond_t cond;
+
+   uint64_t highest_past;
+   uint64_t highest_pending;
+
+   struct list_head points;
+   struct list_head free_points;
+};
struct anv_semaphore_impl {
* import so we don't need to bother with a userspace cache.
*/
uint32_t syncobj;
+
+   /* Non-shareable timeline semaphore
+    *
+    * Used when the kernel doesn't have support for timeline semaphores.
+    */
+ struct anv_timeline timeline;
};
};
struct anv_semaphore {
+ uint32_t refcount;
+
/* Permanent semaphore state. Every semaphore has some form of permanent
* state (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on
* (for cross-process semaphores0 or it could just be a dummy for use
__tmp &= ~(1 << (stage)))
struct anv_pipeline_bind_map {
+ unsigned char surface_sha1[20];
+ unsigned char sampler_sha1[20];
+ unsigned char push_sha1[20];
+
uint32_t surface_count;
uint32_t sampler_count;
struct anv_pipeline_binding * surface_to_descriptor;
struct anv_pipeline_binding * sampler_to_descriptor;
+
+ struct anv_push_range push_ranges[4];
};
struct anv_shader_bin_key {
struct brw_compile_stats stats;
+ char *nir;
char *disasm;
};
bool depth_clip_enable;
bool sample_shading_enable;
bool kill_pixel;
+ bool depth_bounds_test_enable;
struct {
uint32_t sf[7];
*/
struct anv_address address;
+ /**
+ * Address of the main surface used to fill the aux map table. This is
+ * used at destruction of the image since the Vulkan spec does not
+    * guarantee that the address.bo field will still be valid at destruction.
+ */
+ uint64_t aux_map_surface_address;
+
/**
* When destroying the image, also free the bo.
* */
VkImageAspectFlagBits aspect)
{
uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
+
+ /* The Gen12 CCS aux surface is represented with only one level. */
+ const uint8_t aux_logical_levels =
+ image->planes[plane].aux_surface.isl.tiling == ISL_TILING_GEN12_CCS ?
+ image->planes[plane].surface.isl.levels :
+ image->planes[plane].aux_surface.isl.levels;
+
return image->planes[plane].aux_surface.isl.size_B > 0 ?
- image->planes[plane].aux_surface.isl.levels : 0;
+ aux_logical_levels : 0;
}
/* Returns the number of auxiliary buffer layers attached to an image. */
return 0;
} else {
uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
- return MAX2(image->planes[plane].aux_surface.isl.logical_level0_px.array_len,
- image->planes[plane].aux_surface.isl.logical_level0_px.depth >> miplevel);
+
+ /* The Gen12 CCS aux surface is represented with only one layer. */
+ const struct isl_extent4d *aux_logical_level0_px =
+ image->planes[plane].aux_surface.isl.tiling == ISL_TILING_GEN12_CCS ?
+ &image->planes[plane].surface.isl.logical_level0_px :
+ &image->planes[plane].aux_surface.isl.logical_level0_px;
+
+ return MAX2(aux_logical_level0_px->array_len,
+ aux_logical_level0_px->depth >> miplevel);
}
}
}
addr.offset += array_layer * 4;
+ assert(addr.offset <
+ image->planes[plane].address.offset + image->planes[plane].size);
return addr;
}
return image->samples == 1;
}
+/* True when the device has the HW aux map (device->info.has_aux_map) and
+ * the given image plane's aux usage is a CCS usage.
+ */
+static inline bool
+anv_image_plane_uses_aux_map(const struct anv_device *device,
+                             const struct anv_image *image,
+                             uint32_t plane)
+{
+   return device->info.has_aux_map &&
+          isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
+}
+
void
anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
VkImageUsageFlagBits usage;
uint32_t attachment;
VkImageLayout layout;
+
+ /* Used only with attachment containing stencil data. */
+ VkImageLayout stencil_layout;
};
struct anv_subpass {
VkImageLayout final_layout;
VkImageLayout first_subpass_layout;
+ VkImageLayout stencil_initial_layout;
+ VkImageLayout stencil_final_layout;
+
/* The subpass id in which the attachment will be used last. */
uint32_t last_subpass_idx;
};
uint32_t stride;
/** Number of slots in this query pool */
uint32_t slots;
- struct anv_bo bo;
+ struct anv_bo * bo;
};
int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);
+int anv_get_physical_device_entrypoint_index(const char *name);
+
+const char *anv_get_instance_entry_name(int index);
+const char *anv_get_physical_device_entry_name(int index);
+const char *anv_get_device_entry_name(int index);
bool
anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance);
-
+bool
+anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
+ const struct anv_instance_extension_table *instance);
bool
anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance,
return subpass_id;
}
+struct gen_perf_config *anv_get_perf(const struct gen_device_info *devinfo, int fd);
+void anv_device_perf_init(struct anv_device *device);
+
#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \
\
static inline struct __anv_type * \