#include <pthread.h>
#include <assert.h>
#include <stdint.h>
-#include <i915_drm.h>
+#include "drm-uapi/i915_drm.h"
#ifdef HAVE_VALGRIND
#include <valgrind.h>
#endif
#include "common/gen_clflush.h"
+#include "common/gen_decoder.h"
#include "common/gen_gem.h"
#include "dev/gen_device_info.h"
#include "blorp/blorp.h"
#include "util/u_vector.h"
#include "util/u_math.h"
#include "util/vma.h"
+#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "anv_extensions.h"
#include "isl/isl.h"
-#include "common/gen_debug.h"
+#include "dev/gen_debug.h"
#include "common/intel_log.h"
#include "wsi_common.h"
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
#define HIGH_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */
-#define HIGH_HEAP_MAX_ADDRESS 0xfffeffffffffULL
#define LOW_HEAP_SIZE \
(LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
-#define HIGH_HEAP_SIZE \
- (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1)
#define DYNAMIC_STATE_POOL_SIZE \
(DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_SIZE \
#define ANV_HZ_FC_VAL 1.0f
#define MAX_VBS 28
+#define MAX_XFB_BUFFERS 4
+#define MAX_XFB_STREAMS 4
#define MAX_SETS 8
#define MAX_RTS 8
#define MAX_VIEWPORTS 16
#define MAX_SCISSORS 16
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_DYNAMIC_BUFFERS 16
-#define MAX_IMAGES 8
+#define MAX_IMAGES 64
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
+#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
+#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
+
+/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
+ *
+ * "The surface state model is used when a Binding Table Index (specified
+ * in the message descriptor) of less than 240 is specified. In this model,
+ * the Binding Table Index is used to index into the binding table, and the
+ * binding table entry contains a pointer to the SURFACE_STATE."
+ *
+ * Binding table values above 240 are used for various things in the hardware
+ * such as stateless, stateless with incoherent cache, SLM, and bindless.
+ */
+#define MAX_BINDING_TABLE_SIZE 240
/* The kernel relocation API has a limitation of a 32-bit delta value
* applied to the address before it is written which, in spite of it being
#define ANV_SVGS_VB_INDEX MAX_VBS
#define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
+/* We reserve this MI ALU register for the purpose of handling predication.
+ * Other code which uses the MI ALU should leave it alone.
+ */
+#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
+
#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
static inline uint32_t
*/
union anv_free_list {
struct {
- int32_t offset;
+ uint32_t offset;
/* A simple count that is incremented every time the head changes. */
uint32_t count;
};
- uint64_t u64;
+ /* Make sure it's aligned to 64 bits. This will make atomic operations
+ * faster on 32 bit platforms.
+ */
+ uint64_t u64 __attribute__ ((aligned (8)));
};
-#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } })
+#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
struct anv_block_state {
union {
uint32_t next;
uint32_t end;
};
- uint64_t u64;
+ /* Make sure it's aligned to 64 bits. This will make atomic operations
+ * faster on 32 bit platforms.
+ */
+ uint64_t u64 __attribute__ ((aligned (8)));
};
};
+#define anv_block_pool_foreach_bo(bo, pool) \
+ for (bo = (pool)->bos; bo != &(pool)->bos[(pool)->nbos]; bo++)
+
+#define ANV_MAX_BLOCK_POOL_BOS 20
+
struct anv_block_pool {
struct anv_device *device;
uint64_t bo_flags;
- struct anv_bo bo;
+ struct anv_bo bos[ANV_MAX_BLOCK_POOL_BOS];
+ struct anv_bo *bo;
+ uint32_t nbos;
+
+ uint64_t size;
/* The address where the start of the pool is pinned. The various bos that
* are created as the pool grows will have addresses in the range
* will be valid relative to this pointer.
*
* In particular, map == bo.map + center_offset
+ *
+ * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
+ * since it will handle the softpin case as well, where this points to NULL.
*/
void *map;
int fd;
int32_t offset;
uint32_t alloc_size;
void *map;
+ uint32_t idx;
};
#define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
};
#define ANV_MIN_STATE_SIZE_LOG2 6
-#define ANV_MAX_STATE_SIZE_LOG2 20
+#define ANV_MAX_STATE_SIZE_LOG2 21
#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
+struct anv_free_entry {
+ uint32_t next;
+ struct anv_state state;
+};
+
+struct anv_state_table {
+ struct anv_device *device;
+ int fd;
+ struct anv_free_entry *map;
+ uint32_t size;
+ struct anv_block_state state;
+ struct u_vector cleanups;
+};
+
struct anv_state_pool {
struct anv_block_pool block_pool;
+ struct anv_state_table table;
+
/* The size of blocks which will be allocated from the block pool */
uint32_t block_size;
uint64_t bo_flags);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
- uint32_t block_size);
+ uint32_t block_size, uint32_t *padding);
int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
uint32_t block_size);
+void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset);
VkResult anv_state_pool_init(struct anv_state_pool *pool,
struct anv_device *device,
struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
uint32_t size, uint32_t alignment);
+VkResult anv_state_table_init(struct anv_state_table *table,
+ struct anv_device *device,
+ uint32_t initial_entries);
+void anv_state_table_finish(struct anv_state_table *table);
+VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
+ uint32_t count);
+void anv_free_list_push(union anv_free_list *list,
+ struct anv_state_table *table,
+ uint32_t idx, uint32_t count);
+struct anv_state* anv_free_list_pop(union anv_free_list *list,
+ struct anv_state_table *table);
+
+
+static inline struct anv_state *
+anv_state_table_get(struct anv_state_table *table, uint32_t idx)
+{
+ return &table->map[idx].state;
+}
/**
* Implements a pool of re-usable BOs. The interface is identical to that
* of block_pool except that each block is its own BO.
struct anv_bo_cache *cache,
uint64_t size, uint64_t bo_flags,
struct anv_bo **bo);
+VkResult anv_bo_cache_import_host_ptr(struct anv_device *device,
+ struct anv_bo_cache *cache,
+ void *host_ptr, uint32_t size,
+ uint64_t bo_flags, struct anv_bo **bo_out);
VkResult anv_bo_cache_import(struct anv_device *device,
struct anv_bo_cache *cache,
int fd, uint64_t bo_flags,
VkMemoryHeapFlags flags;
/* Driver-internal book-keeping */
+ uint64_t vma_start;
+ uint64_t vma_size;
bool supports_48bit_addresses;
+ VkDeviceSize used;
};
struct anv_physical_device {
bool has_context_priority;
bool use_softpin;
bool has_context_isolation;
+ bool has_mem_available;
+ bool always_use_bindless;
+
+ /** True if we can access buffers using A64 messages */
+ bool has_a64_buffer_access;
+ /** True if we can use bindless access for images */
+ bool has_bindless_images;
+ /** True if we can use bindless access for samplers */
+ bool has_bindless_samplers;
struct anv_device_extension_table supported_extensions;
+ struct anv_physical_device_dispatch_table dispatch;
uint32_t eu_total;
uint32_t subslice_total;
bool pipeline_cache_enabled;
struct vk_debug_report_instance debug_report_callbacks;
+
+ struct driOptionCache dri_options;
+ struct driOptionCache available_dri_options;
};
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
struct anv_device * device;
pthread_mutex_t mutex;
+ struct hash_table * nir_cache;
+
struct hash_table * cache;
};
+struct nir_xfb_info;
struct anv_pipeline_bind_map;
void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
uint32_t constant_data_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
+ const struct brw_compile_stats *stats,
+ uint32_t num_stats,
+ const struct nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map);
struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
struct anv_pipeline_cache *cache,
- const void *key_data, uint32_t key_size);
+ const void *key_data, uint32_t key_size,
+ bool *user_cache_bit);
struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
uint32_t constant_data_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
+ const struct brw_compile_stats *stats,
+ uint32_t num_stats,
+ const struct nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map);
+struct nir_shader;
+struct nir_shader_compiler_options;
+
+struct nir_shader *
+anv_device_search_for_nir(struct anv_device *device,
+ struct anv_pipeline_cache *cache,
+ const struct nir_shader_compiler_options *nir_options,
+ unsigned char sha1_key[20],
+ void *mem_ctx);
+
+void
+anv_device_upload_nir(struct anv_device *device,
+ struct anv_pipeline_cache *cache,
+ const struct nir_shader *nir,
+ unsigned char sha1_key[20]);
+
struct anv_device {
VK_LOADER_DATA _loader_data;
uint64_t vma_lo_available;
uint64_t vma_hi_available;
+ /** List of all anv_device_memory objects */
+ struct list_head memory_objects;
+
struct anv_bo_pool batch_bo_pool;
struct anv_bo_cache bo_cache;
struct anv_state border_colors;
+ struct anv_state slice_hash;
+
struct anv_queue queue;
struct anv_scratch_pool scratch_pool;
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
bool _lost;
+
+ struct gen_batch_decode_ctx decoder_ctx;
+ /*
+ * When decoding a anv_cmd_buffer, we might need to search for BOs through
+ * the cmd_buffer's list.
+ */
+ struct anv_cmd_buffer *cmd_buffer_being_decoded;
};
static inline struct anv_state_pool *
return device->default_mocs;
}
-static void inline
-anv_state_flush(struct anv_device *device, struct anv_state state)
-{
- if (device->info.has_llc)
- return;
-
- gen_flush_range(state.map, state.alloc_size);
-}
-
void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);
*/
/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
-#define GEN9_MOCS 2
+#define GEN9_MOCS (2 << 1)
/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
-#define GEN9_EXTERNAL_MOCS 1
+#define GEN9_EXTERNAL_MOCS (1 << 1)
/* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
#define GEN10_MOCS GEN9_MOCS
#define GEN11_MOCS GEN9_MOCS
#define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
+/* TigerLake MOCS */
+#define GEN12_MOCS GEN9_MOCS
+/* TC=1/LLC Only, LeCC=1/Uncacheable, LRUM=0, L3CC=1/Uncacheable */
+#define GEN12_EXTERNAL_MOCS (3 << 1)
+
struct anv_device_memory {
+ struct list_head link;
+
struct anv_bo * bo;
struct anv_memory_type * type;
VkDeviceSize map_size;
* which we must release when memory is freed.
*/
struct AHardwareBuffer * ahw;
+
+ /* If set, this memory comes from a host pointer. */
+ void * host_ptr;
};
/**
float PointWidth;
};
+/** Struct representing a sampled image descriptor
+ *
+ * This descriptor layout is used for sampled images, bare sampler, and
+ * combined image/sampler descriptors.
+ */
+struct anv_sampled_image_descriptor {
+ /** Bindless image handle
+ *
+ * This is expected to already be shifted such that the 20-bit
+ * SURFACE_STATE table index is in the top 20 bits.
+ */
+ uint32_t image;
+
+ /** Bindless sampler handle
+ *
+ * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
+ * to the dynamic state base address.
+ */
+ uint32_t sampler;
+};
+
+struct anv_texture_swizzle_descriptor {
+ /** Texture swizzle
+ *
+ * See also nir_intrinsic_channel_select_intel
+ */
+ uint8_t swizzle[4];
+
+ /** Unused padding to ensure the struct is a multiple of 64 bits */
+ uint32_t _pad;
+};
+
+/** Struct representing a storage image descriptor */
+struct anv_storage_image_descriptor {
+ /** Bindless image handles
+ *
+ * These are expected to already be shifted such that the 20-bit
+ * SURFACE_STATE table index is in the top 20 bits.
+ */
+ uint32_t read_write;
+ uint32_t write_only;
+};
+
+/** Struct representing a address/range descriptor
+ *
+ * The fields of this struct correspond directly to the data layout of
+ * nir_address_format_64bit_bounded_global addresses. The last field is the
+ * offset in the NIR address so it must be zero so that when you load the
+ * descriptor you get a pointer to the start of the range.
+ */
+struct anv_address_range_descriptor {
+ uint64_t address;
+ uint32_t range;
+ uint32_t zero;
+};
+
+enum anv_descriptor_data {
+ /** The descriptor contains a BTI reference to a surface state */
+ ANV_DESCRIPTOR_SURFACE_STATE = (1 << 0),
+ /** The descriptor contains a BTI reference to a sampler state */
+ ANV_DESCRIPTOR_SAMPLER_STATE = (1 << 1),
+ /** The descriptor contains an actual buffer view */
+ ANV_DESCRIPTOR_BUFFER_VIEW = (1 << 2),
+ /** The descriptor contains auxiliary image layout data */
+ ANV_DESCRIPTOR_IMAGE_PARAM = (1 << 3),
+ /** The descriptor contains auxiliary image layout data */
+ ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
+ /** anv_address_range_descriptor with a buffer address and range */
+ ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5),
+ /** Bindless surface handle */
+ ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
+ /** Storage image handles */
+ ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
+ /** Storage image handles */
+ ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
+};
+
struct anv_descriptor_set_binding_layout {
#ifndef NDEBUG
/* The type of the descriptors in this binding */
VkDescriptorType type;
#endif
- /* Number of array elements in this binding */
+ /* Flags provided when this binding was created */
+ VkDescriptorBindingFlagsEXT flags;
+
+ /* Bitfield representing the type of data this descriptor contains */
+ enum anv_descriptor_data data;
+
+ /* Maximum number of YCbCr texture/sampler planes */
+ uint8_t max_plane_count;
+
+ /* Number of array elements in this binding (or size in bytes for inline
+ * uniform data)
+ */
uint16_t array_size;
/* Index into the flattend descriptor set */
int16_t dynamic_offset_index;
/* Index into the descriptor set buffer views */
- int16_t buffer_index;
-
- struct {
- /* Index into the binding table for the associated surface */
- int16_t surface_index;
-
- /* Index into the sampler table for the associated sampler */
- int16_t sampler_index;
+ int16_t buffer_view_index;
- /* Index into the image table for the associated image */
- int16_t image_index;
- } stage[MESA_SHADER_STAGES];
+ /* Offset into the descriptor buffer where this descriptor lives */
+ uint32_t descriptor_offset;
/* Immutable samplers (or NULL if no immutable samplers) */
struct anv_sampler **immutable_samplers;
};
+unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout);
+
+unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice,
+ VkDescriptorType type);
+
+bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
+ const struct anv_descriptor_set_binding_layout *binding,
+ bool sampler);
+
+bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
+ const struct anv_descriptor_set_binding_layout *binding,
+ bool sampler);
+
struct anv_descriptor_set_layout {
/* Descriptor set layouts can be destroyed at almost any time */
uint32_t ref_cnt;
/* Shader stages affected by this descriptor set */
uint16_t shader_stages;
- /* Number of buffers in this descriptor set */
- uint16_t buffer_count;
+ /* Number of buffer views in this descriptor set */
+ uint16_t buffer_view_count;
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
+ /* Size of the descriptor buffer for this descriptor set */
+ uint32_t descriptor_buffer_size;
+
/* Bindings in this descriptor set */
struct anv_descriptor_set_binding_layout binding[0];
};
};
struct anv_descriptor_set {
+ struct anv_descriptor_pool *pool;
struct anv_descriptor_set_layout *layout;
uint32_t size;
- uint32_t buffer_count;
+
+ /* State relative to anv_descriptor_pool::bo */
+ struct anv_state desc_mem;
+ /* Surface state for the descriptor buffer */
+ struct anv_state desc_surface_state;
+
+ uint32_t buffer_view_count;
struct anv_buffer_view *buffer_views;
+
+ /* Link to descriptor pool's desc_sets list . */
+ struct list_head pool_link;
+
struct anv_descriptor descriptors[0];
};
/* Put this field right behind anv_descriptor_set so it fills up the
* descriptors[0] field. */
struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
+
+ /** True if the descriptor set buffer has been referenced by a draw or
+ * dispatch command.
+ */
+ bool set_used_on_gpu;
+
struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
};
uint32_t next;
uint32_t free_list;
+ struct anv_bo bo;
+ struct util_vma_heap bo_heap;
+
struct anv_state_stream surface_state_stream;
void *surface_state_free_list;
+ struct list_head desc_sets;
+
char data[0];
};
/* The descriptor set this template corresponds to. This value is only
* valid if the template was created with the templateType
- * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR.
+ * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
*/
uint8_t set;
anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout);
void
-anv_descriptor_set_write_image_view(struct anv_descriptor_set *set,
- const struct gen_device_info * const devinfo,
+anv_descriptor_set_write_image_view(struct anv_device *device,
+ struct anv_descriptor_set *set,
const VkDescriptorImageInfo * const info,
VkDescriptorType type,
uint32_t binding,
uint32_t element);
void
-anv_descriptor_set_write_buffer_view(struct anv_descriptor_set *set,
+anv_descriptor_set_write_buffer_view(struct anv_device *device,
+ struct anv_descriptor_set *set,
VkDescriptorType type,
struct anv_buffer_view *buffer_view,
uint32_t binding,
uint32_t element);
void
-anv_descriptor_set_write_buffer(struct anv_descriptor_set *set,
- struct anv_device *device,
+anv_descriptor_set_write_buffer(struct anv_device *device,
+ struct anv_descriptor_set *set,
struct anv_state_stream *alloc_stream,
VkDescriptorType type,
struct anv_buffer *buffer,
uint32_t element,
VkDeviceSize offset,
VkDeviceSize range);
+void
+anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
+ struct anv_descriptor_set *set,
+ uint32_t binding,
+ const void *data,
+ size_t offset,
+ size_t size);
void
-anv_descriptor_set_write_template(struct anv_descriptor_set *set,
- struct anv_device *device,
+anv_descriptor_set_write_template(struct anv_device *device,
+ struct anv_descriptor_set *set,
struct anv_state_stream *alloc_stream,
const struct anv_descriptor_update_template *template,
const void *data);
struct anv_descriptor_pool *pool,
struct anv_descriptor_set *set);
+#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 3)
+#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
uint32_t num_sets;
- struct {
- bool has_dynamic_offsets;
- } stage[MESA_SHADER_STAGES];
-
unsigned char sha1[20];
};
ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
- ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1,
ANV_CMD_DIRTY_PIPELINE = 1 << 9,
ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10,
ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11,
+ ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12,
+ ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
};
typedef uint32_t anv_cmd_dirty_mask_t;
+#define ANV_CMD_DIRTY_DYNAMIC_ALL \
+ (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT | \
+ ANV_CMD_DIRTY_DYNAMIC_SCISSOR | \
+ ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | \
+ ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | \
+ ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE)
+
+static inline enum anv_cmd_dirty_bits
+anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
+{
+ switch (vk_state) {
+ case VK_DYNAMIC_STATE_VIEWPORT:
+ return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
+ case VK_DYNAMIC_STATE_SCISSOR:
+ return ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
+ case VK_DYNAMIC_STATE_LINE_WIDTH:
+ return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
+ case VK_DYNAMIC_STATE_DEPTH_BIAS:
+ return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
+ case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
+ return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
+ case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
+ return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
+ case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
+ return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
+ return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+ return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
+ case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
+ default:
+ assert(!"Unsupported dynamic state");
+ return 0;
+ }
+}
+
+
enum anv_pipe_bits {
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0),
ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1),
ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21),
/* This bit does not exist directly in PIPE_CONTROL. It means that render
- * target operations are ongoing. Some operations like copies on the
- * command streamer might need to be aware of this to trigger the
- * appropriate stall before they can proceed with the copy.
+ * target operations related to transfer commands with VkBuffer as
+ * destination are ongoing. Some operations like copies on the command
+ * streamer might need to be aware of this to trigger the appropriate stall
+ * before they can proceed with the copy.
*/
- ANV_PIPE_RENDER_TARGET_WRITES = (1 << 22),
+ ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 22),
};
#define ANV_PIPE_FLUSH_BITS ( \
for_each_bit(b, flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_SHADER_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as write
+ * destination through the data port. To make its content available
+ * to future operations, flush the data cache.
+ */
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as render
+ * target. To make its content available to future operations, flush
+ * the render target cache.
+ */
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as depth
+ * buffer. To make its content available to future operations, flush
+ * the depth cache.
+ */
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_TRANSFER_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as a
+ * transfer write destination. Generic write operations include color
+ * & depth operations as well as buffer operations like :
+ * - vkCmdClearColorImage()
+ * - vkCmdClearDepthStencilImage()
+ * - vkCmdBlitImage()
+ * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
+ *
+ * Most of these operations are implemented using Blorp which writes
+ * through the render target, so flush that cache to make it visible
+ * to future operations. And for depth related operations we also
+ * need to flush the depth cache.
+ */
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_MEMORY_WRITE_BIT:
+ /* We're transitioning a buffer for generic write operations. Flush
+ * all the caches.
+ */
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
default:
for_each_bit(b, flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+ /* Indirect draw commands take a buffer as input that we're going to
+ * read from the command streamer to load some of the HW registers
+ * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
+ * command streamer stall so that all the cache flushes have
+ * completed before the command streamer loads from memory.
+ */
+ pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
+ * through a vertex buffer, so invalidate that cache.
+ */
+ pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ /* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a
+ * UBO from the buffer, so we need to invalidate constant cache.
+ */
+ pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
+ break;
case VK_ACCESS_INDEX_READ_BIT:
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+ /* We transitioning a buffer to be used for as input for vkCmdDraw*
+ * commands, so we invalidate the VF cache to make sure there is no
+ * stale data when we start rendering.
+ */
pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_UNIFORM_READ_BIT:
+ /* We transitioning a buffer to be used as uniform data. Because
+ * uniform is accessed through the data port & sampler, we need to
+ * invalidate the texture cache (sampler) & constant cache (data
+ * port) to avoid stale data.
+ */
pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_SHADER_READ_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
+ /* Transitioning a buffer to be read through the sampler, so
+ * invalidate the texture cache, we don't want any stale data.
+ */
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_MEMORY_READ_BIT:
+ /* Transitioning a buffer for generic read, invalidate all the
+ * caches.
+ */
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_MEMORY_WRITE_BIT:
+ /* Generic write, make sure all previously written things land in
+ * memory.
+ */
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
+ case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
+ /* Transitioning a buffer for conditional rendering. We'll load the
+ * content of this buffer into HW registers using the command
+ * streamer, so we need to stall the command streamer to make sure
+ * any in-flight flush operations have completed.
+ */
+ pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ break;
default:
break; /* Nothing to do */
}
VkDeviceSize offset;
};
+struct anv_xfb_binding {
+ struct anv_buffer * buffer;
+ VkDeviceSize offset;
+ VkDeviceSize size;
+};
+
#define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset))
+#define ANV_PARAM_IS_PUSH(param) ((uint32_t)(param) >> 16 == 1)
#define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff)
-struct anv_push_constants {
- /* Current allocated size of this push constants data structure.
- * Because a decent chunk of it may not be used (images on SKL, for
- * instance), we won't actually allocate the entire structure up-front.
- */
- uint32_t size;
+#define ANV_PARAM_DYN_OFFSET(offset) ((2 << 16) | (uint32_t)(offset))
+#define ANV_PARAM_IS_DYN_OFFSET(param) ((uint32_t)(param) >> 16 == 2)
+#define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff)
+struct anv_push_constants {
/* Push constant data provided by the client through vkPushConstants */
uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
/* Used for vkCmdDispatchBase */
uint32_t base_work_group_id[3];
-
- /* Image data for image_load_store on pre-SKL */
- struct brw_image_param images[MAX_IMAGES];
};
struct anv_dynamic_state {
uint32_t front;
uint32_t back;
} stencil_reference;
+
+ struct {
+ uint32_t factor;
+ uint16_t pattern;
+ } line_stipple;
};
extern const struct anv_dynamic_state default_dynamic_state;
-void anv_dynamic_state_copy(struct anv_dynamic_state *dest,
- const struct anv_dynamic_state *src,
- uint32_t copy_mask);
+uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
+ const struct anv_dynamic_state *src,
+ uint32_t copy_mask);
struct anv_surface_state {
struct anv_state state;
* have not been cleared yet when multiview is active.
*/
uint32_t pending_clear_views;
+ struct anv_image_view * image_view;
};
/** State tracking for particular pipeline bind point
VkRect2D render_area;
uint32_t restart_index;
struct anv_vertex_binding vertex_bindings[MAX_VBS];
+ bool xfb_enabled;
+ struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
VkShaderStageFlags push_constant_stages;
- struct anv_push_constants * push_constants[MESA_SHADER_STAGES];
+ struct anv_push_constants push_constants[MESA_SHADER_STAGES];
struct anv_state binding_tables[MESA_SHADER_STAGES];
struct anv_state samplers[MESA_SHADER_STAGES];
*/
bool hiz_enabled;
+ bool conditional_render_enabled;
+
+ /**
+ * Last rendering scale argument provided to
+ * genX(cmd_buffer_emit_hashing_mode)().
+ */
+ unsigned current_hash_scale;
+
/**
* Array length is anv_cmd_state::pass::attachment_count. Array content is
* valid only when recording a render pass instance.
VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
-VkResult
-anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer,
- gl_shader_stage stage, uint32_t size);
-#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \
- anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \
- (offsetof(struct anv_push_constants, field) + \
- sizeof(cmd_buffer->state.push_constants[0]->field)))
-
struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
const void *data, uint32_t size, uint32_t alignment);
struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
-void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer);
-
const struct anv_image_view *
anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
+void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
+
enum anv_fence_type {
ANV_FENCE_TYPE_NONE = 0,
ANV_FENCE_TYPE_BO,
struct anv_pipeline_bind_map {
uint32_t surface_count;
uint32_t sampler_count;
- uint32_t image_count;
struct anv_pipeline_binding * surface_to_descriptor;
struct anv_pipeline_binding * sampler_to_descriptor;
const struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
+ struct brw_compile_stats stats[3];
+ uint32_t num_stats;
+
+ struct nir_xfb_info *xfb_info;
+
struct anv_pipeline_bind_map bind_map;
};
const void *constant_data, uint32_t constant_data_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size, const void *prog_data_param,
+ const struct brw_compile_stats *stats, uint32_t num_stats,
+ const struct nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map);
void
anv_shader_bin_destroy(device, shader);
}
+/* 5 possible simultaneous shader stages and FS may have up to 3 binaries */
+#define MAX_PIPELINE_EXECUTABLES 7
+
+struct anv_pipeline_executable {
+ gl_shader_stage stage;
+
+ struct brw_compile_stats stats;
+
+ char *nir;
+ char *disasm;
+};
+
struct anv_pipeline {
struct anv_device * device;
struct anv_batch batch;
uint32_t batch_data[512];
struct anv_reloc_list batch_relocs;
- uint32_t dynamic_state_mask;
+ anv_cmd_dirty_mask_t dynamic_state_mask;
struct anv_dynamic_state dynamic_state;
+ void * mem_ctx;
+
+ VkPipelineCreateFlags flags;
struct anv_subpass * subpass;
bool needs_data_cache;
struct anv_shader_bin * shaders[MESA_SHADER_STAGES];
+ uint32_t num_executables;
+ struct anv_pipeline_executable executables[MAX_PIPELINE_EXECUTABLES];
+
struct {
const struct gen_l3_config * l3_config;
uint32_t total_size;
uint32_t instance_divisor;
} vb[MAX_VBS];
+ uint8_t xfb_used;
+
bool primitive_restart;
uint32_t topology;
bool writes_stencil;
bool stencil_test_enable;
bool depth_clamp_enable;
+ bool depth_clip_enable;
bool sample_shading_enable;
bool kill_pixel;
};
struct anv_image {
- VkImageType type;
+ VkImageType type; /**< VkImageCreateInfo::imageType */
/* The original VkFormat provided by the client. This may not match any
* of the actual surface formats.
*/
uint32_t array_size;
uint32_t samples; /**< VkImageCreateInfo::samples */
uint32_t n_planes;
- VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
+ VkImageUsageFlags usage; /**< VkImageCreateInfo::usage. */
+ VkImageUsageFlags stencil_usage;
VkImageCreateFlags create_flags; /* Flags used when creating image. */
VkImageTiling tiling; /** VkImageCreateInfo::tiling */
if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
return false;
- if (devinfo->gen < 8)
+ /* Allow this feature on BDW even though it is disabled in the BDW devinfo
+ * struct. There's documentation which suggests that this feature actually
+ * reduces performance on BDW, but it has only been observed to help so
+ * far. Sampling fast-cleared blocks on BDW must also be handled with care
+ * (see depth_stencil_attachment_compute_aux_usage() for more info).
+ */
+ if (devinfo->gen != 8 && !devinfo->has_sample_with_hiz)
return false;
return image->samples == 1;
VkRect2D area,
float depth_value, uint8_t stencil_value);
void
+anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_image *src_image,
+ enum isl_aux_usage src_aux_usage,
+ uint32_t src_level, uint32_t src_base_layer,
+ const struct anv_image *dst_image,
+ enum isl_aux_usage dst_aux_usage,
+ uint32_t dst_level, uint32_t dst_base_layer,
+ VkImageAspectFlagBits aspect,
+ uint32_t src_x, uint32_t src_y,
+ uint32_t dst_x, uint32_t dst_y,
+ uint32_t width, uint32_t height,
+ uint32_t layer_count,
+ enum blorp_filter filter);
+void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
VkImageAspectFlagBits aspect, uint32_t level,
void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
+ VkImageAspectFlagBits aspect,
uint32_t base_level, uint32_t level_count,
uint32_t base_layer, uint32_t layer_count);
uint32_t state[3][4];
uint32_t n_planes;
struct anv_ycbcr_conversion *conversion;
+
+ /* Blob of sampler state data which is guaranteed to be 32-byte aligned
+ * and with a 32-byte stride for use as bindless samplers.
+ */
+ struct anv_state bindless_state;
};
struct anv_framebuffer {
struct anv_subpass_attachment * resolve_attachments;
struct anv_subpass_attachment * depth_stencil_attachment;
+ struct anv_subpass_attachment * ds_resolve_attachment;
+ VkResolveModeFlagBitsKHR depth_resolve_mode;
+ VkResolveModeFlagBitsKHR stencil_resolve_mode;
uint32_t view_mask;
/** Subpass has a depth/stencil self-dependency */
bool has_ds_self_dep;
- /** Subpass has at least one resolve attachment */
- bool has_resolve;
+ /** Subpass has at least one color resolve attachment */
+ bool has_color_resolve;
};
static inline unsigned
int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);
+int anv_get_physical_device_entrypoint_index(const char *name);
+
+const char *anv_get_instance_entry_name(int index);
+const char *anv_get_physical_device_entry_name(int index);
+const char *anv_get_device_entry_name(int index);
bool
anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance);
-
+bool
+anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
+ const struct anv_instance_extension_table *instance);
bool
anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance,
void anv_dump_start(struct anv_device *device, enum anv_dump_action actions);
void anv_dump_finish(void);
-void anv_dump_add_framebuffer(struct anv_cmd_buffer *cmd_buffer,
- struct anv_framebuffer *fb);
+void anv_dump_add_attachments(struct anv_cmd_buffer *cmd_buffer);
static inline uint32_t
anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, VkDescriptorPool)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout)
-ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, VkDescriptorUpdateTemplateKHR)
+ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, VkDescriptorUpdateTemplate)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent)
# define genX(x) gen11_##x
# include "anv_genX.h"
# undef genX
+# define genX(x) gen12_##x
+# include "anv_genX.h"
+# undef genX
#endif
#endif /* ANV_PRIVATE_H */