#include "util/u_vector.h"
#include "util/u_math.h"
#include "util/vma.h"
+#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
+/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
+ *
+ * "The surface state model is used when a Binding Table Index (specified
+ * in the message descriptor) of less than 240 is specified. In this model,
+ * the Binding Table Index is used to index into the binding table, and the
+ * binding table entry contains a pointer to the SURFACE_STATE."
+ *
+ * Binding table values above 240 are used for various things in the hardware
+ * such as stateless, stateless with incoherent cache, SLM, and bindless.
+ */
+#define MAX_BINDING_TABLE_SIZE 240
+
/* The kernel relocation API has a limitation of a 32-bit delta value
* applied to the address before it is written which, in spite of it being
* unsigned, is treated as signed . Because of the way that this maps to
/* A simple count that is incremented every time the head changes. */
uint32_t count;
};
- uint64_t u64;
+ /* Make sure it's aligned to 64 bits. This will make atomic operations
+ * faster on 32 bit platforms.
+ */
+ uint64_t u64 __attribute__ ((aligned (8)));
};
#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
uint32_t next;
uint32_t end;
};
- uint64_t u64;
+ /* Make sure it's aligned to 64 bits. This will make atomic operations
+ * faster on 32 bit platforms.
+ */
+ uint64_t u64 __attribute__ ((aligned (8)));
};
};
};
#define ANV_MIN_STATE_SIZE_LOG2 6
-#define ANV_MAX_STATE_SIZE_LOG2 20
+#define ANV_MAX_STATE_SIZE_LOG2 21
#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
struct anv_free_entry *map;
uint32_t size;
struct anv_block_state state;
- struct u_vector mmap_cleanups;
+ struct u_vector cleanups;
};
struct anv_state_pool {
uint64_t vma_start;
uint64_t vma_size;
bool supports_48bit_addresses;
+ VkDeviceSize used;
};
struct anv_physical_device {
bool has_context_priority;
bool use_softpin;
bool has_context_isolation;
+ bool has_mem_available;
+ bool always_use_bindless;
+
+ /** True if we can access buffers using A64 messages */
+ bool has_a64_buffer_access;
+ /** True if we can use bindless access for images */
+ bool has_bindless_images;
+ /** True if we can use bindless access for samplers */
+ bool has_bindless_samplers;
struct anv_device_extension_table supported_extensions;
+ struct anv_physical_device_dispatch_table dispatch;
uint32_t eu_total;
uint32_t subslice_total;
bool pipeline_cache_enabled;
struct vk_debug_report_instance debug_report_callbacks;
+
+ struct driOptionCache dri_options;
+ struct driOptionCache available_dri_options;
};
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
uint32_t constant_data_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
+ const struct brw_compile_stats *stats,
+ uint32_t num_stats,
const struct nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map);
uint32_t constant_data_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
+ const struct brw_compile_stats *stats,
+ uint32_t num_stats,
const struct nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map);
struct anv_state border_colors;
+ struct anv_state slice_hash;
+
struct anv_queue queue;
struct anv_scratch_pool scratch_pool;
#define GEN11_MOCS GEN9_MOCS
#define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
+/* TigerLake MOCS */
+#define GEN12_MOCS GEN9_MOCS
+/* TC=1/LLC Only, LeCC=1/Uncacheable, LRUM=0, L3CC=1/Uncacheable */
+#define GEN12_EXTERNAL_MOCS (3 << 1)
+
struct anv_device_memory {
struct list_head link;
float PointWidth;
};
+/** Struct representing a sampled image descriptor
+ *
+ * This descriptor layout is used for sampled images, bare sampler, and
+ * combined image/sampler descriptors.
+ */
+struct anv_sampled_image_descriptor {
+ /** Bindless image handle
+ *
+ * This is expected to already be shifted such that the 20-bit
+ * SURFACE_STATE table index is in the top 20 bits.
+ */
+ uint32_t image;
+
+ /** Bindless sampler handle
+ *
+ * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
+ * to the dynamic state base address.
+ */
+ uint32_t sampler;
+};
+
+struct anv_texture_swizzle_descriptor {
+ /** Texture swizzle
+ *
+ * See also nir_intrinsic_channel_select_intel
+ */
+ uint8_t swizzle[4];
+
+ /** Unused padding to ensure the struct is a multiple of 64 bits */
+ uint32_t _pad;
+};
+
+/** Struct representing a storage image descriptor */
+struct anv_storage_image_descriptor {
+ /** Bindless image handles
+ *
+ * These are expected to already be shifted such that the 20-bit
+ * SURFACE_STATE table index is in the top 20 bits.
+ */
+ uint32_t read_write;
+ uint32_t write_only;
+};
+
+/** Struct representing a address/range descriptor
+ *
+ * The fields of this struct correspond directly to the data layout of
+ * nir_address_format_64bit_bounded_global addresses. The last field is the
+ * offset in the NIR address so it must be zero so that when you load the
+ * descriptor you get a pointer to the start of the range.
+ */
+struct anv_address_range_descriptor {
+ uint64_t address;
+ uint32_t range;
+ uint32_t zero;
+};
+
enum anv_descriptor_data {
/** The descriptor contains a BTI reference to a surface state */
ANV_DESCRIPTOR_SURFACE_STATE = (1 << 0),
ANV_DESCRIPTOR_IMAGE_PARAM = (1 << 3),
/** The descriptor contains auxiliary image layout data */
ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
+ /** anv_address_range_descriptor with a buffer address and range */
+ ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5),
+ /** Bindless surface handle */
+ ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
+ /** Storage image handles */
+ ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
+ /** Storage image handles */
+ ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
};
struct anv_descriptor_set_binding_layout {
VkDescriptorType type;
#endif
+ /* Flags provided when this binding was created */
+ VkDescriptorBindingFlagsEXT flags;
+
/* Bitfield representing the type of data this descriptor contains */
enum anv_descriptor_data data;
+ /* Maximum number of YCbCr texture/sampler planes */
+ uint8_t max_plane_count;
+
/* Number of array elements in this binding (or size in bytes for inline
* uniform data)
*/
unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice,
VkDescriptorType type);
+bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
+ const struct anv_descriptor_set_binding_layout *binding,
+ bool sampler);
+
+bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
+ const struct anv_descriptor_set_binding_layout *binding,
+ bool sampler);
+
struct anv_descriptor_set_layout {
/* Descriptor set layouts can be destroyed at almost any time */
uint32_t ref_cnt;
ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
- ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1,
ANV_CMD_DIRTY_PIPELINE = 1 << 9,
ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10,
ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11,
ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12,
+ ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
};
typedef uint32_t anv_cmd_dirty_mask_t;
+#define ANV_CMD_DIRTY_DYNAMIC_ALL \
+ (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT | \
+ ANV_CMD_DIRTY_DYNAMIC_SCISSOR | \
+ ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | \
+ ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | \
+ ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE)
+
+static inline enum anv_cmd_dirty_bits
+anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
+{
+ switch (vk_state) {
+ case VK_DYNAMIC_STATE_VIEWPORT:
+ return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
+ case VK_DYNAMIC_STATE_SCISSOR:
+ return ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
+ case VK_DYNAMIC_STATE_LINE_WIDTH:
+ return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
+ case VK_DYNAMIC_STATE_DEPTH_BIAS:
+ return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
+ case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
+ return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
+ case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
+ return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
+ case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
+ return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
+ return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+ return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
+ case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
+ default:
+ assert(!"Unsupported dynamic state");
+ return 0;
+ }
+}
+
+
enum anv_pipe_bits {
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0),
ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1),
};
#define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset))
+#define ANV_PARAM_IS_PUSH(param) ((uint32_t)(param) >> 16 == 1)
#define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff)
-struct anv_push_constants {
- /* Current allocated size of this push constants data structure.
- * Because a decent chunk of it may not be used (images on SKL, for
- * instance), we won't actually allocate the entire structure up-front.
- */
- uint32_t size;
+#define ANV_PARAM_DYN_OFFSET(offset) ((2 << 16) | (uint32_t)(offset))
+#define ANV_PARAM_IS_DYN_OFFSET(param) ((uint32_t)(param) >> 16 == 2)
+#define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff)
+struct anv_push_constants {
/* Push constant data provided by the client through vkPushConstants */
uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
uint32_t front;
uint32_t back;
} stencil_reference;
+
+ struct {
+ uint32_t factor;
+ uint16_t pattern;
+ } line_stipple;
};
extern const struct anv_dynamic_state default_dynamic_state;
-void anv_dynamic_state_copy(struct anv_dynamic_state *dest,
- const struct anv_dynamic_state *src,
- uint32_t copy_mask);
+uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
+ const struct anv_dynamic_state *src,
+ uint32_t copy_mask);
struct anv_surface_state {
struct anv_state state;
* have not been cleared yet when multiview is active.
*/
uint32_t pending_clear_views;
+ struct anv_image_view * image_view;
};
/** State tracking for particular pipeline bind point
bool xfb_enabled;
struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
VkShaderStageFlags push_constant_stages;
- struct anv_push_constants * push_constants[MESA_SHADER_STAGES];
+ struct anv_push_constants push_constants[MESA_SHADER_STAGES];
struct anv_state binding_tables[MESA_SHADER_STAGES];
struct anv_state samplers[MESA_SHADER_STAGES];
bool conditional_render_enabled;
+ /**
+ * Last rendering scale argument provided to
+ * genX(cmd_buffer_emit_hashing_mode)().
+ */
+ unsigned current_hash_scale;
+
/**
* Array length is anv_cmd_state::pass::attachment_count. Array content is
* valid only when recording a render pass instance.
VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
-VkResult
-anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer,
- gl_shader_stage stage, uint32_t size);
-#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \
- anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \
- (offsetof(struct anv_push_constants, field) + \
- sizeof(cmd_buffer->state.push_constants[0]->field)))
-
struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
const void *data, uint32_t size, uint32_t alignment);
struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
const struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
+ struct brw_compile_stats stats[3];
+ uint32_t num_stats;
+
struct nir_xfb_info *xfb_info;
struct anv_pipeline_bind_map bind_map;
const void *constant_data, uint32_t constant_data_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size, const void *prog_data_param,
+ const struct brw_compile_stats *stats, uint32_t num_stats,
const struct nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map);
anv_shader_bin_destroy(device, shader);
}
+/* 5 possible simultaneous shader stages and FS may have up to 3 binaries */
+#define MAX_PIPELINE_EXECUTABLES 7
+
+struct anv_pipeline_executable {
+ gl_shader_stage stage;
+
+ struct brw_compile_stats stats;
+
+ char *nir;
+ char *disasm;
+};
+
struct anv_pipeline {
struct anv_device * device;
struct anv_batch batch;
uint32_t batch_data[512];
struct anv_reloc_list batch_relocs;
- uint32_t dynamic_state_mask;
+ anv_cmd_dirty_mask_t dynamic_state_mask;
struct anv_dynamic_state dynamic_state;
+ void * mem_ctx;
+
+ VkPipelineCreateFlags flags;
struct anv_subpass * subpass;
bool needs_data_cache;
struct anv_shader_bin * shaders[MESA_SHADER_STAGES];
+ uint32_t num_executables;
+ struct anv_pipeline_executable executables[MAX_PIPELINE_EXECUTABLES];
+
struct {
const struct gen_l3_config * l3_config;
uint32_t total_size;
- unsigned entry_size[4];
} urb;
VkShaderStageFlags active_stages;
uint32_t array_size;
uint32_t samples; /**< VkImageCreateInfo::samples */
uint32_t n_planes;
- VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
+ VkImageUsageFlags usage; /**< VkImageCreateInfo::usage. */
+ VkImageUsageFlags stencil_usage;
VkImageCreateFlags create_flags; /* Flags used when creating image. */
VkImageTiling tiling; /** VkImageCreateInfo::tiling */
if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
return false;
- if (devinfo->gen < 8)
+ /* Allow this feature on BDW even though it is disabled in the BDW devinfo
+ * struct. There's documentation which suggests that this feature actually
+ * reduces performance on BDW, but it has only been observed to help so
+ * far. Sampling fast-cleared blocks on BDW must also be handled with care
+ * (see depth_stencil_attachment_compute_aux_usage() for more info).
+ */
+ if (devinfo->gen != 8 && !devinfo->has_sample_with_hiz)
return false;
return image->samples == 1;
void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
+ VkImageAspectFlagBits aspect,
uint32_t base_level, uint32_t level_count,
uint32_t base_layer, uint32_t layer_count);
uint32_t state[3][4];
uint32_t n_planes;
struct anv_ycbcr_conversion *conversion;
+
+ /* Blob of sampler state data which is guaranteed to be 32-byte aligned
+ * and with a 32-byte stride for use as bindless samplers.
+ */
+ struct anv_state bindless_state;
};
struct anv_framebuffer {
int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);
+int anv_get_physical_device_entrypoint_index(const char *name);
+
+const char *anv_get_instance_entry_name(int index);
+const char *anv_get_physical_device_entry_name(int index);
+const char *anv_get_device_entry_name(int index);
bool
anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance);
-
+bool
+anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
+ const struct anv_instance_extension_table *instance);
bool
anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance,
void anv_dump_start(struct anv_device *device, enum anv_dump_action actions);
void anv_dump_finish(void);
-void anv_dump_add_framebuffer(struct anv_cmd_buffer *cmd_buffer,
- struct anv_framebuffer *fb);
+void anv_dump_add_attachments(struct anv_cmd_buffer *cmd_buffer);
static inline uint32_t
anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
# define genX(x) gen11_##x
# include "anv_genX.h"
# undef genX
+# define genX(x) gen12_##x
+# include "anv_genX.h"
+# undef genX
#endif
#endif /* ANV_PRIVATE_H */