struct gen_aux_map_context;
struct gen_perf_config;
+struct gen_perf_counter_pass;
+struct gen_perf_query_result;
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_intel.h>
*/
#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
+/* We reserve this MI ALU register to pass around an offset computed from
+ * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
+ * Other code which uses the MI ALU should leave it alone.
+ */
+#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
+
/* For gen12 we set the streamout buffers using 4 separate commands
* (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
* of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
};
+struct anv_state_reserved_pool {
+ struct anv_state_pool *pool;
+ union anv_free_list reserved_blocks;
+ uint32_t count;
+};
+
struct anv_state_stream {
struct anv_state_pool *state_pool;
struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
uint32_t size, uint32_t alignment);
+void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
+ struct anv_state_pool *parent,
+ uint32_t count, uint32_t size,
+ uint32_t alignment);
+void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
+struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
+void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
+ struct anv_state state);
+
VkResult anv_state_table_init(struct anv_state_table *table,
struct anv_device *device,
uint32_t initial_entries);
VkDeviceSize size;
VkMemoryHeapFlags flags;
- /* Driver-internal book-keeping */
- VkDeviceSize used;
+ /** Driver-internal book-keeping.
+ *
+ * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
+ */
+ VkDeviceSize used __attribute__ ((aligned (8)));
};
struct anv_physical_device {
bool has_exec_fence;
bool has_syncobj;
bool has_syncobj_wait;
+ bool has_syncobj_wait_available;
bool has_context_priority;
bool has_context_isolation;
+ bool has_thread_submit;
bool has_mem_available;
bool has_mmap_offset;
uint64_t gtt_size;
bool use_softpin;
bool always_use_bindless;
+ bool use_call_secondary;
/** True if we can access buffers using A64 messages */
bool has_a64_buffer_access;
bool has_bindless_images;
/** True if we can use bindless access for samplers */
bool has_bindless_samplers;
+ /** True if we can use timeline semaphores through execbuf */
+ bool has_exec_timeline;
+
+ /** True if we can read the GPU timestamp register
+ *
+ * When running in a virtual context, the timestamp register is unreadable
+ * on Gen12+.
+ */
+ bool has_reg_timestamp;
/** True if this device has implicit AUX
*
uint32_t fence_count;
uint32_t fence_array_length;
struct drm_i915_gem_exec_fence * fences;
+ uint64_t * fence_values;
uint32_t temporary_semaphore_count;
uint32_t temporary_semaphore_array_length;
uint32_t sync_fd_semaphore_array_length;
/* Allocated only with non shareable timelines. */
- struct anv_timeline ** wait_timelines;
+ union {
+ struct anv_timeline ** wait_timelines;
+ uint32_t * wait_timeline_syncobjs;
+ };
uint32_t wait_timeline_count;
uint32_t wait_timeline_array_length;
uint64_t * wait_timeline_values;
*/
uintptr_t * fence_bos;
+ int perf_query_pass;
+
const VkAllocationCallbacks * alloc;
VkSystemAllocationScope alloc_scope;
struct anv_queue {
struct vk_object_base base;
- struct anv_device * device;
+ struct anv_device * device;
- /*
- * A list of struct anv_queue_submit to be submitted to i915.
- */
- struct list_head queued_submits;
+ VkDeviceQueueCreateFlags flags;
+
+ /* Set once from the device api calls. */
+ bool lost_signaled;
+
+ /* Only set once atomically by the queue */
+ int lost;
+ int error_line;
+ const char * error_file;
+ char error_msg[80];
+
+ /*
+ * This mutext protects the variables below.
+ */
+ pthread_mutex_t mutex;
+
+ pthread_t thread;
+ pthread_cond_t cond;
+
+ /*
+ * A list of struct anv_queue_submit to be submitted to i915.
+ */
+ struct list_head queued_submits;
- VkDeviceQueueCreateFlags flags;
+ /* Set to true to stop the submission thread */
+ bool quit;
};
struct anv_pipeline_cache {
struct hash_table * nir_cache;
struct hash_table * cache;
+
+ bool external_sync;
};
struct nir_xfb_info;
void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
struct anv_device *device,
- bool cache_enabled);
+ bool cache_enabled,
+ bool external_sync);
void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
struct anv_shader_bin *
const struct nir_shader *nir,
unsigned char sha1_key[20]);
+struct anv_address {
+ struct anv_bo *bo;
+ uint32_t offset;
+};
+
struct anv_device {
struct vk_device vk;
int fd;
bool can_chain_batches;
bool robust_buffer_access;
+ bool has_thread_submit;
struct anv_device_extension_table enabled_extensions;
struct anv_device_dispatch_table dispatch;
struct anv_state_pool binding_table_pool;
struct anv_state_pool surface_state_pool;
+ struct anv_state_reserved_pool custom_border_colors;
+
/** BO used for various workarounds
*
* There are a number of workarounds on our hardware which require writing
* For that, we use the high bytes (>= 1024) of the workaround BO.
*/
struct anv_bo * workaround_bo;
+ struct anv_address workaround_address;
+
struct anv_bo * trivial_batch_bo;
struct anv_bo * hiz_clear_bo;
struct anv_state null_surface_state;
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
int _lost;
+ int lost_reported;
struct gen_batch_decode_ctx decoder_ctx;
/*
uint64_t perf_metric; /* 0 if unset */
struct gen_aux_map_context *aux_map_ctx;
+
+ struct gen_debug_block_frame *debug_frame_desc;
};
static inline struct anv_instance *
void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);
-void _anv_device_set_all_queue_lost(struct anv_device *device);
+void _anv_device_report_lost(struct anv_device *device);
VkResult _anv_device_set_lost(struct anv_device *device,
const char *file, int line,
const char *msg, ...)
#define anv_device_set_lost(dev, ...) \
_anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
#define anv_queue_set_lost(queue, ...) \
- _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__)
+ (queue)->device->has_thread_submit ? \
+ _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) : \
+ _anv_device_set_lost(queue->device, __FILE__, __LINE__, __VA_ARGS__)
static inline bool
anv_device_is_lost(struct anv_device *device)
{
- return unlikely(p_atomic_read(&device->_lost));
+ int lost = p_atomic_read(&device->_lost);
+ if (unlikely(lost && !device->lost_reported))
+ _anv_device_report_lost(device);
+ return lost;
}
VkResult anv_device_query_status(struct anv_device *device);
int anv_gem_get_context_param(int fd, int context, uint32_t param,
uint64_t *value);
int anv_gem_get_param(int fd, uint32_t param);
+uint64_t anv_gem_get_drm_cap(int fd, uint32_t capability);
int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
-int anv_gem_get_aperture(int fd, uint64_t *size);
int anv_gem_gpu_get_reset_stats(struct anv_device *device,
uint32_t *active, uint32_t *pending);
int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
-int anv_gem_reg_read(struct anv_device *device,
- uint32_t offset, uint64_t *result);
+int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
void anv_gem_syncobj_reset(struct anv_device *device, uint32_t handle);
bool anv_gem_supports_syncobj_wait(int fd);
int anv_gem_syncobj_wait(struct anv_device *device,
- uint32_t *handles, uint32_t num_handles,
+ const uint32_t *handles, uint32_t num_handles,
int64_t abs_timeout_ns, bool wait_all);
+int anv_gem_syncobj_timeline_wait(struct anv_device *device,
+ const uint32_t *handles, const uint64_t *points,
+ uint32_t num_items, int64_t abs_timeout_ns,
+ bool wait_all, bool wait_materialize);
+int anv_gem_syncobj_timeline_signal(struct anv_device *device,
+ const uint32_t *handles, const uint64_t *points,
+ uint32_t num_items);
+int anv_gem_syncobj_timeline_query(struct anv_device *device,
+ const uint32_t *handles, uint64_t *points,
+ uint32_t num_items);
uint64_t anv_vma_alloc(struct anv_device *device,
uint64_t size, uint64_t align,
struct anv_batch {
const VkAllocationCallbacks * alloc;
+ struct anv_address start_addr;
+
void * start;
void * end;
void * next;
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
void *location, struct anv_bo *bo, uint32_t offset);
+struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
+
+static inline void
+anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
+ void *map, size_t size)
+{
+ batch->start_addr = addr;
+ batch->next = batch->start = map;
+ batch->end = map + size;
+}
static inline VkResult
anv_batch_set_error(struct anv_batch *batch, VkResult error)
return batch->status != VK_SUCCESS;
}
-struct anv_address {
- struct anv_bo *bo;
- uint32_t offset;
-};
-
#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
static inline bool
_dst = NULL; \
}))
+/* #define __gen_get_batch_dwords anv_batch_emit_dwords */
+/* #define __gen_get_batch_address anv_batch_address */
+/* #define __gen_address_value anv_address_physical */
+/* #define __gen_address_offset anv_address_add */
+
struct anv_device_memory {
struct vk_object_base base;
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
- /* For each shader stage, which offsets apply to that stage */
- uint16_t stage_dynamic_offsets[MESA_SHADER_STAGES];
+ /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
+ * this buffer
+ */
+ VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
/* Size of the descriptor buffer for this descriptor set */
uint32_t descriptor_buffer_size;
struct anv_descriptor_pool *pool;
struct anv_descriptor_set_layout *layout;
+
+ /* Amount of space occupied in the the pool by this descriptor set. It can
+ * be larger than the size of the descriptor set.
+ */
uint32_t size;
/* State relative to anv_descriptor_pool::bo */
}
enum anv_cmd_dirty_bits {
- ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
- ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
- ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
- ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
- ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
- ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
- ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
- ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
- ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
- ANV_CMD_DIRTY_PIPELINE = 1 << 9,
- ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10,
- ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11,
- ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12,
- ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
+ ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
+ ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
+ ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
+ ANV_CMD_DIRTY_PIPELINE = 1 << 9,
+ ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10,
+ ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11,
+ ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12,
+ ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1 << 14, /* VK_DYNAMIC_STATE_CULL_MODE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1 << 15, /* VK_DYNAMIC_STATE_FRONT_FACE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1 << 16, /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1 << 17, /* VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1 << 18, /* VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1 << 19, /* VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1 << 20, /* VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1 << 21, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
};
typedef uint32_t anv_cmd_dirty_mask_t;
-#define ANV_CMD_DIRTY_DYNAMIC_ALL \
- (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT | \
- ANV_CMD_DIRTY_DYNAMIC_SCISSOR | \
- ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | \
- ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | \
- ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | \
- ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | \
- ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | \
- ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | \
- ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | \
- ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE)
+#define ANV_CMD_DIRTY_DYNAMIC_ALL \
+ (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT | \
+ ANV_CMD_DIRTY_DYNAMIC_SCISSOR | \
+ ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | \
+ ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | \
+ ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE | \
+ ANV_CMD_DIRTY_DYNAMIC_CULL_MODE | \
+ ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE | \
+ ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | \
+ ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | \
+ ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | \
+ ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
static inline enum anv_cmd_dirty_bits
anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
{
switch (vk_state) {
case VK_DYNAMIC_STATE_VIEWPORT:
+ case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
case VK_DYNAMIC_STATE_SCISSOR:
+ case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
return ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
case VK_DYNAMIC_STATE_LINE_WIDTH:
return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
+ case VK_DYNAMIC_STATE_CULL_MODE_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_CULL_MODE;
+ case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
+ case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
+ case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
+ case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
+ case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
+ case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
+ case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
+ case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
+ return ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
default:
assert(!"Unsupported dynamic state");
return 0;
struct anv_vertex_binding {
struct anv_buffer * buffer;
VkDeviceSize offset;
+ VkDeviceSize stride;
+ VkDeviceSize size;
};
struct anv_xfb_binding {
/** Dynamic offsets for dynamic UBOs and SSBOs */
uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
- uint64_t push_reg_mask;
+ /* Robust access pushed registers. */
+ uint64_t push_reg_mask[MESA_SHADER_STAGES];
/** Pad out to a multiple of 32 bytes */
uint32_t pad[2];
uint32_t back;
} stencil_reference;
+ struct {
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } front;
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } back;
+ } stencil_op;
+
struct {
uint32_t factor;
uint16_t pattern;
} line_stipple;
+
+ VkCullModeFlags cull_mode;
+ VkFrontFace front_face;
+ VkPrimitiveTopology primitive_topology;
+ bool depth_test_enable;
+ bool depth_write_enable;
+ VkCompareOp depth_compare_op;
+ bool depth_bounds_test_enable;
+ bool stencil_test_enable;
+ bool dyn_vbo_stride;
+ bool dyn_vbo_size;
};
extern const struct anv_dynamic_state default_dynamic_state;
struct anv_cmd_pipeline_state {
struct anv_descriptor_set *descriptors[MAX_SETS];
struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
+
+ struct anv_push_constants push_constants;
+
+ /* Push constant state allocated when flushing push constants. */
+ struct anv_state push_constants_state;
};
/** State tracking for graphics pipeline
struct anv_vb_cache_range vb_bound_ranges[33];
struct anv_vb_cache_range vb_dirty_ranges[33];
+ VkShaderStageFlags push_constant_stages;
+
struct anv_dynamic_state dynamic;
+ uint32_t primitive_topology;
+
struct {
struct anv_buffer *index_buffer;
uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
struct anv_vertex_binding vertex_bindings[MAX_VBS];
bool xfb_enabled;
struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
- VkShaderStageFlags push_constant_stages;
- struct anv_push_constants push_constants[MESA_SHADER_STAGES];
struct anv_state binding_tables[MESA_SHADER_STAGES];
struct anv_state samplers[MESA_SHADER_STAGES];
ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
+ ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
};
struct anv_cmd_buffer {
VkCommandBufferUsageFlags usage_flags;
VkCommandBufferLevel level;
+ struct anv_query_pool *perf_query_pool;
+
struct anv_cmd_state state;
+ struct anv_address return_addr;
+
/* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
uint64_t intel_perf_marker;
};
const VkSemaphore *out_semaphores,
const uint64_t *out_signal_values,
uint32_t num_out_semaphores,
- VkFence fence);
+ VkFence fence,
+ int perf_query_pass);
VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
-anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
- gl_shader_stage stage);
+anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
ANV_SEMAPHORE_TYPE_SYNC_FILE,
ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
ANV_SEMAPHORE_TYPE_TIMELINE,
+ ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE,
};
struct anv_timeline_point {
struct {
uint32_t sf[7];
uint32_t depth_stencil_state[3];
+ uint32_t clip[4];
} gen7;
struct {
}
VkResult
-anv_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device,
- struct anv_pipeline_cache *cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *alloc);
+anv_pipeline_init(struct anv_pipeline *pipeline,
+ struct anv_device *device,
+ enum anv_pipeline_type type,
+ VkPipelineCreateFlags flags,
+ const VkAllocationCallbacks *pAllocator);
+
+void
+anv_pipeline_finish(struct anv_pipeline *pipeline,
+ struct anv_device *device,
+ const VkAllocationCallbacks *pAllocator);
+
+VkResult
+anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device,
+ struct anv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *alloc);
VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
const char *entrypoint,
const VkSpecializationInfo *spec_info);
-uint32_t
-anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline);
+struct anv_cs_parameters {
+ uint32_t group_size;
+ uint32_t simd_size;
+ uint32_t threads;
+};
-uint32_t
-anv_cs_threads(const struct anv_compute_pipeline *pipeline);
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
struct anv_format_plane {
enum isl_format isl_format:16;
}
static inline struct anv_address
-anv_image_get_clear_color_addr(const struct anv_device *device,
+anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
const struct anv_image *image,
VkImageAspectFlagBits aspect)
{
}
+/* Haswell border color is a bit of a disaster. Float and unorm formats use a
+ * straightforward 32-bit float color in the first 64 bytes. Instead of using
+ * a nice float/integer union like Gen8+, Haswell specifies the integer border
+ * color as a separate entry /after/ the float color. The layout of this entry
+ * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
+ *
+ * Since we don't know the format/bpp, we can't make any of the border colors
+ * containing '1' work for all formats, as it would be in the wrong place for
+ * some of them. We opt to make 32-bit integers work as this seems like the
+ * most common option. Fortunately, transparent black works regardless, as
+ * all zeroes is the same in every bit-size.
+ */
+struct hsw_border_color {
+ float float32[4];
+ uint32_t _pad0[12];
+ uint32_t uint32[4];
+ uint32_t _pad1[108];
+};
+
+struct gen8_border_color {
+ union {
+ float float32[4];
+ uint32_t uint32[4];
+ };
+ /* Pad out to 64 bytes */
+ uint32_t _pad[12];
+};
+
struct anv_ycbcr_conversion {
struct vk_object_base base;
* and with a 32-byte stride for use as bindless samplers.
*/
struct anv_state bindless_state;
+
+ struct anv_state custom_border_color;
};
struct anv_framebuffer {
#define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
+#define OA_SNAPSHOT_SIZE (256)
+#define ANV_KHR_PERF_QUERY_SIZE (ALIGN(sizeof(uint64_t), 64) + 2 * OA_SNAPSHOT_SIZE)
+
struct anv_query_pool {
struct vk_object_base base;
/** Number of slots in this query pool */
uint32_t slots;
struct anv_bo * bo;
+
+ /* Perf queries : */
+ struct anv_bo reset_bo;
+ uint32_t n_counters;
+ struct gen_perf_counter_pass *counter_pass;
+ uint32_t n_passes;
+ struct gen_perf_query_info **pass_query;
};
+static inline uint32_t khr_perf_query_preamble_offset(struct anv_query_pool *pool,
+ uint32_t pass)
+{
+ return pass * ANV_KHR_PERF_QUERY_SIZE + 8;
+}
+
int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);
int anv_get_physical_device_entrypoint_index(const char *name);
const struct anv_instance_extension_table *instance,
const struct anv_device_extension_table *device);
+void *anv_resolve_device_entrypoint(const struct gen_device_info *devinfo,
+ uint32_t index);
void *anv_lookup_entrypoint(const struct gen_device_info *devinfo,
const char *name);
ANV_DUMP_FRAMEBUFFERS_BIT = 0x1,
};
+#ifdef DEBUG
+PUBLIC
+#endif
void anv_dump_start(struct anv_device *device, enum anv_dump_action actions);
+#ifdef DEBUG
+PUBLIC
+#endif
void anv_dump_finish(void);
void anv_dump_add_attachments(struct anv_cmd_buffer *cmd_buffer);
return subpass_id;
}
+struct anv_performance_configuration_intel {
+ struct vk_object_base base;
+
+ struct gen_perf_registers *register_config;
+
+ uint64_t config_id;
+};
+
struct gen_perf_config *anv_get_perf(const struct gen_device_info *devinfo, int fd);
void anv_device_perf_init(struct anv_device *device);
+void anv_perf_write_pass_results(struct gen_perf_config *perf,
+ struct anv_query_pool *pool, uint32_t pass,
+ const struct gen_perf_query_result *accumulated_results,
+ union VkPerformanceCounterResultKHR *results);
#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
VK_FROM_HANDLE(__anv_type, __name, __handle)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
VkSamplerYcbcrConversion,
VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
+VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
+ VkPerformanceConfigurationINTEL,
+ VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
/* Gen-specific function declarations */
#ifdef genX