#define ANV_SVGS_VB_INDEX MAX_VBS
#define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
+/* We reserve this MI ALU register for the purpose of handling predication.
+ * Other code which uses the MI ALU should leave it alone.
+ */
+#define ANV_PREDICATE_RESULT_REG MI_ALU_REG15
+
#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
static inline uint32_t
*/
union anv_free_list {
struct {
- int32_t offset;
+ uint32_t offset;
/* A simple count that is incremented every time the head changes. */
uint32_t count;
uint64_t u64;
};
-#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } })
+#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
struct anv_block_state {
union {
};
};
+/* Iterate 'bo' over every anv_bo currently backing the block pool,
+ * i.e. (pool)->bos[0 .. (pool)->nbos - 1].
+ */
+#define anv_block_pool_foreach_bo(bo, pool) \
+ for (bo = (pool)->bos; bo != &(pool)->bos[(pool)->nbos]; bo++)
+
+#define ANV_MAX_BLOCK_POOL_BOS 20
+
struct anv_block_pool {
struct anv_device *device;
uint64_t bo_flags;
- struct anv_bo bo;
+ struct anv_bo bos[ANV_MAX_BLOCK_POOL_BOS];
+ struct anv_bo *bo;
+ uint32_t nbos;
+
+ uint64_t size;
/* The address where the start of the pool is pinned. The various bos that
* are created as the pool grows will have addresses in the range
*/
uint32_t center_bo_offset;
- /* Current memory map of the block pool. This pointer may or may not
- * point to the actual beginning of the block pool memory. If
- * anv_block_pool_alloc_back has ever been called, then this pointer
- * will point to the "center" position of the buffer and all offsets
- * (negative or positive) given out by the block pool alloc functions
- * will be valid relative to this pointer.
- *
- * In particular, map == bo.map + center_offset
- */
- void *map;
int fd;
/**
int32_t offset;
uint32_t alloc_size;
void *map;
+ uint32_t idx;
};
#define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
+/* One entry of an anv_state_table. 'next' is the table index (not a
+ * pointer) of the next entry in a free list; 'state' is the stored
+ * state itself.
+ */
+struct anv_free_entry {
+ uint32_t next;
+ struct anv_state state;
+};
+
+/* Growable table of anv_free_entry. 'map' points at the entries and
+ * 'size' is the current mapping size; 'state' tracks how much of the
+ * table has been handed out. NOTE(review): presumably the entries are
+ * mmapped from 'fd' and stale mappings are queued on 'mmap_cleanups'
+ * for teardown — confirm against the allocator implementation.
+ */
+struct anv_state_table {
+ struct anv_device *device;
+ int fd;
+ struct anv_free_entry *map;
+ uint32_t size;
+ struct anv_block_state state;
+ struct u_vector mmap_cleanups;
+};
+
struct anv_state_pool {
struct anv_block_pool block_pool;
+ struct anv_state_table table;
+
/* The size of blocks which will be allocated from the block pool */
uint32_t block_size;
uint64_t bo_flags);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
- uint32_t block_size);
+ uint32_t block_size, uint32_t *padding);
int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
uint32_t block_size);
+void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset);
VkResult anv_state_pool_init(struct anv_state_pool *pool,
struct anv_device *device,
struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
uint32_t size, uint32_t alignment);
+VkResult anv_state_table_init(struct anv_state_table *table,
+ struct anv_device *device,
+ uint32_t initial_entries);
+void anv_state_table_finish(struct anv_state_table *table);
+VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
+ uint32_t count);
+void anv_free_list_push(union anv_free_list *list,
+ struct anv_state_table *table,
+ uint32_t idx, uint32_t count);
+struct anv_state* anv_free_list_pop(union anv_free_list *list,
+ struct anv_state_table *table);
+
+
+/* Return a pointer to the state stored at table entry 'idx'. No bounds
+ * checking is performed; 'idx' is presumably one previously returned
+ * through anv_state_table_add() — confirm with callers.
+ */
+static inline struct anv_state *
+anv_state_table_get(struct anv_state_table *table, uint32_t idx)
+{
+ return &table->map[idx].state;
+}
/**
* Implements a pool of re-usable BOs. The interface is identical to that
* of block_pool except that each block is its own BO.
return device->default_mocs;
}
-static void inline
-anv_state_flush(struct anv_device *device, struct anv_state state)
-{
- if (device->info.has_llc)
- return;
-
- gen_flush_range(state.map, state.alloc_size);
-}
-
void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);
ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21),
/* This bit does not exist directly in PIPE_CONTROL. It means that render
- * target operations are ongoing. Some operations like copies on the
- * command streamer might need to be aware of this to trigger the
- * appropriate stall before they can proceed with the copy.
+ * target operations related to transfer commands with VkBuffer as
+ * destination are ongoing. Some operations like copies on the command
+ * streamer might need to be aware of this to trigger the appropriate stall
+ * before they can proceed with the copy.
*/
- ANV_PIPE_RENDER_TARGET_WRITES = (1 << 22),
+ ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 22),
};
#define ANV_PIPE_FLUSH_BITS ( \
for_each_bit(b, flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_SHADER_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as write
+ * destination through the data port. To make its content available
+ * to future operations, flush the data cache.
+ */
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as render
+ * target. To make its content available to future operations, flush
+ * the render target cache.
+ */
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as depth
+ * buffer. To make its content available to future operations, flush
+ * the depth cache.
+ */
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_TRANSFER_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as a
+ * transfer write destination. Generic write operations include color
+ * & depth operations as well as buffer operations like :
+ * - vkCmdClearColorImage()
+ * - vkCmdClearDepthStencilImage()
+ * - vkCmdBlitImage()
+ * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
+ *
+ * Most of these operations are implemented using Blorp which writes
+ * through the render target, so flush that cache to make it visible
+ * to future operations. And for depth related operations we also
+ * need to flush the depth cache.
+ */
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_MEMORY_WRITE_BIT:
+ /* We're transitioning a buffer for generic write operations. Flush
+ * all the caches.
+ */
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
default:
for_each_bit(b, flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+ /* Indirect draw commands take a buffer as input that we're going to
+ * read from the command streamer to load some of the HW registers
+ * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
+ * command streamer stall so that all the cache flushes have
+ * completed before the command streamer loads from memory.
+ */
+ pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ /* Indirect draw commands also set gl_BaseVertex & gl_BaseInstance
+ * through a vertex buffer, so invalidate that cache.
+ */
+ pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
+ * UBO from the buffer, so we need to invalidate constant cache.
+ */
+ pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
+ break;
case VK_ACCESS_INDEX_READ_BIT:
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+ /* We're transitioning a buffer to be used as input for vkCmdDraw*
+ * commands, so we invalidate the VF cache to make sure there is no
+ * stale data when we start rendering.
+ */
pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_UNIFORM_READ_BIT:
+ /* We're transitioning a buffer to be used as uniform data. Because
+ * uniform is accessed through the data port & sampler, we need to
+ * invalidate the texture cache (sampler) & constant cache (data
+ * port) to avoid stale data.
+ */
pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_SHADER_READ_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
+ /* Transitioning a buffer to be read through the sampler, so
+ * invalidate the texture cache, we don't want any stale data.
+ */
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_MEMORY_READ_BIT:
+ /* Transitioning a buffer for generic read, invalidate all the
+ * caches.
+ */
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_MEMORY_WRITE_BIT:
+ /* Generic write, make sure all previously written things land in
+ * memory.
+ */
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
+ case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
+ /* Transitioning a buffer for conditional rendering. We'll load the
+ * content of this buffer into HW registers using the command
+ * streamer, so we need to stall the command streamer to make sure
+ * any in-flight flush operations have completed.
+ */
+ pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ break;
default:
break; /* Nothing to do */
}
*/
bool hiz_enabled;
+ bool conditional_render_enabled;
+
/**
* Array length is anv_cmd_state::pass::attachment_count. Array content is
* valid only when recording a render pass instance.
void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
+void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
+
enum anv_fence_type {
ANV_FENCE_TYPE_NONE = 0,
ANV_FENCE_TYPE_BO,