#include "anv_private.h"
+#include "gen7_pack.h"
+#include "gen8_pack.h"
+
/** \file anv_batch_chain.c
*
* This file contains functions related to anv_cmd_buffer as a data
static VkResult
anv_reloc_list_init_clone(struct anv_reloc_list *list,
- struct anv_device *device,
+ const VkAllocationCallbacks *alloc,
const struct anv_reloc_list *other_list)
{
if (other_list) {
}
list->relocs =
- anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8,
- VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (list->relocs == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
list->reloc_bos =
- anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8,
- VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (list->reloc_bos == NULL) {
- anv_device_free(device, list->relocs);
+ anv_free(alloc, list->relocs);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
}
VkResult
-anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device)
+anv_reloc_list_init(struct anv_reloc_list *list,
+ const VkAllocationCallbacks *alloc)
{
- return anv_reloc_list_init_clone(list, device, NULL);
+ return anv_reloc_list_init_clone(list, alloc, NULL);
}
void
-anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device)
+anv_reloc_list_finish(struct anv_reloc_list *list,
+ const VkAllocationCallbacks *alloc)
{
- anv_device_free(device, list->relocs);
- anv_device_free(device, list->reloc_bos);
+ anv_free(alloc, list->relocs);
+ anv_free(alloc, list->reloc_bos);
}
static VkResult
-anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device,
+anv_reloc_list_grow(struct anv_reloc_list *list,
+ const VkAllocationCallbacks *alloc,
size_t num_additional_relocs)
{
if (list->num_relocs + num_additional_relocs <= list->array_length)
new_length *= 2;
struct drm_i915_gem_relocation_entry *new_relocs =
- anv_device_alloc(device, new_length * sizeof(*list->relocs), 8,
- VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ anv_alloc(alloc, new_length * sizeof(*list->relocs), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (new_relocs == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
struct anv_bo **new_reloc_bos =
- anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8,
- VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (new_relocs == NULL) {
+   if (new_reloc_bos == NULL) {
- anv_device_free(device, new_relocs);
+ anv_free(alloc, new_relocs);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
memcpy(new_reloc_bos, list->reloc_bos,
list->num_relocs * sizeof(*list->reloc_bos));
- anv_device_free(device, list->relocs);
- anv_device_free(device, list->reloc_bos);
+ anv_free(alloc, list->relocs);
+ anv_free(alloc, list->reloc_bos);
list->array_length = new_length;
list->relocs = new_relocs;
}
uint64_t
-anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device,
+anv_reloc_list_add(struct anv_reloc_list *list,
+ const VkAllocationCallbacks *alloc,
uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
{
struct drm_i915_gem_relocation_entry *entry;
int index;
- anv_reloc_list_grow(list, device, 1);
+ anv_reloc_list_grow(list, alloc, 1);
/* TODO: Handle failure */
/* XXX: Can we use I915_EXEC_HANDLE_LUT? */
entry->presumed_offset = target_bo->offset;
entry->read_domains = 0;
entry->write_domain = 0;
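+   /* This entry is consumed directly by the kernel via the execbuf2 relocs
+    * pointer, so every byte must be defined before submission. Under
+    * Valgrind, the check below flags any uninitialized field here rather
+    * than at submit time.
+    */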
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));
return target_bo->offset + delta;
}
static void
-anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device,
+anv_reloc_list_append(struct anv_reloc_list *list,
+ const VkAllocationCallbacks *alloc,
struct anv_reloc_list *other, uint32_t offset)
{
- anv_reloc_list_grow(list, device, other->num_relocs);
+ anv_reloc_list_grow(list, alloc, other->num_relocs);
/* TODO: Handle failure */
memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
anv_batch_emit_reloc(struct anv_batch *batch,
void *location, struct anv_bo *bo, uint32_t delta)
{
- return anv_reloc_list_add(batch->relocs, batch->device,
+ return anv_reloc_list_add(batch->relocs, batch->alloc,
location - batch->start, bo, delta);
}
memcpy(batch->next, other->start, size);
offset = batch->next - batch->start;
- anv_reloc_list_append(batch->relocs, batch->device,
+ anv_reloc_list_append(batch->relocs, batch->alloc,
other->relocs, offset);
batch->next += size;
*-----------------------------------------------------------------------*/
static VkResult
-anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out)
+anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_batch_bo **bbo_out)
{
VkResult result;
- struct anv_batch_bo *bbo =
- anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (bbo == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
+ result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
if (result != VK_SUCCESS)
goto fail_alloc;
- result = anv_reloc_list_init(&bbo->relocs, device);
+ result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc);
if (result != VK_SUCCESS)
goto fail_bo_alloc;
return VK_SUCCESS;
fail_bo_alloc:
- anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
+ anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
fail_alloc:
- anv_device_free(device, bbo);
+ anv_free(&cmd_buffer->pool->alloc, bbo);
return result;
}
static VkResult
-anv_batch_bo_clone(struct anv_device *device,
+anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
const struct anv_batch_bo *other_bbo,
struct anv_batch_bo **bbo_out)
{
VkResult result;
- struct anv_batch_bo *bbo =
- anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (bbo == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
+ result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
if (result != VK_SUCCESS)
goto fail_alloc;
- result = anv_reloc_list_init_clone(&bbo->relocs, device, &other_bbo->relocs);
+ result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc,
+ &other_bbo->relocs);
if (result != VK_SUCCESS)
goto fail_bo_alloc;
bbo->length = other_bbo->length;
memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);
+ bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset;
+
*bbo_out = bbo;
return VK_SUCCESS;
fail_bo_alloc:
- anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
+ anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
fail_alloc:
- anv_device_free(device, bbo);
+ anv_free(&cmd_buffer->pool->alloc, bbo);
return result;
}
batch->next = batch->start = bbo->bo.map;
batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
batch->relocs = &bbo->relocs;
+ bbo->last_ss_pool_bo_offset = 0;
bbo->relocs.num_relocs = 0;
}
}
static void
-anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device)
+anv_batch_bo_destroy(struct anv_batch_bo *bbo,
+ struct anv_cmd_buffer *cmd_buffer)
{
- anv_reloc_list_finish(&bbo->relocs, device);
- anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
- anv_device_free(device, bbo);
+ anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc);
+ anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
+ anv_free(&cmd_buffer->pool->alloc, bbo);
}
static VkResult
-anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device,
+anv_batch_bo_list_clone(const struct list_head *list,
+ struct anv_cmd_buffer *cmd_buffer,
struct list_head *new_list)
{
VkResult result = VK_SUCCESS;
struct anv_batch_bo *prev_bbo = NULL;
list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
struct anv_batch_bo *new_bbo;
- result = anv_batch_bo_clone(device, bbo, &new_bbo);
+ result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo);
if (result != VK_SUCCESS)
break;
list_addtail(&new_bbo->link, new_list);
if (result != VK_SUCCESS) {
list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link)
- anv_batch_bo_destroy(bbo, device);
+ anv_batch_bo_destroy(bbo, cmd_buffer);
}
return result;
return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link);
}
-static inline struct anv_batch_bo *
-anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer)
+struct anv_address
+anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
{
- return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link);
+ return (struct anv_address) {
+ .bo = &cmd_buffer->device->surface_state_block_pool.bo,
+ .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks),
+ };
}
-struct anv_bo *
-anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer)
+static void
+emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_bo *bo, uint32_t offset)
{
- return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo;
-}
+ /* In gen8+ the address field grew to two dwords to accommodate 48-bit
+ * offsets. The high 16 bits are in the last dword, so we can use the gen8
+ * version in either case, as long as we set the instruction length in the
+ * header accordingly. This means that we always emit three dwords here
+ * and all the padding and adjustment we do in this file works for all
+ * gens.
+ */
-struct anv_reloc_list *
-anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer)
-{
- return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs;
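+   /* As a concrete check of the math below (assuming the instruction
+    * sizes from the genN_pack.h headers): MI_BATCH_BUFFER_START is 2
+    * dwords on gen7 and 3 dwords on gen8, and the MI length bias is 2,
+    * so gen7_length works out to 0 and gen8_length to 1.
+    */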
+ const uint32_t gen7_length =
+ GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias;
+ const uint32_t gen8_length =
+ GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias;
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START,
+ .DwordLength = cmd_buffer->device->info.gen < 8 ?
+ gen7_length : gen8_length,
+ ._2ndLevelBatchBuffer = _1stlevelbatch,
+ .AddressSpaceIndicator = ASI_PPGTT,
+ .BatchBufferStartAddress = { bo, offset });
}
static void
batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
assert(batch->end == current_bbo->bo.map + current_bbo->bo.size);
- anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START,
- GEN8_MI_BATCH_BUFFER_START_header,
- ._2ndLevelBatchBuffer = _1stlevelbatch,
- .AddressSpaceIndicator = ASI_PPGTT,
- .BatchBufferStartAddress = { &bbo->bo, 0 },
- );
+ emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0);
anv_batch_bo_finish(current_bbo, batch);
}
struct anv_cmd_buffer *cmd_buffer = _data;
struct anv_batch_bo *new_bbo;
- VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
+ VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo);
if (result != VK_SUCCESS)
return result;
struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos);
if (seen_bbo == NULL) {
- anv_batch_bo_destroy(new_bbo, cmd_buffer->device);
+ anv_batch_bo_destroy(new_bbo, cmd_buffer);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
*seen_bbo = new_bbo;
}
struct anv_state
-anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
- uint32_t size, uint32_t alignment)
+anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
+ uint32_t entries, uint32_t *state_offset)
{
- struct anv_bo *surface_bo =
- anv_cmd_buffer_current_surface_bo(cmd_buffer);
+ struct anv_block_pool *block_pool =
+ &cmd_buffer->device->surface_state_block_pool;
+ int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks);
struct anv_state state;
- state.offset = align_u32(cmd_buffer->surface_next, alignment);
- if (state.offset + size > surface_bo->size)
+ state.alloc_size = align_u32(entries * 4, 32);
+
+ if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size)
return (struct anv_state) { 0 };
- state.map = surface_bo->map + state.offset;
- state.alloc_size = size;
- cmd_buffer->surface_next = state.offset + size;
+ state.offset = cmd_buffer->bt_next;
+ state.map = block_pool->map + *bt_block + state.offset;
- assert(state.offset + size <= surface_bo->size);
+ cmd_buffer->bt_next += state.alloc_size;
+
+ assert(*bt_block < 0);
+ *state_offset = -(*bt_block);
return state;
}
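+
+/* A sketch of the offset arithmetic above: binding table blocks come from
+ * the back of the pool, so if this block sits at -4096 relative to the
+ * pool center, state_offset comes back as 4096. A surface state at
+ * center-relative offset X is then written into the table as X + 4096,
+ * i.e. relative to this block's start, which is what
+ * anv_cmd_buffer_surface_base_address() reports as the base address.
+ */
+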
+struct anv_state
+anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer)
+{
+ return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
+}
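+
+/* The 64/64 above is an assumption worth noting: 64 bytes is the size of
+ * gen8 RENDER_SURFACE_STATE and also satisfies gen7's smaller surface
+ * state, so one size and alignment works for every gen this file handles.
+ */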
+
struct anv_state
anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
uint32_t size, uint32_t alignment)
}
VkResult
-anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer)
+anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
{
- struct anv_batch_bo *new_bbo, *old_bbo =
- anv_cmd_buffer_current_surface_bbo(cmd_buffer);
-
- /* Finish off the old buffer */
- old_bbo->length = cmd_buffer->surface_next;
-
- VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
- if (result != VK_SUCCESS)
- return result;
+ struct anv_block_pool *block_pool =
+ &cmd_buffer->device->surface_state_block_pool;
- struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos);
- if (seen_bbo == NULL) {
- anv_batch_bo_destroy(new_bbo, cmd_buffer->device);
+ int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks);
+ if (offset == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- *seen_bbo = new_bbo;
-
- cmd_buffer->surface_next = 1;
- list_addtail(&new_bbo->link, &cmd_buffer->surface_bos);
+ *offset = anv_block_pool_alloc_back(block_pool);
+ cmd_buffer->bt_next = 0;
return VK_SUCCESS;
}
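+
+/* A note on the sign convention assumed above: anv_block_pool_alloc_back()
+ * hands out blocks below the pool's center, so every entry in bt_blocks is
+ * negative. That is what the assert in anv_cmd_buffer_alloc_binding_table()
+ * checks before negating the value into a usable state_offset.
+ */
+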
VkResult
anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
- struct anv_batch_bo *batch_bo, *surface_bbo;
- struct anv_device *device = cmd_buffer->device;
+ struct anv_batch_bo *batch_bo;
VkResult result;
list_inithead(&cmd_buffer->batch_bos);
- list_inithead(&cmd_buffer->surface_bos);
- result = anv_batch_bo_create(device, &batch_bo);
+ result = anv_batch_bo_create(cmd_buffer, &batch_bo);
if (result != VK_SUCCESS)
return result;
list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);
- cmd_buffer->batch.device = device;
+ cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc;
cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
cmd_buffer->batch.user_data = cmd_buffer;
anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
GEN8_MI_BATCH_BUFFER_START_length * 4);
- result = anv_batch_bo_create(device, &surface_bbo);
- if (result != VK_SUCCESS)
- goto fail_batch_bo;
-
- list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos);
-
int success = anv_vector_init(&cmd_buffer->seen_bbos,
sizeof(struct anv_bo *),
8 * sizeof(struct anv_bo *));
if (!success)
- goto fail_surface_bo;
+ goto fail_batch_bo;
*(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo;
- *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = surface_bbo;
- /* Start surface_next at 1 so surface offset 0 is invalid. */
- cmd_buffer->surface_next = 1;
+ success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t),
+ 8 * sizeof(int32_t));
+ if (!success)
+ goto fail_seen_bbos;
+
+ result = anv_reloc_list_init(&cmd_buffer->surface_relocs,
+ &cmd_buffer->pool->alloc);
+ if (result != VK_SUCCESS)
+ goto fail_bt_blocks;
+
+   result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
+   if (result != VK_SUCCESS) {
+      anv_reloc_list_finish(&cmd_buffer->surface_relocs,
+                            &cmd_buffer->pool->alloc);
+      goto fail_bt_blocks;
+   }
cmd_buffer->execbuf2.objects = NULL;
cmd_buffer->execbuf2.bos = NULL;
return VK_SUCCESS;
- fail_surface_bo:
- anv_batch_bo_destroy(surface_bbo, device);
+ fail_bt_blocks:
+ anv_vector_finish(&cmd_buffer->bt_blocks);
+ fail_seen_bbos:
+ anv_vector_finish(&cmd_buffer->seen_bbos);
fail_batch_bo:
- anv_batch_bo_destroy(batch_bo, device);
+ anv_batch_bo_destroy(batch_bo, cmd_buffer);
return result;
}
void
anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
- struct anv_device *device = cmd_buffer->device;
+ int32_t *bt_block;
+ anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) {
+ anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
+ *bt_block);
+ }
+ anv_vector_finish(&cmd_buffer->bt_blocks);
+
+ anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc);
anv_vector_finish(&cmd_buffer->seen_bbos);
/* Destroy all of the batch buffers */
list_for_each_entry_safe(struct anv_batch_bo, bbo,
&cmd_buffer->batch_bos, link) {
- anv_batch_bo_destroy(bbo, device);
- }
-
- /* Destroy all of the surface state buffers */
- list_for_each_entry_safe(struct anv_batch_bo, bbo,
- &cmd_buffer->surface_bos, link) {
- anv_batch_bo_destroy(bbo, device);
+ anv_batch_bo_destroy(bbo, cmd_buffer);
}
- anv_device_free(device, cmd_buffer->execbuf2.objects);
- anv_device_free(device, cmd_buffer->execbuf2.bos);
+ anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects);
+ anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos);
}
void
anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
- struct anv_device *device = cmd_buffer->device;
-
/* Delete all but the first batch bo */
assert(!list_empty(&cmd_buffer->batch_bos));
while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
list_del(&bbo->link);
- anv_batch_bo_destroy(bbo, device);
+ anv_batch_bo_destroy(bbo, cmd_buffer);
}
assert(!list_empty(&cmd_buffer->batch_bos));
&cmd_buffer->batch,
GEN8_MI_BATCH_BUFFER_START_length * 4);
- /* Delete all but the first batch bo */
- assert(!list_empty(&cmd_buffer->batch_bos));
- while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) {
- struct anv_batch_bo *bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer);
- list_del(&bbo->link);
- anv_batch_bo_destroy(bbo, device);
+ while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) {
+ int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks);
+ anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
+ *bt_block);
}
- assert(!list_empty(&cmd_buffer->batch_bos));
-
- anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0;
+ assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1);
+ cmd_buffer->bt_next = 0;
- cmd_buffer->surface_next = 1;
+ cmd_buffer->surface_relocs.num_relocs = 0;
/* Reset the list of seen buffers */
cmd_buffer->seen_bbos.head = 0;
*(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) =
anv_cmd_buffer_current_batch_bo(cmd_buffer);
- *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) =
- anv_cmd_buffer_current_surface_bbo(cmd_buffer);
}
void
anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
- struct anv_batch_bo *surface_bbo =
- anv_cmd_buffer_current_surface_bbo(cmd_buffer);
- if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) {
- anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END);
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+ anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END);
/* Round batch up to an even number of dwords. */
if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
- anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP);
+ anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP);
cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
- } else {
+ }
+
+ anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);
+
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
/* If this is a secondary command buffer, we need to determine the
* mode in which it will be executed with vkExecuteCommands. We
* determine this statically here so that this stays in sync with the
 * probably better off simply copying it into our batch.
*/
cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
- } else if (cmd_buffer->opt_flags &
- VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) {
+ } else if (!(cmd_buffer->usage_flags &
+ VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;
- /* For chaining mode, we need to increment the number of
- * relocations. This is because, when we chain, we need to add
- * an MI_BATCH_BUFFER_START command. Adding this command will
- * also add a relocation. In order to handle theis we'll
- * increment it here and decrement it right before adding the
+ /* When we chain, we need to add an MI_BATCH_BUFFER_START command
+ * with its relocation. In order to handle this we'll increment here
+ * so we can unconditionally decrement right before adding the
* MI_BATCH_BUFFER_START command.
*/
anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++;
+ cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4;
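+      /* The matching rollback lives in the secondary-execution path below:
+       * it decrements num_relocs and rewinds batch.next by these same
+       * three dwords before emitting the real MI_BATCH_BUFFER_START.
+       */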
} else {
cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
}
}
-
- anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);
-
- surface_bbo->length = cmd_buffer->surface_next;
}
static inline VkResult
struct anv_batch_bo *last_bbo =
list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
- anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START,
- GEN8_MI_BATCH_BUFFER_START_header,
- ._2ndLevelBatchBuffer = _1stlevelbatch,
- .AddressSpaceIndicator = ASI_PPGTT,
- .BatchBufferStartAddress = { &first_bbo->bo, 0 },
- );
+ emit_batch_buffer_start(primary, &first_bbo->bo, 0);
struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
assert(primary->batch.start == this_bbo->bo.map);
uint32_t offset = primary->batch.next - primary->batch.start;
+ const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4;
- struct GEN8_MI_BATCH_BUFFER_START ret = {
- GEN8_MI_BATCH_BUFFER_START_header,
- ._2ndLevelBatchBuffer = _1stlevelbatch,
- .AddressSpaceIndicator = ASI_PPGTT,
- .BatchBufferStartAddress = { &this_bbo->bo, offset },
- };
- last_bbo->relocs.num_relocs++;
- GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch,
- last_bbo->bo.map + last_bbo->length,
- &ret);
-
+ /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we
+ * can emit a new command and relocation for the current splice. In
+ * order to handle the initial-use case, we incremented next and
+ * num_relocs in end_batch_buffer() so we can always just subtract
+ * here.
+ */
+ last_bbo->relocs.num_relocs--;
+ secondary->batch.next -= inst_size;
+ emit_batch_buffer_start(secondary, &this_bbo->bo, offset);
anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
+
+ /* After patching up the secondary buffer, we need to clflush the
+ * modified instruction in case we're on a !llc platform. We use a
+ * little loop to handle the case where the instruction crosses a cache
+ * line boundary.
+ */
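+      /* CACHELINE_SIZE and CACHELINE_MASK are assumed to be the usual
+       * 64-byte cache-line constants; rounding the start down and then
+       * stepping a full line at a time covers an instruction that
+       * straddles a cache-line boundary.
+       */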
+ if (!primary->device->info.has_llc) {
+ void *inst = secondary->batch.next - inst_size;
+ void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
+ __builtin_ia32_sfence();
+ while (p < secondary->batch.next) {
+ __builtin_ia32_clflush(p);
+ p += CACHELINE_SIZE;
+ }
+ }
+
break;
}
case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
struct list_head copy_list;
VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos,
- secondary->device,
+ secondary,
©_list);
if (result != VK_SUCCESS)
return; /* FIXME */
assert(!"Invalid execution mode");
}
- /* Mark the surface buffer from the secondary as seen */
- anv_cmd_buffer_add_seen_bbos(primary, &secondary->surface_bos);
+ anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc,
+ &secondary->surface_relocs, 0);
}
static VkResult
cmd_buffer->execbuf2.array_length * 2 : 64;
struct drm_i915_gem_exec_object2 *new_objects =
- anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
- 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (new_objects == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
struct anv_bo **new_bos =
- anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
- 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (new_objects == NULL) {
+   if (new_bos == NULL) {
- anv_device_free(cmd_buffer->device, new_objects);
+ anv_free(&cmd_buffer->pool->alloc, new_objects);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
obj->relocation_count = relocs->num_relocs;
obj->relocs_ptr = (uintptr_t) relocs->relocs;
- for (size_t i = 0; i < relocs->num_relocs; i++)
+ for (size_t i = 0; i < relocs->num_relocs; i++) {
+ /* A quick sanity check on relocations */
+ assert(relocs->relocs[i].offset < bo->size);
anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL);
+ }
}
return VK_SUCCESS;
}
}
+static void
+adjust_relocations_from_block_pool(struct anv_block_pool *pool,
+ struct anv_reloc_list *relocs)
+{
+ for (size_t i = 0; i < relocs->num_relocs; i++) {
+ /* In general, we don't know how stale the relocated value is. It
+ * may have been used last time or it may not. Since we don't want
+ * to stomp it while the GPU may be accessing it, we haven't updated
+ * it anywhere else in the code. Instead, we just set the presumed
+ * offset to what it is now based on the delta and the data in the
+ * block pool. Then the kernel will update it for us if needed.
+ */
+ assert(relocs->relocs[i].offset < pool->state.end);
+ uint32_t *reloc_data = pool->map + relocs->relocs[i].offset;
+
+ /* We're reading back the relocated value from potentially incoherent
+ * memory here. However, any change to the value will be from the kernel
+ * writing out relocations, which will keep the CPU cache up to date.
+ */
+ relocs->relocs[i].presumed_offset = *reloc_data - relocs->relocs[i].delta;
+
+ /* All of the relocations from this block pool to other BOs should
+ * have been emitted relative to the surface block pool center. We
+ * need to add the center offset to make them relative to the
+ * beginning of the actual GEM bo.
+ */
+ relocs->relocs[i].offset += pool->center_bo_offset;
+ }
+}
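+
+/* A worked example of the above, with illustrative numbers: if a reloc has
+ * delta 8 and the dword in the pool currently reads 0x1008, presumed_offset
+ * becomes 0x1000. If that still matches the target bo's address at execbuf
+ * time the kernel leaves the dword alone; otherwise it rewrites it.
+ */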
+
+static void
+adjust_relocations_to_block_pool(struct anv_block_pool *pool,
+ struct anv_bo *from_bo,
+ struct anv_reloc_list *relocs,
+ uint32_t *last_pool_center_bo_offset)
+{
+ assert(*last_pool_center_bo_offset <= pool->center_bo_offset);
+ uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset;
+
+ /* When we initially emit relocations into a block pool, we don't
+ * actually know what the final center_bo_offset will be so we just emit
+ * it as if center_bo_offset == 0. Now that we know what the center
+ * offset is, we need to walk the list of relocations and adjust any
+ * relocations that point to the pool bo with the correct offset.
+ */
+ for (size_t i = 0; i < relocs->num_relocs; i++) {
+ if (relocs->reloc_bos[i] == &pool->bo) {
+ /* Adjust the delta value in the relocation to correctly
+ * correspond to the new delta. Initially, this value may have
+ * been negative (if treated as unsigned), but we trust in
+ * uint32_t roll-over to fix that for us at this point.
+ */
+ relocs->relocs[i].delta += delta;
+
+ /* Since the delta has changed, we need to update the actual
+ * relocated value with the new presumed value. This function
+ * should only be called on batch buffers, so we know it isn't in
+ * use by the GPU at the moment.
+ */
+ assert(relocs->relocs[i].offset < from_bo->size);
+ uint32_t *reloc_data = from_bo->map + relocs->relocs[i].offset;
+ *reloc_data = relocs->relocs[i].presumed_offset +
+ relocs->relocs[i].delta;
+ }
+ }
+
+ *last_pool_center_bo_offset = pool->center_bo_offset;
+}
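+
+/* Example with illustrative numbers: if these relocations were emitted
+ * while center_bo_offset was 0 and the pool has since grown backward so
+ * that it is now 4096, every delta that targets the pool bo gains 4096 and
+ * the corresponding batch dword is rewritten to presumed_offset + delta.
+ */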
+
void
anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_batch *batch = &cmd_buffer->batch;
+ struct anv_block_pool *ss_pool =
+ &cmd_buffer->device->surface_state_block_pool;
cmd_buffer->execbuf2.bo_count = 0;
cmd_buffer->execbuf2.need_reloc = false;
+ adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs);
+ anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs);
+
/* First, we walk over all of the bos we've seen and add them and their
* relocations to the validate list.
*/
struct anv_batch_bo **bbo;
- anv_vector_foreach(bbo, &cmd_buffer->seen_bbos)
+ anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
+ adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
+ &(*bbo)->last_ss_pool_bo_offset);
+
anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs);
+ }
struct anv_batch_bo *first_batch_bo =
list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);
*/
if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) {
uint32_t idx = first_batch_bo->bo.index;
+ uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1;
struct drm_i915_gem_exec_object2 tmp_obj =
cmd_buffer->execbuf2.objects[idx];
assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo);
- cmd_buffer->execbuf2.objects[idx] =
- cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1];
- cmd_buffer->execbuf2.bos[idx] =
- cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1];
+ cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx];
+ cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx];
cmd_buffer->execbuf2.bos[idx]->index = idx;
- cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1] = tmp_obj;
- cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1] =
- &first_batch_bo->bo;
- first_batch_bo->bo.index = cmd_buffer->execbuf2.bo_count - 1;
+ cmd_buffer->execbuf2.objects[last_idx] = tmp_obj;
+ cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo;
+ first_batch_bo->bo.index = last_idx;
}
/* Now we go through and fixup all of the relocation lists to point to
anv_vector_foreach(bbo, &cmd_buffer->seen_bbos)
anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);
+ anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
+
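+   /* On non-LLC platforms the GPU does not snoop the CPU caches, so any
+    * batch contents we wrote through a cached CPU mapping have to be
+    * flushed to memory before the kernel submits the buffers.
+    */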
+ if (!cmd_buffer->device->info.has_llc) {
+ __builtin_ia32_sfence();
+ anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
+ for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
+ __builtin_ia32_clflush((*bbo)->bo.map + i);
+ }
+ }
+
cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
.buffer_count = cmd_buffer->execbuf2.bo_count,
.num_cliprects = 0,
.DR1 = 0,
.DR4 = 0,
- .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
+ .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER |
+ I915_EXEC_CONSTANTS_REL_GENERAL,
.rsvd1 = cmd_buffer->device->context_id,
.rsvd2 = 0,
};