}
}
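+/* Per-queue serial tracking: the queue hands out monotonically increasing
+ * serials, and the most recently completed one lives in a small GPU-visible
+ * state allocation. next_serial starts at 1 so a stored 0 means nothing has
+ * completed yet.
+ */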
+static VkResult
+anv_queue_init(struct anv_device *device, struct anv_queue *queue)
+{
+ queue->device = device;
+ queue->pool = &device->surface_state_pool;
+
+ queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
+ if (queue->completed_serial.map == NULL)
+ return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ *(uint32_t *)queue->completed_serial.map = 0;
+ queue->next_serial = 1;
+
+ return VK_SUCCESS;
+}
+
+static void
+anv_queue_finish(struct anv_queue *queue)
+{
+#ifdef HAVE_VALGRIND
+ /* This gets torn down with the device so we only need to do this if
+ * valgrind is present.
+ */
+ anv_state_pool_free(queue->pool, queue->completed_serial);
+#endif
+}
+
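+/* Upload the VK_BORDER_COLOR_* values, in both float and uint32 layouts,
+ * into dynamic state so SAMPLER_STATE can reference them through
+ * IndirectStatePointer.
+ */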
+static void
+anv_device_init_border_colors(struct anv_device *device)
+{
+ float float_border_colors[][4] = {
+ [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1.0, 1.0, 1.0, 1.0 },
+ [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0.0, 0.0, 0.0, 0.0 },
+ [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0.0, 0.0, 0.0, 1.0 }
+ };
+
+ uint32_t uint32_border_colors[][4] = {
+ [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1, 1, 1, 1 },
+ [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0, 0, 0, 0 },
+ [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0, 0, 0, 1 }
+ };
+
+ device->float_border_colors =
+ anv_state_pool_alloc(&device->dynamic_state_pool,
+ sizeof(float_border_colors), 32);
+ memcpy(device->float_border_colors.map,
+ float_border_colors, sizeof(float_border_colors));
+
+ device->uint32_border_colors =
+ anv_state_pool_alloc(&device->dynamic_state_pool,
+ sizeof(uint32_border_colors), 32);
+ memcpy(device->uint32_border_colors.map,
+ uint32_border_colors, sizeof(uint32_border_colors));
+}
+
+static const uint32_t BATCH_SIZE = 8192;
+
VkResult anv_CreateDevice(
VkPhysicalDevice _physicalDevice,
const VkDeviceCreateInfo* pCreateInfo,
if (device->context_id == -1)
goto fail_fd;
+ anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE);
+
anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048);
anv_state_pool_init(&device->dynamic_state_pool,
anv_block_pool_init(&device->instruction_block_pool, device, 2048);
anv_block_pool_init(&device->surface_state_block_pool, device, 2048);
-
- /* Binding table pointers are only 16 bits so we have to make sure that
- * they get allocated at the beginning of the surface state BO. To
- * handle this, we create a separate block pool that works out of the
- * first 64 KB of the surface state BO.
- */
- anv_block_pool_init_slave(&device->binding_table_block_pool,
- &device->surface_state_block_pool, 32);
-
anv_state_pool_init(&device->surface_state_pool,
&device->surface_state_block_pool);
- device->compiler = anv_compiler_create(device->fd);
- device->aub_writer = NULL;
+ anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
device->info = *physicalDevice->info;
+ device->compiler = anv_compiler_create(device);
+ device->aub_writer = NULL;
+
pthread_mutex_init(&device->mutex, NULL);
+ anv_queue_init(device, &device->queue);
+
anv_device_init_meta(device);
+ anv_device_init_border_colors(device);
+
*pDevice = (VkDevice) device;
return VK_SUCCESS;
anv_compiler_destroy(device->compiler);
+ anv_queue_finish(&device->queue);
+
+ anv_device_finish_meta(device);
+
+#ifdef HAVE_VALGRIND
+ /* We only need to free these to prevent valgrind errors. The backing
+ * BO will go away in a couple of lines so we don't actually leak.
+ */
+ anv_state_pool_free(&device->dynamic_state_pool,
+ device->float_border_colors);
+ anv_state_pool_free(&device->dynamic_state_pool,
+ device->uint32_border_colors);
+#endif
+
+ anv_bo_pool_finish(&device->batch_bo_pool);
anv_block_pool_finish(&device->dynamic_state_block_pool);
anv_block_pool_finish(&device->instruction_block_pool);
anv_block_pool_finish(&device->surface_state_block_pool);
VkQueue* pQueue)
{
struct anv_device *device = (struct anv_device *) _device;
- struct anv_queue *queue;
- /* FIXME: Should allocate these at device create time. */
+ assert(queueIndex == 0);
- queue = anv_device_alloc(device, sizeof(*queue), 8,
- VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
- if (queue == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ *pQueue = (VkQueue) &device->queue;
- queue->device = device;
- queue->pool = &device->surface_state_pool;
+ return VK_SUCCESS;
+}
- queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
- *(uint32_t *)queue->completed_serial.map = 0;
- queue->next_serial = 1;
+VkResult
+anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device)
+{
+ list->num_relocs = 0;
+ list->array_length = 256;
+ list->relocs =
+ anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8,
+ VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+
+ if (list->relocs == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ list->reloc_bos =
+ anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8,
+ VK_SYSTEM_ALLOC_TYPE_INTERNAL);
- *pQueue = (VkQueue) queue;
+ if (list->reloc_bos == NULL) {
+ anv_device_free(device, list->relocs);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
return VK_SUCCESS;
}
-static const uint32_t BATCH_SIZE = 8192;
+void
+anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device)
+{
+ anv_device_free(device, list->relocs);
+ anv_device_free(device, list->reloc_bos);
+}
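+/* Grow the reloc list by doubling so repeated appends stay amortized O(1)
+ * per entry; the relocs and reloc_bos arrays are reallocated in lockstep.
+ */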
-VkResult
-anv_batch_init(struct anv_batch *batch, struct anv_device *device)
+static VkResult
+anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device,
+ size_t num_additional_relocs)
{
- VkResult result;
+ if (list->num_relocs + num_additional_relocs <= list->array_length)
+ return VK_SUCCESS;
- result = anv_bo_init_new(&batch->bo, device, BATCH_SIZE);
- if (result != VK_SUCCESS)
- return result;
+ size_t new_length = list->array_length * 2;
+ while (new_length < list->num_relocs + num_additional_relocs)
+ new_length *= 2;
+
+ struct drm_i915_gem_relocation_entry *new_relocs =
+ anv_device_alloc(device, new_length * sizeof(*list->relocs), 8,
+ VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ if (new_relocs == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- batch->bo.map =
- anv_gem_mmap(device, batch->bo.gem_handle, 0, BATCH_SIZE);
- if (batch->bo.map == NULL) {
- result = vk_error(VK_ERROR_MEMORY_MAP_FAILED);
- goto fail_bo;
+ struct anv_bo **new_reloc_bos =
+ anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8,
+ VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ if (new_reloc_bos == NULL) {
+ anv_device_free(device, new_relocs);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- batch->cmd_relocs.num_relocs = 0;
- batch->next = batch->bo.map;
+ memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
+ memcpy(new_reloc_bos, list->reloc_bos,
+ list->num_relocs * sizeof(*list->reloc_bos));
+
+ anv_device_free(device, list->relocs);
+ anv_device_free(device, list->reloc_bos);
+
+ list->relocs = new_relocs;
+ list->reloc_bos = new_reloc_bos;
return VK_SUCCESS;
+}
- fail_bo:
- anv_gem_close(device, batch->bo.gem_handle);
+static VkResult
+anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out)
+{
+ VkResult result;
- return result;
+ struct anv_batch_bo *bbo =
+ anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ if (bbo == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ bbo->num_relocs = 0;
+ bbo->prev_batch_bo = NULL;
+
+ result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
+ if (result != VK_SUCCESS) {
+ anv_device_free(device, bbo);
+ return result;
+ }
+
+ *bbo_out = bbo;
+ return VK_SUCCESS;
}
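+/* Point a batch at a fresh BO, keeping batch_padding bytes in reserve at
+ * the end (callers pass the size of MI_BATCH_BUFFER_START so there is
+ * always room to chain to the next BO).
+ */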
-void
-anv_batch_finish(struct anv_batch *batch, struct anv_device *device)
+static void
+anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
+ size_t batch_padding)
{
- anv_gem_munmap(batch->bo.map, BATCH_SIZE);
- anv_gem_close(device, batch->bo.gem_handle);
+ batch->next = batch->start = bbo->bo.map;
+ batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
+ bbo->first_reloc = batch->relocs.num_relocs;
}
-void
-anv_batch_reset(struct anv_batch *batch)
+static void
+anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
+{
+ assert(batch->start == bbo->bo.map);
+ bbo->length = batch->next - batch->start;
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
+ bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc;
+}
+
+static void
+anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device)
{
- batch->next = batch->bo.map;
- batch->cmd_relocs.num_relocs = 0;
+ anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
+ anv_device_free(device, bbo);
}
void *
anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
{
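+ /* On overflow, the extend callback swaps in a new batch BO and resets
+ * next/end; the assert below catches the case where that still wasn't
+ * enough room.
+ */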
+ if (batch->next + num_dwords * 4 > batch->end)
+ batch->extend_cb(batch, batch->user_data);
+
void *p = batch->next;
batch->next += num_dwords * 4;
+ assert(batch->next <= batch->end);
return p;
}
static void
-anv_reloc_list_append(struct anv_reloc_list *list,
+anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device,
struct anv_reloc_list *other, uint32_t offset)
{
- uint32_t i, count;
+ anv_reloc_list_grow(list, device, other->num_relocs);
+ /* TODO: Handle failure */
- count = list->num_relocs;
- memcpy(&list->relocs[count], &other->relocs[0],
+ memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
other->num_relocs * sizeof(other->relocs[0]));
- memcpy(&list->reloc_bos[count], &other->reloc_bos[0],
+ memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
other->num_relocs * sizeof(other->reloc_bos[0]));
- for (i = 0; i < other->num_relocs; i++)
- list->relocs[i + count].offset += offset;
- count += other->num_relocs;
+ for (uint32_t i = 0; i < other->num_relocs; i++)
+ list->relocs[i + list->num_relocs].offset += offset;
+
+ list->num_relocs += other->num_relocs;
}
static uint64_t
-anv_reloc_list_add(struct anv_reloc_list *list,
- uint32_t offset,
- struct anv_bo *target_bo, uint32_t delta)
+anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device,
+ uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
{
struct drm_i915_gem_relocation_entry *entry;
int index;
- assert(list->num_relocs < ANV_BATCH_MAX_RELOCS);
+ anv_reloc_list_grow(list, device, 1);
+ /* TODO: Handle failure */
/* XXX: Can we use I915_EXEC_HANDLE_LUT? */
index = list->num_relocs++;
{
uint32_t size, offset;
- size = other->next - other->bo.map;
- memcpy(batch->next, other->bo.map, size);
+ size = other->next - other->start;
+ assert(size % 4 == 0);
+
+ if (batch->next + size > batch->end)
+ batch->extend_cb(batch, batch->user_data);
+
+ assert(batch->next + size <= batch->end);
+
+ memcpy(batch->next, other->start, size);
- offset = batch->next - batch->bo.map;
- anv_reloc_list_append(&batch->cmd_relocs, &other->cmd_relocs, offset);
+ offset = batch->next - batch->start;
+ anv_reloc_list_append(&batch->relocs, batch->device,
+ &other->relocs, offset);
batch->next += size;
}
anv_batch_emit_reloc(struct anv_batch *batch,
void *location, struct anv_bo *bo, uint32_t delta)
{
- return anv_reloc_list_add(&batch->cmd_relocs,
- location - batch->bo.map, bo, delta);
+ return anv_reloc_list_add(&batch->relocs, batch->device,
+ location - batch->start, bo, delta);
}
VkResult anv_QueueSubmit(
state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32);
bo = &device->dynamic_state_pool.block_pool->bo;
- batch.next = state.map;
+ batch.start = batch.next = state.map;
+ batch.end = state.map + 32;
anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
anv_batch_emit(&batch, GEN8_MI_NOOP);
return VK_SUCCESS;
case VK_OBJECT_TYPE_BUFFER:
- case VK_OBJECT_TYPE_BUFFER_VIEW:
case VK_OBJECT_TYPE_IMAGE:
- case VK_OBJECT_TYPE_IMAGE_VIEW:
- case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW:
case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW:
case VK_OBJECT_TYPE_SHADER:
case VK_OBJECT_TYPE_PIPELINE_LAYOUT:
case VK_OBJECT_TYPE_FENCE:
case VK_OBJECT_TYPE_QUERY_POOL:
case VK_OBJECT_TYPE_FRAMEBUFFER:
+ case VK_OBJECT_TYPE_BUFFER_VIEW:
+ case VK_OBJECT_TYPE_IMAGE_VIEW:
+ case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW:
(object->destructor)(device, object, objType);
return VK_SUCCESS;
return VK_SUCCESS;
default:
- return VK_UNSUPPORTED;
+ return vk_error(VK_UNSUPPORTED);
}
}
fence->bo.map =
anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size);
- batch.next = fence->bo.map;
+ batch.next = batch.start = fence->bo.map;
+ batch.end = fence->bo.map + fence->bo.size;
anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
anv_batch_emit(&batch, GEN8_MI_NOOP);
fence->execbuf.rsvd1 = device->context_id;
fence->execbuf.rsvd2 = 0;
- *pFence = (VkQueryPool) fence;
+ *pFence = (VkFence) fence;
return VK_SUCCESS;
{
struct anv_fence **fences = (struct anv_fence **) pFences;
- for (uint32_t i; i < fenceCount; i++)
+ for (uint32_t i = 0; i < fenceCount; i++)
fences[i]->ready = false;
return VK_SUCCESS;
stub_return(VK_UNSUPPORTED);
}
-// Query functions
-
-static void
-anv_query_pool_destroy(struct anv_device *device,
- struct anv_object *object,
- VkObjectType obj_type)
-{
- struct anv_query_pool *pool = (struct anv_query_pool *) object;
-
- assert(obj_type == VK_OBJECT_TYPE_QUERY_POOL);
-
- anv_gem_munmap(pool->bo.map, pool->bo.size);
- anv_gem_close(device, pool->bo.gem_handle);
- anv_device_free(device, pool);
-}
-
-VkResult anv_CreateQueryPool(
- VkDevice _device,
- const VkQueryPoolCreateInfo* pCreateInfo,
- VkQueryPool* pQueryPool)
-{
- struct anv_device *device = (struct anv_device *) _device;
- struct anv_query_pool *pool;
- VkResult result;
- size_t size;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
-
- switch (pCreateInfo->queryType) {
- case VK_QUERY_TYPE_OCCLUSION:
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- return VK_UNSUPPORTED;
- default:
- unreachable("");
- }
-
- pool = anv_device_alloc(device, sizeof(*pool), 8,
- VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
- if (pool == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
- pool->base.destructor = anv_query_pool_destroy;
-
- pool->type = pCreateInfo->queryType;
- size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot);
- result = anv_bo_init_new(&pool->bo, device, size);
- if (result != VK_SUCCESS)
- goto fail;
-
- pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size);
-
- *pQueryPool = (VkQueryPool) pool;
-
- return VK_SUCCESS;
-
- fail:
- anv_device_free(device, pool);
-
- return result;
-}
-
-VkResult anv_GetQueryPoolResults(
- VkDevice _device,
- VkQueryPool queryPool,
- uint32_t startQuery,
- uint32_t queryCount,
- size_t* pDataSize,
- void* pData,
- VkQueryResultFlags flags)
-{
- struct anv_device *device = (struct anv_device *) _device;
- struct anv_query_pool *pool = (struct anv_query_pool *) queryPool;
- struct anv_query_pool_slot *slot = pool->bo.map;
- int64_t timeout = INT64_MAX;
- uint32_t *dst32 = pData;
- uint64_t *dst64 = pData;
- uint64_t result;
- int ret;
-
- if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- /* Where is the availabilty info supposed to go? */
- anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
- return VK_UNSUPPORTED;
- }
-
- assert(pool->type == VK_QUERY_TYPE_OCCLUSION);
-
- if (flags & VK_QUERY_RESULT_64_BIT)
- *pDataSize = queryCount * sizeof(uint64_t);
- else
- *pDataSize = queryCount * sizeof(uint32_t);
-
- if (pData == NULL)
- return VK_SUCCESS;
-
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
- if (ret == -1)
- return vk_error(VK_ERROR_UNKNOWN);
- }
-
- for (uint32_t i = 0; i < queryCount; i++) {
- result = slot[startQuery + i].end - slot[startQuery + i].begin;
- if (flags & VK_QUERY_RESULT_64_BIT) {
- *dst64++ = result;
- } else {
- if (result > UINT32_MAX)
- result = UINT32_MAX;
- *dst32++ = result;
- }
- }
-
- return VK_SUCCESS;
-}
-
// Buffer functions
VkResult anv_CreateBuffer(
// Buffer view functions
-VkResult anv_CreateBufferView(
- VkDevice _device,
- const VkBufferViewCreateInfo* pCreateInfo,
- VkBufferView* pView)
+static void
+fill_buffer_surface_state(void *state, VkFormat format,
+ uint32_t offset, uint32_t range)
{
- struct anv_device *device = (struct anv_device *) _device;
- struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer;
- struct anv_surface_view *view;
- const struct anv_format *format;
+ const struct anv_format *info;
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
-
- view = anv_device_alloc(device, sizeof(*view), 8,
- VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
- if (view == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
- view->bo = buffer->bo;
- view->offset = buffer->offset + pCreateInfo->offset;
- view->surface_state =
- anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
- view->format = pCreateInfo->format;
-
- format = anv_format_for_vk_format(pCreateInfo->format);
+ info = anv_format_for_vk_format(format);
/* This assumes RGBA float format. */
uint32_t stride = 4;
- uint32_t num_elements = pCreateInfo->range / stride;
+ uint32_t num_elements = range / stride;
+
struct GEN8_RENDER_SURFACE_STATE surface_state = {
.SurfaceType = SURFTYPE_BUFFER,
.SurfaceArray = false,
- .SurfaceFormat = format->format,
+ .SurfaceFormat = info->surface_format,
.SurfaceVerticalAlignment = VALIGN4,
.SurfaceHorizontalAlignment = HALIGN4,
.TileMode = LINEAR,
.SamplerL2BypassModeDisable = true,
.RenderCacheReadWriteMode = WriteOnlyCache,
.MemoryObjectControlState = GEN8_MOCS,
- .BaseMipLevel = 0,
+ .BaseMipLevel = 0.0,
.SurfaceQPitch = 0,
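+ /* For SURFTYPE_BUFFER the element count is packed across the Width
+ * (low 7 bits) and Height (next 14 bits) fields.
+ */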
.Height = (num_elements >> 7) & 0x3fff,
.Width = num_elements & 0x7f,
.ShaderChannelSelectGreen = SCS_GREEN,
.ShaderChannelSelectBlue = SCS_BLUE,
.ShaderChannelSelectAlpha = SCS_ALPHA,
- .ResourceMinLOD = 0,
+ .ResourceMinLOD = 0.0,
/* FIXME: We assume that the image must be bound at this time. */
- .SurfaceBaseAddress = { NULL, view->offset },
+ .SurfaceBaseAddress = { NULL, offset },
};
- GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state);
+ GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state);
+}
+
+VkResult anv_CreateBufferView(
+ VkDevice _device,
+ const VkBufferViewCreateInfo* pCreateInfo,
+ VkBufferView* pView)
+{
+ struct anv_device *device = (struct anv_device *) _device;
+ struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer;
+ struct anv_surface_view *view;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
+
+ view = anv_device_alloc(device, sizeof(*view), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (view == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ view->base.destructor = anv_surface_view_destroy;
- *pView = (VkImageView) view;
+ view->bo = buffer->bo;
+ view->offset = buffer->offset + pCreateInfo->offset;
+ view->surface_state =
+ anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
+ view->format = pCreateInfo->format;
+ view->range = pCreateInfo->range;
+
+ fill_buffer_surface_state(view->surface_state.map,
+ pCreateInfo->format, view->offset, pCreateInfo->range);
+
+ *pView = (VkBufferView) view;
return VK_SUCCESS;
}
{
struct anv_device *device = (struct anv_device *) _device;
struct anv_sampler *sampler;
+ uint32_t mag_filter, min_filter, max_anisotropy;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
static const uint32_t vk_to_gen_tex_filter[] = {
- [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST,
- [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR
+ [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST,
+ [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR
};
static const uint32_t vk_to_gen_mipmap_mode[] = {
- [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE,
- [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
- [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
+ [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE,
+ [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
+ [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
};
static const uint32_t vk_to_gen_tex_address[] = {
- [VK_TEX_ADDRESS_WRAP] = TCM_WRAP,
- [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR,
- [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP,
- [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE,
- [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER,
+ [VK_TEX_ADDRESS_WRAP] = TCM_WRAP,
+ [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR,
+ [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP,
+ [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE,
+ [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER,
};
static const uint32_t vk_to_gen_compare_op[] = {
- [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
- [VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
- [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
- [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL,
- [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
- [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
- [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL,
- [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
+ [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
+ [VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
+ [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
+ [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL,
+ [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
+ [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
+ [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL,
+ [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
};
- if (pCreateInfo->maxAnisotropy > 0)
- anv_finishme("missing support for anisotropic filtering");
-
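+ /* The MaximumAnisotropy field encodes ratios 2:1 through 16:1 in steps
+ * of two (RATIO21 is 2:1), hence the (maxAnisotropy - 2) / 2 mapping;
+ * values of 1 or less keep regular filtering.
+ */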
+ if (pCreateInfo->maxAnisotropy > 1) {
+ mag_filter = MAPFILTER_ANISOTROPIC;
+ min_filter = MAPFILTER_ANISOTROPIC;
+ max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2;
+ } else {
+ mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter];
+ min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter];
+ max_anisotropy = RATIO21;
+ }
+
struct GEN8_SAMPLER_STATE sampler_state = {
.SamplerDisable = false,
.TextureBorderColorMode = DX10OGL,
.LODPreClampMode = 0,
- .BaseMipLevel = 0,
+ .BaseMipLevel = 0.0,
.MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode],
- .MagModeFilter = vk_to_gen_tex_filter[pCreateInfo->magFilter],
- .MinModeFilter = vk_to_gen_tex_filter[pCreateInfo->minFilter],
+ .MagModeFilter = mag_filter,
+ .MinModeFilter = min_filter,
.TextureLODBias = pCreateInfo->mipLodBias * 256,
.AnisotropicAlgorithm = EWAApproximation,
- .MinLOD = pCreateInfo->minLod * 256,
- .MaxLOD = pCreateInfo->maxLod * 256,
+ .MinLOD = pCreateInfo->minLod,
+ .MaxLOD = pCreateInfo->maxLod,
.ChromaKeyEnable = 0,
.ChromaKeyIndex = 0,
.ChromaKeyMode = 0,
.ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp],
.CubeSurfaceControlMode = 0,
- .IndirectStatePointer = 0,
+
+ .IndirectStatePointer =
+ device->float_border_colors.offset +
+ pCreateInfo->borderColor * sizeof(float) * 4,
+
.LODClampMagnificationMode = MIPNONE,
- .MaximumAnisotropy = 0,
+ .MaximumAnisotropy = max_anisotropy,
.RAddressMinFilterRoundingEnable = 0,
.RAddressMagFilterRoundingEnable = 0,
.VAddressMinFilterRoundingEnable = 0,
uint32_t surface_count[VK_NUM_SHADER_STAGE] = { 0, };
uint32_t num_dynamic_buffers = 0;
uint32_t count = 0;
+ uint32_t stages = 0;
uint32_t s;
for (uint32_t i = 0; i < pCreateInfo->count; i++) {
switch (pCreateInfo->pBinding[i].descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
sampler_count[s] += pCreateInfo->pBinding[i].count;
break;
+ default:
+ break;
+ }
+ switch (pCreateInfo->pBinding[i].descriptorType) {
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
- sampler_count[s] += pCreateInfo->pBinding[i].count;
-
- /* fall through */
-
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
break;
}
- count += pCreateInfo->pBinding[i].count;
- }
-
- for (uint32_t i = 0; i < pCreateInfo->count; i++) {
switch (pCreateInfo->pBinding[i].descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- num_dynamic_buffers++;
+ num_dynamic_buffers += pCreateInfo->pBinding[i].count;
break;
default:
break;
}
+
+ stages |= pCreateInfo->pBinding[i].stageFlags;
+ count += pCreateInfo->pBinding[i].count;
}
uint32_t sampler_total = 0;
}
size_t size = sizeof(*set_layout) +
- (sampler_total + surface_total) * sizeof(uint32_t);
+ (sampler_total + surface_total) * sizeof(set_layout->entries[0]);
set_layout = anv_device_alloc(device, size, 8,
VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
if (!set_layout)
set_layout->num_dynamic_buffers = num_dynamic_buffers;
set_layout->count = count;
+ set_layout->shader_stages = stages;
- uint32_t *p = set_layout->entries;
- uint32_t *sampler[VK_NUM_SHADER_STAGE];
- uint32_t *surface[VK_NUM_SHADER_STAGE];
+ struct anv_descriptor_slot *p = set_layout->entries;
+ struct anv_descriptor_slot *sampler[VK_NUM_SHADER_STAGE];
+ struct anv_descriptor_slot *surface[VK_NUM_SHADER_STAGE];
for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
set_layout->stage[s].surface_count = surface_count[s];
set_layout->stage[s].surface_start = surface[s] = p;
}
uint32_t descriptor = 0;
+ int8_t dynamic_slot = 0;
+ bool is_dynamic;
for (uint32_t i = 0; i < pCreateInfo->count; i++) {
switch (pCreateInfo->pBinding[i].descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
- for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
- for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++)
- *(sampler[s])++ = descriptor + j;
- break;
-
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
- for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++)
- *(sampler[s])++ = descriptor + j;
+ for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
+ sampler[s]->index = descriptor + j;
+ sampler[s]->dynamic_slot = -1;
+ sampler[s]++;
+ }
+ break;
+ default:
+ break;
+ }
- /* fallthrough */
+ switch (pCreateInfo->pBinding[i].descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ is_dynamic = true;
+ break;
+ default:
+ is_dynamic = false;
+ break;
+ }
+ switch (pCreateInfo->pBinding[i].descriptorType) {
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
- *(surface[s])++ = descriptor + j;
+ surface[s]->index = descriptor + j;
+ if (is_dynamic)
+ surface[s]->dynamic_slot = dynamic_slot + j;
+ else
+ surface[s]->dynamic_slot = -1;
+ surface[s]++;
}
break;
default:
- unreachable("");
+ break;
}
+
+ if (is_dynamic)
+ dynamic_slot += pCreateInfo->pBinding[i].count;
+
descriptor += pCreateInfo->pBinding[i].count;
}
/* Is this what we need to do? */
.StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0,
- .StencilTestMask = pCreateInfo->stencilReadMask,
- .StencilWriteMask = pCreateInfo->stencilWriteMask,
+ .StencilTestMask = pCreateInfo->stencilReadMask & 0xff,
+ .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
- .BackfaceStencilTestMask = pCreateInfo->stencilReadMask,
- .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask,
+ .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff,
+ .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
};
GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil,
assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER);
- anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE);
- anv_gem_close(device, cmd_buffer->surface_bo.gem_handle);
+ /* Destroy all of the batch buffers */
+ struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo;
+ while (bbo) {
+ struct anv_batch_bo *prev = bbo->prev_batch_bo;
+ anv_batch_bo_destroy(bbo, device);
+ bbo = prev;
+ }
+ anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
+
+ /* Destroy all of the surface state buffers */
+ bbo = cmd_buffer->surface_batch_bo;
+ while (bbo) {
+ struct anv_batch_bo *prev = bbo->prev_batch_bo;
+ anv_batch_bo_destroy(bbo, device);
+ bbo = prev;
+ }
+ anv_reloc_list_finish(&cmd_buffer->surface_relocs, device);
+
anv_state_stream_finish(&cmd_buffer->surface_state_stream);
anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
- anv_state_stream_finish(&cmd_buffer->binding_table_state_stream);
- anv_batch_finish(&cmd_buffer->batch, device);
anv_device_free(device, cmd_buffer->exec2_objects);
anv_device_free(device, cmd_buffer->exec2_bos);
anv_device_free(device, cmd_buffer);
}
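+/* Batch-extension callback: when the current batch BO runs out of room,
+ * grab a fresh one from the pool and chain to it with
+ * MI_BATCH_BUFFER_START.
+ */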
+static VkResult
+anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
+{
+ struct anv_cmd_buffer *cmd_buffer = _data;
+
+ struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo;
+
+ VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* We set the end of the batch a little short to guarantee room for the
+ * chaining command. Since we're about to emit that command, set the end
+ * back where it should go.
+ */
+ batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
+ assert(batch->end == old_bbo->bo.map + old_bbo->bo.size);
+
+ anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START,
+ GEN8_MI_BATCH_BUFFER_START_header,
+ ._2ndLevelBatchBuffer = _1stlevelbatch,
+ .AddressSpaceIndicator = ASI_PPGTT,
+ .BatchBufferStartAddress = { &new_bbo->bo, 0 },
+ );
+
+ /* Pad out to a 2-dword aligned boundary with zeros */
+ if ((uintptr_t)batch->next % 8 != 0) {
+ *(uint32_t *)batch->next = 0;
+ batch->next += 4;
+ }
+
+ anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch);
+
+ new_bbo->prev_batch_bo = old_bbo;
+ cmd_buffer->last_batch_bo = new_bbo;
+
+ anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
+
+ return VK_SUCCESS;
+}
+
VkResult anv_CreateCommandBuffer(
VkDevice _device,
const VkCmdBufferCreateInfo* pCreateInfo,
cmd_buffer->device = device;
cmd_buffer->rs_state = NULL;
cmd_buffer->vp_state = NULL;
- memset(&cmd_buffer->default_bindings, 0, sizeof(cmd_buffer->default_bindings));
- cmd_buffer->bindings = &cmd_buffer->default_bindings;
+ cmd_buffer->cb_state = NULL;
+ cmd_buffer->ds_state = NULL;
+ memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors));
- result = anv_batch_init(&cmd_buffer->batch, device);
+ result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo);
if (result != VK_SUCCESS)
goto fail;
- result = anv_bo_init_new(&cmd_buffer->surface_bo, device, BATCH_SIZE);
+ result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device);
if (result != VK_SUCCESS)
- goto fail_batch;
+ goto fail_batch_bo;
- cmd_buffer->surface_bo.map =
- anv_gem_mmap(device, cmd_buffer->surface_bo.gem_handle, 0, BATCH_SIZE);
- if (cmd_buffer->surface_bo.map == NULL) {
- result = vk_error(VK_ERROR_MEMORY_MAP_FAILED);
- goto fail_surface_bo;
- }
+ cmd_buffer->batch.device = device;
+ cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
+ cmd_buffer->batch.user_data = cmd_buffer;
+
+ anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
+ GEN8_MI_BATCH_BUFFER_START_length * 4);
+
+ result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo);
+ if (result != VK_SUCCESS)
+ goto fail_batch_relocs;
+ cmd_buffer->surface_batch_bo->first_reloc = 0;
+
+ result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device);
+ if (result != VK_SUCCESS)
+ goto fail_ss_batch_bo;
/* Start surface_next at 1 so surface offset 0 is invalid. */
cmd_buffer->surface_next = 1;
- cmd_buffer->surface_relocs.num_relocs = 0;
- cmd_buffer->exec2_objects =
- anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8,
- VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
- if (cmd_buffer->exec2_objects == NULL) {
- result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- goto fail_surface_map;
- }
+ cmd_buffer->exec2_objects = NULL;
+ cmd_buffer->exec2_bos = NULL;
+ cmd_buffer->exec2_array_length = 0;
- cmd_buffer->exec2_bos =
- anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_bos[0]), 8,
- VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
- if (cmd_buffer->exec2_bos == NULL) {
- result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- goto fail_exec2_objects;
- }
-
- anv_state_stream_init(&cmd_buffer->binding_table_state_stream,
- &device->binding_table_block_pool);
anv_state_stream_init(&cmd_buffer->surface_state_stream,
&device->surface_state_block_pool);
anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
cmd_buffer->dirty = 0;
cmd_buffer->vb_dirty = 0;
+ cmd_buffer->descriptors_dirty = 0;
cmd_buffer->pipeline = NULL;
*pCmdBuffer = (VkCmdBuffer) cmd_buffer;
return VK_SUCCESS;
- fail_exec2_objects:
- anv_device_free(device, cmd_buffer->exec2_objects);
- fail_surface_map:
- anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE);
- fail_surface_bo:
- anv_gem_close(device, cmd_buffer->surface_bo.gem_handle);
- fail_batch:
- anv_batch_finish(&cmd_buffer->batch, device);
+ fail_ss_batch_bo:
+ anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device);
+ fail_batch_relocs:
+ anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
+ fail_batch_bo:
+ anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device);
fail:
anv_device_free(device, cmd_buffer);
return result;
}
-VkResult anv_BeginCommandBuffer(
- VkCmdBuffer cmdBuffer,
- const VkCmdBufferBeginInfo* pBeginInfo)
+static void
+anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
- struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
struct anv_device *device = cmd_buffer->device;
+ struct anv_bo *scratch_bo = NULL;
- anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
- .PipelineSelection = _3D);
- anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_SIP);
+ cmd_buffer->scratch_size = device->scratch_block_pool.size;
+ if (cmd_buffer->scratch_size > 0)
+ scratch_bo = &device->scratch_block_pool.bo;
anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
- .GeneralStateBaseAddress = { NULL, 0 },
+ .GeneralStateBaseAddress = { scratch_bo, 0 },
.GeneralStateMemoryObjectControlState = GEN8_MOCS,
.GeneralStateBaseAddressModifyEnable = true,
.GeneralStateBufferSize = 0xfffff,
.GeneralStateBufferSizeModifyEnable = true,
- .SurfaceStateBaseAddress = { &cmd_buffer->surface_bo, 0 },
+ .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 },
.SurfaceStateMemoryObjectControlState = GEN8_MOCS,
.SurfaceStateBaseAddressModifyEnable = true,
.IndirectObjectBaseAddressModifyEnable = true,
.IndirectObjectBufferSize = 0xfffff,
.IndirectObjectBufferSizeModifyEnable = true,
-
+
.InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
.InstructionMemoryObjectControlState = GEN8_MOCS,
.InstructionBaseAddressModifyEnable = true,
.InstructionBufferSize = 0xfffff,
.InstructionBuffersizeModifyEnable = true);
+}
+
+VkResult anv_BeginCommandBuffer(
+ VkCmdBuffer cmdBuffer,
+ const VkCmdBufferBeginInfo* pBeginInfo)
+{
+ struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VF_STATISTICS,
- .StatisticsEnable = true);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HS, .Enable = false);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_TE, .TEEnable = false);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DS, .FunctionEnable = false);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS,
- .ConstantBufferOffset = 0,
- .ConstantBufferSize = 4);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS,
- .ConstantBufferOffset = 4,
- .ConstantBufferSize = 4);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS,
- .ConstantBufferOffset = 8,
- .ConstantBufferSize = 4);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_CHROMAKEY,
- .ChromaKeyKillEnable = false);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SBE_SWIZ);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS);
+ anv_cmd_buffer_emit_state_base_address(cmd_buffer);
+ cmd_buffer->current_pipeline = UINT32_MAX;
return VK_SUCCESS;
}
-static void
+static VkResult
anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
- struct anv_bo *bo, struct anv_reloc_list *list)
+ struct anv_bo *bo,
+ struct drm_i915_gem_relocation_entry *relocs,
+ size_t num_relocs)
{
struct drm_i915_gem_exec_object2 *obj;
- bo->index = cmd_buffer->bo_count;
+ if (bo->index < cmd_buffer->bo_count &&
+ cmd_buffer->exec2_bos[bo->index] == bo)
+ return VK_SUCCESS;
+
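+ /* Grow the execbuf object arrays geometrically, starting at 64 entries. */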
+ if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) {
+ uint32_t new_len = cmd_buffer->exec2_objects ?
+ cmd_buffer->exec2_array_length * 2 : 64;
+
+ struct drm_i915_gem_exec_object2 *new_objects =
+ anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
+ 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ if (new_objects == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct anv_bo **new_bos =
+ anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
+ 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+ if (new_bos == NULL) {
+ anv_device_free(cmd_buffer->device, new_objects);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ if (cmd_buffer->exec2_objects) {
+ memcpy(new_objects, cmd_buffer->exec2_objects,
+ cmd_buffer->bo_count * sizeof(*new_objects));
+ memcpy(new_bos, cmd_buffer->exec2_bos,
+ cmd_buffer->bo_count * sizeof(*new_bos));
+ }
+
+ cmd_buffer->exec2_objects = new_objects;
+ cmd_buffer->exec2_bos = new_bos;
+ cmd_buffer->exec2_array_length = new_len;
+ }
+
+ assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length);
+
+ bo->index = cmd_buffer->bo_count++;
obj = &cmd_buffer->exec2_objects[bo->index];
cmd_buffer->exec2_bos[bo->index] = bo;
- cmd_buffer->bo_count++;
obj->handle = bo->gem_handle;
obj->relocation_count = 0;
obj->rsvd1 = 0;
obj->rsvd2 = 0;
- if (list) {
- obj->relocation_count = list->num_relocs;
- obj->relocs_ptr = (uintptr_t) list->relocs;
+ if (relocs) {
+ obj->relocation_count = num_relocs;
+ obj->relocs_ptr = (uintptr_t) relocs;
}
+
+ return VK_SUCCESS;
}
static void
anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer,
struct anv_reloc_list *list)
{
- struct anv_bo *bo, *batch_bo;
-
- batch_bo = &cmd_buffer->batch.bo;
- for (size_t i = 0; i < list->num_relocs; i++) {
- bo = list->reloc_bos[i];
- /* Skip any relocations targeting the batch bo. We need to make sure
- * it's the last in the list so we'll add it manually later.
- */
- if (bo == batch_bo)
- continue;
- if (bo->index < cmd_buffer->bo_count && cmd_buffer->exec2_bos[bo->index] == bo)
- continue;
-
- anv_cmd_buffer_add_bo(cmd_buffer, bo, NULL);
- }
+ for (size_t i = 0; i < list->num_relocs; i++)
+ anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0);
}
static void
anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END);
/* Round batch up to an even number of dwords. */
- if ((batch->next - batch->bo.map) & 4)
+ if ((batch->next - batch->start) & 4)
anv_batch_emit(batch, GEN8_MI_NOOP);
+ anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch);
+ cmd_buffer->surface_batch_bo->num_relocs =
+ cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc;
+ cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next;
+
cmd_buffer->bo_count = 0;
cmd_buffer->need_reloc = false;
/* Lock for access to bo->index. */
pthread_mutex_lock(&device->mutex);
- /* Add block pool bos first so we can add them with their relocs. */
- anv_cmd_buffer_add_bo(cmd_buffer, &cmd_buffer->surface_bo,
- &cmd_buffer->surface_relocs);
+ /* Add surface state bos first so we can add them with their relocs. */
+ for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo;
+ bbo != NULL; bbo = bbo->prev_batch_bo) {
+ anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo,
+ &cmd_buffer->surface_relocs.relocs[bbo->first_reloc],
+ bbo->num_relocs);
+ }
+ /* Add all of the BOs referenced by surface state */
anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
- anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->cmd_relocs);
- anv_cmd_buffer_add_bo(cmd_buffer, &batch->bo, &batch->cmd_relocs);
+
+ /* Add all but the first batch BO */
+ struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo;
+ while (batch_bo->prev_batch_bo) {
+ anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
+ &batch->relocs.relocs[batch_bo->first_reloc],
+ batch_bo->num_relocs);
+ batch_bo = batch_bo->prev_batch_bo;
+ }
+
+ /* Add everything referenced by the batches */
+ anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs);
+
+ /* Add the first batch bo last */
+ assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0);
+ anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
+ &batch->relocs.relocs[batch_bo->first_reloc],
+ batch_bo->num_relocs);
+ assert(batch_bo->bo.index == cmd_buffer->bo_count - 1);
+
anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
- anv_cmd_buffer_process_relocs(cmd_buffer, &batch->cmd_relocs);
+ anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs);
cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects;
cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count;
cmd_buffer->execbuf.batch_start_offset = 0;
- cmd_buffer->execbuf.batch_len = batch->next - batch->bo.map;
+ cmd_buffer->execbuf.batch_len = batch->next - batch->start;
cmd_buffer->execbuf.cliprects_ptr = 0;
cmd_buffer->execbuf.num_cliprects = 0;
cmd_buffer->execbuf.DR1 = 0;
{
struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
- anv_batch_reset(&cmd_buffer->batch);
- cmd_buffer->surface_next = 0;
+ /* Delete all but the first batch bo */
+ while (cmd_buffer->last_batch_bo->prev_batch_bo) {
+ struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo;
+ anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device);
+ cmd_buffer->last_batch_bo = prev;
+ }
+ assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL);
+
+ cmd_buffer->batch.relocs.num_relocs = 0;
+ anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
+ GEN8_MI_BATCH_BUFFER_START_length * 4);
+
+ /* Delete all but the first batch bo */
+ while (cmd_buffer->surface_batch_bo->prev_batch_bo) {
+ struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo;
+ anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device);
+ cmd_buffer->surface_batch_bo = prev;
+ }
+ assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL);
+
+ cmd_buffer->surface_next = 1;
cmd_buffer->surface_relocs.num_relocs = 0;
+ cmd_buffer->rs_state = NULL;
+ cmd_buffer->vp_state = NULL;
+ cmd_buffer->cb_state = NULL;
+ cmd_buffer->ds_state = NULL;
+
return VK_SUCCESS;
}
struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline;
- cmd_buffer->pipeline = pipeline;
- cmd_buffer->vb_dirty |= pipeline->vb_used;
- cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+ switch (pipelineBindPoint) {
+ case VK_PIPELINE_BIND_POINT_COMPUTE:
+ cmd_buffer->compute_pipeline = pipeline;
+ cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+ break;
+
+ case VK_PIPELINE_BIND_POINT_GRAPHICS:
+ cmd_buffer->pipeline = pipeline;
+ cmd_buffer->vb_dirty |= pipeline->vb_used;
+ cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+ break;
+
+ default:
+ assert(!"invalid bind point");
+ break;
+ }
}
void anv_CmdBindDynamicStateObject(
VkDynamicStateObject dynamicState)
{
struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
- struct anv_dynamic_vp_state *vp_state;
switch (stateBindPoint) {
case VK_STATE_BIND_POINT_VIEWPORT:
- vp_state = (struct anv_dynamic_vp_state *) dynamicState;
- /* We emit state immediately, but set cmd_buffer->vp_state to indicate
- * that vp state has been set in this command buffer. */
- cmd_buffer->vp_state = vp_state;
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
- .ScissorRectPointer = vp_state->scissor.offset);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
- .CCViewportPointer = vp_state->cc_vp.offset);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
- .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
+ cmd_buffer->vp_state = (struct anv_dynamic_vp_state *) dynamicState;
+ cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY;
break;
case VK_STATE_BIND_POINT_RASTER:
cmd_buffer->rs_state = (struct anv_dynamic_rs_state *) dynamicState;
cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY;
break;
case VK_STATE_BIND_POINT_COLOR_BLEND:
+ cmd_buffer->cb_state = (struct anv_dynamic_cb_state *) dynamicState;
+ cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY;
break;
case VK_STATE_BIND_POINT_DEPTH_STENCIL:
cmd_buffer->ds_state = (struct anv_dynamic_ds_state *) dynamicState;
{
struct anv_state state;
- state.offset = ALIGN_U32(cmd_buffer->surface_next, alignment);
- state.map = cmd_buffer->surface_bo.map + state.offset;
+ state.offset = align_u32(cmd_buffer->surface_next, alignment);
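+ /* A zeroed state signals the caller that this surface state BO is full
+ * and a new one must be chained in before retrying.
+ */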
+ if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size)
+ return (struct anv_state) { 0 };
+
+ state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset;
state.alloc_size = size;
cmd_buffer->surface_next = state.offset + size;
- assert(state.offset + size < cmd_buffer->surface_bo.size);
+ assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size);
return state;
}
+static VkResult
+anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo;
+
+ /* Finish off the old buffer */
+ old_bbo->num_relocs =
+ cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc;
+ old_bbo->length = cmd_buffer->surface_next;
+
+ VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs;
+ cmd_buffer->surface_next = 1;
+
+ new_bbo->prev_batch_bo = old_bbo;
+ cmd_buffer->surface_batch_bo = new_bbo;
+
+ /* Re-emit state base addresses so we get the new surface state base
+ * address before we start emitting binding tables etc.
+ */
+ anv_cmd_buffer_emit_state_base_address(cmd_buffer);
+
+ /* It seems like just changing the state base addresses isn't enough.
+ * Invalidating the cache seems to be enough to cause things to
+ * propagate. However, I'm not 100% sure what we're supposed to do.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
+ .TextureCacheInvalidationEnable = true);
+
+ return VK_SUCCESS;
+}
+
void anv_CmdBindDescriptorSets(
VkCmdBuffer cmdBuffer,
VkPipelineBindPoint pipelineBindPoint,
const uint32_t* pDynamicOffsets)
{
struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
- struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout;
- struct anv_bindings *bindings = cmd_buffer->bindings;
+ struct anv_pipeline_layout *layout;
+ struct anv_descriptor_set *set;
+ struct anv_descriptor_set_layout *set_layout;
- uint32_t offset = 0;
- for (uint32_t i = 0; i < setCount; i++) {
- struct anv_descriptor_set *set =
- (struct anv_descriptor_set *) pDescriptorSets[i];
- struct anv_descriptor_set_layout *set_layout = layout->set[firstSet + i].layout;
-
- for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
- uint32_t *surface_to_desc = set_layout->stage[s].surface_start;
- uint32_t *sampler_to_desc = set_layout->stage[s].sampler_start;
- uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0;
- uint32_t start;
-
- start = bias + layout->set[firstSet + i].surface_start[s];
- for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) {
- struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view;
- if (!view)
- continue;
-
- struct anv_state state =
- anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
- memcpy(state.map, view->surface_state.map, 64);
+ assert(firstSet + setCount <= MAX_SETS);
- /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
- *(uint64_t *)(state.map + 8 * 4) =
- anv_reloc_list_add(&cmd_buffer->surface_relocs,
- state.offset + 8 * 4,
- view->bo, view->offset);
+ if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
+ layout = cmd_buffer->pipeline->layout;
+ else
+ layout = cmd_buffer->compute_pipeline->layout;
- bindings->descriptors[s].surfaces[start + b] = state.offset;
- }
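+ /* pDynamicOffsets is consumed in set order: each set claims
+ * num_dynamic_buffers entries starting at dynamic_slot.
+ */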
+ uint32_t dynamic_slot = 0;
+ for (uint32_t i = 0; i < setCount; i++) {
+ set = (struct anv_descriptor_set *) pDescriptorSets[i];
+ set_layout = layout->set[firstSet + i].layout;
- start = layout->set[firstSet + i].sampler_start[s];
- for (uint32_t b = 0; b < set_layout->stage[s].sampler_count; b++) {
- struct anv_sampler *sampler = set->descriptors[sampler_to_desc[b]].sampler;
- if (!sampler)
- continue;
+ cmd_buffer->descriptors[firstSet + i].set = set;
- memcpy(&bindings->descriptors[s].samplers[start + b],
- sampler->state, sizeof(sampler->state));
- }
- }
+ assert(set_layout->num_dynamic_buffers <
+ ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets));
+ memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets,
+ pDynamicOffsets + dynamic_slot,
+ set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));
- offset += layout->set[firstSet + i].layout->num_dynamic_buffers;
- }
+ cmd_buffer->descriptors_dirty |= set_layout->shader_stages;
- cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY;
+ dynamic_slot += set_layout->num_dynamic_buffers;
+ }
}
void anv_CmdBindIndexBuffer(
struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
static const uint32_t vk_to_gen_index_type[] = {
- [VK_INDEX_TYPE_UINT8] = INDEX_BYTE,
- [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
- [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
+ [VK_INDEX_TYPE_UINT8] = INDEX_BYTE,
+ [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
+ [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
};
anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
const VkDeviceSize* pOffsets)
{
struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
- struct anv_bindings *bindings = cmd_buffer->bindings;
+ struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings;
/* We have to defer setting up vertex buffer since we need the buffer
* stride from the pipeline. */
+ assert(startBinding + bindingCount <= MAX_VBS);
for (uint32_t i = 0; i < bindingCount; i++) {
- bindings->vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i];
- bindings->vb[startBinding + i].offset = pOffsets[i];
+ vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i];
+ vb[startBinding + i].offset = pOffsets[i];
cmd_buffer->vb_dirty |= 1 << (startBinding + i);
}
}
-static void
-flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
+static VkResult
+cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
+ unsigned stage, struct anv_state *bt_state)
{
- struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout;
- struct anv_bindings *bindings = cmd_buffer->bindings;
- uint32_t layers = cmd_buffer->framebuffer->layers;
+ struct anv_pipeline_layout *layout;
+ uint32_t color_attachments, bias, size;
- for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
- uint32_t bias;
+ if (stage == VK_SHADER_STAGE_COMPUTE)
+ layout = cmd_buffer->compute_pipeline->layout;
+ else
+ layout = cmd_buffer->pipeline->layout;
- if (s == VK_SHADER_STAGE_FRAGMENT) {
- bias = MAX_RTS;
- layers = cmd_buffer->framebuffer->layers;
- } else {
- bias = 0;
- layers = 0;
+ if (stage == VK_SHADER_STAGE_FRAGMENT) {
+ bias = MAX_RTS;
+ color_attachments = cmd_buffer->framebuffer->color_attachment_count;
+ } else {
+ bias = 0;
+ color_attachments = 0;
+ }
+
+ /* This is a little awkward: layout can be NULL but we still have to
+ * allocate and set a binding table for the PS stage for render
+ * targets. */
+ uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
+
+ if (color_attachments + surface_count == 0)
+ return VK_SUCCESS;
+
+ size = (bias + surface_count) * sizeof(uint32_t);
+ *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
+
+ if (bt_state->map == NULL)
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+ uint32_t *bt_map = bt_state->map;
+
+ for (uint32_t ca = 0; ca < color_attachments; ca++) {
+ const struct anv_surface_view *view =
+ cmd_buffer->framebuffer->color_attachments[ca];
+
+ struct anv_state state =
+ anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
+
+ if (state.map == NULL)
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+ memcpy(state.map, view->surface_state.map, 64);
+
+ /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
+ *(uint64_t *)(state.map + 8 * 4) =
+ anv_reloc_list_add(&cmd_buffer->surface_relocs,
+ cmd_buffer->device,
+ state.offset + 8 * 4,
+ view->bo, view->offset);
+
+ bt_map[ca] = state.offset;
+ }
+
+ if (layout == NULL)
+ return VK_SUCCESS;
+
+ for (uint32_t set = 0; set < layout->num_sets; set++) {
+ struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
+ struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+ struct anv_descriptor_slot *surface_slots =
+ set_layout->stage[stage].surface_start;
+
+ uint32_t start = bias + layout->set[set].surface_start[stage];
+
+ for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
+ struct anv_surface_view *view =
+ d->set->descriptors[surface_slots[b].index].view;
+
+ if (!view)
+ continue;
+
+ struct anv_state state =
+ anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
+
+ if (state.map == NULL)
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
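+ /* Dynamic buffers get their surface state rebuilt here so the dynamic
+ * offset can be folded into the base address, with the range shrunk to
+ * match.
+ */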
+ uint32_t offset;
+ if (surface_slots[b].dynamic_slot >= 0) {
+ uint32_t dynamic_offset =
+ d->dynamic_offsets[surface_slots[b].dynamic_slot];
+
+ offset = view->offset + dynamic_offset;
+ fill_buffer_surface_state(state.map, view->format, offset,
+ view->range - dynamic_offset);
+ } else {
+ offset = view->offset;
+ memcpy(state.map, view->surface_state.map, 64);
+ }
+
+ /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
+ *(uint64_t *)(state.map + 8 * 4) =
+ anv_reloc_list_add(&cmd_buffer->surface_relocs,
+ cmd_buffer->device,
+ state.offset + 8 * 4,
+ view->bo, offset);
+
+ bt_map[start + b] = state.offset;
}
+ }
- /* This is a little awkward: layout can be NULL but we still have to
- * allocate and set a binding table for the PS stage for render
- * targets. */
- uint32_t surface_count = layout ? layout->stage[s].surface_count : 0;
-
- if (layers + surface_count > 0) {
- struct anv_state state;
- uint32_t size;
-
- size = (bias + surface_count) * sizeof(uint32_t);
- state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
- memcpy(state.map, bindings->descriptors[s].surfaces, size);
-
- static const uint32_t binding_table_opcodes[] = {
- [VK_SHADER_STAGE_VERTEX] = 38,
- [VK_SHADER_STAGE_TESS_CONTROL] = 39,
- [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
- [VK_SHADER_STAGE_GEOMETRY] = 41,
- [VK_SHADER_STAGE_FRAGMENT] = 42,
- [VK_SHADER_STAGE_COMPUTE] = 0,
- };
+ return VK_SUCCESS;
+}
+
+static VkResult
+cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
+ unsigned stage, struct anv_state *state)
+{
+ struct anv_pipeline_layout *layout;
+ uint32_t sampler_count;
+
+ if (stage == VK_SHADER_STAGE_COMPUTE)
+ layout = cmd_buffer->compute_pipeline->layout;
+ else
+ layout = cmd_buffer->pipeline->layout;
+
+ sampler_count = layout ? layout->stage[stage].sampler_count : 0;
+ if (sampler_count == 0)
+ return VK_SUCCESS;
+
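+ /* Each GEN8_SAMPLER_STATE entry is 4 dwords (16 bytes). */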
+ uint32_t size = sampler_count * 16;
+ *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
+
+ if (state->map == NULL)
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+ for (uint32_t set = 0; set < layout->num_sets; set++) {
+ struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
+ struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+ struct anv_descriptor_slot *sampler_slots =
+ set_layout->stage[stage].sampler_start;
+
+ uint32_t start = layout->set[set].sampler_start[stage];
+
+ for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
+ struct anv_sampler *sampler =
+ d->set->descriptors[sampler_slots[b].index].sampler;
+
+ if (!sampler)
+ continue;
- anv_batch_emit(&cmd_buffer->batch,
- GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
- ._3DCommandSubOpcode = binding_table_opcodes[s],
- .PointertoVSBindingTable = state.offset);
+ memcpy(state->map + (start + b) * 16,
+ sampler->state, sizeof(sampler->state));
}
+ }
- if (layout && layout->stage[s].sampler_count > 0) {
- struct anv_state state;
- size_t size;
-
- size = layout->stage[s].sampler_count * 16;
- state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
- memcpy(state.map, bindings->descriptors[s].samplers, size);
-
- static const uint32_t sampler_state_opcodes[] = {
- [VK_SHADER_STAGE_VERTEX] = 43,
- [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
- [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
- [VK_SHADER_STAGE_GEOMETRY] = 46,
- [VK_SHADER_STAGE_FRAGMENT] = 47,
- [VK_SHADER_STAGE_COMPUTE] = 0,
- };
+ return VK_SUCCESS;
+}
- anv_batch_emit(&cmd_buffer->batch,
- GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
- ._3DCommandSubOpcode = sampler_state_opcodes[s],
- .PointertoVSSamplerState = state.offset);
+static VkResult
+flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
+{
+ struct anv_state surfaces = { 0, }, samplers = { 0, };
+ VkResult result;
+
+ result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
+ if (result != VK_SUCCESS)
+ return result;
+ result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
+ if (result != VK_SUCCESS)
+ return result;
+
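+ /* The per-stage pointer commands differ only in their 3D command
+ * sub-opcode, so we emit the VS variant and patch in the sub-opcode for
+ * the stage we actually want.
+ */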
+ static const uint32_t sampler_state_opcodes[] = {
+ [VK_SHADER_STAGE_VERTEX] = 43,
+ [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
+ [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
+ [VK_SHADER_STAGE_GEOMETRY] = 46,
+ [VK_SHADER_STAGE_FRAGMENT] = 47,
+ [VK_SHADER_STAGE_COMPUTE] = 0,
+ };
+
+ static const uint32_t binding_table_opcodes[] = {
+ [VK_SHADER_STAGE_VERTEX] = 38,
+ [VK_SHADER_STAGE_TESS_CONTROL] = 39,
+ [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
+ [VK_SHADER_STAGE_GEOMETRY] = 41,
+ [VK_SHADER_STAGE_FRAGMENT] = 42,
+ [VK_SHADER_STAGE_COMPUTE] = 0,
+ };
+
+ if (samplers.alloc_size > 0) {
+ anv_batch_emit(&cmd_buffer->batch,
+ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
+ ._3DCommandSubOpcode = sampler_state_opcodes[stage],
+ .PointertoVSSamplerState = samplers.offset);
+ }
+
+ if (surfaces.alloc_size > 0) {
+ anv_batch_emit(&cmd_buffer->batch,
+ GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
+ ._3DCommandSubOpcode = binding_table_opcodes[stage],
+ .PointertoVSBindingTable = surfaces.offset);
+ }
+
+ return VK_SUCCESS;
+}
+
+static void
+flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
+{
+ uint32_t s, dirty = cmd_buffer->descriptors_dirty &
+ cmd_buffer->pipeline->active_stages;
+
+ VkResult result = VK_SUCCESS;
+ for_each_bit(s, dirty) {
+ result = flush_descriptor_set(cmd_buffer, s);
+ if (result != VK_SUCCESS)
+ break;
+ }
+
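+ /* The only way a flush can fail is by running out of room in the
+ * surface state BO, so grab a fresh BO and re-emit everything from
+ * scratch.
+ */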
+ if (result != VK_SUCCESS) {
+ assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
+ assert(result == VK_SUCCESS);
+
+ /* Re-emit all active binding tables */
+ for_each_bit(s, cmd_buffer->pipeline->active_stages) {
+ result = flush_descriptor_set(cmd_buffer, s);
+
+ /* It had better succeed this time */
+ assert(result == VK_SUCCESS);
}
}
+
+ cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages;
}
static struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
uint32_t *a, uint32_t dwords, uint32_t alignment)
{
- struct anv_device *device = cmd_buffer->device;
struct anv_state state;
- state = anv_state_pool_alloc(&device->dynamic_state_pool, dwords * 4, alignment);
+ state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
+ dwords * 4, alignment);
memcpy(state.map, a, dwords * 4);
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
+
return state;
}
static struct anv_state
anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
- uint32_t *a, uint32_t *b, uint32_t dwords, uint32_t alignment)
+ uint32_t *a, uint32_t *b,
+ uint32_t dwords, uint32_t alignment)
{
- struct anv_device *device = cmd_buffer->device;
struct anv_state state;
uint32_t *p;
- state = anv_state_pool_alloc(&device->dynamic_state_pool, dwords * 4, alignment);
+ state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
+ dwords * 4, alignment);
p = state.map;
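+ /* Both inputs were packed with mutually exclusive fields set, so OR'ing
+ * the dwords merges them into a single valid state block.
+ */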
for (uint32_t i = 0; i < dwords; i++)
p[i] = a[i] | b[i];
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
+
return state;
}
+static VkResult
+flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_device *device = cmd_buffer->device;
+ struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+ struct anv_state surfaces = { 0, }, samplers = { 0, };
+ VkResult result;
+
+ result = cmd_buffer_emit_samplers(cmd_buffer,
+ VK_SHADER_STAGE_COMPUTE, &samplers);
+ if (result != VK_SUCCESS)
+ return result;
+ result = cmd_buffer_emit_binding_table(cmd_buffer,
+ VK_SHADER_STAGE_COMPUTE, &surfaces);
+ if (result != VK_SUCCESS)
+ return result;
+
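+ /* Compute has no 3DSTATE_BINDING_TABLE/SAMPLER_STATE_POINTERS commands;
+ * instead, the offsets go in the INTERFACE_DESCRIPTOR_DATA, which we
+ * upload to dynamic state and point at with
+ * MEDIA_INTERFACE_DESCRIPTOR_LOAD below.
+ */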
+ struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
+ .KernelStartPointer = pipeline->cs_simd,
+ .KernelStartPointerHigh = 0,
+ .BindingTablePointer = surfaces.offset,
+ .BindingTableEntryCount = 0,
+ .SamplerStatePointer = samplers.offset,
+ .SamplerCount = 0,
+ .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
+ };
+
+ uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
+ struct anv_state state =
+ anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
+
+ GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
+ .InterfaceDescriptorTotalLength = size,
+ .InterfaceDescriptorDataStartAddress = state.offset);
+
+ return VK_SUCCESS;
+}
+
+static void
+anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+ VkResult result;
+
+ assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
+
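+ /* Switch the command streamer into GPGPU mode before emitting any
+ * compute state.
+ */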
+ if (cmd_buffer->current_pipeline != GPGPU) {
+ anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
+ .PipelineSelection = GPGPU);
+ cmd_buffer->current_pipeline = GPGPU;
+ }
+
+ if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
+ anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+
+ if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
+ (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
+ result = flush_compute_descriptor_set(cmd_buffer);
+ if (result != VK_SUCCESS) {
+ result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
+ assert(result == VK_SUCCESS);
+ result = flush_compute_descriptor_set(cmd_buffer);
+ assert(result == VK_SUCCESS);
+ }
+ cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
+ }
+
+ cmd_buffer->compute_dirty = 0;
+}
+
static void
anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_pipeline *pipeline = cmd_buffer->pipeline;
- struct anv_bindings *bindings = cmd_buffer->bindings;
uint32_t *p;
uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used;
- const uint32_t num_buffers = __builtin_popcount(vb_emit);
- const uint32_t num_dwords = 1 + num_buffers * 4;
+
+ assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
+
+ if (cmd_buffer->current_pipeline != _3D) {
+ anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
+ .PipelineSelection = _3D);
+ cmd_buffer->current_pipeline = _3D;
+ }
if (vb_emit) {
+ const uint32_t num_buffers = __builtin_popcount(vb_emit);
+ const uint32_t num_dwords = 1 + num_buffers * 4;
+
p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
GEN8_3DSTATE_VERTEX_BUFFERS);
uint32_t vb, i = 0;
for_each_bit(vb, vb_emit) {
- struct anv_buffer *buffer = bindings->vb[vb].buffer;
- uint32_t offset = bindings->vb[vb].offset;
+ struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
+ uint32_t offset = cmd_buffer->vertex_bindings[vb].offset;
struct GEN8_VERTEX_BUFFER_STATE state = {
.VertexBufferIndex = vb,
}
}
- if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
+ if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
+ /* If somebody compiled a pipeline after starting a command buffer, the
+ * scratch bo may have grown since we started this cmd buffer (and
+ * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
+ * re-emit STATE_BASE_ADDRESS so that we use the bigger scratch bo.
+ */
+ if (cmd_buffer->scratch_size < pipeline->total_scratch)
+ anv_cmd_buffer_emit_state_base_address(cmd_buffer);
+
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+ }
- if (cmd_buffer->dirty & ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY)
+ if (cmd_buffer->descriptors_dirty)
flush_descriptor_sets(cmd_buffer);
+ if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) {
+ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
+ .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset);
+ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
+ .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset);
+ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
+ .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset);
+ }
+
if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) {
anv_batch_emit_merge(&cmd_buffer->batch,
cmd_buffer->rs_state->state_sf, pipeline->state_sf);
if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) {
struct anv_state state;
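+ /* COLOR_CALC_STATE must be 64-byte aligned (the pointer field drops the
+ * low six bits). Either dynamic state object may be missing, so only
+ * merge when both are bound.
+ */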
- if (cmd_buffer->ds_state)
+ if (cmd_buffer->ds_state == NULL)
+ state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
+ cmd_buffer->cb_state->state_color_calc,
+ GEN8_COLOR_CALC_STATE_length, 64);
+ else if (cmd_buffer->cb_state == NULL)
+ state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
+ cmd_buffer->ds_state->state_color_calc,
+ GEN8_COLOR_CALC_STATE_length, 64);
+ else
state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
cmd_buffer->ds_state->state_color_calc,
cmd_buffer->cb_state->state_color_calc,
- GEN8_COLOR_CALC_STATE_length, 32);
- else
- state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
- cmd_buffer->cb_state->state_color_calc,
- GEN8_COLOR_CALC_STATE_length, 32);
+ GEN8_COLOR_CALC_STATE_length, 64);
anv_batch_emit(&cmd_buffer->batch,
GEN8_3DSTATE_CC_STATE_POINTERS,
.StartVertexLocation = firstIndex,
.InstanceCount = instanceCount,
.StartInstanceLocation = firstInstance,
- .BaseVertexLocation = 0);
+ .BaseVertexLocation = vertexOffset);
}
static void
uint32_t y,
uint32_t z)
{
- stub();
+ struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+ struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+ struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+
+ anv_cmd_buffer_flush_compute_state(cmd_buffer);
+
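+ /* The SIMDSize field encodes SIMD8/16/32 as 0/1/2, which integer
+ * division of the actual SIMD width by 16 happens to produce.
+ */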
+ anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
+ .SIMDSize = prog_data->simd_size / 16,
+ .ThreadDepthCounterMaximum = 0,
+ .ThreadHeightCounterMaximum = 0,
+ .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
+ .ThreadGroupIDXDimension = x,
+ .ThreadGroupIDYDimension = y,
+ .ThreadGroupIDZDimension = z,
+ .RightExecutionMask = pipeline->cs_right_mask,
+ .BottomExecutionMask = 0xffffffff);
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}
+#define GPGPU_DISPATCHDIMX 0x2500
+#define GPGPU_DISPATCHDIMY 0x2504
+#define GPGPU_DISPATCHDIMZ 0x2508
+
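+/* For an indirect dispatch, we load the x/y/z group counts from the
+ * indirect buffer into the GPGPU_DISPATCHDIM registers above with
+ * MI_LOAD_REGISTER_MEM and let GPGPU_WALKER consume them via
+ * IndirectParameterEnable.
+ */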
void anv_CmdDispatchIndirect(
VkCmdBuffer cmdBuffer,
- VkBuffer buffer,
+ VkBuffer _buffer,
VkDeviceSize offset)
{
- stub();
+ struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+ struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+ struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+ struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
+ struct anv_bo *bo = buffer->bo;
+ uint32_t bo_offset = buffer->offset + offset;
+
+ anv_cmd_buffer_flush_compute_state(cmd_buffer);
+
+ anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
+ anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
+ anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
+ .IndirectParameterEnable = true,
+ .SIMDSize = prog_data->simd_size / 16,
+ .ThreadDepthCounterMaximum = 0,
+ .ThreadHeightCounterMaximum = 0,
+ .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
+ .RightExecutionMask = pipeline->cs_right_mask,
+ .BottomExecutionMask = 0xffffffff);
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}
void anv_CmdSetEvent(
uint32_t memBarrierCount,
const void** ppMemBarriers)
{
- stub();
-}
-
-static void
-anv_batch_emit_ps_depth_count(struct anv_batch *batch,
- struct anv_bo *bo, uint32_t offset)
-{
- anv_batch_emit(batch, GEN8_PIPE_CONTROL,
- .DestinationAddressType = DAT_PPGTT,
- .PostSyncOperation = WritePSDepthCount,
- .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */
-}
+ struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer;
+ uint32_t b, *dw;
-void anv_CmdBeginQuery(
- VkCmdBuffer cmdBuffer,
- VkQueryPool queryPool,
- uint32_t slot,
- VkQueryControlFlags flags)
-{
- struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
- struct anv_query_pool *pool = (struct anv_query_pool *) queryPool;
+ struct GEN8_PIPE_CONTROL cmd = {
+ GEN8_PIPE_CONTROL_header,
+ .PostSyncOperation = NoWrite,
+ };
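+ /* Accumulate all of the stall, flush, and invalidate bits from the
+ * event and memory barrier lists below, then emit a single PIPE_CONTROL
+ * at the end.
+ */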
- switch (pool->type) {
- case VK_QUERY_TYPE_OCCLUSION:
- anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
- slot * sizeof(struct anv_query_pool_slot));
- break;
+ /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- default:
- unreachable("");
+ for (uint32_t i = 0; i < pipeEventCount; i++) {
+ switch (pPipeEvents[i]) {
+ case VK_PIPE_EVENT_TOP_OF_PIPE:
+ /* This is just what PIPE_CONTROL does */
+ break;
+ case VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE:
+ case VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE:
+ case VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE:
+ cmd.StallAtPixelScoreboard = true;
+ break;
+ case VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE:
+ case VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE:
+ case VK_PIPE_EVENT_TRANSFER_COMPLETE:
+ case VK_PIPE_EVENT_COMMANDS_COMPLETE:
+ cmd.CommandStreamerStallEnable = true;
+ break;
+ default:
+ unreachable("Invalid VkPipeEvent");
+ }
}
-}
-
-void anv_CmdEndQuery(
- VkCmdBuffer cmdBuffer,
- VkQueryPool queryPool,
- uint32_t slot)
-{
- struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
- struct anv_query_pool *pool = (struct anv_query_pool *) queryPool;
- switch (pool->type) {
- case VK_QUERY_TYPE_OCCLUSION:
- anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
- slot * sizeof(struct anv_query_pool_slot) + 8);
- break;
+ /* XXX: Right now, we're really dumb and just flush whatever categories
+ * the app asks for. One of these days we may make this a bit better
+ * but right now that's all the hardware allows for in most areas.
+ */
+ VkMemoryOutputFlags out_flags = 0;
+ VkMemoryInputFlags in_flags = 0;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- default:
- unreachable("");
+ for (uint32_t i = 0; i < memBarrierCount; i++) {
+ const struct anv_common *common = ppMemBarriers[i];
+ switch (common->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
+ const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common;
+ out_flags |= barrier->outputMask;
+ in_flags |= barrier->inputMask;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
+ const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common;
+ out_flags |= barrier->outputMask;
+ in_flags |= barrier->inputMask;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
+ const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common;
+ out_flags |= barrier->outputMask;
+ in_flags |= barrier->inputMask;
+ break;
+ }
+ default:
+ unreachable("Invalid memory barrier type");
+ }
}
-}
-
-void anv_CmdResetQueryPool(
- VkCmdBuffer cmdBuffer,
- VkQueryPool queryPool,
- uint32_t startQuery,
- uint32_t queryCount)
-{
- stub();
-}
-
-#define TIMESTAMP 0x2358
-void anv_CmdWriteTimestamp(
- VkCmdBuffer cmdBuffer,
- VkTimestampType timestampType,
- VkBuffer destBuffer,
- VkDeviceSize destOffset)
-{
- struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
- struct anv_buffer *buffer = (struct anv_buffer *) destBuffer;
- struct anv_bo *bo = buffer->bo;
-
- switch (timestampType) {
- case VK_TIMESTAMP_TYPE_TOP:
- anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
- .RegisterAddress = TIMESTAMP,
- .MemoryAddress = { bo, buffer->offset + destOffset });
- anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
- .RegisterAddress = TIMESTAMP + 4,
- .MemoryAddress = { bo, buffer->offset + destOffset + 4 });
- break;
-
- case VK_TIMESTAMP_TYPE_BOTTOM:
- anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
- .DestinationAddressType = DAT_PPGTT,
- .PostSyncOperation = WriteTimestamp,
- .Address = /* FIXME: This is only lower 32 bits */
- { bo, buffer->offset + destOffset });
- break;
-
- default:
- break;
+ for_each_bit(b, out_flags) {
+ switch ((VkMemoryOutputFlags)(1 << b)) {
+ case VK_MEMORY_OUTPUT_CPU_WRITE_BIT:
+ break; /* FIXME: Little-core systems */
+ case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
+ cmd.DCFlushEnable = true;
+ break;
+ case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
+ cmd.RenderTargetCacheFlushEnable = true;
+ break;
+ case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
+ cmd.DepthCacheFlushEnable = true;
+ break;
+ case VK_MEMORY_OUTPUT_TRANSFER_BIT:
+ cmd.RenderTargetCacheFlushEnable = true;
+ cmd.DepthCacheFlushEnable = true;
+ break;
+ default:
+ unreachable("Invalid memory output flag");
+ }
}
-}
-#define alu_opcode(v) __gen_field((v), 20, 31)
-#define alu_operand1(v) __gen_field((v), 10, 19)
-#define alu_operand2(v) __gen_field((v), 0, 9)
-#define alu(opcode, operand1, operand2) \
- alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
-
-#define OPCODE_NOOP 0x000
-#define OPCODE_LOAD 0x080
-#define OPCODE_LOADINV 0x480
-#define OPCODE_LOAD0 0x081
-#define OPCODE_LOAD1 0x481
-#define OPCODE_ADD 0x100
-#define OPCODE_SUB 0x101
-#define OPCODE_AND 0x102
-#define OPCODE_OR 0x103
-#define OPCODE_XOR 0x104
-#define OPCODE_STORE 0x180
-#define OPCODE_STOREINV 0x580
-
-#define OPERAND_R0 0x00
-#define OPERAND_R1 0x01
-#define OPERAND_R2 0x02
-#define OPERAND_R3 0x03
-#define OPERAND_R4 0x04
-#define OPERAND_SRCA 0x20
-#define OPERAND_SRCB 0x21
-#define OPERAND_ACCU 0x31
-#define OPERAND_ZF 0x32
-#define OPERAND_CF 0x33
-
-#define CS_GPR(n) (0x2600 + (n) * 8)
-
-static void
-emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
- struct anv_bo *bo, uint32_t offset)
-{
- anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
- .RegisterAddress = reg,
- .MemoryAddress = { bo, offset });
- anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
- .RegisterAddress = reg + 4,
- .MemoryAddress = { bo, offset + 4 });
-}
-
-void anv_CmdCopyQueryPoolResults(
- VkCmdBuffer cmdBuffer,
- VkQueryPool queryPool,
- uint32_t startQuery,
- uint32_t queryCount,
- VkBuffer destBuffer,
- VkDeviceSize destOffset,
- VkDeviceSize destStride,
- VkQueryResultFlags flags)
-{
- struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
- struct anv_query_pool *pool = (struct anv_query_pool *) queryPool;
- struct anv_buffer *buffer = (struct anv_buffer *) destBuffer;
- uint32_t slot_offset, dst_offset;
-
- if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- /* Where is the availabilty info supposed to go? */
- anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
- return;
+ for_each_bit(b, in_flags) {
+ switch ((VkMemoryInputFlags)(1 << b)) {
+ case VK_MEMORY_INPUT_CPU_READ_BIT:
+ break; /* FIXME: Little-core systems */
+ case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
+ case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
+ case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
+ cmd.VFCacheInvalidationEnable = true;
+ break;
+ case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
+ cmd.ConstantCacheInvalidationEnable = true;
+ /* fallthrough */
+ case VK_MEMORY_INPUT_SHADER_READ_BIT:
+ cmd.DCFlushEnable = true;
+ cmd.TextureCacheInvalidationEnable = true;
+ break;
+ case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
+ case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
+ break; /* XXX: Unclear what, if anything, needs invalidating here. */
+ case VK_MEMORY_INPUT_TRANSFER_BIT:
+ cmd.TextureCacheInvalidationEnable = true;
+ break;
+ }
}
- assert(pool->type == VK_QUERY_TYPE_OCCLUSION);
-
- /* FIXME: If we're not waiting, should we just do this on the CPU? */
- if (flags & VK_QUERY_RESULT_WAIT_BIT)
- anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
- .CommandStreamerStallEnable = true);
-
- dst_offset = buffer->offset + destOffset;
- for (uint32_t i = 0; i < queryCount; i++) {
-
- slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot);
-
- emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset);
- emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8);
-
- /* FIXME: We need to clamp the result for 32 bit. */
-
- uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH);
- dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
- dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
- dw[3] = alu(OPCODE_SUB, 0, 0);
- dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
- .RegisterAddress = CS_GPR(2),
- /* FIXME: This is only lower 32 bits */
- .MemoryAddress = { buffer->bo, dst_offset });
-
- if (flags & VK_QUERY_RESULT_64_BIT)
- anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
- .RegisterAddress = CS_GPR(2) + 4,
- /* FIXME: This is only lower 32 bits */
- .MemoryAddress = { buffer->bo, dst_offset + 4 });
-
- dst_offset += destStride;
- }
+ dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
+ GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
}
void anv_CmdInitAtomicCounters(
framebuffer->height = pCreateInfo->height;
framebuffer->layers = pCreateInfo->layers;
- vkCreateDynamicViewportState((VkDevice) device,
- &(VkDynamicVpStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO,
- .viewportAndScissorCount = 1,
- .pViewports = (VkViewport[]) {
- {
- .originX = 0,
- .originY = 0,
- .width = pCreateInfo->width,
- .height = pCreateInfo->height,
- .minDepth = 0,
- .maxDepth = 1
- },
- },
- .pScissors = (VkRect[]) {
- { { 0, 0 },
- { pCreateInfo->width, pCreateInfo->height } },
- }
- },
- &framebuffer->vp_state);
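+ /* Call the driver entry point directly rather than the public
+ * vkCreateDynamicViewportState symbol, which would route back through
+ * the loader.
+ */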
+ anv_CreateDynamicViewportState((VkDevice) device,
+ &(VkDynamicVpStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO,
+ .viewportAndScissorCount = 1,
+ .pViewports = (VkViewport[]) {
+ {
+ .originX = 0,
+ .originY = 0,
+ .width = pCreateInfo->width,
+ .height = pCreateInfo->height,
+ .minDepth = 0,
+ .maxDepth = 1
+ },
+ },
+ .pScissors = (VkRect[]) {
+ { { 0, 0 },
+ { pCreateInfo->width, pCreateInfo->height } },
+ }
+ },
+ &framebuffer->vp_state);
*pFramebuffer = (VkFramebuffer) framebuffer;
return VK_SUCCESS;
}
-void
-anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer)
-{
- struct anv_framebuffer *framebuffer = cmd_buffer->framebuffer;
- struct anv_bindings *bindings = cmd_buffer->bindings;
-
- for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) {
- const struct anv_surface_view *view = framebuffer->color_attachments[i];
-
- struct anv_state state =
- anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
- memcpy(state.map, view->surface_state.map, 64);
-
- /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
- *(uint64_t *)(state.map + 8 * 4) =
- anv_reloc_list_add(&cmd_buffer->surface_relocs,
- state.offset + 8 * 4,
- view->bo, view->offset);
-
- bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = state.offset;
- }
- cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY;
-}
-
static void
anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
struct anv_render_pass *pass)
cmd_buffer->framebuffer->depth_stencil;
/* FIXME: Implement the PMA stall W/A */
+ /* FIXME: Width and Height are wrong */
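+ /* The SurfaceQPitch fields are programmed in units of 4 rows, hence the
+ * >> 2 on the qpitch values below.
+ */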
anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
.SurfaceType = SURFTYPE_2D,
.MinimumArrayElement = 0,
.DepthBufferObjectControlState = GEN8_MOCS,
.RenderTargetViewExtent = 1 - 1,
- .SurfaceQPitch = 0);
+ .SurfaceQPitch = view->depth_qpitch >> 2);
/* Disable hierarchical depth buffers. */
anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
.StencilBufferObjectControlState = GEN8_MOCS,
.SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
.SurfaceBaseAddress = { view->bo, view->stencil_offset },
- .SurfaceQPitch = 0);
+ .SurfaceQPitch = view->stencil_qpitch >> 2);
/* Clear the clear params. */
anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
cmd_buffer->framebuffer = framebuffer;
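+ /* Render target surface state is now emitted through the fragment
+ * binding table, so marking the fragment descriptors dirty is enough to
+ * re-emit it for the new framebuffer.
+ */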
+ cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+
anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
.ClippedDrawingRectangleYMin = pass->render_area.offset.y,
.ClippedDrawingRectangleXMin = pass->render_area.offset.x,
.DrawingRectangleOriginY = 0,
.DrawingRectangleOriginX = 0);
- anv_cmd_buffer_fill_render_targets(cmd_buffer);
-
anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass);
anv_cmd_buffer_clear(cmd_buffer, pass);