* is concerned, most of anv_cmd_buffer is magic.
*/
+/* TODO: These are taken from GLES. We should check the Vulkan spec */
+const struct anv_dynamic_state default_dynamic_state = {
+ .viewport = {
+ .count = 0,
+ },
+ .scissor = {
+ .count = 0,
+ },
+ .line_width = 1.0f,
+ .depth_bias = {
+ .bias = 0.0f,
+ .clamp = 0.0f,
+ .slope = 0.0f,
+ },
+ .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
+ .depth_bounds = {
+ .min = 0.0f,
+ .max = 1.0f,
+ },
+ .stencil_compare_mask = {
+ .front = ~0u,
+ .back = ~0u,
+ },
+ .stencil_write_mask = {
+ .front = ~0u,
+ .back = ~0u,
+ },
+ .stencil_reference = {
+ .front = 0u,
+ .back = 0u,
+ },
+};
+
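+/* Copy the dynamic state selected by copy_mask, a bitmask indexed by the
+ * VkDynamicState enum values, from src to dest.
+ */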
+void
+anv_dynamic_state_copy(struct anv_dynamic_state *dest,
+ const struct anv_dynamic_state *src,
+ uint32_t copy_mask)
+{
+ if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
+ dest->viewport.count = src->viewport.count;
+ typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
+ src->viewport.count);
+ }
+
+ if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
+ dest->scissor.count = src->scissor.count;
+ typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
+ src->scissor.count);
+ }
+
+ if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH))
+ dest->line_width = src->line_width;
+
+ if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))
+ dest->depth_bias = src->depth_bias;
+
+ if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))
+ typed_memcpy(dest->blend_constants, src->blend_constants, 4);
+
+ if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS))
+ dest->depth_bounds = src->depth_bounds;
+
+ if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))
+ dest->stencil_compare_mask = src->stencil_compare_mask;
+
+ if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))
+ dest->stencil_write_mask = src->stencil_write_mask;
+
+ if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))
+ dest->stencil_reference = src->stencil_reference;
+}
+
static void
anv_cmd_state_init(struct anv_cmd_state *state)
{
- state->rs_state = NULL;
- state->vp_state = NULL;
- state->cb_state = NULL;
- state->ds_state = NULL;
- memset(&state->state_vf, 0, sizeof(state->state_vf));
memset(&state->descriptors, 0, sizeof(state->descriptors));
+ memset(&state->push_constants, 0, sizeof(state->push_constants));
- state->dirty = 0;
+ state->dirty = ~0;
state->vb_dirty = 0;
state->descriptors_dirty = 0;
+ state->push_constants_dirty = 0;
state->pipeline = NULL;
- state->vp_state = NULL;
- state->rs_state = NULL;
- state->ds_state = NULL;
+ state->restart_index = UINT32_MAX;
+ state->dynamic = default_dynamic_state;
+
+ state->gen7.index_buffer = NULL;
}
-VkResult anv_CreateCommandBuffer(
- VkDevice _device,
- const VkCmdBufferCreateInfo* pCreateInfo,
- VkCmdBuffer* pCmdBuffer)
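+/* Lazily allocate (or grow) the per-stage push constant block. The
+ * allocation only ever grows; callers size it to cover the largest field
+ * written so far.
+ */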
+static VkResult
+anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer,
+ gl_shader_stage stage, uint32_t size)
+{
+ struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage];
+
+ if (*ptr == NULL) {
+ *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (*ptr == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   } else if ((*ptr)->size < size) {
+      /* Don't clobber *ptr on failure; otherwise the old allocation leaks. */
+      struct anv_push_constants *new_data =
+         anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (new_data == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      *ptr = new_data;
+   }
+ (*ptr)->size = size;
+
+ return VK_SUCCESS;
+}
+
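+/* Ensure the push constant block for `stage` is large enough to hold
+ * `field`, i.e. the field's offset within anv_push_constants plus its size.
+ */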
+#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \
+ anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \
+ (offsetof(struct anv_push_constants, field) + \
+ sizeof(cmd_buffer->state.push_constants[0]->field)))
+
+static VkResult anv_create_cmd_buffer(
+ struct anv_device * device,
+ struct anv_cmd_pool * pool,
+ VkCommandBufferLevel level,
+ VkCommandBuffer* pCommandBuffer)
{
- ANV_FROM_HANDLE(anv_device, device, _device);
- ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool);
struct anv_cmd_buffer *cmd_buffer;
VkResult result;
- cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
- VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cmd_buffer == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
cmd_buffer->device = device;
+ cmd_buffer->pool = pool;
result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
if (result != VK_SUCCESS)
anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
&device->dynamic_state_block_pool);
- cmd_buffer->level = pCreateInfo->level;
- cmd_buffer->opt_flags = 0;
+ cmd_buffer->level = level;
+ cmd_buffer->usage_flags = 0;
anv_cmd_state_init(&cmd_buffer->state);
list_inithead(&cmd_buffer->pool_link);
}
- *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
+ *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
return VK_SUCCESS;
- fail: anv_device_free(device, cmd_buffer);
+ fail:
+ anv_free(&cmd_buffer->pool->alloc, cmd_buffer);
return result;
}
-VkResult anv_DestroyCommandBuffer(
+VkResult anv_AllocateCommandBuffers(
VkDevice _device,
- VkCmdBuffer _cmd_buffer)
+ const VkCommandBufferAllocateInfo* pAllocateInfo,
+ VkCommandBuffer* pCommandBuffers)
{
ANV_FROM_HANDLE(anv_device, device, _device);
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer);
+ ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool);
+
+ VkResult result = VK_SUCCESS;
+ uint32_t i;
+
+ for (i = 0; i < pAllocateInfo->bufferCount; i++) {
+ result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level,
+ &pCommandBuffers[i]);
+ if (result != VK_SUCCESS)
+ break;
+ }
+ if (result != VK_SUCCESS)
+ anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool,
+ i, pCommandBuffers);
+
+ return result;
+}
+
+static void
+anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer)
+{
list_del(&cmd_buffer->pool_link);
anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
anv_state_stream_finish(&cmd_buffer->surface_state_stream);
anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
- anv_device_free(device, cmd_buffer);
- return VK_SUCCESS;
+ anv_free(&cmd_buffer->pool->alloc, cmd_buffer);
+}
+
+void anv_FreeCommandBuffers(
+ VkDevice device,
+ VkCommandPool commandPool,
+ uint32_t commandBufferCount,
+ const VkCommandBuffer* pCommandBuffers)
+{
+ for (uint32_t i = 0; i < commandBufferCount; i++) {
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
+
+ anv_cmd_buffer_destroy(cmd_buffer);
+ }
}
VkResult anv_ResetCommandBuffer(
- VkCmdBuffer cmdBuffer,
- VkCmdBufferResetFlags flags)
+ VkCommandBuffer commandBuffer,
+ VkCommandBufferResetFlags flags)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
void
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
- struct anv_device *device = cmd_buffer->device;
- struct anv_bo *scratch_bo = NULL;
-
- cmd_buffer->state.scratch_size =
- anv_block_pool_size(&device->scratch_block_pool);
- if (cmd_buffer->state.scratch_size > 0)
- scratch_bo = &device->scratch_block_pool.bo;
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
- .GeneralStateBaseAddress = { scratch_bo, 0 },
- .GeneralStateMemoryObjectControlState = GEN8_MOCS,
- .GeneralStateBaseAddressModifyEnable = true,
- .GeneralStateBufferSize = 0xfffff,
- .GeneralStateBufferSizeModifyEnable = true,
-
- .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 },
- .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
- .SurfaceStateBaseAddressModifyEnable = true,
-
- .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
- .DynamicStateMemoryObjectControlState = GEN8_MOCS,
- .DynamicStateBaseAddressModifyEnable = true,
- .DynamicStateBufferSize = 0xfffff,
- .DynamicStateBufferSizeModifyEnable = true,
-
- .IndirectObjectBaseAddress = { NULL, 0 },
- .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
- .IndirectObjectBaseAddressModifyEnable = true,
- .IndirectObjectBufferSize = 0xfffff,
- .IndirectObjectBufferSizeModifyEnable = true,
-
- .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
- .InstructionMemoryObjectControlState = GEN8_MOCS,
- .InstructionBaseAddressModifyEnable = true,
- .InstructionBufferSize = 0xfffff,
- .InstructionBuffersizeModifyEnable = true);
-
- /* After re-setting the surface state base address, we have to do some
- * cache flusing so that the sampler engine will pick up the new
- * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
- * Shared Function > 3D Sampler > State > State Caching (page 96):
- *
- * Coherency with system memory in the state cache, like the texture
- * cache is handled partially by software. It is expected that the
- * command stream or shader will issue Cache Flush operation or
- * Cache_Flush sampler message to ensure that the L1 cache remains
- * coherent with system memory.
- *
- * [...]
- *
- * Whenever the value of the Dynamic_State_Base_Addr,
- * Surface_State_Base_Addr are altered, the L1 state cache must be
- * invalidated to ensure the new surface or sampler state is fetched
- * from system memory.
- *
- * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
- * which, according the PIPE_CONTROL instruction documentation in the
- * Broadwell PRM:
- *
- * Setting this bit is independent of any other bit in this packet.
- * This bit controls the invalidation of the L1 and L2 state caches
- * at the top of the pipe i.e. at the parsing time.
- *
- * Unfortunately, experimentation seems to indicate that state cache
- * invalidation through a PIPE_CONTROL does nothing whatsoever in
- * regards to surface state and binding tables. In stead, it seems that
- * invalidating the texture cache is what is actually needed.
- *
- * XXX: As far as we have been able to determine through
- * experimentation, shows that flush the texture cache appears to be
- * sufficient. The theory here is that all of the sampling/rendering
- * units cache the binding table in the texture cache. However, we have
- * yet to be able to actually confirm this.
- */
- anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
- .TextureCacheInvalidationEnable = true);
+ switch (cmd_buffer->device->info.gen) {
+ case 7:
+      if (cmd_buffer->device->info.is_haswell)
+         return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
+      else
+         return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+ case 8:
+ return gen8_cmd_buffer_emit_state_base_address(cmd_buffer);
+ case 9:
+ return gen9_cmd_buffer_emit_state_base_address(cmd_buffer);
+ default:
+ unreachable("unsupported gen\n");
+ }
}
VkResult anv_BeginCommandBuffer(
- VkCmdBuffer cmdBuffer,
- const VkCmdBufferBeginInfo* pBeginInfo)
+ VkCommandBuffer commandBuffer,
+ const VkCommandBufferBeginInfo* pBeginInfo)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
- cmd_buffer->opt_flags = pBeginInfo->flags;
+ anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
- if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) {
+ cmd_buffer->usage_flags = pBeginInfo->flags;
+
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
cmd_buffer->state.framebuffer =
anv_framebuffer_from_handle(pBeginInfo->framebuffer);
cmd_buffer->state.pass =
anv_render_pass_from_handle(pBeginInfo->renderPass);
- /* FIXME: We shouldn't be starting on the first subpass */
- anv_cmd_buffer_begin_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
+ struct anv_subpass *subpass =
+ &cmd_buffer->state.pass->subpasses[pBeginInfo->subpass];
+
+ anv_cmd_buffer_begin_subpass(cmd_buffer, subpass);
}
anv_cmd_buffer_emit_state_base_address(cmd_buffer);
}
VkResult anv_EndCommandBuffer(
- VkCmdBuffer cmdBuffer)
+ VkCommandBuffer commandBuffer)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_device *device = cmd_buffer->device;
anv_cmd_buffer_end_batch_buffer(cmd_buffer);
- if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) {
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
/* The algorithm used to compute the validate list is not threadsafe as
* it uses the bo->index field. We have to lock the device around it.
* Fortunately, the chances for contention here are probably very low.
}
void anv_CmdBindPipeline(
- VkCmdBuffer cmdBuffer,
+ VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_COMPUTE:
cmd_buffer->state.compute_pipeline = pipeline;
- cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+ cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
break;
case VK_PIPELINE_BIND_POINT_GRAPHICS:
cmd_buffer->state.pipeline = pipeline;
cmd_buffer->state.vb_dirty |= pipeline->vb_used;
- cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->state.push_constants_dirty |= pipeline->active_stages;
+
+ /* Apply the dynamic state from the pipeline */
+ cmd_buffer->state.dirty |= pipeline->dynamic_state_mask;
+ anv_dynamic_state_copy(&cmd_buffer->state.dynamic,
+ &pipeline->dynamic_state,
+ pipeline->dynamic_state_mask);
break;
default:
}
}
-void anv_CmdBindDynamicViewportState(
- VkCmdBuffer cmdBuffer,
- VkDynamicViewportState dynamicViewportState)
+void anv_CmdSetViewport(
+ VkCommandBuffer commandBuffer,
+ uint32_t viewportCount,
+ const VkViewport* pViewports)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
- cmd_buffer->state.vp_state = vp_state;
- cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY;
+ cmd_buffer->state.dynamic.viewport.count = viewportCount;
+ memcpy(cmd_buffer->state.dynamic.viewport.viewports,
+ pViewports, viewportCount * sizeof(*pViewports));
+
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
+}
+
+void anv_CmdSetScissor(
+ VkCommandBuffer commandBuffer,
+ uint32_t scissorCount,
+ const VkRect2D* pScissors)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ cmd_buffer->state.dynamic.scissor.count = scissorCount;
+ memcpy(cmd_buffer->state.dynamic.scissor.scissors,
+ pScissors, scissorCount * sizeof(*pScissors));
+
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
}
-void anv_CmdBindDynamicRasterState(
- VkCmdBuffer cmdBuffer,
- VkDynamicRasterState dynamicRasterState)
+void anv_CmdSetLineWidth(
+ VkCommandBuffer commandBuffer,
+ float lineWidth)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
- cmd_buffer->state.rs_state = rs_state;
- cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY;
+ cmd_buffer->state.dynamic.line_width = lineWidth;
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
}
-void anv_CmdBindDynamicColorBlendState(
- VkCmdBuffer cmdBuffer,
- VkDynamicColorBlendState dynamicColorBlendState)
+void anv_CmdSetDepthBias(
+ VkCommandBuffer commandBuffer,
+ float depthBiasConstantFactor,
+ float depthBiasClamp,
+ float depthBiasSlopeFactor)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor;
+ cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp;
+ cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor;
- cmd_buffer->state.cb_state = cb_state;
- cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY;
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
}
-void anv_CmdBindDynamicDepthStencilState(
- VkCmdBuffer cmdBuffer,
- VkDynamicDepthStencilState dynamicDepthStencilState)
+void anv_CmdSetBlendConstants(
+ VkCommandBuffer commandBuffer,
+ const float blendConstants[4])
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ memcpy(cmd_buffer->state.dynamic.blend_constants,
+ blendConstants, sizeof(float) * 4);
- cmd_buffer->state.ds_state = ds_state;
- cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY;
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
+}
+
+void anv_CmdSetDepthBounds(
+ VkCommandBuffer commandBuffer,
+ float minDepthBounds,
+ float maxDepthBounds)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds;
+ cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds;
+
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
+}
+
+void anv_CmdSetStencilCompareMask(
+ VkCommandBuffer commandBuffer,
+ VkStencilFaceFlags faceMask,
+ uint32_t compareMask)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask;
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask;
+
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
+}
+
+void anv_CmdSetStencilWriteMask(
+ VkCommandBuffer commandBuffer,
+ VkStencilFaceFlags faceMask,
+ uint32_t writeMask)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask;
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask;
+
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
+}
+
+void anv_CmdSetStencilReference(
+ VkCommandBuffer commandBuffer,
+ VkStencilFaceFlags faceMask,
+ uint32_t reference)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ cmd_buffer->state.dynamic.stencil_reference.front = reference;
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ cmd_buffer->state.dynamic.stencil_reference.back = reference;
+
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
}
void anv_CmdBindDescriptorSets(
- VkCmdBuffer cmdBuffer,
+ VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout _layout,
uint32_t firstSet,
- uint32_t setCount,
+ uint32_t descriptorSetCount,
const VkDescriptorSet* pDescriptorSets,
uint32_t dynamicOffsetCount,
const uint32_t* pDynamicOffsets)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
struct anv_descriptor_set_layout *set_layout;
- assert(firstSet + setCount < MAX_SETS);
+   assert(firstSet + descriptorSetCount <= MAX_SETS);
uint32_t dynamic_slot = 0;
- for (uint32_t i = 0; i < setCount; i++) {
+ for (uint32_t i = 0; i < descriptorSetCount; i++) {
ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
set_layout = layout->set[firstSet + i].layout;
- cmd_buffer->state.descriptors[firstSet + i].set = set;
-
- assert(set_layout->num_dynamic_buffers <
- ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets));
- memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets,
- pDynamicOffsets + dynamic_slot,
- set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));
-
- cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;
+ if (cmd_buffer->state.descriptors[firstSet + i] != set) {
+ cmd_buffer->state.descriptors[firstSet + i] = set;
+ cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;
+ }
- dynamic_slot += set_layout->num_dynamic_buffers;
+ if (set_layout->dynamic_offset_count > 0) {
+ anv_foreach_stage(s, set_layout->shader_stages) {
+ anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic);
+
+ struct anv_push_constants *push =
+ cmd_buffer->state.push_constants[s];
+
+ unsigned d = layout->set[firstSet + i].dynamic_offset_start;
+ const uint32_t *offsets = pDynamicOffsets + dynamic_slot;
+ struct anv_descriptor *desc = set->descriptors;
+
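+            /* Walk the bindings in order; bindings without a dynamic offset
+             * are skipped, and each dynamic binding consumes one offset per
+             * array element.
+             */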
+ for (unsigned b = 0; b < set_layout->binding_count; b++) {
+ if (set_layout->binding[b].dynamic_offset_index < 0)
+ continue;
+
+ unsigned array_size = set_layout->binding[b].array_size;
+ for (unsigned j = 0; j < array_size; j++) {
+                  uint32_t range = 0;
+                  if (desc->buffer_view)
+                     range = desc->buffer_view->range;
+ push->dynamic[d].offset = *(offsets++);
+ push->dynamic[d].range = range;
+ desc++;
+ d++;
+ }
+ }
+         }
+
+         /* Advance past this set's offsets so the next set with dynamic
+          * descriptors reads its own slice of pDynamicOffsets.
+          */
+         dynamic_slot += set_layout->dynamic_offset_count;
+
+         cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages;
+ }
}
}
-void anv_CmdBindIndexBuffer(
- VkCmdBuffer cmdBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- VkIndexType indexType)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
-
- static const uint32_t vk_to_gen_index_type[] = {
- [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
- [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
- };
-
- struct GEN8_3DSTATE_VF vf = {
- GEN8_3DSTATE_VF_header,
- .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
- };
- GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf);
-
- cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
- .IndexFormat = vk_to_gen_index_type[indexType],
- .MemoryObjectControlState = GEN8_MOCS,
- .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
- .BufferSize = buffer->size - offset);
-}
-
void anv_CmdBindVertexBuffers(
- VkCmdBuffer cmdBuffer,
+ VkCommandBuffer commandBuffer,
uint32_t startBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
/* We have to defer setting up vertex buffer since we need the buffer
}
}
-static VkResult
-cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
- unsigned stage, struct anv_state *bt_state)
+static void
+add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_state state, struct anv_bo *bo, uint32_t offset)
+{
+ /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and
+ * 9 for gen8+. We only write the first dword for gen8+ here and rely on
+ * the initial state to set the high bits to 0. */
+
+ const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 1 : 8;
+
+ anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+ state.offset + dword * 4, bo, offset);
+}
+
+const struct anv_format *
+anv_format_for_descriptor_type(VkDescriptorType type)
+{
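+   /* Uniform buffers are read through a typed RGBA32 float surface, while
+    * storage buffers use an untyped surface, which is what the
+    * VK_FORMAT_UNDEFINED entry resolves to here.
+    */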
+ switch (type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ return anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT);
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ return anv_format_for_vk_format(VK_FORMAT_UNDEFINED);
+
+ default:
+ unreachable("Invalid descriptor type");
+ }
+}
+
+VkResult
+anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
+ gl_shader_stage stage,
+ struct anv_state *bt_state)
{
struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
struct anv_subpass *subpass = cmd_buffer->state.subpass;
struct anv_pipeline_layout *layout;
- uint32_t attachments, bias, size;
+ uint32_t color_count, bias, state_offset;
- if (stage == VK_SHADER_STAGE_COMPUTE)
- layout = cmd_buffer->state.compute_pipeline->layout;
- else
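+   /* `bias` reserves binding table slots ahead of the descriptor surfaces:
+    * MAX_RTS render target entries for the fragment stage and one entry
+    * for the num_workgroups buffer in compute.
+    */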
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
layout = cmd_buffer->state.pipeline->layout;
-
- if (stage == VK_SHADER_STAGE_FRAGMENT) {
bias = MAX_RTS;
- attachments = subpass->color_count;
- } else {
+ color_count = subpass->color_count;
+ break;
+ case MESA_SHADER_COMPUTE:
+ layout = cmd_buffer->state.compute_pipeline->layout;
+ bias = 1;
+ color_count = 0;
+ break;
+ default:
+ layout = cmd_buffer->state.pipeline->layout;
bias = 0;
- attachments = 0;
+ color_count = 0;
+ break;
}
/* This is a little awkward: layout can be NULL but we still have to
* targets. */
uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
- if (attachments + surface_count == 0)
+ if (color_count + surface_count == 0)
return VK_SUCCESS;
- size = (bias + surface_count) * sizeof(uint32_t);
- *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
+ *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer,
+ bias + surface_count,
+ &state_offset);
uint32_t *bt_map = bt_state->map;
if (bt_state->map == NULL)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
- /* This is highly annoying. The Vulkan spec puts the depth-stencil
- * attachments in with the color attachments. Unfortunately, thanks to
- * other aspects of the API, we cana't really saparate them before this
- * point. Therefore, we have to walk all of the attachments but only
- * put the color attachments into the binding table.
- */
- for (uint32_t a = 0; a < attachments; a++) {
- const struct anv_attachment_view *attachment =
+ for (uint32_t a = 0; a < color_count; a++) {
+ const struct anv_image_view *iview =
fb->attachments[subpass->color_attachments[a]];
- assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR);
- const struct anv_color_attachment_view *view =
- (const struct anv_color_attachment_view *)attachment;
+ bt_map[a] = iview->color_rt_surface_state.offset + state_offset;
+ add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state,
+ iview->bo, iview->offset);
+ }
- struct anv_state state =
- anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
+ if (stage == MESA_SHADER_COMPUTE &&
+ cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) {
+ struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo;
+ uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset;
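+
+      /* The buffer holds the three 32-bit dispatch dimensions, hence the
+       * 12-byte range in the surface state below.
+       */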
- if (state.map == NULL)
- return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ struct anv_state surface_state;
+ surface_state =
+ anv_cmd_buffer_alloc_surface_state(cmd_buffer);
- memcpy(state.map, view->view.surface_state.map, 64);
+ const struct anv_format *format =
+ anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+ anv_fill_buffer_surface_state(cmd_buffer->device, surface_state.map,
+ format->surface_format, bo_offset, 12, 1);
- /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
- *(uint64_t *)(state.map + 8 * 4) =
- anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
- cmd_buffer->device,
- state.offset + 8 * 4,
- view->view.bo, view->view.offset);
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(surface_state);
- bt_map[a] = state.offset;
+ bt_map[0] = surface_state.offset + state_offset;
+ add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
}
if (layout == NULL)
- return VK_SUCCESS;
+ goto out;
- for (uint32_t set = 0; set < layout->num_sets; set++) {
- struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
- struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
- struct anv_descriptor_slot *surface_slots =
- set_layout->stage[stage].surface_start;
+ if (layout->stage[stage].image_count > 0) {
+ VkResult result =
+ anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images);
+ if (result != VK_SUCCESS)
+ return result;
- uint32_t start = bias + layout->set[set].surface_start[stage];
+ cmd_buffer->state.push_constants_dirty |= 1 << stage;
+ }
+
+ uint32_t image = 0;
+ for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) {
+ struct anv_pipeline_binding *binding =
+ &layout->stage[stage].surface_to_descriptor[s];
+ struct anv_descriptor_set *set =
+ cmd_buffer->state.descriptors[binding->set];
+ struct anv_descriptor *desc = &set->descriptors[binding->offset];
+
+ struct anv_state surface_state;
+ struct anv_bo *bo;
+ uint32_t bo_offset;
+
+ switch (desc->type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ /* Nothing for us to do here */
+ continue;
+
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ surface_state = desc->image_view->nonrt_surface_state;
+ bo = desc->image_view->bo;
+ bo_offset = desc->image_view->offset;
+ break;
- for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
- struct anv_surface_view *view =
- d->set->descriptors[surface_slots[b].index].view;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
+ surface_state = desc->image_view->storage_surface_state;
+ bo = desc->image_view->bo;
+ bo_offset = desc->image_view->offset;
- if (!view)
- continue;
+ struct brw_image_param *image_param =
+ &cmd_buffer->state.push_constants[stage]->images[image++];
- struct anv_state state =
- anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
+ anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view,
+ image_param);
+ image_param->surface_idx = bias + s;
+ break;
+ }
- if (state.map == NULL)
- return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ surface_state = desc->buffer_view->surface_state;
+ bo = desc->buffer_view->bo;
+ bo_offset = desc->buffer_view->offset;
+ break;
- uint32_t offset;
- if (surface_slots[b].dynamic_slot >= 0) {
- uint32_t dynamic_offset =
- d->dynamic_offsets[surface_slots[b].dynamic_slot];
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
+ surface_state = desc->buffer_view->storage_surface_state;
+ bo = desc->buffer_view->bo;
+ bo_offset = desc->buffer_view->offset;
- offset = view->offset + dynamic_offset;
- gen8_fill_buffer_surface_state(state.map, view->format, offset,
- view->range - dynamic_offset);
- } else {
- offset = view->offset;
- memcpy(state.map, view->surface_state.map, 64);
- }
+ struct brw_image_param *image_param =
+ &cmd_buffer->state.push_constants[stage]->images[image++];
- /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
- *(uint64_t *)(state.map + 8 * 4) =
- anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
- cmd_buffer->device,
- state.offset + 8 * 4,
- view->bo, offset);
+ anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view,
+ image_param);
+ image_param->surface_idx = bias + s;
+         break;
+      }
- bt_map[start + b] = state.offset;
+ default:
+ assert(!"Invalid descriptor type");
+ continue;
}
+
+ bt_map[bias + s] = surface_state.offset + state_offset;
+ add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
}
+ assert(image == layout->stage[stage].image_count);
+
+ out:
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(*bt_state);
return VK_SUCCESS;
}
-static VkResult
-cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
- unsigned stage, struct anv_state *state)
+VkResult
+anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
+ gl_shader_stage stage, struct anv_state *state)
{
struct anv_pipeline_layout *layout;
uint32_t sampler_count;
- if (stage == VK_SHADER_STAGE_COMPUTE)
+ if (stage == MESA_SHADER_COMPUTE)
layout = cmd_buffer->state.compute_pipeline->layout;
else
layout = cmd_buffer->state.pipeline->layout;
if (state->map == NULL)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
- for (uint32_t set = 0; set < layout->num_sets; set++) {
- struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
- struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
- struct anv_descriptor_slot *sampler_slots =
- set_layout->stage[stage].sampler_start;
-
- uint32_t start = layout->set[set].sampler_start[stage];
-
- for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
- struct anv_sampler *sampler =
- d->set->descriptors[sampler_slots[b].index].sampler;
+ for (uint32_t s = 0; s < layout->stage[stage].sampler_count; s++) {
+ struct anv_pipeline_binding *binding =
+ &layout->stage[stage].sampler_to_descriptor[s];
+ struct anv_descriptor_set *set =
+ cmd_buffer->state.descriptors[binding->set];
+ struct anv_descriptor *desc = &set->descriptors[binding->offset];
- if (!sampler)
- continue;
+ if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER &&
+ desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ continue;
- memcpy(state->map + (start + b) * 16,
- sampler->state, sizeof(sampler->state));
- }
- }
-
- return VK_SUCCESS;
-}
+ struct anv_sampler *sampler = desc->sampler;
-static VkResult
-flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
-{
- struct anv_state surfaces = { 0, }, samplers = { 0, };
- VkResult result;
+      /* This can happen with an unfilled slot: descriptor memory is
+       * zero-initialized and VK_DESCRIPTOR_TYPE_SAMPLER happens to be zero.
+       */
+ if (sampler == NULL)
+ continue;
- result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
- if (result != VK_SUCCESS)
- return result;
- result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
- if (result != VK_SUCCESS)
- return result;
-
- static const uint32_t sampler_state_opcodes[] = {
- [VK_SHADER_STAGE_VERTEX] = 43,
- [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
- [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
- [VK_SHADER_STAGE_GEOMETRY] = 46,
- [VK_SHADER_STAGE_FRAGMENT] = 47,
- [VK_SHADER_STAGE_COMPUTE] = 0,
- };
-
- static const uint32_t binding_table_opcodes[] = {
- [VK_SHADER_STAGE_VERTEX] = 38,
- [VK_SHADER_STAGE_TESS_CONTROL] = 39,
- [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
- [VK_SHADER_STAGE_GEOMETRY] = 41,
- [VK_SHADER_STAGE_FRAGMENT] = 42,
- [VK_SHADER_STAGE_COMPUTE] = 0,
- };
-
- if (samplers.alloc_size > 0) {
- anv_batch_emit(&cmd_buffer->batch,
- GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
- ._3DCommandSubOpcode = sampler_state_opcodes[stage],
- .PointertoVSSamplerState = samplers.offset);
+ memcpy(state->map + (s * 16),
+ sampler->state, sizeof(sampler->state));
}
- if (surfaces.alloc_size > 0) {
- anv_batch_emit(&cmd_buffer->batch,
- GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
- ._3DCommandSubOpcode = binding_table_opcodes[stage],
- .PointertoVSBindingTable = surfaces.offset);
- }
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(*state);
return VK_SUCCESS;
}
-static void
-flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
-{
- uint32_t s, dirty = cmd_buffer->state.descriptors_dirty &
- cmd_buffer->state.pipeline->active_stages;
-
- VkResult result = VK_SUCCESS;
- for_each_bit(s, dirty) {
- result = flush_descriptor_set(cmd_buffer, s);
- if (result != VK_SUCCESS)
- break;
- }
-
- if (result != VK_SUCCESS) {
- assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
-
- result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
- assert(result == VK_SUCCESS);
-
- /* Re-emit state base addresses so we get the new surface state base
- * address before we start emitting binding tables etc.
- */
- anv_cmd_buffer_emit_state_base_address(cmd_buffer);
-
- /* Re-emit all active binding tables */
- for_each_bit(s, cmd_buffer->state.pipeline->active_stages) {
- result = flush_descriptor_set(cmd_buffer, s);
-
- /* It had better succeed this time */
- assert(result == VK_SUCCESS);
- }
- }
-
- cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages;
-}
-
-static struct anv_state
+struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
- uint32_t *a, uint32_t dwords, uint32_t alignment)
+ const void *data, uint32_t size, uint32_t alignment)
{
struct anv_state state;
- state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
- dwords * 4, alignment);
- memcpy(state.map, a, dwords * 4);
+ state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
+ memcpy(state.map, data, size);
+
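+   /* Without LLC the GPU does not snoop the CPU cache, so flush the newly
+    * written state out to memory before the GPU reads it.
+    */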
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state);
- VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
return state;
}
-static struct anv_state
+struct anv_state
anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
uint32_t *a, uint32_t *b,
uint32_t dwords, uint32_t alignment)
for (uint32_t i = 0; i < dwords; i++)
p[i] = a[i] | b[i];
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state);
+
VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
return state;
}
-static VkResult
-flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
-{
- struct anv_device *device = cmd_buffer->device;
- struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- struct anv_state surfaces = { 0, }, samplers = { 0, };
- VkResult result;
-
- result = cmd_buffer_emit_samplers(cmd_buffer,
- VK_SHADER_STAGE_COMPUTE, &samplers);
- if (result != VK_SUCCESS)
- return result;
- result = cmd_buffer_emit_binding_table(cmd_buffer,
- VK_SHADER_STAGE_COMPUTE, &surfaces);
- if (result != VK_SUCCESS)
- return result;
-
- struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
- .KernelStartPointer = pipeline->cs_simd,
- .KernelStartPointerHigh = 0,
- .BindingTablePointer = surfaces.offset,
- .BindingTableEntryCount = 0,
- .SamplerStatePointer = samplers.offset,
- .SamplerCount = 0,
- .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
- };
-
- uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
- struct anv_state state =
- anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
-
- GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
- .InterfaceDescriptorTotalLength = size,
- .InterfaceDescriptorDataStartAddress = state.offset);
-
- return VK_SUCCESS;
-}
-
-static void
-anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
+void
+anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_subpass *subpass)
{
- struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- VkResult result;
-
- assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
-
- if (cmd_buffer->state.current_pipeline != GPGPU) {
- anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
- .PipelineSelection = GPGPU);
- cmd_buffer->state.current_pipeline = GPGPU;
- }
-
- if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
- anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
-
- if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
- (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
- result = flush_compute_descriptor_set(cmd_buffer);
- assert(result == VK_SUCCESS);
- cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE;
+ switch (cmd_buffer->device->info.gen) {
+ case 7:
+ gen7_cmd_buffer_begin_subpass(cmd_buffer, subpass);
+ break;
+ case 8:
+ gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass);
+ break;
+ case 9:
+ gen9_cmd_buffer_begin_subpass(cmd_buffer, subpass);
+ break;
+ default:
+ unreachable("unsupported gen\n");
}
-
- cmd_buffer->state.compute_dirty = 0;
}
-static void
-anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
+struct anv_state
+anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
+ gl_shader_stage stage)
{
- struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
- uint32_t *p;
+ struct anv_push_constants *data =
+ cmd_buffer->state.push_constants[stage];
+ struct brw_stage_prog_data *prog_data =
+ cmd_buffer->state.pipeline->prog_data[stage];
- uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;
+ /* If we don't actually have any push constants, bail. */
+ if (data == NULL || prog_data->nr_params == 0)
+ return (struct anv_state) { .offset = 0 };
- assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
-
- if (cmd_buffer->state.current_pipeline != _3D) {
- anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
- .PipelineSelection = _3D);
- cmd_buffer->state.current_pipeline = _3D;
- }
-
- if (vb_emit) {
- const uint32_t num_buffers = __builtin_popcount(vb_emit);
- const uint32_t num_dwords = 1 + num_buffers * 4;
-
- p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
- GEN8_3DSTATE_VERTEX_BUFFERS);
- uint32_t vb, i = 0;
- for_each_bit(vb, vb_emit) {
- struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
- uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;
-
- struct GEN8_VERTEX_BUFFER_STATE state = {
- .VertexBufferIndex = vb,
- .MemoryObjectControlState = GEN8_MOCS,
- .AddressModifyEnable = true,
- .BufferPitch = pipeline->binding_stride[vb],
- .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
- .BufferSize = buffer->size - offset
- };
-
- GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
- i++;
- }
- }
-
- if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
- /* If somebody compiled a pipeline after starting a command buffer the
- * scratch bo may have grown since we started this cmd buffer (and
- * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
- * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
- if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
- anv_cmd_buffer_emit_state_base_address(cmd_buffer);
-
- anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
- }
-
- if (cmd_buffer->state.descriptors_dirty)
- flush_descriptor_sets(cmd_buffer);
-
- if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) {
- struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state;
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
- .ScissorRectPointer = vp_state->scissor.offset);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
- .CCViewportPointer = vp_state->cc_vp.offset);
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
- .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
- }
-
- if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
- ANV_CMD_BUFFER_RS_DIRTY)) {
- anv_batch_emit_merge(&cmd_buffer->batch,
- cmd_buffer->state.rs_state->state_sf,
- pipeline->state_sf);
- anv_batch_emit_merge(&cmd_buffer->batch,
- cmd_buffer->state.rs_state->state_raster,
- pipeline->state_raster);
- }
-
- if (cmd_buffer->state.ds_state &&
- (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
- ANV_CMD_BUFFER_DS_DIRTY))) {
- anv_batch_emit_merge(&cmd_buffer->batch,
- cmd_buffer->state.ds_state->state_wm_depth_stencil,
- pipeline->state_wm_depth_stencil);
- }
-
- if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY |
- ANV_CMD_BUFFER_DS_DIRTY)) {
- struct anv_state state;
- if (cmd_buffer->state.ds_state == NULL)
- state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
- cmd_buffer->state.cb_state->state_color_calc,
- GEN8_COLOR_CALC_STATE_length, 64);
- else if (cmd_buffer->state.cb_state == NULL)
- state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
- cmd_buffer->state.ds_state->state_color_calc,
- GEN8_COLOR_CALC_STATE_length, 64);
- else
- state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
- cmd_buffer->state.ds_state->state_color_calc,
- cmd_buffer->state.cb_state->state_color_calc,
- GEN8_COLOR_CALC_STATE_length, 64);
-
- anv_batch_emit(&cmd_buffer->batch,
- GEN8_3DSTATE_CC_STATE_POINTERS,
- .ColorCalcStatePointer = state.offset,
- .ColorCalcStatePointerValid = true);
+ struct anv_state state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+ prog_data->nr_params * sizeof(float),
+ 32 /* bottom 5 bits MBZ */);
+
+ /* Walk through the param array and fill the buffer with data */
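+   /* Each param[] entry is a byte offset into the anv_push_constants block,
+    * stashed in the pointer at pipeline compile time, so this is just a
+    * gather of dwords.
+    */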
+ uint32_t *u32_map = state.map;
+ for (unsigned i = 0; i < prog_data->nr_params; i++) {
+ uint32_t offset = (uintptr_t)prog_data->param[i];
+ u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
}
- if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
- ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
- anv_batch_emit_merge(&cmd_buffer->batch,
- cmd_buffer->state.state_vf, pipeline->state_vf);
- }
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state);
- cmd_buffer->state.vb_dirty &= ~vb_emit;
- cmd_buffer->state.dirty = 0;
-}
-
-void anv_CmdDraw(
- VkCmdBuffer cmdBuffer,
- uint32_t firstVertex,
- uint32_t vertexCount,
- uint32_t firstInstance,
- uint32_t instanceCount)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-
- anv_cmd_buffer_flush_state(cmd_buffer);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
- .VertexAccessType = SEQUENTIAL,
- .VertexCountPerInstance = vertexCount,
- .StartVertexLocation = firstVertex,
- .InstanceCount = instanceCount,
- .StartInstanceLocation = firstInstance,
- .BaseVertexLocation = 0);
-}
-
-void anv_CmdDrawIndexed(
- VkCmdBuffer cmdBuffer,
- uint32_t firstIndex,
- uint32_t indexCount,
- int32_t vertexOffset,
- uint32_t firstInstance,
- uint32_t instanceCount)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-
- anv_cmd_buffer_flush_state(cmd_buffer);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
- .VertexAccessType = RANDOM,
- .VertexCountPerInstance = indexCount,
- .StartVertexLocation = firstIndex,
- .InstanceCount = instanceCount,
- .StartInstanceLocation = firstInstance,
- .BaseVertexLocation = vertexOffset);
-}
-
-static void
-anv_batch_lrm(struct anv_batch *batch,
- uint32_t reg, struct anv_bo *bo, uint32_t offset)
-{
- anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
- .RegisterAddress = reg,
- .MemoryAddress = { bo, offset });
-}
-
-static void
-anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
-{
- anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
- .RegisterOffset = reg,
- .DataDWord = imm);
-}
-
-/* Auto-Draw / Indirect Registers */
-#define GEN7_3DPRIM_END_OFFSET 0x2420
-#define GEN7_3DPRIM_START_VERTEX 0x2430
-#define GEN7_3DPRIM_VERTEX_COUNT 0x2434
-#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
-#define GEN7_3DPRIM_START_INSTANCE 0x243C
-#define GEN7_3DPRIM_BASE_VERTEX 0x2440
-
-void anv_CmdDrawIndirect(
- VkCmdBuffer cmdBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- uint32_t count,
- uint32_t stride)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
- struct anv_bo *bo = buffer->bo;
- uint32_t bo_offset = buffer->offset + offset;
-
- anv_cmd_buffer_flush_state(cmd_buffer);
-
- anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
- anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
- anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
- anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
- anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
- .IndirectParameterEnable = true,
- .VertexAccessType = SEQUENTIAL);
-}
-
-void anv_CmdDrawIndexedIndirect(
- VkCmdBuffer cmdBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- uint32_t count,
- uint32_t stride)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
- struct anv_bo *bo = buffer->bo;
- uint32_t bo_offset = buffer->offset + offset;
-
- anv_cmd_buffer_flush_state(cmd_buffer);
-
- anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
- anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
- anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
- anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
- anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
- .IndirectParameterEnable = true,
- .VertexAccessType = RANDOM);
-}
-
-void anv_CmdDispatch(
- VkCmdBuffer cmdBuffer,
- uint32_t x,
- uint32_t y,
- uint32_t z)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
-
- anv_cmd_buffer_flush_compute_state(cmd_buffer);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
- .SIMDSize = prog_data->simd_size / 16,
- .ThreadDepthCounterMaximum = 0,
- .ThreadHeightCounterMaximum = 0,
- .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
- .ThreadGroupIDXDimension = x,
- .ThreadGroupIDYDimension = y,
- .ThreadGroupIDZDimension = z,
- .RightExecutionMask = pipeline->cs_right_mask,
- .BottomExecutionMask = 0xffffffff);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
+ return state;
}
-#define GPGPU_DISPATCHDIMX 0x2500
-#define GPGPU_DISPATCHDIMY 0x2504
-#define GPGPU_DISPATCHDIMZ 0x2508
-
-void anv_CmdDispatchIndirect(
- VkCmdBuffer cmdBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset)
+struct anv_state
+anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+ struct anv_push_constants *data =
+ cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
- struct anv_bo *bo = buffer->bo;
- uint32_t bo_offset = buffer->offset + offset;
-
- anv_cmd_buffer_flush_compute_state(cmd_buffer);
-
- anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
- anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
- anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
- .IndirectParameterEnable = true,
- .SIMDSize = prog_data->simd_size / 16,
- .ThreadDepthCounterMaximum = 0,
- .ThreadHeightCounterMaximum = 0,
- .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
- .RightExecutionMask = pipeline->cs_right_mask,
- .BottomExecutionMask = 0xffffffff);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
-}
-
-void anv_CmdSetEvent(
- VkCmdBuffer cmdBuffer,
- VkEvent event,
- VkPipelineStageFlags stageMask)
-{
- stub();
-}
-
-void anv_CmdResetEvent(
- VkCmdBuffer cmdBuffer,
- VkEvent event,
- VkPipelineStageFlags stageMask)
-{
- stub();
-}
-
-void anv_CmdWaitEvents(
- VkCmdBuffer cmdBuffer,
- uint32_t eventCount,
- const VkEvent* pEvents,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags destStageMask,
- uint32_t memBarrierCount,
- const void* const* ppMemBarriers)
-{
- stub();
-}
-
-void anv_CmdPipelineBarrier(
- VkCmdBuffer cmdBuffer,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags destStageMask,
- VkBool32 byRegion,
- uint32_t memBarrierCount,
- const void* const* ppMemBarriers)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- uint32_t b, *dw;
+ const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+ const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+
+ const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
+ const unsigned push_constant_data_size =
+ (local_id_dwords + prog_data->nr_params) * 4;
+ const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
+ const unsigned param_aligned_count =
+ reg_aligned_constant_size / sizeof(uint32_t);
+
+ /* If we don't actually have any push constants, bail. */
+ if (reg_aligned_constant_size == 0)
+ return (struct anv_state) { .offset = 0 };
+
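+   /* Each thread gets its own copy of the push constants: its local
+    * invocation IDs first, then the shared uniform data, padded out to a
+    * 32-byte register boundary.
+    */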
+ const unsigned threads = pipeline->cs_thread_width_max;
+ const unsigned total_push_constants_size =
+ reg_aligned_constant_size * threads;
+ const unsigned push_constant_alignment =
+ cmd_buffer->device->info.gen < 8 ? 32 : 64;
+ const unsigned aligned_total_push_constants_size =
+ ALIGN(total_push_constants_size, push_constant_alignment);
+ struct anv_state state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+ aligned_total_push_constants_size,
+ push_constant_alignment);
- struct GEN8_PIPE_CONTROL cmd = {
- GEN8_PIPE_CONTROL_header,
- .PostSyncOperation = NoWrite,
- };
+ /* Walk through the param array and fill the buffer with data */
+ uint32_t *u32_map = state.map;
- /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
+ brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads,
+ reg_aligned_constant_size);
- if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
- /* This is just what PIPE_CONTROL does */
+ /* Setup uniform data for the first thread */
+ for (unsigned i = 0; i < prog_data->nr_params; i++) {
+ uint32_t offset = (uintptr_t)prog_data->param[i];
+ u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset);
}
- if (anv_clear_mask(&srcStageMask,
- VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
- VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
- VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
- VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
- VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
- VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
- cmd.StallAtPixelScoreboard = true;
+ /* Copy uniform data from the first thread to every other thread */
+ const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t);
+ for (unsigned t = 1; t < threads; t++) {
+ memcpy(&u32_map[t * param_aligned_count + local_id_dwords],
+ &u32_map[local_id_dwords],
+ uniform_data_size);
}
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state);
- if (anv_clear_mask(&srcStageMask,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
- VK_PIPELINE_STAGE_TRANSFER_BIT |
- VK_PIPELINE_STAGE_TRANSITION_BIT)) {
- cmd.CommandStreamerStallEnable = true;
- }
-
- if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
- anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
- }
-
- /* On our hardware, all stages will wait for execution as needed. */
- (void)destStageMask;
-
- /* We checked all known VkPipeEventFlags. */
- anv_assert(srcStageMask == 0);
-
- /* XXX: Right now, we're really dumb and just flush whatever categories
- * the app asks for. One of these days we may make this a bit better
- * but right now that's all the hardware allows for in most areas.
- */
- VkMemoryOutputFlags out_flags = 0;
- VkMemoryInputFlags in_flags = 0;
-
- for (uint32_t i = 0; i < memBarrierCount; i++) {
- const struct anv_common *common = ppMemBarriers[i];
- switch (common->sType) {
- case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
- ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
- out_flags |= barrier->outputMask;
- in_flags |= barrier->inputMask;
- break;
- }
- case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
- ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
- out_flags |= barrier->outputMask;
- in_flags |= barrier->inputMask;
- break;
- }
- case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
- ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
- out_flags |= barrier->outputMask;
- in_flags |= barrier->inputMask;
- break;
- }
- default:
- unreachable("Invalid memory barrier type");
- }
- }
-
- for_each_bit(b, out_flags) {
- switch ((VkMemoryOutputFlags)(1 << b)) {
- case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
- break; /* FIXME: Little-core systems */
- case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
- cmd.DCFlushEnable = true;
- break;
- case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
- cmd.RenderTargetCacheFlushEnable = true;
- break;
- case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
- cmd.DepthCacheFlushEnable = true;
- break;
- case VK_MEMORY_OUTPUT_TRANSFER_BIT:
- cmd.RenderTargetCacheFlushEnable = true;
- cmd.DepthCacheFlushEnable = true;
- break;
- default:
- unreachable("Invalid memory output flag");
- }
- }
-
- for_each_bit(b, out_flags) {
- switch ((VkMemoryInputFlags)(1 << b)) {
- case VK_MEMORY_INPUT_HOST_READ_BIT:
- break; /* FIXME: Little-core systems */
- case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
- case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
- case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
- cmd.VFCacheInvalidationEnable = true;
- break;
- case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
- cmd.ConstantCacheInvalidationEnable = true;
- /* fallthrough */
- case VK_MEMORY_INPUT_SHADER_READ_BIT:
- cmd.DCFlushEnable = true;
- cmd.TextureCacheInvalidationEnable = true;
- break;
- case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
- case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
- break; /* XXX: Hunh? */
- case VK_MEMORY_INPUT_TRANSFER_BIT:
- cmd.TextureCacheInvalidationEnable = true;
- break;
- }
- }
-
- dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
- GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
+ return state;
}

void anv_CmdPushConstants(
- VkCmdBuffer cmdBuffer,
+ VkCommandBuffer commandBuffer,
VkPipelineLayout layout,
VkShaderStageFlags stageFlags,
- uint32_t start,
- uint32_t length,
- const void* values)
-{
- stub();
-}
-
-static void
-anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
+ uint32_t offset,
+ uint32_t size,
+ const void* pValues)
{
- struct anv_subpass *subpass = cmd_buffer->state.subpass;
- struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct anv_depth_stencil_view *view;
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
- static const struct anv_depth_stencil_view null_view =
- { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };
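+
+ /* Push constants are tracked per shader stage: for each stage named in
+ * stageFlags, grow that stage's storage to cover client_data, then copy
+ * the new values into place.
+ */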
+ anv_foreach_stage(stage, stageFlags) {
+ anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data);
- if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
- const struct anv_attachment_view *aview =
- fb->attachments[subpass->depth_stencil_attachment];
- assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
- view = (const struct anv_depth_stencil_view *)aview;
- } else {
- view = &null_view;
+ memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset,
+ pValues, size);
}
- /* FIXME: Implement the PMA stall W/A */
- /* FIXME: Width and Height are wrong */
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
- .SurfaceType = SURFTYPE_2D,
- .DepthWriteEnable = view->depth_stride > 0,
- .StencilWriteEnable = view->stencil_stride > 0,
- .HierarchicalDepthBufferEnable = false,
- .SurfaceFormat = view->depth_format,
- .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
- .SurfaceBaseAddress = { view->bo, view->depth_offset },
- .Height = cmd_buffer->state.framebuffer->height - 1,
- .Width = cmd_buffer->state.framebuffer->width - 1,
- .LOD = 0,
- .Depth = 1 - 1,
- .MinimumArrayElement = 0,
- .DepthBufferObjectControlState = GEN8_MOCS,
- .RenderTargetViewExtent = 1 - 1,
- .SurfaceQPitch = view->depth_qpitch >> 2);
-
- /* Disable hierarchial depth buffers. */
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
- .StencilBufferEnable = view->stencil_stride > 0,
- .StencilBufferObjectControlState = GEN8_MOCS,
- .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
- .SurfaceBaseAddress = { view->bo, view->stencil_offset },
- .SurfaceQPitch = view->stencil_qpitch >> 2);
-
- /* Clear the clear params. */
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
-}
-
-void
-anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
- struct anv_subpass *subpass)
-{
- cmd_buffer->state.subpass = subpass;
-
- cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
-
- anv_cmd_buffer_emit_depth_stencil(cmd_buffer);
-}
-
-void anv_CmdBeginRenderPass(
- VkCmdBuffer cmdBuffer,
- const VkRenderPassBeginInfo* pRenderPassBegin,
- VkRenderPassContents contents)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
- ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
-
- cmd_buffer->state.framebuffer = framebuffer;
- cmd_buffer->state.pass = pass;
-
- const VkRect2D *render_area = &pRenderPassBegin->renderArea;
-
- anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
- .ClippedDrawingRectangleYMin = render_area->offset.y,
- .ClippedDrawingRectangleXMin = render_area->offset.x,
- .ClippedDrawingRectangleYMax =
- render_area->offset.y + render_area->extent.height - 1,
- .ClippedDrawingRectangleXMax =
- render_area->offset.x + render_area->extent.width - 1,
- .DrawingRectangleOriginY = 0,
- .DrawingRectangleOriginX = 0);
-
- anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
- pRenderPassBegin->pAttachmentClearValues);
-
- anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
-}
-
-void anv_CmdNextSubpass(
- VkCmdBuffer cmdBuffer,
- VkRenderPassContents contents)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-
- assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
-
- anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
-}
-
-void anv_CmdEndRenderPass(
- VkCmdBuffer cmdBuffer)
-{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-
- /* Emit a flushing pipe control at the end of a pass. This is kind of a
- * hack but it ensures that render targets always actually get written.
- * Eventually, we should do flushing based on image format transitions
- * or something of that nature.
- */
- anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
- .PostSyncOperation = NoWrite,
- .RenderTargetCacheFlushEnable = true,
- .InstructionCacheInvalidateEnable = true,
- .DepthCacheFlushEnable = true,
- .VFCacheInvalidationEnable = true,
- .TextureCacheInvalidationEnable = true,
- .CommandStreamerStallEnable = true);
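+
+ /* Flag the touched stages so the new constants are re-emitted before the
+ * next draw or dispatch that uses them.
+ */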
+ cmd_buffer->state.push_constants_dirty |= stageFlags;
}

void anv_CmdExecuteCommands(
- VkCmdBuffer cmdBuffer,
- uint32_t cmdBuffersCount,
- const VkCmdBuffer* pCmdBuffers)
+ VkCommandBuffer commandBuffer,
+ uint32_t commandBuffersCount,
+ const VkCommandBuffer* pCmdBuffers)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer);
+ ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer);
- assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
+ assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
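+
+ /* This implementation assumes secondaries are executed from the first
+ * subpass of the primary's render pass; the anv_assert below checks that.
+ */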
anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]);
- for (uint32_t i = 0; i < cmdBuffersCount; i++) {
+ for (uint32_t i = 0; i < commandBuffersCount; i++) {
ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
- assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY);
+ assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
anv_cmd_buffer_add_secondary(primary, secondary);
   }
}

VkResult anv_CreateCommandPool(
VkDevice _device,
- const VkCmdPoolCreateInfo* pCreateInfo,
- VkCmdPool* pCmdPool)
+ const VkCommandPoolCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkCommandPool* pCmdPool)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_cmd_pool *pool;
- pool = anv_device_alloc(device, sizeof(*pool), 8,
- VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pool == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
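+
+ /* Objects created from this pool are allocated through the pool's
+ * allocator; following the Vulkan allocation-callback rules, fall back to
+ * the device's allocator when the application does not supply one.
+ */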
+ if (pAllocator)
+ pool->alloc = *pAllocator;
+ else
+ pool->alloc = device->alloc;
+
list_inithead(&pool->cmd_buffers);
*pCmdPool = anv_cmd_pool_to_handle(pool);
return VK_SUCCESS;
}

-VkResult anv_DestroyCommandPool(
+void anv_DestroyCommandPool(
VkDevice _device,
- VkCmdPool cmdPool)
+ VkCommandPool commandPool,
+ const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
- ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);
+ ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool);
- anv_ResetCommandPool(_device, cmdPool, 0);
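+ /* Resetting the pool first destroys any command buffers still allocated
+ * from it, so nothing is leaked when the pool itself is freed below.
+ */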
+ anv_ResetCommandPool(_device, commandPool, 0);
- anv_device_free(device, pool);
-
- return VK_SUCCESS;
+ anv_free2(&device->alloc, pAllocator, pool);
}

VkResult anv_ResetCommandPool(
VkDevice device,
- VkCmdPool cmdPool,
- VkCmdPoolResetFlags flags)
+ VkCommandPool commandPool,
+ VkCommandPoolResetFlags flags)
{
- ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);
+ ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool);
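+
+ /* Destroy every command buffer allocated from this pool. The _safe list
+ * walk is required because destruction unlinks each buffer from
+ * pool->cmd_buffers as we go.
+ */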
list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
&pool->cmd_buffers, pool_link) {
- anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer));
+ anv_cmd_buffer_destroy(cmd_buffer);
}
return VK_SUCCESS;
}
+
+/**
+ * Return NULL if the current subpass has no depth-stencil attachment.
+ */
+const struct anv_image_view *
+anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer)
+{
+ const struct anv_subpass *subpass = cmd_buffer->state.subpass;
+ const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+
+ if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED)
+ return NULL;
+
+ const struct anv_image_view *iview =
+ fb->attachments[subpass->depth_stencil_attachment];
+
+ assert(anv_format_is_depth_or_stencil(iview->format));
+
+ return iview;
+}
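+
+/* Usage sketch (hypothetical caller): gen-specific depth/stencil setup code
+ * could use this to choose between programming a real surface and disabled
+ * defaults, e.g.:
+ *
+ *    const struct anv_image_view *iview =
+ *       anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+ *    if (iview == NULL)
+ *       emit_null_depth_stencil(cmd_buffer);   (hypothetical helper)
+ */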