X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fvulkan%2Fanv_cmd_buffer.c;h=ee437aa6330e15060b90b3a485c20a87f2315080;hb=64e8af69b168ae3e4db0fde7cae4afbb0721b3e5;hp=f8a630bece5c1d7d2f561d5561fb948f68c29b03;hpb=87d02f515b84a03912f5fbf190e40269e423a1ab;p=mesa.git diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index f8a630bece5..ee437aa6330 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -38,45 +38,139 @@ * is concerned, most of anv_cmd_buffer is magic. */ +/* TODO: These are taken from GLES. We should check the Vulkan spec */ +const struct anv_dynamic_state default_dynamic_state = { + .viewport = { + .count = 0, + }, + .scissor = { + .count = 0, + }, + .line_width = 1.0f, + .depth_bias = { + .bias = 0.0f, + .clamp = 0.0f, + .slope = 0.0f, + }, + .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .depth_bounds = { + .min = 0.0f, + .max = 1.0f, + }, + .stencil_compare_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_write_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_reference = { + .front = 0u, + .back = 0u, + }, +}; + +void +anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask) +{ + if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + dest->viewport.count = src->viewport.count; + typed_memcpy(dest->viewport.viewports, src->viewport.viewports, + src->viewport.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + dest->scissor.count = src->scissor.count; + typed_memcpy(dest->scissor.scissors, src->scissor.scissors, + src->scissor.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) + dest->line_width = src->line_width; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) + dest->depth_bias = src->depth_bias; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) + dest->depth_bounds = src->depth_bounds; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) + dest->stencil_compare_mask = src->stencil_compare_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) + dest->stencil_write_mask = src->stencil_write_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) + dest->stencil_reference = src->stencil_reference; +} + static void anv_cmd_state_init(struct anv_cmd_state *state) { - state->rs_state = NULL; - state->vp_state = NULL; - state->cb_state = NULL; - state->ds_state = NULL; - memset(&state->state_vf, 0, sizeof(state->state_vf)); memset(&state->descriptors, 0, sizeof(state->descriptors)); memset(&state->push_constants, 0, sizeof(state->push_constants)); - state->dirty = 0; + state->dirty = ~0; state->vb_dirty = 0; state->descriptors_dirty = 0; state->push_constants_dirty = 0; state->pipeline = NULL; - state->vp_state = NULL; - state->rs_state = NULL; - state->ds_state = NULL; + state->restart_index = UINT32_MAX; + state->dynamic = default_dynamic_state; state->gen7.index_buffer = NULL; } -VkResult anv_CreateCommandBuffer( - VkDevice _device, - const VkCmdBufferCreateInfo* pCreateInfo, - VkCmdBuffer* pCmdBuffer) +static VkResult +anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, uint32_t size) +{ + struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; + + if (*ptr == NULL) { + *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if 
(*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else if ((*ptr)->size < size) { + *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + (*ptr)->size = size; + + return VK_SUCCESS; +} + +#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ + anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ + (offsetof(struct anv_push_constants, field) + \ + sizeof(cmd_buffer->state.push_constants[0]->field))) + +static VkResult anv_create_cmd_buffer( + struct anv_device * device, + struct anv_cmd_pool * pool, + VkCommandBufferLevel level, + VkCommandBuffer* pCommandBuffer) { - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool); struct anv_cmd_buffer *cmd_buffer; VkResult result; - cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; cmd_buffer->device = device; + cmd_buffer->pool = pool; result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); if (result != VK_SUCCESS) @@ -87,8 +181,8 @@ VkResult anv_CreateCommandBuffer( anv_state_stream_init(&cmd_buffer->dynamic_state_stream, &device->dynamic_state_block_pool); - cmd_buffer->level = pCreateInfo->level; - cmd_buffer->opt_flags = 0; + cmd_buffer->level = level; + cmd_buffer->usage_flags = 0; anv_cmd_state_init(&cmd_buffer->state); @@ -101,38 +195,72 @@ VkResult anv_CreateCommandBuffer( list_inithead(&cmd_buffer->pool_link); } - *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); + *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; - fail: anv_device_free(device, cmd_buffer); + fail: + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); return result; } -VkResult anv_DestroyCommandBuffer( +VkResult anv_AllocateCommandBuffers( VkDevice _device, - VkCmdBuffer _cmd_buffer) + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers) { ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool); + + VkResult result = VK_SUCCESS; + uint32_t i; + for (i = 0; i < pAllocateInfo->bufferCount; i++) { + result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level, + &pCommandBuffers[i]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) + anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, + i, pCommandBuffers); + + return result; +} + +static void +anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) +{ list_del(&cmd_buffer->pool_link); anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_device_free(device, cmd_buffer); - return VK_SUCCESS; + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); +} + +void anv_FreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + + anv_cmd_buffer_destroy(cmd_buffer); + } } VkResult anv_ResetCommandBuffer( - VkCmdBuffer 
cmdBuffer,
-    VkCmdBufferResetFlags                       flags)
+    VkCommandBuffer                             commandBuffer,
+    VkCommandBufferResetFlags                   flags)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
    anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
 
@@ -146,31 +274,39 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
 {
    switch (cmd_buffer->device->info.gen) {
    case 7:
-      return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+      if (cmd_buffer->device->info.is_haswell)
+         return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
+      else
+         return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
    case 8:
       return gen8_cmd_buffer_emit_state_base_address(cmd_buffer);
+   case 9:
+      return gen9_cmd_buffer_emit_state_base_address(cmd_buffer);
    default:
       unreachable("unsupported gen\n");
    }
 }
 
 VkResult anv_BeginCommandBuffer(
-    VkCmdBuffer                                 cmdBuffer,
-    const VkCmdBufferBeginInfo*                 pBeginInfo)
+    VkCommandBuffer                             commandBuffer,
+    const VkCommandBufferBeginInfo*             pBeginInfo)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
 
-   cmd_buffer->opt_flags = pBeginInfo->flags;
+   cmd_buffer->usage_flags = pBeginInfo->flags;
 
-   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) {
+   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
       cmd_buffer->state.framebuffer =
          anv_framebuffer_from_handle(pBeginInfo->framebuffer);
       cmd_buffer->state.pass =
          anv_render_pass_from_handle(pBeginInfo->renderPass);
 
-      /* FIXME: We shouldn't be starting on the first subpass */
-      anv_cmd_buffer_begin_subpass(cmd_buffer,
-                                   &cmd_buffer->state.pass->subpasses[0]);
+      struct anv_subpass *subpass =
+         &cmd_buffer->state.pass->subpasses[pBeginInfo->subpass];
+
+      anv_cmd_buffer_begin_subpass(cmd_buffer, subpass);
    }
 
    anv_cmd_buffer_emit_state_base_address(cmd_buffer);
@@ -180,14 +316,14 @@ VkResult anv_BeginCommandBuffer(
 }
 
 VkResult anv_EndCommandBuffer(
-    VkCmdBuffer                                 cmdBuffer)
+    VkCommandBuffer                             commandBuffer)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    struct anv_device *device = cmd_buffer->device;
 
    anv_cmd_buffer_end_batch_buffer(cmd_buffer);
 
-   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) {
+   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
       /* The algorithm used to compute the validate list is not threadsafe as
        * it uses the bo->index field. We have to lock the device around it.
        * Fortunately, the chances for contention here are probably very low.
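
The anv_cmd_buffer_ensure_push_constant_field() macro added earlier in this diff sizes each per-stage allocation as offsetof() of the member plus sizeof() of the member: just enough bytes that the member (and everything before it) becomes addressable, with anv_cmd_buffer_ensure_push_constants_size() growing the buffer lazily. A minimal standalone sketch of that grow-on-demand pattern follows; the demo_* names are hypothetical stand-ins, not the driver's types:

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

struct demo_push_constants {
   uint32_t size;       /* bytes requested so far, like anv_push_constants */
   uint8_t client_data[128];
   struct { uint32_t offset, range; } dynamic[8];
};

/* Allocate on first use; realloc only when a request outgrows the buffer. */
static int
demo_ensure_size(struct demo_push_constants **ptr, uint32_t size)
{
   if (*ptr == NULL) {
      *ptr = malloc(size);
      if (*ptr == NULL)
         return -1;
   } else if ((*ptr)->size < size) {
      struct demo_push_constants *grown = realloc(*ptr, size);
      if (grown == NULL)
         return -1;
      *ptr = grown;
   }
   (*ptr)->size = size;
   return 0;
}

/* offsetof(field) + sizeof(field) is the smallest allocation that makes
 * `field` valid to touch; sizeof() never dereferences, so *ptr may be NULL. */
#define demo_ensure_field(ptr, field) \
   demo_ensure_size(ptr, offsetof(struct demo_push_constants, field) + \
                         sizeof((*(ptr))->field))

int main(void)
{
   struct demo_push_constants *pc = NULL;

   if (demo_ensure_field(&pc, dynamic) != 0)   /* covers size..dynamic */
      return 1;
   pc->dynamic[0].offset = 256;                /* now addressable */

   free(pc);
   return 0;
}

As in the driver, the buffer only ever grows, while the recorded size always reflects the most recent request.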
@@ -201,25 +337,31 @@ VkResult anv_EndCommandBuffer( } void anv_CmdBindPipeline( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); switch (pipelineBindPoint) { case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer->state.compute_pipeline = pipeline; - cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; break; case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer->state.pipeline = pipeline; cmd_buffer->state.vb_dirty |= pipeline->vb_used; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; + + /* Apply the dynamic state from the pipeline */ + cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, + &pipeline->dynamic_state, + pipeline->dynamic_state_mask); break; default: @@ -228,93 +370,191 @@ void anv_CmdBindPipeline( } } -void anv_CmdBindDynamicViewportState( - VkCmdBuffer cmdBuffer, - VkDynamicViewportState dynamicViewportState) +void anv_CmdSetViewport( + VkCommandBuffer commandBuffer, + uint32_t viewportCount, + const VkViewport* pViewports) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.viewport.count = viewportCount; + memcpy(cmd_buffer->state.dynamic.viewport.viewports, + pViewports, viewportCount * sizeof(*pViewports)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; +} + +void anv_CmdSetScissor( + VkCommandBuffer commandBuffer, + uint32_t scissorCount, + const VkRect2D* pScissors) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.vp_state = vp_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY; + cmd_buffer->state.dynamic.scissor.count = scissorCount; + memcpy(cmd_buffer->state.dynamic.scissor.scissors, + pScissors, scissorCount * sizeof(*pScissors)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; } -void anv_CmdBindDynamicRasterState( - VkCmdBuffer cmdBuffer, - VkDynamicRasterState dynamicRasterState) +void anv_CmdSetLineWidth( + VkCommandBuffer commandBuffer, + float lineWidth) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.rs_state = rs_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY; + cmd_buffer->state.dynamic.line_width = lineWidth; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; } -void anv_CmdBindDynamicColorBlendState( - VkCmdBuffer cmdBuffer, - VkDynamicColorBlendState dynamicColorBlendState) +void anv_CmdSetDepthBias( + VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bias.bias = 
depthBiasConstantFactor; + cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; + cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; - cmd_buffer->state.cb_state = cb_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; } -void anv_CmdBindDynamicDepthStencilState( - VkCmdBuffer cmdBuffer, - VkDynamicDepthStencilState dynamicDepthStencilState) +void anv_CmdSetBlendConstants( + VkCommandBuffer commandBuffer, + const float blendConstants[4]) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.ds_state = ds_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; + memcpy(cmd_buffer->state.dynamic.blend_constants, + blendConstants, sizeof(float) * 4); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; +} + +void anv_CmdSetDepthBounds( + VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; + cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; +} + +void anv_CmdSetStencilCompareMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; +} + +void anv_CmdSetStencilWriteMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; +} + +void anv_CmdSetStencilReference( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_reference.front = reference; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_reference.back = reference; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; } void anv_CmdBindDescriptorSets( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, uint32_t firstSet, - uint32_t setCount, + uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); struct anv_descriptor_set_layout *set_layout; - assert(firstSet + setCount < MAX_SETS); + assert(firstSet + descriptorSetCount < MAX_SETS); uint32_t dynamic_slot = 0; - for (uint32_t i = 0; i < setCount; i++) { + for 
(uint32_t i = 0; i < descriptorSetCount; i++) { ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); set_layout = layout->set[firstSet + i].layout; - cmd_buffer->state.descriptors[firstSet + i].set = set; + if (cmd_buffer->state.descriptors[firstSet + i] != set) { + cmd_buffer->state.descriptors[firstSet + i] = set; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + } + + if (set_layout->dynamic_offset_count > 0) { + anv_foreach_stage(s, set_layout->shader_stages) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); + + struct anv_push_constants *push = + cmd_buffer->state.push_constants[s]; - assert(set_layout->num_dynamic_buffers < - ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets)); - memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets, - pDynamicOffsets + dynamic_slot, - set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); + unsigned d = layout->set[firstSet + i].dynamic_offset_start; + const uint32_t *offsets = pDynamicOffsets + dynamic_slot; + struct anv_descriptor *desc = set->descriptors; - cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + for (unsigned b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index < 0) + continue; - dynamic_slot += set_layout->num_dynamic_buffers; + unsigned array_size = set_layout->binding[b].array_size; + for (unsigned j = 0; j < array_size; j++) { + push->dynamic[d].offset = *(offsets++); + push->dynamic[d].range = (desc++)->range; + d++; + } + } + } + cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; + } } } void anv_CmdBindVertexBuffers( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; /* We have to defer setting up vertex buffer since we need the buffer @@ -338,31 +578,66 @@ add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 
1 : 8; - *(uint32_t *)(state.map + dword * 4) = - anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), - cmd_buffer->device, state.offset + dword * 4, bo, offset); + anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, + state.offset + dword * 4, bo, offset); +} + +static void +fill_descriptor_buffer_surface_state(struct anv_device *device, void *state, + gl_shader_stage stage, + VkDescriptorType type, + uint32_t offset, uint32_t range) +{ + VkFormat format; + uint32_t stride; + + switch (type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + if (device->instance->physicalDevice.compiler->scalar_stage[stage]) { + stride = 4; + } else { + stride = 16; + } + format = VK_FORMAT_R32G32B32A32_SFLOAT; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + stride = 1; + format = VK_FORMAT_UNDEFINED; + break; + + default: + unreachable("Invalid descriptor type"); + } + + anv_fill_buffer_surface_state(device, state, + anv_format_for_vk_format(format), + offset, range, stride); } VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *bt_state) + gl_shader_stage stage, + struct anv_state *bt_state) { struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_pipeline_layout *layout; - uint32_t attachments, bias, size; + uint32_t color_count, bias, state_offset; - if (stage == VK_SHADER_STAGE_COMPUTE) + if (stage == MESA_SHADER_COMPUTE) layout = cmd_buffer->state.compute_pipeline->layout; else layout = cmd_buffer->state.pipeline->layout; - if (stage == VK_SHADER_STAGE_FRAGMENT) { + if (stage == MESA_SHADER_FRAGMENT) { bias = MAX_RTS; - attachments = subpass->color_count; + color_count = subpass->color_count; } else { bias = 0; - attachments = 0; + color_count = 0; } /* This is a little awkward: layout can be NULL but we still have to @@ -370,98 +645,104 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, * targets. */ uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; - if (attachments + surface_count == 0) + if (color_count + surface_count == 0) return VK_SUCCESS; - size = (bias + surface_count) * sizeof(uint32_t); - *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, + bias + surface_count, + &state_offset); uint32_t *bt_map = bt_state->map; if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - /* This is highly annoying. The Vulkan spec puts the depth-stencil - * attachments in with the color attachments. Unfortunately, thanks to - * other aspects of the API, we cana't really saparate them before this - * point. Therefore, we have to walk all of the attachments but only - * put the color attachments into the binding table. 
- */ - for (uint32_t a = 0; a < attachments; a++) { - const struct anv_attachment_view *attachment = + for (uint32_t a = 0; a < color_count; a++) { + const struct anv_image_view *iview = fb->attachments[subpass->color_attachments[a]]; - assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); - const struct anv_color_attachment_view *view = - (const struct anv_color_attachment_view *)attachment; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - memcpy(state.map, view->view.surface_state.map, 64); - - add_surface_state_reloc(cmd_buffer, state, view->view.bo, view->view.offset); - - bt_map[a] = state.offset; + bt_map[a] = iview->color_rt_surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); } if (layout == NULL) - return VK_SUCCESS; - - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - struct anv_descriptor_slot *surface_slots = - set_layout->stage[stage].surface_start; + goto out; + + for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) { + struct anv_pipeline_binding *binding = + &layout->stage[stage].surface_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + struct anv_state surface_state; + struct anv_bo *bo; + uint32_t bo_offset; + + switch (desc->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + /* Nothing for us to do here */ + continue; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { + bo = desc->buffer->bo; + bo_offset = desc->buffer->offset + desc->offset; + + surface_state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer); + + fill_descriptor_buffer_surface_state(cmd_buffer->device, + surface_state.map, + stage, desc->type, + bo_offset, desc->range); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(surface_state); - uint32_t start = bias + layout->set[set].surface_start[stage]; - - for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { - struct anv_surface_view *view = - d->set->descriptors[surface_slots[b].index].view; - - if (!view) - continue; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - uint32_t offset; - if (surface_slots[b].dynamic_slot >= 0) { - uint32_t dynamic_offset = - d->dynamic_offsets[surface_slots[b].dynamic_slot]; + break; + } - offset = view->offset + dynamic_offset; - anv_fill_buffer_surface_state(cmd_buffer->device, - state.map, view->format, offset, - view->range - dynamic_offset); - } else { - offset = view->offset; - memcpy(state.map, view->surface_state.map, 64); - } + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + surface_state = desc->image_view->nonrt_surface_state; + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + break; - add_surface_state_reloc(cmd_buffer, state, view->bo, offset); + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + assert(!"Unsupported descriptor type"); + break; - bt_map[start + b] = state.offset; + default: + assert(!"Invalid descriptor type"); + continue; } + + bt_map[bias + s] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); } + out: + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*bt_state); + return VK_SUCCESS; } VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *state) + gl_shader_stage stage, struct anv_state *state) { struct anv_pipeline_layout *layout; uint32_t sampler_count; - if (stage == VK_SHADER_STAGE_COMPUTE) + if (stage == MESA_SHADER_COMPUTE) layout = cmd_buffer->state.compute_pipeline->layout; else layout = cmd_buffer->state.pipeline->layout; @@ -476,124 +757,48 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, if (state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - struct anv_descriptor_slot *sampler_slots = - set_layout->stage[stage].sampler_start; - - uint32_t start = layout->set[set].sampler_start[stage]; + for (uint32_t s = 0; s < layout->stage[stage].sampler_count; s++) { + struct anv_pipeline_binding *binding = + &layout->stage[stage].sampler_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; - for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { - struct anv_sampler *sampler = - d->set->descriptors[sampler_slots[b].index].sampler; + if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && + desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + continue; - if (!sampler) - continue; - - memcpy(state->map + (start + b) * 16, - sampler->state, sizeof(sampler->state)); - } - } - - return VK_SUCCESS; -} + struct anv_sampler *sampler = desc->sampler; -static VkResult -flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) -{ - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; + /* This can happen if we have an unfilled slot since TYPE_SAMPLER + * happens to be zero. 
+ */ + if (sampler == NULL) + continue; - result = anv_cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); - if (result != VK_SUCCESS) - return result; - - static const uint32_t sampler_state_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 43, - [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 46, - [VK_SHADER_STAGE_FRAGMENT] = 47, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - if (samplers.alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[stage], - .PointertoVSSamplerState = samplers.offset); + memcpy(state->map + (s * 16), + sampler->state, sizeof(sampler->state)); } - if (surfaces.alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = binding_table_opcodes[stage], - .PointertoVSBindingTable = surfaces.offset); - } + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*state); return VK_SUCCESS; } -void -anv_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) -{ - uint32_t s, dirty = cmd_buffer->state.descriptors_dirty & - cmd_buffer->state.pipeline->active_stages; - - VkResult result = VK_SUCCESS; - for_each_bit(s, dirty) { - result = flush_descriptor_set(cmd_buffer, s); - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) { - assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - - result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); - assert(result == VK_SUCCESS); - - /* Re-emit state base addresses so we get the new surface state base - * address before we start emitting binding tables etc. 
- */ - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - /* Re-emit all active binding tables */ - for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { - result = flush_descriptor_set(cmd_buffer, s); - - /* It had better succeed this time */ - assert(result == VK_SUCCESS); - } - } - - cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; -} - struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t dwords, uint32_t alignment) + const void *data, uint32_t size, uint32_t alignment) { struct anv_state state; - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - dwords * 4, alignment); - memcpy(state.map, a, dwords * 4); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); + memcpy(state.map, data, size); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); return state; } @@ -612,6 +817,9 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, for (uint32_t i = 0; i < dwords; i++) p[i] = a[i] | b[i]; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); return state; @@ -628,13 +836,16 @@ anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, case 8: gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass); break; + case 9: + gen9_cmd_buffer_begin_subpass(cmd_buffer, subpass); + break; default: unreachable("unsupported gen\n"); } } void anv_CmdSetEvent( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask) { @@ -642,7 +853,7 @@ void anv_CmdSetEvent( } void anv_CmdResetEvent( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask) { @@ -650,7 +861,7 @@ void anv_CmdResetEvent( } void anv_CmdWaitEvents( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, @@ -663,10 +874,10 @@ void anv_CmdWaitEvents( struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, - VkShaderStage stage) + gl_shader_stage stage) { - struct anv_push_constant_data *data = - cmd_buffer->state.push_constants[stage].data; + struct anv_push_constants *data = + cmd_buffer->state.push_constants[stage]; struct brw_stage_prog_data *prog_data = cmd_buffer->state.pipeline->prog_data[stage]; @@ -686,50 +897,47 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); } + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + return state; } void anv_CmdPushConstants( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, - uint32_t start, - uint32_t length, - const void* values) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - uint32_t stage; - - for_each_bit(stage, stageFlags) { - if (cmd_buffer->state.push_constants[stage].data == NULL) { - cmd_buffer->state.push_constants[stage].data = - anv_device_alloc(cmd_buffer->device, - sizeof(struct anv_push_constant_data), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - } + uint32_t offset, + uint32_t size, + const void* pValues) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_foreach_stage(stage, stageFlags) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); - 
memcpy(cmd_buffer->state.push_constants[stage].data->client_data + start, - values, length); + memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + pValues, size); } cmd_buffer->state.push_constants_dirty |= stageFlags; } void anv_CmdExecuteCommands( - VkCmdBuffer cmdBuffer, - uint32_t cmdBuffersCount, - const VkCmdBuffer* pCmdBuffers) + VkCommandBuffer commandBuffer, + uint32_t commandBuffersCount, + const VkCommandBuffer* pCmdBuffers) { - ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); - assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]); - for (uint32_t i = 0; i < cmdBuffersCount; i++) { + for (uint32_t i = 0; i < commandBuffersCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); - assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY); + assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); anv_cmd_buffer_add_secondary(primary, secondary); } @@ -737,17 +945,23 @@ void anv_CmdExecuteCommands( VkResult anv_CreateCommandPool( VkDevice _device, - const VkCmdPoolCreateInfo* pCreateInfo, - VkCmdPool* pCmdPool) + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCmdPool) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_cmd_pool *pool; - pool = anv_device_alloc(device, sizeof(*pool), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->alloc; + list_inithead(&pool->cmd_buffers); *pCmdPool = anv_cmd_pool_to_handle(pool); @@ -755,30 +969,29 @@ VkResult anv_CreateCommandPool( return VK_SUCCESS; } -VkResult anv_DestroyCommandPool( +void anv_DestroyCommandPool( VkDevice _device, - VkCmdPool cmdPool) + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); - anv_ResetCommandPool(_device, cmdPool, 0); + anv_ResetCommandPool(_device, commandPool, 0); - anv_device_free(device, pool); - - return VK_SUCCESS; + anv_free2(&device->alloc, pAllocator, pool); } VkResult anv_ResetCommandPool( VkDevice device, - VkCmdPool cmdPool, - VkCmdPoolResetFlags flags) + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) { - ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) { - anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer)); + anv_cmd_buffer_destroy(cmd_buffer); } return VK_SUCCESS; @@ -787,7 +1000,7 @@ VkResult anv_ResetCommandPool( /** * Return NULL if the current subpass has no depthstencil attachment. 
*/ -const struct anv_depth_stencil_view * +const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) { const struct anv_subpass *subpass = cmd_buffer->state.subpass; @@ -796,10 +1009,10 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) return NULL; - const struct anv_attachment_view *aview = + const struct anv_image_view *iview = fb->attachments[subpass->depth_stencil_attachment]; - assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + assert(anv_format_is_depth_or_stencil(iview->format)); - return (const struct anv_depth_stencil_view *) aview; + return iview; }
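
Taken together, the dynamic-state plumbing in this diff follows one pattern: every vkCmdSet* entry point writes its values into cmd_buffer->state.dynamic and sets the matching ANV_CMD_DIRTY_DYNAMIC_* bit, while anv_CmdBindPipeline overlays only the state selected by the pipeline's mask (the state baked into the pipeline rather than left dynamic) via anv_dynamic_state_copy(), then ORs that same mask into state.dirty so the overlaid values are re-emitted. A small self-contained sketch of the mask-driven overlay, with hypothetical demo_* names standing in for the driver's structures:

#include <stdint.h>
#include <stdio.h>

enum {
   DEMO_DYNAMIC_LINE_WIDTH = 1 << 0,
   DEMO_DYNAMIC_DEPTH_BIAS = 1 << 1,
};

struct demo_dynamic_state {
   float line_width;
   float depth_bias;
};

/* Copy only the pieces selected by copy_mask, like anv_dynamic_state_copy(). */
static void
demo_dynamic_copy(struct demo_dynamic_state *dest,
                  const struct demo_dynamic_state *src, uint32_t copy_mask)
{
   if (copy_mask & DEMO_DYNAMIC_LINE_WIDTH)
      dest->line_width = src->line_width;
   if (copy_mask & DEMO_DYNAMIC_DEPTH_BIAS)
      dest->depth_bias = src->depth_bias;
}

int main(void)
{
   struct demo_dynamic_state cmd = { .line_width = 1.0f, .depth_bias = 0.0f };

   /* vkCmdSetLineWidth equivalent: the application owns line width. */
   cmd.line_width = 4.0f;

   /* Pipeline bind: this pipeline bakes in depth bias but declared line
    * width dynamic, so its mask omits LINE_WIDTH and the application's
    * value survives the bind. */
   const struct demo_dynamic_state pipeline = {
      .line_width = 1.0f, .depth_bias = 2.0f,
   };
   demo_dynamic_copy(&cmd, &pipeline, DEMO_DYNAMIC_DEPTH_BIAS);

   printf("line_width=%.1f depth_bias=%.1f\n", cmd.line_width, cmd.depth_bias);
   return 0;
}

Because the copy is mask-driven, binding a pipeline never clobbers state the application set through a vkCmdSet* call; the default_dynamic_state table at the top of the file only supplies values that neither the application nor a pipeline has written.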