From 6cbdffd79ce2725bcac94cd2ce412461883a51c1 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 21 Jul 2020 11:33:33 +0200
Subject: [PATCH] tu: Implement VK_KHR_draw_indirect_count

Part-of:
---
 src/freedreno/vulkan/tu_cmd_buffer.c  | 73 +++++++++++++++++++++++++++
 src/freedreno/vulkan/tu_device.c      | 53 +++++++++++++++++++
 src/freedreno/vulkan/tu_extensions.py |  1 +
 3 files changed, 127 insertions(+)

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index dafa9e6a9dc..d786a4589a9 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -3300,6 +3300,79 @@ tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
    tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
 }
 
+void
+tu_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
+                        VkBuffer _buffer,
+                        VkDeviceSize offset,
+                        VkBuffer countBuffer,
+                        VkDeviceSize countBufferOffset,
+                        uint32_t drawCount,
+                        uint32_t stride)
+{ /* Non-indexed variant: emits one CP_DRAW_INDIRECT_MULTI packet referencing _buffer and countBuffer. */
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_buffer, buf, _buffer);
+   TU_FROM_HANDLE(tu_buffer, count_buf, countBuffer);
+   struct tu_cs *cs = &cmd->draw_cs;
+
+   cmd->state.vs_params = (struct tu_draw_state) {}; /* reset vs_params to an empty draw state */
+
+   /* The firmware we have for a650 only partially fixed the problem of
+    * CP_DRAW_INDIRECT_MULTI not waiting for WFIs to complete before reading
+    * indirect parameters: it waits for WFIs before reading the draw
+    * parameters, but only after it has already read the indirect count.
+    */
+   draw_wfm(cmd); /* hence called unconditionally here */
+
+   tu6_draw_common(cmd, cs, false, 0); /* false here vs. true in the indexed variant; presumably the "indexed" flag - confirm against tu6_draw_common */
+
+   tu_cs_emit_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 8); /* 8 matches the emits below, assuming tu_cs_emit writes 1 dword and tu_cs_emit_qw writes 2 */
+   tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
+   tu_cs_emit(cs, A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDIRECT_COUNT) |
+                  A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
+   tu_cs_emit(cs, drawCount); /* passed through unchanged from the caller */
+   tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset); /* address of the indirect draw parameters */
+   tu_cs_emit_qw(cs, count_buf->bo->iova + count_buf->bo_offset + countBufferOffset); /* address of the indirect count */
+   tu_cs_emit(cs, stride);
+
+   tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ); /* both BOs are listed with the READ flag only */
+   tu_bo_list_add(&cmd->bo_list, count_buf->bo, MSM_SUBMIT_BO_READ);
+}
+
+void
+tu_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
+                               VkBuffer _buffer,
+                               VkDeviceSize offset,
+                               VkBuffer countBuffer,
+                               VkDeviceSize countBufferOffset,
+                               uint32_t drawCount,
+                               uint32_t stride)
+{ /* Indexed variant: same packet as tu_CmdDrawIndirectCount plus the index buffer address and max index count. */
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_buffer, buf, _buffer);
+   TU_FROM_HANDLE(tu_buffer, count_buf, countBuffer);
+   struct tu_cs *cs = &cmd->draw_cs;
+
+   cmd->state.vs_params = (struct tu_draw_state) {}; /* reset vs_params to an empty draw state */
+
+   draw_wfm(cmd); /* same a650 firmware workaround as described in tu_CmdDrawIndirectCount */
+
+   tu6_draw_common(cmd, cs, true, 0); /* true here vs. false in the non-indexed variant; presumably the "indexed" flag - confirm against tu6_draw_common */
+
+   tu_cs_emit_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 11); /* 11 = the 8 of the non-indexed packet + 3 for index_va and max_index_count (same dword-size assumption) */
+   tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
+   tu_cs_emit(cs, A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDIRECT_COUNT_INDEXED) |
+                  A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
+   tu_cs_emit(cs, drawCount); /* passed through unchanged from the caller */
+   tu_cs_emit_qw(cs, cmd->state.index_va); /* index buffer state is taken from cmd->state, not from this call's arguments */
+   tu_cs_emit(cs, cmd->state.max_index_count);
+   tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset); /* address of the indirect draw parameters */
+   tu_cs_emit_qw(cs, count_buf->bo->iova + count_buf->bo_offset + countBufferOffset); /* address of the indirect count */
+   tu_cs_emit(cs, stride);
+
+   tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ); /* both BOs are listed with the READ flag only */
+   tu_bo_list_add(&cmd->bo_list, count_buf->bo, MSM_SUBMIT_BO_READ);
+}
+
 void tu_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
                                     uint32_t instanceCount,
                                     uint32_t firstInstance,
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index 10eaf65698f..bf204021a56 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -669,6 +669,59 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
          features->shaderDrawParameters = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
+         VkPhysicalDeviceVulkan12Features *features = (void *) ext; /* only samplerMirrorClampToEdge, drawIndirectCount and samplerFilterMinmax are set to true below */
+         features->samplerMirrorClampToEdge = true;
+         features->drawIndirectCount = true; /* the feature bit for the two *IndirectCount entry points added by this patch */
+         features->storageBuffer8BitAccess = false;
+         features->uniformAndStorageBuffer8BitAccess = false;
+         features->storagePushConstant8 = false;
+         features->shaderBufferInt64Atomics = false;
+         features->shaderSharedInt64Atomics = false;
+         features->shaderFloat16 = false;
+         features->shaderInt8 = false;
+
+         features->descriptorIndexing = false; /* every field from here through runtimeDescriptorArray is set to false */
+         features->shaderInputAttachmentArrayDynamicIndexing = false;
+         features->shaderUniformTexelBufferArrayDynamicIndexing = false;
+         features->shaderStorageTexelBufferArrayDynamicIndexing = false;
+         features->shaderUniformBufferArrayNonUniformIndexing = false;
+         features->shaderSampledImageArrayNonUniformIndexing = false;
+         features->shaderStorageBufferArrayNonUniformIndexing = false;
+         features->shaderStorageImageArrayNonUniformIndexing = false;
+         features->shaderInputAttachmentArrayNonUniformIndexing = false;
+         features->shaderUniformTexelBufferArrayNonUniformIndexing = false;
+         features->shaderStorageTexelBufferArrayNonUniformIndexing = false;
+         features->descriptorBindingUniformBufferUpdateAfterBind = false;
+         features->descriptorBindingSampledImageUpdateAfterBind = false;
+         features->descriptorBindingStorageImageUpdateAfterBind = false;
+         features->descriptorBindingStorageBufferUpdateAfterBind = false;
+         features->descriptorBindingUniformTexelBufferUpdateAfterBind = false;
+         features->descriptorBindingStorageTexelBufferUpdateAfterBind = false;
+         features->descriptorBindingUpdateUnusedWhilePending = false;
+         features->descriptorBindingPartiallyBound = false;
+         features->descriptorBindingVariableDescriptorCount = false;
+         features->runtimeDescriptorArray = false;
+
+         features->samplerFilterMinmax = true; /* the only field set to true in this last group */
+         features->scalarBlockLayout = false;
+         features->imagelessFramebuffer = false;
+         features->uniformBufferStandardLayout = false;
+         features->shaderSubgroupExtendedTypes = false;
+         features->separateDepthStencilLayouts = false;
+         features->hostQueryReset = false;
+         features->timelineSemaphore = false;
+         features->bufferDeviceAddress = false;
+         features->bufferDeviceAddressCaptureReplay = false;
+         features->bufferDeviceAddressMultiDevice = false;
+         features->vulkanMemoryModel = false;
+         features->vulkanMemoryModelDeviceScope = false;
+         features->vulkanMemoryModelAvailabilityVisibilityChains = false;
+         features->shaderOutputViewportIndex = false;
+         features->shaderOutputLayer = false;
+         features->subgroupBroadcastDynamicId = false;
+         break;
+      }
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
          VkPhysicalDeviceVariablePointersFeatures *features = (void *) ext;
          features->variablePointersStorageBuffer = true;
diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py
index be078daf7ce..2f73306f8e3 100644
--- a/src/freedreno/vulkan/tu_extensions.py
+++ b/src/freedreno/vulkan/tu_extensions.py
@@ -88,6 +88,7 @@ EXTENSIONS = [
     Extension('VK_EXT_private_data', 1, True),
     Extension('VK_EXT_shader_stencil_export', 1, True),
     Extension('VK_EXT_depth_clip_enable', 1, True),
+    Extension('VK_KHR_draw_indirect_count', 1, True),
 ]
 
 MAX_API_VERSION = VkVersion(MAX_API_VERSION)
-- 
2.30.2