From e62f8ae15a34b51a1fe1aa5752034e3037646d33 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 17 Mar 2020 22:28:38 -0400 Subject: [PATCH] turnip: improve vertex input handling Emit vertexBindingDescriptionCount bindings, instead of one per attribute. Verified with dEQP-VK.pipeline.vertex_input.* Signed-off-by: Jonathan Marek Part-of: --- src/freedreno/registers/a6xx.xml | 6 +- src/freedreno/vulkan/tu_cmd_buffer.c | 7 +- src/freedreno/vulkan/tu_device.c | 2 +- src/freedreno/vulkan/tu_pipeline.c | 123 ++++++++---------- src/freedreno/vulkan/tu_private.h | 4 - src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 3 +- 6 files changed, 64 insertions(+), 81 deletions(-) diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml index 967f1382401..044e6da65b1 100644 --- a/src/freedreno/registers/a6xx.xml +++ b/src/freedreno/registers/a6xx.xml @@ -2675,7 +2675,8 @@ to upconvert to 32b float internally? - + + @@ -2723,8 +2724,9 @@ to upconvert to 32b float internally? - + + diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index f38c07f34e3..e629c941ef2 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -3439,18 +3439,15 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd, (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) { for (uint32_t i = 0; i < pipeline->vi.count; i++) { const uint32_t binding = pipeline->vi.bindings[i]; - const uint32_t stride = pipeline->vi.strides[i]; const struct tu_buffer *buf = cmd->state.vb.buffers[binding]; const VkDeviceSize offset = buf->bo_offset + - cmd->state.vb.offsets[binding] + - pipeline->vi.offsets[i]; + cmd->state.vb.offsets[binding]; const VkDeviceSize size = offset < buf->bo->size ? 
buf->bo->size - offset : 0; tu_cs_emit_regs(cs, A6XX_VFD_FETCH_BASE(i, .bo = buf->bo, .bo_offset = offset), - A6XX_VFD_FETCH_SIZE(i, size), - A6XX_VFD_FETCH_STRIDE(i, stride)); + A6XX_VFD_FETCH_SIZE(i, size)); } } diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 6312a84bff9..6412fcaaf42 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -779,7 +779,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, .maxDescriptorSetInputAttachments = max_descriptor_set_size, .maxVertexInputAttributes = 32, .maxVertexInputBindings = 32, - .maxVertexInputAttributeOffset = 2047, + .maxVertexInputAttributeOffset = 4095, .maxVertexInputBindingStride = 2048, .maxVertexOutputComponents = 128, .maxTessellationGenerationLevel = 64, diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 816b3de5c32..adf848d5697 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -112,32 +112,6 @@ tu_shader_stage(VkShaderStageFlagBits stage) } } -static const VkVertexInputAttributeDescription * -tu_find_vertex_input_attribute( - const VkPipelineVertexInputStateCreateInfo *vi_info, uint32_t slot) -{ - assert(slot >= VERT_ATTRIB_GENERIC0); - slot -= VERT_ATTRIB_GENERIC0; - for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { - if (vi_info->pVertexAttributeDescriptions[i].location == slot) - return &vi_info->pVertexAttributeDescriptions[i]; - } - return NULL; -} - -static const VkVertexInputBindingDescription * -tu_find_vertex_input_binding( - const VkPipelineVertexInputStateCreateInfo *vi_info, - const VkVertexInputAttributeDescription *vi_attr) -{ - assert(vi_attr); - for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { - if (vi_info->pVertexBindingDescriptions[i].binding == vi_attr->binding) - return &vi_info->pVertexBindingDescriptions[i]; - } - return NULL; -} - static bool 
tu_logic_op_reads_dst(VkLogicOp op)
 {
@@ -1380,61 +1354,76 @@ tu6_emit_program(struct tu_cs *cs,
 static void
 tu6_emit_vertex_input(struct tu_cs *cs,
                       const struct ir3_shader_variant *vs,
-                      const VkPipelineVertexInputStateCreateInfo *vi_info,
+                      const VkPipelineVertexInputStateCreateInfo *info,
                       uint8_t bindings[MAX_VERTEX_ATTRIBS],
-                      uint16_t strides[MAX_VERTEX_ATTRIBS],
-                      uint16_t offsets[MAX_VERTEX_ATTRIBS],
                       uint32_t *count)
 {
+   uint32_t vfd_fetch_idx = 0;
    uint32_t vfd_decode_idx = 0;
+   uint32_t binding_instanced = 0; /* bitmask of instanced bindings */
 
-   for (uint32_t i = 0; i < vs->inputs_count; i++) {
-      if (vs->inputs[i].sysval || !vs->inputs[i].compmask)
-         continue;
+   for (uint32_t i = 0; i < info->vertexBindingDescriptionCount; i++) {
+      const VkVertexInputBindingDescription *binding =
+         &info->pVertexBindingDescriptions[i];
 
-      const VkVertexInputAttributeDescription *vi_attr =
-         tu_find_vertex_input_attribute(vi_info, vs->inputs[i].slot);
-      const VkVertexInputBindingDescription *vi_binding =
-         tu_find_vertex_input_binding(vi_info, vi_attr);
-      assert(vi_attr && vi_binding);
+      tu_cs_emit_regs(cs,
+                      A6XX_VFD_FETCH_STRIDE(vfd_fetch_idx, binding->stride));
 
-      const struct tu_native_format format = tu6_format_vtx(vi_attr->format);
+      if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
+         binding_instanced |= 1u << binding->binding; /* unsigned: bit 31 is a valid binding */
 
-      uint32_t vfd_decode = A6XX_VFD_DECODE_INSTR_IDX(vfd_decode_idx) |
-                            A6XX_VFD_DECODE_INSTR_FORMAT(format.fmt) |
-                            A6XX_VFD_DECODE_INSTR_SWAP(format.swap) |
-                            A6XX_VFD_DECODE_INSTR_UNK30;
-      if (vi_binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
-         vfd_decode |= A6XX_VFD_DECODE_INSTR_INSTANCED;
-      if (!vk_format_is_int(vi_attr->format))
-         vfd_decode |= A6XX_VFD_DECODE_INSTR_FLOAT;
+      bindings[vfd_fetch_idx] = binding->binding;
+      vfd_fetch_idx++;
+   }
 
-      const uint32_t vfd_decode_step_rate = 1;
+   /* TODO: emit all VFD_DECODE/VFD_DEST_CNTL in same (two) pkt4 */
 
-      const uint32_t vfd_dest_cntl =
-         A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) |
-         A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid);
+   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *attr =
+         &info->pVertexAttributeDescriptions[i];
+      uint32_t binding_idx, input_idx;
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DECODE(vfd_decode_idx), 2);
-      tu_cs_emit(cs, vfd_decode);
-      tu_cs_emit(cs, vfd_decode_step_rate);
+      for (binding_idx = 0; binding_idx < vfd_fetch_idx; binding_idx++) {
+         if (bindings[binding_idx] == attr->binding)
+            break;
+      }
+      assert(binding_idx < vfd_fetch_idx);
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DEST_CNTL(vfd_decode_idx), 1);
-      tu_cs_emit(cs, vfd_dest_cntl);
+      for (input_idx = 0; input_idx < vs->inputs_count; input_idx++) {
+         if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == attr->location)
+            break;
+      }
 
-      bindings[vfd_decode_idx] = vi_binding->binding;
-      strides[vfd_decode_idx] = vi_binding->stride;
-      offsets[vfd_decode_idx] = vi_attr->offset;
+      /* attribute not used, skip it */
+      if (input_idx == vs->inputs_count)
+         continue;
+
+      const struct tu_native_format format = tu6_format_vtx(attr->format);
+      tu_cs_emit_regs(cs,
+                      A6XX_VFD_DECODE_INSTR(vfd_decode_idx,
+                        .idx = binding_idx,
+                        .offset = attr->offset,
+                        .instanced = binding_instanced & (1u << attr->binding),
+                        .format = format.fmt,
+                        .swap = format.swap,
+                        .unk30 = 1,
+                        ._float = !vk_format_is_int(attr->format)),
+                      A6XX_VFD_DECODE_STEP_RATE(vfd_decode_idx, 1));
+
+      tu_cs_emit_regs(cs,
+                      A6XX_VFD_DEST_CNTL_INSTR(vfd_decode_idx,
+                        .writemask = vs->inputs[input_idx].compmask,
+                        .regid = vs->inputs[input_idx].regid));
 
       vfd_decode_idx++;
-      assert(vfd_decode_idx <= MAX_VERTEX_ATTRIBS);
    }
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_0, 1);
-   tu_cs_emit(
-      cs, A6XX_VFD_CONTROL_0_VTXCNT(vfd_decode_idx) | (vfd_decode_idx << 8));
+   tu_cs_emit_regs(cs,
+                   A6XX_VFD_CONTROL_0(
+                     .fetch_cnt = vfd_fetch_idx,
+                     .decode_cnt = vfd_decode_idx));
 
-   *count = vfd_decode_idx;
+   *count = vfd_fetch_idx;
 }
 
 static uint32_t
@@ -2006,18 +1995,16 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
 
    struct tu_cs vi_cs;
    tu_cs_begin_sub_stream(&pipeline->cs,
-                          MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs);
+                          MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
    tu6_emit_vertex_input(&vi_cs, &vs->variants[0], vi_info,
-                         pipeline->vi.bindings, pipeline->vi.strides,
-                         pipeline->vi.offsets, &pipeline->vi.count);
+                         pipeline->vi.bindings, &pipeline->vi.count);
    pipeline->vi.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);
 
    if (vs->has_binning_pass) {
       tu_cs_begin_sub_stream(&pipeline->cs,
-                             MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs);
+                             MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
       tu6_emit_vertex_input(
          &vi_cs, &vs->variants[1], vi_info, pipeline->vi.binning_bindings,
-         pipeline->vi.binning_strides, pipeline->vi.binning_offsets,
          &pipeline->vi.binning_count);
       pipeline->vi.binning_state_ib =
          tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index bc96ebb6d21..d5d63a6bd79 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -1208,13 +1208,9 @@ struct tu_pipeline
    struct
    {
       uint8_t bindings[MAX_VERTEX_ATTRIBS];
-      uint16_t strides[MAX_VERTEX_ATTRIBS];
-      uint16_t offsets[MAX_VERTEX_ATTRIBS];
       uint32_t count;
 
       uint8_t binning_bindings[MAX_VERTEX_ATTRIBS];
-      uint16_t binning_strides[MAX_VERTEX_ATTRIBS];
-      uint16_t binning_offsets[MAX_VERTEX_ATTRIBS];
       uint32_t binning_count;
 
       struct tu_cs_entry state_ib;
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index 6f74fbd49c6..8fcb603bfa1 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -689,7 +689,8 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
 	}
 
 	OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1);
-	OUT_RING(ring, A6XX_VFD_CONTROL_0_VTXCNT(j) | (j << 8));
+	OUT_RING(ring,
A6XX_VFD_CONTROL_0_FETCH_CNT(j) | + A6XX_VFD_CONTROL_0_DECODE_CNT(j)); return ring; } -- 2.30.2