turnip: improve vertex input handling
author: Jonathan Marek <jonathan@marek.ca>
Wed, 18 Mar 2020 02:28:38 +0000 (22:28 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 9 Apr 2020 02:05:52 +0000 (02:05 +0000)
Emit vertexBindingDescriptionCount bindings, instead of one per attribute.

Verified with dEQP-VK.pipeline.vertex_input.*

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4224>

src/freedreno/registers/a6xx.xml
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_device.c
src/freedreno/vulkan/tu_pipeline.c
src/freedreno/vulkan/tu_private.h
src/gallium/drivers/freedreno/a6xx/fd6_emit.c

index 967f138240120af6632f7bf94187106bc2a406b1..044e6da65b197d54b046527e0b350050e91f51cc 100644 (file)
@@ -2675,7 +2675,8 @@ to upconvert to 32b float internally?
        <reg32 offset="0x9e72" name="PC_UNKNOWN_9E72"/>
 
        <reg32 offset="0xa000" name="VFD_CONTROL_0">
-               <bitfield name="VTXCNT" low="0" high="5" type="uint"/>
+               <bitfield name="FETCH_CNT" low="0" high="5" type="uint"/>
+               <bitfield name="DECODE_CNT" low="8" high="13" type="uint"/>
        </reg32>
        <reg32 offset="0xa001" name="VFD_CONTROL_1">
                <bitfield name="REGID4VTX" low="0" high="7" type="a3xx_regid"/>
@@ -2723,8 +2724,9 @@ to upconvert to 32b float internally?
        </array>
        <array offset="0xa090" name="VFD_DECODE" stride="2" length="32">
                <reg32 offset="0x0" name="INSTR">
-                       <!-- IDX appears to index into VFD_FETCH[] -->
+                       <!-- IDX and byte OFFSET into VFD_FETCH -->
                        <bitfield name="IDX" low="0" high="4" type="uint"/>
+                       <bitfield name="OFFSET" low="5" high="16"/>
                        <bitfield name="INSTANCED" pos="17" type="boolean"/>
                        <bitfield name="FORMAT" low="20" high="27" type="a6xx_format"/>
                        <bitfield name="SWAP" low="28" high="29" type="a3xx_color_swap"/>
index f38c07f34e367200cb5736842d9a0d44d508877e..e629c941ef23098e4566d23e0313030a556b8d64 100644 (file)
@@ -3439,18 +3439,15 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
        (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) {
       for (uint32_t i = 0; i < pipeline->vi.count; i++) {
          const uint32_t binding = pipeline->vi.bindings[i];
-         const uint32_t stride = pipeline->vi.strides[i];
          const struct tu_buffer *buf = cmd->state.vb.buffers[binding];
          const VkDeviceSize offset = buf->bo_offset +
-                                     cmd->state.vb.offsets[binding] +
-                                     pipeline->vi.offsets[i];
+                                     cmd->state.vb.offsets[binding];
          const VkDeviceSize size =
             offset < buf->bo->size ? buf->bo->size - offset : 0;
 
          tu_cs_emit_regs(cs,
                          A6XX_VFD_FETCH_BASE(i, .bo = buf->bo, .bo_offset = offset),
-                         A6XX_VFD_FETCH_SIZE(i, size),
-                         A6XX_VFD_FETCH_STRIDE(i, stride));
+                         A6XX_VFD_FETCH_SIZE(i, size));
       }
    }
 
index 6312a84bff9d2fd11ced9269ab50494d56fe4752..6412fcaaf4255453174daf2275c37a933734a497 100644 (file)
@@ -779,7 +779,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .maxDescriptorSetInputAttachments = max_descriptor_set_size,
       .maxVertexInputAttributes = 32,
       .maxVertexInputBindings = 32,
-      .maxVertexInputAttributeOffset = 2047,
+      .maxVertexInputAttributeOffset = 4095,
       .maxVertexInputBindingStride = 2048,
       .maxVertexOutputComponents = 128,
       .maxTessellationGenerationLevel = 64,
index 816b3de5c324b1bef08e505b19a637740690aae3..adf848d56975f464dfb00e8fadd406a70ac48b45 100644 (file)
@@ -112,32 +112,6 @@ tu_shader_stage(VkShaderStageFlagBits stage)
    }
 }
 
-static const VkVertexInputAttributeDescription *
-tu_find_vertex_input_attribute(
-   const VkPipelineVertexInputStateCreateInfo *vi_info, uint32_t slot)
-{
-   assert(slot >= VERT_ATTRIB_GENERIC0);
-   slot -= VERT_ATTRIB_GENERIC0;
-   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
-      if (vi_info->pVertexAttributeDescriptions[i].location == slot)
-         return &vi_info->pVertexAttributeDescriptions[i];
-   }
-   return NULL;
-}
-
-static const VkVertexInputBindingDescription *
-tu_find_vertex_input_binding(
-   const VkPipelineVertexInputStateCreateInfo *vi_info,
-   const VkVertexInputAttributeDescription *vi_attr)
-{
-   assert(vi_attr);
-   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
-      if (vi_info->pVertexBindingDescriptions[i].binding == vi_attr->binding)
-         return &vi_info->pVertexBindingDescriptions[i];
-   }
-   return NULL;
-}
-
 static bool
 tu_logic_op_reads_dst(VkLogicOp op)
 {
@@ -1380,61 +1354,76 @@ tu6_emit_program(struct tu_cs *cs,
 static void
 tu6_emit_vertex_input(struct tu_cs *cs,
                       const struct ir3_shader_variant *vs,
-                      const VkPipelineVertexInputStateCreateInfo *vi_info,
+                      const VkPipelineVertexInputStateCreateInfo *info,
                       uint8_t bindings[MAX_VERTEX_ATTRIBS],
-                      uint16_t strides[MAX_VERTEX_ATTRIBS],
-                      uint16_t offsets[MAX_VERTEX_ATTRIBS],
                       uint32_t *count)
 {
+   uint32_t vfd_fetch_idx = 0;
    uint32_t vfd_decode_idx = 0;
+   uint32_t binding_instanced = 0; /* bitmask of instanced bindings */
 
-   for (uint32_t i = 0; i < vs->inputs_count; i++) {
-      if (vs->inputs[i].sysval || !vs->inputs[i].compmask)
-         continue;
+   for (uint32_t i = 0; i < info->vertexBindingDescriptionCount; i++) {
+      const VkVertexInputBindingDescription *binding =
+         &info->pVertexBindingDescriptions[i];
 
-      const VkVertexInputAttributeDescription *vi_attr =
-         tu_find_vertex_input_attribute(vi_info, vs->inputs[i].slot);
-      const VkVertexInputBindingDescription *vi_binding =
-         tu_find_vertex_input_binding(vi_info, vi_attr);
-      assert(vi_attr && vi_binding);
+      tu_cs_emit_regs(cs,
+                      A6XX_VFD_FETCH_STRIDE(vfd_fetch_idx, binding->stride));
 
-      const struct tu_native_format format = tu6_format_vtx(vi_attr->format);
+      if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
+         binding_instanced |= 1 << binding->binding;
 
-      uint32_t vfd_decode = A6XX_VFD_DECODE_INSTR_IDX(vfd_decode_idx) |
-                            A6XX_VFD_DECODE_INSTR_FORMAT(format.fmt) |
-                            A6XX_VFD_DECODE_INSTR_SWAP(format.swap) |
-                            A6XX_VFD_DECODE_INSTR_UNK30;
-      if (vi_binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
-         vfd_decode |= A6XX_VFD_DECODE_INSTR_INSTANCED;
-      if (!vk_format_is_int(vi_attr->format))
-         vfd_decode |= A6XX_VFD_DECODE_INSTR_FLOAT;
+      bindings[vfd_fetch_idx] = binding->binding;
+      vfd_fetch_idx++;
+   }
 
-      const uint32_t vfd_decode_step_rate = 1;
+   /* TODO: emit all VFD_DECODE/VFD_DEST_CNTL in same (two) pkt4 */
 
-      const uint32_t vfd_dest_cntl =
-         A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) |
-         A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid);
+   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *attr =
+         &info->pVertexAttributeDescriptions[i];
+      uint32_t binding_idx, input_idx;
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DECODE(vfd_decode_idx), 2);
-      tu_cs_emit(cs, vfd_decode);
-      tu_cs_emit(cs, vfd_decode_step_rate);
+      for (binding_idx = 0; binding_idx < vfd_fetch_idx; binding_idx++) {
+         if (bindings[binding_idx] == attr->binding)
+            break;
+      }
+      assert(binding_idx < vfd_fetch_idx);
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DEST_CNTL(vfd_decode_idx), 1);
-      tu_cs_emit(cs, vfd_dest_cntl);
+      for (input_idx = 0; input_idx < vs->inputs_count; input_idx++) {
+         if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == attr->location)
+            break;
+      }
 
-      bindings[vfd_decode_idx] = vi_binding->binding;
-      strides[vfd_decode_idx] = vi_binding->stride;
-      offsets[vfd_decode_idx] = vi_attr->offset;
+      /* attribute not used, skip it */
+      if (input_idx == vs->inputs_count)
+         continue;
+
+      const struct tu_native_format format = tu6_format_vtx(attr->format);
+      tu_cs_emit_regs(cs,
+                      A6XX_VFD_DECODE_INSTR(vfd_decode_idx,
+                        .idx = binding_idx,
+                        .offset = attr->offset,
+                        .instanced = binding_instanced & (1 << attr->binding),
+                        .format = format.fmt,
+                        .swap = format.swap,
+                        .unk30 = 1,
+                        ._float = !vk_format_is_int(attr->format)),
+                      A6XX_VFD_DECODE_STEP_RATE(vfd_decode_idx, 1));
+
+      tu_cs_emit_regs(cs,
+                      A6XX_VFD_DEST_CNTL_INSTR(vfd_decode_idx,
+                        .writemask = vs->inputs[input_idx].compmask,
+                        .regid = vs->inputs[input_idx].regid));
 
       vfd_decode_idx++;
-      assert(vfd_decode_idx <= MAX_VERTEX_ATTRIBS);
    }
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_0, 1);
-   tu_cs_emit(
-      cs, A6XX_VFD_CONTROL_0_VTXCNT(vfd_decode_idx) | (vfd_decode_idx << 8));
+   tu_cs_emit_regs(cs,
+                   A6XX_VFD_CONTROL_0(
+                     .fetch_cnt = vfd_fetch_idx,
+                     .decode_cnt = vfd_decode_idx));
 
-   *count = vfd_decode_idx;
+   *count = vfd_fetch_idx;
 }
 
 static uint32_t
@@ -2006,18 +1995,16 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
 
    struct tu_cs vi_cs;
    tu_cs_begin_sub_stream(&pipeline->cs,
-                          MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs);
+                          MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
    tu6_emit_vertex_input(&vi_cs, &vs->variants[0], vi_info,
-                         pipeline->vi.bindings, pipeline->vi.strides,
-                         pipeline->vi.offsets, &pipeline->vi.count);
+                         pipeline->vi.bindings, &pipeline->vi.count);
    pipeline->vi.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);
 
    if (vs->has_binning_pass) {
       tu_cs_begin_sub_stream(&pipeline->cs,
-                             MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs);
+                             MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
       tu6_emit_vertex_input(
          &vi_cs, &vs->variants[1], vi_info, pipeline->vi.binning_bindings,
-         pipeline->vi.binning_strides, pipeline->vi.binning_offsets,
          &pipeline->vi.binning_count);
       pipeline->vi.binning_state_ib =
          tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);
index bc96ebb6d2190478161f18c530cadf7d3fe61319..d5d63a6bd79cadda42afb1b1e24184e0fd4e10d4 100644 (file)
@@ -1208,13 +1208,9 @@ struct tu_pipeline
    struct
    {
       uint8_t bindings[MAX_VERTEX_ATTRIBS];
-      uint16_t strides[MAX_VERTEX_ATTRIBS];
-      uint16_t offsets[MAX_VERTEX_ATTRIBS];
       uint32_t count;
 
       uint8_t binning_bindings[MAX_VERTEX_ATTRIBS];
-      uint16_t binning_strides[MAX_VERTEX_ATTRIBS];
-      uint16_t binning_offsets[MAX_VERTEX_ATTRIBS];
       uint32_t binning_count;
 
       struct tu_cs_entry state_ib;
index 6f74fbd49c69b62b2b2866890e229eb060af26a7..8fcb603bfa1e214137613c44e98f4bfc674ffd62 100644 (file)
@@ -689,7 +689,8 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
        }
 
        OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1);
-       OUT_RING(ring, A6XX_VFD_CONTROL_0_VTXCNT(j) | (j << 8));
+       OUT_RING(ring, A6XX_VFD_CONTROL_0_FETCH_CNT(j) |
+                       A6XX_VFD_CONTROL_0_DECODE_CNT(j));
 
        return ring;
 }