turnip: rework format helpers
[mesa.git] / src / freedreno / vulkan / tu_pipeline.c
index fa070fcf957ba1b9631593db04901e22cc9eaffc..adf848d56975f464dfb00e8fadd406a70ac48b45 100644 (file)
@@ -112,32 +112,6 @@ tu_shader_stage(VkShaderStageFlagBits stage)
    }
 }
 
-static const VkVertexInputAttributeDescription *
-tu_find_vertex_input_attribute(
-   const VkPipelineVertexInputStateCreateInfo *vi_info, uint32_t slot)
-{
-   assert(slot >= VERT_ATTRIB_GENERIC0);
-   slot -= VERT_ATTRIB_GENERIC0;
-   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
-      if (vi_info->pVertexAttributeDescriptions[i].location == slot)
-         return &vi_info->pVertexAttributeDescriptions[i];
-   }
-   return NULL;
-}
-
-static const VkVertexInputBindingDescription *
-tu_find_vertex_input_binding(
-   const VkPipelineVertexInputStateCreateInfo *vi_info,
-   const VkVertexInputAttributeDescription *vi_attr)
-{
-   assert(vi_attr);
-   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
-      if (vi_info->pVertexBindingDescriptions[i].binding == vi_attr->binding)
-         return &vi_info->pVertexBindingDescriptions[i];
-   }
-   return NULL;
-}
-
 static bool
 tu_logic_op_reads_dst(VkLogicOp op)
 {
@@ -438,19 +412,21 @@ static void
 tu6_emit_gs_config(struct tu_cs *cs, struct tu_shader *shader,
                    const struct ir3_shader_variant *gs)
 {
-   uint32_t sp_gs_config = 0;
-   if (gs->instrlen)
-      sp_gs_config |= A6XX_SP_GS_CONFIG_ENABLED;
-
+   bool has_gs = gs->type != MESA_SHADER_NONE; /* replaces the old gs->instrlen test for enabling the stage */
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_UNKNOWN_A871, 1);
    tu_cs_emit(cs, 0);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CONFIG, 2);
-   tu_cs_emit(cs, sp_gs_config);
+   tu_cs_emit(cs, COND(has_gs, /* presumably COND() yields 0 when has_gs is false -- confirm */
+                       A6XX_SP_GS_CONFIG_ENABLED |
+                       A6XX_SP_GS_CONFIG_NIBO(ir3_shader_nibo(gs)) |
+                       A6XX_SP_GS_CONFIG_NTEX(gs->num_samp) | /* NOTE(review): NTEX is fed num_samp, same as NSAMP -- confirm intended */
+                       A6XX_SP_GS_CONFIG_NSAMP(gs->num_samp)));
    tu_cs_emit(cs, gs->instrlen);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_GS_CNTL, 1);
-   tu_cs_emit(cs, A6XX_HLSQ_GS_CNTL_CONSTLEN(align(gs->constlen, 4)));
+   tu_cs_emit(cs, COND(has_gs, A6XX_HLSQ_GS_CNTL_ENABLED) | /* ENABLED is gated on has_gs; CONSTLEN is emitted unconditionally */
+                  A6XX_HLSQ_GS_CNTL_CONSTLEN(align(gs->constlen, 4)));
 }
 
 static void
@@ -1378,61 +1354,76 @@ tu6_emit_program(struct tu_cs *cs,
 static void
 tu6_emit_vertex_input(struct tu_cs *cs,
                       const struct ir3_shader_variant *vs,
-                      const VkPipelineVertexInputStateCreateInfo *vi_info,
+                      const VkPipelineVertexInputStateCreateInfo *info,
                       uint8_t bindings[MAX_VERTEX_ATTRIBS],
-                      uint16_t strides[MAX_VERTEX_ATTRIBS],
-                      uint16_t offsets[MAX_VERTEX_ATTRIBS],
                       uint32_t *count)
 {
+   uint32_t vfd_fetch_idx = 0;
    uint32_t vfd_decode_idx = 0;
+   uint32_t binding_instanced = 0; /* bitmask of instanced bindings */
 
-   for (uint32_t i = 0; i < vs->inputs_count; i++) {
-      if (vs->inputs[i].sysval || !vs->inputs[i].compmask)
-         continue;
+   for (uint32_t i = 0; i < info->vertexBindingDescriptionCount; i++) {
+      const VkVertexInputBindingDescription *binding =
+         &info->pVertexBindingDescriptions[i];
 
-      const VkVertexInputAttributeDescription *vi_attr =
-         tu_find_vertex_input_attribute(vi_info, vs->inputs[i].slot);
-      const VkVertexInputBindingDescription *vi_binding =
-         tu_find_vertex_input_binding(vi_info, vi_attr);
-      assert(vi_attr && vi_binding);
+      tu_cs_emit_regs(cs,
+                      A6XX_VFD_FETCH_STRIDE(vfd_fetch_idx, binding->stride));
 
-      const struct tu_native_format format = tu6_format_vtx(vi_attr->format);
+      if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
+         binding_instanced |= 1 << binding->binding;
 
-      uint32_t vfd_decode = A6XX_VFD_DECODE_INSTR_IDX(vfd_decode_idx) |
-                            A6XX_VFD_DECODE_INSTR_FORMAT(format.fmt) |
-                            A6XX_VFD_DECODE_INSTR_SWAP(format.swap) |
-                            A6XX_VFD_DECODE_INSTR_UNK30;
-      if (vi_binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
-         vfd_decode |= A6XX_VFD_DECODE_INSTR_INSTANCED;
-      if (!vk_format_is_int(vi_attr->format))
-         vfd_decode |= A6XX_VFD_DECODE_INSTR_FLOAT;
+      bindings[vfd_fetch_idx] = binding->binding;
+      vfd_fetch_idx++;
+   }
 
-      const uint32_t vfd_decode_step_rate = 1;
+   /* TODO: emit all VFD_DECODE/VFD_DEST_CNTL in same (two) pkt4 */
 
-      const uint32_t vfd_dest_cntl =
-         A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) |
-         A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid);
+   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *attr =
+         &info->pVertexAttributeDescriptions[i];
+      uint32_t binding_idx, input_idx;
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DECODE(vfd_decode_idx), 2);
-      tu_cs_emit(cs, vfd_decode);
-      tu_cs_emit(cs, vfd_decode_step_rate);
+      for (binding_idx = 0; binding_idx < vfd_fetch_idx; binding_idx++) {
+         if (bindings[binding_idx] == attr->binding)
+            break;
+      }
+      assert(binding_idx < vfd_fetch_idx);
+
+      for (input_idx = 0; input_idx < vs->inputs_count; input_idx++) {
+         if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == attr->location)
+            break;
+      }
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DEST_CNTL(vfd_decode_idx), 1);
-      tu_cs_emit(cs, vfd_dest_cntl);
+      /* attribute not used, skip it */
+      if (input_idx == vs->inputs_count)
+         continue;
 
-      bindings[vfd_decode_idx] = vi_binding->binding;
-      strides[vfd_decode_idx] = vi_binding->stride;
-      offsets[vfd_decode_idx] = vi_attr->offset;
+      const struct tu_native_format format = tu6_format_vtx(attr->format);
+      tu_cs_emit_regs(cs,
+                      A6XX_VFD_DECODE_INSTR(vfd_decode_idx,
+                        .idx = binding_idx,
+                        .offset = attr->offset,
+                        .instanced = binding_instanced & (1 << attr->binding),
+                        .format = format.fmt,
+                        .swap = format.swap,
+                        .unk30 = 1,
+                        ._float = !vk_format_is_int(attr->format)),
+                      A6XX_VFD_DECODE_STEP_RATE(vfd_decode_idx, 1));
+
+      tu_cs_emit_regs(cs,
+                      A6XX_VFD_DEST_CNTL_INSTR(vfd_decode_idx,
+                        .writemask = vs->inputs[input_idx].compmask,
+                        .regid = vs->inputs[input_idx].regid));
 
       vfd_decode_idx++;
-      assert(vfd_decode_idx <= MAX_VERTEX_ATTRIBS);
    }
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_0, 1);
-   tu_cs_emit(
-      cs, A6XX_VFD_CONTROL_0_VTXCNT(vfd_decode_idx) | (vfd_decode_idx << 8));
+   tu_cs_emit_regs(cs,
+                   A6XX_VFD_CONTROL_0(
+                     .fetch_cnt = vfd_fetch_idx,
+                     .decode_cnt = vfd_decode_idx));
 
-   *count = vfd_decode_idx;
+   *count = vfd_fetch_idx;
 }
 
 static uint32_t
@@ -1529,8 +1520,6 @@ tu6_emit_gras_unknowns(struct tu_cs *cs)
 {
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1);
    tu_cs_emit(cs, 0x0);
-   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LAYER_CNTL, 1);
-   tu_cs_emit(cs, 0x0);
 }
 
 static void
@@ -1980,6 +1969,12 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
    pipeline->program.binning_state_ib =
       tu_cs_end_sub_stream(&pipeline->cs, &prog_cs);
 
+   VkShaderStageFlags stages = 0;
+   for (unsigned i = 0; i < builder->create_info->stageCount; i++) {
+      stages |= builder->create_info->pStages[i].stage;
+   }
+   pipeline->active_stages = stages;
+
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       if (!builder->shaders[i])
          continue;
@@ -2000,18 +1995,16 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
 
    struct tu_cs vi_cs;
    tu_cs_begin_sub_stream(&pipeline->cs,
-                          MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs);
+                          MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
    tu6_emit_vertex_input(&vi_cs, &vs->variants[0], vi_info,
-                         pipeline->vi.bindings, pipeline->vi.strides,
-                         pipeline->vi.offsets, &pipeline->vi.count);
+                         pipeline->vi.bindings, &pipeline->vi.count);
    pipeline->vi.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);
 
    if (vs->has_binning_pass) {
       tu_cs_begin_sub_stream(&pipeline->cs,
-                             MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs);
+                             MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
       tu6_emit_vertex_input(
          &vi_cs, &vs->variants[1], vi_info, pipeline->vi.binning_bindings,
-         pipeline->vi.binning_strides, pipeline->vi.binning_offsets,
          &pipeline->vi.binning_count);
       pipeline->vi.binning_state_ib =
          tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);