turnip: clean up primitive output state
author Jonathan Marek <jonathan@marek.ca>
Tue, 7 Jul 2020 14:37:40 +0000 (10:37 -0400)
committer Marge Bot <eric+marge@anholt.net>
Tue, 14 Jul 2020 04:05:24 +0000 (04:05 +0000)
We only need to emit one set of primitive output registers: the set that
belongs to the last geometry-pipeline stage in use (VS, DS or GS). This may
differ from the blob, because the blob seems to try to allow using the same
pipeline with tess/geom enabled/disabled.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5790>

src/freedreno/vulkan/tu_clear_blit.c
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_pipeline.c

index 4c18a66cc8a05467dbdfbf349c42645b4528e444..f447b2e3e729df0b523cd7fc18f4e1f403a2c1f4 100644 (file)
@@ -524,8 +524,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
                       .persp_division_disable = 1,
                       .vp_xform_disable = 1,
                       .vp_clip_code_ignore = 1,
-                      .clip_disable = 1),
-                   A6XX_GRAS_VS_CL_CNTL(0));
+                      .clip_disable = 1));
    tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?
 
    tu_cs_emit_regs(cs,
index de1a672b094d74403202db19ed7fca951f76907a..765732ad9a99ef97f7ddf74098c2638d051a9730 100644 (file)
@@ -746,7 +746,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A982, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A9A8, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
-   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_VS_LAYER_CNTL, 0x0000ffff);
 
    /* TODO: set A6XX_VFD_ADD_OFFSET_INSTANCE and fix ir3 to avoid adding base instance */
    tu_cs_emit_write_reg(cs, REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
@@ -769,7 +768,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);
 
-   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_VS_CLIP_CNTL, 0xffff00);
    tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0);
 
    tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236,
@@ -791,7 +789,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);
 
    tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0);
-   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_VS_LAYER_CNTL, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2);
    tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
index 74ab7872e9dcfc4e608a454d9005d162c1e0510d..08eda112260c4e9bca4d7b8905953285e678a11b 100644 (file)
@@ -732,6 +732,53 @@ tu6_emit_vpc(struct tu_cs *cs,
              const struct ir3_shader_variant *gs,
              const struct ir3_shader_variant *fs)
 {
+   /* note: doesn't compile as static because of the array regs.. */
+   const struct reg_config {
+      uint16_t reg_sp_xs_out_reg;
+      uint16_t reg_sp_xs_vpc_dst_reg;
+      uint16_t reg_vpc_xs_pack;
+      uint16_t reg_vpc_xs_clip_cntl;
+      uint16_t reg_gras_xs_cl_cntl;
+      uint16_t reg_pc_xs_out_cntl;
+      uint16_t reg_sp_xs_primitive_cntl;
+      uint16_t reg_vpc_xs_layer_cntl;
+      uint16_t reg_gras_xs_layer_cntl;
+   } reg_config[] = {
+      [MESA_SHADER_VERTEX] = {
+         REG_A6XX_SP_VS_OUT_REG(0),
+         REG_A6XX_SP_VS_VPC_DST_REG(0),
+         REG_A6XX_VPC_VS_PACK,
+         REG_A6XX_VPC_VS_CLIP_CNTL,
+         REG_A6XX_GRAS_VS_CL_CNTL,
+         REG_A6XX_PC_VS_OUT_CNTL,
+         REG_A6XX_SP_VS_PRIMITIVE_CNTL,
+         REG_A6XX_VPC_VS_LAYER_CNTL,
+         REG_A6XX_GRAS_VS_LAYER_CNTL
+      },
+      [MESA_SHADER_TESS_EVAL] = {
+         REG_A6XX_SP_DS_OUT_REG(0),
+         REG_A6XX_SP_DS_VPC_DST_REG(0),
+         REG_A6XX_VPC_DS_PACK,
+         REG_A6XX_VPC_DS_CLIP_CNTL,
+         REG_A6XX_GRAS_DS_CL_CNTL,
+         REG_A6XX_PC_DS_OUT_CNTL,
+         REG_A6XX_SP_DS_PRIMITIVE_CNTL,
+         REG_A6XX_VPC_DS_LAYER_CNTL,
+         REG_A6XX_GRAS_DS_LAYER_CNTL
+      },
+      [MESA_SHADER_GEOMETRY] = {
+         REG_A6XX_SP_GS_OUT_REG(0),
+         REG_A6XX_SP_GS_VPC_DST_REG(0),
+         REG_A6XX_VPC_GS_PACK,
+         REG_A6XX_VPC_GS_CLIP_CNTL,
+         REG_A6XX_GRAS_GS_CL_CNTL,
+         REG_A6XX_PC_GS_OUT_CNTL,
+         REG_A6XX_SP_GS_PRIMITIVE_CNTL,
+         REG_A6XX_VPC_GS_LAYER_CNTL,
+         REG_A6XX_GRAS_GS_LAYER_CNTL
+      },
+   };
+
    const struct ir3_shader_variant *last_shader;
    if (gs) {
       last_shader = gs;
@@ -740,6 +787,9 @@ tu6_emit_vpc(struct tu_cs *cs,
    } else {
       last_shader = vs;
    }
+
+   const struct reg_config *cfg = &reg_config[last_shader->type];
+
    struct ir3_shader_linkage linkage = { .primid_loc = 0xff };
    if (fs)
       ir3_link_shaders(&linkage, last_shader, fs, true);
@@ -766,6 +816,10 @@ tu6_emit_vpc(struct tu_cs *cs,
       ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ);
    const uint32_t layer_regid = gs ?
       ir3_find_output_regid(gs, VARYING_SLOT_LAYER) : regid(63, 0);
+   uint32_t primitive_regid = gs ?
+      ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0);
+   uint32_t flags_regid = gs ?
+      ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0;
 
    uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff;
    if (layer_regid != regid(63, 0)) {
@@ -797,22 +851,39 @@ tu6_emit_vpc(struct tu_cs *cs,
          A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc);
    }
 
-   if (gs)
-      tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_OUT_REG(0), sp_out_count);
-   else if (hs)
-      tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_OUT_REG(0), sp_out_count);
-   else
-      tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_OUT_REG(0), sp_out_count);
+   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count);
    tu_cs_emit_array(cs, sp_out, sp_out_count);
 
-   if (gs)
-      tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_VPC_DST_REG(0), sp_vpc_dst_count);
-   else if (hs)
-      tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_VPC_DST_REG(0), sp_vpc_dst_count);
-   else
-      tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vpc_dst_count);
+   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count);
    tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count);
 
+   tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1);
+   tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) |
+                  A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) |
+                  A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc));
+
+   tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1);
+   tu_cs_emit(cs, 0xffff00);
+
+   tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1);
+   tu_cs_emit(cs, 0);
+
+   tu_cs_emit_pkt4(cs, cfg->reg_pc_xs_out_cntl, 1);
+   tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
+                  CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
+                  CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
+                  CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
+
+   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
+   tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
+                  A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid));
+
+   tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1);
+   tu_cs_emit(cs, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00);
+
+   tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1);
+   tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER));
+
    tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMID_CNTL, 1);
    tu_cs_emit(cs, COND(primid_passthru, A6XX_PC_PRIMID_CNTL_PRIMID_PASSTHRU));
 
@@ -822,11 +893,6 @@ tu6_emit_vpc(struct tu_cs *cs,
                   A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) |
                   A6XX_VPC_CNTL_0_UNKLOC(0xff));
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VS_PACK, 1);
-   tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) |
-                  A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) |
-                  A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc));
-
    if (hs) {
       shader_info *hs_info = &hs->shader->nir->info;
       tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
@@ -874,36 +940,6 @@ tu6_emit_vpc(struct tu_cs *cs,
       tu_cs_emit(cs, A6XX_PC_TESS_CNTL_SPACING(spacing) |
             A6XX_PC_TESS_CNTL_OUTPUT(output));
 
-      /* xxx: Misc tess unknowns: */
-      tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_CLIP_CNTL, 1);
-      tu_cs_emit(cs, 0x00ffff00);
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_LAYER_CNTL, 1);
-      tu_cs_emit(cs, 0x0000ffff);
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_DS_LAYER_CNTL, 1);
-      tu_cs_emit(cs, 0x0);
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_DS_CL_CNTL, 1);
-      tu_cs_emit(cs, 0x0);
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VS_PACK, 1);
-      tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) |
-             A6XX_VPC_VS_PACK_PSIZELOC(255) |
-             A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc));
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_PACK, 1);
-      tu_cs_emit(cs, A6XX_VPC_DS_PACK_POSITIONLOC(position_loc) |
-             A6XX_VPC_DS_PACK_PSIZELOC(pointsize_loc) |
-             A6XX_VPC_DS_PACK_STRIDE_IN_VPC(linkage.max_loc));
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_PRIMITIVE_CNTL, 1);
-      tu_cs_emit(cs, A6XX_SP_DS_PRIMITIVE_CNTL_OUT(linkage.cnt));
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_PC_DS_OUT_CNTL, 1);
-      tu_cs_emit(cs, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
-            CONDREG(pointsize_regid, 0x100));
-
       tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER);
       tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER);
    }
@@ -931,33 +967,6 @@ tu6_emit_vpc(struct tu_cs *cs,
          vec4_size = 0;
       }
 
-      uint32_t primitive_regid =
-            ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID);
-      tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_PACK, 1);
-      tu_cs_emit(cs, A6XX_VPC_GS_PACK_POSITIONLOC(position_loc) |
-             A6XX_VPC_GS_PACK_PSIZELOC(pointsize_loc) |
-             A6XX_VPC_GS_PACK_STRIDE_IN_VPC(linkage.max_loc));
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_LAYER_CNTL, 1);
-      tu_cs_emit(cs, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00);
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_GS_LAYER_CNTL, 1);
-      tu_cs_emit(cs, CONDREG(layer_regid,
-            A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER));
-
-      uint32_t flags_regid = ir3_find_output_regid(gs,
-            VARYING_SLOT_GS_VERTEX_FLAGS_IR3);
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIMITIVE_CNTL, 1);
-      tu_cs_emit(cs, A6XX_SP_GS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
-            A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid));
-
-      tu_cs_emit_pkt4(cs, REG_A6XX_PC_GS_OUT_CNTL, 1);
-      tu_cs_emit(cs, A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
-            CONDREG(pointsize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) |
-            CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) |
-            CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID));
-
       tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1);
       tu_cs_emit(cs,
             A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(vertices_out) |
@@ -967,15 +976,9 @@ tu6_emit_vpc(struct tu_cs *cs,
       tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
       tu_cs_emit(cs, 0);
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_GS_CL_CNTL, 1);
-      tu_cs_emit(cs, 0);
-
       tu_cs_emit_pkt4(cs, REG_A6XX_VPC_UNKNOWN_9100, 1);
       tu_cs_emit(cs, 0xff);
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_CLIP_CNTL, 1);
-      tu_cs_emit(cs, 0xffff00);
-
       tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
       tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));
 
@@ -985,13 +988,6 @@ tu6_emit_vpc(struct tu_cs *cs,
       tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1);
       tu_cs_emit(cs, vs->output_size);
    }
-
-   tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_PRIMITIVE_CNTL, 1);
-   tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt));
-
-   tu_cs_emit_pkt4(cs, REG_A6XX_PC_VS_OUT_CNTL, 1);
-   tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
-         (last_shader->writes_psize ? A6XX_PC_VS_OUT_CNTL_PSIZE : 0));
 }
 
 static int
@@ -2222,7 +2218,7 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
    enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode);
 
    struct tu_cs cs;
-   tu_cs_begin_sub_stream(&pipeline->cs, 11, &cs);
+   tu_cs_begin_sub_stream(&pipeline->cs, 9, &cs);
 
    tu_cs_emit_regs(&cs,
                    A6XX_GRAS_CL_CNTL(
@@ -2239,7 +2235,6 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
                    A6XX_PC_POLYGON_MODE(.mode = mode));
 
    /* move to hw ctx init? */
-   tu_cs_emit_regs(&cs, A6XX_GRAS_VS_CL_CNTL());
    tu_cs_emit_regs(&cs,
                    A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f),
                    A6XX_GRAS_SU_POINT_SIZE(1.0f));