From a1a80c38ea2f8df3118f258f73cbe2b9440986a7 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 7 Jul 2020 10:37:40 -0400 Subject: [PATCH] turnip: clean up primitive output state We only need to emit one set of primitive output registers. This may differ from the blob, because it seems to try to allow using the same pipeline with tess/geom enabled/disabled. Signed-off-by: Jonathan Marek Part-of: --- src/freedreno/vulkan/tu_clear_blit.c | 3 +- src/freedreno/vulkan/tu_cmd_buffer.c | 3 - src/freedreno/vulkan/tu_pipeline.c | 173 +++++++++++++-------------- 3 files changed, 85 insertions(+), 94 deletions(-) diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 4c18a66cc8a..f447b2e3e72 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -524,8 +524,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_ .persp_division_disable = 1, .vp_xform_disable = 1, .vp_clip_code_ignore = 1, - .clip_disable = 1), - A6XX_GRAS_VS_CL_CNTL(0)); + .clip_disable = 1)); tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable? 
tu_cs_emit_regs(cs, diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index de1a672b094..765732ad9a9 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -746,7 +746,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A982, 0); tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A9A8, 0); tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_VS_LAYER_CNTL, 0x0000ffff); /* TODO: set A6XX_VFD_ADD_OFFSET_INSTANCE and fix ir3 to avoid adding base instance */ tu_cs_emit_write_reg(cs, REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX); @@ -769,7 +768,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_VS_CLIP_CNTL, 0xffff00); tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0); tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236, @@ -791,7 +789,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_VS_LAYER_CNTL, 0); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0); tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 74ab7872e9d..08eda112260 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -732,6 +732,53 @@ tu6_emit_vpc(struct tu_cs *cs, const struct ir3_shader_variant *gs, const struct ir3_shader_variant *fs) { + /* note: doesn't compile as static because of the array regs.. 
*/ + const struct reg_config { + uint16_t reg_sp_xs_out_reg; + uint16_t reg_sp_xs_vpc_dst_reg; + uint16_t reg_vpc_xs_pack; + uint16_t reg_vpc_xs_clip_cntl; + uint16_t reg_gras_xs_cl_cntl; + uint16_t reg_pc_xs_out_cntl; + uint16_t reg_sp_xs_primitive_cntl; + uint16_t reg_vpc_xs_layer_cntl; + uint16_t reg_gras_xs_layer_cntl; + } reg_config[] = { + [MESA_SHADER_VERTEX] = { + REG_A6XX_SP_VS_OUT_REG(0), + REG_A6XX_SP_VS_VPC_DST_REG(0), + REG_A6XX_VPC_VS_PACK, + REG_A6XX_VPC_VS_CLIP_CNTL, + REG_A6XX_GRAS_VS_CL_CNTL, + REG_A6XX_PC_VS_OUT_CNTL, + REG_A6XX_SP_VS_PRIMITIVE_CNTL, + REG_A6XX_VPC_VS_LAYER_CNTL, + REG_A6XX_GRAS_VS_LAYER_CNTL + }, + [MESA_SHADER_TESS_EVAL] = { + REG_A6XX_SP_DS_OUT_REG(0), + REG_A6XX_SP_DS_VPC_DST_REG(0), + REG_A6XX_VPC_DS_PACK, + REG_A6XX_VPC_DS_CLIP_CNTL, + REG_A6XX_GRAS_DS_CL_CNTL, + REG_A6XX_PC_DS_OUT_CNTL, + REG_A6XX_SP_DS_PRIMITIVE_CNTL, + REG_A6XX_VPC_DS_LAYER_CNTL, + REG_A6XX_GRAS_DS_LAYER_CNTL + }, + [MESA_SHADER_GEOMETRY] = { + REG_A6XX_SP_GS_OUT_REG(0), + REG_A6XX_SP_GS_VPC_DST_REG(0), + REG_A6XX_VPC_GS_PACK, + REG_A6XX_VPC_GS_CLIP_CNTL, + REG_A6XX_GRAS_GS_CL_CNTL, + REG_A6XX_PC_GS_OUT_CNTL, + REG_A6XX_SP_GS_PRIMITIVE_CNTL, + REG_A6XX_VPC_GS_LAYER_CNTL, + REG_A6XX_GRAS_GS_LAYER_CNTL + }, + }; + const struct ir3_shader_variant *last_shader; if (gs) { last_shader = gs; @@ -740,6 +787,9 @@ tu6_emit_vpc(struct tu_cs *cs, } else { last_shader = vs; } + + const struct reg_config *cfg = &reg_config[last_shader->type]; + struct ir3_shader_linkage linkage = { .primid_loc = 0xff }; if (fs) ir3_link_shaders(&linkage, last_shader, fs, true); @@ -766,6 +816,10 @@ tu6_emit_vpc(struct tu_cs *cs, ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ); const uint32_t layer_regid = gs ? ir3_find_output_regid(gs, VARYING_SLOT_LAYER) : regid(63, 0); + uint32_t primitive_regid = gs ? + ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0); + uint32_t flags_regid = gs ? 
+ ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0; uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff; if (layer_regid != regid(63, 0)) { @@ -797,22 +851,39 @@ tu6_emit_vpc(struct tu_cs *cs, A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc); } - if (gs) - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_OUT_REG(0), sp_out_count); - else if (hs) - tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_OUT_REG(0), sp_out_count); - else - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_OUT_REG(0), sp_out_count); + tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count); tu_cs_emit_array(cs, sp_out, sp_out_count); - if (gs) - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_VPC_DST_REG(0), sp_vpc_dst_count); - else if (hs) - tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_VPC_DST_REG(0), sp_vpc_dst_count); - else - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vpc_dst_count); + tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count); tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count); + tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1); + tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) | + A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) | + A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc)); + + tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1); + tu_cs_emit(cs, 0xffff00); + + tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, cfg->reg_pc_xs_out_cntl, 1); + tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | + CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) | + CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) | + CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID)); + + tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1); + tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) | + A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid)); + + tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1); + tu_cs_emit(cs, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00); + + tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1); + 
tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER)); + tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMID_CNTL, 1); tu_cs_emit(cs, COND(primid_passthru, A6XX_PC_PRIMID_CNTL_PRIMID_PASSTHRU)); @@ -822,11 +893,6 @@ tu6_emit_vpc(struct tu_cs *cs, A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) | A6XX_VPC_CNTL_0_UNKLOC(0xff)); - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VS_PACK, 1); - tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) | - A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) | - A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc)); - if (hs) { shader_info *hs_info = &hs->shader->nir->info; tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1); @@ -874,36 +940,6 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit(cs, A6XX_PC_TESS_CNTL_SPACING(spacing) | A6XX_PC_TESS_CNTL_OUTPUT(output)); - /* xxx: Misc tess unknowns: */ - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_CLIP_CNTL, 1); - tu_cs_emit(cs, 0x00ffff00); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_LAYER_CNTL, 1); - tu_cs_emit(cs, 0x0000ffff); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_DS_LAYER_CNTL, 1); - tu_cs_emit(cs, 0x0); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_DS_CL_CNTL, 1); - tu_cs_emit(cs, 0x0); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VS_PACK, 1); - tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) | - A6XX_VPC_VS_PACK_PSIZELOC(255) | - A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc)); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_PACK, 1); - tu_cs_emit(cs, A6XX_VPC_DS_PACK_POSITIONLOC(position_loc) | - A6XX_VPC_DS_PACK_PSIZELOC(pointsize_loc) | - A6XX_VPC_DS_PACK_STRIDE_IN_VPC(linkage.max_loc)); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_PRIMITIVE_CNTL, 1); - tu_cs_emit(cs, A6XX_SP_DS_PRIMITIVE_CNTL_OUT(linkage.cnt)); - - tu_cs_emit_pkt4(cs, REG_A6XX_PC_DS_OUT_CNTL, 1); - tu_cs_emit(cs, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | - CONDREG(pointsize_regid, 0x100)); - tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER); tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER); } @@ -931,33 +967,6 @@ tu6_emit_vpc(struct tu_cs *cs, vec4_size = 
0; } - uint32_t primitive_regid = - ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID); - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_PACK, 1); - tu_cs_emit(cs, A6XX_VPC_GS_PACK_POSITIONLOC(position_loc) | - A6XX_VPC_GS_PACK_PSIZELOC(pointsize_loc) | - A6XX_VPC_GS_PACK_STRIDE_IN_VPC(linkage.max_loc)); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_LAYER_CNTL, 1); - tu_cs_emit(cs, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_GS_LAYER_CNTL, 1); - tu_cs_emit(cs, CONDREG(layer_regid, - A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER)); - - uint32_t flags_regid = ir3_find_output_regid(gs, - VARYING_SLOT_GS_VERTEX_FLAGS_IR3); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIMITIVE_CNTL, 1); - tu_cs_emit(cs, A6XX_SP_GS_PRIMITIVE_CNTL_OUT(linkage.cnt) | - A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid)); - - tu_cs_emit_pkt4(cs, REG_A6XX_PC_GS_OUT_CNTL, 1); - tu_cs_emit(cs, A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | - CONDREG(pointsize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) | - CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) | - CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID)); - tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1); tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(vertices_out) | @@ -967,15 +976,9 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1); tu_cs_emit(cs, 0); - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_GS_CL_CNTL, 1); - tu_cs_emit(cs, 0); - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_UNKNOWN_9100, 1); tu_cs_emit(cs, 0xff); - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_CLIP_CNTL, 1); - tu_cs_emit(cs, 0xffff00); - tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size)); @@ -985,13 +988,6 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1); tu_cs_emit(cs, vs->output_size); } - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_PRIMITIVE_CNTL, 1); - tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt)); - - 
tu_cs_emit_pkt4(cs, REG_A6XX_PC_VS_OUT_CNTL, 1); - tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | - (last_shader->writes_psize ? A6XX_PC_VS_OUT_CNTL_PSIZE : 0)); } static int @@ -2222,7 +2218,7 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode); struct tu_cs cs; - tu_cs_begin_sub_stream(&pipeline->cs, 11, &cs); + tu_cs_begin_sub_stream(&pipeline->cs, 9, &cs); tu_cs_emit_regs(&cs, A6XX_GRAS_CL_CNTL( @@ -2239,7 +2235,6 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, A6XX_PC_POLYGON_MODE(.mode = mode)); /* move to hw ctx init? */ - tu_cs_emit_regs(&cs, A6XX_GRAS_VS_CL_CNTL()); tu_cs_emit_regs(&cs, A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f), A6XX_GRAS_SU_POINT_SIZE(1.0f)); -- 2.30.2