From e4c6423c5efbcba66fd473e004c6526d9beb6430 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 18 Jan 2019 20:17:35 +0000 Subject: [PATCH] radv: avoid context rolls when binding graphics pipelines It's common in some applications to bind a new graphics pipeline without ending up changing any context registers. This makes a pipeline have two command buffers: one for setting context registers and one for everything else. The context register command buffer is only emitted if it differs from the previous pipeline's. v2: ensure late scissor emission is done when radv_emit_rbplus_state() is called v2: make use of cmd_buffer->state.workaround_scissor_bug v3: rename "workaround_scissor_bug" to "context_roll_without_scissor_emitted" Signed-off-by: Rhys Perry Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_cmd_buffer.c | 30 ++++- src/amd/vulkan/radv_pipeline.c | 217 ++++++++++++++++--------------- src/amd/vulkan/radv_private.h | 2 + 3 files changed, 141 insertions(+), 108 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index f6c93284db3..5f1fefee869 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -661,6 +661,8 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); } + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -857,10 +859,13 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); } + /* TODO: avoid redundantly setting context registers */ radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); radeon_emit(cmd_buffer->cs, sx_ps_downconvert); radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); radeon_emit(cmd_buffer->cs, 
sx_blend_opt_control); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -884,6 +889,15 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); + if (!cmd_buffer->state.emitted_pipeline || + cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw || + cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash || + memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf, + pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) { + radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw); + cmd_buffer->state.context_roll_without_scissor_emitted = true; + } + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { if (!pipeline->shaders[i]) continue; @@ -2939,6 +2953,8 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline) return; + assert(!pipeline->ctx_cs.cdw); + cmd_buffer->state.emitted_compute_pipeline = pipeline; radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw); @@ -3630,20 +3646,16 @@ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; /* Index, vertex and streamout buffers don't change context regs, and - * pipeline is handled later. + * pipeline is already handled. */ used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_STREAMOUT_BUFFER | RADV_CMD_DIRTY_PIPELINE); - /* Assume all state changes except these two can imply context rolls. */ if (cmd_buffer->state.dirty & used_states) return true; - if (cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) - return true; - if (info->indexed && state->pipeline->graphics.prim_restart_enable && (state->index_type ? 
0xffffffffu : 0xffffu) != state->last_primitive_reset_index) return true; @@ -3655,7 +3667,7 @@ static void radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { - bool late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info); + bool late_scissor_emission; if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) || cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) @@ -3664,6 +3676,12 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) radv_emit_graphics_pipeline(cmd_buffer); + /* This should be before the cmd_buffer->state.dirty is cleared + * (excluding RADV_CMD_DIRTY_PIPELINE) and after + * cmd_buffer->state.context_roll_without_scissor_emitted is set. */ + late_scissor_emission = + radv_need_late_scissor_emission(cmd_buffer, info); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) radv_emit_framebuffer_state(cmd_buffer); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 3bf3c7ffef7..138e153f9a4 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2526,7 +2526,7 @@ radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCr } static void -radv_pipeline_generate_binning_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { @@ -2576,15 +2576,15 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *cs, S_028C44_OPTIMAL_BIN_SELECTION(1); } - radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, + radeon_set_context_reg(ctx_cs, R_028C44_PA_SC_BINNER_CNTL_0, pa_sc_binner_cntl_0); - radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, + radeon_set_context_reg(ctx_cs, R_028060_DB_DFSM_CONTROL, db_dfsm_control); } static void -radv_pipeline_generate_depth_stencil_state(struct 
radeon_cmdbuf *cs, +radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct radv_graphics_pipeline_create_info *extra) @@ -2657,35 +2657,35 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *cs, db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1); } - radeon_set_context_reg(cs, R_028800_DB_DEPTH_CONTROL, db_depth_control); - radeon_set_context_reg(cs, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); + radeon_set_context_reg(ctx_cs, R_028800_DB_DEPTH_CONTROL, db_depth_control); + radeon_set_context_reg(ctx_cs, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); - radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, db_render_control); - radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override); - radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2); + radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control); + radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override); + radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2); } static void -radv_pipeline_generate_blend_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const struct radv_blend_state *blend) { - radeon_set_context_reg_seq(cs, R_028780_CB_BLEND0_CONTROL, 8); - radeon_emit_array(cs, blend->cb_blend_control, + radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8); + radeon_emit_array(ctx_cs, blend->cb_blend_control, 8); - radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control); - radeon_set_context_reg(cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask); + radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control); + radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask); if 
(pipeline->device->physical_device->has_rbplus) { - radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8); - radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8); + radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8); + radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8); } - radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); + radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); - radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); - radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); + radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); + radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); pipeline->graphics.col_format = blend->spi_shader_col_format; pipeline->graphics.cb_target_mask = blend->cb_target_mask; @@ -2703,7 +2703,7 @@ radv_get_conservative_raster_mode(const VkPipelineRasterizationStateCreateInfo * } static void -radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { @@ -2712,14 +2712,14 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs, radv_get_conservative_raster_mode(vkraster); uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); - radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, + radeon_set_context_reg(ctx_cs, R_028810_PA_CL_CLIP_CNTL, S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions. S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | S_028810_ZCLIP_FAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 
1 : 0) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); - radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0, + radeon_set_context_reg(ctx_cs, R_0286D4_SPI_INTERP_CONTROL_0, S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) | S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | @@ -2728,12 +2728,12 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs, S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */ - radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL, + radeon_set_context_reg(ctx_cs, R_028BE4_PA_SU_VTX_CNTL, S_028BE4_PIX_CENTER(1) | // TODO verify S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) | S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); - radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, + radeon_set_context_reg(ctx_cs, R_028814_PA_SU_SC_MODE_CNTL, S_028814_FACE(vkraster->frontFace) | S_028814_CULL_FRONT(!!(vkraster->cullMode & VK_CULL_MODE_FRONT_BIT)) | S_028814_CULL_BACK(!!(vkraster->cullMode & VK_CULL_MODE_BACK_BIT)) | @@ -2774,37 +2774,37 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs, } } - radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, + radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, pa_sc_conservative_rast); } static void -radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline) { struct radv_multisample_state *ms = &pipeline->graphics.ms; - radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); - radeon_emit(cs, ms->pa_sc_aa_mask[0]); - radeon_emit(cs, ms->pa_sc_aa_mask[1]); + radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); + radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]); + radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]); - radeon_set_context_reg(cs, R_028804_DB_EQAA, ms->db_eqaa); - radeon_set_context_reg(cs, 
R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); + radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa); + radeon_set_context_reg(ctx_cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); /* The exclusion bits can be set to improve rasterization efficiency * if no sample lies on the pixel boundary (-8 sample offset). It's * currently always TRUE because the driver doesn't support 16 samples. */ bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= CIK; - radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, + radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); } static void -radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *cs, - const struct radv_pipeline *pipeline) +radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, + struct radv_pipeline *pipeline) { const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); @@ -2822,12 +2822,13 @@ radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *cs, vgt_primitiveid_en = true; } - radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en); - radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); + radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en); + radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); } static void -radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *cs, +radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, struct radv_shader_variant *shader) { @@ -2848,10 +2849,10 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *cs, outinfo->writes_layer || outinfo->writes_viewport_index; - radeon_set_context_reg(cs, R_0286C4_SPI_VS_OUT_CONFIG, + radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(MAX2(1, outinfo->param_exports) - 1)); - radeon_set_context_reg(cs, 
R_02870C_SPI_SHADER_POS_FORMAT, + radeon_set_context_reg(ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : @@ -2863,13 +2864,13 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *cs, V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); - radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL, + radeon_set_context_reg(ctx_cs, R_028818_PA_CL_VTE_CNTL, S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) | S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); - radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, + radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | @@ -2881,7 +2882,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *cs, clip_dist_mask); if (pipeline->device->physical_device->rad_info.chip_class <= VI) - radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, + radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index); } @@ -2949,7 +2950,8 @@ radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs, } static void -radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *cs, +radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, const struct radv_tessellation_state *tess) { @@ -2965,11 +2967,12 @@ radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *cs, else if (vs->info.vs.as_es) radv_pipeline_generate_hw_es(cs, pipeline, vs); else - radv_pipeline_generate_hw_vs(cs, pipeline, vs); + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs); } static void -radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *cs, 
+radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, const struct radv_tessellation_state *tess) { @@ -2985,24 +2988,25 @@ radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *cs, if (tes->info.tes.as_es) radv_pipeline_generate_hw_es(cs, pipeline, tes); else - radv_pipeline_generate_hw_vs(cs, pipeline, tes); + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, tes); } radv_pipeline_generate_hw_hs(cs, pipeline, tcs, tess); - radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM, + radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM, tess->tf_param); if (pipeline->device->physical_device->rad_info.chip_class >= CIK) - radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, + radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, tess->ls_hs_config); else - radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, + radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, tess->ls_hs_config); } static void -radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *cs, +radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, const struct radv_gs_state *gs_state) { @@ -3023,32 +3027,32 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *cs, offset = num_components[0] * gs_max_out_vertices; - radeon_set_context_reg_seq(cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); - radeon_emit(cs, offset); + radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); + radeon_emit(ctx_cs, offset); if (max_stream >= 1) offset += num_components[1] * gs_max_out_vertices; - radeon_emit(cs, offset); + radeon_emit(ctx_cs, offset); if (max_stream >= 2) offset += num_components[2] * gs_max_out_vertices; - radeon_emit(cs, offset); + radeon_emit(ctx_cs, offset); if (max_stream >= 3) offset += num_components[3] * gs_max_out_vertices; - radeon_set_context_reg(cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset); + 
radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset); - radeon_set_context_reg(cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); + radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); - radeon_set_context_reg_seq(cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); - radeon_emit(cs, num_components[0]); - radeon_emit(cs, (max_stream >= 1) ? num_components[1] : 0); - radeon_emit(cs, (max_stream >= 2) ? num_components[2] : 0); - radeon_emit(cs, (max_stream >= 3) ? num_components[3] : 0); + radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); + radeon_emit(ctx_cs, num_components[0]); + radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0); + radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0); + radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0); uint32_t gs_num_invocations = gs->info.gs.invocations; - radeon_set_context_reg(cs, R_028B90_VGT_GS_INSTANCE_CNT, + radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0)); - radeon_set_context_reg(cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, + radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs_state->vgt_esgs_ring_itemsize); va = radv_buffer_get_va(gs->bo) + gs->bo_offset; @@ -3062,8 +3066,8 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *cs, radeon_emit(cs, gs->rsrc1); radeon_emit(cs, gs->rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size)); - radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); - radeon_set_context_reg(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup); + radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); + radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup); } else { radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); 
radeon_emit(cs, va >> 8); @@ -3072,7 +3076,7 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *cs, radeon_emit(cs, gs->rsrc2); } - radv_pipeline_generate_hw_vs(cs, pipeline, pipeline->gs_copy_shader); + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader); } static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade) @@ -3094,8 +3098,8 @@ static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade) } static void -radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs, - struct radv_pipeline *pipeline) +radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, + struct radv_pipeline *pipeline) { struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); @@ -3166,9 +3170,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs, } if (ps_offset) { - radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); + radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); for (unsigned i = 0; i < ps_offset; i++) { - radeon_emit(cs, ps_input_cntl[i]); + radeon_emit(ctx_cs, ps_input_cntl[i]); } } } @@ -3206,7 +3210,8 @@ radv_compute_db_shader_control(const struct radv_device *device, } static void -radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *cs, +radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline) { struct radv_shader_variant *ps; @@ -3222,22 +3227,22 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *cs, radeon_emit(cs, ps->rsrc1); radeon_emit(cs, ps->rsrc2); - radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, + radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL, radv_compute_db_shader_control(pipeline->device, pipeline, ps)); - radeon_set_context_reg(cs, R_0286CC_SPI_PS_INPUT_ENA, + radeon_set_context_reg(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA, ps->config.spi_ps_input_ena); 
- radeon_set_context_reg(cs, R_0286D0_SPI_PS_INPUT_ADDR, + radeon_set_context_reg(ctx_cs, R_0286D0_SPI_PS_INPUT_ADDR, ps->config.spi_ps_input_addr); - radeon_set_context_reg(cs, R_0286D8_SPI_PS_IN_CONTROL, + radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, S_0286D8_NUM_INTERP(ps->info.fs.num_interp)); - radeon_set_context_reg(cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); + radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); - radeon_set_context_reg(cs, R_028710_SPI_SHADER_Z_FORMAT, + radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, ac_get_spi_shader_z_format(ps->info.info.ps.writes_z, ps->info.info.ps.writes_stencil, ps->info.info.ps.writes_sample_mask)); @@ -3250,7 +3255,7 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *cs, } static void -radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *cs, +radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline) { if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10) @@ -3261,7 +3266,7 @@ radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *cs, radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) { vtx_reuse_depth = 14; } - radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, + radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth)); } @@ -3331,38 +3336,46 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline, const struct radv_gs_state *gs, unsigned prim, unsigned gs_out) { - pipeline->cs.buf = malloc(4 * 256); - pipeline->cs.max_dw = 256; - - radv_pipeline_generate_depth_stencil_state(&pipeline->cs, pipeline, pCreateInfo, extra); - radv_pipeline_generate_blend_state(&pipeline->cs, pipeline, blend); - radv_pipeline_generate_raster_state(&pipeline->cs, pipeline, pCreateInfo); - 
radv_pipeline_generate_multisample_state(&pipeline->cs, pipeline); - radv_pipeline_generate_vgt_gs_mode(&pipeline->cs, pipeline); - radv_pipeline_generate_vertex_shader(&pipeline->cs, pipeline, tess); - radv_pipeline_generate_tess_shaders(&pipeline->cs, pipeline, tess); - radv_pipeline_generate_geometry_shader(&pipeline->cs, pipeline, gs); - radv_pipeline_generate_fragment_shader(&pipeline->cs, pipeline); - radv_pipeline_generate_ps_inputs(&pipeline->cs, pipeline); - radv_pipeline_generate_vgt_vertex_reuse(&pipeline->cs, pipeline); - radv_pipeline_generate_binning_state(&pipeline->cs, pipeline, pCreateInfo); - - radeon_set_context_reg(&pipeline->cs, R_0286E8_SPI_TMPRING_SIZE, + struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs; + struct radeon_cmdbuf *cs = &pipeline->cs; + + cs->max_dw = 64; + ctx_cs->max_dw = 256; + cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw)); + ctx_cs->buf = cs->buf + cs->max_dw; + + radv_pipeline_generate_depth_stencil_state(ctx_cs, pipeline, pCreateInfo, extra); + radv_pipeline_generate_blend_state(ctx_cs, pipeline, blend); + radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo); + radv_pipeline_generate_multisample_state(ctx_cs, pipeline); + radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline); + radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline, tess); + radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline, tess); + radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline, gs); + radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline); + radv_pipeline_generate_ps_inputs(ctx_cs, pipeline); + radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline); + radv_pipeline_generate_binning_state(ctx_cs, pipeline, pCreateInfo); + + radeon_set_context_reg(ctx_cs, R_0286E8_SPI_TMPRING_SIZE, S_0286E8_WAVES(pipeline->max_waves) | S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); - radeon_set_context_reg(&pipeline->cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline)); + 
radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline)); if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { - radeon_set_uconfig_reg_idx(&pipeline->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); + radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); } else { - radeon_set_config_reg(&pipeline->cs, R_008958_VGT_PRIMITIVE_TYPE, prim); + radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim); } - radeon_set_context_reg(&pipeline->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); + radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); - radeon_set_context_reg(&pipeline->cs, R_02820C_PA_SC_CLIPRECT_RULE, radv_compute_cliprect_rule(pCreateInfo)); + radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, radv_compute_cliprect_rule(pCreateInfo)); - assert(pipeline->cs.cdw <= pipeline->cs.max_dw); + pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4); + + assert(ctx_cs->cdw <= ctx_cs->max_dw); + assert(cs->cdw <= cs->max_dw); } static struct radv_ia_multi_vgt_param_helpers diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 064c221549c..280504ea03f 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1367,6 +1367,8 @@ struct radv_pipeline { VkShaderStageFlags active_stages; struct radeon_cmdbuf cs; + uint32_t ctx_cs_hash; + struct radeon_cmdbuf ctx_cs; struct radv_vertex_elements_info vertex_elements; -- 2.30.2