From beeab44190a7d73d989e8dac413b4c0201822986 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sun, 14 Jan 2018 02:03:38 +0100 Subject: [PATCH] radv: Record a PM4 sequence for graphics pipeline switches. This gives about 2% performance improvement on dota2 for me. This is mostly a mechanical copy and replacement, but at bind time we still do: 1) Some stuff that is only based on num_samples changes. 2) Some command buffer state setting. Reviewed-by: Dave Airlie Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_cmd_buffer.c | 454 +---------------------------- src/amd/vulkan/radv_pipeline.c | 478 +++++++++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 2 + 3 files changed, 483 insertions(+), 451 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c9df9c08d6a..f3f765a96e0 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -525,40 +525,6 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer) radv_emit_write_data_packet(cs, va, MAX_SETS * 2, data); } -static void -radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) -{ - radeon_set_context_reg_seq(cmd_buffer->cs, R_028780_CB_BLEND0_CONTROL, 8); - radeon_emit_array(cmd_buffer->cs, pipeline->graphics.blend.cb_blend_control, - 8); - radeon_set_context_reg(cmd_buffer->cs, R_028808_CB_COLOR_CONTROL, pipeline->graphics.blend.cb_color_control); - radeon_set_context_reg(cmd_buffer->cs, R_028B70_DB_ALPHA_TO_MASK, pipeline->graphics.blend.db_alpha_to_mask); - - if (cmd_buffer->device->physical_device->has_rbplus) { - - radeon_set_context_reg_seq(cmd_buffer->cs, R_028760_SX_MRT0_BLEND_OPT, 8); - radeon_emit_array(cmd_buffer->cs, pipeline->graphics.blend.sx_mrt_blend_opt, 8); - - radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); - radeon_emit(cmd_buffer->cs, 0); /* R_028754_SX_PS_DOWNCONVERT */ - radeon_emit(cmd_buffer->cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */ - radeon_emit(cmd_buffer->cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */ - } -} - -static void -radv_emit_graphics_depth_stencil_state(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) -{ - struct radv_depth_stencil_state *ds = &pipeline->graphics.ds; - radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, ds->db_depth_control); - radeon_set_context_reg(cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL, ds->db_stencil_control); - - radeon_set_context_reg(cmd_buffer->cs, R_028000_DB_RENDER_CONTROL, ds->db_render_control); - radeon_set_context_reg(cmd_buffer->cs, R_028010_DB_RENDER_OVERRIDE2, ds->db_render_override2); -} - struct ac_userdata_info * radv_lookup_user_sgpr(struct radv_pipeline *pipeline, gl_shader_stage stage, @@ -605,42 +571,8 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, struct radv_multisample_state *ms = &pipeline->graphics.ms; struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline; - radeon_set_context_reg_seq(cmd_buffer->cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); - radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[0]); - radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[1]); - - radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, ms->db_eqaa); - radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); - - if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions) { - uint32_t offset; - struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET); - uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_FRAGMENT]; - if (loc->sgpr_idx == -1) - return; - assert(loc->num_sgprs == 1); - assert(!loc->indirect); - switch (num_samples) { - default: - offset = 0; - break; - case 2: - offset = 1; - break; - case 4: - offset = 3; - break; - case 8: - offset = 7; - break; - case 16: - offset = 15; - break; - } - - radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, offset); + if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions) cmd_buffer->sample_positions_needed = true; - } if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples) return; @@ -660,21 +592,7 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, } } -static void -radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) -{ - struct radv_raster_state *raster = &pipeline->graphics.raster; - - radeon_set_context_reg(cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL, - raster->pa_cl_clip_cntl); - radeon_set_context_reg(cmd_buffer->cs, R_0286D4_SPI_INTERP_CONTROL_0, - raster->spi_interp_control); - radeon_set_context_reg(cmd_buffer->cs, R_028BE4_PA_SU_VTX_CNTL, - raster->pa_su_vtx_cntl); - radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, - raster->pa_su_sc_mode_cntl); -} + static inline void radv_emit_prefetch_TC_L2_async(struct radv_cmd_buffer *cmd_buffer, uint64_t va, @@ -730,350 +648,6 @@ radv_emit_prefetch(struct radv_cmd_buffer *cmd_buffer, pipeline->shaders[MESA_SHADER_FRAGMENT]); } -static void -radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline, - struct radv_shader_variant *shader) -{ - uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; - - radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG, - pipeline->graphics.vs.spi_vs_out_config); - - radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT, - pipeline->graphics.vs.spi_shader_pos_format); - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4); - radeon_emit(cmd_buffer->cs, va >> 8); - radeon_emit(cmd_buffer->cs, va >> 40); - radeon_emit(cmd_buffer->cs, shader->rsrc1); - radeon_emit(cmd_buffer->cs, shader->rsrc2); - - radeon_set_context_reg(cmd_buffer->cs, R_028818_PA_CL_VTE_CNTL, - S_028818_VTX_W0_FMT(1) | - S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) | - S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | - S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); - - - radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL, - pipeline->graphics.vs.pa_cl_vs_out_cntl); - - if (cmd_buffer->device->physical_device->rad_info.chip_class <= VI) - radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF, - pipeline->graphics.vs.vgt_reuse_off); -} - -static void -radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline, - struct radv_shader_variant *shader) -{ - uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4); - radeon_emit(cmd_buffer->cs, va >> 8); - radeon_emit(cmd_buffer->cs, va >> 40); - radeon_emit(cmd_buffer->cs, shader->rsrc1); - radeon_emit(cmd_buffer->cs, shader->rsrc2); -} - -static void -radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, - struct radv_shader_variant *shader) -{ - uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; - uint32_t rsrc2 = shader->rsrc2; - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2); - radeon_emit(cmd_buffer->cs, va >> 8); - radeon_emit(cmd_buffer->cs, va >> 40); - - rsrc2 |= S_00B52C_LDS_SIZE(cmd_buffer->state.pipeline->graphics.tess.lds_size); - if (cmd_buffer->device->physical_device->rad_info.chip_class == CIK && - cmd_buffer->device->physical_device->rad_info.family != CHIP_HAWAII) - radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2); - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); - radeon_emit(cmd_buffer->cs, shader->rsrc1); - radeon_emit(cmd_buffer->cs, rsrc2); -} - -static void -radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, - struct radv_shader_variant *shader) -{ - uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; - - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2); - radeon_emit(cmd_buffer->cs, va >> 8); - radeon_emit(cmd_buffer->cs, va >> 40); - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2); - radeon_emit(cmd_buffer->cs, shader->rsrc1); - radeon_emit(cmd_buffer->cs, shader->rsrc2 | - S_00B42C_LDS_SIZE(cmd_buffer->state.pipeline->graphics.tess.lds_size)); - } else { - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4); - radeon_emit(cmd_buffer->cs, va >> 8); - radeon_emit(cmd_buffer->cs, va >> 40); - radeon_emit(cmd_buffer->cs, shader->rsrc1); - radeon_emit(cmd_buffer->cs, shader->rsrc2); - } -} - -static void -radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) -{ - struct radv_shader_variant *vs; - - radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, pipeline->graphics.vgt_primitiveid_en); - - /* Skip shaders merged into HS/GS */ - vs = pipeline->shaders[MESA_SHADER_VERTEX]; - if (!vs) - return; - - if (vs->info.vs.as_ls) - radv_emit_hw_ls(cmd_buffer, vs); - else if (vs->info.vs.as_es) - radv_emit_hw_es(cmd_buffer, pipeline, vs); - else - radv_emit_hw_vs(cmd_buffer, pipeline, vs); -} - - -static void -radv_emit_tess_shaders(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) -{ - if (!radv_pipeline_has_tess(pipeline)) - return; - - struct radv_shader_variant *tes, *tcs; - - tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL]; - tes = pipeline->shaders[MESA_SHADER_TESS_EVAL]; - - if (tes) { - if (tes->info.tes.as_es) - radv_emit_hw_es(cmd_buffer, pipeline, tes); - else - radv_emit_hw_vs(cmd_buffer, pipeline, tes); - } - - radv_emit_hw_hs(cmd_buffer, tcs); - - radeon_set_context_reg(cmd_buffer->cs, R_028B6C_VGT_TF_PARAM, - pipeline->graphics.tess.tf_param); - - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) - radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, - pipeline->graphics.tess.ls_hs_config); - else - radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, - pipeline->graphics.tess.ls_hs_config); - - struct ac_userdata_info *loc; - - loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT); - if (loc->sgpr_idx != -1) { - uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_CTRL]; - assert(loc->num_sgprs == 4); - assert(!loc->indirect); - radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 4); - radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.offchip_layout); - radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_out_offsets); - radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_out_layout | - pipeline->graphics.tess.num_tcs_input_cp << 26); - radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_in_layout); - } - - loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_EVAL, AC_UD_TES_OFFCHIP_LAYOUT); - if (loc->sgpr_idx != -1) { - uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_EVAL]; - assert(loc->num_sgprs == 1); - assert(!loc->indirect); - - radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, - pipeline->graphics.tess.offchip_layout); - } - - loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_LS_TCS_IN_LAYOUT); - if (loc->sgpr_idx != -1) { - uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_VERTEX]; - assert(loc->num_sgprs == 1); - assert(!loc->indirect); - - radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, - pipeline->graphics.tess.tcs_in_layout); - } -} - -static void -radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) -{ - struct radv_shader_variant *gs; - uint64_t va; - - radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, pipeline->graphics.vgt_gs_mode); - - gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; - if (!gs) - return; - - uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2; - - radeon_set_context_reg_seq(cmd_buffer->cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); - radeon_emit(cmd_buffer->cs, gsvs_itemsize); - radeon_emit(cmd_buffer->cs, gsvs_itemsize); - radeon_emit(cmd_buffer->cs, gsvs_itemsize); - - radeon_set_context_reg(cmd_buffer->cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize); - - radeon_set_context_reg(cmd_buffer->cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); - - uint32_t gs_vert_itemsize = gs->info.gs.gsvs_vertex_size; - radeon_set_context_reg_seq(cmd_buffer->cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); - radeon_emit(cmd_buffer->cs, gs_vert_itemsize >> 2); - radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, 0); - - uint32_t gs_num_invocations = gs->info.gs.invocations; - radeon_set_context_reg(cmd_buffer->cs, R_028B90_VGT_GS_INSTANCE_CNT, - S_028B90_CNT(MIN2(gs_num_invocations, 127)) | - S_028B90_ENABLE(gs_num_invocations > 0)); - - radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, - pipeline->graphics.gs.vgt_esgs_ring_itemsize); - - va = radv_buffer_get_va(gs->bo) + gs->bo_offset; - - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2); - radeon_emit(cmd_buffer->cs, va >> 8); - radeon_emit(cmd_buffer->cs, va >> 40); - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2); - radeon_emit(cmd_buffer->cs, gs->rsrc1); - radeon_emit(cmd_buffer->cs, gs->rsrc2 | - S_00B22C_LDS_SIZE(pipeline->graphics.gs.lds_size)); - - radeon_set_context_reg(cmd_buffer->cs, R_028A44_VGT_GS_ONCHIP_CNTL, pipeline->graphics.gs.vgt_gs_onchip_cntl); - radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup); - } else { - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); - radeon_emit(cmd_buffer->cs, va >> 8); - radeon_emit(cmd_buffer->cs, va >> 40); - radeon_emit(cmd_buffer->cs, gs->rsrc1); - radeon_emit(cmd_buffer->cs, gs->rsrc2); - } - - radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader); - - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY, - AC_UD_GS_VS_RING_STRIDE_ENTRIES); - if (loc->sgpr_idx != -1) { - uint32_t stride = gs->info.gs.max_gsvs_emit_size; - uint32_t num_entries = 64; - bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI; - - if (is_vi) - num_entries *= stride; - - stride = S_008F04_STRIDE(stride); - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + loc->sgpr_idx * 4, 2); - radeon_emit(cmd_buffer->cs, stride); - radeon_emit(cmd_buffer->cs, num_entries); - } -} - -static void -radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) -{ - struct radv_shader_variant *ps; - uint64_t va; - struct radv_blend_state *blend = &pipeline->graphics.blend; - assert (pipeline->shaders[MESA_SHADER_FRAGMENT]); - - ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; - va = radv_buffer_get_va(ps->bo) + ps->bo_offset; - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4); - radeon_emit(cmd_buffer->cs, va >> 8); - radeon_emit(cmd_buffer->cs, va >> 40); - radeon_emit(cmd_buffer->cs, ps->rsrc1); - radeon_emit(cmd_buffer->cs, ps->rsrc2); - - radeon_set_context_reg(cmd_buffer->cs, R_02880C_DB_SHADER_CONTROL, - pipeline->graphics.db_shader_control); - - radeon_set_context_reg(cmd_buffer->cs, R_0286CC_SPI_PS_INPUT_ENA, - ps->config.spi_ps_input_ena); - - radeon_set_context_reg(cmd_buffer->cs, R_0286D0_SPI_PS_INPUT_ADDR, - ps->config.spi_ps_input_addr); - - radeon_set_context_reg(cmd_buffer->cs, R_0286D8_SPI_PS_IN_CONTROL, - S_0286D8_NUM_INTERP(ps->info.fs.num_interp)); - - radeon_set_context_reg(cmd_buffer->cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); - - radeon_set_context_reg(cmd_buffer->cs, R_028710_SPI_SHADER_Z_FORMAT, - pipeline->graphics.shader_z_format); - - radeon_set_context_reg(cmd_buffer->cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); - - radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); - radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); - - if (cmd_buffer->device->dfsm_allowed) { - /* optimise this? */ - radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); - } - - if (pipeline->graphics.ps_input_cntl_num) { - radeon_set_context_reg_seq(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num); - for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++) { - radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cntl[i]); - } - } -} - -static void -radv_emit_vgt_vertex_reuse(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) -{ - struct radeon_winsys_cs *cs = cmd_buffer->cs; - - if (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10) - return; - - radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, - pipeline->graphics.vtx_reuse_depth); -} - -static void -radv_emit_binning_state(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) -{ - struct radeon_winsys_cs *cs = cmd_buffer->cs; - - if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9) - return; - - radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, - pipeline->graphics.bin.pa_sc_binner_cntl_0); - radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, - pipeline->graphics.bin.db_dfsm_control); -} - static void radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) { @@ -1082,40 +656,18 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline) return; - radv_emit_graphics_depth_stencil_state(cmd_buffer, pipeline); - radv_emit_graphics_blend_state(cmd_buffer, pipeline); - radv_emit_graphics_raster_state(cmd_buffer, pipeline); radv_update_multisample_state(cmd_buffer, pipeline); - radv_emit_vertex_shader(cmd_buffer, pipeline); - radv_emit_tess_shaders(cmd_buffer, pipeline); - radv_emit_geometry_shader(cmd_buffer, pipeline); - radv_emit_fragment_shader(cmd_buffer, pipeline); - radv_emit_vgt_vertex_reuse(cmd_buffer, pipeline); - radv_emit_binning_state(cmd_buffer, pipeline); cmd_buffer->scratch_size_needed = MAX2(cmd_buffer->scratch_size_needed, pipeline->max_waves * pipeline->scratch_bytes_per_wave); - radeon_set_context_reg(cmd_buffer->cs, R_0286E8_SPI_TMPRING_SIZE, - S_0286E8_WAVES(pipeline->max_waves) | - S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); - if (!cmd_buffer->state.emitted_pipeline || cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband != pipeline->graphics.can_use_guardband) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; - radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, pipeline->graphics.vgt_shader_stages_en); - - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { - radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, pipeline->graphics.prim); - } else { - radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, pipeline->graphics.prim); - } - radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, pipeline->graphics.gs_out); - - radeon_set_context_reg(cmd_buffer->cs, R_02820C_PA_SC_CLIPRECT_RULE, pipeline->graphics.pa_sc_cliprect_rule); + radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); if (unlikely(cmd_buffer->device->trace_bo)) radv_save_pipeline(cmd_buffer, pipeline, RING_GFX); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 6bdb1160cb2..1c5d994af1c 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -29,6 +29,7 @@ #include "util/u_atomic.h" #include "radv_debug.h" #include "radv_private.h" +#include "radv_cs.h" #include "radv_shader.h" #include "nir/nir.h" #include "nir/nir_builder.h" @@ -60,6 +61,8 @@ radv_pipeline_destroy(struct radv_device *device, if (pipeline->gs_copy_shader) radv_shader_variant_destroy(device, pipeline->gs_copy_shader); + if(pipeline->cs.buf) + free(pipeline->cs.buf); vk_free2(&device->alloc, allocator, pipeline); } @@ -2447,6 +2450,479 @@ radv_compute_binning_state(struct radv_pipeline *pipeline, const VkGraphicsPipel assert(!pipeline->device->dfsm_allowed); } + +static void +radv_pipeline_generate_depth_stencil_state(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + struct radv_depth_stencil_state *ds = &pipeline->graphics.ds; + radeon_set_context_reg(cs, R_028800_DB_DEPTH_CONTROL, ds->db_depth_control); + radeon_set_context_reg(cs, R_02842C_DB_STENCIL_CONTROL, ds->db_stencil_control); + + radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, ds->db_render_control); + radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, ds->db_render_override2); +} + +static void +radv_pipeline_generate_blend_state(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + radeon_set_context_reg_seq(cs, R_028780_CB_BLEND0_CONTROL, 8); + radeon_emit_array(cs, pipeline->graphics.blend.cb_blend_control, + 8); + radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, pipeline->graphics.blend.cb_color_control); + radeon_set_context_reg(cs, R_028B70_DB_ALPHA_TO_MASK, pipeline->graphics.blend.db_alpha_to_mask); + + if (pipeline->device->physical_device->has_rbplus) { + + radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8); + radeon_emit_array(cs, pipeline->graphics.blend.sx_mrt_blend_opt, 8); + + radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); + radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */ + radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */ + radeon_emit(cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */ + } +} + + +static void +radv_pipeline_generate_raster_state(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + struct radv_raster_state *raster = &pipeline->graphics.raster; + + radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, + raster->pa_cl_clip_cntl); + radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0, + raster->spi_interp_control); + radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL, + raster->pa_su_vtx_cntl); + radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, + raster->pa_su_sc_mode_cntl); +} + + +static void +radv_pipeline_generate_multisample_state(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + struct radv_multisample_state *ms = &pipeline->graphics.ms; + + radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); + radeon_emit(cs, ms->pa_sc_aa_mask[0]); + radeon_emit(cs, ms->pa_sc_aa_mask[1]); + + radeon_set_context_reg(cs, R_028804_DB_EQAA, ms->db_eqaa); + radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); + + if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions) { + uint32_t offset; + struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET); + uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_FRAGMENT]; + if (loc->sgpr_idx == -1) + return; + assert(loc->num_sgprs == 1); + assert(!loc->indirect); + switch (pipeline->graphics.ms.num_samples) { + default: + offset = 0; + break; + case 2: + offset = 1; + break; + case 4: + offset = 3; + break; + case 8: + offset = 7; + break; + case 16: + offset = 15; + break; + } + + radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4, offset); + } +} + +static void +radv_pipeline_generate_hw_vs(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline, + struct radv_shader_variant *shader) +{ + uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; + + radeon_set_context_reg(cs, R_0286C4_SPI_VS_OUT_CONFIG, + pipeline->graphics.vs.spi_vs_out_config); + + radeon_set_context_reg(cs, R_02870C_SPI_SHADER_POS_FORMAT, + pipeline->graphics.vs.spi_shader_pos_format); + + radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4); + radeon_emit(cs, va >> 8); + radeon_emit(cs, va >> 40); + radeon_emit(cs, shader->rsrc1); + radeon_emit(cs, shader->rsrc2); + + radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL, + S_028818_VTX_W0_FMT(1) | + S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) | + S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | + S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); + + + radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, + pipeline->graphics.vs.pa_cl_vs_out_cntl); + + if (pipeline->device->physical_device->rad_info.chip_class <= VI) + radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, + pipeline->graphics.vs.vgt_reuse_off); +} + +static void +radv_pipeline_generate_hw_es(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline, + struct radv_shader_variant *shader) +{ + uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; + + radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4); + radeon_emit(cs, va >> 8); + radeon_emit(cs, va >> 40); + radeon_emit(cs, shader->rsrc1); + radeon_emit(cs, shader->rsrc2); +} + +static void +radv_pipeline_generate_hw_ls(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline, + struct radv_shader_variant *shader) +{ + uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; + uint32_t rsrc2 = shader->rsrc2; + + radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2); + radeon_emit(cs, va >> 8); + radeon_emit(cs, va >> 40); + + rsrc2 |= S_00B52C_LDS_SIZE(pipeline->graphics.tess.lds_size); + if (pipeline->device->physical_device->rad_info.chip_class == CIK && + pipeline->device->physical_device->rad_info.family != CHIP_HAWAII) + radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2); + + radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); + radeon_emit(cs, shader->rsrc1); + radeon_emit(cs, rsrc2); +} + +static void +radv_pipeline_generate_hw_hs(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline, + struct radv_shader_variant *shader) +{ + uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; + + if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) { + radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2); + radeon_emit(cs, va >> 8); + radeon_emit(cs, va >> 40); + + radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2); + radeon_emit(cs, shader->rsrc1); + radeon_emit(cs, shader->rsrc2 | + S_00B42C_LDS_SIZE(pipeline->graphics.tess.lds_size)); + } else { + radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4); + radeon_emit(cs, va >> 8); + radeon_emit(cs, va >> 40); + radeon_emit(cs, shader->rsrc1); + radeon_emit(cs, shader->rsrc2); + } +} + +static void +radv_pipeline_generate_vertex_shader(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + struct radv_shader_variant *vs; + + radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, pipeline->graphics.vgt_primitiveid_en); + + /* Skip shaders merged into HS/GS */ + vs = pipeline->shaders[MESA_SHADER_VERTEX]; + if (!vs) + return; + + if (vs->info.vs.as_ls) + radv_pipeline_generate_hw_ls(cs, pipeline, vs); + else if (vs->info.vs.as_es) + radv_pipeline_generate_hw_es(cs, pipeline, vs); + else + radv_pipeline_generate_hw_vs(cs, pipeline, vs); +} + +static void +radv_pipeline_generate_tess_shaders(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + if (!radv_pipeline_has_tess(pipeline)) + return; + + struct radv_shader_variant *tes, *tcs; + + tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL]; + tes = pipeline->shaders[MESA_SHADER_TESS_EVAL]; + + if (tes) { + if (tes->info.tes.as_es) + radv_pipeline_generate_hw_es(cs, pipeline, tes); + else + radv_pipeline_generate_hw_vs(cs, pipeline, tes); + } + + radv_pipeline_generate_hw_hs(cs, pipeline, tcs); + + radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM, + pipeline->graphics.tess.tf_param); + + if (pipeline->device->physical_device->rad_info.chip_class >= CIK) + radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, + pipeline->graphics.tess.ls_hs_config); + else + radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, + pipeline->graphics.tess.ls_hs_config); + + struct ac_userdata_info *loc; + + loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT); + if (loc->sgpr_idx != -1) { + uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_CTRL]; + assert(loc->num_sgprs == 4); + assert(!loc->indirect); + radeon_set_sh_reg_seq(cs, base_reg + loc->sgpr_idx * 4, 4); + radeon_emit(cs, pipeline->graphics.tess.offchip_layout); + radeon_emit(cs, pipeline->graphics.tess.tcs_out_offsets); + radeon_emit(cs, pipeline->graphics.tess.tcs_out_layout | + pipeline->graphics.tess.num_tcs_input_cp << 26); + radeon_emit(cs, pipeline->graphics.tess.tcs_in_layout); + } + + loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_EVAL, AC_UD_TES_OFFCHIP_LAYOUT); + if (loc->sgpr_idx != -1) { + uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_EVAL]; + assert(loc->num_sgprs == 1); + assert(!loc->indirect); + + radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4, + pipeline->graphics.tess.offchip_layout); + } + + loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_LS_TCS_IN_LAYOUT); + if (loc->sgpr_idx != -1) { + uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_VERTEX]; + assert(loc->num_sgprs == 1); + assert(!loc->indirect); + + radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4, + pipeline->graphics.tess.tcs_in_layout); + } +} + +static void +radv_pipeline_generate_geometry_shader(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + struct radv_shader_variant *gs; + uint64_t va; + + radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, pipeline->graphics.vgt_gs_mode); + + gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; + if (!gs) + return; + + uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2; + + radeon_set_context_reg_seq(cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); + radeon_emit(cs, gsvs_itemsize); + radeon_emit(cs, gsvs_itemsize); + radeon_emit(cs, gsvs_itemsize); + + radeon_set_context_reg(cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize); + + radeon_set_context_reg(cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); + + uint32_t gs_vert_itemsize = gs->info.gs.gsvs_vertex_size; + radeon_set_context_reg_seq(cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); + radeon_emit(cs, gs_vert_itemsize >> 2); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + + uint32_t gs_num_invocations = gs->info.gs.invocations; + radeon_set_context_reg(cs, R_028B90_VGT_GS_INSTANCE_CNT, + S_028B90_CNT(MIN2(gs_num_invocations, 127)) | + S_028B90_ENABLE(gs_num_invocations > 0)); + + radeon_set_context_reg(cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, + pipeline->graphics.gs.vgt_esgs_ring_itemsize); + + va = radv_buffer_get_va(gs->bo) + gs->bo_offset; + + if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) { + radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2); + radeon_emit(cs, va >> 8); + radeon_emit(cs, va >> 40); + + radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2); + radeon_emit(cs, gs->rsrc1); + radeon_emit(cs, gs->rsrc2 | + S_00B22C_LDS_SIZE(pipeline->graphics.gs.lds_size)); + + radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, pipeline->graphics.gs.vgt_gs_onchip_cntl); + radeon_set_context_reg(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup); + } else { + radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); + radeon_emit(cs, va >> 8); + radeon_emit(cs, va >> 40); + radeon_emit(cs, gs->rsrc1); + radeon_emit(cs, gs->rsrc2); + } + + radv_pipeline_generate_hw_vs(cs, pipeline, pipeline->gs_copy_shader); + + struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_GEOMETRY, + AC_UD_GS_VS_RING_STRIDE_ENTRIES); + if (loc->sgpr_idx != -1) { + uint32_t stride = gs->info.gs.max_gsvs_emit_size; + uint32_t num_entries = 64; + bool is_vi = pipeline->device->physical_device->rad_info.chip_class >= VI; + + if (is_vi) + num_entries *= stride; + + stride = S_008F04_STRIDE(stride); + radeon_set_sh_reg_seq(cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + loc->sgpr_idx * 4, 2); + radeon_emit(cs, stride); + radeon_emit(cs, num_entries); + } +} + + +static void +radv_pipeline_generate_fragment_shader(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + struct radv_shader_variant *ps; + uint64_t va; + struct radv_blend_state *blend = &pipeline->graphics.blend; + assert (pipeline->shaders[MESA_SHADER_FRAGMENT]); + + ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; + va = radv_buffer_get_va(ps->bo) + ps->bo_offset; + + radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4); + radeon_emit(cs, va >> 8); + radeon_emit(cs, va >> 40); + radeon_emit(cs, ps->rsrc1); + radeon_emit(cs, ps->rsrc2); + + radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, + pipeline->graphics.db_shader_control); + + radeon_set_context_reg(cs, R_0286CC_SPI_PS_INPUT_ENA, + ps->config.spi_ps_input_ena); + + radeon_set_context_reg(cs, R_0286D0_SPI_PS_INPUT_ADDR, + ps->config.spi_ps_input_addr); + + radeon_set_context_reg(cs, R_0286D8_SPI_PS_IN_CONTROL, + S_0286D8_NUM_INTERP(ps->info.fs.num_interp)); + + radeon_set_context_reg(cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); + + radeon_set_context_reg(cs, R_028710_SPI_SHADER_Z_FORMAT, + pipeline->graphics.shader_z_format); + + radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); + + radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); + radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); + + if (pipeline->device->dfsm_allowed) { + /* optimise this? */ + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); + } + + if (pipeline->graphics.ps_input_cntl_num) { + radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num); + for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++) { + radeon_emit(cs, pipeline->graphics.ps_input_cntl[i]); + } + } +} + +static void +radv_pipeline_generate_vgt_vertex_reuse(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10) + return; + + radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, + pipeline->graphics.vtx_reuse_depth); +} + +static void +radv_pipeline_generate_binning_state(struct radeon_winsys_cs *cs, + struct radv_pipeline *pipeline) +{ + if (pipeline->device->physical_device->rad_info.chip_class < GFX9) + return; + + radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, + pipeline->graphics.bin.pa_sc_binner_cntl_0); + radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, + pipeline->graphics.bin.db_dfsm_control); +} + +static void +radv_pipeline_generate_pm4(struct radv_pipeline *pipeline) +{ + pipeline->cs.buf = malloc(4 * 256); + pipeline->cs.max_dw = 256; + + radv_pipeline_generate_depth_stencil_state(&pipeline->cs, pipeline); + radv_pipeline_generate_blend_state(&pipeline->cs, pipeline); + radv_pipeline_generate_raster_state(&pipeline->cs, pipeline); + radv_pipeline_generate_multisample_state(&pipeline->cs, pipeline); + radv_pipeline_generate_vertex_shader(&pipeline->cs, pipeline); + radv_pipeline_generate_tess_shaders(&pipeline->cs, pipeline); + radv_pipeline_generate_geometry_shader(&pipeline->cs, pipeline); + radv_pipeline_generate_fragment_shader(&pipeline->cs, pipeline); + radv_pipeline_generate_vgt_vertex_reuse(&pipeline->cs, pipeline); + radv_pipeline_generate_binning_state(&pipeline->cs, pipeline); + + radeon_set_context_reg(&pipeline->cs, R_0286E8_SPI_TMPRING_SIZE, + S_0286E8_WAVES(pipeline->max_waves) | + S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); + + radeon_set_context_reg(&pipeline->cs, R_028B54_VGT_SHADER_STAGES_EN, pipeline->graphics.vgt_shader_stages_en); + + if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { + radeon_set_uconfig_reg_idx(&pipeline->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, pipeline->graphics.prim); + } else { + radeon_set_config_reg(&pipeline->cs, R_008958_VGT_PRIMITIVE_TYPE, pipeline->graphics.prim); + } + radeon_set_context_reg(&pipeline->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, pipeline->graphics.gs_out); + + radeon_set_context_reg(&pipeline->cs, R_02820C_PA_SC_CLIPRECT_RULE, pipeline->graphics.pa_sc_cliprect_rule); + + assert(pipeline->cs.cdw <= pipeline->cs.max_dw); +} + static VkResult radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device, @@ -2739,6 +3215,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline, radv_compute_binning_state(pipeline, pCreateInfo); result = radv_pipeline_scratch_init(device, pipeline); + radv_pipeline_generate_pm4(pipeline); + return result; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 29c885a26ef..cf8cf5fd4e7 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1222,6 +1222,8 @@ struct radv_pipeline { struct radv_shader_variant *gs_copy_shader; VkShaderStageFlags active_stages; + struct radeon_winsys_cs cs; + struct radv_vertex_elements_info vertex_elements; uint32_t binding_stride[MAX_VBS]; -- 2.30.2