+ VkExtent2D extent = {512, 512};
+
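+ /* The color/depth bin size tables are indexed by log2(render backends per SE)
+ * and log2(number of SEs); derive those indices here. */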
+ unsigned log_num_rb_per_se =
+ util_logbase2_ceil(pipeline->device->physical_device->rad_info.num_render_backends /
+ pipeline->device->physical_device->rad_info.max_se);
+ unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se);
+
+ unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
+ unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa);
+ unsigned effective_samples = total_samples;
+ unsigned color_bytes_per_pixel = 0;
+
+ const VkPipelineColorBlendStateCreateInfo *vkblend = pCreateInfo->pColorBlendState;
+ if (vkblend) {
+ for (unsigned i = 0; i < subpass->color_count; i++) {
+ if (!vkblend->pAttachments[i].colorWriteMask)
+ continue;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+ color_bytes_per_pixel += vk_format_get_blocksize(format);
+ }
+
+ /* MSAA images typically don't use all samples all the time. */
+ if (effective_samples >= 2 && ps_iter_samples <= 1)
+ effective_samples = 2;
+ color_bytes_per_pixel *= effective_samples;
+ }
+
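+ /* Advance to the first entry whose bpp value exceeds our computed color bytes
+ * per pixel. This assumes the table is sorted by ascending bpp and ends in a
+ * catch-all entry, so the loop always terminates. */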
+ const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
+ while (color_entry->bpp <= color_bytes_per_pixel)
+ ++color_entry;
+
+ extent = color_entry->extent;
+
+ if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+ struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment.attachment;
+
+ /* Coefficients taken from AMDVLK */
+ unsigned depth_coeff = vk_format_is_depth(attachment->format) ? 5 : 0;
+ unsigned stencil_coeff = vk_format_is_stencil(attachment->format) ? 1 : 0;
+ unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
+
+ const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
+ while (ds_entry->bpp <= ds_bytes_per_pixel)
+ ++ds_entry;
+
+ extent.width = MIN2(extent.width, ds_entry->extent.width);
+ extent.height = MIN2(extent.height, ds_entry->extent.height);
+ }
+
+ return extent;
+}
+
+static void
+radv_pipeline_generate_binning_state(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
+ return;
+
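+ /* Conservative defaults: binning disabled (legacy scan converter) and DFSM
+ * punchout forced off. Binning is enabled below when the device allows it. */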
+ uint32_t pa_sc_binner_cntl_0 =
+ S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+ S_028C44_DISABLE_START_OF_PRIM(1);
+ uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
+
+ VkExtent2D bin_size = radv_compute_bin_size(pipeline, pCreateInfo);
+
+ unsigned context_states_per_bin; /* allowed range: [1, 6] */
+ unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
+ unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
+
+ switch (pipeline->device->physical_device->rad_info.family) {
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ context_states_per_bin = 1;
+ persistent_states_per_bin = 1;
+ fpovs_per_batch = 63;
+ break;
+ case CHIP_RAVEN:
+ context_states_per_bin = 6;
+ persistent_states_per_bin = 32;
+ fpovs_per_batch = 63;
+ break;
+ default:
+ unreachable("unhandled family while determining binning state.");
+ }
+
+ if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
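+ /* BIN_SIZE_X/Y are single-bit fields selecting a 16-pixel bin; larger sizes
+ * go through the _EXTEND fields, encoded as log2(size) - 5 (32 -> 0, 64 -> 1, ...). */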
+ pa_sc_binner_cntl_0 =
+ S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
+ S_028C44_BIN_SIZE_X(bin_size.width == 16) |
+ S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
+ S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
+ S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
+ S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin - 1) |
+ S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin - 1) |
+ S_028C44_DISABLE_START_OF_PRIM(1) |
+ S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
+ S_028C44_OPTIMAL_BIN_SELECTION(1);
+ }
+
+ radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
+ pa_sc_binner_cntl_0);
+ radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
+ db_dfsm_control);
+}
+
+static void
+radv_pipeline_generate_depth_stencil_state(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
+{
+ const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ struct radv_render_pass_attachment *attachment = NULL;
+ uint32_t db_depth_control = 0, db_stencil_control = 0;
+ uint32_t db_render_control = 0, db_render_override2 = 0;
+ uint32_t db_render_override = 0;
+
+ if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED)
+ attachment = pass->attachments + subpass->depth_stencil_attachment.attachment;
+
+ bool has_depth_attachment = attachment && vk_format_is_depth(attachment->format);
+ bool has_stencil_attachment = attachment && vk_format_is_stencil(attachment->format);
+
+ if (vkds && has_depth_attachment) {
+ db_depth_control = S_028800_Z_ENABLE(vkds->depthTestEnable ? 1 : 0) |
+ S_028800_Z_WRITE_ENABLE(vkds->depthWriteEnable ? 1 : 0) |
+ S_028800_ZFUNC(vkds->depthCompareOp) |
+ S_028800_DEPTH_BOUNDS_ENABLE(vkds->depthBoundsTestEnable ? 1 : 0);
+
+ /* From AMDVLK: for 4xAA and 8xAA we need to decompress on flush for better performance. */
+ db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
+ }
+
+ if (has_stencil_attachment && vkds && vkds->stencilTestEnable) {
+ db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
+ db_depth_control |= S_028800_STENCILFUNC(vkds->front.compareOp);
+ db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(vkds->front.failOp));
+ db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(vkds->front.passOp));
+ db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(vkds->front.depthFailOp));
+
+ db_depth_control |= S_028800_STENCILFUNC_BF(vkds->back.compareOp);
+ db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(vkds->back.failOp));
+ db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(vkds->back.passOp));
+ db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(vkds->back.depthFailOp));
+ }
+
+ if (attachment && extra) {
+ db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
+ db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
+
+ db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->db_resummarize);
+ db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->db_flush_depth_inplace);
+ db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->db_flush_stencil_inplace);
+ db_render_override2 |= S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear);
+ db_render_override2 |= S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear);
+ }
+
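+ /* Force both hierarchical stencil (HiS) enables off. */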
+ db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
+ S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
+
+ if (pipeline->device->enabled_extensions.EXT_depth_range_unrestricted &&
+ !pCreateInfo->pRasterizationState->depthClampEnable &&
+ ps->info.info.ps.writes_z) {
+ /* From VK_EXT_depth_range_unrestricted spec:
+ *
+ * "The behavior described in Primitive Clipping still applies.
+ * If depth clamping is disabled the depth values are still
+ * clipped to 0 ≤ zc ≤ wc before the viewport transform. If
+ * depth clamping is enabled the above equation is ignored and
+ * the depth values are instead clamped to the VkViewport
+ * minDepth and maxDepth values, which in the case of this
+ * extension can be outside of the 0.0 to 1.0 range."
+ */
+ db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1);
+ }
+
+ radeon_set_context_reg(cs, R_028800_DB_DEPTH_CONTROL, db_depth_control);
+ radeon_set_context_reg(cs, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
+
+ radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, db_render_control);
+ radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
+ radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2);
+}
+
+static void
+radv_pipeline_generate_blend_state(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const struct radv_blend_state *blend)
+{
+ radeon_set_context_reg_seq(cs, R_028780_CB_BLEND0_CONTROL, 8);
+ radeon_emit_array(cs, blend->cb_blend_control, 8);
+ radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control);
+ radeon_set_context_reg(cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
+
+ if (pipeline->device->physical_device->has_rbplus) {
+ radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8);
+ radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8);
+ }
+
+ radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
+
+ radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
+ radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
+
+ pipeline->graphics.col_format = blend->spi_shader_col_format;
+ pipeline->graphics.cb_target_mask = blend->cb_target_mask;
+}
+
+static void
+radv_pipeline_generate_raster_state(struct radeon_winsys_cs *cs,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+ const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
+
+ radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
+ S_028810_PS_UCP_MODE(3) |
+ S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
+ S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
+ S_028810_ZCLIP_FAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
+ S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
+ S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
+
+ radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0,
+ S_0286D4_FLAT_SHADE_ENA(1) |
+ S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
+ S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
+
+ radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL,
+ S_028BE4_PIX_CENTER(1) | // TODO verify
+ S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
+ S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
+
+ radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL,
+ S_028814_FACE(vkraster->frontFace) |
+ S_028814_CULL_FRONT(!!(vkraster->cullMode & VK_CULL_MODE_FRONT_BIT)) |
+ S_028814_CULL_BACK(!!(vkraster->cullMode & VK_CULL_MODE_BACK_BIT)) |
+ S_028814_POLY_MODE(vkraster->polygonMode != VK_POLYGON_MODE_FILL) |
+ S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(vkraster->polygonMode)) |
+ S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(vkraster->polygonMode)) |
+ S_028814_POLY_OFFSET_FRONT_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
+ S_028814_POLY_OFFSET_BACK_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
+ S_028814_POLY_OFFSET_PARA_ENABLE(vkraster->depthBiasEnable ? 1 : 0));
+}
+
+static void
+radv_pipeline_generate_multisample_state(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline)
+{
+ struct radv_multisample_state *ms = &pipeline->graphics.ms;
+
+ radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+ radeon_emit(cs, ms->pa_sc_aa_mask[0]);
+ radeon_emit(cs, ms->pa_sc_aa_mask[1]);
+
+ radeon_set_context_reg(cs, R_028804_DB_EQAA, ms->db_eqaa);
+ radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
+
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions) {
+ uint32_t offset;
+ struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET);
+ uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_FRAGMENT];
+ if (loc->sgpr_idx == -1)
+ return;
+ assert(loc->num_sgprs == 1);
+ assert(!loc->indirect);
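+ /* The user SGPR holds the offset of this sample count's position set; the
+ * 1x/2x/4x/8x/16x sets appear to be stored back to back, so the offset for
+ * N samples is 1 + 2 + ... + N/2 = N - 1. */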
+ switch (pipeline->graphics.ms.num_samples) {
+ default:
+ offset = 0;
+ break;
+ case 2:
+ offset = 1;
+ break;
+ case 4:
+ offset = 3;
+ break;
+ case 8:
+ offset = 7;
+ break;
+ case 16:
+ offset = 15;
+ break;
+ }
+
+ radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4, offset);
+ }
+}
+
+static void
+radv_pipeline_generate_vgt_gs_mode(struct radeon_winsys_cs *cs,
+ const struct radv_pipeline *pipeline)
+{
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+
+ uint32_t vgt_primitiveid_en = false;
+ uint32_t vgt_gs_mode = 0;
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ const struct radv_shader_variant *gs =
+ pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out,
+ pipeline->device->physical_device->rad_info.chip_class);
+ } else if (outinfo->export_prim_id) {
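+ /* Without a GS, the hardware VS uses GS_SCENARIO_A to export the primitive ID. */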
+ vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
+ vgt_primitiveid_en = true;
+ }
+
+ radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en);
+ radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, vgt_gs_mode);
+}
+
+static void
+radv_pipeline_generate_hw_vs(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader)
+{
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
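+ /* Program the shader's GPU address and PGM_RSRC1/RSRC2 for the hardware VS stage. */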
+ radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, shader->rsrc2);
+
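+ /* The remaining VS output registers are derived from what the shader exports:
+ * parameter/position exports, point size, layer, viewport index and the
+ * clip/cull distance masks. */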
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ unsigned clip_dist_mask, cull_dist_mask, total_mask;
+ clip_dist_mask = outinfo->clip_dist_mask;
+ cull_dist_mask = outinfo->cull_dist_mask;
+ total_mask = clip_dist_mask | cull_dist_mask;
+ bool misc_vec_ena = outinfo->writes_pointsize ||
+ outinfo->writes_layer ||
+ outinfo->writes_viewport_index;
+
+ radeon_set_context_reg(cs, R_0286C4_SPI_VS_OUT_CONFIG,
+ S_0286C4_VS_EXPORT_COUNT(MAX2(1, outinfo->param_exports) - 1));
+
+ radeon_set_context_reg(cs, R_02870C_SPI_SHADER_POS_FORMAT,
+ S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
+ S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
+ V_02870C_SPI_SHADER_4COMP :
+ V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
+ V_02870C_SPI_SHADER_4COMP :
+ V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
+ V_02870C_SPI_SHADER_4COMP :
+ V_02870C_SPI_SHADER_NONE));
+
+ radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL,
+ S_028818_VTX_W0_FMT(1) |
+ S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
+ S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
+ S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
+
+ radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
+ S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+ S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+ cull_dist_mask << 8 |
+ clip_dist_mask);
+
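+ /* Pre-GFX9 parts disable vertex reuse when the shader writes the viewport
+ * index (same as radeonsi). */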
+ if (pipeline->device->physical_device->rad_info.chip_class <= VI)
+ radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
+ outinfo->writes_viewport_index);
+}
+
+static void
+radv_pipeline_generate_hw_es(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader)
+{
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, shader->rsrc2);
+}
+
+static void
+radv_pipeline_generate_hw_ls(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader,
+ const struct radv_tessellation_state *tess)
+{
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ uint32_t rsrc2 = shader->rsrc2;
+
+ radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
+
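+ /* Fold the LDS size into RSRC2. On CIK (except Hawaii) RSRC2_LS is also
+ * written separately first; radeonsi does the same, apparently to work
+ * around a hardware quirk that needs the register written twice. */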
+ rsrc2 |= S_00B52C_LDS_SIZE(tess->lds_size);
+ if (pipeline->device->physical_device->rad_info.chip_class == CIK &&
+ pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
+ radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
+
+ radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, rsrc2);
+}
+
+static void
+radv_pipeline_generate_hw_hs(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader,
+ const struct radv_tessellation_state *tess)
+{
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
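+ /* GFX9 merges LS and HS into a single stage, so the HS binary is programmed
+ * through the LS program registers and the LDS size is folded into its RSRC2. */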
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B414_MEM_BASE(va >> 40));
+
+ radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, shader->rsrc2 |
+ S_00B42C_LDS_SIZE(tess->lds_size));
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, shader->rsrc2);
+ }
+}
+
+static void
+radv_pipeline_generate_vertex_shader(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const struct radv_tessellation_state *tess)
+{
+ struct radv_shader_variant *vs;
+
+ /* Skip shaders merged into HS/GS */
+ vs = pipeline->shaders[MESA_SHADER_VERTEX];
+ if (!vs)
+ return;
+
+ if (vs->info.vs.as_ls)
+ radv_pipeline_generate_hw_ls(cs, pipeline, vs, tess);
+ else if (vs->info.vs.as_es)
+ radv_pipeline_generate_hw_es(cs, pipeline, vs);
+ else
+ radv_pipeline_generate_hw_vs(cs, pipeline, vs);
+}
+
+static void
+radv_pipeline_generate_tess_shaders(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const struct radv_tessellation_state *tess)
+{
+ if (!radv_pipeline_has_tess(pipeline))
+ return;
+
+ struct radv_shader_variant *tes, *tcs;
+
+ tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL];
+ tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+
+ if (tes) {
+ if (tes->info.tes.as_es)
+ radv_pipeline_generate_hw_es(cs, pipeline, tes);
+ else
+ radv_pipeline_generate_hw_vs(cs, pipeline, tes);
+ }
+
+ radv_pipeline_generate_hw_hs(cs, pipeline, tcs, tess);
+
+ radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM,
+ tess->tf_param);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= CIK)
+ radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2,
+ tess->ls_hs_config);
+ else
+ radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG,
+ tess->ls_hs_config);
+}
+
+static void
+radv_pipeline_generate_geometry_shader(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const struct radv_gs_state *gs_state)
+{
+ struct radv_shader_variant *gs;
+ uint64_t va;
+
+ gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ if (!gs)
+ return;
+
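+ /* max_gsvs_emit_size is in bytes; the GSVS ring item size and offsets are in dwords. */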
+ uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2;
+
+ radeon_set_context_reg_seq(cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
+ radeon_emit(cs, gsvs_itemsize);
+ radeon_emit(cs, gsvs_itemsize);
+ radeon_emit(cs, gsvs_itemsize);
+
+ radeon_set_context_reg(cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);