+ radv_pipeline_generate_hw_hs(cs, pipeline, tcs, tess);
+
+ radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM,
+ tess->tf_param);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= CIK)
+ radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2,
+ tess->ls_hs_config);
+ else
+ radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG,
+ tess->ls_hs_config);
+
+ struct ac_userdata_info *loc;
+
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT);
+ if (loc->sgpr_idx != -1) {
+ uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_CTRL];
+ assert(loc->num_sgprs == 4);
+ assert(!loc->indirect);
+ radeon_set_sh_reg_seq(cs, base_reg + loc->sgpr_idx * 4, 4);
+ radeon_emit(cs, tess->offchip_layout);
+ radeon_emit(cs, tess->tcs_out_offsets);
+ radeon_emit(cs, tess->tcs_out_layout |
+ tess->num_tcs_input_cp << 26);
+ radeon_emit(cs, tess->tcs_in_layout);
+ }
+
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_EVAL, AC_UD_TES_OFFCHIP_LAYOUT);
+ if (loc->sgpr_idx != -1) {
+ uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_EVAL];
+ assert(loc->num_sgprs == 1);
+ assert(!loc->indirect);
+
+ radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4,
+ tess->offchip_layout);
+ }
+
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_LS_TCS_IN_LAYOUT);
+ if (loc->sgpr_idx != -1) {
+ uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_VERTEX];
+ assert(loc->num_sgprs == 1);
+ assert(!loc->indirect);
+
+ radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4,
+ tess->tcs_in_layout);
+ }
+}
+
+ /**
+  * Write the geometry-shader related register state of a pipeline into a
+  * command stream.
+  *
+  * Returns without emitting anything when the pipeline has no geometry
+  * shader. Otherwise this emits, in order: the GSVS ring offsets/item size,
+  * the maximum output vertex count, the per-vertex item sizes, the instance
+  * count, the ESGS ring item size, the shader program address and resource
+  * words (layout depends on the chip generation), the hardware VS state for
+  * the GS copy shader, and finally the GSVS ring stride/entry count user
+  * SGPRs if the shader has a slot for them.
+  *
+  * \param cs        command stream that receives the register writes
+  * \param pipeline  pipeline whose geometry shader state is emitted
+  */
+ static void
+ radv_pipeline_generate_geometry_shader(struct radeon_winsys_cs *cs,
+ 				       struct radv_pipeline *pipeline)
+ {
+ 	struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ 	/* Nothing to program without a geometry shader. */
+ 	if (gs == NULL)
+ 		return;
+
+ 	const bool is_gfx9_plus =
+ 		pipeline->device->physical_device->rad_info.chip_class >= GFX9;
+ 	const bool is_vi_plus =
+ 		pipeline->device->physical_device->rad_info.chip_class >= VI;
+
+ 	/* All three ring offsets and the ring item size share one value. */
+ 	const uint32_t ring_itemsize = gs->info.gs.max_gsvs_emit_size >> 2;
+
+ 	radeon_set_context_reg_seq(cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
+ 	for (unsigned i = 0; i < 3; ++i)
+ 		radeon_emit(cs, ring_itemsize);
+
+ 	radeon_set_context_reg(cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, ring_itemsize);
+
+ 	radeon_set_context_reg(cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out);
+
+ 	/* Only the first of the four item size registers is non-zero. */
+ 	const uint32_t vertex_size = gs->info.gs.gsvs_vertex_size;
+ 	radeon_set_context_reg_seq(cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
+ 	radeon_emit(cs, vertex_size >> 2);
+ 	for (unsigned i = 0; i < 3; ++i)
+ 		radeon_emit(cs, 0);
+
+ 	/* The count is clamped to 127; instancing is enabled for any
+ 	 * non-zero invocation count.
+ 	 */
+ 	const uint32_t invocations = gs->info.gs.invocations;
+ 	radeon_set_context_reg(cs, R_028B90_VGT_GS_INSTANCE_CNT,
+ 			       S_028B90_CNT(MIN2(invocations, 127)) |
+ 			       S_028B90_ENABLE(invocations > 0));
+
+ 	radeon_set_context_reg(cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ 			       pipeline->graphics.gs.vgt_esgs_ring_itemsize);
+
+ 	const uint64_t shader_va = radv_buffer_get_va(gs->bo) + gs->bo_offset;
+
+ 	if (!is_gfx9_plus) {
+ 		/* Address and resource words are four consecutive registers. */
+ 		radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
+ 		radeon_emit(cs, shader_va >> 8);
+ 		radeon_emit(cs, shader_va >> 40);
+ 		radeon_emit(cs, gs->rsrc1);
+ 		radeon_emit(cs, gs->rsrc2);
+ 	} else {
+ 		/* On GFX9+ the address goes to the ES program registers and
+ 		 * the resource words (with the LDS size merged into RSRC2)
+ 		 * to the GS ones.
+ 		 */
+ 		radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
+ 		radeon_emit(cs, shader_va >> 8);
+ 		radeon_emit(cs, shader_va >> 40);
+
+ 		radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
+ 		radeon_emit(cs, gs->rsrc1);
+ 		radeon_emit(cs, gs->rsrc2 |
+ 				S_00B22C_LDS_SIZE(pipeline->graphics.gs.lds_size));
+
+ 		radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ 				       pipeline->graphics.gs.vgt_gs_onchip_cntl);
+ 		radeon_set_context_reg(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
+ 				       pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup);
+ 	}
+
+ 	radv_pipeline_generate_hw_vs(cs, pipeline, pipeline->gs_copy_shader);
+
+ 	struct ac_userdata_info *ring_loc =
+ 		radv_lookup_user_sgpr(pipeline, MESA_SHADER_GEOMETRY,
+ 				      AC_UD_GS_VS_RING_STRIDE_ENTRIES);
+
+ 	/* An index of -1 means the shader has no slot for this user data. */
+ 	if (ring_loc->sgpr_idx == -1)
+ 		return;
+
+ 	const uint32_t emit_size = gs->info.gs.max_gsvs_emit_size;
+ 	uint32_t entry_count = 64;
+
+ 	/* On VI and newer the entry count is scaled by the stride. */
+ 	if (is_vi_plus)
+ 		entry_count *= emit_size;
+
+ 	radeon_set_sh_reg_seq(cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + ring_loc->sgpr_idx * 4, 2);
+ 	radeon_emit(cs, S_008F04_STRIDE(emit_size));
+ 	radeon_emit(cs, entry_count);
+ }
+
+ /**
+  * Build the input control word for one pixel shader input.
+  *
+  * \param offset      export parameter offset; either a value up to
+  *                    AC_EXP_PARAM_OFFSET_31, or one of the
+  *                    AC_EXP_PARAM_DEFAULT_VAL_* constants
+  * \param flat_shade  whether to set the FLAT_SHADE field; only applied
+  *                    when the offset is not a DEFAULT_VAL constant
+  * \return the assembled S_028644_* register value
+  */
+ static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
+ {
+ 	if (offset > AC_EXP_PARAM_OFFSET_31) {
+ 		/* Not a real parameter slot: the input is a DEFAULT_VAL
+ 		 * constant, encoded as offset 0x20 plus the index of the
+ 		 * default value.
+ 		 */
+ 		assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
+ 		       offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
+
+ 		const uint32_t default_val = offset - AC_EXP_PARAM_DEFAULT_VAL_0000;
+
+ 		return S_028644_OFFSET(0x20) |
+ 		       S_028644_DEFAULT_VAL(default_val);
+ 	}
+
+ 	uint32_t cntl = S_028644_OFFSET(offset);
+
+ 	if (flat_shade)
+ 		cntl |= S_028644_FLAT_SHADE(1);
+
+ 	return cntl;
+ }
+
+static void
+radv_pipeline_generate_ps_inputs(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline)
+{