+ offchip_layout = *num_patches |
+ (num_tcs_output_cp << 6) |
+ (pervertex_output_patch_size * *num_patches << 12);
+
+ /* Compute the LDS size. */
+ lds_size = output_patch0_offset + output_patch_size * *num_patches;
+
+ if (sctx->b.chip_class >= CIK) {
+ assert(lds_size <= 65536);
+ lds_size = align(lds_size, 512) / 512;
+ } else {
+ assert(lds_size <= 32768);
+ lds_size = align(lds_size, 256) / 256;
+ }
+
+ /* Set SI_SGPR_VS_STATE_BITS. */
+ sctx->current_vs_state &= C_VS_STATE_LS_OUT_PATCH_SIZE &
+ C_VS_STATE_LS_OUT_VERTEX_SIZE;
+ sctx->current_vs_state |= tcs_in_layout;
+
+ if (sctx->b.chip_class >= GFX9) {
+ unsigned hs_rsrc2 = ls_current->config.rsrc2 |
+ S_00B42C_LDS_SIZE(lds_size);
+
+ radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
+
+ /* Set userdata SGPRs for merged LS-HS. */
+ radeon_set_sh_reg_seq(cs,
+ R_00B430_SPI_SHADER_USER_DATA_LS_0 +
+ GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3);
+ radeon_emit(cs, offchip_layout);
+ radeon_emit(cs, tcs_out_offsets);
+ radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
+ } else {
+ unsigned ls_rsrc2 = ls_current->config.rsrc2;
+
+ si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
+ ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
+
+ /* Due to a hw bug, RSRC2_LS must be written twice with another
+ * LS register written in between. */
+ if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
+ radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
+ radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+ radeon_emit(cs, ls_current->config.rsrc1);
+ radeon_emit(cs, ls_rsrc2);
+
+ /* Set userdata SGPRs for TCS. */
+ radeon_set_sh_reg_seq(cs,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
+ radeon_emit(cs, offchip_layout);
+ radeon_emit(cs, tcs_out_offsets);
+ radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
+ radeon_emit(cs, tcs_in_layout);
+ }
+
+ /* Set userdata SGPRs for TES. */
+ radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2);
+ radeon_emit(cs, offchip_layout);
+ radeon_emit(cs, r600_resource(sctx->tess_offchip_ring)->gpu_address >> 16);