break;
case PIPE_SHADER_TESS_CTRL:
+ params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32;
params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32;
params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32;
params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32;
break;
case PIPE_SHADER_TESS_EVAL:
+ params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32;
params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32;
params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32;
num_params = SI_PARAM_TCS_OUT_LAYOUT+1;
params[SI_PARAM_SAMPLERS] = ctx.i64;
params[SI_PARAM_IMAGES] = ctx.i64;
params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
+ params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx.i32;
params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
SI_LS_NUM_USER_SGPR,
/* both TCS and TES */
- SI_SGPR_TCS_OUT_OFFSETS = SI_NUM_RESOURCE_SGPRS,
+ SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
+ SI_SGPR_TCS_OUT_OFFSETS,
SI_SGPR_TCS_OUT_LAYOUT,
SI_TES_NUM_USER_SGPR,
SI_PARAM_LS_OUT_LAYOUT = SI_PARAM_START_INSTANCE + 1,
/* the other VS parameters are assigned dynamically */
+ /* Layout of TCS outputs in the offchip buffer
+ * [0:8] = the number of patches per threadgroup.
+ * [9:15] = the number of output vertices per patch.
+ * [16:31] = the offset of per patch attributes in the buffer in bytes.
+ */
+ SI_PARAM_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */
+
/* Offsets where TCS outputs and TCS patch outputs live in LDS:
* [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
* [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32
*/
- SI_PARAM_TCS_OUT_OFFSETS = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */
+ SI_PARAM_TCS_OUT_OFFSETS, /* for TCS & TES */
/* Layout of TCS outputs / TES inputs:
* [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
unsigned input_patch_size, output_patch_size, output_patch0_offset;
unsigned perpatch_output_offset, lds_size, ls_rsrc2;
unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
+ unsigned offchip_layout;
*num_patches = 1; /* TODO: calculate this */
((output_vertex_size / 4) << 13);
tcs_out_offsets = (output_patch0_offset / 16) |
((perpatch_output_offset / 16) << 16);
+ offchip_layout = (pervertex_output_patch_size * *num_patches << 16) |
+ (num_tcs_output_cp << 9) | *num_patches;
/* Set them for LS. */
radeon_set_sh_reg(cs,
/* Set them for TCS. */
radeon_set_sh_reg_seq(cs,
- R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OUT_OFFSETS * 4, 3);
+ R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
+ radeon_emit(cs, offchip_layout);
radeon_emit(cs, tcs_out_offsets);
radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
radeon_emit(cs, tcs_in_layout);
/* Set them for TES. */
- radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
+ radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3);
+ radeon_emit(cs, offchip_layout);
radeon_emit(cs, tcs_out_offsets);
radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
}