TCS outputs are usually not written to LDS, so no stats here.
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 13);
}
return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 13);
}
+static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
+{
+ assert(ctx->type == PIPE_SHADER_TESS_CTRL);
+ /* Computes, from the shader key and selector, the value that the callers
+  * in this patch previously unpacked from bits [13:20] of
+  * param_tcs_out_lds_layout (documented there as the stride between
+  * output vertices in DW).
+  *
+  * The result is util_last_bit64() of a 64-bit value, times 4
+  * (presumably 4 dwords per output slot -- TODO confirm). A non-zero
+  * key.mono.u.ff_tcs_inputs_to_copy takes precedence.
+  */
+ if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
+ return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
+ /* Otherwise derive the stride from the selector's outputs_written. */
+ return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
+}
+/* Return the TCS output vertex stride as an LLVM constant of type ctx->i32.
+ *
+ * Wraps get_tcs_out_vertex_dw_stride_constant(); the callers in this patch
+ * use it in place of unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 8).
+ */
+static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
+{
+ unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
+ /* Final argument 0: the constant is not sign-extended. */
+ return LLVMConstInt(ctx->i32, stride, 0);
+}
+
static LLVMValueRef
get_tcs_out_patch0_offset(struct si_shader_context *ctx)
{
static LLVMValueRef
get_tcs_out_patch0_offset(struct si_shader_context *ctx)
{
LLVMValueRef dw_addr, stride;
if (reg->Register.Dimension) {
LLVMValueRef dw_addr, stride;
if (reg->Register.Dimension) {
- stride = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 8);
+ stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
} else {
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
} else {
}
if (reg->Register.Dimension) {
}
if (reg->Register.Dimension) {
- stride = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 8);
+ stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
skip_lds_store = !sh_info->reads_pervertex_outputs;
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
skip_lds_store = !sh_info->reads_pervertex_outputs;
/* Layout of TCS outputs / TES inputs:
* [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
* max = 32*32*4 + 32*4
/* Layout of TCS outputs / TES inputs:
* [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
* max = 32*32*4 + 32*4
- * [13:20] = stride between output vertices in DW = num_inputs * 4
- * max = 32*4
* [26:31] = gl_PatchVerticesIn, max = 32
*/
int param_tcs_out_lds_layout;
* [26:31] = gl_PatchVerticesIn, max = 32
*/
int param_tcs_out_lds_layout;
tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4);
tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4);
- tcs_out_layout = (output_patch_size / 4) |
- ((output_vertex_size / 4) << 13);
+ tcs_out_layout = output_patch_size / 4;
tcs_out_offsets = (output_patch0_offset / 16) |
((perpatch_output_offset / 16) << 16);
offchip_layout = *num_patches |
tcs_out_offsets = (output_patch0_offset / 16) |
((perpatch_output_offset / 16) << 16);
offchip_layout = *num_patches |