switch (ctx->type) {
case PIPE_SHADER_VERTEX:
- stride = util_last_bit64(ctx->shader->selector->outputs_written);
- return LLVMConstInt(ctx->i32, stride * 4, 0);
+ stride = ctx->shader->selector->lshs_vertex_stride / 4;
+ return LLVMConstInt(ctx->i32, stride, 0);
case PIPE_SHADER_TESS_CTRL:
if (ctx->screen->info.chip_class >= GFX9 &&
ctx->shader->is_monolithic) {
- stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written);
- return LLVMConstInt(ctx->i32, stride * 4, 0);
+ stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
+ return LLVMConstInt(ctx->i32, stride, 0);
}
return si_unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
ubyte culldist_mask;
/* ES parameters. */
- unsigned esgs_itemsize;
+ unsigned esgs_itemsize; /* vertex stride */
+ unsigned lshs_vertex_stride;
/* GS parameters. */
unsigned gs_input_verts_per_prim;
num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
}
- input_vertex_size = num_tcs_inputs * 16;
+ input_vertex_size = ls->lshs_vertex_stride;
output_vertex_size = num_tcs_outputs * 16;
input_patch_size = num_tcs_input_cp * input_vertex_size;
}
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
+ sel->lshs_vertex_stride = sel->esgs_itemsize;
+
+ /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
+ * starts on a different bank (except at the maximum stride, 32*16).
+ */
+ if (sel->lshs_vertex_stride < 32*16)
+ sel->lshs_vertex_stride += 4;
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
* conflicts, i.e. each vertex will start at a different bank.