-44 bytes in a monolithic LS-HS binary.
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
return LLVMConstInt(ctx->i32, stride * 4, 0);
case PIPE_SHADER_TESS_CTRL:
+ if (ctx->screen->b.chip_class >= GFX9 &&
+ ctx->shader->is_monolithic) {
+ stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written);
+ return LLVMConstInt(ctx->i32, stride * 4, 0);
+ }
return unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
default:
* - remove the fixup for unused input VGPRs
*/
key->part.tcs.ls_prolog.ls_vgpr_fix = sctx->ls_vgpr_fix;
- key->opt.prefer_mono = sctx->ls_vgpr_fix;
+
+ /* The LS output / HS input layout can be communicated
+ * directly instead of via user SGPRs for merged LS-HS.
+ * The LS VGPR fix prefers this too.
+ */
+ key->opt.prefer_mono = 1;
}
key->part.tcs.epilog.prim_mode =