late_alloc_wave64 = 0;
else if (num_cu_per_sh <= 6)
late_alloc_wave64 = num_cu_per_sh - 2; /* All CUs enabled */
+ else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)
+ late_alloc_wave64 = (num_cu_per_sh - 2) * 6;
else
late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
shader->ctx_reg.ngg.ge_pc_alloc = S_030980_OVERSUB_EN(1) |
S_030980_NUM_PC_LINES(oversub_pc_lines - 1);
- shader->ge_cntl =
- S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
- S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
- S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
+ if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST) {
+ shader->ge_cntl =
+ S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
+ S_03096C_VERT_GRP_SIZE(shader->ngg.max_gsprims * 3);
+ } else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP) {
+ shader->ge_cntl =
+ S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
+ S_03096C_VERT_GRP_SIZE(shader->ngg.max_gsprims + 2);
+ } else {
+ shader->ge_cntl =
+ S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
+ S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
+ S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
- /* Bug workaround for a possible hang with non-tessellation cases.
- * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
- *
- * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
- */
- if ((sscreen->info.family == CHIP_NAVI10 ||
- sscreen->info.family == CHIP_NAVI12 ||
- sscreen->info.family == CHIP_NAVI14) &&
- (es_type == PIPE_SHADER_VERTEX || gs_type == PIPE_SHADER_VERTEX) && /* = no tess */
- shader->ngg.hw_max_esverts != 256) {
- shader->ge_cntl &= C_03096C_VERT_GRP_SIZE;
-
- if (shader->ngg.hw_max_esverts > 5) {
- shader->ge_cntl |=
- S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts - 5);
+ /* Bug workaround for a possible hang with non-tessellation cases.
+ * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
+ *
+ * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
+ */
+ if ((sscreen->info.family == CHIP_NAVI10 ||
+ sscreen->info.family == CHIP_NAVI12 ||
+ sscreen->info.family == CHIP_NAVI14) &&
+ (es_type == PIPE_SHADER_VERTEX || gs_type == PIPE_SHADER_VERTEX) && /* = no tess */
+ shader->ngg.hw_max_esverts != 256) {
+ shader->ge_cntl &= C_03096C_VERT_GRP_SIZE;
+
+ if (shader->ngg.hw_max_esverts > 5) {
+ shader->ge_cntl |=
+ S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts - 5);
+ }
}
}
main_part->key.as_ngg = key->as_ngg;
main_part->is_monolithic = false;
- if (si_compile_shader(sscreen, compiler_state->compiler,
- main_part, &compiler_state->debug) != 0) {
+ if (!si_compile_shader(sscreen, compiler_state->compiler,
+ main_part, &compiler_state->debug)) {
FREE(main_part);
return false;
}
simple_mtx_unlock(&sscreen->shader_cache_mutex);
/* Compile the shader if it hasn't been loaded from the cache. */
- if (si_compile_shader(sscreen, compiler, shader,
- debug) != 0) {
+ if (!si_compile_shader(sscreen, compiler, shader, debug)) {
FREE(shader);
fprintf(stderr, "radeonsi: can't compile a main shader part\n");
return;
if (key.u.ngg) {
stages |= S_028B54_PRIMGEN_EN(1) |
+ S_028B54_GS_FAST_LAUNCH(key.u.ngg_gs_fast_launch) |
S_028B54_NGG_WAVE_ID_EN(key.u.streamout) |
S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough);
} else if (key.u.gs)
}
/* This must be done after the shader variant is selected. */
- if (sctx->ngg)
- key.u.ngg_passthrough = gfx10_is_ngg_passthrough(si_get_vs(sctx)->current);
+ if (sctx->ngg) {
+ struct si_shader *vs = si_get_vs(sctx)->current;
+
+ key.u.ngg_passthrough = gfx10_is_ngg_passthrough(vs);
+ key.u.ngg_gs_fast_launch = !!(vs->key.opt.ngg_culling &
+ SI_NGG_CULL_GS_FAST_LAUNCH_ALL);
+ }
si_update_vgt_shader_config(sctx, key);