/* 3D engine options: */
{"nogfx", DBG(NO_GFX), "Disable graphics. Only multimedia compute paths can be used."},
{"nongg", DBG(NO_NGG), "Disable NGG and use the legacy pipeline."},
- {"nggc", DBG(ALWAYS_NGG_CULLING), "Always use NGG culling even when it can hurt."},
+ {"nggc", DBG(ALWAYS_NGG_CULLING_ALL), "Always use NGG culling even when it can hurt."},
+ {"nggctess", DBG(ALWAYS_NGG_CULLING_TESS), "Always use NGG culling for tessellation."},
{"nonggc", DBG(NO_NGG_CULLING), "Disable NGG culling."},
{"alwayspd", DBG(ALWAYS_PD), "Always enable the primitive discard compute shader."},
{"pd", DBG(PD), "Enable the primitive discard compute shader for large draw calls."},
si_resource_reference(&sctx->wait_mem_scratch, NULL);
si_resource_reference(&sctx->small_prim_cull_info_buf, NULL);
- si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
+ if (sctx->cs_preamble_state)
+ si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
if (sctx->cs_preamble_gs_rings)
si_pm4_free_state(sctx, sctx->cs_preamble_gs_rings, ~0);
for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx);
if (status != PIPE_NO_RESET) {
- /* Call the gallium frontend to set a no-op API dispatch. */
+ /* Call the gallium frontend to set a no-op API dispatch. */
if (sctx->device_reset_callback.reset) {
sctx->device_reset_callback.reset(sctx->device_reset_callback.data, status);
}
unsigned max_offchip_buffers_per_se;
if (sscreen->info.chip_class >= GFX10)
- max_offchip_buffers_per_se = 256;
+ max_offchip_buffers_per_se = 128;
/* Only certain chips can use the maximum value. */
else if (sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_VEGA20)
max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
sscreen->use_ngg = sscreen->info.chip_class >= GFX10 && sscreen->info.family != CHIP_NAVI14 &&
!(sscreen->debug_flags & DBG(NO_NGG));
sscreen->use_ngg_culling = sscreen->use_ngg && !(sscreen->debug_flags & DBG(NO_NGG_CULLING));
- sscreen->always_use_ngg_culling =
- sscreen->use_ngg_culling && sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING);
+ sscreen->always_use_ngg_culling_all =
+ sscreen->use_ngg_culling && sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL);
+ sscreen->always_use_ngg_culling_tess =
+ sscreen->use_ngg_culling && sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS);
sscreen->use_ngg_streamout = false;
/* Only enable primitive binning on APUs by default. */
sscreen->compute_wave_size = 64;
if (sscreen->info.chip_class >= GFX10) {
- /* Pixels shaders: Wave64 is recommended.
- * Compute shaders: There are piglit failures with Wave32.
+ /* Pixel shaders: Wave64 is always fastest.
+ * Vertex shaders: Wave64 is probably better, because:
+ * - greater chance of L0 cache hits, because more threads are assigned
+ * to the same CU
+ * - scalar instructions are only executed once for 64 threads instead of twice
+ * - VGPR allocation granularity is half of Wave32, so 1 Wave64 can
+ * sometimes use fewer VGPRs than 2 Wave32
+ * - TessMark X64 with NGG culling is faster with Wave64
*/
- sscreen->ge_wave_size = 32;
-
if (sscreen->debug_flags & DBG(W32_GE))
sscreen->ge_wave_size = 32;
if (sscreen->debug_flags & DBG(W32_PS))