From: Marek Olšák Date: Wed, 6 Aug 2014 01:18:06 +0000 (+0200) Subject: radeonsi: always prefer SWITCH_ON_EOP(0) on CIK X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0e229b8c5aa5eb758cbc9e1176001e0f4d985a6a;p=mesa.git radeonsi: always prefer SWITCH_ON_EOP(0) on CIK The code is rewritten to take known constraints into account, while always using 0 by default. This should improve performance for multi-SE parts in theory. A debug option is also added for easier debugging. (If there are hangs, use the option. If the hangs go away, you have found the problem.) Reviewed-by: Alex Deucher v2: fix a typo, set max_se for evergreen GPUs according to the kernel driver --- diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 3476021138a..eb44d7255ba 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -239,7 +239,6 @@ static const struct debug_named_value common_debug_options[] = { { "vm", DBG_VM, "Print virtual addresses when creating resources" }, { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_.c file with faulty cs" }, - /* shaders */ { "fs", DBG_FS, "Print fetch shaders" }, { "vs", DBG_VS, "Print vertex shaders" }, @@ -254,6 +253,7 @@ static const struct debug_named_value common_debug_options[] = { { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" }, { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" }, { "notiling", DBG_NO_TILING, "Disable tiling" }, + { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." }, DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index dcec2bb3b4b..ac69d5b8746 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -93,6 +93,7 @@ #define DBG_NO_DISCARD_RANGE (1 << 12) #define DBG_NO_2D_TILING (1 << 13) #define DBG_NO_TILING (1 << 14) +#define DBG_SWITCH_ON_EOP (1 << 15) /* The maximum allowed bit is 15. */ #define R600_MAP_BUFFER_ALIGNMENT 64 diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 4e808a3eedf..ae839ba1642 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -401,25 +401,40 @@ static bool si_update_draw_info_state(struct si_context *sctx, if (sctx->b.chip_class >= CIK) { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; - bool wd_switch_on_eop = prim == V_008958_DI_PT_POLYGON || - prim == V_008958_DI_PT_LINELOOP || - prim == V_008958_DI_PT_TRIFAN || - prim == V_008958_DI_PT_TRISTRIP_ADJ || - info->primitive_restart || - (rs ? rs->line_stipple_enable : false); - /* If the WD switch is false, the IA switch must be false too. */ - bool ia_switch_on_eop = wd_switch_on_eop; unsigned primgroup_size = 64; + /* SWITCH_ON_EOP(0) is always preferable. */ + bool wd_switch_on_eop = false; + bool ia_switch_on_eop = false; + + /* WD_SWITCH_ON_EOP has no effect on GPUs with less than + * 4 shader engines. Set 1 to pass the assertion below. + * The other cases are hardware requirements. */ + if (sctx->b.screen->info.max_se < 4 || + prim == V_008958_DI_PT_POLYGON || + prim == V_008958_DI_PT_LINELOOP || + prim == V_008958_DI_PT_TRIFAN || + prim == V_008958_DI_PT_TRISTRIP_ADJ || + info->primitive_restart) + wd_switch_on_eop = true; + /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. * We don't know that for indirect drawing, so treat it as * always problematic. */ if (sctx->b.family == CHIP_HAWAII && - (info->indirect || info->instance_count > 1)) { + (info->indirect || info->instance_count > 1)) wd_switch_on_eop = true; + + /* This is a hardware requirement. */ + if ((rs && rs->line_stipple_enable) || + (sctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) { ia_switch_on_eop = true; + wd_switch_on_eop = true; } + /* If the WD switch is false, the IA switch must be false too. */ + assert(wd_switch_on_eop || !ia_switch_on_eop); + si_pm4_set_reg(pm4, R_028B74_VGT_DISPATCH_DRAW_INDEX, ib->index_size == 4 ? 0xFC000000 : 0xFC00); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 910d06b5dba..21567bbcf01 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -392,6 +392,26 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL, &ws->info.max_se); + if (!ws->info.max_se) { + switch (ws->info.family) { + default: + ws->info.max_se = 1; + break; + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + case CHIP_BARTS: + case CHIP_CAYMAN: + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_BONAIRE: + ws->info.max_se = 2; + break; + case CHIP_HAWAII: + ws->info.max_se = 4; + break; + } + } + radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL, &ws->info.max_sh_per_se);