From d244a25c072557edd1133356b94557938c000f78 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 6 Jul 2020 22:05:00 -0400 Subject: [PATCH] radeonsi: add missing initialization of registers (random initial gfx10 commit:) Fixes: 78cdf9a99f0 - amd/addrlib: add gfx10 support Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_compute.c | 23 ++++++++++++++++++++++- src/gallium/drivers/radeonsi/si_state.c | 21 +++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index b3dea264982..bd7defd8f9a 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -364,10 +364,22 @@ void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff)); radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff)); + + /* Disable profiling on compute queues. */ + if (cs != sctx->gfx_cs || !sctx->screen->info.has_graphics) { + radeon_set_sh_reg(cs, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 0); + radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, 0); + } } - if (sctx->chip_class >= GFX10) + if (sctx->chip_class >= GFX10) { + radeon_set_sh_reg(cs, R_00B890_COMPUTE_USER_ACCUM_0, 0); + radeon_set_sh_reg(cs, R_00B894_COMPUTE_USER_ACCUM_1, 0); + radeon_set_sh_reg(cs, R_00B898_COMPUTE_USER_ACCUM_2, 0); + radeon_set_sh_reg(cs, R_00B89C_COMPUTE_USER_ACCUM_3, 0); radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0); + radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0); + } /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID * and is now per pipe, so it should be handled in the @@ -393,6 +405,15 @@ void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8); } } + + /* cs_preamble_state initializes this for the gfx queue, so only do this + * if we are on a compute queue. + */ + if (sctx->chip_class >= GFX9 && + (cs != sctx->gfx_cs || !sctx->screen->info.has_graphics)) { + radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, + sctx->chip_class >= GFX10 ? 0x20 : 0); + } } static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index feef17566fd..b070104a78e 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5350,6 +5350,27 @@ void si_init_cs_preamble_state(struct si_context *sctx) si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); + si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, + sctx->chip_class >= GFX10 ? 0x20 : 0); + } + + if (sctx->chip_class >= GFX10) { + si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); + si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); + si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0); + si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0); + si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0); + si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0); + si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0); + si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0); + si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0); + si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0); + si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0); + si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0); + si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0); + si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0); + si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0); + si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0); } sctx->cs_preamble_state = pm4; -- 2.30.2