From a23802bcb9a42a02d34a5a36d6e66d6532813a0d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 26 Mar 2020 22:02:13 -0400 Subject: [PATCH] ac,radeonsi: start adding support for gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/common/ac_gpu_info.c | 7 +++- src/amd/common/ac_surface.c | 2 + src/amd/common/amd_family.h | 1 + src/amd/registers/gfx10.json | 41 +++++++++++++++---- src/gallium/drivers/radeonsi/si_perfcounter.c | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 8 +++- src/gallium/drivers/radeonsi/si_state.c | 17 +++++--- .../drivers/radeonsi/si_state_shaders.c | 4 +- 8 files changed, 64 insertions(+), 17 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index a8a43fdc8ee..517de226bd9 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -582,7 +582,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN || info->family == CHIP_RAVEN2 || - info->family == CHIP_RENOIR); + info->family == CHIP_RENOIR || + info->chip_class >= GFX10_3); info->has_out_of_order_rast = info->chip_class >= GFX8 && info->chip_class <= GFX9 && @@ -736,7 +737,9 @@ bool ac_query_gpu_info(int fd, void *dev_p, if (info->chip_class >= GFX10) info->num_sdp_interfaces = device_info.num_tcc_blocks; - if (info->chip_class >= GFX10) + if (info->chip_class >= GFX10_3) + info->max_wave64_per_simd = 16; + else if (info->chip_class == GFX10) info->max_wave64_per_simd = 20; else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM) info->max_wave64_per_simd = 8; diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index cbbd86093e7..d7dd9561f6f 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -2127,6 +2127,7 @@ bool ac_surface_set_umd_metadata(const struct radeon_info *info, break; case GFX10: + case GFX10_3: surf->dcc_offset = ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16); surf->u.gfx9.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]); @@ -2169,6 +2170,7 @@ void ac_surface_get_umd_metadata(const struct radeon_info *info, desc[5] |= S_008F24_META_DATA_ADDRESS(surf->dcc_offset >> 40); break; case GFX10: + case GFX10_3: desc[6] &= C_00A018_META_DATA_ADDRESS_LO; desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->dcc_offset >> 8); desc[7] = surf->dcc_offset >> 16; diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h index ffcc1bd9240..8262a3a40b7 100644 --- a/src/amd/common/amd_family.h +++ b/src/amd/common/amd_family.h @@ -119,6 +119,7 @@ enum chip_class { GFX8, GFX9, GFX10, + GFX10_3, }; enum ring_type { diff --git a/src/amd/registers/gfx10.json b/src/amd/registers/gfx10.json index 08f111c87c7..5c2b251ca43 100644 --- a/src/amd/registers/gfx10.json +++ b/src/amd/registers/gfx10.json @@ -16370,6 +16370,12 @@ "name": "SX_PERFCOUNTER3_SELECT", "type_ref": "SX_PERFCOUNTER0_SELECT" }, + { + "chips": ["gfx10"], + "map": {"at": 165712, "to": "mm"}, + "name": "SX_PS_DOWNCONVERT_CONTROL_GFX103", + "type_ref": "SX_PS_DOWNCONVERT_CONTROL" + }, { "chips": ["gfx10"], "map": {"at": 165716, "to": "mm"}, @@ -17248,7 +17254,9 @@ {"bits": [14, 17], "name": "LOSSY_ALPHA_PRECISION"}, {"bits": [18, 18], "name": "DISABLE_CONSTANT_ENCODE_REG"}, {"bits": [19, 19], "name": "ENABLE_CONSTANT_ENCODE_REG_WRITE"}, - {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"} + {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"}, + {"bits": [21, 21], "name": "SKIP_LOW_COMP_RATIO_GFX103"}, + {"bits": [22, 22], "name": "DCC_COMPRESS_DISABLE_GFX103"} ] }, "CB_COLOR0_INFO": { @@ -18715,7 +18723,8 @@ {"bits": [21, 21], "name": "PRESERVE_ZRANGE"}, {"bits": [22, 22], "name": "PRESERVE_SRESULTS"}, {"bits": [23, 23], "name": "DISABLE_FAST_PASS"}, - {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"} + {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"}, + {"bits": [27, 28], "name": "CENTROID_COMPUTATION_MODE_GFX103"} ] }, "DB_RMI_L2_CACHE_CONTROL": { @@ -19426,7 +19435,8 @@ "PA_CL_NGG_CNTL": { "fields": [ {"bits": [0, 0], "name": "VERTEX_REUSE_OFF"}, - {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"} + {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"}, + {"bits": [2, 9], "name": "VERTEX_REUSE_DEPTH_GFX103"} ] }, "PA_CL_OBJPRIM_ID_CNTL": { @@ -19493,8 +19503,9 @@ {"bits": [23, 23], "name": "VS_OUT_CCDIST1_VEC_ENA"}, {"bits": [24, 24], "name": "VS_OUT_MISC_SIDE_BUS_ENA"}, {"bits": [25, 25], "name": "USE_VTX_GS_CUT_FLAG"}, - {"bits": [26, 26], "name": "USE_VTX_SHD_OBJPRIM_ID"}, - {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"} + {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"}, + {"bits": [29, 29], "name": "BYPASS_VTX_RATE_COMBINER_GFX103"}, + {"bits": [30, 30], "name": "BYPASS_PRIM_RATE_COMBINER_GFX103"} ] }, "PA_CL_VTE_CNTL": { @@ -19540,7 +19551,9 @@ {"bits": [13, 16], "name": "MAX_SAMPLE_DIST"}, {"bits": [20, 22], "name": "MSAA_EXPOSED_SAMPLES"}, {"bits": [24, 25], "name": "DETAIL_TO_EXPOSED_MODE"}, - {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"} + {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"}, + {"bits": [28, 28], "name": "SAMPLE_COVERAGE_ENCODING_GFX103"}, + {"bits": [29, 29], "name": "COVERED_CENTROID_IS_CENTER_GFX103"} ] }, "PA_SC_AA_MASK_X0Y0_X1Y0": { @@ -21581,6 +21594,18 @@ {"bits": [10, 19], "name": "PERFCOUNTER_SELECT3"} ] }, + "SX_PS_DOWNCONVERT_CONTROL": { + "fields": [ + {"bits": [0, 0], "name": "MRT0_FMT_MAPPING_DISABLE"}, + {"bits": [1, 1], "name": "MRT1_FMT_MAPPING_DISABLE"}, + {"bits": [2, 2], "name": "MRT2_FMT_MAPPING_DISABLE"}, + {"bits": [3, 3], "name": "MRT3_FMT_MAPPING_DISABLE"}, + {"bits": [4, 4], "name": "MRT4_FMT_MAPPING_DISABLE"}, + {"bits": [5, 5], "name": "MRT5_FMT_MAPPING_DISABLE"}, + {"bits": [6, 6], "name": "MRT6_FMT_MAPPING_DISABLE"}, + {"bits": [7, 7], "name": "MRT7_FMT_MAPPING_DISABLE"} + ] + }, "SX_PS_DOWNCONVERT": { "fields": [ {"bits": [0, 3], "enum_ref": "SX_DOWNCONVERT_FORMAT", "name": "MRT0"}, @@ -21836,7 +21861,9 @@ "VGT_HS_OFFCHIP_PARAM_UMD": { "fields": [ {"bits": [0, 8], "name": "OFFCHIP_BUFFERING"}, - {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"} + {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"}, + {"bits": [0, 9], "name": "OFFCHIP_BUFFERING_GFX103"}, + {"bits": [10, 11], "name": "OFFCHIP_GRANULARITY_GFX103"} ] }, "VGT_INSTANCE_BASE_ID": { diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index d6b3fc85767..8825926064d 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -1438,6 +1438,7 @@ void si_init_perfcounters(struct si_screen *screen) num_blocks = ARRAY_SIZE(groups_gfx9); break; case GFX10: + case GFX10_3: blocks = groups_gfx10; num_blocks = ARRAY_SIZE(groups_gfx10); break; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 1d14442b445..7fdbfa24c57 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1088,7 +1088,11 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se; sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4; - if (sscreen->info.chip_class >= GFX7) { + if (sscreen->info.chip_class >= GFX10_3) { + sscreen->vgt_hs_offchip_param = + S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) | + S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity); + } else if (sscreen->info.chip_class >= GFX7) { if (sscreen->info.chip_class >= GFX8) --max_offchip_buffers; sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | @@ -1125,7 +1129,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, /* Only enable primitive binning on APUs by default. */ if (sscreen->info.chip_class >= GFX10) { sscreen->dpbb_allowed = true; - sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram; + /* DFSM is not supported on GFX 10.3 and not beneficial on Navi1x. */ } else if (sscreen->info.chip_class == GFX9) { sscreen->dpbb_allowed = !sscreen->info.has_dedicated_vram; sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ecce673caf2..b59f28e028d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -757,8 +757,9 @@ static void si_emit_clip_regs(struct si_context *sctx) unsigned initial_cdw = sctx->gfx_cs->current.cdw; unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | clipdist_mask | - (culldist_mask << 8); + S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | + S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) | + clipdist_mask | (culldist_mask << 8); if (sctx->chip_class >= GFX10) { radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, @@ -1384,8 +1385,9 @@ static void si_emit_db_render_state(struct si_context *sctx) radeon_opt_set_context_reg( sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2, S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | - S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | - S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); + S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | + S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) | + S_028010_CENTROID_COMPUTATION_MODE_GFX103(sctx->chip_class >= GFX10_3 ? 2 : 0)); db_shader_control = sctx->ps_db_shader_control; @@ -3535,7 +3537,8 @@ static void si_emit_msaa_config(struct si_context *sctx) sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) | S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | - S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples); + S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | + S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(sctx->chip_class >= GFX10_3); if (sctx->framebuffer.nr_samples > 1) { db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | @@ -5329,6 +5332,7 @@ static void si_init_config(struct si_context *sctx) * a single primitive shader subgroup. */ si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512)); + /* Reuse for legacy (non-NGG) only. */ si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); if (!has_clear_state) { @@ -5370,6 +5374,9 @@ static void si_init_config(struct si_context *sctx) S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); } + if (sctx->chip_class >= GFX10_3) { + si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff); + } if (sctx->chip_class >= GFX9) { si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 0fd1714f8f8..520eeada9e9 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1217,7 +1217,9 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader * this. */ shader->ctx_reg.ngg.pa_cl_ngg_cntl = - S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX); + S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX) | + /* Reuse for NGG. */ + S_028838_VERTEX_REUSE_DEPTH_GFX103(sscreen->info.chip_class >= GFX10_3 ? 30 : 0); shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true); /* Oversubscribe PC. This improves performance when there are too many varyings. */ -- 2.30.2