ac,radeonsi: start adding support for gfx10.3
author Marek Olšák <marek.olsak@amd.com>
Fri, 27 Mar 2020 02:02:13 +0000 (22:02 -0400)
committer Marge Bot <eric+marge@anholt.net>
Tue, 9 Jun 2020 16:17:36 +0000 (16:17 +0000)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5383>

src/amd/common/ac_gpu_info.c
src/amd/common/ac_surface.c
src/amd/common/amd_family.h
src/amd/registers/gfx10.json
src/gallium/drivers/radeonsi/si_perfcounter.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index a8a43fdc8ee1493848924a34a85976541739cf3f..517de226bd9f75e7e064481df0054d2cae0a5cb6 100644 (file)
@@ -582,7 +582,8 @@ bool ac_query_gpu_info(int fd, void *dev_p,
                                info->family == CHIP_VEGA12 ||
                                info->family == CHIP_RAVEN ||
                                info->family == CHIP_RAVEN2 ||
                                info->family == CHIP_VEGA12 ||
                                info->family == CHIP_RAVEN ||
                                info->family == CHIP_RAVEN2 ||
-                               info->family == CHIP_RENOIR);
+                               info->family == CHIP_RENOIR ||
+                               info->chip_class >= GFX10_3);
 
        info->has_out_of_order_rast = info->chip_class >= GFX8 &&
                                      info->chip_class <= GFX9 &&
 
        info->has_out_of_order_rast = info->chip_class >= GFX8 &&
                                      info->chip_class <= GFX9 &&
@@ -736,7 +737,9 @@ bool ac_query_gpu_info(int fd, void *dev_p,
        if (info->chip_class >= GFX10)
                info->num_sdp_interfaces = device_info.num_tcc_blocks;
 
        if (info->chip_class >= GFX10)
                info->num_sdp_interfaces = device_info.num_tcc_blocks;
 
-       if (info->chip_class >= GFX10)
+       if (info->chip_class >= GFX10_3)
+               info->max_wave64_per_simd = 16;
+       else if (info->chip_class == GFX10)
                info->max_wave64_per_simd = 20;
        else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
                info->max_wave64_per_simd = 8;
                info->max_wave64_per_simd = 20;
        else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
                info->max_wave64_per_simd = 8;
index cbbd86093e790aa372e96970ab039644283bc9a8..d7dd9561f6f3203f4e5f7189600ffd2bc1bccdfc 100644 (file)
@@ -2127,6 +2127,7 @@ bool ac_surface_set_umd_metadata(const struct radeon_info *info,
          break;
 
       case GFX10:
          break;
 
       case GFX10:
+      case GFX10_3:
          surf->dcc_offset =
             ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
          surf->u.gfx9.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
          surf->dcc_offset =
             ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
          surf->u.gfx9.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
@@ -2169,6 +2170,7 @@ void ac_surface_get_umd_metadata(const struct radeon_info *info,
       desc[5] |= S_008F24_META_DATA_ADDRESS(surf->dcc_offset >> 40);
       break;
    case GFX10:
       desc[5] |= S_008F24_META_DATA_ADDRESS(surf->dcc_offset >> 40);
       break;
    case GFX10:
+   case GFX10_3:
       desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
       desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->dcc_offset >> 8);
       desc[7] = surf->dcc_offset >> 16;
       desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
       desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->dcc_offset >> 8);
       desc[7] = surf->dcc_offset >> 16;
index ffcc1bd924096aef5a5ee85118c001f19feacbb5..8262a3a40b702526a489daffef331033969aacdb 100644 (file)
@@ -119,6 +119,7 @@ enum chip_class {
     GFX8,
     GFX9,
     GFX10,
     GFX8,
     GFX9,
     GFX10,
+    GFX10_3,
 };
 
 enum ring_type {
 };
 
 enum ring_type {
index 08f111c87c7d548252620601fa4e5cc3c41dffdf..5c2b251ca4368f94f72b3cc69ba995659fbd2521 100644 (file)
    "name": "SX_PERFCOUNTER3_SELECT",
    "type_ref": "SX_PERFCOUNTER0_SELECT"
   },
    "name": "SX_PERFCOUNTER3_SELECT",
    "type_ref": "SX_PERFCOUNTER0_SELECT"
   },
+  {
+   "chips": ["gfx10"],
+   "map": {"at": 165712, "to": "mm"},
+   "name": "SX_PS_DOWNCONVERT_CONTROL_GFX103",
+   "type_ref": "SX_PS_DOWNCONVERT_CONTROL"
+  },
   {
    "chips": ["gfx10"],
    "map": {"at": 165716, "to": "mm"},
   {
    "chips": ["gfx10"],
    "map": {"at": 165716, "to": "mm"},
     {"bits": [14, 17], "name": "LOSSY_ALPHA_PRECISION"},
     {"bits": [18, 18], "name": "DISABLE_CONSTANT_ENCODE_REG"},
     {"bits": [19, 19], "name": "ENABLE_CONSTANT_ENCODE_REG_WRITE"},
     {"bits": [14, 17], "name": "LOSSY_ALPHA_PRECISION"},
     {"bits": [18, 18], "name": "DISABLE_CONSTANT_ENCODE_REG"},
     {"bits": [19, 19], "name": "ENABLE_CONSTANT_ENCODE_REG_WRITE"},
-    {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"}
+    {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"},
+    {"bits": [21, 21], "name": "SKIP_LOW_COMP_RATIO_GFX103"},
+    {"bits": [22, 22], "name": "DCC_COMPRESS_DISABLE_GFX103"}
    ]
   },
   "CB_COLOR0_INFO": {
    ]
   },
   "CB_COLOR0_INFO": {
     {"bits": [21, 21], "name": "PRESERVE_ZRANGE"},
     {"bits": [22, 22], "name": "PRESERVE_SRESULTS"},
     {"bits": [23, 23], "name": "DISABLE_FAST_PASS"},
     {"bits": [21, 21], "name": "PRESERVE_ZRANGE"},
     {"bits": [22, 22], "name": "PRESERVE_SRESULTS"},
     {"bits": [23, 23], "name": "DISABLE_FAST_PASS"},
-    {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"}
+    {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"},
+    {"bits": [27, 28], "name": "CENTROID_COMPUTATION_MODE_GFX103"}
    ]
   },
   "DB_RMI_L2_CACHE_CONTROL": {
    ]
   },
   "DB_RMI_L2_CACHE_CONTROL": {
   "PA_CL_NGG_CNTL": {
    "fields": [
     {"bits": [0, 0], "name": "VERTEX_REUSE_OFF"},
   "PA_CL_NGG_CNTL": {
    "fields": [
     {"bits": [0, 0], "name": "VERTEX_REUSE_OFF"},
-    {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"}
+    {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"},
+    {"bits": [2, 9], "name": "VERTEX_REUSE_DEPTH_GFX103"}
    ]
   },
   "PA_CL_OBJPRIM_ID_CNTL": {
    ]
   },
   "PA_CL_OBJPRIM_ID_CNTL": {
     {"bits": [23, 23], "name": "VS_OUT_CCDIST1_VEC_ENA"},
     {"bits": [24, 24], "name": "VS_OUT_MISC_SIDE_BUS_ENA"},
     {"bits": [25, 25], "name": "USE_VTX_GS_CUT_FLAG"},
     {"bits": [23, 23], "name": "VS_OUT_CCDIST1_VEC_ENA"},
     {"bits": [24, 24], "name": "VS_OUT_MISC_SIDE_BUS_ENA"},
     {"bits": [25, 25], "name": "USE_VTX_GS_CUT_FLAG"},
-    {"bits": [26, 26], "name": "USE_VTX_SHD_OBJPRIM_ID"},
-    {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"}
+    {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"},
+    {"bits": [29, 29], "name": "BYPASS_VTX_RATE_COMBINER_GFX103"},
+    {"bits": [30, 30], "name": "BYPASS_PRIM_RATE_COMBINER_GFX103"}
    ]
   },
   "PA_CL_VTE_CNTL": {
    ]
   },
   "PA_CL_VTE_CNTL": {
     {"bits": [13, 16], "name": "MAX_SAMPLE_DIST"},
     {"bits": [20, 22], "name": "MSAA_EXPOSED_SAMPLES"},
     {"bits": [24, 25], "name": "DETAIL_TO_EXPOSED_MODE"},
     {"bits": [13, 16], "name": "MAX_SAMPLE_DIST"},
     {"bits": [20, 22], "name": "MSAA_EXPOSED_SAMPLES"},
     {"bits": [24, 25], "name": "DETAIL_TO_EXPOSED_MODE"},
-    {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"}
+    {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"},
+    {"bits": [28, 28], "name": "SAMPLE_COVERAGE_ENCODING_GFX103"},
+    {"bits": [29, 29], "name": "COVERED_CENTROID_IS_CENTER_GFX103"}
    ]
   },
   "PA_SC_AA_MASK_X0Y0_X1Y0": {
    ]
   },
   "PA_SC_AA_MASK_X0Y0_X1Y0": {
     {"bits": [10, 19], "name": "PERFCOUNTER_SELECT3"}
    ]
   },
     {"bits": [10, 19], "name": "PERFCOUNTER_SELECT3"}
    ]
   },
+  "SX_PS_DOWNCONVERT_CONTROL": {
+   "fields": [
+    {"bits": [0, 0], "name": "MRT0_FMT_MAPPING_DISABLE"},
+    {"bits": [1, 1], "name": "MRT1_FMT_MAPPING_DISABLE"},
+    {"bits": [2, 2], "name": "MRT2_FMT_MAPPING_DISABLE"},
+    {"bits": [3, 3], "name": "MRT3_FMT_MAPPING_DISABLE"},
+    {"bits": [4, 4], "name": "MRT4_FMT_MAPPING_DISABLE"},
+    {"bits": [5, 5], "name": "MRT5_FMT_MAPPING_DISABLE"},
+    {"bits": [6, 6], "name": "MRT6_FMT_MAPPING_DISABLE"},
+    {"bits": [7, 7], "name": "MRT7_FMT_MAPPING_DISABLE"}
+   ]
+  },
   "SX_PS_DOWNCONVERT": {
    "fields": [
     {"bits": [0, 3], "enum_ref": "SX_DOWNCONVERT_FORMAT", "name": "MRT0"},
   "SX_PS_DOWNCONVERT": {
    "fields": [
     {"bits": [0, 3], "enum_ref": "SX_DOWNCONVERT_FORMAT", "name": "MRT0"},
   "VGT_HS_OFFCHIP_PARAM_UMD": {
    "fields": [
     {"bits": [0, 8], "name": "OFFCHIP_BUFFERING"},
   "VGT_HS_OFFCHIP_PARAM_UMD": {
    "fields": [
     {"bits": [0, 8], "name": "OFFCHIP_BUFFERING"},
-    {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"}
+    {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"},
+    {"bits": [0, 9], "name": "OFFCHIP_BUFFERING_GFX103"},
+    {"bits": [10, 11], "name": "OFFCHIP_GRANULARITY_GFX103"}
    ]
   },
   "VGT_INSTANCE_BASE_ID": {
    ]
   },
   "VGT_INSTANCE_BASE_ID": {
index d6b3fc85767679b1c456dc51c3a8ec3607253db5..8825926064dafee2a612a15c4acff3926c99f30b 100644 (file)
@@ -1438,6 +1438,7 @@ void si_init_perfcounters(struct si_screen *screen)
       num_blocks = ARRAY_SIZE(groups_gfx9);
       break;
    case GFX10:
       num_blocks = ARRAY_SIZE(groups_gfx9);
       break;
    case GFX10:
+   case GFX10_3:
       blocks = groups_gfx10;
       num_blocks = ARRAY_SIZE(groups_gfx10);
       break;
       blocks = groups_gfx10;
       num_blocks = ARRAY_SIZE(groups_gfx10);
       break;
index 1d14442b445b68d04155db31ee21b794c4f3b3ca..7fdbfa24c574ab6b70f1f6a27e4bcd6eed6f38c2 100644 (file)
@@ -1088,7 +1088,11 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
    sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4;
 
    sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
    sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4;
 
-   if (sscreen->info.chip_class >= GFX7) {
+   if (sscreen->info.chip_class >= GFX10_3) {
+      sscreen->vgt_hs_offchip_param =
+            S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
+            S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
+   } else if (sscreen->info.chip_class >= GFX7) {
       if (sscreen->info.chip_class >= GFX8)
          --max_offchip_buffers;
       sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
       if (sscreen->info.chip_class >= GFX8)
          --max_offchip_buffers;
       sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
@@ -1125,7 +1129,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    /* Only enable primitive binning on APUs by default. */
    if (sscreen->info.chip_class >= GFX10) {
       sscreen->dpbb_allowed = true;
    /* Only enable primitive binning on APUs by default. */
    if (sscreen->info.chip_class >= GFX10) {
       sscreen->dpbb_allowed = true;
-      sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram;
+      /* DFSM is not supported on GFX 10.3 and not beneficial on Navi1x. */
    } else if (sscreen->info.chip_class == GFX9) {
       sscreen->dpbb_allowed = !sscreen->info.has_dedicated_vram;
       sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram;
    } else if (sscreen->info.chip_class == GFX9) {
       sscreen->dpbb_allowed = !sscreen->info.has_dedicated_vram;
       sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram;
index ecce673caf295bdd1d2229fd2383f990cc23f197..b59f28e028d06f43e0bc08d9ac98ccc504fa0edb 100644 (file)
@@ -757,8 +757,9 @@ static void si_emit_clip_regs(struct si_context *sctx)
 
    unsigned initial_cdw = sctx->gfx_cs->current.cdw;
    unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
 
    unsigned initial_cdw = sctx->gfx_cs->current.cdw;
    unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
-                         S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | clipdist_mask |
-                         (culldist_mask << 8);
+                         S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+                         S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
+                         clipdist_mask | (culldist_mask << 8);
 
    if (sctx->chip_class >= GFX10) {
       radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
 
    if (sctx->chip_class >= GFX10) {
       radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
@@ -1384,8 +1385,9 @@ static void si_emit_db_render_state(struct si_context *sctx)
    radeon_opt_set_context_reg(
       sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
       S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
    radeon_opt_set_context_reg(
       sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
       S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
-         S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
-         S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
+      S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
+      S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
+      S_028010_CENTROID_COMPUTATION_MODE_GFX103(sctx->chip_class >= GFX10_3 ? 2 : 0));
 
    db_shader_control = sctx->ps_db_shader_control;
 
 
    db_shader_control = sctx->ps_db_shader_control;
 
@@ -3535,7 +3537,8 @@ static void si_emit_msaa_config(struct si_context *sctx)
       sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
       sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
                      S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
       sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
       sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
                      S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
-                     S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples);
+                     S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
+                     S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(sctx->chip_class >= GFX10_3);
 
       if (sctx->framebuffer.nr_samples > 1) {
          db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
 
       if (sctx->framebuffer.nr_samples > 1) {
          db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
@@ -5329,6 +5332,7 @@ static void si_init_config(struct si_context *sctx)
        * a single primitive shader subgroup.
        */
       si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
        * a single primitive shader subgroup.
        */
       si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+      /* Reuse for legacy (non-NGG) only. */
       si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
 
       if (!has_clear_state) {
       si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
 
       if (!has_clear_state) {
@@ -5370,6 +5374,9 @@ static void si_init_config(struct si_context *sctx)
                      S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
       si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
    }
                      S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
       si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
    }
+   if (sctx->chip_class >= GFX10_3) {
+      si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
+   }
 
    if (sctx->chip_class >= GFX9) {
       si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
 
    if (sctx->chip_class >= GFX9) {
       si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
index 0fd1714f8f83ebcdca22586f4d64e1d0d3a2b59d..520eeada9e9bfd32ed320c3f23ee172339c64ac2 100644 (file)
@@ -1217,7 +1217,9 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
     * this.
     */
    shader->ctx_reg.ngg.pa_cl_ngg_cntl =
     * this.
     */
    shader->ctx_reg.ngg.pa_cl_ngg_cntl =
-      S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX);
+      S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX) |
+      /* Reuse for NGG. */
+      S_028838_VERTEX_REUSE_DEPTH_GFX103(sscreen->info.chip_class >= GFX10_3 ? 30 : 0);
    shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
 
    /* Oversubscribe PC. This improves performance when there are too many varyings. */
    shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
 
    /* Oversubscribe PC. This improves performance when there are too many varyings. */