radeonsi: move tessellation ring info into si_screen
author Marek Olšák <marek.olsak@amd.com>
Sat, 3 Feb 2018 00:51:53 +0000 (01:51 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Sat, 24 Feb 2018 22:08:28 +0000 (23:08 +0100)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index ec543f2c0eee5e1b72ce96bb3bab24855ec3c364..b3cfc83ac5fbd079ede962b37159bb3cd9b178c3 100644 (file)
@@ -749,11 +749,45 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
        if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
                si_init_perfcounters(sscreen);
 
+       /* Determine tessellation ring info. */
+       bool double_offchip_buffers = sscreen->info.chip_class >= CIK &&
+                                     sscreen->info.family != CHIP_CARRIZO &&
+                                     sscreen->info.family != CHIP_STONEY;
+       /* This must be one less than the maximum number due to a hw limitation.
+        * Various hardware bugs in SI, CIK, and GFX9 need this.
+        */
+       unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;
+       unsigned max_offchip_buffers = max_offchip_buffers_per_se *
+                                      sscreen->info.max_se;
+       unsigned offchip_granularity;
+
        /* Hawaii has a bug with offchip buffers > 256 that can be worked
         * around by setting 4K granularity.
         */
-       sscreen->tess_offchip_block_dw_size =
-               sscreen->info.family == CHIP_HAWAII ? 4096 : 8192;
+       if (sscreen->info.family == CHIP_HAWAII) {
+               sscreen->tess_offchip_block_dw_size = 4096;
+               offchip_granularity = V_03093C_X_4K_DWORDS;
+       } else {
+               sscreen->tess_offchip_block_dw_size = 8192;
+               offchip_granularity = V_03093C_X_8K_DWORDS;
+       }
+
+       sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
+       assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0);
+       sscreen->tess_offchip_ring_size = max_offchip_buffers *
+                                         sscreen->tess_offchip_block_dw_size * 4;
+
+       if (sscreen->info.chip_class >= CIK) {
+               if (sscreen->info.chip_class >= VI)
+                       --max_offchip_buffers;
+               sscreen->vgt_hs_offchip_param =
+                       S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
+                       S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
+       } else {
+               assert(offchip_granularity == V_03093C_X_8K_DWORDS);
+               sscreen->vgt_hs_offchip_param =
+                       S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
+       }
 
        /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs
         * on SI. */
index 3a959f98da5386374d455c41874a2955acac97fd..7b23e8c9d73a4980e9867f270ac4d03f567040d0 100644 (file)
@@ -104,6 +104,9 @@ struct si_screen {
 
        unsigned                        gs_table_depth;
        unsigned                        tess_offchip_block_dw_size;
+       unsigned                        tess_offchip_ring_size;
+       unsigned                        tess_factor_ring_size;
+       unsigned                        vgt_hs_offchip_param;
        bool                            has_clear_state;
        bool                            has_distributed_tess;
        bool                            has_draw_indirect_multi;
index 357c75aa370ce866aee2e7aa09f214d6eb40b03e..0d1dba15b8bd98631429b750c8b181288675518f 100644 (file)
@@ -2970,50 +2970,25 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
 
 static void si_init_tess_factor_ring(struct si_context *sctx)
 {
-       bool double_offchip_buffers = sctx->b.chip_class >= CIK &&
-                                     sctx->b.family != CHIP_CARRIZO &&
-                                     sctx->b.family != CHIP_STONEY;
-       /* This must be one less than the maximum number due to a hw limitation.
-        * Various hardware bugs in SI, CIK, and GFX9 need this.
-        */
-       unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;
-       unsigned max_offchip_buffers = max_offchip_buffers_per_se *
-                                      sctx->screen->info.max_se;
-       unsigned offchip_granularity;
-
-       switch (sctx->screen->tess_offchip_block_dw_size) {
-       default:
-               assert(0);
-               /* fall through */
-       case 8192:
-               offchip_granularity = V_03093C_X_8K_DWORDS;
-               break;
-       case 4096:
-               offchip_granularity = V_03093C_X_4K_DWORDS;
-               break;
-       }
-
        assert(!sctx->tf_ring);
+
        /* Use 64K alignment for both rings, so that we can pass the address
         * to shaders as one SGPR containing bits [16:47].
         */
        sctx->tf_ring = si_aligned_buffer_create(sctx->b.b.screen,
-                                                  R600_RESOURCE_FLAG_UNMAPPABLE,
-                                                  PIPE_USAGE_DEFAULT,
-                                                  32768 * sctx->screen->info.max_se,
-                                                  64 * 1024);
+                                                R600_RESOURCE_FLAG_UNMAPPABLE,
+                                                PIPE_USAGE_DEFAULT,
+                                                sctx->screen->tess_factor_ring_size,
+                                                64 * 1024);
        if (!sctx->tf_ring)
                return;
 
-       assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
-
        sctx->tess_offchip_ring =
                si_aligned_buffer_create(sctx->b.b.screen,
-                                          R600_RESOURCE_FLAG_UNMAPPABLE,
-                                          PIPE_USAGE_DEFAULT,
-                                          max_offchip_buffers *
-                                          sctx->screen->tess_offchip_block_dw_size * 4,
-                                          64 * 1024);
+                                        R600_RESOURCE_FLAG_UNMAPPABLE,
+                                        PIPE_USAGE_DEFAULT,
+                                        sctx->screen->tess_offchip_ring_size,
+                                        64 * 1024);
        if (!sctx->tess_offchip_ring)
                return;
 
@@ -3031,27 +3006,22 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 
        /* Append these registers to the init config state. */
        if (sctx->b.chip_class >= CIK) {
-               if (sctx->b.chip_class >= VI)
-                       --max_offchip_buffers;
-
                si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
-                              S_030938_SIZE(sctx->tf_ring->width0 / 4));
+                              S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
                si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
                               factor_va >> 8);
                if (sctx->b.chip_class >= GFX9)
                        si_pm4_set_reg(sctx->init_config, R_030944_VGT_TF_MEMORY_BASE_HI,
                                       factor_va >> 40);
                si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM,
-                            S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
-                            S_03093C_OFFCHIP_GRANULARITY(offchip_granularity));
+                              sctx->screen->vgt_hs_offchip_param);
        } else {
-               assert(offchip_granularity == V_03093C_X_8K_DWORDS);
                si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
-                              S_008988_SIZE(sctx->tf_ring->width0 / 4));
+                              S_008988_SIZE(sctx->screen->tess_factor_ring_size / 4));
                si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
                               factor_va >> 8);
                si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM,
-                              S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers));
+                              sctx->screen->vgt_hs_offchip_param);
        }
 
        if (sctx->b.chip_class >= GFX9) {