radeonsi: Program RASTER_CONFIG for harvested GPUs v5
authorTom Stellard <thomas.stellard@amd.com>
Tue, 9 Sep 2014 19:18:57 +0000 (15:18 -0400)
committerTom Stellard <thomas.stellard@amd.com>
Mon, 8 Dec 2014 22:20:50 +0000 (17:20 -0500)
Harvested GPUs have some of their render backends disabled, so
in order to prevent the hardware from trying to render things
with these disabled backends we need to correctly program
the PA_SC_RASTER_CONFIG register.

v2:
  - Write RASTER_CONFIG for all SEs.

v3:
  - Set GRBM_GFX_INDEX.INSTANCE_BROADCAST_WRITES bit.
  - Set GRBM_GFX_INFEX.SH_BROADCAST_WRITES bit when done setting
    PA_SC_RASTER_CONFIG.
  - Get num_se and num_sh_per_se from kernel.

v4:
  - Get correct value for num_se
  - Remove loop for setting PA_SC_RASTER_CONFIG
  - Only compute raster config when a backend has been disabled.

v5: Michel Dänzer
  - Fix computation for chips with multiple SEs

https://bugs.freedesktop.org/show_bug.cgi?id=60879

CC: "10.4 10.3" <mesa-stable@lists.freedesktop.org>
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/sid.h
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
src/gallium/winsys/radeon/drm/radeon_winsys.h

index ea8e61a04195d16dc3f5b21e4a2be9aacdae7c1e..f24c28e92209c971bd2bb86a727563309604e62d 100644 (file)
@@ -3081,6 +3081,110 @@ void si_init_state_functions(struct si_context *sctx)
        sctx->b.b.draw_vbo = si_draw_vbo;
 }
 
+static void
+si_write_harvested_raster_configs(struct si_context *sctx,
+                                 struct si_pm4_state *pm4,
+                                 unsigned raster_config)
+{
+       unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
+       unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
+       unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+       unsigned num_rb = sctx->screen->b.info.r600_num_backends;
+       unsigned rb_per_pkr = num_rb / num_se / sh_per_se;
+       unsigned rb_per_se = num_rb / num_se;
+       unsigned se0_mask = (1 << rb_per_se) - 1;
+       unsigned se1_mask = se0_mask << rb_per_se;
+       unsigned se;
+
+       assert(num_se == 1 || num_se == 2);
+       assert(sh_per_se == 1 || sh_per_se == 2);
+       assert(rb_per_pkr == 1 || rb_per_pkr == 2);
+
+       /* XXX: I can't figure out what the *_XSEL and *_YSEL
+        * fields are for, so I'm leaving them as their default
+        * values. */
+
+       se0_mask &= rb_mask;
+       se1_mask &= rb_mask;
+       if (num_se == 2 && (!se0_mask || !se1_mask)) {
+               raster_config &= C_028350_SE_MAP;
+
+               if (!se0_mask) {
+                       raster_config |=
+                               S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
+               } else {
+                       raster_config |=
+                               S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
+               }
+       }
+
+       for (se = 0; se < num_se; se++) {
+               unsigned raster_config_se = raster_config;
+               unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
+               unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+
+               pkr0_mask &= rb_mask;
+               pkr1_mask &= rb_mask;
+               if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) {
+                       raster_config_se &= C_028350_PKR_MAP;
+
+                       if (!pkr0_mask) {
+                               raster_config_se |=
+                                       S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
+                       } else {
+                               raster_config_se |=
+                                       S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
+                       }
+               }
+
+               if (rb_per_pkr == 2) {
+                       unsigned rb0_mask = 1 << (se * rb_per_se);
+                       unsigned rb1_mask = rb0_mask << 1;
+
+                       rb0_mask &= rb_mask;
+                       rb1_mask &= rb_mask;
+                       if (!rb0_mask || !rb1_mask) {
+                               raster_config_se &= C_028350_RB_MAP_PKR0;
+
+                               if (!rb0_mask) {
+                                       raster_config_se |=
+                                               S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
+                               } else {
+                                       raster_config_se |=
+                                               S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
+                               }
+                       }
+
+                       if (sh_per_se == 2) {
+                               rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
+                               rb1_mask = rb0_mask << 1;
+                               rb0_mask &= rb_mask;
+                               rb1_mask &= rb_mask;
+                               if (!rb0_mask || !rb1_mask) {
+                                       raster_config_se &= C_028350_RB_MAP_PKR1;
+
+                                       if (!rb0_mask) {
+                                               raster_config_se |=
+                                                       S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
+                                       } else {
+                                               raster_config_se |=
+                                                       S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
+                                       }
+                               }
+                       }
+               }
+
+               si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+                              SE_INDEX(se) | SH_BROADCAST_WRITES |
+                              INSTANCE_BROADCAST_WRITES);
+               si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
+       }
+
+       si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+                      SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
+                      INSTANCE_BROADCAST_WRITES);
+}
+
 void si_init_config(struct si_context *sctx)
 {
        struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -3152,24 +3256,38 @@ void si_init_config(struct si_context *sctx)
                        break;
                }
        } else {
+               unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+               unsigned num_rb = sctx->screen->b.info.r600_num_backends;
+               unsigned raster_config;
+
                switch (sctx->screen->b.family) {
                case CHIP_TAHITI:
                case CHIP_PITCAIRN:
-                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a);
+                       raster_config = 0x2a00126a;
                        break;
                case CHIP_VERDE:
-                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a);
+                       raster_config = 0x0000124a;
                        break;
                case CHIP_OLAND:
-                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
+                       raster_config = 0x00000082;
                        break;
                case CHIP_HAINAN:
-                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+                       raster_config = 0x00000000;
                        break;
                default:
-                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+                       fprintf(stderr,
+                               "radeonsi: Unknown GPU, using 0 for raster_config\n");
+                       raster_config = 0x00000000;
                        break;
                }
+
+               /* Always use the default config when all backends are enabled. */
+               if (rb_mask && util_bitcount(rb_mask) >= num_rb) {
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
+                                      raster_config);
+               } else {
+                       si_write_harvested_raster_configs(sctx, pm4, raster_config);
+               }
        }
 
        si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
index c4d4afd6f024a2a6c696aa36d30de25390016b66..c78ba7c2991c92e536c477c1e8db1ff2da0457d3 100644 (file)
  * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
  */
 
-
+#define GRBM_GFX_INDEX                                                  0x802C
+#define         INSTANCE_INDEX(x)                                     ((x) << 0)
+#define         SH_INDEX(x)                                           ((x) << 8)
+#define         SE_INDEX(x)                                           ((x) << 16)
+#define         SH_BROADCAST_WRITES                                   (1 << 29)
+#define         INSTANCE_BROADCAST_WRITES                             (1 << 30)
+#define         SE_BROADCAST_WRITES                                   (1 << 31)
 #define R_0084FC_CP_STRMOUT_CNTL                                       0x0084FC
 #define   S_0084FC_OFFSET_UPDATE_DONE(x)                             (((x) & 0x1) << 0)
 #define R_0085F0_CP_COHER_CNTL                                          0x0085F0
index c207a85c6f46d59058d6eb333b2f8e1ad7e9a573..3974034c91180db4d61de01e3ae056f8c91de7e0 100644 (file)
@@ -325,6 +325,9 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
                          &ws->info.max_sclk);
     ws->info.max_sclk /= 1000;
 
+    radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
+                         &ws->info.si_backend_enabled_mask);
+
     ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 
     /* Generation-specific queries. */
index 18fefbeebcbbe063e871d1d429fd07d0115c60eb..5dc9313ce0513d445753de9374b9120a9509a45b 100644 (file)
@@ -231,6 +231,7 @@ struct radeon_info {
 
     boolean                     si_tile_mode_array_valid;
     uint32_t                    si_tile_mode_array[32];
+    uint32_t                    si_backend_enabled_mask;
 
     boolean                     cik_macrotile_mode_array_valid;
     uint32_t                    cik_macrotile_mode_array[16];