ac: add radeon_info::num_good_cu_per_sh
author Marek Olšák <marek.olsak@amd.com>
Sat, 25 Aug 2018 02:57:17 +0000 (22:57 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 10 Sep 2018 19:19:56 +0000 (15:19 -0400)
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
src/amd/common/ac_gpu_info.c
src/amd/common/ac_gpu_info.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c

index 8705d878f9a97730bbe48656a7a5bfdeeaee9c97..bfaff45219f01eecfbf19412501058152ed79f4a 100644 (file)
@@ -414,6 +414,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
                for (j = 0; j < info->max_sh_per_se; j++)
                        info->num_good_compute_units +=
                                util_bitcount(amdinfo->cu_bitmap[i][j]);
+       info->num_good_cu_per_sh = info->num_good_compute_units /
+                                  (info->max_se * info->max_sh_per_se);
 
        memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode,
                sizeof(amdinfo->gb_tile_mode));
@@ -543,6 +545,7 @@ void ac_print_gpu_info(struct radeon_info *info)
        printf("Shader core info:\n");
        printf("    max_shader_clock = %i\n", info->max_shader_clock);
        printf("    num_good_compute_units = %i\n", info->num_good_compute_units);
+       printf("    num_good_cu_per_sh = %i\n", info->num_good_cu_per_sh);
        printf("    num_tcc_blocks = %i\n", info->num_tcc_blocks);
        printf("    max_se = %i\n", info->max_se);
        printf("    max_sh_per_se = %i\n", info->max_sh_per_se);
index a897496da487153b4a08ad4476bebd68bfc9ede2..0583a6037f2d6d58ac2339ac48463d2873a0869b 100644 (file)
@@ -116,6 +116,7 @@ struct radeon_info {
        uint32_t                    r600_max_quad_pipes; /* wave size / 16 */
        uint32_t                    max_shader_clock;
        uint32_t                    num_good_compute_units;
+       uint32_t                    num_good_cu_per_sh;
        uint32_t                    num_tcc_blocks;
        uint32_t                    max_se; /* shader engines */
        uint32_t                    max_sh_per_se; /* shader arrays per shader engine */
index 40c478f0a46e3ee907f30d990b57812be7ba55c5..bc1417aadfbb0b8fec37c510744870a0d6758d58 100644 (file)
@@ -4928,9 +4928,7 @@ static void si_init_config(struct si_context *sctx)
                               S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
 
                /* Compute LATE_ALLOC_VS.LIMIT. */
-               unsigned num_cu_per_sh = sscreen->info.num_good_compute_units /
-                                        (sscreen->info.max_se *
-                                         sscreen->info.max_sh_per_se);
+               unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
                unsigned late_alloc_limit; /* The limit is per SH. */
 
                if (sctx->family == CHIP_KABINI) {
index 19472a50ce159d2d91813310349384293a750f2d..3560c34c17a9dba77589b5ff1094e97099d1d4da 100644 (file)
@@ -525,6 +525,10 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
 
     radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
                          &ws->info.max_sh_per_se);
+    if (ws->gen == DRV_SI) {
+        ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units /
+                                      (ws->info.max_se * ws->info.max_sh_per_se);
+    }
 
     radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL,
                          &ws->accel_working2);