From: Marek Olšák Date: Fri, 22 May 2020 12:04:07 +0000 (-0400) Subject: ac/gpu_info: replace num_good_cu_per_sh with min/max_good_cu_per_sa X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2cf46f2e3d89c9cd9a30835ee2ebdf24cdd8119b;p=mesa.git ac/gpu_info: replace num_good_cu_per_sh with min/max_good_cu_per_sa Perf counters use the new max number. Reviewed-by: Bas Nieuwenhuizen Part-of: --- diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index f5eb421b115..bdd8de374dd 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -624,14 +624,15 @@ bool ac_query_gpu_info(int fd, void *dev_p, util_bitcount(info->cu_mask[i][j]); } } - info->num_good_cu_per_sh = info->num_good_compute_units / - (info->max_se * info->max_sh_per_se); - /* Round down to the nearest multiple of 2, because the hw can't - * disable CUs. It can only disable whole WGPs (dual-CUs). + /* On GFX10, only whole WGPs (in units of 2 CUs) can be disabled, + * and max - min <= 2. */ - if (info->chip_class >= GFX10) - info->num_good_cu_per_sh -= info->num_good_cu_per_sh % 2; + unsigned cu_group = info->chip_class >= GFX10 ? 2 : 1; + info->max_good_cu_per_sa = DIV_ROUND_UP(info->num_good_compute_units, + (info->max_se * info->max_sh_per_se * cu_group)) * cu_group; + info->min_good_cu_per_sa = (info->num_good_compute_units / + (info->max_se * info->max_sh_per_se * cu_group)) * cu_group; memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode, sizeof(amdinfo->gb_tile_mode)); @@ -910,7 +911,8 @@ void ac_print_gpu_info(struct radeon_info *info) printf("Shader core info:\n"); printf(" max_shader_clock = %i\n", info->max_shader_clock); printf(" num_good_compute_units = %i\n", info->num_good_compute_units); - printf(" num_good_cu_per_sh = %i\n", info->num_good_cu_per_sh); + printf(" max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa); + printf(" min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa); printf(" max_se = %i\n", info->max_se); printf(" max_sh_per_se = %i\n", info->max_sh_per_se); printf(" max_wave64_per_simd = %i\n", info->max_wave64_per_simd); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 07da7fd4625..f5d7ea9f892 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -157,7 +157,8 @@ struct radeon_info { uint32_t r600_max_quad_pipes; /* wave size / 16 */ uint32_t max_shader_clock; uint32_t num_good_compute_units; - uint32_t num_good_cu_per_sh; + uint32_t max_good_cu_per_sa; + uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */ uint32_t max_se; /* shader engines */ uint32_t max_sh_per_se; /* shader arrays per shader engine */ uint32_t max_wave64_per_simd; diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index dee5563e824..6b8ab29d5a9 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1753,7 +1753,7 @@ void radv_GetPhysicalDeviceProperties2( properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sh_per_se; properties->computeUnitsPerShaderArray = - pdevice->rad_info.num_good_cu_per_sh; + pdevice->rad_info.min_good_cu_per_sa; properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit; properties->wavefrontsPerSimd = diff --git a/src/amd/vulkan/radv_rgp.c b/src/amd/vulkan/radv_rgp.c index 94253aae6d9..7003cf456b2 100644 --- a/src/amd/vulkan/radv_rgp.c +++ b/src/amd/vulkan/radv_rgp.c @@ -358,7 +358,7 @@ radv_fill_sqtt_asic_info(struct radv_device *device, chunk->vgprs_per_simd = rad_info->num_physical_wave64_vgprs_per_simd; chunk->sgprs_per_simd = rad_info->num_physical_sgprs_per_simd; chunk->shader_engines = rad_info->max_se; - chunk->compute_unit_per_shader_engine = rad_info->num_good_cu_per_sh; + chunk->compute_unit_per_shader_engine = rad_info->min_good_cu_per_sa; chunk->simd_per_compute_unit = rad_info->num_simd_per_compute_unit; chunk->wavefronts_per_simd = rad_info->max_wave64_per_simd; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 28ca8560c09..a43821affb2 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -293,7 +293,7 @@ si_emit_graphics(struct radv_device *device, } /* Compute LATE_ALLOC_VS.LIMIT. */ - unsigned num_cu_per_sh = physical_device->rad_info.num_good_cu_per_sh; + unsigned num_cu_per_sh = physical_device->rad_info.min_good_cu_per_sa; unsigned late_alloc_wave64 = 0; /* The limit is per SH. */ unsigned late_alloc_wave64_gs = 0; unsigned cu_mask_vs = 0xffff; diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 6e341727ece..a1a4eca1965 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -1256,7 +1256,7 @@ void si_init_perfcounters(struct si_screen *screen) else if (!strcmp(block->b->b->name, "TA") || !strcmp(block->b->b->name, "TCP") || !strcmp(block->b->b->name, "TD")) { - block->num_instances = MAX2(1, screen->info.num_good_cu_per_sh); + block->num_instances = MAX2(1, screen->info.max_good_cu_per_sa); } if (si_pc_block_has_per_instance_groups(pc, block)) { diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index df39a05403b..2ce68c781d0 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5269,7 +5269,7 @@ static void si_init_config(struct si_context *sctx) } /* Compute LATE_ALLOC_VS.LIMIT. */ - unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh; + unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa; unsigned late_alloc_wave64 = 0; /* The limit is per SH. */ unsigned cu_mask_vs = 0xffff; unsigned cu_mask_gs = 0xffff; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 759353c8bea..616747de88b 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1143,7 +1143,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader S_00B22C_LDS_SIZE(shader->config.lds_size)); /* Determine LATE_ALLOC_GS. */ - unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh; + unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa; unsigned late_alloc_wave64; /* The limit is per SH. */ /* For Wave32, the hw will launch twice the number of late diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 002ebe07b55..5f52fda52dc 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -532,7 +532,8 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL, &ws->info.max_sh_per_se); if (ws->gen == DRV_SI) { - ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units / + ws->info.max_good_cu_per_sa = + ws->info.min_good_cu_per_sa = ws->info.num_good_compute_units / (ws->info.max_se * ws->info.max_sh_per_se); }