X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_gpu_info.c;h=139cb51f95832b6ca831c33f7125c363a5e4e6de;hb=d90e656fa7c19d1c48287d03babb7e535dc6aa51;hp=0f81531d5e3985d179ccd1c68ec56bea37e5ebea;hpb=94297142332fbd42bf281c6b6e3b418d9479d63e;p=mesa.git diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 0f81531d5e3..139cb51f958 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -93,6 +93,14 @@ static bool has_syncobj(int fd) return value ? true : false; } +static uint64_t fix_vram_size(uint64_t size) +{ + /* The VRAM size is underreported, so we need to fix it, because + * it's used to compute the number of memory modules for harvesting. + */ + return align64(size, 256*1024*1024); +} + bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, struct amdgpu_gpu_info *amdinfo) @@ -266,7 +274,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, /* Note: usable_heap_size values can be random and can't be relied on. */ info->gart_size = meminfo.gtt.total_heap_size; - info->vram_size = meminfo.vram.total_heap_size; + info->vram_size = fix_vram_size(meminfo.vram.total_heap_size); info->vram_vis_size = meminfo.cpu_accessible_vram.total_heap_size; } else { /* This is a deprecated interface, which reports usable sizes @@ -297,7 +305,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, } info->gart_size = gtt.heap_size; - info->vram_size = vram.heap_size; + info->vram_size = fix_vram_size(vram.heap_size); info->vram_vis_size = vram_vis.heap_size; } @@ -313,24 +321,24 @@ bool ac_query_gpu_info(int fd, void *dev_p, #define identify_chip(chipname) identify_chip2(chipname, chipname) switch (amdinfo->family_id) { - case AMDGPU_FAMILY_SI: + case FAMILY_SI: identify_chip(TAHITI); identify_chip(PITCAIRN); identify_chip2(CAPEVERDE, VERDE); identify_chip(OLAND); identify_chip(HAINAN); break; - case AMDGPU_FAMILY_CI: + case FAMILY_CI: identify_chip(BONAIRE); identify_chip(HAWAII); break; - case AMDGPU_FAMILY_KV: + case FAMILY_KV: identify_chip2(SPECTRE, KAVERI); identify_chip2(SPOOKY, KAVERI); identify_chip2(KALINDI, KABINI); identify_chip2(GODAVARI, KABINI); break; - case AMDGPU_FAMILY_VI: + case FAMILY_VI: identify_chip(ICELAND); identify_chip(TONGA); identify_chip(FIJI); @@ -339,22 +347,22 @@ bool ac_query_gpu_info(int fd, void *dev_p, identify_chip(POLARIS12); identify_chip(VEGAM); break; - case AMDGPU_FAMILY_CZ: + case FAMILY_CZ: identify_chip(CARRIZO); identify_chip(STONEY); break; - case AMDGPU_FAMILY_AI: + case FAMILY_AI: identify_chip(VEGA10); identify_chip(VEGA12); identify_chip(VEGA20); identify_chip(ARCTURUS); break; - case AMDGPU_FAMILY_RV: + case FAMILY_RV: identify_chip(RAVEN); identify_chip(RAVEN2); identify_chip(RENOIR); break; - case AMDGPU_FAMILY_NV: + case FAMILY_NV: identify_chip(NAVI10); identify_chip(NAVI12); identify_chip(NAVI14); @@ -402,6 +410,9 @@ bool ac_query_gpu_info(int fd, void *dev_p, else info->max_alloc_size = info->gart_size * 0.7; + /* Set which chips have uncached device memory. */ + info->has_l2_uncached = info->chip_class >= GFX9; + /* Set hardware information. */ info->gds_size = gds.gds_total_size; info->gds_gfx_partition_size = gds.gds_gfx_partition_size; @@ -462,6 +473,14 @@ bool ac_query_gpu_info(int fd, void *dev_p, } if (info->chip_class >= GFX10) { info->tcc_cache_line_size = 128; + + if (info->drm_minor >= 35) { + info->tcc_harvested = device_info.tcc_disabled_mask != 0; + } else { + /* This is a hack, but it's all we can do without a kernel upgrade. */ + info->tcc_harvested = + (info->vram_size / info->num_tcc_blocks) != 512*1024*1024; + } } else { info->tcc_cache_line_size = 64; } @@ -481,8 +500,15 @@ bool ac_query_gpu_info(int fd, void *dev_p, assert(util_is_power_of_two_or_zero(compute.available_rings + 1)); info->has_graphics = gfx.available_rings > 0; - info->num_sdma_rings = util_bitcount(dma.available_rings); - info->num_compute_rings = util_bitcount(compute.available_rings); + info->num_rings[RING_GFX] = util_bitcount(gfx.available_rings); + info->num_rings[RING_COMPUTE] = util_bitcount(compute.available_rings); + info->num_rings[RING_DMA] = util_bitcount(dma.available_rings); + info->num_rings[RING_UVD] = util_bitcount(uvd.available_rings); + info->num_rings[RING_VCE] = util_bitcount(vce.available_rings); + info->num_rings[RING_UVD_ENC] = util_bitcount(uvd_enc.available_rings); + info->num_rings[RING_VCN_DEC] = util_bitcount(vcn_dec.available_rings); + info->num_rings[RING_VCN_ENC] = util_bitcount(vcn_enc.available_rings); + info->num_rings[RING_VCN_JPEG] = util_bitcount(vcn_jpeg.available_rings); /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc. @@ -543,6 +569,12 @@ bool ac_query_gpu_info(int fd, void *dev_p, info->num_good_cu_per_sh = info->num_good_compute_units / (info->max_se * info->max_sh_per_se); + /* Round down to the nearest multiple of 2, because the hw can't + * disable CUs. It can only disable whole WGPs (dual-CUs). + */ + if (info->chip_class >= GFX10) + info->num_good_cu_per_sh -= info->num_good_cu_per_sh % 2; + memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode, sizeof(amdinfo->gb_tile_mode)); info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask; @@ -601,6 +633,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, case CHIP_NAVI14: pc_lines = 512; break; + case CHIP_ARCTURUS: + break; default: assert(0); } @@ -613,19 +647,9 @@ bool ac_query_gpu_info(int fd, void *dev_p, } } - if (info->chip_class >= GFX10) { - switch (info->family) { - case CHIP_NAVI10: - case CHIP_NAVI12: - info->num_sdp_interfaces = 16; - break; - case CHIP_NAVI14: - info->num_sdp_interfaces = 8; - break; - default: - assert(0); - } - } + /* The number of SDPs is the same as the number of TCCs for now. */ + if (info->chip_class >= GFX10) + info->num_sdp_interfaces = device_info.num_tcc_blocks; info->max_wave64_per_simd = info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM ? 8 : 10; @@ -678,15 +702,42 @@ void ac_print_gpu_info(struct radeon_info *info) printf(" pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", info->pci_domain, info->pci_bus, info->pci_dev, info->pci_func); + + printf(" name = %s\n", info->name); + printf(" marketing_name = %s\n", info->marketing_name); + printf(" is_pro_graphics = %u\n", info->is_pro_graphics); printf(" pci_id = 0x%x\n", info->pci_id); printf(" family = %i\n", info->family); printf(" chip_class = %i\n", info->chip_class); + printf(" family_id = %i\n", info->family_id); printf(" chip_external_rev = %i\n", info->chip_external_rev); - printf(" num_compute_rings = %u\n", info->num_compute_rings); - printf(" num_sdma_rings = %i\n", info->num_sdma_rings); printf(" clock_crystal_freq = %i\n", info->clock_crystal_freq); - printf(" tcc_cache_line_size = %u\n", info->tcc_cache_line_size); + printf("Features:\n"); + printf(" has_graphics = %i\n", info->has_graphics); + printf(" num_rings[RING_GFX] = %i\n", info->num_rings[RING_GFX]); + printf(" num_rings[RING_DMA] = %i\n", info->num_rings[RING_DMA]); + printf(" num_rings[RING_COMPUTE] = %u\n", info->num_rings[RING_COMPUTE]); + printf(" num_rings[RING_UVD] = %i\n", info->num_rings[RING_UVD]); + printf(" num_rings[RING_VCE] = %i\n", info->num_rings[RING_VCE]); + printf(" num_rings[RING_UVD_ENC] = %i\n", info->num_rings[RING_UVD_ENC]); + printf(" num_rings[RING_VCN_DEC] = %i\n", info->num_rings[RING_VCN_DEC]); + printf(" num_rings[RING_VCN_ENC] = %i\n", info->num_rings[RING_VCN_ENC]); + printf(" num_rings[RING_VCN_JPEG] = %i\n", info->num_rings[RING_VCN_JPEG]); + printf(" has_clear_state = %u\n", info->has_clear_state); + printf(" has_distributed_tess = %u\n", info->has_distributed_tess); + printf(" has_dcc_constant_encode = %u\n", info->has_dcc_constant_encode); + printf(" has_rbplus = %u\n", info->has_rbplus); + printf(" rbplus_allowed = %u\n", info->rbplus_allowed); + printf(" has_load_ctx_reg_pkt = %u\n", info->has_load_ctx_reg_pkt); + printf(" has_out_of_order_rast = %u\n", info->has_out_of_order_rast); + printf(" cpdma_prefetch_writes_memory = %u\n", info->cpdma_prefetch_writes_memory); + printf(" has_gfx9_scissor_bug = %i\n", info->has_gfx9_scissor_bug); + printf(" has_tc_compat_zrange_bug = %i\n", info->has_tc_compat_zrange_bug); + printf(" has_msaa_sample_loc_bug = %i\n", info->has_msaa_sample_loc_bug); + printf(" has_ls_vgpr_init_bug = %i\n", info->has_ls_vgpr_init_bug); + + printf("Display features:\n"); printf(" use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned); printf(" use_display_dcc_with_retile_blit = %u\n", info->use_display_dcc_with_retile_blit); @@ -703,6 +754,10 @@ void ac_print_gpu_info(struct radeon_info *info) printf(" min_alloc_size = %u\n", info->min_alloc_size); printf(" address32_hi = %u\n", info->address32_hi); printf(" has_dedicated_vram = %u\n", info->has_dedicated_vram); + printf(" num_sdp_interfaces = %u\n", info->num_sdp_interfaces); + printf(" num_tcc_blocks = %i\n", info->num_tcc_blocks); + printf(" tcc_cache_line_size = %u\n", info->tcc_cache_line_size); + printf(" tcc_harvested = %u\n", info->tcc_harvested); printf("CP info:\n"); printf(" gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2); @@ -750,9 +805,11 @@ void ac_print_gpu_info(struct radeon_info *info) printf(" max_shader_clock = %i\n", info->max_shader_clock); printf(" num_good_compute_units = %i\n", info->num_good_compute_units); printf(" num_good_cu_per_sh = %i\n", info->num_good_cu_per_sh); - printf(" num_tcc_blocks = %i\n", info->num_tcc_blocks); printf(" max_se = %i\n", info->max_se); printf(" max_sh_per_se = %i\n", info->max_sh_per_se); + printf(" max_wave64_per_simd = %i\n", info->max_wave64_per_simd); + printf(" num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd); + printf(" num_physical_wave64_vgprs_per_simd = %i\n", info->num_physical_wave64_vgprs_per_simd); printf("Render backend info:\n"); printf(" pa_sc_tile_steering_override = 0x%x\n", info->pa_sc_tile_steering_override); @@ -761,9 +818,17 @@ void ac_print_gpu_info(struct radeon_info *info) printf(" pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes); printf(" enabled_rb_mask = 0x%x\n", info->enabled_rb_mask); printf(" max_alignment = %u\n", (unsigned)info->max_alignment); + printf(" pbb_max_alloc_count = %u\n", info->pbb_max_alloc_count); - printf("GB_ADDR_CONFIG:\n"); - if (info->chip_class >= GFX9) { + printf("GB_ADDR_CONFIG: 0x%08x\n", info->gb_addr_config); + if (info->chip_class >= GFX10) { + printf(" num_pipes = %u\n", + 1 << G_0098F8_NUM_PIPES(info->gb_addr_config)); + printf(" pipe_interleave_size = %u\n", + 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config)); + printf(" max_compressed_frags = %u\n", + 1 << G_0098F8_MAX_COMPRESSED_FRAGS(info->gb_addr_config)); + } else if (info->chip_class == GFX9) { printf(" num_pipes = %u\n", 1 << G_0098F8_NUM_PIPES(info->gb_addr_config)); printf(" pipe_interleave_size = %u\n",