/*
 * Copyright © 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
26 #include "ac_gpu_info.h"
30 #include "util/u_math.h"
35 #include <amdgpu_drm.h>
/* Index of the 2D color tile mode within the gb_tile_mode[] array. */
#define CIK_TILE_MODE_COLOR_2D			14

/* Extract the 5-bit PIPE_CONFIG field (bits 6..10) from a tile mode value. */
#define CIK__GB_TILE_MODE__PIPE_CONFIG(x)	(((x) >> 6) & 0x1f)

/* Values of the PIPE_CONFIG field. */
#define CIK__PIPE_CONFIG__ADDR_SURF_P2			0
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16		4
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16		5
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32		6
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32		7
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16	8
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16	9
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16	10
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16	11
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16	12
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32	13
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32	14
#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16	16
#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16	17
57 static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info
*info
)
59 unsigned mode2d
= info
->gb_tile_mode
[CIK_TILE_MODE_COLOR_2D
];
61 switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d
)) {
62 case CIK__PIPE_CONFIG__ADDR_SURF_P2
:
64 case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16
:
65 case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16
:
66 case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32
:
67 case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32
:
69 case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16
:
70 case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16
:
71 case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16
:
72 case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16
:
73 case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16
:
74 case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32
:
75 case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32
:
77 case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16
:
78 case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16
:
81 fprintf(stderr
, "Invalid CIK pipe configuration, assuming P2\n");
82 assert(!"this should never occur");
87 static bool has_syncobj(int fd
)
90 if (drmGetCap(fd
, DRM_CAP_SYNCOBJ
, &value
))
92 return value
? true : false;
95 bool ac_query_gpu_info(int fd
, amdgpu_device_handle dev
,
96 struct radeon_info
*info
,
97 struct amdgpu_gpu_info
*amdinfo
)
99 struct amdgpu_buffer_size_alignments alignment_info
= {};
100 struct amdgpu_heap_info vram
, vram_vis
, gtt
;
101 struct drm_amdgpu_info_hw_ip dma
= {}, compute
= {}, uvd
= {}, vce
= {}, vcn_dec
= {}, vcn_enc
= {};
102 uint32_t vce_version
= 0, vce_feature
= 0, uvd_version
= 0, uvd_feature
= 0;
104 drmDevicePtr devinfo
;
107 r
= drmGetDevice2(fd
, 0, &devinfo
);
109 fprintf(stderr
, "amdgpu: drmGetDevice2 failed.\n");
112 info
->pci_domain
= devinfo
->businfo
.pci
->domain
;
113 info
->pci_bus
= devinfo
->businfo
.pci
->bus
;
114 info
->pci_dev
= devinfo
->businfo
.pci
->dev
;
115 info
->pci_func
= devinfo
->businfo
.pci
->func
;
116 drmFreeDevice(&devinfo
);
118 /* Query hardware and driver information. */
119 r
= amdgpu_query_gpu_info(dev
, amdinfo
);
121 fprintf(stderr
, "amdgpu: amdgpu_query_gpu_info failed.\n");
125 r
= amdgpu_query_buffer_size_alignment(dev
, &alignment_info
);
127 fprintf(stderr
, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n");
131 r
= amdgpu_query_heap_info(dev
, AMDGPU_GEM_DOMAIN_VRAM
, 0, &vram
);
133 fprintf(stderr
, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
137 r
= amdgpu_query_heap_info(dev
, AMDGPU_GEM_DOMAIN_VRAM
,
138 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED
,
141 fprintf(stderr
, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n");
145 r
= amdgpu_query_heap_info(dev
, AMDGPU_GEM_DOMAIN_GTT
, 0, >t
);
147 fprintf(stderr
, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
151 r
= amdgpu_query_hw_ip_info(dev
, AMDGPU_HW_IP_DMA
, 0, &dma
);
153 fprintf(stderr
, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
157 r
= amdgpu_query_hw_ip_info(dev
, AMDGPU_HW_IP_COMPUTE
, 0, &compute
);
159 fprintf(stderr
, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
163 r
= amdgpu_query_hw_ip_info(dev
, AMDGPU_HW_IP_UVD
, 0, &uvd
);
165 fprintf(stderr
, "amdgpu: amdgpu_query_hw_ip_info(uvd) failed.\n");
169 if (info
->drm_major
== 3 && info
->drm_minor
>= 17) {
170 r
= amdgpu_query_hw_ip_info(dev
, AMDGPU_HW_IP_VCN_DEC
, 0, &vcn_dec
);
172 fprintf(stderr
, "amdgpu: amdgpu_query_hw_ip_info(vcn_dec) failed.\n");
177 if (info
->drm_major
== 3 && info
->drm_minor
>= 17) {
178 r
= amdgpu_query_hw_ip_info(dev
, AMDGPU_HW_IP_VCN_ENC
, 0, &vcn_enc
);
180 fprintf(stderr
, "amdgpu: amdgpu_query_hw_ip_info(vcn_enc) failed.\n");
185 r
= amdgpu_query_firmware_version(dev
, AMDGPU_INFO_FW_GFX_ME
, 0, 0,
186 &info
->me_fw_version
,
187 &info
->me_fw_feature
);
189 fprintf(stderr
, "amdgpu: amdgpu_query_firmware_version(me) failed.\n");
193 r
= amdgpu_query_firmware_version(dev
, AMDGPU_INFO_FW_GFX_PFP
, 0, 0,
194 &info
->pfp_fw_version
,
195 &info
->pfp_fw_feature
);
197 fprintf(stderr
, "amdgpu: amdgpu_query_firmware_version(pfp) failed.\n");
201 r
= amdgpu_query_firmware_version(dev
, AMDGPU_INFO_FW_GFX_CE
, 0, 0,
202 &info
->ce_fw_version
,
203 &info
->ce_fw_feature
);
205 fprintf(stderr
, "amdgpu: amdgpu_query_firmware_version(ce) failed.\n");
209 r
= amdgpu_query_firmware_version(dev
, AMDGPU_INFO_FW_UVD
, 0, 0,
210 &uvd_version
, &uvd_feature
);
212 fprintf(stderr
, "amdgpu: amdgpu_query_firmware_version(uvd) failed.\n");
216 r
= amdgpu_query_hw_ip_info(dev
, AMDGPU_HW_IP_VCE
, 0, &vce
);
218 fprintf(stderr
, "amdgpu: amdgpu_query_hw_ip_info(vce) failed.\n");
222 r
= amdgpu_query_firmware_version(dev
, AMDGPU_INFO_FW_VCE
, 0, 0,
223 &vce_version
, &vce_feature
);
225 fprintf(stderr
, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n");
229 /* Set chip identification. */
230 info
->pci_id
= amdinfo
->asic_id
; /* TODO: is this correct? */
231 info
->vce_harvest_config
= amdinfo
->vce_harvest_config
;
233 switch (info
->pci_id
) {
234 #define CHIPSET(pci_id, cfamily) case pci_id: info->family = CHIP_##cfamily; break;
235 #include "pci_ids/radeonsi_pci_ids.h"
239 fprintf(stderr
, "amdgpu: Invalid PCI ID.\n");
243 if (info
->family
>= CHIP_VEGA10
)
244 info
->chip_class
= GFX9
;
245 else if (info
->family
>= CHIP_TONGA
)
246 info
->chip_class
= VI
;
247 else if (info
->family
>= CHIP_BONAIRE
)
248 info
->chip_class
= CIK
;
249 else if (info
->family
>= CHIP_TAHITI
)
250 info
->chip_class
= SI
;
252 fprintf(stderr
, "amdgpu: Unknown family.\n");
256 /* Set which chips have dedicated VRAM. */
257 info
->has_dedicated_vram
=
258 !(amdinfo
->ids_flags
& AMDGPU_IDS_FLAGS_FUSION
);
260 /* Set hardware information. */
261 info
->gart_size
= gtt
.heap_size
;
262 info
->vram_size
= vram
.heap_size
;
263 info
->vram_vis_size
= vram_vis
.heap_size
;
264 /* The kernel can split large buffers in VRAM but not in GTT, so large
265 * allocations can fail or cause buffer movement failures in the kernel.
267 info
->max_alloc_size
= MIN2(info
->vram_size
* 0.9, info
->gart_size
* 0.7);
268 /* convert the shader clock from KHz to MHz */
269 info
->max_shader_clock
= amdinfo
->max_engine_clk
/ 1000;
270 info
->max_se
= amdinfo
->num_shader_engines
;
271 info
->max_sh_per_se
= amdinfo
->num_shader_arrays_per_engine
;
272 info
->has_hw_decode
=
273 (uvd
.available_rings
!= 0) || (vcn_dec
.available_rings
!= 0);
274 info
->uvd_fw_version
=
275 uvd
.available_rings
? uvd_version
: 0;
276 info
->vce_fw_version
=
277 vce
.available_rings
? vce_version
: 0;
278 info
->has_userptr
= true;
279 info
->has_syncobj
= has_syncobj(fd
);
280 info
->has_sync_file
= info
->has_syncobj
&& info
->drm_minor
>= 21;
281 info
->has_ctx_priority
= info
->drm_minor
>= 22;
282 info
->num_render_backends
= amdinfo
->rb_pipes
;
283 info
->clock_crystal_freq
= amdinfo
->gpu_counter_freq
;
284 if (!info
->clock_crystal_freq
) {
285 fprintf(stderr
, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n");
286 info
->clock_crystal_freq
= 1;
288 info
->tcc_cache_line_size
= 64; /* TC L2 line size on GCN */
289 if (info
->chip_class
== GFX9
) {
290 info
->num_tile_pipes
= 1 << G_0098F8_NUM_PIPES(amdinfo
->gb_addr_cfg
);
291 info
->pipe_interleave_bytes
=
292 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(amdinfo
->gb_addr_cfg
);
294 info
->num_tile_pipes
= cik_get_num_tile_pipes(amdinfo
);
295 info
->pipe_interleave_bytes
=
296 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(amdinfo
->gb_addr_cfg
);
298 info
->has_virtual_memory
= true;
300 assert(util_is_power_of_two(dma
.available_rings
+ 1));
301 assert(util_is_power_of_two(compute
.available_rings
+ 1));
303 info
->num_sdma_rings
= util_bitcount(dma
.available_rings
);
304 info
->num_compute_rings
= util_bitcount(compute
.available_rings
);
306 /* Get the number of good compute units. */
307 info
->num_good_compute_units
= 0;
308 for (i
= 0; i
< info
->max_se
; i
++)
309 for (j
= 0; j
< info
->max_sh_per_se
; j
++)
310 info
->num_good_compute_units
+=
311 util_bitcount(amdinfo
->cu_bitmap
[i
][j
]);
313 memcpy(info
->si_tile_mode_array
, amdinfo
->gb_tile_mode
,
314 sizeof(amdinfo
->gb_tile_mode
));
315 info
->enabled_rb_mask
= amdinfo
->enabled_rb_pipes_mask
;
317 memcpy(info
->cik_macrotile_mode_array
, amdinfo
->gb_macro_tile_mode
,
318 sizeof(amdinfo
->gb_macro_tile_mode
));
320 info
->pte_fragment_size
= alignment_info
.size_local
;
321 info
->gart_page_size
= alignment_info
.size_remote
;
323 if (info
->chip_class
== SI
)
324 info
->gfx_ib_pad_with_type2
= TRUE
;
/**
 * Write the fixed driver identifier "AMD-MESA-DRV" into \p uuid and
 * zero-fill the rest of the buffer.
 *
 * \param uuid  destination buffer of at least \p size bytes
 * \param size  size of \p uuid in bytes; asserted to be large enough for
 *              the identifier including its NUL terminator
 */
void ac_compute_driver_uuid(char *uuid, size_t size)
{
	static const char amd_uuid[] = "AMD-MESA-DRV";
	/* Never copy more than the destination holds, even if assert() is
	 * compiled out. */
	const size_t len = sizeof(amd_uuid) < size ? sizeof(amd_uuid) : size;

	assert(size >= sizeof(amd_uuid));

	memset(uuid, 0, size);
	memcpy(uuid, amd_uuid, len);
}
339 void ac_compute_device_uuid(struct radeon_info
*info
, char *uuid
, size_t size
)
341 uint32_t *uint_uuid
= (uint32_t*)uuid
;
343 assert(size
>= sizeof(uint32_t)*4);
346 * Use the device info directly instead of using a sha1. GL/VK UUIDs
347 * are 16 byte vs 20 byte for sha1, and the truncation that would be
348 * required would get rid of part of the little entropy we have.
350 memset(uuid
, 0, size
);
351 uint_uuid
[0] = info
->pci_domain
;
352 uint_uuid
[1] = info
->pci_bus
;
353 uint_uuid
[2] = info
->pci_dev
;
354 uint_uuid
[3] = info
->pci_func
;
357 void ac_print_gpu_info(struct radeon_info
*info
)
359 printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
360 info
->pci_domain
, info
->pci_bus
,
361 info
->pci_dev
, info
->pci_func
);
362 printf("pci_id = 0x%x\n", info
->pci_id
);
363 printf("family = %i\n", info
->family
);
364 printf("chip_class = %i\n", info
->chip_class
);
365 printf("pte_fragment_size = %u\n", info
->pte_fragment_size
);
366 printf("gart_page_size = %u\n", info
->gart_page_size
);
367 printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(info
->gart_size
, 1024*1024));
368 printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(info
->vram_size
, 1024*1024));
369 printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info
->vram_vis_size
, 1024*1024));
370 printf("max_alloc_size = %i MB\n",
371 (int)DIV_ROUND_UP(info
->max_alloc_size
, 1024*1024));
372 printf("min_alloc_size = %u\n", info
->min_alloc_size
);
373 printf("has_dedicated_vram = %u\n", info
->has_dedicated_vram
);
374 printf("has_virtual_memory = %i\n", info
->has_virtual_memory
);
375 printf("gfx_ib_pad_with_type2 = %i\n", info
->gfx_ib_pad_with_type2
);
376 printf("has_hw_decode = %u\n", info
->has_hw_decode
);
377 printf("num_sdma_rings = %i\n", info
->num_sdma_rings
);
378 printf("num_compute_rings = %u\n", info
->num_compute_rings
);
379 printf("uvd_fw_version = %u\n", info
->uvd_fw_version
);
380 printf("vce_fw_version = %u\n", info
->vce_fw_version
);
381 printf("me_fw_version = %i\n", info
->me_fw_version
);
382 printf("me_fw_feature = %i\n", info
->me_fw_feature
);
383 printf("pfp_fw_version = %i\n", info
->pfp_fw_version
);
384 printf("pfp_fw_feature = %i\n", info
->pfp_fw_feature
);
385 printf("ce_fw_version = %i\n", info
->ce_fw_version
);
386 printf("ce_fw_feature = %i\n", info
->ce_fw_feature
);
387 printf("vce_harvest_config = %i\n", info
->vce_harvest_config
);
388 printf("clock_crystal_freq = %i\n", info
->clock_crystal_freq
);
389 printf("tcc_cache_line_size = %u\n", info
->tcc_cache_line_size
);
390 printf("drm = %i.%i.%i\n", info
->drm_major
,
391 info
->drm_minor
, info
->drm_patchlevel
);
392 printf("has_userptr = %i\n", info
->has_userptr
);
393 printf("has_syncobj = %u\n", info
->has_syncobj
);
394 printf("has_sync_file = %u\n", info
->has_sync_file
);
396 printf("r600_max_quad_pipes = %i\n", info
->r600_max_quad_pipes
);
397 printf("max_shader_clock = %i\n", info
->max_shader_clock
);
398 printf("num_good_compute_units = %i\n", info
->num_good_compute_units
);
399 printf("max_se = %i\n", info
->max_se
);
400 printf("max_sh_per_se = %i\n", info
->max_sh_per_se
);
402 printf("r600_gb_backend_map = %i\n", info
->r600_gb_backend_map
);
403 printf("r600_gb_backend_map_valid = %i\n", info
->r600_gb_backend_map_valid
);
404 printf("r600_num_banks = %i\n", info
->r600_num_banks
);
405 printf("num_render_backends = %i\n", info
->num_render_backends
);
406 printf("num_tile_pipes = %i\n", info
->num_tile_pipes
);
407 printf("pipe_interleave_bytes = %i\n", info
->pipe_interleave_bytes
);
408 printf("enabled_rb_mask = 0x%x\n", info
->enabled_rb_mask
);
409 printf("max_alignment = %u\n", (unsigned)info
->max_alignment
);